mirror of
				https://git.suyu.dev/suyu/suyu.git
				synced 2025-10-22 18:36:51 +08:00 
			
		
		
		
	Merge pull request #329 from bunnei/shader-gen-part-1
OpenGL shader generation part 1
This commit is contained in:
		
						commit
						fdca7b5f7a
					
				| @ -32,6 +32,8 @@ add_library(common STATIC | |||||||
|     break_points.cpp |     break_points.cpp | ||||||
|     break_points.h |     break_points.h | ||||||
|     chunk_file.h |     chunk_file.h | ||||||
|  |     cityhash.cpp | ||||||
|  |     cityhash.h | ||||||
|     code_block.h |     code_block.h | ||||||
|     color.h |     color.h | ||||||
|     common_funcs.h |     common_funcs.h | ||||||
| @ -39,7 +41,6 @@ add_library(common STATIC | |||||||
|     common_types.h |     common_types.h | ||||||
|     file_util.cpp |     file_util.cpp | ||||||
|     file_util.h |     file_util.h | ||||||
|     hash.cpp |  | ||||||
|     hash.h |     hash.h | ||||||
|     linear_disk_cache.h |     linear_disk_cache.h | ||||||
|     logging/backend.cpp |     logging/backend.cpp | ||||||
|  | |||||||
| @ -115,7 +115,7 @@ private: | |||||||
|     // assignment would copy the full storage value, rather than just the bits
 |     // assignment would copy the full storage value, rather than just the bits
 | ||||||
|     // relevant to this particular bit field.
 |     // relevant to this particular bit field.
 | ||||||
|     // We don't delete it because we want BitField to be trivially copyable.
 |     // We don't delete it because we want BitField to be trivially copyable.
 | ||||||
|     BitField& operator=(const BitField&) = default; |     constexpr BitField& operator=(const BitField&) = default; | ||||||
| 
 | 
 | ||||||
|     // StorageType is T for non-enum types and the underlying type of T if
 |     // StorageType is T for non-enum types and the underlying type of T if
 | ||||||
|     // T is an enumeration. Note that T is wrapped within an enable_if in the
 |     // T is an enumeration. Note that T is wrapped within an enable_if in the
 | ||||||
| @ -166,20 +166,20 @@ public: | |||||||
|     // so that we can use this within unions
 |     // so that we can use this within unions
 | ||||||
|     constexpr BitField() = default; |     constexpr BitField() = default; | ||||||
| 
 | 
 | ||||||
|     FORCE_INLINE operator T() const { |     constexpr FORCE_INLINE operator T() const { | ||||||
|         return Value(); |         return Value(); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     FORCE_INLINE void Assign(const T& value) { |     constexpr FORCE_INLINE void Assign(const T& value) { | ||||||
|         storage = (storage & ~mask) | FormatValue(value); |         storage = (storage & ~mask) | FormatValue(value); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     FORCE_INLINE T Value() const { |     constexpr T Value() const { | ||||||
|         return ExtractValue(storage); |         return ExtractValue(storage); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     // TODO: we may want to change this to explicit operator bool() if it's bug-free in VS2015
 |     // TODO: we may want to change this to explicit operator bool() if it's bug-free in VS2015
 | ||||||
|     FORCE_INLINE bool ToBool() const { |     constexpr FORCE_INLINE bool ToBool() const { | ||||||
|         return Value() != 0; |         return Value() != 0; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | |||||||
							
								
								
									
										340
									
								
								src/common/cityhash.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										340
									
								
								src/common/cityhash.cpp
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,340 @@ | |||||||
|  | // Copyright (c) 2011 Google, Inc.
 | ||||||
|  | //
 | ||||||
|  | // Permission is hereby granted, free of charge, to any person obtaining a copy
 | ||||||
|  | // of this software and associated documentation files (the "Software"), to deal
 | ||||||
|  | // in the Software without restriction, including without limitation the rights
 | ||||||
|  | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 | ||||||
|  | // copies of the Software, and to permit persons to whom the Software is
 | ||||||
|  | // furnished to do so, subject to the following conditions:
 | ||||||
|  | //
 | ||||||
|  | // The above copyright notice and this permission notice shall be included in
 | ||||||
|  | // all copies or substantial portions of the Software.
 | ||||||
|  | //
 | ||||||
|  | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 | ||||||
|  | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 | ||||||
|  | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 | ||||||
|  | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 | ||||||
|  | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 | ||||||
|  | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 | ||||||
|  | // THE SOFTWARE.
 | ||||||
|  | //
 | ||||||
|  | // CityHash, by Geoff Pike and Jyrki Alakuijala
 | ||||||
|  | //
 | ||||||
|  | // This file provides CityHash64() and related functions.
 | ||||||
|  | //
 | ||||||
|  | // It's probably possible to create even faster hash functions by
 | ||||||
|  | // writing a program that systematically explores some of the space of
 | ||||||
|  | // possible hash functions, by using SIMD instructions, or by
 | ||||||
|  | // compromising on hash quality.
 | ||||||
|  | 
 | ||||||
|  | #include <algorithm> | ||||||
|  | #include <string.h> // for memcpy and memset
 | ||||||
|  | #include "cityhash.h" | ||||||
|  | #include "common/swap.h" | ||||||
|  | 
 | ||||||
|  | // #include "config.h"
 | ||||||
|  | #ifdef __GNUC__ | ||||||
|  | #define HAVE_BUILTIN_EXPECT 1 | ||||||
|  | #endif | ||||||
|  | #ifdef COMMON_BIG_ENDIAN | ||||||
|  | #define WORDS_BIGENDIAN 1 | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | using namespace std; | ||||||
|  | 
 | ||||||
|  | typedef uint8_t uint8; | ||||||
|  | typedef uint32_t uint32; | ||||||
|  | typedef uint64_t uint64; | ||||||
|  | 
 | ||||||
|  | namespace Common { | ||||||
|  | 
 | ||||||
|  | static uint64 UNALIGNED_LOAD64(const char* p) { | ||||||
|  |     uint64 result; | ||||||
|  |     memcpy(&result, p, sizeof(result)); | ||||||
|  |     return result; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static uint32 UNALIGNED_LOAD32(const char* p) { | ||||||
|  |     uint32 result; | ||||||
|  |     memcpy(&result, p, sizeof(result)); | ||||||
|  |     return result; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | #ifdef WORDS_BIGENDIAN | ||||||
|  | #define uint32_in_expected_order(x) (swap32(x)) | ||||||
|  | #define uint64_in_expected_order(x) (swap64(x)) | ||||||
|  | #else | ||||||
|  | #define uint32_in_expected_order(x) (x) | ||||||
|  | #define uint64_in_expected_order(x) (x) | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | #if !defined(LIKELY) | ||||||
|  | #if HAVE_BUILTIN_EXPECT | ||||||
|  | #define LIKELY(x) (__builtin_expect(!!(x), 1)) | ||||||
|  | #else | ||||||
|  | #define LIKELY(x) (x) | ||||||
|  | #endif | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | static uint64 Fetch64(const char* p) { | ||||||
|  |     return uint64_in_expected_order(UNALIGNED_LOAD64(p)); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static uint32 Fetch32(const char* p) { | ||||||
|  |     return uint32_in_expected_order(UNALIGNED_LOAD32(p)); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // Some primes between 2^63 and 2^64 for various uses.
 | ||||||
|  | static const uint64 k0 = 0xc3a5c85c97cb3127ULL; | ||||||
|  | static const uint64 k1 = 0xb492b66fbe98f273ULL; | ||||||
|  | static const uint64 k2 = 0x9ae16a3b2f90404fULL; | ||||||
|  | 
 | ||||||
|  | // Bitwise right rotate.  Normally this will compile to a single
 | ||||||
|  | // instruction, especially if the shift is a manifest constant.
 | ||||||
|  | static uint64 Rotate(uint64 val, int shift) { | ||||||
|  |     // Avoid shifting by 64: doing so yields an undefined result.
 | ||||||
|  |     return shift == 0 ? val : ((val >> shift) | (val << (64 - shift))); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static uint64 ShiftMix(uint64 val) { | ||||||
|  |     return val ^ (val >> 47); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static uint64 HashLen16(uint64 u, uint64 v) { | ||||||
|  |     return Hash128to64(uint128(u, v)); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static uint64 HashLen16(uint64 u, uint64 v, uint64 mul) { | ||||||
|  |     // Murmur-inspired hashing.
 | ||||||
|  |     uint64 a = (u ^ v) * mul; | ||||||
|  |     a ^= (a >> 47); | ||||||
|  |     uint64 b = (v ^ a) * mul; | ||||||
|  |     b ^= (b >> 47); | ||||||
|  |     b *= mul; | ||||||
|  |     return b; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static uint64 HashLen0to16(const char* s, size_t len) { | ||||||
|  |     if (len >= 8) { | ||||||
|  |         uint64 mul = k2 + len * 2; | ||||||
|  |         uint64 a = Fetch64(s) + k2; | ||||||
|  |         uint64 b = Fetch64(s + len - 8); | ||||||
|  |         uint64 c = Rotate(b, 37) * mul + a; | ||||||
|  |         uint64 d = (Rotate(a, 25) + b) * mul; | ||||||
|  |         return HashLen16(c, d, mul); | ||||||
|  |     } | ||||||
|  |     if (len >= 4) { | ||||||
|  |         uint64 mul = k2 + len * 2; | ||||||
|  |         uint64 a = Fetch32(s); | ||||||
|  |         return HashLen16(len + (a << 3), Fetch32(s + len - 4), mul); | ||||||
|  |     } | ||||||
|  |     if (len > 0) { | ||||||
|  |         uint8 a = s[0]; | ||||||
|  |         uint8 b = s[len >> 1]; | ||||||
|  |         uint8 c = s[len - 1]; | ||||||
|  |         uint32 y = static_cast<uint32>(a) + (static_cast<uint32>(b) << 8); | ||||||
|  |         uint32 z = static_cast<uint32>(len) + (static_cast<uint32>(c) << 2); | ||||||
|  |         return ShiftMix(y * k2 ^ z * k0) * k2; | ||||||
|  |     } | ||||||
|  |     return k2; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // This probably works well for 16-byte strings as well, but it may be overkill
 | ||||||
|  | // in that case.
 | ||||||
|  | static uint64 HashLen17to32(const char* s, size_t len) { | ||||||
|  |     uint64 mul = k2 + len * 2; | ||||||
|  |     uint64 a = Fetch64(s) * k1; | ||||||
|  |     uint64 b = Fetch64(s + 8); | ||||||
|  |     uint64 c = Fetch64(s + len - 8) * mul; | ||||||
|  |     uint64 d = Fetch64(s + len - 16) * k2; | ||||||
|  |     return HashLen16(Rotate(a + b, 43) + Rotate(c, 30) + d, a + Rotate(b + k2, 18) + c, mul); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // Return a 16-byte hash for 48 bytes.  Quick and dirty.
 | ||||||
|  | // Callers do best to use "random-looking" values for a and b.
 | ||||||
|  | static pair<uint64, uint64> WeakHashLen32WithSeeds(uint64 w, uint64 x, uint64 y, uint64 z, uint64 a, | ||||||
|  |                                                    uint64 b) { | ||||||
|  |     a += w; | ||||||
|  |     b = Rotate(b + a + z, 21); | ||||||
|  |     uint64 c = a; | ||||||
|  |     a += x; | ||||||
|  |     a += y; | ||||||
|  |     b += Rotate(a, 44); | ||||||
|  |     return make_pair(a + z, b + c); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // Return a 16-byte hash for s[0] ... s[31], a, and b.  Quick and dirty.
 | ||||||
|  | static pair<uint64, uint64> WeakHashLen32WithSeeds(const char* s, uint64 a, uint64 b) { | ||||||
|  |     return WeakHashLen32WithSeeds(Fetch64(s), Fetch64(s + 8), Fetch64(s + 16), Fetch64(s + 24), a, | ||||||
|  |                                   b); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // Return an 8-byte hash for 33 to 64 bytes.
 | ||||||
|  | static uint64 HashLen33to64(const char* s, size_t len) { | ||||||
|  |     uint64 mul = k2 + len * 2; | ||||||
|  |     uint64 a = Fetch64(s) * k2; | ||||||
|  |     uint64 b = Fetch64(s + 8); | ||||||
|  |     uint64 c = Fetch64(s + len - 24); | ||||||
|  |     uint64 d = Fetch64(s + len - 32); | ||||||
|  |     uint64 e = Fetch64(s + 16) * k2; | ||||||
|  |     uint64 f = Fetch64(s + 24) * 9; | ||||||
|  |     uint64 g = Fetch64(s + len - 8); | ||||||
|  |     uint64 h = Fetch64(s + len - 16) * mul; | ||||||
|  |     uint64 u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9; | ||||||
|  |     uint64 v = ((a + g) ^ d) + f + 1; | ||||||
|  |     uint64 w = swap64((u + v) * mul) + h; | ||||||
|  |     uint64 x = Rotate(e + f, 42) + c; | ||||||
|  |     uint64 y = (swap64((v + w) * mul) + g) * mul; | ||||||
|  |     uint64 z = e + f + c; | ||||||
|  |     a = swap64((x + z) * mul + y) + b; | ||||||
|  |     b = ShiftMix((z + a) * mul + d + h) * mul; | ||||||
|  |     return b + x; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | uint64 CityHash64(const char* s, size_t len) { | ||||||
|  |     if (len <= 32) { | ||||||
|  |         if (len <= 16) { | ||||||
|  |             return HashLen0to16(s, len); | ||||||
|  |         } else { | ||||||
|  |             return HashLen17to32(s, len); | ||||||
|  |         } | ||||||
|  |     } else if (len <= 64) { | ||||||
|  |         return HashLen33to64(s, len); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     // For strings over 64 bytes we hash the end first, and then as we
 | ||||||
|  |     // loop we keep 56 bytes of state: v, w, x, y, and z.
 | ||||||
|  |     uint64 x = Fetch64(s + len - 40); | ||||||
|  |     uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56); | ||||||
|  |     uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24)); | ||||||
|  |     pair<uint64, uint64> v = WeakHashLen32WithSeeds(s + len - 64, len, z); | ||||||
|  |     pair<uint64, uint64> w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x); | ||||||
|  |     x = x * k1 + Fetch64(s); | ||||||
|  | 
 | ||||||
|  |     // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
 | ||||||
|  |     len = (len - 1) & ~static_cast<size_t>(63); | ||||||
|  |     do { | ||||||
|  |         x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; | ||||||
|  |         y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; | ||||||
|  |         x ^= w.second; | ||||||
|  |         y += v.first + Fetch64(s + 40); | ||||||
|  |         z = Rotate(z + w.first, 33) * k1; | ||||||
|  |         v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); | ||||||
|  |         w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); | ||||||
|  |         std::swap(z, x); | ||||||
|  |         s += 64; | ||||||
|  |         len -= 64; | ||||||
|  |     } while (len != 0); | ||||||
|  |     return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z, | ||||||
|  |                      HashLen16(v.second, w.second) + x); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | uint64 CityHash64WithSeed(const char* s, size_t len, uint64 seed) { | ||||||
|  |     return CityHash64WithSeeds(s, len, k2, seed); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | uint64 CityHash64WithSeeds(const char* s, size_t len, uint64 seed0, uint64 seed1) { | ||||||
|  |     return HashLen16(CityHash64(s, len) - seed0, seed1); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // A subroutine for CityHash128().  Returns a decent 128-bit hash for strings
 | ||||||
|  | // of any length representable in signed long.  Based on City and Murmur.
 | ||||||
|  | static uint128 CityMurmur(const char* s, size_t len, uint128 seed) { | ||||||
|  |     uint64 a = Uint128Low64(seed); | ||||||
|  |     uint64 b = Uint128High64(seed); | ||||||
|  |     uint64 c = 0; | ||||||
|  |     uint64 d = 0; | ||||||
|  |     signed long l = static_cast<long>(len) - 16; | ||||||
|  |     if (l <= 0) { // len <= 16
 | ||||||
|  |         a = ShiftMix(a * k1) * k1; | ||||||
|  |         c = b * k1 + HashLen0to16(s, len); | ||||||
|  |         d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c)); | ||||||
|  |     } else { // len > 16
 | ||||||
|  |         c = HashLen16(Fetch64(s + len - 8) + k1, a); | ||||||
|  |         d = HashLen16(b + len, c + Fetch64(s + len - 16)); | ||||||
|  |         a += d; | ||||||
|  |         do { | ||||||
|  |             a ^= ShiftMix(Fetch64(s) * k1) * k1; | ||||||
|  |             a *= k1; | ||||||
|  |             b ^= a; | ||||||
|  |             c ^= ShiftMix(Fetch64(s + 8) * k1) * k1; | ||||||
|  |             c *= k1; | ||||||
|  |             d ^= c; | ||||||
|  |             s += 16; | ||||||
|  |             l -= 16; | ||||||
|  |         } while (l > 0); | ||||||
|  |     } | ||||||
|  |     a = HashLen16(a, c); | ||||||
|  |     b = HashLen16(d, b); | ||||||
|  |     return uint128(a ^ b, HashLen16(b, a)); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed) { | ||||||
|  |     if (len < 128) { | ||||||
|  |         return CityMurmur(s, len, seed); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     // We expect len >= 128 to be the common case.  Keep 56 bytes of state:
 | ||||||
|  |     // v, w, x, y, and z.
 | ||||||
|  |     pair<uint64, uint64> v, w; | ||||||
|  |     uint64 x = Uint128Low64(seed); | ||||||
|  |     uint64 y = Uint128High64(seed); | ||||||
|  |     uint64 z = len * k1; | ||||||
|  |     v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s); | ||||||
|  |     v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8); | ||||||
|  |     w.first = Rotate(y + z, 35) * k1 + x; | ||||||
|  |     w.second = Rotate(x + Fetch64(s + 88), 53) * k1; | ||||||
|  | 
 | ||||||
|  |     // This is the same inner loop as CityHash64(), manually unrolled.
 | ||||||
|  |     do { | ||||||
|  |         x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; | ||||||
|  |         y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; | ||||||
|  |         x ^= w.second; | ||||||
|  |         y += v.first + Fetch64(s + 40); | ||||||
|  |         z = Rotate(z + w.first, 33) * k1; | ||||||
|  |         v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); | ||||||
|  |         w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); | ||||||
|  |         std::swap(z, x); | ||||||
|  |         s += 64; | ||||||
|  |         x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; | ||||||
|  |         y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; | ||||||
|  |         x ^= w.second; | ||||||
|  |         y += v.first + Fetch64(s + 40); | ||||||
|  |         z = Rotate(z + w.first, 33) * k1; | ||||||
|  |         v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); | ||||||
|  |         w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); | ||||||
|  |         std::swap(z, x); | ||||||
|  |         s += 64; | ||||||
|  |         len -= 128; | ||||||
|  |     } while (LIKELY(len >= 128)); | ||||||
|  |     x += Rotate(v.first + z, 49) * k0; | ||||||
|  |     y = y * k0 + Rotate(w.second, 37); | ||||||
|  |     z = z * k0 + Rotate(w.first, 27); | ||||||
|  |     w.first *= 9; | ||||||
|  |     v.first *= k0; | ||||||
|  |     // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
 | ||||||
|  |     for (size_t tail_done = 0; tail_done < len;) { | ||||||
|  |         tail_done += 32; | ||||||
|  |         y = Rotate(x + y, 42) * k0 + v.second; | ||||||
|  |         w.first += Fetch64(s + len - tail_done + 16); | ||||||
|  |         x = x * k0 + w.first; | ||||||
|  |         z += w.second + Fetch64(s + len - tail_done); | ||||||
|  |         w.second += v.first; | ||||||
|  |         v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second); | ||||||
|  |         v.first *= k0; | ||||||
|  |     } | ||||||
|  |     // At this point our 56 bytes of state should contain more than
 | ||||||
|  |     // enough information for a strong 128-bit hash.  We use two
 | ||||||
|  |     // different 56-byte-to-8-byte hashes to get a 16-byte final result.
 | ||||||
|  |     x = HashLen16(x, v.first); | ||||||
|  |     y = HashLen16(y + z, w.first); | ||||||
|  |     return uint128(HashLen16(x + v.second, w.second) + y, HashLen16(x + w.second, y + v.second)); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | uint128 CityHash128(const char* s, size_t len) { | ||||||
|  |     return len >= 16 | ||||||
|  |                ? CityHash128WithSeed(s + 16, len - 16, uint128(Fetch64(s), Fetch64(s + 8) + k0)) | ||||||
|  |                : CityHash128WithSeed(s, len, uint128(k0, k1)); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | } // namespace Common
 | ||||||
							
								
								
									
										110
									
								
								src/common/cityhash.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										110
									
								
								src/common/cityhash.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,110 @@ | |||||||
|  | // Copyright (c) 2011 Google, Inc.
 | ||||||
|  | //
 | ||||||
|  | // Permission is hereby granted, free of charge, to any person obtaining a copy
 | ||||||
|  | // of this software and associated documentation files (the "Software"), to deal
 | ||||||
|  | // in the Software without restriction, including without limitation the rights
 | ||||||
|  | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 | ||||||
|  | // copies of the Software, and to permit persons to whom the Software is
 | ||||||
|  | // furnished to do so, subject to the following conditions:
 | ||||||
|  | //
 | ||||||
|  | // The above copyright notice and this permission notice shall be included in
 | ||||||
|  | // all copies or substantial portions of the Software.
 | ||||||
|  | //
 | ||||||
|  | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 | ||||||
|  | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 | ||||||
|  | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 | ||||||
|  | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 | ||||||
|  | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 | ||||||
|  | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 | ||||||
|  | // THE SOFTWARE.
 | ||||||
|  | //
 | ||||||
|  | // CityHash, by Geoff Pike and Jyrki Alakuijala
 | ||||||
|  | //
 | ||||||
|  | // http://code.google.com/p/cityhash/
 | ||||||
|  | //
 | ||||||
|  | // This file provides a few functions for hashing strings.  All of them are
 | ||||||
|  | // high-quality functions in the sense that they pass standard tests such
 | ||||||
|  | // as Austin Appleby's SMHasher.  They are also fast.
 | ||||||
|  | //
 | ||||||
|  | // For 64-bit x86 code, on short strings, we don't know of anything faster than
 | ||||||
|  | // CityHash64 that is of comparable quality.  We believe our nearest competitor
 | ||||||
|  | // is Murmur3.  For 64-bit x86 code, CityHash64 is an excellent choice for hash
 | ||||||
|  | // tables and most other hashing (excluding cryptography).
 | ||||||
|  | //
 | ||||||
|  | // For 64-bit x86 code, on long strings, the picture is more complicated.
 | ||||||
|  | // On many recent Intel CPUs, such as Nehalem, Westmere, Sandy Bridge, etc.,
 | ||||||
|  | // CityHashCrc128 appears to be faster than all competitors of comparable
 | ||||||
|  | // quality.  CityHash128 is also good but not quite as fast.  We believe our
 | ||||||
|  | // nearest competitor is Bob Jenkins' Spooky.  We don't have great data for
 | ||||||
|  | // other 64-bit CPUs, but for long strings we know that Spooky is slightly
 | ||||||
|  | // faster than CityHash on some relatively recent AMD x86-64 CPUs, for example.
 | ||||||
|  | // Note that CityHashCrc128 is declared in citycrc.h.
 | ||||||
|  | //
 | ||||||
|  | // For 32-bit x86 code, we don't know of anything faster than CityHash32 that
 | ||||||
|  | // is of comparable quality.  We believe our nearest competitor is Murmur3A.
 | ||||||
|  | // (On 64-bit CPUs, it is typically faster to use the other CityHash variants.)
 | ||||||
|  | //
 | ||||||
|  | // Functions in the CityHash family are not suitable for cryptography.
 | ||||||
|  | //
 | ||||||
|  | // Please see CityHash's README file for more details on our performance
 | ||||||
|  | // measurements and so on.
 | ||||||
|  | //
 | ||||||
|  | // WARNING: This code has been only lightly tested on big-endian platforms!
 | ||||||
|  | // It is known to work well on little-endian platforms that have a small penalty
 | ||||||
|  | // for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
 | ||||||
|  | // It should work on all 32-bit and 64-bit platforms that allow unaligned reads;
 | ||||||
|  | // bug reports are welcome.
 | ||||||
|  | //
 | ||||||
|  | // By the way, for some hash functions, given strings a and b, the hash
 | ||||||
|  | // of a+b is easily derived from the hashes of a and b.  This property
 | ||||||
|  | // doesn't hold for any hash functions in this file.
 | ||||||
|  | 
 | ||||||
|  | #pragma once | ||||||
|  | 
 | ||||||
|  | #include <utility> | ||||||
|  | #include <stdint.h> | ||||||
|  | #include <stdlib.h> // for size_t. | ||||||
|  | 
 | ||||||
namespace Common {

// A 128-bit value held as two 64-bit halves: first is the low half and
// second is the high half.
using uint128 = std::pair<uint64_t, uint64_t>;

// Returns the low 64 bits of x (the pair's first member).
inline uint64_t Uint128Low64(const uint128& x) {
    return x.first;
}

// Returns the high 64 bits of x (the pair's second member).
inline uint64_t Uint128High64(const uint128& x) {
    return x.second;
}

// Hash function for a byte array.
uint64_t CityHash64(const char* buf, size_t len);

// Hash function for a byte array.  For convenience, a 64-bit seed is also
// hashed into the result.
uint64_t CityHash64WithSeed(const char* buf, size_t len, uint64_t seed);

// Hash function for a byte array.  For convenience, two seeds are also
// hashed into the result.
uint64_t CityHash64WithSeeds(const char* buf, size_t len, uint64_t seed0, uint64_t seed1);

// Hash function for a byte array.
uint128 CityHash128(const char* s, size_t len);

// Hash function for a byte array.  For convenience, a 128-bit seed is also
// hashed into the result.
uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed);

// Hash 128 input bits down to 64 bits of output.
// This is intended to be a reasonably good hash function.
inline uint64_t Hash128to64(const uint128& x) {
    // Murmur-inspired hashing: multiply, fold the high bits down by 47, and
    // repeat with the high half mixed in.
    const uint64_t kMul = 0x9ddfea08eb382d69ULL;
    const uint64_t low = Uint128Low64(x);
    const uint64_t high = Uint128High64(x);

    uint64_t a = (low ^ high) * kMul;
    a ^= a >> 47;

    uint64_t b = (high ^ a) * kMul;
    b ^= b >> 47;

    return b * kMul;
}

} // namespace Common
 | ||||||
| @ -1,141 +0,0 @@ | |||||||
| // Copyright 2015 Citra Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
#if defined(_MSC_VER)
#include <stdlib.h>
#endif
#include <cstring>
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/hash.h"
| 
 |  | ||||||
| namespace Common { |  | ||||||
| 
 |  | ||||||
| // MurmurHash3 was written by Austin Appleby, and is placed in the public
 |  | ||||||
| // domain. The author hereby disclaims copyright to this source code.
 |  | ||||||
| 
 |  | ||||||
| // Block read - if your platform needs to do endian-swapping or can only handle aligned reads, do
 |  | ||||||
| // the conversion here
 |  | ||||||
| static FORCE_INLINE u64 getblock64(const u64* p, size_t i) { |  | ||||||
|     return p[i]; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| // Finalization mix - force all bits of a hash block to avalanche
 |  | ||||||
| static FORCE_INLINE u64 fmix64(u64 k) { |  | ||||||
|     k ^= k >> 33; |  | ||||||
|     k *= 0xff51afd7ed558ccdllu; |  | ||||||
|     k ^= k >> 33; |  | ||||||
|     k *= 0xc4ceb9fe1a85ec53llu; |  | ||||||
|     k ^= k >> 33; |  | ||||||
| 
 |  | ||||||
|     return k; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| // This is the 128-bit variant of the MurmurHash3 hash function that is targeted for 64-bit
 |  | ||||||
| // platforms (MurmurHash3_x64_128). It was taken from:
 |  | ||||||
| // https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
 |  | ||||||
| void MurmurHash3_128(const void* key, size_t len, u32 seed, void* out) { |  | ||||||
|     const u8* data = (const u8*)key; |  | ||||||
|     const size_t nblocks = len / 16; |  | ||||||
| 
 |  | ||||||
|     u64 h1 = seed; |  | ||||||
|     u64 h2 = seed; |  | ||||||
| 
 |  | ||||||
|     const u64 c1 = 0x87c37b91114253d5llu; |  | ||||||
|     const u64 c2 = 0x4cf5ad432745937fllu; |  | ||||||
| 
 |  | ||||||
|     // Body
 |  | ||||||
| 
 |  | ||||||
|     const u64* blocks = (const u64*)(data); |  | ||||||
| 
 |  | ||||||
|     for (size_t i = 0; i < nblocks; i++) { |  | ||||||
|         u64 k1 = getblock64(blocks, i * 2 + 0); |  | ||||||
|         u64 k2 = getblock64(blocks, i * 2 + 1); |  | ||||||
| 
 |  | ||||||
|         k1 *= c1; |  | ||||||
|         k1 = _rotl64(k1, 31); |  | ||||||
|         k1 *= c2; |  | ||||||
|         h1 ^= k1; |  | ||||||
| 
 |  | ||||||
|         h1 = _rotl64(h1, 27); |  | ||||||
|         h1 += h2; |  | ||||||
|         h1 = h1 * 5 + 0x52dce729; |  | ||||||
| 
 |  | ||||||
|         k2 *= c2; |  | ||||||
|         k2 = _rotl64(k2, 33); |  | ||||||
|         k2 *= c1; |  | ||||||
|         h2 ^= k2; |  | ||||||
| 
 |  | ||||||
|         h2 = _rotl64(h2, 31); |  | ||||||
|         h2 += h1; |  | ||||||
|         h2 = h2 * 5 + 0x38495ab5; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     // Tail
 |  | ||||||
| 
 |  | ||||||
|     const u8* tail = (const u8*)(data + nblocks * 16); |  | ||||||
| 
 |  | ||||||
|     u64 k1 = 0; |  | ||||||
|     u64 k2 = 0; |  | ||||||
| 
 |  | ||||||
|     switch (len & 15) { |  | ||||||
|     case 15: |  | ||||||
|         k2 ^= ((u64)tail[14]) << 48; |  | ||||||
|     case 14: |  | ||||||
|         k2 ^= ((u64)tail[13]) << 40; |  | ||||||
|     case 13: |  | ||||||
|         k2 ^= ((u64)tail[12]) << 32; |  | ||||||
|     case 12: |  | ||||||
|         k2 ^= ((u64)tail[11]) << 24; |  | ||||||
|     case 11: |  | ||||||
|         k2 ^= ((u64)tail[10]) << 16; |  | ||||||
|     case 10: |  | ||||||
|         k2 ^= ((u64)tail[9]) << 8; |  | ||||||
|     case 9: |  | ||||||
|         k2 ^= ((u64)tail[8]) << 0; |  | ||||||
|         k2 *= c2; |  | ||||||
|         k2 = _rotl64(k2, 33); |  | ||||||
|         k2 *= c1; |  | ||||||
|         h2 ^= k2; |  | ||||||
| 
 |  | ||||||
|     case 8: |  | ||||||
|         k1 ^= ((u64)tail[7]) << 56; |  | ||||||
|     case 7: |  | ||||||
|         k1 ^= ((u64)tail[6]) << 48; |  | ||||||
|     case 6: |  | ||||||
|         k1 ^= ((u64)tail[5]) << 40; |  | ||||||
|     case 5: |  | ||||||
|         k1 ^= ((u64)tail[4]) << 32; |  | ||||||
|     case 4: |  | ||||||
|         k1 ^= ((u64)tail[3]) << 24; |  | ||||||
|     case 3: |  | ||||||
|         k1 ^= ((u64)tail[2]) << 16; |  | ||||||
|     case 2: |  | ||||||
|         k1 ^= ((u64)tail[1]) << 8; |  | ||||||
|     case 1: |  | ||||||
|         k1 ^= ((u64)tail[0]) << 0; |  | ||||||
|         k1 *= c1; |  | ||||||
|         k1 = _rotl64(k1, 31); |  | ||||||
|         k1 *= c2; |  | ||||||
|         h1 ^= k1; |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     // Finalization
 |  | ||||||
| 
 |  | ||||||
|     h1 ^= len; |  | ||||||
|     h2 ^= len; |  | ||||||
| 
 |  | ||||||
|     h1 += h2; |  | ||||||
|     h2 += h1; |  | ||||||
| 
 |  | ||||||
|     h1 = fmix64(h1); |  | ||||||
|     h2 = fmix64(h2); |  | ||||||
| 
 |  | ||||||
|     h1 += h2; |  | ||||||
|     h2 += h1; |  | ||||||
| 
 |  | ||||||
|     ((u64*)out)[0] = h1; |  | ||||||
|     ((u64*)out)[1] = h2; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace Common
 |  | ||||||
| @ -5,12 +5,12 @@ | |||||||
| #pragma once | #pragma once | ||||||
| 
 | 
 | ||||||
| #include <cstddef> | #include <cstddef> | ||||||
|  | #include <cstring> | ||||||
|  | #include "common/cityhash.h" | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| 
 | 
 | ||||||
| namespace Common { | namespace Common { | ||||||
| 
 | 
 | ||||||
| void MurmurHash3_128(const void* key, size_t len, u32 seed, void* out); |  | ||||||
| 
 |  | ||||||
| /**
 | /**
 | ||||||
|  * Computes a 64-bit hash over the specified block of data |  * Computes a 64-bit hash over the specified block of data | ||||||
|  * @param data Block of data to compute hash over |  * @param data Block of data to compute hash over | ||||||
| @ -18,9 +18,54 @@ void MurmurHash3_128(const void* key, size_t len, u32 seed, void* out); | |||||||
|  * @returns 64-bit hash value that was computed over the data block |  * @returns 64-bit hash value that was computed over the data block | ||||||
|  */ |  */ | ||||||
| static inline u64 ComputeHash64(const void* data, size_t len) { | static inline u64 ComputeHash64(const void* data, size_t len) { | ||||||
|     u64 res[2]; |     return CityHash64(static_cast<const char*>(data), len); | ||||||
|     MurmurHash3_128(data, len, 0, res); |  | ||||||
|     return res[0]; |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | /**
 | ||||||
|  |  * Computes a 64-bit hash of a struct. In addition to being trivially copyable, it is also critical | ||||||
|  |  * that either the struct includes no padding, or that any padding is initialized to a known value | ||||||
|  |  * by memsetting the struct to 0 before filling it in. | ||||||
|  |  */ | ||||||
|  | template <typename T> | ||||||
|  | static inline u64 ComputeStructHash64(const T& data) { | ||||||
|  |     static_assert(std::is_trivially_copyable<T>(), | ||||||
|  |                   "Type passed to ComputeStructHash64 must be trivially copyable"); | ||||||
|  |     return ComputeHash64(&data, sizeof(data)); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /// A helper template that ensures the padding in a struct is initialized by memsetting to 0.
 | ||||||
|  | template <typename T> | ||||||
|  | struct HashableStruct { | ||||||
|  |     // In addition to being trivially copyable, T must also have a trivial default constructor,
 | ||||||
|  |     // because any member initialization would be overridden by memset
 | ||||||
|  |     static_assert(std::is_trivial<T>(), "Type passed to HashableStruct must be trivial"); | ||||||
|  |     /*
 | ||||||
|  |      * We use a union because "implicitly-defined copy/move constructor for a union X copies the | ||||||
|  |      * object representation of X." and "implicitly-defined copy assignment operator for a union X | ||||||
|  |      * copies the object representation (3.9) of X." = Bytewise copy instead of memberwise copy. | ||||||
|  |      * This is important because the padding bytes are included in the hash and comparison between | ||||||
|  |      * objects. | ||||||
|  |      */ | ||||||
|  |     union { | ||||||
|  |         T state; | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     HashableStruct() { | ||||||
|  |         // Memset structure to zero padding bits, so that they will be deterministic when hashing
 | ||||||
|  |         std::memset(&state, 0, sizeof(T)); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     bool operator==(const HashableStruct<T>& o) const { | ||||||
|  |         return std::memcmp(&state, &o.state, sizeof(T)) == 0; | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     bool operator!=(const HashableStruct<T>& o) const { | ||||||
|  |         return !(*this == o); | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     size_t Hash() const { | ||||||
|  |         return Common::ComputeStructHash64(state); | ||||||
|  |     } | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| } // namespace Common
 | } // namespace Common
 | ||||||
|  | |||||||
| @ -9,6 +9,7 @@ add_library(video_core STATIC | |||||||
|     engines/maxwell_3d.h |     engines/maxwell_3d.h | ||||||
|     engines/maxwell_compute.cpp |     engines/maxwell_compute.cpp | ||||||
|     engines/maxwell_compute.h |     engines/maxwell_compute.h | ||||||
|  |     engines/shader_bytecode.h | ||||||
|     gpu.cpp |     gpu.cpp | ||||||
|     gpu.h |     gpu.h | ||||||
|     macro_interpreter.cpp |     macro_interpreter.cpp | ||||||
| @ -27,6 +28,8 @@ add_library(video_core STATIC | |||||||
|     renderer_opengl/gl_shader_decompiler.h |     renderer_opengl/gl_shader_decompiler.h | ||||||
|     renderer_opengl/gl_shader_gen.cpp |     renderer_opengl/gl_shader_gen.cpp | ||||||
|     renderer_opengl/gl_shader_gen.h |     renderer_opengl/gl_shader_gen.h | ||||||
|  |     renderer_opengl/gl_shader_manager.cpp | ||||||
|  |     renderer_opengl/gl_shader_manager.h | ||||||
|     renderer_opengl/gl_shader_util.cpp |     renderer_opengl/gl_shader_util.cpp | ||||||
|     renderer_opengl/gl_shader_util.h |     renderer_opengl/gl_shader_util.h | ||||||
|     renderer_opengl/gl_state.cpp |     renderer_opengl/gl_state.cpp | ||||||
|  | |||||||
| @ -427,14 +427,11 @@ public: | |||||||
|                         BitField<0, 1, u32> enable; |                         BitField<0, 1, u32> enable; | ||||||
|                         BitField<4, 4, ShaderProgram> program; |                         BitField<4, 4, ShaderProgram> program; | ||||||
|                     }; |                     }; | ||||||
|                     u32 start_id; |                     u32 offset; | ||||||
|                     INSERT_PADDING_WORDS(1); |                     INSERT_PADDING_WORDS(14); | ||||||
|                     u32 gpr_alloc; |  | ||||||
|                     ShaderStage type; |  | ||||||
|                     INSERT_PADDING_WORDS(9); |  | ||||||
|                 } shader_config[MaxShaderProgram]; |                 } shader_config[MaxShaderProgram]; | ||||||
| 
 | 
 | ||||||
|                 INSERT_PADDING_WORDS(0x8C); |                 INSERT_PADDING_WORDS(0x80); | ||||||
| 
 | 
 | ||||||
|                 struct { |                 struct { | ||||||
|                     u32 cb_size; |                     u32 cb_size; | ||||||
| @ -507,6 +504,7 @@ public: | |||||||
|     }; |     }; | ||||||
| 
 | 
 | ||||||
|     State state{}; |     State state{}; | ||||||
|  |     MemoryManager& memory_manager; | ||||||
| 
 | 
 | ||||||
|     /// Reads a register value located at the input method address
 |     /// Reads a register value located at the input method address
 | ||||||
|     u32 GetRegisterValue(u32 method) const; |     u32 GetRegisterValue(u32 method) const; | ||||||
| @ -521,8 +519,6 @@ public: | |||||||
|     std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const; |     std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const; | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     MemoryManager& memory_manager; |  | ||||||
| 
 |  | ||||||
|     std::unordered_map<u32, std::vector<u32>> uploaded_macros; |     std::unordered_map<u32, std::vector<u32>> uploaded_macros; | ||||||
| 
 | 
 | ||||||
|     /// Macro method that is currently being executed / being fed parameters.
 |     /// Macro method that is currently being executed / being fed parameters.
 | ||||||
|  | |||||||
							
								
								
									
										327
									
								
								src/video_core/engines/shader_bytecode.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										327
									
								
								src/video_core/engines/shader_bytecode.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,327 @@ | |||||||
|  | // Copyright 2018 yuzu Emulator Project
 | ||||||
|  | // Licensed under GPLv2 or any later version
 | ||||||
|  | // Refer to the license.txt file included.
 | ||||||
|  | 
 | ||||||
|  | #pragma once | ||||||
|  | 
 | ||||||
|  | #include <map> | ||||||
|  | #include <string> | ||||||
|  | #include "common/bit_field.h" | ||||||
|  | 
 | ||||||
|  | namespace Tegra { | ||||||
|  | namespace Shader { | ||||||
|  | 
 | ||||||
|  | struct Register { | ||||||
|  |     Register() = default; | ||||||
|  | 
 | ||||||
|  |     constexpr Register(u64 value) : value(value) {} | ||||||
|  | 
 | ||||||
|  |     constexpr u64 GetIndex() const { | ||||||
|  |         return value; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     constexpr operator u64() const { | ||||||
|  |         return value; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     template <typename T> | ||||||
|  |     constexpr u64 operator-(const T& oth) const { | ||||||
|  |         return value - oth; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     template <typename T> | ||||||
|  |     constexpr u64 operator&(const T& oth) const { | ||||||
|  |         return value & oth; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     constexpr u64 operator&(const Register& oth) const { | ||||||
|  |         return value & oth.value; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     constexpr u64 operator~() const { | ||||||
|  |         return ~value; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  | private: | ||||||
|  |     u64 value; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | union Attribute { | ||||||
|  |     Attribute() = default; | ||||||
|  | 
 | ||||||
|  |     constexpr Attribute(u64 value) : value(value) {} | ||||||
|  | 
 | ||||||
|  |     enum class Index : u64 { | ||||||
|  |         Position = 7, | ||||||
|  |         Attribute_0 = 8, | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     union { | ||||||
|  |         BitField<22, 2, u64> element; | ||||||
|  |         BitField<24, 6, Index> index; | ||||||
|  |         BitField<47, 3, u64> size; | ||||||
|  |     } fmt20; | ||||||
|  | 
 | ||||||
|  |     union { | ||||||
|  |         BitField<30, 2, u64> element; | ||||||
|  |         BitField<32, 6, Index> index; | ||||||
|  |     } fmt28; | ||||||
|  | 
 | ||||||
|  |     BitField<39, 8, u64> reg; | ||||||
|  |     u64 value; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | union Uniform { | ||||||
|  |     BitField<20, 14, u64> offset; | ||||||
|  |     BitField<34, 5, u64> index; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | union OpCode { | ||||||
|  |     enum class Id : u64 { | ||||||
|  |         TEXS = 0x6C, | ||||||
|  |         IPA = 0xE0, | ||||||
|  |         FFMA_IMM = 0x65, | ||||||
|  |         FFMA_CR = 0x93, | ||||||
|  |         FFMA_RC = 0xA3, | ||||||
|  |         FFMA_RR = 0xB3, | ||||||
|  | 
 | ||||||
|  |         FADD_C = 0x98B, | ||||||
|  |         FMUL_C = 0x98D, | ||||||
|  |         MUFU = 0xA10, | ||||||
|  |         FADD_R = 0xB8B, | ||||||
|  |         FMUL_R = 0xB8D, | ||||||
|  |         LD_A = 0x1DFB, | ||||||
|  |         ST_A = 0x1DFE, | ||||||
|  | 
 | ||||||
|  |         FSETP_R = 0x5BB, | ||||||
|  |         FSETP_C = 0x4BB, | ||||||
|  |         EXIT = 0xE30, | ||||||
|  |         KIL = 0xE33, | ||||||
|  | 
 | ||||||
|  |         FMUL_IMM = 0x70D, | ||||||
|  |         FMUL_IMM_x = 0x72D, | ||||||
|  |         FADD_IMM = 0x70B, | ||||||
|  |         FADD_IMM_x = 0x72B, | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     enum class Type { | ||||||
|  |         Trivial, | ||||||
|  |         Arithmetic, | ||||||
|  |         Ffma, | ||||||
|  |         Flow, | ||||||
|  |         Memory, | ||||||
|  |         Unknown, | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     struct Info { | ||||||
|  |         Type type; | ||||||
|  |         std::string name; | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     OpCode() = default; | ||||||
|  | 
 | ||||||
|  |     constexpr OpCode(Id value) : value(static_cast<u64>(value)) {} | ||||||
|  | 
 | ||||||
|  |     constexpr OpCode(u64 value) : value{value} {} | ||||||
|  | 
 | ||||||
|  |     constexpr Id EffectiveOpCode() const { | ||||||
|  |         switch (op1) { | ||||||
|  |         case Id::TEXS: | ||||||
|  |             return op1; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         switch (op2) { | ||||||
|  |         case Id::IPA: | ||||||
|  |             return op2; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         switch (op3) { | ||||||
|  |         case Id::FFMA_IMM: | ||||||
|  |         case Id::FFMA_CR: | ||||||
|  |         case Id::FFMA_RC: | ||||||
|  |         case Id::FFMA_RR: | ||||||
|  |             return op3; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         switch (op4) { | ||||||
|  |         case Id::EXIT: | ||||||
|  |         case Id::FSETP_R: | ||||||
|  |         case Id::FSETP_C: | ||||||
|  |         case Id::KIL: | ||||||
|  |             return op4; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         switch (op5) { | ||||||
|  |         case Id::MUFU: | ||||||
|  |         case Id::LD_A: | ||||||
|  |         case Id::ST_A: | ||||||
|  |         case Id::FADD_R: | ||||||
|  |         case Id::FADD_C: | ||||||
|  |         case Id::FMUL_R: | ||||||
|  |         case Id::FMUL_C: | ||||||
|  |             return op5; | ||||||
|  | 
 | ||||||
|  |         case Id::FMUL_IMM: | ||||||
|  |         case Id::FMUL_IMM_x: | ||||||
|  |             return Id::FMUL_IMM; | ||||||
|  | 
 | ||||||
|  |         case Id::FADD_IMM: | ||||||
|  |         case Id::FADD_IMM_x: | ||||||
|  |             return Id::FADD_IMM; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         return static_cast<Id>(value); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     static const Info& GetInfo(const OpCode& opcode) { | ||||||
|  |         static const std::map<Id, Info> info_table{BuildInfoTable()}; | ||||||
|  |         const auto& search{info_table.find(opcode.EffectiveOpCode())}; | ||||||
|  |         if (search != info_table.end()) { | ||||||
|  |             return search->second; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         static const Info unknown{Type::Unknown, "UNK"}; | ||||||
|  |         return unknown; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     constexpr operator Id() const { | ||||||
|  |         return static_cast<Id>(value); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     constexpr OpCode operator<<(size_t bits) const { | ||||||
|  |         return value << bits; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     constexpr OpCode operator>>(size_t bits) const { | ||||||
|  |         return value >> bits; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     template <typename T> | ||||||
|  |     constexpr u64 operator-(const T& oth) const { | ||||||
|  |         return value - oth; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     constexpr u64 operator&(const OpCode& oth) const { | ||||||
|  |         return value & oth.value; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     constexpr u64 operator~() const { | ||||||
|  |         return ~value; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     static std::map<Id, Info> BuildInfoTable() { | ||||||
|  |         std::map<Id, Info> info_table; | ||||||
|  |         info_table[Id::TEXS] = {Type::Memory, "texs"}; | ||||||
|  |         info_table[Id::LD_A] = {Type::Memory, "ld_a"}; | ||||||
|  |         info_table[Id::ST_A] = {Type::Memory, "st_a"}; | ||||||
|  |         info_table[Id::MUFU] = {Type::Arithmetic, "mufu"}; | ||||||
|  |         info_table[Id::FFMA_IMM] = {Type::Ffma, "ffma_imm"}; | ||||||
|  |         info_table[Id::FFMA_CR] = {Type::Ffma, "ffma_cr"}; | ||||||
|  |         info_table[Id::FFMA_RC] = {Type::Ffma, "ffma_rc"}; | ||||||
|  |         info_table[Id::FFMA_RR] = {Type::Ffma, "ffma_rr"}; | ||||||
|  |         info_table[Id::FADD_R] = {Type::Arithmetic, "fadd_r"}; | ||||||
|  |         info_table[Id::FADD_C] = {Type::Arithmetic, "fadd_c"}; | ||||||
|  |         info_table[Id::FADD_IMM] = {Type::Arithmetic, "fadd_imm"}; | ||||||
|  |         info_table[Id::FMUL_R] = {Type::Arithmetic, "fmul_r"}; | ||||||
|  |         info_table[Id::FMUL_C] = {Type::Arithmetic, "fmul_c"}; | ||||||
|  |         info_table[Id::FMUL_IMM] = {Type::Arithmetic, "fmul_imm"}; | ||||||
|  |         info_table[Id::FSETP_C] = {Type::Arithmetic, "fsetp_c"}; | ||||||
|  |         info_table[Id::FSETP_R] = {Type::Arithmetic, "fsetp_r"}; | ||||||
|  |         info_table[Id::EXIT] = {Type::Trivial, "exit"}; | ||||||
|  |         info_table[Id::IPA] = {Type::Trivial, "ipa"}; | ||||||
|  |         info_table[Id::KIL] = {Type::Flow, "kil"}; | ||||||
|  |         return info_table; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     BitField<57, 7, Id> op1; | ||||||
|  |     BitField<56, 8, Id> op2; | ||||||
|  |     BitField<55, 9, Id> op3; | ||||||
|  |     BitField<52, 12, Id> op4; | ||||||
|  |     BitField<51, 13, Id> op5; | ||||||
|  |     u64 value; | ||||||
|  | }; | ||||||
|  | static_assert(sizeof(OpCode) == 0x8, "Incorrect structure size"); | ||||||
|  | 
 | ||||||
|  | } // namespace Shader
 | ||||||
|  | } // namespace Tegra
 | ||||||
|  | 
 | ||||||
|  | namespace std { | ||||||
|  | 
 | ||||||
|  | // TODO(bunne): The below is forbidden by the C++ standard, but works fine. See #330.
 | ||||||
|  | template <> | ||||||
|  | struct make_unsigned<Tegra::Shader::Attribute> { | ||||||
|  |     using type = Tegra::Shader::Attribute; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | template <> | ||||||
|  | struct make_unsigned<Tegra::Shader::Register> { | ||||||
|  |     using type = Tegra::Shader::Register; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | template <> | ||||||
|  | struct make_unsigned<Tegra::Shader::OpCode> { | ||||||
|  |     using type = Tegra::Shader::OpCode; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | } // namespace std
 | ||||||
|  | 
 | ||||||
|  | namespace Tegra { | ||||||
|  | namespace Shader { | ||||||
|  | 
 | ||||||
|  | enum class Pred : u64 { | ||||||
|  |     UnusedIndex = 0x7, | ||||||
|  |     NeverExecute = 0xf, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | enum class SubOp : u64 { | ||||||
|  |     Cos = 0x0, | ||||||
|  |     Sin = 0x1, | ||||||
|  |     Ex2 = 0x2, | ||||||
|  |     Lg2 = 0x3, | ||||||
|  |     Rcp = 0x4, | ||||||
|  |     Rsq = 0x5, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | union Instruction { | ||||||
|  |     Instruction& operator=(const Instruction& instr) { | ||||||
|  |         hex = instr.hex; | ||||||
|  |         return *this; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     OpCode opcode; | ||||||
|  |     BitField<0, 8, Register> gpr0; | ||||||
|  |     BitField<8, 8, Register> gpr8; | ||||||
|  |     BitField<16, 4, Pred> pred; | ||||||
|  |     BitField<20, 8, Register> gpr20; | ||||||
|  |     BitField<20, 7, SubOp> sub_op; | ||||||
|  |     BitField<28, 8, Register> gpr28; | ||||||
|  |     BitField<36, 13, u64> imm36; | ||||||
|  |     BitField<39, 8, Register> gpr39; | ||||||
|  | 
 | ||||||
|  |     union { | ||||||
|  |         BitField<45, 1, u64> negate_b; | ||||||
|  |         BitField<46, 1, u64> abs_a; | ||||||
|  |         BitField<48, 1, u64> negate_a; | ||||||
|  |         BitField<49, 1, u64> abs_b; | ||||||
|  |         BitField<50, 1, u64> abs_d; | ||||||
|  |     } alu; | ||||||
|  | 
 | ||||||
|  |     union { | ||||||
|  |         BitField<48, 1, u64> negate_b; | ||||||
|  |         BitField<49, 1, u64> negate_c; | ||||||
|  |     } ffma; | ||||||
|  | 
 | ||||||
|  |     BitField<60, 1, u64> is_b_gpr; | ||||||
|  |     BitField<59, 1, u64> is_c_gpr; | ||||||
|  | 
 | ||||||
|  |     Attribute attribute; | ||||||
|  |     Uniform uniform; | ||||||
|  | 
 | ||||||
|  |     u64 hex; | ||||||
|  | }; | ||||||
|  | static_assert(sizeof(Instruction) == 0x8, "Incorrect structure size"); | ||||||
|  | static_assert(std::is_standard_layout<Instruction>::value, | ||||||
|  |               "Structure does not have standard layout"); | ||||||
|  | 
 | ||||||
|  | } // namespace Shader
 | ||||||
|  | } // namespace Tegra
 | ||||||
| @ -34,33 +34,7 @@ MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); | |||||||
| MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); | MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); | ||||||
| MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); | MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); | ||||||
| 
 | 
 | ||||||
| enum class UniformBindings : GLuint { Common, VS, FS }; |  | ||||||
| 
 |  | ||||||
| static void SetShaderUniformBlockBinding(GLuint shader, const char* name, UniformBindings binding, |  | ||||||
|                                          size_t expected_size) { |  | ||||||
|     GLuint ub_index = glGetUniformBlockIndex(shader, name); |  | ||||||
|     if (ub_index != GL_INVALID_INDEX) { |  | ||||||
|         GLint ub_size = 0; |  | ||||||
|         glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size); |  | ||||||
|         ASSERT_MSG(ub_size == expected_size, |  | ||||||
|                    "Uniform block size did not match! Got %d, expected %zu", |  | ||||||
|                    static_cast<int>(ub_size), expected_size); |  | ||||||
|         glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding)); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static void SetShaderUniformBlockBindings(GLuint shader) { |  | ||||||
|     SetShaderUniformBlockBinding(shader, "shader_data", UniformBindings::Common, |  | ||||||
|                                  sizeof(RasterizerOpenGL::UniformData)); |  | ||||||
|     SetShaderUniformBlockBinding(shader, "vs_config", UniformBindings::VS, |  | ||||||
|                                  sizeof(RasterizerOpenGL::VSUniformData)); |  | ||||||
|     SetShaderUniformBlockBinding(shader, "fs_config", UniformBindings::FS, |  | ||||||
|                                  sizeof(RasterizerOpenGL::FSUniformData)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| RasterizerOpenGL::RasterizerOpenGL() { | RasterizerOpenGL::RasterizerOpenGL() { | ||||||
|     shader_dirty = true; |  | ||||||
| 
 |  | ||||||
|     has_ARB_buffer_storage = false; |     has_ARB_buffer_storage = false; | ||||||
|     has_ARB_direct_state_access = false; |     has_ARB_direct_state_access = false; | ||||||
|     has_ARB_separate_shader_objects = false; |     has_ARB_separate_shader_objects = false; | ||||||
| @ -88,6 +62,8 @@ RasterizerOpenGL::RasterizerOpenGL() { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     ASSERT_MSG(has_ARB_separate_shader_objects, "has_ARB_separate_shader_objects is unsupported"); | ||||||
|  | 
 | ||||||
|     // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0
 |     // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0
 | ||||||
|     state.clip_distance[0] = true; |     state.clip_distance[0] = true; | ||||||
| 
 | 
 | ||||||
| @ -102,36 +78,31 @@ RasterizerOpenGL::RasterizerOpenGL() { | |||||||
|     state.draw.uniform_buffer = uniform_buffer.handle; |     state.draw.uniform_buffer = uniform_buffer.handle; | ||||||
|     state.Apply(); |     state.Apply(); | ||||||
| 
 | 
 | ||||||
|     glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), nullptr, GL_STATIC_DRAW); |  | ||||||
|     glBindBufferBase(GL_UNIFORM_BUFFER, 0, uniform_buffer.handle); |  | ||||||
| 
 |  | ||||||
|     uniform_block_data.dirty = true; |  | ||||||
| 
 |  | ||||||
|     // Create render framebuffer
 |     // Create render framebuffer
 | ||||||
|     framebuffer.Create(); |     framebuffer.Create(); | ||||||
| 
 | 
 | ||||||
|     if (has_ARB_separate_shader_objects) { |     hw_vao.Create(); | ||||||
|         hw_vao.Create(); |     hw_vao_enabled_attributes.fill(false); | ||||||
|         hw_vao_enabled_attributes.fill(false); |  | ||||||
| 
 | 
 | ||||||
|         stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER); |     stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER); | ||||||
|         stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2); |     stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2); | ||||||
|         state.draw.vertex_buffer = stream_buffer->GetHandle(); |     state.draw.vertex_buffer = stream_buffer->GetHandle(); | ||||||
| 
 | 
 | ||||||
|         pipeline.Create(); |     shader_program_manager = std::make_unique<GLShader::ProgramManager>(); | ||||||
|         state.draw.program_pipeline = pipeline.handle; |  | ||||||
|         state.draw.shader_program = 0; |  | ||||||
|         state.draw.vertex_array = hw_vao.handle; |  | ||||||
|         state.Apply(); |  | ||||||
| 
 | 
 | ||||||
|         glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle()); |     state.draw.shader_program = 0; | ||||||
|  |     state.draw.vertex_array = hw_vao.handle; | ||||||
|  |     state.Apply(); | ||||||
| 
 | 
 | ||||||
|         vs_uniform_buffer.Create(); |     glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle()); | ||||||
|         glBindBuffer(GL_UNIFORM_BUFFER, vs_uniform_buffer.handle); | 
 | ||||||
|         glBufferData(GL_UNIFORM_BUFFER, sizeof(VSUniformData), nullptr, GL_STREAM_COPY); |     for (unsigned index = 0; index < uniform_buffers.size(); ++index) { | ||||||
|         glBindBufferBase(GL_UNIFORM_BUFFER, 1, vs_uniform_buffer.handle); |         auto& buffer = uniform_buffers[index]; | ||||||
|     } else { |         buffer.Create(); | ||||||
|         UNREACHABLE(); |         glBindBuffer(GL_UNIFORM_BUFFER, buffer.handle); | ||||||
|  |         glBufferData(GL_UNIFORM_BUFFER, sizeof(GLShader::MaxwellUniformData), nullptr, | ||||||
|  |                      GL_STREAM_COPY); | ||||||
|  |         glBindBufferBase(GL_UNIFORM_BUFFER, index, buffer.handle); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     accelerate_draw = AccelDraw::Disabled; |     accelerate_draw = AccelDraw::Disabled; | ||||||
| @ -200,26 +171,74 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { | |||||||
|     buffer_offset += data_size; |     buffer_offset += data_size; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerOpenGL::SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_offset) { | void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos) { | ||||||
|     MICROPROFILE_SCOPE(OpenGL_VS); |     // Helper function for uploading uniform data
 | ||||||
|     LOG_CRITICAL(Render_OpenGL, "Emulated shaders are not supported! Using a passthrough shader."); |     const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { | ||||||
|     glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current_shader->shader.handle); |         if (has_ARB_direct_state_access) { | ||||||
| } |             glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size); | ||||||
|  |         } else { | ||||||
|  |             glBindBuffer(GL_COPY_WRITE_BUFFER, handle); | ||||||
|  |             glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size); | ||||||
|  |         } | ||||||
|  |     }; | ||||||
| 
 | 
 | ||||||
| void RasterizerOpenGL::SetupFragmentShader(FSUniformData* ub_ptr, GLintptr buffer_offset) { |     auto& gpu = Core::System().GetInstance().GPU().Maxwell3D(); | ||||||
|     MICROPROFILE_SCOPE(OpenGL_FS); |     ASSERT_MSG(!gpu.regs.shader_config[0].enable, "VertexA is unsupported!"); | ||||||
|     UNREACHABLE(); | 
 | ||||||
|  |     for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) { | ||||||
|  |         ptr_pos += sizeof(GLShader::MaxwellUniformData); | ||||||
|  | 
 | ||||||
|  |         auto& shader_config = gpu.regs.shader_config[index]; | ||||||
|  |         const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; | ||||||
|  | 
 | ||||||
|  |         // VertexB program is always enabled, despite bit setting
 | ||||||
|  |         const bool is_enabled{shader_config.enable || program == Maxwell::ShaderProgram::VertexB}; | ||||||
|  | 
 | ||||||
|  |         // Skip stages that are not enabled
 | ||||||
|  |         if (!is_enabled) { | ||||||
|  |             continue; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         // Upload uniform data as one UBO per stage
 | ||||||
|  |         const auto& stage = index - 1; // Stage indices are 0 - 5
 | ||||||
|  |         const GLintptr ubo_offset = buffer_offset + static_cast<GLintptr>(ptr_pos); | ||||||
|  |         copy_buffer(uniform_buffers[stage].handle, ubo_offset, | ||||||
|  |                     sizeof(GLShader::MaxwellUniformData)); | ||||||
|  |         GLShader::MaxwellUniformData* ub_ptr = | ||||||
|  |             reinterpret_cast<GLShader::MaxwellUniformData*>(&buffer_ptr[ptr_pos]); | ||||||
|  |         ub_ptr->SetFromRegs(gpu.state.shader_stages[stage]); | ||||||
|  | 
 | ||||||
|  |         // Fetch program code from memory
 | ||||||
|  |         GLShader::ProgramCode program_code; | ||||||
|  |         const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset}; | ||||||
|  |         const VAddr cpu_address{gpu.memory_manager.PhysicalToVirtualAddress(gpu_address)}; | ||||||
|  |         Memory::ReadBlock(cpu_address, program_code.data(), program_code.size() * sizeof(u64)); | ||||||
|  |         GLShader::ShaderSetup setup{std::move(program_code)}; | ||||||
|  | 
 | ||||||
|  |         switch (program) { | ||||||
|  |         case Maxwell::ShaderProgram::VertexB: { | ||||||
|  |             GLShader::MaxwellVSConfig vs_config{setup}; | ||||||
|  |             shader_program_manager->UseProgrammableVertexShader(vs_config, setup); | ||||||
|  |             break; | ||||||
|  |         } | ||||||
|  |         case Maxwell::ShaderProgram::Fragment: { | ||||||
|  |             GLShader::MaxwellFSConfig fs_config{setup}; | ||||||
|  |             shader_program_manager->UseProgrammableFragmentShader(fs_config, setup); | ||||||
|  |             break; | ||||||
|  |         } | ||||||
|  |         default: | ||||||
|  |             LOG_CRITICAL(HW_GPU, "Unimplemented shader index=%d, enable=%d, offset=0x%08X", index, | ||||||
|  |                          shader_config.enable.Value(), shader_config.offset); | ||||||
|  |             UNREACHABLE(); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     shader_program_manager->UseTrivialGeometryShader(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { | bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { | ||||||
|     if (!has_ARB_separate_shader_objects) { |  | ||||||
|         UNREACHABLE(); |  | ||||||
|         return false; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; |     accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; | ||||||
|     DrawArrays(); |     DrawArrays(); | ||||||
| 
 |  | ||||||
|     return true; |     return true; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -280,18 +299,6 @@ void RasterizerOpenGL::DrawArrays() { | |||||||
|     // Sync and bind the texture surfaces
 |     // Sync and bind the texture surfaces
 | ||||||
|     BindTextures(); |     BindTextures(); | ||||||
| 
 | 
 | ||||||
|     // Sync and bind the shader
 |  | ||||||
|     if (shader_dirty) { |  | ||||||
|         SetShader(); |  | ||||||
|         shader_dirty = false; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     // Sync the uniform data
 |  | ||||||
|     if (uniform_block_data.dirty) { |  | ||||||
|         glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(UniformData), &uniform_block_data.data); |  | ||||||
|         uniform_block_data.dirty = false; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable
 |     // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable
 | ||||||
|     // scissor test to prevent drawing outside of the framebuffer region
 |     // scissor test to prevent drawing outside of the framebuffer region
 | ||||||
|     state.scissor.enabled = true; |     state.scissor.enabled = true; | ||||||
| @ -311,7 +318,9 @@ void RasterizerOpenGL::DrawArrays() { | |||||||
|     if (is_indexed) { |     if (is_indexed) { | ||||||
|         UNREACHABLE(); |         UNREACHABLE(); | ||||||
|     } |     } | ||||||
|     buffer_size += sizeof(VSUniformData); | 
 | ||||||
|  |     // Uniform space for the 5 shader stages
 | ||||||
|  |     buffer_size += sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage; | ||||||
| 
 | 
 | ||||||
|     size_t ptr_pos = 0; |     size_t ptr_pos = 0; | ||||||
|     u8* buffer_ptr; |     u8* buffer_ptr; | ||||||
| @ -327,25 +336,12 @@ void RasterizerOpenGL::DrawArrays() { | |||||||
|         UNREACHABLE(); |         UNREACHABLE(); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     SetupVertexShader(reinterpret_cast<VSUniformData*>(&buffer_ptr[ptr_pos]), |     SetupShaders(buffer_ptr, buffer_offset, ptr_pos); | ||||||
|                       buffer_offset + static_cast<GLintptr>(ptr_pos)); |  | ||||||
|     const GLintptr vs_ubo_offset = buffer_offset + static_cast<GLintptr>(ptr_pos); |  | ||||||
|     ptr_pos += sizeof(VSUniformData); |  | ||||||
| 
 | 
 | ||||||
|     stream_buffer->Unmap(); |     stream_buffer->Unmap(); | ||||||
| 
 | 
 | ||||||
|     const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { |     shader_program_manager->ApplyTo(state); | ||||||
|         if (has_ARB_direct_state_access) { |     state.Apply(); | ||||||
|             glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size); |  | ||||||
|         } else { |  | ||||||
|             glBindBuffer(GL_COPY_WRITE_BUFFER, handle); |  | ||||||
|             glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size); |  | ||||||
|         } |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     copy_buffer(vs_uniform_buffer.handle, vs_ubo_offset, sizeof(VSUniformData)); |  | ||||||
| 
 |  | ||||||
|     glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current_shader->shader.handle); |  | ||||||
| 
 | 
 | ||||||
|     if (is_indexed) { |     if (is_indexed) { | ||||||
|         UNREACHABLE(); |         UNREACHABLE(); | ||||||
| @ -531,72 +527,6 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr | |||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerOpenGL::SetShader() { |  | ||||||
|     // TODO(bunnei): The below sets up a static test shader for passing untransformed vertices to
 |  | ||||||
|     // OpenGL for rendering. This should be removed/replaced when we start emulating Maxwell
 |  | ||||||
|     // shaders.
 |  | ||||||
| 
 |  | ||||||
|     static constexpr char vertex_shader[] = R"( |  | ||||||
| #version 150 core |  | ||||||
| 
 |  | ||||||
| in vec2 vert_position; |  | ||||||
| in vec2 vert_tex_coord; |  | ||||||
| out vec2 frag_tex_coord; |  | ||||||
| 
 |  | ||||||
| void main() { |  | ||||||
|     // Multiply input position by the rotscale part of the matrix and then manually translate by
 |  | ||||||
|     // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector
 |  | ||||||
|     // to `vec3(vert_position.xy, 1.0)`
 |  | ||||||
|     gl_Position = vec4(mat2(mat3x2(0.0015625f, 0.0, 0.0, -0.0027778, -1.0, 1.0)) * vert_position + mat3x2(0.0015625f, 0.0, 0.0, -0.0027778, -1.0, 1.0)[2], 0.0, 1.0); |  | ||||||
|     frag_tex_coord = vert_tex_coord; |  | ||||||
| } |  | ||||||
| )"; |  | ||||||
| 
 |  | ||||||
|     static constexpr char fragment_shader[] = R"( |  | ||||||
| #version 150 core |  | ||||||
| 
 |  | ||||||
| in vec2 frag_tex_coord; |  | ||||||
| out vec4 color; |  | ||||||
| 
 |  | ||||||
| uniform sampler2D tex[32]; |  | ||||||
| 
 |  | ||||||
| void main() { |  | ||||||
|     color = texture(tex[0], frag_tex_coord); |  | ||||||
| } |  | ||||||
| )"; |  | ||||||
| 
 |  | ||||||
|     if (current_shader) { |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     LOG_CRITICAL(Render_OpenGL, "Emulated shaders are not supported! Using a passthrough shader."); |  | ||||||
| 
 |  | ||||||
|     current_shader = &test_shader; |  | ||||||
|     if (has_ARB_separate_shader_objects) { |  | ||||||
|         test_shader.shader.Create(vertex_shader, nullptr, fragment_shader, {}, true); |  | ||||||
|         glActiveShaderProgram(pipeline.handle, test_shader.shader.handle); |  | ||||||
|     } else { |  | ||||||
|         UNREACHABLE(); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     state.draw.shader_program = test_shader.shader.handle; |  | ||||||
|     state.Apply(); |  | ||||||
| 
 |  | ||||||
|     for (u32 texture = 0; texture < texture_samplers.size(); ++texture) { |  | ||||||
|         // Set the texture samplers to correspond to different texture units
 |  | ||||||
|         std::string uniform_name = "tex[" + std::to_string(texture) + "]"; |  | ||||||
|         GLint uniform_tex = glGetUniformLocation(test_shader.shader.handle, uniform_name.c_str()); |  | ||||||
|         if (uniform_tex != -1) { |  | ||||||
|             glUniform1i(uniform_tex, TextureUnits::MaxwellTexture(texture).id); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     if (has_ARB_separate_shader_objects) { |  | ||||||
|         state.draw.shader_program = 0; |  | ||||||
|         state.Apply(); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface, | void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface, | ||||||
|                                                const Surface& depth_surface, bool has_stencil) { |                                                const Surface& depth_surface, bool has_stencil) { | ||||||
|     state.draw.draw_framebuffer = framebuffer.handle; |     state.draw.draw_framebuffer = framebuffer.handle; | ||||||
|  | |||||||
| @ -15,10 +15,12 @@ | |||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "common/hash.h" | #include "common/hash.h" | ||||||
| #include "common/vector_math.h" | #include "common/vector_math.h" | ||||||
|  | #include "video_core/engines/maxwell_3d.h" | ||||||
| #include "video_core/rasterizer_interface.h" | #include "video_core/rasterizer_interface.h" | ||||||
| #include "video_core/renderer_opengl/gl_rasterizer_cache.h" | #include "video_core/renderer_opengl/gl_rasterizer_cache.h" | ||||||
| #include "video_core/renderer_opengl/gl_resource_manager.h" | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||||
| #include "video_core/renderer_opengl/gl_shader_gen.h" | #include "video_core/renderer_opengl/gl_shader_gen.h" | ||||||
|  | #include "video_core/renderer_opengl/gl_shader_manager.h" | ||||||
| #include "video_core/renderer_opengl/gl_state.h" | #include "video_core/renderer_opengl/gl_state.h" | ||||||
| #include "video_core/renderer_opengl/gl_stream_buffer.h" | #include "video_core/renderer_opengl/gl_stream_buffer.h" | ||||||
| 
 | 
 | ||||||
| @ -45,7 +47,7 @@ public: | |||||||
|     /// OpenGL shader generated for a given Maxwell register state
 |     /// OpenGL shader generated for a given Maxwell register state
 | ||||||
|     struct MaxwellShader { |     struct MaxwellShader { | ||||||
|         /// OpenGL shader resource
 |         /// OpenGL shader resource
 | ||||||
|         OGLShader shader; |         OGLProgram shader; | ||||||
|     }; |     }; | ||||||
| 
 | 
 | ||||||
|     struct VertexShader { |     struct VertexShader { | ||||||
| @ -56,34 +58,6 @@ public: | |||||||
|         OGLShader shader; |         OGLShader shader; | ||||||
|     }; |     }; | ||||||
| 
 | 
 | ||||||
|     /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
 |  | ||||||
|     // NOTE: Always keep a vec4 at the end. The GL spec is not clear wether the alignment at
 |  | ||||||
|     //       the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
 |  | ||||||
|     //       Not following that rule will cause problems on some AMD drivers.
 |  | ||||||
|     struct UniformData {}; |  | ||||||
| 
 |  | ||||||
|     // static_assert(
 |  | ||||||
|     //    sizeof(UniformData) == 0x460,
 |  | ||||||
|     //    "The size of the UniformData structure has changed, update the structure in the shader");
 |  | ||||||
|     static_assert(sizeof(UniformData) < 16384, |  | ||||||
|                   "UniformData structure must be less than 16kb as per the OpenGL spec"); |  | ||||||
| 
 |  | ||||||
|     struct VSUniformData {}; |  | ||||||
|     // static_assert(
 |  | ||||||
|     //    sizeof(VSUniformData) == 1856,
 |  | ||||||
|     //    "The size of the VSUniformData structure has changed, update the structure in the
 |  | ||||||
|     //    shader");
 |  | ||||||
|     static_assert(sizeof(VSUniformData) < 16384, |  | ||||||
|                   "VSUniformData structure must be less than 16kb as per the OpenGL spec"); |  | ||||||
| 
 |  | ||||||
|     struct FSUniformData {}; |  | ||||||
|     // static_assert(
 |  | ||||||
|     //    sizeof(FSUniformData) == 1856,
 |  | ||||||
|     //    "The size of the FSUniformData structure has changed, update the structure in the
 |  | ||||||
|     //    shader");
 |  | ||||||
|     static_assert(sizeof(FSUniformData) < 16384, |  | ||||||
|                   "FSUniformData structure must be less than 16kb as per the OpenGL spec"); |  | ||||||
| 
 |  | ||||||
| private: | private: | ||||||
|     class SamplerInfo { |     class SamplerInfo { | ||||||
|     public: |     public: | ||||||
| @ -122,9 +96,6 @@ private: | |||||||
|     /// Syncs the clip coefficients to match the guest state
 |     /// Syncs the clip coefficients to match the guest state
 | ||||||
|     void SyncClipCoef(); |     void SyncClipCoef(); | ||||||
| 
 | 
 | ||||||
|     /// Sets the OpenGL shader in accordance with the current guest state
 |  | ||||||
|     void SetShader(); |  | ||||||
| 
 |  | ||||||
|     /// Syncs the cull mode to match the guest state
 |     /// Syncs the cull mode to match the guest state
 | ||||||
|     void SyncCullMode(); |     void SyncCullMode(); | ||||||
| 
 | 
 | ||||||
| @ -152,23 +123,12 @@ private: | |||||||
| 
 | 
 | ||||||
|     RasterizerCacheOpenGL res_cache; |     RasterizerCacheOpenGL res_cache; | ||||||
| 
 | 
 | ||||||
|     /// Shader used for test renderering - to be removed once we have emulated shaders
 |     std::unique_ptr<GLShader::ProgramManager> shader_program_manager; | ||||||
|     MaxwellShader test_shader{}; |  | ||||||
| 
 |  | ||||||
|     const MaxwellShader* current_shader{}; |  | ||||||
|     bool shader_dirty{}; |  | ||||||
| 
 |  | ||||||
|     struct { |  | ||||||
|         UniformData data; |  | ||||||
|         bool dirty; |  | ||||||
|     } uniform_block_data = {}; |  | ||||||
| 
 |  | ||||||
|     OGLPipeline pipeline; |  | ||||||
|     OGLVertexArray sw_vao; |     OGLVertexArray sw_vao; | ||||||
|     OGLVertexArray hw_vao; |     OGLVertexArray hw_vao; | ||||||
|     std::array<bool, 16> hw_vao_enabled_attributes; |     std::array<bool, 16> hw_vao_enabled_attributes; | ||||||
| 
 | 
 | ||||||
|     std::array<SamplerInfo, 32> texture_samplers; |     std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers; | ||||||
|     static constexpr size_t VERTEX_BUFFER_SIZE = 128 * 1024 * 1024; |     static constexpr size_t VERTEX_BUFFER_SIZE = 128 * 1024 * 1024; | ||||||
|     std::unique_ptr<OGLStreamBuffer> vertex_buffer; |     std::unique_ptr<OGLStreamBuffer> vertex_buffer; | ||||||
|     OGLBuffer uniform_buffer; |     OGLBuffer uniform_buffer; | ||||||
| @ -182,19 +142,9 @@ private: | |||||||
|     void AnalyzeVertexArray(bool is_indexed); |     void AnalyzeVertexArray(bool is_indexed); | ||||||
|     void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset); |     void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset); | ||||||
| 
 | 
 | ||||||
|     OGLBuffer vs_uniform_buffer; |     std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> uniform_buffers; | ||||||
|     std::unordered_map<GLShader::MaxwellVSConfig, VertexShader*> vs_shader_map; |  | ||||||
|     std::unordered_map<std::string, VertexShader> vs_shader_cache; |  | ||||||
|     OGLShader vs_default_shader; |  | ||||||
| 
 | 
 | ||||||
|     void SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_offset); |     void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos); | ||||||
| 
 |  | ||||||
|     OGLBuffer fs_uniform_buffer; |  | ||||||
|     std::unordered_map<GLShader::MaxwellFSConfig, FragmentShader*> fs_shader_map; |  | ||||||
|     std::unordered_map<std::string, FragmentShader> fs_shader_cache; |  | ||||||
|     OGLShader fs_default_shader; |  | ||||||
| 
 |  | ||||||
|     void SetupFragmentShader(FSUniformData* ub_ptr, GLintptr buffer_offset); |  | ||||||
| 
 | 
 | ||||||
|     enum class AccelDraw { Disabled, Arrays, Indexed }; |     enum class AccelDraw { Disabled, Arrays, Indexed }; | ||||||
|     AccelDraw accelerate_draw; |     AccelDraw accelerate_draw; | ||||||
|  | |||||||
| @ -818,7 +818,7 @@ void main() { | |||||||
|     color = texelFetch(tbo, tbo_offset).rabg; |     color = texelFetch(tbo, tbo_offset).rabg; | ||||||
| } | } | ||||||
| )"; | )"; | ||||||
|     d24s8_abgr_shader.Create(vs_source, nullptr, fs_source); |     d24s8_abgr_shader.CreateFromSource(vs_source, nullptr, fs_source); | ||||||
| 
 | 
 | ||||||
|     OpenGLState state = OpenGLState::GetCurState(); |     OpenGLState state = OpenGLState::GetCurState(); | ||||||
|     GLuint old_program = state.draw.shader_program; |     GLuint old_program = state.draw.shader_program; | ||||||
|  | |||||||
| @ -334,7 +334,7 @@ private: | |||||||
|     OGLVertexArray attributeless_vao; |     OGLVertexArray attributeless_vao; | ||||||
|     OGLBuffer d24s8_abgr_buffer; |     OGLBuffer d24s8_abgr_buffer; | ||||||
|     GLsizeiptr d24s8_abgr_buffer_size; |     GLsizeiptr d24s8_abgr_buffer_size; | ||||||
|     OGLShader d24s8_abgr_shader; |     OGLProgram d24s8_abgr_shader; | ||||||
|     GLint d24s8_abgr_tbo_size_u_id; |     GLint d24s8_abgr_tbo_size_u_id; | ||||||
|     GLint d24s8_abgr_viewport_u_id; |     GLint d24s8_abgr_viewport_u_id; | ||||||
| }; | }; | ||||||
|  | |||||||
| @ -13,14 +13,16 @@ | |||||||
| class OGLTexture : private NonCopyable { | class OGLTexture : private NonCopyable { | ||||||
| public: | public: | ||||||
|     OGLTexture() = default; |     OGLTexture() = default; | ||||||
|     OGLTexture(OGLTexture&& o) { | 
 | ||||||
|         std::swap(handle, o.handle); |     OGLTexture(OGLTexture&& o) : handle(std::exchange(o.handle, 0)) {} | ||||||
|     } | 
 | ||||||
|     ~OGLTexture() { |     ~OGLTexture() { | ||||||
|         Release(); |         Release(); | ||||||
|     } |     } | ||||||
|  | 
 | ||||||
|     OGLTexture& operator=(OGLTexture&& o) { |     OGLTexture& operator=(OGLTexture&& o) { | ||||||
|         std::swap(handle, o.handle); |         Release(); | ||||||
|  |         handle = std::exchange(o.handle, 0); | ||||||
|         return *this; |         return *this; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
| @ -46,14 +48,16 @@ public: | |||||||
| class OGLSampler : private NonCopyable { | class OGLSampler : private NonCopyable { | ||||||
| public: | public: | ||||||
|     OGLSampler() = default; |     OGLSampler() = default; | ||||||
|     OGLSampler(OGLSampler&& o) { | 
 | ||||||
|         std::swap(handle, o.handle); |     OGLSampler(OGLSampler&& o) : handle(std::exchange(o.handle, 0)) {} | ||||||
|     } | 
 | ||||||
|     ~OGLSampler() { |     ~OGLSampler() { | ||||||
|         Release(); |         Release(); | ||||||
|     } |     } | ||||||
|  | 
 | ||||||
|     OGLSampler& operator=(OGLSampler&& o) { |     OGLSampler& operator=(OGLSampler&& o) { | ||||||
|         std::swap(handle, o.handle); |         Release(); | ||||||
|  |         handle = std::exchange(o.handle, 0); | ||||||
|         return *this; |         return *this; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
| @ -79,25 +83,71 @@ public: | |||||||
| class OGLShader : private NonCopyable { | class OGLShader : private NonCopyable { | ||||||
| public: | public: | ||||||
|     OGLShader() = default; |     OGLShader() = default; | ||||||
|     OGLShader(OGLShader&& o) { | 
 | ||||||
|         std::swap(handle, o.handle); |     OGLShader(OGLShader&& o) : handle(std::exchange(o.handle, 0)) {} | ||||||
|     } | 
 | ||||||
|     ~OGLShader() { |     ~OGLShader() { | ||||||
|         Release(); |         Release(); | ||||||
|     } |     } | ||||||
|  | 
 | ||||||
|     OGLShader& operator=(OGLShader&& o) { |     OGLShader& operator=(OGLShader&& o) { | ||||||
|         std::swap(handle, o.handle); |         Release(); | ||||||
|  |         handle = std::exchange(o.handle, 0); | ||||||
|         return *this; |         return *this; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     /// Creates a new internal OpenGL resource and stores the handle
 |     void Create(const char* source, GLenum type) { | ||||||
|     void Create(const char* vert_shader, const char* geo_shader, const char* frag_shader, |  | ||||||
|                 const std::vector<const char*>& feedback_vars = {}, |  | ||||||
|                 bool separable_program = false) { |  | ||||||
|         if (handle != 0) |         if (handle != 0) | ||||||
|             return; |             return; | ||||||
|         handle = GLShader::LoadProgram(vert_shader, geo_shader, frag_shader, feedback_vars, |         if (source == nullptr) | ||||||
|                                        separable_program); |             return; | ||||||
|  |         handle = GLShader::LoadShader(source, type); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     void Release() { | ||||||
|  |         if (handle == 0) | ||||||
|  |             return; | ||||||
|  |         glDeleteShader(handle); | ||||||
|  |         handle = 0; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     GLuint handle = 0; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | class OGLProgram : private NonCopyable { | ||||||
|  | public: | ||||||
|  |     OGLProgram() = default; | ||||||
|  | 
 | ||||||
|  |     OGLProgram(OGLProgram&& o) : handle(std::exchange(o.handle, 0)) {} | ||||||
|  | 
 | ||||||
|  |     ~OGLProgram() { | ||||||
|  |         Release(); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     OGLProgram& operator=(OGLProgram&& o) { | ||||||
|  |         Release(); | ||||||
|  |         handle = std::exchange(o.handle, 0); | ||||||
|  |         return *this; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     template <typename... T> | ||||||
|  |     void Create(bool separable_program, T... shaders) { | ||||||
|  |         if (handle != 0) | ||||||
|  |             return; | ||||||
|  |         handle = GLShader::LoadProgram(separable_program, shaders...); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     /// Creates a new internal OpenGL resource and stores the handle
 | ||||||
|  |     void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader, | ||||||
|  |                           bool separable_program = false) { | ||||||
|  |         OGLShader vert, geo, frag; | ||||||
|  |         if (vert_shader) | ||||||
|  |             vert.Create(vert_shader, GL_VERTEX_SHADER); | ||||||
|  |         if (geo_shader) | ||||||
|  |             geo.Create(geo_shader, GL_GEOMETRY_SHADER); | ||||||
|  |         if (frag_shader) | ||||||
|  |             frag.Create(frag_shader, GL_FRAGMENT_SHADER); | ||||||
|  |         Create(separable_program, vert.handle, geo.handle, frag.handle); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     /// Deletes the internal OpenGL resource
 |     /// Deletes the internal OpenGL resource
 | ||||||
| @ -148,14 +198,16 @@ public: | |||||||
| class OGLBuffer : private NonCopyable { | class OGLBuffer : private NonCopyable { | ||||||
| public: | public: | ||||||
|     OGLBuffer() = default; |     OGLBuffer() = default; | ||||||
|     OGLBuffer(OGLBuffer&& o) { | 
 | ||||||
|         std::swap(handle, o.handle); |     OGLBuffer(OGLBuffer&& o) : handle(std::exchange(o.handle, 0)) {} | ||||||
|     } | 
 | ||||||
|     ~OGLBuffer() { |     ~OGLBuffer() { | ||||||
|         Release(); |         Release(); | ||||||
|     } |     } | ||||||
|  | 
 | ||||||
|     OGLBuffer& operator=(OGLBuffer&& o) { |     OGLBuffer& operator=(OGLBuffer&& o) { | ||||||
|         std::swap(handle, o.handle); |         Release(); | ||||||
|  |         handle = std::exchange(o.handle, 0); | ||||||
|         return *this; |         return *this; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
| @ -214,14 +266,16 @@ public: | |||||||
| class OGLVertexArray : private NonCopyable { | class OGLVertexArray : private NonCopyable { | ||||||
| public: | public: | ||||||
|     OGLVertexArray() = default; |     OGLVertexArray() = default; | ||||||
|     OGLVertexArray(OGLVertexArray&& o) { | 
 | ||||||
|         std::swap(handle, o.handle); |     OGLVertexArray(OGLVertexArray&& o) : handle(std::exchange(o.handle, 0)) {} | ||||||
|     } | 
 | ||||||
|     ~OGLVertexArray() { |     ~OGLVertexArray() { | ||||||
|         Release(); |         Release(); | ||||||
|     } |     } | ||||||
|  | 
 | ||||||
|     OGLVertexArray& operator=(OGLVertexArray&& o) { |     OGLVertexArray& operator=(OGLVertexArray&& o) { | ||||||
|         std::swap(handle, o.handle); |         Release(); | ||||||
|  |         handle = std::exchange(o.handle, 0); | ||||||
|         return *this; |         return *this; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
| @ -247,14 +301,16 @@ public: | |||||||
| class OGLFramebuffer : private NonCopyable { | class OGLFramebuffer : private NonCopyable { | ||||||
| public: | public: | ||||||
|     OGLFramebuffer() = default; |     OGLFramebuffer() = default; | ||||||
|     OGLFramebuffer(OGLFramebuffer&& o) { | 
 | ||||||
|         std::swap(handle, o.handle); |     OGLFramebuffer(OGLFramebuffer&& o) : handle(std::exchange(o.handle, 0)) {} | ||||||
|     } | 
 | ||||||
|     ~OGLFramebuffer() { |     ~OGLFramebuffer() { | ||||||
|         Release(); |         Release(); | ||||||
|     } |     } | ||||||
|  | 
 | ||||||
|     OGLFramebuffer& operator=(OGLFramebuffer&& o) { |     OGLFramebuffer& operator=(OGLFramebuffer&& o) { | ||||||
|         std::swap(handle, o.handle); |         Release(); | ||||||
|  |         handle = std::exchange(o.handle, 0); | ||||||
|         return *this; |         return *this; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -2,57 +2,499 @@ | |||||||
| // Licensed under GPLv2 or any later version
 | // Licensed under GPLv2 or any later version
 | ||||||
| // Refer to the license.txt file included.
 | // Refer to the license.txt file included.
 | ||||||
| 
 | 
 | ||||||
|  | #include <map> | ||||||
|  | #include <set> | ||||||
| #include <string> | #include <string> | ||||||
| #include <queue> |  | ||||||
| #include "common/assert.h" | #include "common/assert.h" | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
|  | #include "video_core/engines/shader_bytecode.h" | ||||||
| #include "video_core/renderer_opengl/gl_shader_decompiler.h" | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||||||
| 
 | 
 | ||||||
| namespace Maxwell3D { | namespace GLShader { | ||||||
| namespace Shader { |  | ||||||
| namespace Decompiler { | namespace Decompiler { | ||||||
| 
 | 
 | ||||||
|  | using Tegra::Shader::Attribute; | ||||||
|  | using Tegra::Shader::Instruction; | ||||||
|  | using Tegra::Shader::OpCode; | ||||||
|  | using Tegra::Shader::Register; | ||||||
|  | using Tegra::Shader::SubOp; | ||||||
|  | using Tegra::Shader::Uniform; | ||||||
|  | 
 | ||||||
| constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; | constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; | ||||||
| 
 | 
 | ||||||
| class Impl { | class DecompileFail : public std::runtime_error { | ||||||
| public: | public: | ||||||
|     Impl(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code, |     using std::runtime_error::runtime_error; | ||||||
|          const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data, u32 main_offset, | }; | ||||||
|          const std::function<std::string(u32)>& inputreg_getter, |  | ||||||
|          const std::function<std::string(u32)>& outputreg_getter, bool sanitize_mul, |  | ||||||
|          const std::string& emit_cb, const std::string& setemit_cb) |  | ||||||
|         : program_code(program_code), swizzle_data(swizzle_data), main_offset(main_offset), |  | ||||||
|           inputreg_getter(inputreg_getter), outputreg_getter(outputreg_getter), |  | ||||||
|           sanitize_mul(sanitize_mul), emit_cb(emit_cb), setemit_cb(setemit_cb) {} |  | ||||||
| 
 | 
 | ||||||
|     std::string Decompile() { | /// Describes the behaviour of code path of a given entry point and a return point.
 | ||||||
|         UNREACHABLE(); | enum class ExitMethod { | ||||||
|         return {}; |     Undetermined, ///< Internal value. Only occur when analyzing JMP loop.
 | ||||||
|  |     AlwaysReturn, ///< All code paths reach the return point.
 | ||||||
|  |     Conditional,  ///< Code path reaches the return point or an END instruction conditionally.
 | ||||||
|  |     AlwaysEnd,    ///< All code paths reach a END instruction.
 | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | /// A subroutine is a range of code refereced by a CALL, IF or LOOP instruction.
 | ||||||
|  | struct Subroutine { | ||||||
|  |     /// Generates a name suitable for GLSL source code.
 | ||||||
|  |     std::string GetName() const { | ||||||
|  |         return "sub_" + std::to_string(begin) + "_" + std::to_string(end); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     u32 begin;              ///< Entry point of the subroutine.
 | ||||||
|  |     u32 end;                ///< Return point of the subroutine.
 | ||||||
|  |     ExitMethod exit_method; ///< Exit method of the subroutine.
 | ||||||
|  |     std::set<u32> labels;   ///< Addresses refereced by JMP instructions.
 | ||||||
|  | 
 | ||||||
|  |     bool operator<(const Subroutine& rhs) const { | ||||||
|  |         return std::tie(begin, end) < std::tie(rhs.begin, rhs.end); | ||||||
|  |     } | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | /// Analyzes shader code and produces a set of subroutines.
 | ||||||
|  | class ControlFlowAnalyzer { | ||||||
|  | public: | ||||||
|  |     ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset) | ||||||
|  |         : program_code(program_code) { | ||||||
|  | 
 | ||||||
|  |         // Recursively finds all subroutines.
 | ||||||
|  |         const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END); | ||||||
|  |         if (program_main.exit_method != ExitMethod::AlwaysEnd) | ||||||
|  |             throw DecompileFail("Program does not always end"); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     std::set<Subroutine> GetSubroutines() { | ||||||
|  |         return std::move(subroutines); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code; |     const ProgramCode& program_code; | ||||||
|     const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data; |     std::set<Subroutine> subroutines; | ||||||
|     u32 main_offset; |     std::map<std::pair<u32, u32>, ExitMethod> exit_method_map; | ||||||
|     const std::function<std::string(u32)>& inputreg_getter; | 
 | ||||||
|     const std::function<std::string(u32)>& outputreg_getter; |     /// Adds and analyzes a new subroutine if it is not added yet.
 | ||||||
|     bool sanitize_mul; |     const Subroutine& AddSubroutine(u32 begin, u32 end) { | ||||||
|     const std::string& emit_cb; |         auto iter = subroutines.find(Subroutine{begin, end}); | ||||||
|     const std::string& setemit_cb; |         if (iter != subroutines.end()) | ||||||
|  |             return *iter; | ||||||
|  | 
 | ||||||
|  |         Subroutine subroutine{begin, end}; | ||||||
|  |         subroutine.exit_method = Scan(begin, end, subroutine.labels); | ||||||
|  |         if (subroutine.exit_method == ExitMethod::Undetermined) | ||||||
|  |             throw DecompileFail("Recursive function detected"); | ||||||
|  |         return *subroutines.insert(std::move(subroutine)).first; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     /// Scans a range of code for labels and determines the exit method.
 | ||||||
|  |     ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels) { | ||||||
|  |         auto [iter, inserted] = | ||||||
|  |             exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined); | ||||||
|  |         ExitMethod& exit_method = iter->second; | ||||||
|  |         if (!inserted) | ||||||
|  |             return exit_method; | ||||||
|  | 
 | ||||||
|  |         for (u32 offset = begin; offset != end && offset != PROGRAM_END; ++offset) { | ||||||
|  |             const Instruction instr = {program_code[offset]}; | ||||||
|  |             switch (instr.opcode.EffectiveOpCode()) { | ||||||
|  |             case OpCode::Id::EXIT: { | ||||||
|  |                 return exit_method = ExitMethod::AlwaysEnd; | ||||||
|  |             } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         return exit_method = ExitMethod::AlwaysReturn; | ||||||
|  |     } | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| std::string DecompileProgram(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code, | class ShaderWriter { | ||||||
|                              const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data, | public: | ||||||
|                              u32 main_offset, |     void AddLine(const std::string& text) { | ||||||
|                              const std::function<std::string(u32)>& inputreg_getter, |         DEBUG_ASSERT(scope >= 0); | ||||||
|                              const std::function<std::string(u32)>& outputreg_getter, |         if (!text.empty()) { | ||||||
|                              bool sanitize_mul, const std::string& emit_cb, |             shader_source += std::string(static_cast<size_t>(scope) * 4, ' '); | ||||||
|                              const std::string& setemit_cb) { |         } | ||||||
|     Impl impl(program_code, swizzle_data, main_offset, inputreg_getter, outputreg_getter, |         shader_source += text + '\n'; | ||||||
|               sanitize_mul, emit_cb, setemit_cb); |     } | ||||||
|     return impl.Decompile(); | 
 | ||||||
|  |     std::string GetResult() { | ||||||
|  |         return std::move(shader_source); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     int scope = 0; | ||||||
|  | 
 | ||||||
|  | private: | ||||||
|  |     std::string shader_source; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | class GLSLGenerator { | ||||||
|  | public: | ||||||
|  |     GLSLGenerator(const std::set<Subroutine>& subroutines, const ProgramCode& program_code, | ||||||
|  |                   u32 main_offset, Maxwell3D::Regs::ShaderStage stage) | ||||||
|  |         : subroutines(subroutines), program_code(program_code), main_offset(main_offset), | ||||||
|  |           stage(stage) { | ||||||
|  | 
 | ||||||
|  |         Generate(); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     std::string GetShaderCode() { | ||||||
|  |         return declarations.GetResult() + shader.GetResult(); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  | private: | ||||||
|  |     /// Gets the Subroutine object corresponding to the specified address.
 | ||||||
|  |     const Subroutine& GetSubroutine(u32 begin, u32 end) const { | ||||||
|  |         auto iter = subroutines.find(Subroutine{begin, end}); | ||||||
|  |         ASSERT(iter != subroutines.end()); | ||||||
|  |         return *iter; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     /// Generates code representing an input attribute register.
 | ||||||
|  |     std::string GetInputAttribute(Attribute::Index attribute) { | ||||||
|  |         declr_input_attribute.insert(attribute); | ||||||
|  | 
 | ||||||
|  |         const u32 index{static_cast<u32>(attribute) - | ||||||
|  |                         static_cast<u32>(Attribute::Index::Attribute_0)}; | ||||||
|  |         if (attribute >= Attribute::Index::Attribute_0) { | ||||||
|  |             return "input_attribute_" + std::to_string(index); | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         LOG_CRITICAL(HW_GPU, "Unhandled input attribute: 0x%02x", index); | ||||||
|  |         UNREACHABLE(); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     /// Generates code representing an output attribute register.
 | ||||||
|  |     std::string GetOutputAttribute(Attribute::Index attribute) { | ||||||
|  |         switch (attribute) { | ||||||
|  |         case Attribute::Index::Position: | ||||||
|  |             return "gl_Position"; | ||||||
|  |         default: | ||||||
|  |             const u32 index{static_cast<u32>(attribute) - | ||||||
|  |                             static_cast<u32>(Attribute::Index::Attribute_0)}; | ||||||
|  |             if (attribute >= Attribute::Index::Attribute_0) { | ||||||
|  |                 declr_output_attribute.insert(attribute); | ||||||
|  |                 return "output_attribute_" + std::to_string(index); | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             LOG_CRITICAL(HW_GPU, "Unhandled output attribute: 0x%02x", index); | ||||||
|  |             UNREACHABLE(); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     /// Generates code representing a temporary (GPR) register.
 | ||||||
|  |     std::string GetRegister(const Register& reg) { | ||||||
|  |         return *declr_register.insert("register_" + std::to_string(reg)).first; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     /// Generates code representing a uniform (C buffer) register.
 | ||||||
|  |     std::string GetUniform(const Uniform& reg) const { | ||||||
|  |         std::string index = std::to_string(reg.index); | ||||||
|  |         return "uniform_" + index + "[" + std::to_string(reg.offset >> 2) + "][" + | ||||||
|  |                std::to_string(reg.offset & 3) + "]"; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     /**
 | ||||||
|  |      * Adds code that calls a subroutine. | ||||||
|  |      * @param subroutine the subroutine to call. | ||||||
|  |      */ | ||||||
|  |     void CallSubroutine(const Subroutine& subroutine) { | ||||||
|  |         if (subroutine.exit_method == ExitMethod::AlwaysEnd) { | ||||||
|  |             shader.AddLine(subroutine.GetName() + "();"); | ||||||
|  |             shader.AddLine("return true;"); | ||||||
|  |         } else if (subroutine.exit_method == ExitMethod::Conditional) { | ||||||
|  |             shader.AddLine("if (" + subroutine.GetName() + "()) { return true; }"); | ||||||
|  |         } else { | ||||||
|  |             shader.AddLine(subroutine.GetName() + "();"); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     /**
 | ||||||
|  |      * Writes code that does an assignment operation. | ||||||
|  |      * @param reg the destination register code. | ||||||
|  |      * @param value the code representing the value to assign. | ||||||
|  |      */ | ||||||
|  |     void SetDest(u64 elem, const std::string& reg, const std::string& value, | ||||||
|  |                  u64 dest_num_components, u64 value_num_components) { | ||||||
|  |         std::string swizzle = "."; | ||||||
|  |         swizzle += "xyzw"[elem]; | ||||||
|  | 
 | ||||||
|  |         std::string dest = reg + (dest_num_components != 1 ? swizzle : ""); | ||||||
|  |         std::string src = "(" + value + ")" + (value_num_components != 1 ? swizzle : ""); | ||||||
|  | 
 | ||||||
|  |         shader.AddLine(dest + " = " + src + ";"); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     /**
 | ||||||
|  |      * Compiles a single instruction from Tegra to GLSL. | ||||||
|  |      * @param offset the offset of the Tegra shader instruction. | ||||||
|  |      * @return the offset of the next instruction to execute. Usually it is the current offset | ||||||
|  |      * + 1. If the current instruction always terminates the program, returns PROGRAM_END. | ||||||
|  |      */ | ||||||
|  |     u32 CompileInstr(u32 offset) { | ||||||
|  |         const Instruction instr = {program_code[offset]}; | ||||||
|  | 
 | ||||||
|  |         shader.AddLine("// " + std::to_string(offset) + ": " + OpCode::GetInfo(instr.opcode).name); | ||||||
|  | 
 | ||||||
|  |         switch (OpCode::GetInfo(instr.opcode).type) { | ||||||
|  |         case OpCode::Type::Arithmetic: { | ||||||
|  |             ASSERT(!instr.alu.abs_d); | ||||||
|  | 
 | ||||||
|  |             std::string dest = GetRegister(instr.gpr0); | ||||||
|  |             std::string op_a = instr.alu.negate_a ? "-" : ""; | ||||||
|  |             op_a += GetRegister(instr.gpr8); | ||||||
|  |             if (instr.alu.abs_a) { | ||||||
|  |                 op_a = "abs(" + op_a + ")"; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             std::string op_b = instr.alu.negate_b ? "-" : ""; | ||||||
|  |             if (instr.is_b_gpr) { | ||||||
|  |                 op_b += GetRegister(instr.gpr20); | ||||||
|  |             } else { | ||||||
|  |                 op_b += GetUniform(instr.uniform); | ||||||
|  |             } | ||||||
|  |             if (instr.alu.abs_b) { | ||||||
|  |                 op_b = "abs(" + op_b + ")"; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             switch (instr.opcode.EffectiveOpCode()) { | ||||||
|  |             case OpCode::Id::FMUL_C: | ||||||
|  |             case OpCode::Id::FMUL_R: { | ||||||
|  |                 SetDest(0, dest, op_a + " * " + op_b, 1, 1); | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  |             case OpCode::Id::FADD_C: | ||||||
|  |             case OpCode::Id::FADD_R: { | ||||||
|  |                 SetDest(0, dest, op_a + " + " + op_b, 1, 1); | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  |             default: { | ||||||
|  |                 LOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", | ||||||
|  |                              static_cast<unsigned>(instr.opcode.EffectiveOpCode()), | ||||||
|  |                              OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); | ||||||
|  |                 throw DecompileFail("Unhandled instruction"); | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  |             } | ||||||
|  |             break; | ||||||
|  |         } | ||||||
|  |         case OpCode::Type::Ffma: { | ||||||
|  |             ASSERT_MSG(!instr.ffma.negate_b, "untested"); | ||||||
|  |             ASSERT_MSG(!instr.ffma.negate_c, "untested"); | ||||||
|  | 
 | ||||||
|  |             std::string dest = GetRegister(instr.gpr0); | ||||||
|  |             std::string op_a = GetRegister(instr.gpr8); | ||||||
|  | 
 | ||||||
|  |             std::string op_b = instr.ffma.negate_b ? "-" : ""; | ||||||
|  |             op_b += GetUniform(instr.uniform); | ||||||
|  | 
 | ||||||
|  |             std::string op_c = instr.ffma.negate_c ? "-" : ""; | ||||||
|  |             op_c += GetRegister(instr.gpr39); | ||||||
|  | 
 | ||||||
|  |             switch (instr.opcode.EffectiveOpCode()) { | ||||||
|  |             case OpCode::Id::FFMA_CR: { | ||||||
|  |                 SetDest(0, dest, op_a + " * " + op_b + " + " + op_c, 1, 1); | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             default: { | ||||||
|  |                 LOG_CRITICAL(HW_GPU, "Unhandled arithmetic FFMA instruction: 0x%02x (%s): 0x%08x", | ||||||
|  |                              static_cast<unsigned>(instr.opcode.EffectiveOpCode()), | ||||||
|  |                              OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); | ||||||
|  |                 throw DecompileFail("Unhandled instruction"); | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  |             } | ||||||
|  |             break; | ||||||
|  |         } | ||||||
|  |         case OpCode::Type::Memory: { | ||||||
|  |             std::string gpr0 = GetRegister(instr.gpr0); | ||||||
|  |             const Attribute::Index attribute = instr.attribute.fmt20.index; | ||||||
|  | 
 | ||||||
|  |             switch (instr.opcode.EffectiveOpCode()) { | ||||||
|  |             case OpCode::Id::LD_A: { | ||||||
|  |                 ASSERT(instr.attribute.fmt20.size == 0); | ||||||
|  |                 SetDest(instr.attribute.fmt20.element, gpr0, GetInputAttribute(attribute), 1, 4); | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  |             case OpCode::Id::ST_A: { | ||||||
|  |                 ASSERT(instr.attribute.fmt20.size == 0); | ||||||
|  |                 SetDest(instr.attribute.fmt20.element, GetOutputAttribute(attribute), gpr0, 4, 1); | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  |             default: { | ||||||
|  |                 LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: 0x%02x (%s): 0x%08x", | ||||||
|  |                              static_cast<unsigned>(instr.opcode.EffectiveOpCode()), | ||||||
|  |                              OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); | ||||||
|  |                 throw DecompileFail("Unhandled instruction"); | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  |             } | ||||||
|  |             break; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         default: { | ||||||
|  |             switch (instr.opcode.EffectiveOpCode()) { | ||||||
|  |             case OpCode::Id::EXIT: { | ||||||
|  |                 shader.AddLine("return true;"); | ||||||
|  |                 offset = PROGRAM_END - 1; | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             default: { | ||||||
|  |                 LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", | ||||||
|  |                              static_cast<unsigned>(instr.opcode.EffectiveOpCode()), | ||||||
|  |                              OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); | ||||||
|  |                 throw DecompileFail("Unhandled instruction"); | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             break; | ||||||
|  |         } | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         return offset + 1; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     /**
 | ||||||
|  |      * Compiles a range of instructions from Tegra to GLSL. | ||||||
|  |      * @param begin the offset of the starting instruction. | ||||||
|  |      * @param end the offset where the compilation should stop (exclusive). | ||||||
|  |      * @return the offset of the next instruction to compile. PROGRAM_END if the program | ||||||
|  |      * terminates. | ||||||
|  |      */ | ||||||
|  |     u32 CompileRange(u32 begin, u32 end) { | ||||||
|  |         u32 program_counter; | ||||||
|  |         for (program_counter = begin; program_counter < (begin > end ? PROGRAM_END : end);) { | ||||||
|  |             program_counter = CompileInstr(program_counter); | ||||||
|  |         } | ||||||
|  |         return program_counter; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     void Generate() { | ||||||
|  |         // Add declarations for all subroutines
 | ||||||
|  |         for (const auto& subroutine : subroutines) { | ||||||
|  |             shader.AddLine("bool " + subroutine.GetName() + "();"); | ||||||
|  |         } | ||||||
|  |         shader.AddLine(""); | ||||||
|  | 
 | ||||||
|  |         // Add the main entry point
 | ||||||
|  |         shader.AddLine("bool exec_shader() {"); | ||||||
|  |         ++shader.scope; | ||||||
|  |         CallSubroutine(GetSubroutine(main_offset, PROGRAM_END)); | ||||||
|  |         --shader.scope; | ||||||
|  |         shader.AddLine("}\n"); | ||||||
|  | 
 | ||||||
|  |         // Add definitions for all subroutines
 | ||||||
|  |         for (const auto& subroutine : subroutines) { | ||||||
|  |             std::set<u32> labels = subroutine.labels; | ||||||
|  | 
 | ||||||
|  |             shader.AddLine("bool " + subroutine.GetName() + "() {"); | ||||||
|  |             ++shader.scope; | ||||||
|  | 
 | ||||||
|  |             if (labels.empty()) { | ||||||
|  |                 if (CompileRange(subroutine.begin, subroutine.end) != PROGRAM_END) { | ||||||
|  |                     shader.AddLine("return false;"); | ||||||
|  |                 } | ||||||
|  |             } else { | ||||||
|  |                 labels.insert(subroutine.begin); | ||||||
|  |                 shader.AddLine("uint jmp_to = " + std::to_string(subroutine.begin) + "u;"); | ||||||
|  |                 shader.AddLine("while (true) {"); | ||||||
|  |                 ++shader.scope; | ||||||
|  | 
 | ||||||
|  |                 shader.AddLine("switch (jmp_to) {"); | ||||||
|  | 
 | ||||||
|  |                 for (auto label : labels) { | ||||||
|  |                     shader.AddLine("case " + std::to_string(label) + "u: {"); | ||||||
|  |                     ++shader.scope; | ||||||
|  | 
 | ||||||
|  |                     auto next_it = labels.lower_bound(label + 1); | ||||||
|  |                     u32 next_label = next_it == labels.end() ? subroutine.end : *next_it; | ||||||
|  | 
 | ||||||
|  |                     u32 compile_end = CompileRange(label, next_label); | ||||||
|  |                     if (compile_end > next_label && compile_end != PROGRAM_END) { | ||||||
|  |                         // This happens only when there is a label inside a IF/LOOP block
 | ||||||
|  |                         shader.AddLine("{ jmp_to = " + std::to_string(compile_end) + "u; break; }"); | ||||||
|  |                         labels.emplace(compile_end); | ||||||
|  |                     } | ||||||
|  | 
 | ||||||
|  |                     --shader.scope; | ||||||
|  |                     shader.AddLine("}"); | ||||||
|  |                 } | ||||||
|  | 
 | ||||||
|  |                 shader.AddLine("default: return false;"); | ||||||
|  |                 shader.AddLine("}"); | ||||||
|  | 
 | ||||||
|  |                 --shader.scope; | ||||||
|  |                 shader.AddLine("}"); | ||||||
|  | 
 | ||||||
|  |                 shader.AddLine("return false;"); | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             --shader.scope; | ||||||
|  |             shader.AddLine("}\n"); | ||||||
|  | 
 | ||||||
|  |             DEBUG_ASSERT(shader.scope == 0); | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         GenerateDeclarations(); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     /// Add declarations for registers
 | ||||||
|  |     void GenerateDeclarations() { | ||||||
|  |         for (const auto& reg : declr_register) { | ||||||
|  |             declarations.AddLine("float " + reg + " = 0.0;"); | ||||||
|  |         } | ||||||
|  |         declarations.AddLine(""); | ||||||
|  | 
 | ||||||
|  |         for (const auto& index : declr_input_attribute) { | ||||||
|  |             // TODO(bunnei): Use proper number of elements for these
 | ||||||
|  |             declarations.AddLine("layout(location = " + | ||||||
|  |                                  std::to_string(static_cast<u32>(index) - | ||||||
|  |                                                 static_cast<u32>(Attribute::Index::Attribute_0)) + | ||||||
|  |                                  ") in vec4 " + GetInputAttribute(index) + ";"); | ||||||
|  |         } | ||||||
|  |         declarations.AddLine(""); | ||||||
|  | 
 | ||||||
|  |         for (const auto& index : declr_output_attribute) { | ||||||
|  |             // TODO(bunnei): Use proper number of elements for these
 | ||||||
|  |             declarations.AddLine("layout(location = " + | ||||||
|  |                                  std::to_string(static_cast<u32>(index) - | ||||||
|  |                                                 static_cast<u32>(Attribute::Index::Attribute_0)) + | ||||||
|  |                                  ") out vec4 " + GetOutputAttribute(index) + ";"); | ||||||
|  |         } | ||||||
|  |         declarations.AddLine(""); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  | private: | ||||||
|  |     const std::set<Subroutine>& subroutines; | ||||||
|  |     const ProgramCode& program_code; | ||||||
|  |     const u32 main_offset; | ||||||
|  |     Maxwell3D::Regs::ShaderStage stage; | ||||||
|  | 
 | ||||||
|  |     ShaderWriter shader; | ||||||
|  |     ShaderWriter declarations; | ||||||
|  | 
 | ||||||
|  |     // Declarations
 | ||||||
|  |     std::set<std::string> declr_register; | ||||||
|  |     std::set<Attribute::Index> declr_input_attribute; | ||||||
|  |     std::set<Attribute::Index> declr_output_attribute; | ||||||
|  | }; // namespace Decompiler
 | ||||||
|  | 
 | ||||||
/// Returns the GLSL forward declaration of the generated shader entry point.
std::string GetCommonDeclarations() {
    const std::string entry_point_declaration = "bool exec_shader();";
    return entry_point_declaration;
}
|  | 
 | ||||||
|  | boost::optional<std::string> DecompileProgram(const ProgramCode& program_code, u32 main_offset, | ||||||
|  |                                               Maxwell3D::Regs::ShaderStage stage) { | ||||||
|  |     try { | ||||||
|  |         auto subroutines = ControlFlowAnalyzer(program_code, main_offset).GetSubroutines(); | ||||||
|  |         GLSLGenerator generator(subroutines, program_code, main_offset, stage); | ||||||
|  |         return generator.GetShaderCode(); | ||||||
|  |     } catch (const DecompileFail& exception) { | ||||||
|  |         LOG_ERROR(HW_GPU, "Shader decompilation failed: %s", exception.what()); | ||||||
|  |     } | ||||||
|  |     return boost::none; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| } // namespace Decompiler
 | } // namespace Decompiler
 | ||||||
| } // namespace Shader
 | } // namespace GLShader
 | ||||||
| } // namespace Maxwell3D
 |  | ||||||
|  | |||||||
| @ -5,23 +5,20 @@ | |||||||
| #include <array> | #include <array> | ||||||
| #include <functional> | #include <functional> | ||||||
| #include <string> | #include <string> | ||||||
|  | #include <boost/optional.hpp> | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
|  | #include "video_core/engines/maxwell_3d.h" | ||||||
|  | #include "video_core/renderer_opengl/gl_shader_gen.h" | ||||||
| 
 | 
 | ||||||
| namespace Maxwell3D { | namespace GLShader { | ||||||
| namespace Shader { |  | ||||||
| namespace Decompiler { | namespace Decompiler { | ||||||
| 
 | 
 | ||||||
| constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x100000}; | using Tegra::Engines::Maxwell3D; | ||||||
| constexpr size_t MAX_SWIZZLE_DATA_LENGTH{0x100000}; |  | ||||||
| 
 | 
 | ||||||
| std::string DecompileProgram(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code, | std::string GetCommonDeclarations(); | ||||||
|                              const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data, | 
 | ||||||
|                              u32 main_offset, | boost::optional<std::string> DecompileProgram(const ProgramCode& program_code, u32 main_offset, | ||||||
|                              const std::function<std::string(u32)>& inputreg_getter, |                                               Maxwell3D::Regs::ShaderStage stage); | ||||||
|                              const std::function<std::string(u32)>& outputreg_getter, |  | ||||||
|                              bool sanitize_mul, const std::string& emit_cb = "", |  | ||||||
|                              const std::string& setemit_cb = ""); |  | ||||||
| 
 | 
 | ||||||
| } // namespace Decompiler
 | } // namespace Decompiler
 | ||||||
| } // namespace Shader
 | } // namespace GLShader
 | ||||||
| } // namespace Maxwell3D
 |  | ||||||
|  | |||||||
| @ -7,12 +7,12 @@ | |||||||
| 
 | 
 | ||||||
| namespace GLShader { | namespace GLShader { | ||||||
| 
 | 
 | ||||||
| std::string GenerateVertexShader(const MaxwellVSConfig& config) { | std::string GenerateVertexShader(const ShaderSetup& setup, const MaxwellVSConfig& config) { | ||||||
|     UNREACHABLE(); |     UNREACHABLE(); | ||||||
|     return {}; |     return {}; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| std::string GenerateFragmentShader(const MaxwellFSConfig& config) { | std::string GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSConfig& config) { | ||||||
|     UNREACHABLE(); |     UNREACHABLE(); | ||||||
|     return {}; |     return {}; | ||||||
| } | } | ||||||
|  | |||||||
| @ -4,46 +4,67 @@ | |||||||
| 
 | 
 | ||||||
| #pragma once | #pragma once | ||||||
| 
 | 
 | ||||||
| #include <cstring> | #include <array> | ||||||
| #include <string> | #include <string> | ||||||
| #include <type_traits> | #include <type_traits> | ||||||
|  | #include "common/common_types.h" | ||||||
| #include "common/hash.h" | #include "common/hash.h" | ||||||
| 
 | 
 | ||||||
| namespace GLShader { | namespace GLShader { | ||||||
| 
 | 
 | ||||||
| enum Attributes { | constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x1000}; | ||||||
|     ATTRIBUTE_POSITION, | 
 | ||||||
|     ATTRIBUTE_COLOR, | using ProgramCode = std::array<u64, MAX_PROGRAM_CODE_LENGTH>; | ||||||
|     ATTRIBUTE_TEXCOORD0, | 
 | ||||||
|     ATTRIBUTE_TEXCOORD1, | struct ShaderSetup { | ||||||
|     ATTRIBUTE_TEXCOORD2, |     ShaderSetup(ProgramCode&& program_code) : program_code(std::move(program_code)) {} | ||||||
|     ATTRIBUTE_TEXCOORD0_W, | 
 | ||||||
|     ATTRIBUTE_NORMQUAT, |     ProgramCode program_code; | ||||||
|     ATTRIBUTE_VIEW, |     bool program_code_hash_dirty = true; | ||||||
|  | 
 | ||||||
|  |     u64 GetProgramCodeHash() { | ||||||
|  |         if (program_code_hash_dirty) { | ||||||
|  |             program_code_hash = Common::ComputeHash64(&program_code, sizeof(program_code)); | ||||||
|  |             program_code_hash_dirty = false; | ||||||
|  |         } | ||||||
|  |         return program_code_hash; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  | private: | ||||||
|  |     u64 program_code_hash{}; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| struct MaxwellShaderConfigCommon { | struct MaxwellShaderConfigCommon { | ||||||
|     explicit MaxwellShaderConfigCommon(){}; |     void Init(ShaderSetup& setup) { | ||||||
|  |         program_hash = setup.GetProgramCodeHash(); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     u64 program_hash; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| struct MaxwellVSConfig : MaxwellShaderConfigCommon { | struct MaxwellVSConfig : Common::HashableStruct<MaxwellShaderConfigCommon> { | ||||||
|     explicit MaxwellVSConfig() : MaxwellShaderConfigCommon() {} |     explicit MaxwellVSConfig(ShaderSetup& setup) { | ||||||
| 
 |         state.Init(setup); | ||||||
|     bool operator==(const MaxwellVSConfig& o) const { |     } | ||||||
|         return std::memcmp(this, &o, sizeof(MaxwellVSConfig)) == 0; |  | ||||||
|     }; |  | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| struct MaxwellFSConfig : MaxwellShaderConfigCommon { | struct MaxwellFSConfig : Common::HashableStruct<MaxwellShaderConfigCommon> { | ||||||
|     explicit MaxwellFSConfig() : MaxwellShaderConfigCommon() {} |     explicit MaxwellFSConfig(ShaderSetup& setup) { | ||||||
| 
 |         state.Init(setup); | ||||||
|     bool operator==(const MaxwellFSConfig& o) const { |     } | ||||||
|         return std::memcmp(this, &o, sizeof(MaxwellFSConfig)) == 0; |  | ||||||
|     }; |  | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| std::string GenerateVertexShader(const MaxwellVSConfig& config); | /**
 | ||||||
| std::string GenerateFragmentShader(const MaxwellFSConfig& config); |  * Generates the GLSL vertex shader program source code for the given VS program | ||||||
|  |  * @returns String of the shader source code | ||||||
|  |  */ | ||||||
|  | std::string GenerateVertexShader(const ShaderSetup& setup, const MaxwellVSConfig& config); | ||||||
|  | 
 | ||||||
|  | /**
 | ||||||
|  |  * Generates the GLSL fragment shader program source code for the given FS program | ||||||
|  |  * @returns String of the shader source code | ||||||
|  |  */ | ||||||
|  | std::string GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSConfig& config); | ||||||
| 
 | 
 | ||||||
| } // namespace GLShader
 | } // namespace GLShader
 | ||||||
| 
 | 
 | ||||||
| @ -52,14 +73,14 @@ namespace std { | |||||||
| template <> | template <> | ||||||
| struct hash<GLShader::MaxwellVSConfig> { | struct hash<GLShader::MaxwellVSConfig> { | ||||||
|     size_t operator()(const GLShader::MaxwellVSConfig& k) const { |     size_t operator()(const GLShader::MaxwellVSConfig& k) const { | ||||||
|         return Common::ComputeHash64(&k, sizeof(GLShader::MaxwellVSConfig)); |         return k.Hash(); | ||||||
|     } |     } | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| template <> | template <> | ||||||
| struct hash<GLShader::MaxwellFSConfig> { | struct hash<GLShader::MaxwellFSConfig> { | ||||||
|     size_t operator()(const GLShader::MaxwellFSConfig& k) const { |     size_t operator()(const GLShader::MaxwellFSConfig& k) const { | ||||||
|         return Common::ComputeHash64(&k, sizeof(GLShader::MaxwellFSConfig)); |         return k.Hash(); | ||||||
|     } |     } | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | |||||||
							
								
								
									
										65
									
								
								src/video_core/renderer_opengl/gl_shader_manager.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										65
									
								
								src/video_core/renderer_opengl/gl_shader_manager.cpp
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,65 @@ | |||||||
|  | // Copyright 2018 yuzu Emulator Project
 | ||||||
|  | // Licensed under GPLv2 or any later version
 | ||||||
|  | // Refer to the license.txt file included.
 | ||||||
|  | 
 | ||||||
|  | #include "core/core.h" | ||||||
|  | #include "core/hle/kernel/process.h" | ||||||
|  | #include "video_core/engines/maxwell_3d.h" | ||||||
|  | #include "video_core/renderer_opengl/gl_shader_manager.h" | ||||||
|  | 
 | ||||||
|  | namespace GLShader { | ||||||
|  | 
 | ||||||
|  | namespace Impl { | ||||||
|  | void SetShaderUniformBlockBinding(GLuint shader, const char* name, | ||||||
|  |                                   Maxwell3D::Regs::ShaderStage binding, size_t expected_size) { | ||||||
|  |     GLuint ub_index = glGetUniformBlockIndex(shader, name); | ||||||
|  |     if (ub_index != GL_INVALID_INDEX) { | ||||||
|  |         GLint ub_size = 0; | ||||||
|  |         glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size); | ||||||
|  |         ASSERT_MSG(ub_size == expected_size, | ||||||
|  |                    "Uniform block size did not match! Got %d, expected %zu", | ||||||
|  |                    static_cast<int>(ub_size), expected_size); | ||||||
|  |         glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding)); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void SetShaderUniformBlockBindings(GLuint shader) { | ||||||
|  |     SetShaderUniformBlockBinding(shader, "vs_config", Maxwell3D::Regs::ShaderStage::Vertex, | ||||||
|  |                                  sizeof(MaxwellUniformData)); | ||||||
|  |     SetShaderUniformBlockBinding(shader, "gs_config", Maxwell3D::Regs::ShaderStage::Geometry, | ||||||
|  |                                  sizeof(MaxwellUniformData)); | ||||||
|  |     SetShaderUniformBlockBinding(shader, "fs_config", Maxwell3D::Regs::ShaderStage::Fragment, | ||||||
|  |                                  sizeof(MaxwellUniformData)); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void SetShaderSamplerBindings(GLuint shader) { | ||||||
|  |     OpenGLState cur_state = OpenGLState::GetCurState(); | ||||||
|  |     GLuint old_program = std::exchange(cur_state.draw.shader_program, shader); | ||||||
|  |     cur_state.Apply(); | ||||||
|  | 
 | ||||||
|  |     // Set the texture samplers to correspond to different texture units
 | ||||||
|  |     for (u32 texture = 0; texture < NumTextureSamplers; ++texture) { | ||||||
|  |         // Set the texture samplers to correspond to different texture units
 | ||||||
|  |         std::string uniform_name = "tex[" + std::to_string(texture) + "]"; | ||||||
|  |         GLint uniform_tex = glGetUniformLocation(shader, uniform_name.c_str()); | ||||||
|  |         if (uniform_tex != -1) { | ||||||
|  |             glUniform1i(uniform_tex, TextureUnits::MaxwellTexture(texture).id); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     cur_state.draw.shader_program = old_program; | ||||||
|  |     cur_state.Apply(); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | } // namespace Impl
 | ||||||
|  | 
 | ||||||
|  | void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) { | ||||||
|  |     const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; | ||||||
|  |     for (unsigned index = 0; index < shader_stage.const_buffers.size(); ++index) { | ||||||
|  |         const auto& const_buffer = shader_stage.const_buffers[index]; | ||||||
|  |         const VAddr vaddr = memory_manager->PhysicalToVirtualAddress(const_buffer.address); | ||||||
|  |         Memory::ReadBlock(vaddr, const_buffers[index].data(), sizeof(ConstBuffer)); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | } // namespace GLShader
 | ||||||
							
								
								
									
										151
									
								
								src/video_core/renderer_opengl/gl_shader_manager.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										151
									
								
								src/video_core/renderer_opengl/gl_shader_manager.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,151 @@ | |||||||
|  | // Copyright 2018 yuzu Emulator Project
 | ||||||
|  | // Licensed under GPLv2 or any later version
 | ||||||
|  | // Refer to the license.txt file included.
 | ||||||
|  | 
 | ||||||
|  | #pragma once | ||||||
|  | 
 | ||||||
|  | #include <tuple> | ||||||
|  | #include <unordered_map> | ||||||
|  | #include <boost/functional/hash.hpp> | ||||||
|  | #include <glad/glad.h> | ||||||
|  | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||||
|  | #include "video_core/renderer_opengl/gl_shader_gen.h" | ||||||
|  | #include "video_core/renderer_opengl/maxwell_to_gl.h" | ||||||
|  | 
 | ||||||
|  | namespace GLShader { | ||||||
|  | 
 | ||||||
|  | /// Number of OpenGL texture samplers that can be used in the fragment shader
 | ||||||
|  | static constexpr size_t NumTextureSamplers = 32; | ||||||
|  | 
 | ||||||
|  | using Tegra::Engines::Maxwell3D; | ||||||
|  | 
 | ||||||
|  | namespace Impl { | ||||||
|  | void SetShaderUniformBlockBindings(GLuint shader); | ||||||
|  | void SetShaderSamplerBindings(GLuint shader); | ||||||
|  | } // namespace Impl
 | ||||||
|  | 
 | ||||||
|  | /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
 | ||||||
|  | // NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
 | ||||||
|  | //       the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
 | ||||||
|  | //       Not following that rule will cause problems on some AMD drivers.
 | ||||||
|  | struct MaxwellUniformData { | ||||||
|  |     void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage); | ||||||
|  | 
 | ||||||
|  |     using ConstBuffer = std::array<GLvec4, 4>; | ||||||
|  |     alignas(16) std::array<ConstBuffer, Maxwell3D::Regs::MaxConstBuffers> const_buffers; | ||||||
|  | }; | ||||||
|  | static_assert(sizeof(MaxwellUniformData) == 1024, "MaxwellUniformData structure size is incorrect"); | ||||||
|  | static_assert(sizeof(MaxwellUniformData) < 16384, | ||||||
|  |               "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); | ||||||
|  | 
 | ||||||
|  | class OGLShaderStage { | ||||||
|  | public: | ||||||
|  |     OGLShaderStage() = default; | ||||||
|  | 
 | ||||||
|  |     void Create(const char* source, GLenum type) { | ||||||
|  |         OGLShader shader; | ||||||
|  |         shader.Create(source, type); | ||||||
|  |         program.Create(true, shader.handle); | ||||||
|  |         Impl::SetShaderUniformBlockBindings(program.handle); | ||||||
|  |         Impl::SetShaderSamplerBindings(program.handle); | ||||||
|  |     } | ||||||
|  |     GLuint GetHandle() const { | ||||||
|  |         return program.handle; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  | private: | ||||||
|  |     OGLProgram program; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | // TODO(wwylele): beautify this doc
 | ||||||
|  | // This is a shader cache designed for translating PICA shader to GLSL shader.
 | ||||||
|  | // The double cache is needed because different KeyConfigType values, which include a hash of the code
 | ||||||
|  | // region (including its leftover unused code) can generate the same GLSL code.
 | ||||||
|  | template <typename KeyConfigType, | ||||||
|  |           std::string (*CodeGenerator)(const ShaderSetup&, const KeyConfigType&), GLenum ShaderType> | ||||||
|  | class ShaderCache { | ||||||
|  | public: | ||||||
|  |     ShaderCache() = default; | ||||||
|  | 
 | ||||||
|  |     GLuint Get(const KeyConfigType& key, const ShaderSetup& setup) { | ||||||
|  |         auto map_it = shader_map.find(key); | ||||||
|  |         if (map_it == shader_map.end()) { | ||||||
|  |             std::string program = CodeGenerator(setup, key); | ||||||
|  | 
 | ||||||
|  |             auto [iter, new_shader] = shader_cache.emplace(program, OGLShaderStage{}); | ||||||
|  |             OGLShaderStage& cached_shader = iter->second; | ||||||
|  |             if (new_shader) { | ||||||
|  |                 cached_shader.Create(program.c_str(), ShaderType); | ||||||
|  |             } | ||||||
|  |             shader_map[key] = &cached_shader; | ||||||
|  |             return cached_shader.GetHandle(); | ||||||
|  |         } else { | ||||||
|  |             return map_it->second->GetHandle(); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  | private: | ||||||
|  |     std::unordered_map<KeyConfigType, OGLShaderStage*> shader_map; | ||||||
|  |     std::unordered_map<std::string, OGLShaderStage> shader_cache; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | using VertexShaders = ShaderCache<MaxwellVSConfig, &GenerateVertexShader, GL_VERTEX_SHADER>; | ||||||
|  | 
 | ||||||
|  | using FragmentShaders = ShaderCache<MaxwellFSConfig, &GenerateFragmentShader, GL_FRAGMENT_SHADER>; | ||||||
|  | 
 | ||||||
|  | class ProgramManager { | ||||||
|  | public: | ||||||
|  |     ProgramManager() { | ||||||
|  |         pipeline.Create(); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     void UseProgrammableVertexShader(const MaxwellVSConfig& config, const ShaderSetup setup) { | ||||||
|  |         current.vs = vertex_shaders.Get(config, setup); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     void UseProgrammableFragmentShader(const MaxwellFSConfig& config, const ShaderSetup setup) { | ||||||
|  |         current.fs = fragment_shaders.Get(config, setup); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     void UseTrivialGeometryShader() { | ||||||
|  |         current.gs = 0; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     void ApplyTo(OpenGLState& state) { | ||||||
|  |         // Workaround for AMD bug
 | ||||||
|  |         glUseProgramStages(pipeline.handle, | ||||||
|  |                            GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT, | ||||||
|  |                            0); | ||||||
|  | 
 | ||||||
|  |         glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current.vs); | ||||||
|  |         glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, current.gs); | ||||||
|  |         glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current.fs); | ||||||
|  |         state.draw.shader_program = 0; | ||||||
|  |         state.draw.program_pipeline = pipeline.handle; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  | private: | ||||||
|  |     struct ShaderTuple { | ||||||
|  |         GLuint vs = 0, gs = 0, fs = 0; | ||||||
|  |         bool operator==(const ShaderTuple& rhs) const { | ||||||
|  |             return std::tie(vs, gs, fs) == std::tie(rhs.vs, rhs.gs, rhs.fs); | ||||||
|  |         } | ||||||
|  |         struct Hash { | ||||||
|  |             std::size_t operator()(const ShaderTuple& tuple) const { | ||||||
|  |                 std::size_t hash = 0; | ||||||
|  |                 boost::hash_combine(hash, tuple.vs); | ||||||
|  |                 boost::hash_combine(hash, tuple.gs); | ||||||
|  |                 boost::hash_combine(hash, tuple.fs); | ||||||
|  |                 return hash; | ||||||
|  |             } | ||||||
|  |         }; | ||||||
|  |     }; | ||||||
|  |     ShaderTuple current; | ||||||
|  |     VertexShaders vertex_shaders; | ||||||
|  |     FragmentShaders fragment_shaders; | ||||||
|  | 
 | ||||||
|  |     std::unordered_map<ShaderTuple, OGLProgram, ShaderTuple::Hash> program_cache; | ||||||
|  |     OGLPipeline pipeline; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | } // namespace GLShader
 | ||||||
| @ -10,156 +10,41 @@ | |||||||
| 
 | 
 | ||||||
| namespace GLShader { | namespace GLShader { | ||||||
| 
 | 
 | ||||||
| GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader, | GLuint LoadShader(const char* source, GLenum type) { | ||||||
|                    const char* fragment_shader, const std::vector<const char*>& feedback_vars, |     const char* debug_type; | ||||||
|                    bool separable_program) { |     switch (type) { | ||||||
|     // Create the shaders
 |     case GL_VERTEX_SHADER: | ||||||
|     GLuint vertex_shader_id = vertex_shader ? glCreateShader(GL_VERTEX_SHADER) : 0; |         debug_type = "vertex"; | ||||||
|     GLuint geometry_shader_id = geometry_shader ? glCreateShader(GL_GEOMETRY_SHADER) : 0; |         break; | ||||||
|     GLuint fragment_shader_id = fragment_shader ? glCreateShader(GL_FRAGMENT_SHADER) : 0; |     case GL_GEOMETRY_SHADER: | ||||||
|  |         debug_type = "geometry"; | ||||||
|  |         break; | ||||||
|  |     case GL_FRAGMENT_SHADER: | ||||||
|  |         debug_type = "fragment"; | ||||||
|  |         break; | ||||||
|  |     default: | ||||||
|  |         UNREACHABLE(); | ||||||
|  |     } | ||||||
|  |     GLuint shader_id = glCreateShader(type); | ||||||
|  |     glShaderSource(shader_id, 1, &source, nullptr); | ||||||
|  |     NGLOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); | ||||||
|  |     glCompileShader(shader_id); | ||||||
| 
 | 
 | ||||||
|     GLint result = GL_FALSE; |     GLint result = GL_FALSE; | ||||||
|     int info_log_length; |     GLint info_log_length; | ||||||
| 
 |     glGetShaderiv(shader_id, GL_COMPILE_STATUS, &result); | ||||||
|     if (vertex_shader) { |     glGetShaderiv(shader_id, GL_INFO_LOG_LENGTH, &info_log_length); | ||||||
|         // Compile Vertex Shader
 |  | ||||||
|         LOG_DEBUG(Render_OpenGL, "Compiling vertex shader..."); |  | ||||||
| 
 |  | ||||||
|         glShaderSource(vertex_shader_id, 1, &vertex_shader, nullptr); |  | ||||||
|         glCompileShader(vertex_shader_id); |  | ||||||
| 
 |  | ||||||
|         // Check Vertex Shader
 |  | ||||||
|         glGetShaderiv(vertex_shader_id, GL_COMPILE_STATUS, &result); |  | ||||||
|         glGetShaderiv(vertex_shader_id, GL_INFO_LOG_LENGTH, &info_log_length); |  | ||||||
| 
 |  | ||||||
|         if (info_log_length > 1) { |  | ||||||
|             std::vector<char> vertex_shader_error(info_log_length); |  | ||||||
|             glGetShaderInfoLog(vertex_shader_id, info_log_length, nullptr, &vertex_shader_error[0]); |  | ||||||
|             if (result == GL_TRUE) { |  | ||||||
|                 LOG_DEBUG(Render_OpenGL, "%s", &vertex_shader_error[0]); |  | ||||||
|             } else { |  | ||||||
|                 LOG_CRITICAL(Render_OpenGL, "Error compiling vertex shader:\n%s", |  | ||||||
|                              &vertex_shader_error[0]); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     if (geometry_shader) { |  | ||||||
|         // Compile Geometry Shader
 |  | ||||||
|         LOG_DEBUG(Render_OpenGL, "Compiling geometry shader..."); |  | ||||||
| 
 |  | ||||||
|         glShaderSource(geometry_shader_id, 1, &geometry_shader, nullptr); |  | ||||||
|         glCompileShader(geometry_shader_id); |  | ||||||
| 
 |  | ||||||
|         // Check Geometry Shader
 |  | ||||||
|         glGetShaderiv(geometry_shader_id, GL_COMPILE_STATUS, &result); |  | ||||||
|         glGetShaderiv(geometry_shader_id, GL_INFO_LOG_LENGTH, &info_log_length); |  | ||||||
| 
 |  | ||||||
|         if (info_log_length > 1) { |  | ||||||
|             std::vector<char> geometry_shader_error(info_log_length); |  | ||||||
|             glGetShaderInfoLog(geometry_shader_id, info_log_length, nullptr, |  | ||||||
|                                &geometry_shader_error[0]); |  | ||||||
|             if (result == GL_TRUE) { |  | ||||||
|                 LOG_DEBUG(Render_OpenGL, "%s", &geometry_shader_error[0]); |  | ||||||
|             } else { |  | ||||||
|                 LOG_CRITICAL(Render_OpenGL, "Error compiling geometry shader:\n%s", |  | ||||||
|                              &geometry_shader_error[0]); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     if (fragment_shader) { |  | ||||||
|         // Compile Fragment Shader
 |  | ||||||
|         LOG_DEBUG(Render_OpenGL, "Compiling fragment shader..."); |  | ||||||
| 
 |  | ||||||
|         glShaderSource(fragment_shader_id, 1, &fragment_shader, nullptr); |  | ||||||
|         glCompileShader(fragment_shader_id); |  | ||||||
| 
 |  | ||||||
|         // Check Fragment Shader
 |  | ||||||
|         glGetShaderiv(fragment_shader_id, GL_COMPILE_STATUS, &result); |  | ||||||
|         glGetShaderiv(fragment_shader_id, GL_INFO_LOG_LENGTH, &info_log_length); |  | ||||||
| 
 |  | ||||||
|         if (info_log_length > 1) { |  | ||||||
|             std::vector<char> fragment_shader_error(info_log_length); |  | ||||||
|             glGetShaderInfoLog(fragment_shader_id, info_log_length, nullptr, |  | ||||||
|                                &fragment_shader_error[0]); |  | ||||||
|             if (result == GL_TRUE) { |  | ||||||
|                 LOG_DEBUG(Render_OpenGL, "%s", &fragment_shader_error[0]); |  | ||||||
|             } else { |  | ||||||
|                 LOG_CRITICAL(Render_OpenGL, "Error compiling fragment shader:\n%s", |  | ||||||
|                              &fragment_shader_error[0]); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     // Link the program
 |  | ||||||
|     LOG_DEBUG(Render_OpenGL, "Linking program..."); |  | ||||||
| 
 |  | ||||||
|     GLuint program_id = glCreateProgram(); |  | ||||||
|     if (vertex_shader) { |  | ||||||
|         glAttachShader(program_id, vertex_shader_id); |  | ||||||
|     } |  | ||||||
|     if (geometry_shader) { |  | ||||||
|         glAttachShader(program_id, geometry_shader_id); |  | ||||||
|     } |  | ||||||
|     if (fragment_shader) { |  | ||||||
|         glAttachShader(program_id, fragment_shader_id); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     if (!feedback_vars.empty()) { |  | ||||||
|         auto varyings = feedback_vars; |  | ||||||
|         glTransformFeedbackVaryings(program_id, static_cast<GLsizei>(feedback_vars.size()), |  | ||||||
|                                     &varyings[0], GL_INTERLEAVED_ATTRIBS); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     if (separable_program) { |  | ||||||
|         glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     glLinkProgram(program_id); |  | ||||||
| 
 |  | ||||||
|     // Check the program
 |  | ||||||
|     glGetProgramiv(program_id, GL_LINK_STATUS, &result); |  | ||||||
|     glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length); |  | ||||||
| 
 | 
 | ||||||
|     if (info_log_length > 1) { |     if (info_log_length > 1) { | ||||||
|         std::vector<char> program_error(info_log_length); |         std::string shader_error(info_log_length, ' '); | ||||||
|         glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]); |         glGetShaderInfoLog(shader_id, info_log_length, nullptr, &shader_error[0]); | ||||||
|         if (result == GL_TRUE) { |         if (result == GL_TRUE) { | ||||||
|             LOG_DEBUG(Render_OpenGL, "%s", &program_error[0]); |             NGLOG_DEBUG(Render_OpenGL, "{}", shader_error); | ||||||
|         } else { |         } else { | ||||||
|             LOG_CRITICAL(Render_OpenGL, "Error linking shader:\n%s", &program_error[0]); |             NGLOG_ERROR(Render_OpenGL, "Error compiling {} shader:\n{}", debug_type, shader_error); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 |     return shader_id; | ||||||
|     // If the program linking failed at least one of the shaders was probably bad
 |  | ||||||
|     if (result == GL_FALSE) { |  | ||||||
|         if (vertex_shader) { |  | ||||||
|             LOG_CRITICAL(Render_OpenGL, "Vertex shader:\n%s", vertex_shader); |  | ||||||
|         } |  | ||||||
|         if (geometry_shader) { |  | ||||||
|             LOG_CRITICAL(Render_OpenGL, "Geometry shader:\n%s", geometry_shader); |  | ||||||
|         } |  | ||||||
|         if (fragment_shader) { |  | ||||||
|             LOG_CRITICAL(Render_OpenGL, "Fragment shader:\n%s", fragment_shader); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     ASSERT_MSG(result == GL_TRUE, "Shader not linked"); |  | ||||||
| 
 |  | ||||||
|     if (vertex_shader) { |  | ||||||
|         glDetachShader(program_id, vertex_shader_id); |  | ||||||
|         glDeleteShader(vertex_shader_id); |  | ||||||
|     } |  | ||||||
|     if (geometry_shader) { |  | ||||||
|         glDetachShader(program_id, geometry_shader_id); |  | ||||||
|         glDeleteShader(geometry_shader_id); |  | ||||||
|     } |  | ||||||
|     if (fragment_shader) { |  | ||||||
|         glDetachShader(program_id, fragment_shader_id); |  | ||||||
|         glDeleteShader(fragment_shader_id); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return program_id; |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| } // namespace GLShader
 | } // namespace GLShader
 | ||||||
|  | |||||||
| @ -6,18 +6,60 @@ | |||||||
| 
 | 
 | ||||||
| #include <vector> | #include <vector> | ||||||
| #include <glad/glad.h> | #include <glad/glad.h> | ||||||
|  | #include "common/assert.h" | ||||||
|  | #include "common/logging/log.h" | ||||||
| 
 | 
 | ||||||
| namespace GLShader { | namespace GLShader { | ||||||
| 
 | 
 | ||||||
| /**
 | /**
 | ||||||
|  * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader) |  * Utility function to create and compile an OpenGL GLSL shader | ||||||
|  * @param vertex_shader String of the GLSL vertex shader program |  * @param source String of the GLSL shader program | ||||||
|  * @param geometry_shader String of the GLSL geometry shader program |  * @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER) | ||||||
|  * @param fragment_shader String of the GLSL fragment shader program |  | ||||||
|  * @returns Handle of the newly created OpenGL shader object |  | ||||||
|  */ |  */ | ||||||
| GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader, | GLuint LoadShader(const char* source, GLenum type); | ||||||
|                    const char* fragment_shader, const std::vector<const char*>& feedback_vars = {}, | 
 | ||||||
|                    bool separable_program = false); | /**
 | ||||||
|  |  * Utility function to create and link an OpenGL GLSL program from the given shader objects | ||||||
|  |  * @param separable_program whether to create a separable program | ||||||
|  |  * @param shaders ID of shaders to attach to the program | ||||||
|  |  * @returns Handle of the newly created OpenGL program object | ||||||
|  |  */ | ||||||
|  | template <typename... T> | ||||||
|  | GLuint LoadProgram(bool separable_program, T... shaders) { | ||||||
|  |     // Link the program
 | ||||||
|  |     NGLOG_DEBUG(Render_OpenGL, "Linking program..."); | ||||||
|  | 
 | ||||||
|  |     GLuint program_id = glCreateProgram(); | ||||||
|  | 
 | ||||||
|  |     ((shaders == 0 ? (void)0 : glAttachShader(program_id, shaders)), ...); | ||||||
|  | 
 | ||||||
|  |     if (separable_program) { | ||||||
|  |         glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     glLinkProgram(program_id); | ||||||
|  | 
 | ||||||
|  |     // Check the program
 | ||||||
|  |     GLint result = GL_FALSE; | ||||||
|  |     GLint info_log_length; | ||||||
|  |     glGetProgramiv(program_id, GL_LINK_STATUS, &result); | ||||||
|  |     glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length); | ||||||
|  | 
 | ||||||
|  |     if (info_log_length > 1) { | ||||||
|  |         std::string program_error(info_log_length, ' '); | ||||||
|  |         glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]); | ||||||
|  |         if (result == GL_TRUE) { | ||||||
|  |             NGLOG_DEBUG(Render_OpenGL, "{}", program_error); | ||||||
|  |         } else { | ||||||
|  |             NGLOG_ERROR(Render_OpenGL, "Error linking shader:\n{}", program_error); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     ASSERT_MSG(result == GL_TRUE, "Shader not linked"); | ||||||
|  | 
 | ||||||
|  |     ((shaders == 0 ? (void)0 : glDetachShader(program_id, shaders)), ...); | ||||||
|  | 
 | ||||||
|  |     return program_id; | ||||||
|  | } | ||||||
| 
 | 
 | ||||||
| } // namespace GLShader
 | } // namespace GLShader
 | ||||||
|  | |||||||
| @ -10,6 +10,14 @@ | |||||||
| #include "common/logging/log.h" | #include "common/logging/log.h" | ||||||
| #include "video_core/engines/maxwell_3d.h" | #include "video_core/engines/maxwell_3d.h" | ||||||
| 
 | 
 | ||||||
|  | using GLvec2 = std::array<GLfloat, 2>; | ||||||
|  | using GLvec3 = std::array<GLfloat, 3>; | ||||||
|  | using GLvec4 = std::array<GLfloat, 4>; | ||||||
|  | 
 | ||||||
|  | using GLuvec2 = std::array<GLuint, 2>; | ||||||
|  | using GLuvec3 = std::array<GLuint, 3>; | ||||||
|  | using GLuvec4 = std::array<GLuint, 4>; | ||||||
|  | 
 | ||||||
| namespace MaxwellToGL { | namespace MaxwellToGL { | ||||||
| 
 | 
 | ||||||
| using Maxwell = Tegra::Engines::Maxwell3D::Regs; | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||||||
| @ -39,6 +47,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { | |||||||
| 
 | 
 | ||||||
| inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { | inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { | ||||||
|     switch (topology) { |     switch (topology) { | ||||||
|  |     case Maxwell::PrimitiveTopology::Triangles: | ||||||
|  |         return GL_TRIANGLES; | ||||||
|     case Maxwell::PrimitiveTopology::TriangleStrip: |     case Maxwell::PrimitiveTopology::TriangleStrip: | ||||||
|         return GL_TRIANGLE_STRIP; |         return GL_TRIANGLE_STRIP; | ||||||
|     } |     } | ||||||
|  | |||||||
| @ -57,7 +57,7 @@ uniform sampler2D color_texture; | |||||||
| void main() { | void main() { | ||||||
|     // Swap RGBA -> ABGR so we don't have to do this on the CPU. This needs to change if we have to
 |     // Swap RGBA -> ABGR so we don't have to do this on the CPU. This needs to change if we have to
 | ||||||
|     // support more framebuffer pixel formats.
 |     // support more framebuffer pixel formats.
 | ||||||
|     color = texture(color_texture, frag_tex_coord).abgr; |     color = texture(color_texture, frag_tex_coord); | ||||||
| } | } | ||||||
| )"; | )"; | ||||||
| 
 | 
 | ||||||
| @ -210,7 +210,7 @@ void RendererOpenGL::InitOpenGLObjects() { | |||||||
|                  0.0f); |                  0.0f); | ||||||
| 
 | 
 | ||||||
|     // Link shaders and get variable locations
 |     // Link shaders and get variable locations
 | ||||||
|     shader.Create(vertex_shader, nullptr, fragment_shader); |     shader.CreateFromSource(vertex_shader, nullptr, fragment_shader); | ||||||
|     state.draw.shader_program = shader.handle; |     state.draw.shader_program = shader.handle; | ||||||
|     state.Apply(); |     state.Apply(); | ||||||
|     uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix"); |     uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix"); | ||||||
| @ -311,10 +311,10 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, | |||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|     std::array<ScreenRectVertex, 4> vertices = {{ |     std::array<ScreenRectVertex, 4> vertices = {{ | ||||||
|         ScreenRectVertex(x, y, texcoords.top, right), |         ScreenRectVertex(x, y, texcoords.top, left), | ||||||
|         ScreenRectVertex(x + w, y, texcoords.bottom, right), |         ScreenRectVertex(x + w, y, texcoords.bottom, left), | ||||||
|         ScreenRectVertex(x, y + h, texcoords.top, left), |         ScreenRectVertex(x, y + h, texcoords.top, right), | ||||||
|         ScreenRectVertex(x + w, y + h, texcoords.bottom, left), |         ScreenRectVertex(x + w, y + h, texcoords.bottom, right), | ||||||
|     }}; |     }}; | ||||||
| 
 | 
 | ||||||
|     state.texture_units[0].texture_2d = screen_info.display_texture; |     state.texture_units[0].texture_2d = screen_info.display_texture; | ||||||
|  | |||||||
| @ -72,7 +72,7 @@ private: | |||||||
|     // OpenGL object IDs
 |     // OpenGL object IDs
 | ||||||
|     OGLVertexArray vertex_array; |     OGLVertexArray vertex_array; | ||||||
|     OGLBuffer vertex_buffer; |     OGLBuffer vertex_buffer; | ||||||
|     OGLShader shader; |     OGLProgram shader; | ||||||
| 
 | 
 | ||||||
|     /// Display information for Switch screen
 |     /// Display information for Switch screen
 | ||||||
|     ScreenInfo screen_info; |     ScreenInfo screen_info; | ||||||
|  | |||||||
| @ -151,7 +151,7 @@ static inline void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixe | |||||||
|             const u32 coarse_y = y & ~127; |             const u32 coarse_y = y & ~127; | ||||||
|             u32 morton_offset = |             u32 morton_offset = | ||||||
|                 GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel; |                 GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel; | ||||||
|             u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; |             u32 gl_pixel_index = (x + y * width) * gl_bytes_per_pixel; | ||||||
| 
 | 
 | ||||||
|             data_ptrs[morton_to_gl] = morton_data + morton_offset; |             data_ptrs[morton_to_gl] = morton_data + morton_offset; | ||||||
|             data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index]; |             data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index]; | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user