14#elif defined(__GNUC__)
21#if !defined(BLAKE3_ATOMICS)
22#if defined(__has_include)
23#if __has_include(<stdatomic.h>) && !defined(_MSC_VER)
24#define BLAKE3_ATOMICS 1
26#define BLAKE3_ATOMICS 0
29#define BLAKE3_ATOMICS 0
34#define ATOMIC_INT _Atomic int
35#define ATOMIC_LOAD(x) x
36#define ATOMIC_STORE(x, y) x = y
37#elif defined(_MSC_VER)
38#define ATOMIC_INT LONG
39#define ATOMIC_LOAD(x) InterlockedOr(&x, 0)
40#define ATOMIC_STORE(x, y) InterlockedExchange(&x, y)
43#define ATOMIC_LOAD(x) x
44#define ATOMIC_STORE(x, y) x = y
47#define MAYBE_UNUSED(x) (void)((x))
55 __asm__ __volatile__(
"xgetbv\n" :
"=a"(eax),
"=d"(edx) :
"c"(0));
62 __cpuid((
int *)out,
id);
63#elif defined(__i386__) || defined(_M_IX86)
64 __asm__ __volatile__(
"movl %%ebx, %1\n"
67 :
"=a"(out[0]),
"=r"(out[1]),
"=c"(out[2]),
"=d"(out[3])
70 __asm__ __volatile__(
"cpuid\n"
71 :
"=a"(out[0]),
"=b"(out[1]),
"=c"(out[2]),
"=d"(out[3])
78 __cpuidex((
int *)out,
id, sid);
79#elif defined(__i386__) || defined(_M_IX86)
80 __asm__ __volatile__(
"movl %%ebx, %1\n"
83 :
"=a"(out[0]),
"=r"(out[1]),
"=c"(out[2]),
"=d"(out[3])
86 __asm__ __volatile__(
"cpuid\n"
87 :
"=a"(out[0]),
"=b"(out[1]),
"=c"(out[2]),
"=d"(out[3])
106#if !defined(BLAKE3_TESTING)
112#if !defined(BLAKE3_TESTING)
125 uint32_t *eax = ®s[0], *ebx = ®s[1], *ecx = ®s[2], *edx = ®s[3];
129 const int max_id = *eax;
131#if defined(__amd64__) || defined(_M_X64)
134 if (*edx & (1UL << 26))
137 if (*ecx & (1UL << 9))
139 if (*ecx & (1UL << 19))
142 if (*ecx & (1UL << 27)) {
144 if ((mask & 6) == 6) {
145 if (*ecx & (1UL << 28))
149 if (*ebx & (1UL << 5))
151 if ((mask & 224) == 224) {
152 if (*ebx & (1UL << 31))
154 if (*ebx & (1UL << 16))
176#if !defined(BLAKE3_NO_AVX512)
182#if !defined(BLAKE3_NO_SSE41)
183 if (features &
SSE41) {
188#if !defined(BLAKE3_NO_SSE2)
189 if (features &
SSE2) {
205#if !defined(BLAKE3_NO_AVX512)
211#if !defined(BLAKE3_NO_SSE41)
212 if (features &
SSE41) {
217#if !defined(BLAKE3_NO_SSE2)
218 if (features &
SSE2) {
231 uint8_t out[64],
size_t outblocks) {
232 if (outblocks == 0) {
239#if !defined(_WIN32) && !defined(__CYGWIN__) && !defined(BLAKE3_NO_AVX512)
246 for(
size_t i = 0; i < outblocks; ++i) {
253 bool increment_counter,
uint8_t flags,
258#if !defined(BLAKE3_NO_AVX512)
261 increment_counter, flags, flags_start, flags_end,
266#if !defined(BLAKE3_NO_AVX2)
267 if (features &
AVX2) {
269 increment_counter, flags, flags_start, flags_end,
274#if !defined(BLAKE3_NO_SSE41)
275 if (features &
SSE41) {
277 increment_counter, flags, flags_start, flags_end,
282#if !defined(BLAKE3_NO_SSE2)
283 if (features &
SSE2) {
285 increment_counter, flags, flags_start, flags_end,
292#if BLAKE3_USE_NEON == 1
294 increment_counter, flags, flags_start, flags_end, out);
299 increment_counter, flags, flags_start, flags_end,
308#if !defined(BLAKE3_NO_AVX512)
313#if !defined(BLAKE3_NO_AVX2)
314 if (features &
AVX2) {
318#if !defined(BLAKE3_NO_SSE41)
319 if (features &
SSE41) {
323#if !defined(BLAKE3_NO_SSE2)
324 if (features &
SSE2) {
329#if BLAKE3_USE_NEON == 1
bbsections Prepares for basic block by splitting functions into clusters of basic blocks
#define LLVM_ATTRIBUTE_USED
unify loop Fixup each natural loop to have a single exit block
size_t blake3_simd_degree(void)
static LLVM_ATTRIBUTE_USED enum cpu_feature get_cpu_features(void)
#define ATOMIC_STORE(x, y)
static ATOMIC_INT g_cpu_features
#define blake3_compress_in_place_sse41
#define blake3_hash_many_neon
#define blake3_hash_many_avx512
#define blake3_hash_many_avx2
#define blake3_compress_xof_sse2
#define blake3_hash_many_sse41
#define blake3_compress_xof
#define blake3_compress_xof_sse41
#define blake3_compress_in_place_sse2
#define blake3_compress_in_place
#define blake3_compress_xof_avx512
#define blake3_xof_many_avx512
#define blake3_compress_xof_portable
#define blake3_hash_many_portable
#define blake3_hash_many_sse2
#define blake3_compress_in_place_portable
#define blake3_compress_in_place_avx512