Skip to content

Commit a05ce3a

Browse files
committed
Support avx512f + vpclmulqdq crc32() acceleration
Cosmo's _Cz_crc32() function now goes 73 GiB/s on Threadripper. This will significantly improve performance for the PKZIP file format. This algorithm is also used by apelink to create deterministic ids.
1 parent 7c8df05 commit a05ce3a

File tree

8 files changed

+385
-8
lines changed

8 files changed

+385
-8
lines changed

test/libc/str/crc32z_test.c

Lines changed: 351 additions & 3 deletions
Large diffs are not rendered by default.

third_party/zlib/BUILD.mk

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,25 +38,42 @@ $(THIRD_PARTY_ZLIB_A).pkg: \
3838
ifeq ($(ARCH), x86_64)
3939
o/$(MODE)/third_party/zlib/adler32_simd.o: private \
4040
TARGET_ARCH += \
41+
-O3 \
4142
-mssse3
4243
o/$(MODE)/third_party/zlib/crc_folding.o \
43-
o/$(MODE)/third_party/zlib/crc32_simd.o: private \
44+
o/$(MODE)/third_party/zlib/crc32_simd_sse42.o: private \
4445
TARGET_ARCH += \
46+
-O3 \
4547
-msse4.2 \
46-
-mpclmul
48+
-mpclmul \
49+
-UCRC32_SIMD_AVX512_PCLMUL \
50+
-DCRC32_SIMD_SSE42_PCLMUL \
51+
-DBUILD_SSE42
52+
o/$(MODE)/third_party/zlib/crc32_simd_avx512.o: private \
53+
TARGET_ARCH += \
54+
-O3 \
55+
-mpclmul \
56+
-mavx512f \
57+
-mvpclmulqdq \
58+
-UCRC32_SIMD_SSE42_PCLMUL \
59+
-DCRC32_SIMD_AVX512_PCLMUL \
60+
-DBUILD_AVX512
4761
$(THIRD_PARTY_ZLIB_A_OBJS): private \
4862
CPPFLAGS += \
4963
-DADLER32_SIMD_SSSE3 \
5064
-DCRC32_SIMD_SSE42_PCLMUL \
65+
-DCRC32_SIMD_AVX512_PCLMUL \
5166
-DDEFLATE_SLIDE_HASH_SSE2 \
5267
-DINFLATE_CHUNK_SIMD_SSE2 \
5368
-DINFLATE_CHUNK_READ_64LE
5469
endif
5570

5671
ifeq ($(ARCH), aarch64)
5772
o/$(MODE)/third_party/zlib/deflate.o \
58-
o/$(MODE)/third_party/zlib/crc32_simd.o: private \
73+
o/$(MODE)/third_party/zlib/crc32_simd_neon.o: private \
5974
TARGET_ARCH += \
75+
-O3 \
76+
-DBUILD_NEON \
6077
-march=armv8-a+aes+crc
6178
$(THIRD_PARTY_ZLIB_A_OBJS): private \
6279
CPPFLAGS += \

third_party/zlib/cpu_features.internal.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ COSMOPOLITAN_C_START_
1616
#define x86_cpu_enable_sse2 X86_HAVE(SSE2)
1717
#define x86_cpu_enable_ssse3 X86_HAVE(SSSE3)
1818
#define x86_cpu_enable_simd (X86_HAVE(SSE4_2) && X86_HAVE(PCLMUL))
19-
#define x86_cpu_enable_avx512 X86_HAVE(AVX512F)
19+
#define x86_cpu_enable_avx512 (X86_HAVE(AVX512F) && X86_HAVE(PCLMUL) && X86_HAVE(VPCLMULQDQ))
2020
#define cpu_check_features() ((void)0)
2121

2222
#elif defined(__aarch64__)

third_party/zlib/crc32.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -780,6 +780,7 @@ uint32_t ZEXPORT crc32_z(crc, buf_, len)
780780
}
781781

782782
#endif
783+
#if defined(__x86_64__)
783784
#if defined(CRC32_SIMD_AVX512_PCLMUL)
784785
if (x86_cpu_enable_avx512 && len >= Z_CRC32_AVX512_MINIMUM_LENGTH) {
785786
/* crc32 64-byte chunks */
@@ -792,7 +793,8 @@ uint32_t ZEXPORT crc32_z(crc, buf_, len)
792793
/* Fall into the default crc32 for the remaining data. */
793794
buf += chunk_size;
794795
}
795-
#elif defined(CRC32_SIMD_SSE42_PCLMUL)
796+
#endif
797+
#if defined(CRC32_SIMD_SSE42_PCLMUL)
796798
if (x86_cpu_enable_simd && len >= Z_CRC32_SSE42_MINIMUM_LENGTH) {
797799
/* crc32 16-byte chunks */
798800
z_size_t chunk_size = len & ~Z_CRC32_SSE42_CHUNKSIZE_MASK;
@@ -804,6 +806,7 @@ uint32_t ZEXPORT crc32_z(crc, buf_, len)
804806
/* Fall into the default crc32 for the remaining data. */
805807
buf += chunk_size;
806808
}
809+
#endif
807810
#elif defined(CRC32_ARMV8_CRC32)
808811
if (arm_cpu_enable_crc32) {
809812
#if defined(__aarch64__)
File renamed without changes.

third_party/zlib/crc32_simd_avx512.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#ifdef BUILD_AVX512
2+
#include "third_party/zlib/crc32_simd.inc"
3+
#endif

third_party/zlib/crc32_simd_neon.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#ifdef BUILD_NEON
2+
#include "third_party/zlib/crc32_simd.inc"
3+
#endif

third_party/zlib/crc32_simd_sse42.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#ifdef BUILD_SSE42
2+
#include "third_party/zlib/crc32_simd.inc"
3+
#endif

0 commit comments

Comments
 (0)