From 03f41ac508d2c47fdfef08f0fd27ab154f7dcfd8 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 12 Sep 2023 23:32:38 +0200 Subject: [PATCH] btrfs-progs: detect PCLMUL CPU support for accelerated crc32c The accelerated crc32c needs to check for two CPU features: the crc32c instruction is part of SSE 4.2, while 'pclmulqdq' is a separate feature. There is still old hardware in use that does not have the PCLMUL instruction. Detect it and make it the condition for using the PCLMUL-based implementation. Detection of pclmul is not supported by old compilers, so also add a configure-time check and keep the SSE 4.2-only implementation as the accelerated one where possible. Issue: #676 Signed-off-by: David Sterba --- common/cpu-utils.c | 5 +++++ common/cpu-utils.h | 1 + config/ax_gcc_builtin.m4 | 1 + configure.ac | 1 + crypto/crc32c.c | 9 +++------ crypto/hash-speedtest.c | 2 +- crypto/hash-vectest.c | 2 +- 7 files changed, 13 insertions(+), 8 deletions(-) diff --git a/common/cpu-utils.c b/common/cpu-utils.c index 3527a3cb01..568e3af55a 100644 --- a/common/cpu-utils.c +++ b/common/cpu-utils.c @@ -54,6 +54,7 @@ void cpu_print_flags(void) { FLAG(SSE2); FLAG(SSSE3); FLAG(SSE41); + FLAG(PCLMUL); FLAG(SSE42); FLAG(SHA); FLAG(AVX); @@ -76,6 +77,10 @@ void cpu_detect_flags(void) __cpu_flags |= CPU_FLAG_SSSE3; if (__builtin_cpu_supports("sse4.1")) __cpu_flags |= CPU_FLAG_SSE41; +#if HAVE___BUILTIN_CPU_SUPPORTS__PCLMUL + if (__builtin_cpu_supports("pclmul")) + __cpu_flags |= CPU_FLAG_PCLMUL; +#endif if (__builtin_cpu_supports("sse4.2")) __cpu_flags |= CPU_FLAG_SSE42; if (__builtin_cpu_supports("avx")) diff --git a/common/cpu-utils.h b/common/cpu-utils.h index e4a8641b05..014788f422 100644 --- a/common/cpu-utils.h +++ b/common/cpu-utils.h @@ -33,6 +33,7 @@ enum cpu_feature { ENUM_CPU_BIT(CPU_FLAG_SSE2), ENUM_CPU_BIT(CPU_FLAG_SSSE3), ENUM_CPU_BIT(CPU_FLAG_SSE41), + ENUM_CPU_BIT(CPU_FLAG_PCLMUL), ENUM_CPU_BIT(CPU_FLAG_SSE42), ENUM_CPU_BIT(CPU_FLAG_SHA), ENUM_CPU_BIT(CPU_FLAG_AVX), diff --git a/config/ax_gcc_builtin.m4 b/config/ax_gcc_builtin.m4 
index c89f89ec3d..224e824c1d 100644 --- a/config/ax_gcc_builtin.m4 +++ b/config/ax_gcc_builtin.m4 @@ -124,6 +124,7 @@ AC_DEFUN([AX_GCC_BUILTIN], [ [__builtin_cpu_init], [$1()], [__builtin_cpu_is], [$1("intel")], [__builtin_cpu_supports], [$1("sse")], + [__builtin_cpu_supports__pclmul], [__builtin_cpu_supports("pclmul")], [__builtin_ctz], [$1(0)], [__builtin_ctzl], [$1(0)], [__builtin_ctzll], [$1(0)], diff --git a/configure.ac b/configure.ac index 1ef5f83cfe..1850048bbb 100644 --- a/configure.ac +++ b/configure.ac @@ -86,6 +86,7 @@ AC_SUBST([HAVE_GLIBC]) AX_GCC_BUILTIN([__builtin_add_overflow]) AX_GCC_BUILTIN([__builtin_sub_overflow]) AX_GCC_BUILTIN([__builtin_mul_overflow]) +AX_GCC_BUILTIN([__builtin_cpu_supports__pclmul]) AC_CHECK_HEADERS([linux/perf_event.h]) AC_CHECK_HEADERS([linux/hw_breakpoint.h]) diff --git a/crypto/crc32c.c b/crypto/crc32c.c index 23cbb1a63d..0d540ef658 100644 --- a/crypto/crc32c.c +++ b/crypto/crc32c.c @@ -25,7 +25,7 @@ static unsigned int crc32c_pcl(uint32_t crc, unsigned char const *data, uint32_t return crc_pcl(data, len, crc); } -#else +#endif /* * Based on a posting to lkml by Austin Zhang @@ -86,8 +86,6 @@ static uint32_t crc32c_intel(uint32_t crc, unsigned char const *data, uint32_t l return crc; } -#endif - void crc32c_init_accel(void) { /* @@ -96,14 +94,13 @@ void crc32c_init_accel(void) */ if (0) { #ifdef __GLIBC__ - } else if (cpu_has_feature(CPU_FLAG_SSE42)) { + } else if (cpu_has_feature(CPU_FLAG_PCLMUL)) { /* printf("CRC32C: pcl\n"); */ crc_function = crc32c_pcl; -#else +#endif } else if (cpu_has_feature(CPU_FLAG_SSE42)) { /* printf("CRC32c: intel\n"); */ crc_function = crc32c_intel; -#endif } else { /* printf("CRC32c: fallback\n"); */ crc_function = __crc32c_le; diff --git a/crypto/hash-speedtest.c b/crypto/hash-speedtest.c index 2b02f6ae7f..88f9c9b17b 100644 --- a/crypto/hash-speedtest.c +++ b/crypto/hash-speedtest.c @@ -190,7 +190,7 @@ int main(int argc, char **argv) { { .name = "CRC32C-ref", .digest = hash_crc32c, 
.digest_size = 4, .cpu_flag = CPU_FLAG_NONE }, { .name = "CRC32C-NI", .digest = hash_crc32c, .digest_size = 4, - .cpu_flag = CPU_FLAG_SSE42 }, + .cpu_flag = CPU_FLAG_PCLMUL }, { .name = "XXHASH", .digest = hash_xxhash, .digest_size = 8 }, { .name = "SHA256-ref", .digest = hash_sha256, .digest_size = 32, .cpu_flag = CPU_FLAG_NONE, .backend = CRYPTOPROVIDER_BUILTIN + 1 }, diff --git a/crypto/hash-vectest.c b/crypto/hash-vectest.c index 2a00c02981..07a830fb59 100644 --- a/crypto/hash-vectest.c +++ b/crypto/hash-vectest.c @@ -442,7 +442,7 @@ static const struct hash_testspec test_spec[] = { .digest_size = 4, .testvec = crc32c_tv, .count = ARRAY_SIZE(crc32c_tv), - .cpu_flag = CPU_FLAG_SSE42, + .cpu_flag = CPU_FLAG_PCLMUL, .hash = hash_crc32c }, { .name = "XXHASH",