jobcore/btrfs-progs/03f41ac508d2c47fdfef08f0fd2...

151 lines
5.0 KiB
Diff

From 03f41ac508d2c47fdfef08f0fd27ab154f7dcfd8 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Tue, 12 Sep 2023 23:32:38 +0200
Subject: [PATCH] btrfs-progs: detect PCLMUL CPU support for accelerated crc32c
The accelerated crc32c needs to check for two CPU features: the crc32c
instruction is part of SSE 4.2, while 'pclmulqdq' is a separate feature.
There is still old hardware in use that does not have the PCLMUL
instructions. Detect PCLMUL support and make it the condition for
selecting the PCLMUL-based implementation.
The "pclmul" feature name is not supported by old compilers, so also add
a configure-time detection and leave the SSE 4.2-only implementation as
the accelerated one where possible.
Issue: #676
Signed-off-by: David Sterba <dsterba@suse.com>
---
common/cpu-utils.c | 5 +++++
common/cpu-utils.h | 1 +
config/ax_gcc_builtin.m4 | 1 +
configure.ac | 1 +
crypto/crc32c.c | 9 +++------
crypto/hash-speedtest.c | 2 +-
crypto/hash-vectest.c | 2 +-
7 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/common/cpu-utils.c b/common/cpu-utils.c
index 3527a3cb01..568e3af55a 100644
--- a/common/cpu-utils.c
+++ b/common/cpu-utils.c
@@ -54,6 +54,7 @@ void cpu_print_flags(void) {
FLAG(SSE2);
FLAG(SSSE3);
FLAG(SSE41);
+ FLAG(PCLMUL);
FLAG(SSE42);
FLAG(SHA);
FLAG(AVX);
@@ -76,6 +77,10 @@ void cpu_detect_flags(void)
__cpu_flags |= CPU_FLAG_SSSE3;
if (__builtin_cpu_supports("sse4.1"))
__cpu_flags |= CPU_FLAG_SSE41;
+#if HAVE___BUILTIN_CPU_SUPPORTS__PCLMUL
+ if (__builtin_cpu_supports("pclmul"))
+ __cpu_flags |= CPU_FLAG_PCLMUL;
+#endif
if (__builtin_cpu_supports("sse4.2"))
__cpu_flags |= CPU_FLAG_SSE42;
if (__builtin_cpu_supports("avx"))
diff --git a/common/cpu-utils.h b/common/cpu-utils.h
index e4a8641b05..014788f422 100644
--- a/common/cpu-utils.h
+++ b/common/cpu-utils.h
@@ -33,6 +33,7 @@ enum cpu_feature {
ENUM_CPU_BIT(CPU_FLAG_SSE2),
ENUM_CPU_BIT(CPU_FLAG_SSSE3),
ENUM_CPU_BIT(CPU_FLAG_SSE41),
+ ENUM_CPU_BIT(CPU_FLAG_PCLMUL),
ENUM_CPU_BIT(CPU_FLAG_SSE42),
ENUM_CPU_BIT(CPU_FLAG_SHA),
ENUM_CPU_BIT(CPU_FLAG_AVX),
diff --git a/config/ax_gcc_builtin.m4 b/config/ax_gcc_builtin.m4
index c89f89ec3d..224e824c1d 100644
--- a/config/ax_gcc_builtin.m4
+++ b/config/ax_gcc_builtin.m4
@@ -124,6 +124,7 @@ AC_DEFUN([AX_GCC_BUILTIN], [
[__builtin_cpu_init], [$1()],
[__builtin_cpu_is], [$1("intel")],
[__builtin_cpu_supports], [$1("sse")],
+ [__builtin_cpu_supports__pclmul], [__builtin_cpu_supports("pclmul")],
[__builtin_ctz], [$1(0)],
[__builtin_ctzl], [$1(0)],
[__builtin_ctzll], [$1(0)],
diff --git a/configure.ac b/configure.ac
index 1ef5f83cfe..1850048bbb 100644
--- a/configure.ac
+++ b/configure.ac
@@ -86,6 +86,7 @@ AC_SUBST([HAVE_GLIBC])
AX_GCC_BUILTIN([__builtin_add_overflow])
AX_GCC_BUILTIN([__builtin_sub_overflow])
AX_GCC_BUILTIN([__builtin_mul_overflow])
+AX_GCC_BUILTIN([__builtin_cpu_supports__pclmul])
AC_CHECK_HEADERS([linux/perf_event.h])
AC_CHECK_HEADERS([linux/hw_breakpoint.h])
diff --git a/crypto/crc32c.c b/crypto/crc32c.c
index 23cbb1a63d..0d540ef658 100644
--- a/crypto/crc32c.c
+++ b/crypto/crc32c.c
@@ -25,7 +25,7 @@ static unsigned int crc32c_pcl(uint32_t crc, unsigned char const *data, uint32_t
return crc_pcl(data, len, crc);
}
-#else
+#endif
/*
* Based on a posting to lkml by Austin Zhang <austin.zhang@intel.com>
@@ -86,8 +86,6 @@ static uint32_t crc32c_intel(uint32_t crc, unsigned char const *data, uint32_t l
return crc;
}
-#endif
-
void crc32c_init_accel(void)
{
/*
@@ -96,14 +94,13 @@ void crc32c_init_accel(void)
*/
if (0) {
#ifdef __GLIBC__
- } else if (cpu_has_feature(CPU_FLAG_SSE42)) {
+ } else if (cpu_has_feature(CPU_FLAG_PCLMUL)) {
/* printf("CRC32C: pcl\n"); */
crc_function = crc32c_pcl;
-#else
+#endif
} else if (cpu_has_feature(CPU_FLAG_SSE42)) {
/* printf("CRC32c: intel\n"); */
crc_function = crc32c_intel;
-#endif
} else {
/* printf("CRC32c: fallback\n"); */
crc_function = __crc32c_le;
diff --git a/crypto/hash-speedtest.c b/crypto/hash-speedtest.c
index 2b02f6ae7f..88f9c9b17b 100644
--- a/crypto/hash-speedtest.c
+++ b/crypto/hash-speedtest.c
@@ -190,7 +190,7 @@ int main(int argc, char **argv) {
{ .name = "CRC32C-ref", .digest = hash_crc32c, .digest_size = 4,
.cpu_flag = CPU_FLAG_NONE },
{ .name = "CRC32C-NI", .digest = hash_crc32c, .digest_size = 4,
- .cpu_flag = CPU_FLAG_SSE42 },
+ .cpu_flag = CPU_FLAG_PCLMUL },
{ .name = "XXHASH", .digest = hash_xxhash, .digest_size = 8 },
{ .name = "SHA256-ref", .digest = hash_sha256, .digest_size = 32,
.cpu_flag = CPU_FLAG_NONE, .backend = CRYPTOPROVIDER_BUILTIN + 1 },
diff --git a/crypto/hash-vectest.c b/crypto/hash-vectest.c
index 2a00c02981..07a830fb59 100644
--- a/crypto/hash-vectest.c
+++ b/crypto/hash-vectest.c
@@ -442,7 +442,7 @@ static const struct hash_testspec test_spec[] = {
.digest_size = 4,
.testvec = crc32c_tv,
.count = ARRAY_SIZE(crc32c_tv),
- .cpu_flag = CPU_FLAG_SSE42,
+ .cpu_flag = CPU_FLAG_PCLMUL,
.hash = hash_crc32c
}, {
.name = "XXHASH",