150 lines
5 KiB
Diff
150 lines
5 KiB
Diff
From 03f41ac508d2c47fdfef08f0fd27ab154f7dcfd8 Mon Sep 17 00:00:00 2001
|
|
From: David Sterba <dsterba@suse.com>
|
|
Date: Tue, 12 Sep 2023 23:32:38 +0200
|
|
Subject: [PATCH] btrfs-progs: detect PCLMUL CPU support for accelerated crc32c
|
|
|
|
The accelerated crc32c needs to check for two CPU features, the crc32c
|
|
instruction is in SSE 4.2 and 'pclmulqdq' is a separate feature. There's still
|
|
old hardware in use that does not have the PCLMUL instructions. Detect it
|
|
and make it the condition for selecting the PCLMUL-based implementation.
|
|
|
|
The "pclmul" feature check is not supported by old compilers, so also add a
|
|
configure-time detection and leave the SSE 4.2 only implementation as
|
|
the accelerated one if possible.
|
|
|
|
Issue: #676
|
|
Signed-off-by: David Sterba <dsterba@suse.com>
|
|
---
|
|
common/cpu-utils.c | 5 +++++
|
|
common/cpu-utils.h | 1 +
|
|
config/ax_gcc_builtin.m4 | 1 +
|
|
configure.ac | 1 +
|
|
crypto/crc32c.c | 9 +++------
|
|
crypto/hash-speedtest.c | 2 +-
|
|
crypto/hash-vectest.c | 2 +-
|
|
7 files changed, 13 insertions(+), 8 deletions(-)
|
|
|
|
diff --git a/common/cpu-utils.c b/common/cpu-utils.c
|
|
index 3527a3cb01..568e3af55a 100644
|
|
--- a/common/cpu-utils.c
|
|
+++ b/common/cpu-utils.c
|
|
@@ -54,6 +54,7 @@ void cpu_print_flags(void) {
|
|
FLAG(SSE2);
|
|
FLAG(SSSE3);
|
|
FLAG(SSE41);
|
|
+ FLAG(PCLMUL);
|
|
FLAG(SSE42);
|
|
FLAG(SHA);
|
|
FLAG(AVX);
|
|
@@ -76,6 +77,10 @@ void cpu_detect_flags(void)
|
|
__cpu_flags |= CPU_FLAG_SSSE3;
|
|
if (__builtin_cpu_supports("sse4.1"))
|
|
__cpu_flags |= CPU_FLAG_SSE41;
|
|
+#if HAVE___BUILTIN_CPU_SUPPORTS__PCLMUL
|
|
+ if (__builtin_cpu_supports("pclmul"))
|
|
+ __cpu_flags |= CPU_FLAG_PCLMUL;
|
|
+#endif
|
|
if (__builtin_cpu_supports("sse4.2"))
|
|
__cpu_flags |= CPU_FLAG_SSE42;
|
|
if (__builtin_cpu_supports("avx"))
|
|
diff --git a/common/cpu-utils.h b/common/cpu-utils.h
|
|
index e4a8641b05..014788f422 100644
|
|
--- a/common/cpu-utils.h
|
|
+++ b/common/cpu-utils.h
|
|
@@ -33,6 +33,7 @@ enum cpu_feature {
|
|
ENUM_CPU_BIT(CPU_FLAG_SSE2),
|
|
ENUM_CPU_BIT(CPU_FLAG_SSSE3),
|
|
ENUM_CPU_BIT(CPU_FLAG_SSE41),
|
|
+ ENUM_CPU_BIT(CPU_FLAG_PCLMUL),
|
|
ENUM_CPU_BIT(CPU_FLAG_SSE42),
|
|
ENUM_CPU_BIT(CPU_FLAG_SHA),
|
|
ENUM_CPU_BIT(CPU_FLAG_AVX),
|
|
diff --git a/config/ax_gcc_builtin.m4 b/config/ax_gcc_builtin.m4
|
|
index c89f89ec3d..224e824c1d 100644
|
|
--- a/config/ax_gcc_builtin.m4
|
|
+++ b/config/ax_gcc_builtin.m4
|
|
@@ -124,6 +124,7 @@ AC_DEFUN([AX_GCC_BUILTIN], [
|
|
[__builtin_cpu_init], [$1()],
|
|
[__builtin_cpu_is], [$1("intel")],
|
|
[__builtin_cpu_supports], [$1("sse")],
|
|
+ [__builtin_cpu_supports__pclmul], [__builtin_cpu_supports("pclmul")],
|
|
[__builtin_ctz], [$1(0)],
|
|
[__builtin_ctzl], [$1(0)],
|
|
[__builtin_ctzll], [$1(0)],
|
|
diff --git a/configure.ac b/configure.ac
|
|
index 1ef5f83cfe..1850048bbb 100644
|
|
--- a/configure.ac
|
|
+++ b/configure.ac
|
|
@@ -86,6 +86,7 @@ AC_SUBST([HAVE_GLIBC])
|
|
AX_GCC_BUILTIN([__builtin_add_overflow])
|
|
AX_GCC_BUILTIN([__builtin_sub_overflow])
|
|
AX_GCC_BUILTIN([__builtin_mul_overflow])
|
|
+AX_GCC_BUILTIN([__builtin_cpu_supports__pclmul])
|
|
|
|
AC_CHECK_HEADERS([linux/perf_event.h])
|
|
AC_CHECK_HEADERS([linux/hw_breakpoint.h])
|
|
diff --git a/crypto/crc32c.c b/crypto/crc32c.c
|
|
index 23cbb1a63d..0d540ef658 100644
|
|
--- a/crypto/crc32c.c
|
|
+++ b/crypto/crc32c.c
|
|
@@ -25,7 +25,7 @@ static unsigned int crc32c_pcl(uint32_t crc, unsigned char const *data, uint32_t
|
|
return crc_pcl(data, len, crc);
|
|
}
|
|
|
|
-#else
|
|
+#endif
|
|
|
|
/*
|
|
* Based on a posting to lkml by Austin Zhang <austin.zhang@intel.com>
|
|
@@ -86,8 +86,6 @@ static uint32_t crc32c_intel(uint32_t crc, unsigned char const *data, uint32_t l
|
|
return crc;
|
|
}
|
|
|
|
-#endif
|
|
-
|
|
void crc32c_init_accel(void)
|
|
{
|
|
/*
|
|
@@ -96,14 +94,13 @@ void crc32c_init_accel(void)
|
|
*/
|
|
if (0) {
|
|
#ifdef __GLIBC__
|
|
- } else if (cpu_has_feature(CPU_FLAG_SSE42)) {
|
|
+ } else if (cpu_has_feature(CPU_FLAG_PCLMUL)) {
|
|
/* printf("CRC32C: pcl\n"); */
|
|
crc_function = crc32c_pcl;
|
|
-#else
|
|
+#endif
|
|
} else if (cpu_has_feature(CPU_FLAG_SSE42)) {
|
|
/* printf("CRC32c: intel\n"); */
|
|
crc_function = crc32c_intel;
|
|
-#endif
|
|
} else {
|
|
/* printf("CRC32c: fallback\n"); */
|
|
crc_function = __crc32c_le;
|
|
diff --git a/crypto/hash-speedtest.c b/crypto/hash-speedtest.c
|
|
index 2b02f6ae7f..88f9c9b17b 100644
|
|
--- a/crypto/hash-speedtest.c
|
|
+++ b/crypto/hash-speedtest.c
|
|
@@ -190,7 +190,7 @@ int main(int argc, char **argv) {
|
|
{ .name = "CRC32C-ref", .digest = hash_crc32c, .digest_size = 4,
|
|
.cpu_flag = CPU_FLAG_NONE },
|
|
{ .name = "CRC32C-NI", .digest = hash_crc32c, .digest_size = 4,
|
|
- .cpu_flag = CPU_FLAG_SSE42 },
|
|
+ .cpu_flag = CPU_FLAG_PCLMUL },
|
|
{ .name = "XXHASH", .digest = hash_xxhash, .digest_size = 8 },
|
|
{ .name = "SHA256-ref", .digest = hash_sha256, .digest_size = 32,
|
|
.cpu_flag = CPU_FLAG_NONE, .backend = CRYPTOPROVIDER_BUILTIN + 1 },
|
|
diff --git a/crypto/hash-vectest.c b/crypto/hash-vectest.c
|
|
index 2a00c02981..07a830fb59 100644
|
|
--- a/crypto/hash-vectest.c
|
|
+++ b/crypto/hash-vectest.c
|
|
@@ -442,7 +442,7 @@ static const struct hash_testspec test_spec[] = {
|
|
.digest_size = 4,
|
|
.testvec = crc32c_tv,
|
|
.count = ARRAY_SIZE(crc32c_tv),
|
|
- .cpu_flag = CPU_FLAG_SSE42,
|
|
+ .cpu_flag = CPU_FLAG_PCLMUL,
|
|
.hash = hash_crc32c
|
|
}, {
|
|
.name = "XXHASH",
|