MIPS: Implement __multi3 for GCC7 MIPS64r6 builds
GCC7 is a bit too eager to generate suboptimal __multi3 calls (128bit multiply with 128bit result) for MIPS64r6 builds, even in code which doesn't explicitly use 128bit types, such as the following: unsigned long func(unsigned long a, unsigned long b) { return a > (~0UL) / b; } Which GCC rearanges to: return (unsigned __int128)a * (unsigned __int128)b > 0xffffffffffffffff; Therefore implement __multi3, but only for MIPS64r6 with GCC7 as under normal circumstances we wouldn't expect any calls to __multi3 to be generated from kernel code. Reported-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com> Signed-off-by: James Hogan <jhogan@kernel.org> Tested-by: Waldemar Brodkorb <wbx@openadk.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Maciej W. Rozycki <macro@mips.com> Cc: Matthew Fortune <matthew.fortune@mips.com> Cc: Florian Fainelli <florian@openwrt.org> Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17890/
This commit is contained in:
parent
ccf85c7442
commit
ebabcf17bc
3 changed files with 73 additions and 1 deletions
|
@ -16,4 +16,5 @@ obj-$(CONFIG_CPU_R3000) += r3k_dump_tlb.o
|
||||||
obj-$(CONFIG_CPU_TX39XX) += r3k_dump_tlb.o
|
obj-$(CONFIG_CPU_TX39XX) += r3k_dump_tlb.o
|
||||||
|
|
||||||
# libgcc-style stuff needed in the kernel
|
# libgcc-style stuff needed in the kernel
|
||||||
obj-y += ashldi3.o ashrdi3.o bswapsi.o bswapdi.o cmpdi2.o lshrdi3.o ucmpdi2.o
|
obj-y += ashldi3.o ashrdi3.o bswapsi.o bswapdi.o cmpdi2.o lshrdi3.o multi3.o \
|
||||||
|
ucmpdi2.o
|
||||||
|
|
|
@ -10,10 +10,18 @@ typedef int word_type __attribute__ ((mode (__word__)));
|
||||||
struct DWstruct {
|
struct DWstruct {
|
||||||
int high, low;
|
int high, low;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct TWstruct {
|
||||||
|
long long high, low;
|
||||||
|
};
|
||||||
#elif defined(__LITTLE_ENDIAN)
|
#elif defined(__LITTLE_ENDIAN)
|
||||||
struct DWstruct {
|
struct DWstruct {
|
||||||
int low, high;
|
int low, high;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct TWstruct {
|
||||||
|
long long low, high;
|
||||||
|
};
|
||||||
#else
|
#else
|
||||||
#error I feel sick.
|
#error I feel sick.
|
||||||
#endif
|
#endif
|
||||||
|
@ -23,4 +31,13 @@ typedef union {
|
||||||
long long ll;
|
long long ll;
|
||||||
} DWunion;
|
} DWunion;
|
||||||
|
|
||||||
|
#if defined(CONFIG_64BIT) && defined(CONFIG_CPU_MIPSR6)
|
||||||
|
typedef int ti_type __attribute__((mode(TI)));
|
||||||
|
|
||||||
|
typedef union {
|
||||||
|
struct TWstruct s;
|
||||||
|
ti_type ti;
|
||||||
|
} TWunion;
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif /* __ASM_LIBGCC_H */
|
#endif /* __ASM_LIBGCC_H */
|
||||||
|
|
54
arch/mips/lib/multi3.c
Normal file
54
arch/mips/lib/multi3.c
Normal file
|
@ -0,0 +1,54 @@
|
||||||
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
#include <linux/export.h>
|
||||||
|
|
||||||
|
#include "libgcc.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* GCC 7 suboptimally generates __multi3 calls for mips64r6, so for that
|
||||||
|
* specific case only we'll implement it here.
|
||||||
|
*
|
||||||
|
* See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82981
|
||||||
|
*/
|
||||||
|
#if defined(CONFIG_64BIT) && defined(CONFIG_CPU_MIPSR6) && (__GNUC__ == 7)
|
||||||
|
|
||||||
|
/* multiply 64-bit values, low 64-bits returned */
|
||||||
|
static inline long long notrace dmulu(long long a, long long b)
|
||||||
|
{
|
||||||
|
long long res;
|
||||||
|
|
||||||
|
asm ("dmulu %0,%1,%2" : "=r" (res) : "r" (a), "r" (b));
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* multiply 64-bit unsigned values, high 64-bits of 128-bit result returned */
|
||||||
|
static inline long long notrace dmuhu(long long a, long long b)
|
||||||
|
{
|
||||||
|
long long res;
|
||||||
|
|
||||||
|
asm ("dmuhu %0,%1,%2" : "=r" (res) : "r" (a), "r" (b));
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* multiply 128-bit values, low 128-bits returned */
|
||||||
|
ti_type notrace __multi3(ti_type a, ti_type b)
|
||||||
|
{
|
||||||
|
TWunion res, aa, bb;
|
||||||
|
|
||||||
|
aa.ti = a;
|
||||||
|
bb.ti = b;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* a * b = (a.lo * b.lo)
|
||||||
|
* + 2^64 * (a.hi * b.lo + a.lo * b.hi)
|
||||||
|
* [+ 2^128 * (a.hi * b.hi)]
|
||||||
|
*/
|
||||||
|
res.s.low = dmulu(aa.s.low, bb.s.low);
|
||||||
|
res.s.high = dmuhu(aa.s.low, bb.s.low);
|
||||||
|
res.s.high += dmulu(aa.s.high, bb.s.low);
|
||||||
|
res.s.high += dmulu(aa.s.low, bb.s.high);
|
||||||
|
|
||||||
|
return res.ti;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(__multi3);
|
||||||
|
|
||||||
|
#endif /* 64BIT && CPU_MIPSR6 && GCC7 */
|
Loading…
Reference in a new issue