ARM: 8477/1: runtime patch udiv/sdiv instructions into __aeabi_{u}idiv()

The ARM compiler inserts calls to __aeabi_idiv() and
__aeabi_uidiv() when it needs to perform division on signed and
unsigned integers. If a processor has support for the sdiv and
udiv instructions, the kernel may overwrite the beginning of those
functions with those instructions and a "bx lr" to get better
performance.

To ensure that those functions are aligned to a 32-bit word for easier
patching (which might not always be the case in Thumb mode) and that
the two patched instructions end up in the same cache line, an 8-byte
alignment is enforced when ARM_PATCH_IDIV is selected.

This was heavily inspired by a previous patch from Stephen Boyd.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
This commit is contained in:
Nicolas Pitre 2015-12-12 02:49:21 +01:00 committed by Russell King
parent 38fc2f6c98
commit 42f25bddd0
3 changed files with 93 additions and 0 deletions

View file

@ -1603,6 +1603,24 @@ config THUMB2_AVOID_R_ARM_THM_JUMP11
config ARM_ASM_UNIFIED
bool
config ARM_PATCH_IDIV
bool "Runtime patch udiv/sdiv instructions into __aeabi_{u}idiv()"
depends on CPU_32v7 && !XIP_KERNEL
default y
help
The ARM compiler inserts calls to __aeabi_idiv() and
__aeabi_uidiv() when it needs to perform division on signed
and unsigned integers. Some v7 CPUs have support for the sdiv
and udiv instructions that can be used to implement those
functions.
Enabling this option allows the kernel to modify itself to
replace the first two instructions of these library functions
with the sdiv or udiv plus "bx lr" instructions when the CPU
it is running on supports them. Typically this will be faster
and less power intensive than running the original library
code to do integer division.
config AEABI
bool "Use the ARM EABI to compile the kernel"
help

View file

@ -375,6 +375,72 @@ void __init early_print(const char *str, ...)
printk("%s", buf);
}
#ifdef CONFIG_ARM_PATCH_IDIV
/*
 * Build the instruction word for "sdiv r0, r0, r1".
 *
 * CONFIG_THUMB2_KERNEL selects between the Thumb-2 and the ARM encoding;
 * the chosen opcode is passed through the matching __opcode_to_mem_*()
 * helper before being returned.
 */
static inline u32 __attribute_const__ sdiv_instruction(void)
{
	u32 opcode;

	if (!IS_ENABLED(CONFIG_THUMB2_KERNEL)) {
		/* ARM encoding of "sdiv r0, r0, r1" */
		return __opcode_to_mem_arm(0xe710f110);
	}

	/* Thumb-2 encoding of "sdiv r0, r0, r1" */
	opcode = __opcode_thumb32_compose(0xfb90, 0xf0f1);
	return __opcode_to_mem_thumb32(opcode);
}
/*
 * Build the instruction word for "udiv r0, r0, r1".
 *
 * CONFIG_THUMB2_KERNEL selects between the Thumb-2 and the ARM encoding;
 * the chosen opcode is passed through the matching __opcode_to_mem_*()
 * helper before being returned.
 */
static inline u32 __attribute_const__ udiv_instruction(void)
{
	u32 opcode;

	if (!IS_ENABLED(CONFIG_THUMB2_KERNEL)) {
		/* ARM encoding of "udiv r0, r0, r1" */
		return __opcode_to_mem_arm(0xe730f110);
	}

	/* Thumb-2 encoding of "udiv r0, r0, r1" */
	opcode = __opcode_thumb32_compose(0xfbb0, 0xf0f1);
	return __opcode_to_mem_thumb32(opcode);
}
/*
 * Build the instruction word used to return from a patched division
 * routine.
 *
 * For a Thumb-2 kernel this is the pair "bx lr; nop" composed into one
 * 32-bit value; otherwise it is the single ARM "bx lr" instruction.  The
 * chosen opcode is passed through the matching __opcode_to_mem_*() helper
 * before being returned.
 */
static inline u32 __attribute_const__ bx_lr_instruction(void)
{
	u32 opcode;

	if (!IS_ENABLED(CONFIG_THUMB2_KERNEL)) {
		/* ARM encoding of "bx lr" */
		return __opcode_to_mem_arm(0xe12fff1e);
	}

	/* Thumb encoding of "bx lr" followed by "nop" */
	opcode = __opcode_thumb32_compose(0x4770, 0x46c0);
	return __opcode_to_mem_thumb32(opcode);
}
/*
 * Overwrite the first two words of __aeabi_uidiv() and __aeabi_idiv() with
 * a udiv/sdiv instruction followed by "bx lr".
 *
 * Patching only happens when elf_hwcap has the divide capability bit for
 * the instruction set selected by CONFIG_THUMB2_KERNEL (HWCAP_IDIVT for a
 * Thumb-2 kernel, HWCAP_IDIVA otherwise); without it the function returns
 * and leaves the library routines untouched.
 */
static void __init patch_aeabi_idiv(void)
{
	extern void __aeabi_uidiv(void);
	extern void __aeabi_idiv(void);
	uintptr_t fn_addr;
	unsigned int mask;

	mask = IS_ENABLED(CONFIG_THUMB2_KERNEL) ? HWCAP_IDIVT : HWCAP_IDIVA;
	if (!(elf_hwcap & mask))
		return;

	pr_info("CPU: div instructions available: patching division code\n");

	/*
	 * Mask off bit 0 of the function address (presumably the Thumb state
	 * bit) to obtain the address of the first instruction.
	 */
	fn_addr = ((uintptr_t)&__aeabi_uidiv) & ~1;
	/*
	 * Treating a function's address as a writable u32 array is undefined
	 * behaviour, and the compiler is then free to drop either store.
	 * The empty asm makes fn_addr opaque so the compiler can no longer
	 * tell where the pointer came from and must emit both writes.
	 */
	__asm__ ("" : "+g" (fn_addr));
	((u32 *)fn_addr)[0] = udiv_instruction();
	((u32 *)fn_addr)[1] = bx_lr_instruction();
	/* Make the two freshly written words visible to instruction fetch. */
	flush_icache_range(fn_addr, fn_addr + 8);

	fn_addr = ((uintptr_t)&__aeabi_idiv) & ~1;
	/* Same laundering of the address as for __aeabi_uidiv above. */
	__asm__ ("" : "+g" (fn_addr));
	((u32 *)fn_addr)[0] = sdiv_instruction();
	((u32 *)fn_addr)[1] = bx_lr_instruction();
	flush_icache_range(fn_addr, fn_addr + 8);
}
#else
/* CONFIG_ARM_PATCH_IDIV is not set: there is nothing to patch. */
static inline void patch_aeabi_idiv(void)
{
}
#endif
static void __init cpuid_init_hwcaps(void)
{
int block;
@ -642,6 +708,7 @@ static void __init setup_processor(void)
elf_hwcap = list->elf_hwcap;
cpuid_init_hwcaps();
patch_aeabi_idiv();
#ifndef CONFIG_ARM_THUMB
elf_hwcap &= ~(HWCAP_THUMB | HWCAP_IDIVT);

View file

@ -205,6 +205,10 @@ Boston, MA 02111-1307, USA. */
.endm
#ifdef CONFIG_ARM_PATCH_IDIV
.align 3
#endif
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)
@ -253,6 +257,10 @@ UNWIND(.fnstart)
UNWIND(.fnend)
ENDPROC(__umodsi3)
#ifdef CONFIG_ARM_PATCH_IDIV
.align 3
#endif
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)