ARM: 8477/1: runtime patch udiv/sdiv instructions into __aeabi_{u}idiv()
The ARM compiler inserts calls to __aeabi_idiv() and __aeabi_uidiv() when it needs to perform division on signed and unsigned integers. If a processor has support for the sdiv and udiv instructions, the kernel may overwrite the beginning of those functions with those instructions and a "bx lr" to get better performance. To ensure that those functions are aligned to a 32-bit word for easier patching (which might not always be the case in Thumb mode) and that the two patched instructions end up in the same cache line, an 8-byte alignment is enforced when ARM_PATCH_IDIV is selected. This was heavily inspired by a previous patch from Stephen Boyd. Signed-off-by: Nicolas Pitre <nico@linaro.org> Acked-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
This commit is contained in:
parent
38fc2f6c98
commit
42f25bddd0
3 changed files with 93 additions and 0 deletions
|
@ -1603,6 +1603,24 @@ config THUMB2_AVOID_R_ARM_THM_JUMP11
|
|||
config ARM_ASM_UNIFIED
|
||||
bool
|
||||
|
||||
config ARM_PATCH_IDIV
|
||||
bool "Runtime patch udiv/sdiv instructions into __aeabi_{u}idiv()"
|
||||
depends on CPU_32v7 && !XIP_KERNEL
|
||||
default y
|
||||
help
|
||||
The ARM compiler inserts calls to __aeabi_idiv() and
|
||||
__aeabi_uidiv() when it needs to perform division on signed
|
||||
and unsigned integers. Some v7 CPUs have support for the sdiv
|
||||
and udiv instructions that can be used to implement those
|
||||
functions.
|
||||
|
||||
Enabling this option allows the kernel to modify itself to
|
||||
replace the first two instructions of these library functions
|
||||
with the sdiv or udiv plus "bx lr" instructions when the CPU
|
||||
it is running on supports them. Typically this will be faster
|
||||
and less power intensive than running the original library
|
||||
code to do integer division.
|
||||
|
||||
config AEABI
|
||||
bool "Use the ARM EABI to compile the kernel"
|
||||
help
|
||||
|
|
|
@ -375,6 +375,72 @@ void __init early_print(const char *str, ...)
|
|||
printk("%s", buf);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ARM_PATCH_IDIV
|
||||
|
||||
/*
 * Build the 32-bit word holding a "sdiv r0, r0, r1" instruction for the
 * instruction set the kernel is built for: the Thumb-2 encoding when
 * CONFIG_THUMB2_KERNEL is enabled, the ARM encoding otherwise.
 *
 * The raw opcode is passed through __opcode_to_mem_thumb32() or
 * __opcode_to_mem_arm() before being returned (presumably converting it
 * to the layout instructions have in memory -- confirm against the
 * opcode helpers' definitions).
 *
 * Return: the value to store at the patch site.
 */
static inline u32 __attribute_const__ sdiv_instruction(void)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_THUMB2_KERNEL)) {
|
||||
/* Thumb-2 "sdiv r0, r0, r1", assembled from its two 16-bit halves */
|
||||
u32 insn = __opcode_thumb32_compose(0xfb90, 0xf0f1);
|
||||
return __opcode_to_mem_thumb32(insn);
|
||||
}
|
||||
|
||||
/* ARM "sdiv r0, r0, r1" */
|
||||
return __opcode_to_mem_arm(0xe710f110);
|
||||
}
|
||||
|
||||
/*
 * Build the 32-bit word holding a "udiv r0, r0, r1" instruction for the
 * instruction set the kernel is built for: the Thumb-2 encoding when
 * CONFIG_THUMB2_KERNEL is enabled, the ARM encoding otherwise.
 *
 * The raw opcode is passed through __opcode_to_mem_thumb32() or
 * __opcode_to_mem_arm() before being returned (presumably converting it
 * to the layout instructions have in memory -- confirm against the
 * opcode helpers' definitions).
 *
 * Return: the value to store at the patch site.
 */
static inline u32 __attribute_const__ udiv_instruction(void)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_THUMB2_KERNEL)) {
|
||||
/* Thumb-2 "udiv r0, r0, r1", assembled from its two 16-bit halves */
|
||||
u32 insn = __opcode_thumb32_compose(0xfbb0, 0xf0f1);
|
||||
return __opcode_to_mem_thumb32(insn);
|
||||
}
|
||||
|
||||
/* ARM "udiv r0, r0, r1" */
|
||||
return __opcode_to_mem_arm(0xe730f110);
|
||||
}
|
||||
|
||||
/*
 * Build the 32-bit word holding the return sequence that follows the
 * divide instruction at the patch site.
 *
 * For a Thumb-2 kernel the word is composed of two 16-bit instructions,
 * "bx lr" followed by "nop", so that it still fills a whole u32; for an
 * ARM kernel it is the single 32-bit "bx lr".
 *
 * Return: the value to store in the second word of the patch site.
 */
static inline u32 __attribute_const__ bx_lr_instruction(void)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_THUMB2_KERNEL)) {
|
||||
/* "bx lr; nop": two 16-bit Thumb instructions packed into one word */
|
||||
u32 insn = __opcode_thumb32_compose(0x4770, 0x46c0);
|
||||
return __opcode_to_mem_thumb32(insn);
|
||||
}
|
||||
|
||||
/* ARM "bx lr" */
|
||||
return __opcode_to_mem_arm(0xe12fff1e);
|
||||
}
|
||||
|
||||
/*
 * Overwrite the first two 32-bit words of __aeabi_uidiv() and
 * __aeabi_idiv() with a hardware divide instruction followed by the
 * "bx lr" return sequence.
 *
 * Patching only happens when elf_hwcap has the divide capability bit
 * matching the kernel's instruction set: HWCAP_IDIVT for a Thumb-2
 * kernel, HWCAP_IDIVA otherwise. If the bit is clear the function
 * returns without touching the library routines.
 *
 * elf_hwcap must already be populated when this runs.
 */
static void __init patch_aeabi_idiv(void)
|
||||
{
|
||||
extern void __aeabi_uidiv(void);
|
||||
extern void __aeabi_idiv(void);
|
||||
uintptr_t fn_addr;
|
||||
unsigned int mask;
|
||||
|
||||
/* Select the capability bit for the instruction set the kernel uses. */
mask = IS_ENABLED(CONFIG_THUMB2_KERNEL) ? HWCAP_IDIVT : HWCAP_IDIVA;
|
||||
if (!(elf_hwcap & mask))
|
||||
return;
|
||||
|
||||
pr_info("CPU: div instructions available: patching division code\n");
|
||||
|
||||
/*
 * Clear bit 0 of the symbol address before using it as a u32 pointer.
 * NOTE(review): presumably bit 0 is the Thumb interworking bit carried
 * by Thumb function addresses -- confirm.
 */
fn_addr = ((uintptr_t)&__aeabi_uidiv) & ~1;
|
||||
/* Word 0: the unsigned divide; word 1: the return sequence. */
((u32 *)fn_addr)[0] = udiv_instruction();
|
||||
((u32 *)fn_addr)[1] = bx_lr_instruction();
|
||||
/* Flush exactly the 8 bytes that were just rewritten. */
flush_icache_range(fn_addr, fn_addr + 8);
|
||||
|
||||
/* Same procedure for the signed variant. */
fn_addr = ((uintptr_t)&__aeabi_idiv) & ~1;
|
||||
((u32 *)fn_addr)[0] = sdiv_instruction();
|
||||
((u32 *)fn_addr)[1] = bx_lr_instruction();
|
||||
flush_icache_range(fn_addr, fn_addr + 8);
|
||||
}
|
||||
|
||||
#else
|
||||
/* CONFIG_ARM_PATCH_IDIV is not set: no-op stub so callers need no #ifdef. */
static inline void patch_aeabi_idiv(void) { }
|
||||
#endif
|
||||
|
||||
static void __init cpuid_init_hwcaps(void)
|
||||
{
|
||||
int block;
|
||||
|
@ -642,6 +708,7 @@ static void __init setup_processor(void)
|
|||
elf_hwcap = list->elf_hwcap;
|
||||
|
||||
cpuid_init_hwcaps();
|
||||
patch_aeabi_idiv();
|
||||
|
||||
#ifndef CONFIG_ARM_THUMB
|
||||
elf_hwcap &= ~(HWCAP_THUMB | HWCAP_IDIVT);
|
||||
|
|
|
@ -205,6 +205,10 @@ Boston, MA 02111-1307, USA. */
|
|||
.endm
|
||||
|
||||
|
||||
#ifdef CONFIG_ARM_PATCH_IDIV
|
||||
.align 3
|
||||
#endif
|
||||
|
||||
ENTRY(__udivsi3)
|
||||
ENTRY(__aeabi_uidiv)
|
||||
UNWIND(.fnstart)
|
||||
|
@ -253,6 +257,10 @@ UNWIND(.fnstart)
|
|||
UNWIND(.fnend)
|
||||
ENDPROC(__umodsi3)
|
||||
|
||||
#ifdef CONFIG_ARM_PATCH_IDIV
|
||||
.align 3
|
||||
#endif
|
||||
|
||||
ENTRY(__divsi3)
|
||||
ENTRY(__aeabi_idiv)
|
||||
UNWIND(.fnstart)
|
||||
|
|
Loading…
Reference in a new issue