powerpc fixes for 4.13 #6
All fixes for code that went in this cycle. - A revert of an optimisation to the syscall exit path, which could lead to an oops on either older machines or machines with > 1T of memory. - Disable some deep idle states if the firmware configuration for them fails. - Re-enable HARD/SOFT lockup detectors in defconfigs after a Kconfig change. - Six fairly small patches fixing bugs in our new watchdog code. Thanks to: Gautham R. Shenoy, Nicholas Piggin. -----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iQIcBAABAgAGBQJZjZBeAAoJEFHr6jzI4aWA0RoP/jreL470i8sjPvm7EuF0+jWb D3gaXx55jPo65CLxDH/qMElKk1VxAgCpWHqVN9e/wd9qKxE+xwQ890Enlx4wQRQe I7AkQQoATsf05medbvihTcNRxMcOvLPCi4GP64W/aegaTGliKyFWAu7w/ZsSzGi+ jaskMrBJAR2X4OBfbUi0UEE/KsvDkUje2qJj1NiFyhFFXLyizy4dQknYXVCFD2LR IJah0ZaZM0scOd7YDfsfybBbBDnwi4VqdHTLA5NFBB+sz55+bO9lVOYWpnUYLouH sdZjpjtCuIS+aYOLHupCDXzW2ntv2AVDe4pckTpXrJbUYypXlg8lVx6kfbJ2QETr NrNHljcmmqgqDSglUrDdEpkuQT6Y6hJMroUvo8h20Y+8WXNaKFA5VhhGLkQuBcE4 v/osXs1aiiBURilWTDS5Jo1f5FTZT8ZkNr1/n5jC7q+Y/qkVFglr32hjuomwWxOz SmnwOOcIdJnykr3/pUCvOAffApLfzWFrZeW1MpV1NVlodrubKU939wYZT0yU86b3 ZSs7pK07eZhfdepanG3nCDahhl2qxMyuEjglSfX8WDvVB2MdoxhoUeLwbyMV9qIZ 5A7Anozod4tOrU+qxtPp3uQ230nbQ9JW9yIQM/tdr88k8swpo3NO4O5kfcmH8FVN CBG5pCRNXIZhtJiYVAiv =Vy1K -----END PGP SIGNATURE----- Merge tag 'powerpc-4.13-6' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux Pull powerpc fixes from Michael Ellerman: "All fixes for code that went in this cycle. - a revert of an optimisation to the syscall exit path, which could lead to an oops on either older machines or machines with > 1TB of memory - disable some deep idle states if the firmware configuration for them fails - re-enable HARD/SOFT lockup detectors in defconfigs after a Kconfig change - six fairly small patches fixing bugs in our new watchdog code Thanks to: Gautham R Shenoy, Nicholas Piggin" * tag 'powerpc-4.13-6' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: powerpc/watchdog: add locking around init/exit functions powerpc/watchdog: Fix marking of stuck CPUs powerpc/watchdog: Fix final-check recovered case powerpc/watchdog: Moderate touch_nmi_watchdog overhead powerpc/watchdog: Improve watchdog lock primitive powerpc: NMI IPI improve lock primitive powerpc/configs: Re-enable HARD/SOFT lockup detectors powerpc/powernv/idle: Disable LOSE_FULL_CONTEXT states when stop-api fails Revert "powerpc/64: Avoid restore_math call if possible in syscall exit"
This commit is contained in:
commit
8001a975f9
9 changed files with 111 additions and 68 deletions
|
@ -293,7 +293,8 @@ CONFIG_MAGIC_SYSRQ=y
|
|||
CONFIG_DEBUG_KERNEL=y
|
||||
CONFIG_DEBUG_STACK_USAGE=y
|
||||
CONFIG_DEBUG_STACKOVERFLOW=y
|
||||
CONFIG_LOCKUP_DETECTOR=y
|
||||
CONFIG_SOFTLOCKUP_DETECTOR=y
|
||||
CONFIG_HARDLOCKUP_DETECTOR=y
|
||||
CONFIG_LATENCYTOP=y
|
||||
CONFIG_SCHED_TRACER=y
|
||||
CONFIG_BLK_DEV_IO_TRACE=y
|
||||
|
|
|
@ -324,7 +324,8 @@ CONFIG_MAGIC_SYSRQ=y
|
|||
CONFIG_DEBUG_KERNEL=y
|
||||
CONFIG_DEBUG_STACK_USAGE=y
|
||||
CONFIG_DEBUG_STACKOVERFLOW=y
|
||||
CONFIG_LOCKUP_DETECTOR=y
|
||||
CONFIG_SOFTLOCKUP_DETECTOR=y
|
||||
CONFIG_HARDLOCKUP_DETECTOR=y
|
||||
CONFIG_DEBUG_MUTEXES=y
|
||||
CONFIG_LATENCYTOP=y
|
||||
CONFIG_SCHED_TRACER=y
|
||||
|
|
|
@ -291,7 +291,8 @@ CONFIG_MAGIC_SYSRQ=y
|
|||
CONFIG_DEBUG_KERNEL=y
|
||||
CONFIG_DEBUG_STACK_USAGE=y
|
||||
CONFIG_DEBUG_STACKOVERFLOW=y
|
||||
CONFIG_LOCKUP_DETECTOR=y
|
||||
CONFIG_SOFTLOCKUP_DETECTOR=y
|
||||
CONFIG_HARDLOCKUP_DETECTOR=y
|
||||
CONFIG_LATENCYTOP=y
|
||||
CONFIG_SCHED_TRACER=y
|
||||
CONFIG_BLK_DEV_IO_TRACE=y
|
||||
|
|
|
@ -223,17 +223,27 @@ system_call_exit:
|
|||
andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
|
||||
bne- .Lsyscall_exit_work
|
||||
|
||||
/* If MSR_FP and MSR_VEC are set in user msr, then no need to restore */
|
||||
li r7,MSR_FP
|
||||
andi. r0,r8,MSR_FP
|
||||
beq 2f
|
||||
#ifdef CONFIG_ALTIVEC
|
||||
oris r7,r7,MSR_VEC@h
|
||||
andis. r0,r8,MSR_VEC@h
|
||||
bne 3f
|
||||
#endif
|
||||
and r0,r8,r7
|
||||
cmpd r0,r7
|
||||
bne .Lsyscall_restore_math
|
||||
.Lsyscall_restore_math_cont:
|
||||
2: addi r3,r1,STACK_FRAME_OVERHEAD
|
||||
#ifdef CONFIG_PPC_BOOK3S
|
||||
li r10,MSR_RI
|
||||
mtmsrd r10,1 /* Restore RI */
|
||||
#endif
|
||||
bl restore_math
|
||||
#ifdef CONFIG_PPC_BOOK3S
|
||||
li r11,0
|
||||
mtmsrd r11,1
|
||||
#endif
|
||||
ld r8,_MSR(r1)
|
||||
ld r3,RESULT(r1)
|
||||
li r11,-MAX_ERRNO
|
||||
|
||||
cmpld r3,r11
|
||||
3: cmpld r3,r11
|
||||
ld r5,_CCR(r1)
|
||||
bge- .Lsyscall_error
|
||||
.Lsyscall_error_cont:
|
||||
|
@ -267,40 +277,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
|
|||
std r5,_CCR(r1)
|
||||
b .Lsyscall_error_cont
|
||||
|
||||
.Lsyscall_restore_math:
|
||||
/*
|
||||
* Some initial tests from restore_math to avoid the heavyweight
|
||||
* C code entry and MSR manipulations.
|
||||
*/
|
||||
LOAD_REG_IMMEDIATE(r0, MSR_TS_MASK)
|
||||
and. r0,r0,r8
|
||||
bne 1f
|
||||
|
||||
ld r7,PACACURRENT(r13)
|
||||
lbz r0,THREAD+THREAD_LOAD_FP(r7)
|
||||
#ifdef CONFIG_ALTIVEC
|
||||
lbz r6,THREAD+THREAD_LOAD_VEC(r7)
|
||||
add r0,r0,r6
|
||||
#endif
|
||||
cmpdi r0,0
|
||||
beq .Lsyscall_restore_math_cont
|
||||
|
||||
1: addi r3,r1,STACK_FRAME_OVERHEAD
|
||||
#ifdef CONFIG_PPC_BOOK3S
|
||||
li r10,MSR_RI
|
||||
mtmsrd r10,1 /* Restore RI */
|
||||
#endif
|
||||
bl restore_math
|
||||
#ifdef CONFIG_PPC_BOOK3S
|
||||
li r11,0
|
||||
mtmsrd r11,1
|
||||
#endif
|
||||
/* Restore volatiles, reload MSR from updated one */
|
||||
ld r8,_MSR(r1)
|
||||
ld r3,RESULT(r1)
|
||||
li r11,-MAX_ERRNO
|
||||
b .Lsyscall_restore_math_cont
|
||||
|
||||
/* Traced system call support */
|
||||
.Lsyscall_dotrace:
|
||||
bl save_nvgprs
|
||||
|
|
|
@ -511,10 +511,6 @@ void restore_math(struct pt_regs *regs)
|
|||
{
|
||||
unsigned long msr;
|
||||
|
||||
/*
|
||||
* Syscall exit makes a similar initial check before branching
|
||||
* to restore_math. Keep them in synch.
|
||||
*/
|
||||
if (!msr_tm_active(regs->msr) &&
|
||||
!current->thread.load_fp && !loadvec(current->thread))
|
||||
return;
|
||||
|
|
|
@ -351,7 +351,7 @@ static void nmi_ipi_lock_start(unsigned long *flags)
|
|||
hard_irq_disable();
|
||||
while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) {
|
||||
raw_local_irq_restore(*flags);
|
||||
cpu_relax();
|
||||
spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0);
|
||||
raw_local_irq_save(*flags);
|
||||
hard_irq_disable();
|
||||
}
|
||||
|
@ -360,7 +360,7 @@ static void nmi_ipi_lock_start(unsigned long *flags)
|
|||
static void nmi_ipi_lock(void)
|
||||
{
|
||||
while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1)
|
||||
cpu_relax();
|
||||
spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0);
|
||||
}
|
||||
|
||||
static void nmi_ipi_unlock(void)
|
||||
|
@ -475,7 +475,7 @@ int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
|
|||
nmi_ipi_lock_start(&flags);
|
||||
while (nmi_ipi_busy_count) {
|
||||
nmi_ipi_unlock_end(&flags);
|
||||
cpu_relax();
|
||||
spin_until_cond(nmi_ipi_busy_count == 0);
|
||||
nmi_ipi_lock_start(&flags);
|
||||
}
|
||||
|
||||
|
|
|
@ -71,15 +71,20 @@ static inline void wd_smp_lock(unsigned long *flags)
|
|||
* This may be called from low level interrupt handlers at some
|
||||
* point in future.
|
||||
*/
|
||||
local_irq_save(*flags);
|
||||
while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock)))
|
||||
cpu_relax();
|
||||
raw_local_irq_save(*flags);
|
||||
hard_irq_disable(); /* Make it soft-NMI safe */
|
||||
while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock))) {
|
||||
raw_local_irq_restore(*flags);
|
||||
spin_until_cond(!test_bit(0, &__wd_smp_lock));
|
||||
raw_local_irq_save(*flags);
|
||||
hard_irq_disable();
|
||||
}
|
||||
}
|
||||
|
||||
static inline void wd_smp_unlock(unsigned long *flags)
|
||||
{
|
||||
clear_bit_unlock(0, &__wd_smp_lock);
|
||||
local_irq_restore(*flags);
|
||||
raw_local_irq_restore(*flags);
|
||||
}
|
||||
|
||||
static void wd_lockup_ipi(struct pt_regs *regs)
|
||||
|
@ -96,10 +101,10 @@ static void wd_lockup_ipi(struct pt_regs *regs)
|
|||
nmi_panic(regs, "Hard LOCKUP");
|
||||
}
|
||||
|
||||
static void set_cpu_stuck(int cpu, u64 tb)
|
||||
static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb)
|
||||
{
|
||||
cpumask_set_cpu(cpu, &wd_smp_cpus_stuck);
|
||||
cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
|
||||
cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask);
|
||||
cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask);
|
||||
if (cpumask_empty(&wd_smp_cpus_pending)) {
|
||||
wd_smp_last_reset_tb = tb;
|
||||
cpumask_andnot(&wd_smp_cpus_pending,
|
||||
|
@ -107,6 +112,10 @@ static void set_cpu_stuck(int cpu, u64 tb)
|
|||
&wd_smp_cpus_stuck);
|
||||
}
|
||||
}
|
||||
static void set_cpu_stuck(int cpu, u64 tb)
|
||||
{
|
||||
set_cpumask_stuck(cpumask_of(cpu), tb);
|
||||
}
|
||||
|
||||
static void watchdog_smp_panic(int cpu, u64 tb)
|
||||
{
|
||||
|
@ -135,11 +144,9 @@ static void watchdog_smp_panic(int cpu, u64 tb)
|
|||
}
|
||||
smp_flush_nmi_ipi(1000000);
|
||||
|
||||
/* Take the stuck CPU out of the watch group */
|
||||
for_each_cpu(c, &wd_smp_cpus_pending)
|
||||
set_cpu_stuck(c, tb);
|
||||
/* Take the stuck CPUs out of the watch group */
|
||||
set_cpumask_stuck(&wd_smp_cpus_pending, tb);
|
||||
|
||||
out:
|
||||
wd_smp_unlock(&flags);
|
||||
|
||||
printk_safe_flush();
|
||||
|
@ -152,6 +159,11 @@ out:
|
|||
|
||||
if (hardlockup_panic)
|
||||
nmi_panic(NULL, "Hard LOCKUP");
|
||||
|
||||
return;
|
||||
|
||||
out:
|
||||
wd_smp_unlock(&flags);
|
||||
}
|
||||
|
||||
static void wd_smp_clear_cpu_pending(int cpu, u64 tb)
|
||||
|
@ -258,9 +270,11 @@ static void wd_timer_fn(unsigned long data)
|
|||
|
||||
void arch_touch_nmi_watchdog(void)
|
||||
{
|
||||
unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000;
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
watchdog_timer_interrupt(cpu);
|
||||
if (get_tb() - per_cpu(wd_timer_tb, cpu) >= ticks)
|
||||
watchdog_timer_interrupt(cpu);
|
||||
}
|
||||
EXPORT_SYMBOL(arch_touch_nmi_watchdog);
|
||||
|
||||
|
@ -283,6 +297,8 @@ static void stop_watchdog_timer_on(unsigned int cpu)
|
|||
|
||||
static int start_wd_on_cpu(unsigned int cpu)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) {
|
||||
WARN_ON(1);
|
||||
return 0;
|
||||
|
@ -297,12 +313,14 @@ static int start_wd_on_cpu(unsigned int cpu)
|
|||
if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
|
||||
return 0;
|
||||
|
||||
wd_smp_lock(&flags);
|
||||
cpumask_set_cpu(cpu, &wd_cpus_enabled);
|
||||
if (cpumask_weight(&wd_cpus_enabled) == 1) {
|
||||
cpumask_set_cpu(cpu, &wd_smp_cpus_pending);
|
||||
wd_smp_last_reset_tb = get_tb();
|
||||
}
|
||||
smp_wmb();
|
||||
wd_smp_unlock(&flags);
|
||||
|
||||
start_watchdog_timer_on(cpu);
|
||||
|
||||
return 0;
|
||||
|
@ -310,12 +328,17 @@ static int start_wd_on_cpu(unsigned int cpu)
|
|||
|
||||
static int stop_wd_on_cpu(unsigned int cpu)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
|
||||
return 0; /* Can happen in CPU unplug case */
|
||||
|
||||
stop_watchdog_timer_on(cpu);
|
||||
|
||||
wd_smp_lock(&flags);
|
||||
cpumask_clear_cpu(cpu, &wd_cpus_enabled);
|
||||
wd_smp_unlock(&flags);
|
||||
|
||||
wd_smp_clear_cpu_pending(cpu, get_tb());
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -56,6 +56,7 @@ u64 pnv_first_deep_stop_state = MAX_STOP_STATE;
|
|||
*/
|
||||
static u64 pnv_deepest_stop_psscr_val;
|
||||
static u64 pnv_deepest_stop_psscr_mask;
|
||||
static u64 pnv_deepest_stop_flag;
|
||||
static bool deepest_stop_found;
|
||||
|
||||
static int pnv_save_sprs_for_deep_states(void)
|
||||
|
@ -185,8 +186,40 @@ static void pnv_alloc_idle_core_states(void)
|
|||
|
||||
update_subcore_sibling_mask();
|
||||
|
||||
if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)
|
||||
pnv_save_sprs_for_deep_states();
|
||||
if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
|
||||
int rc = pnv_save_sprs_for_deep_states();
|
||||
|
||||
if (likely(!rc))
|
||||
return;
|
||||
|
||||
/*
|
||||
* The stop-api is unable to restore hypervisor
|
||||
* resources on wakeup from platform idle states which
|
||||
* lose full context. So disable such states.
|
||||
*/
|
||||
supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
|
||||
pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
|
||||
pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");
|
||||
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_300) &&
|
||||
(pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) {
|
||||
/*
|
||||
* Use the default stop state for CPU-Hotplug
|
||||
* if available.
|
||||
*/
|
||||
if (default_stop_found) {
|
||||
pnv_deepest_stop_psscr_val =
|
||||
pnv_default_stop_val;
|
||||
pnv_deepest_stop_psscr_mask =
|
||||
pnv_default_stop_mask;
|
||||
pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
|
||||
pnv_deepest_stop_psscr_val);
|
||||
} else { /* Fallback to snooze loop for CPU-Hotplug */
|
||||
deepest_stop_found = false;
|
||||
pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
u32 pnv_get_supported_cpuidle_states(void)
|
||||
|
@ -375,7 +408,8 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
|
|||
pnv_deepest_stop_psscr_val;
|
||||
srr1 = power9_idle_stop(psscr);
|
||||
|
||||
} else if (idle_states & OPAL_PM_WINKLE_ENABLED) {
|
||||
} else if ((idle_states & OPAL_PM_WINKLE_ENABLED) &&
|
||||
(idle_states & OPAL_PM_LOSE_FULL_CONTEXT)) {
|
||||
srr1 = power7_idle_insn(PNV_THREAD_WINKLE);
|
||||
} else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
|
||||
(idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
|
||||
|
@ -553,6 +587,7 @@ static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags,
|
|||
max_residency_ns = residency_ns[i];
|
||||
pnv_deepest_stop_psscr_val = psscr_val[i];
|
||||
pnv_deepest_stop_psscr_mask = psscr_mask[i];
|
||||
pnv_deepest_stop_flag = flags[i];
|
||||
deepest_stop_found = true;
|
||||
}
|
||||
|
||||
|
|
|
@ -235,6 +235,7 @@ static inline int validate_dt_prop_sizes(const char *prop1, int prop1_len,
|
|||
return -1;
|
||||
}
|
||||
|
||||
extern u32 pnv_get_supported_cpuidle_states(void);
|
||||
static int powernv_add_idle_states(void)
|
||||
{
|
||||
struct device_node *power_mgt;
|
||||
|
@ -248,6 +249,8 @@ static int powernv_add_idle_states(void)
|
|||
const char *names[CPUIDLE_STATE_MAX];
|
||||
u32 has_stop_states = 0;
|
||||
int i, rc;
|
||||
u32 supported_flags = pnv_get_supported_cpuidle_states();
|
||||
|
||||
|
||||
/* Currently we have snooze statically defined */
|
||||
|
||||
|
@ -362,6 +365,13 @@ static int powernv_add_idle_states(void)
|
|||
for (i = 0; i < dt_idle_states; i++) {
|
||||
unsigned int exit_latency, target_residency;
|
||||
bool stops_timebase = false;
|
||||
|
||||
/*
|
||||
* Skip the platform idle state whose flag isn't in
|
||||
* the supported_cpuidle_states flag mask.
|
||||
*/
|
||||
if ((flags[i] & supported_flags) != flags[i])
|
||||
continue;
|
||||
/*
|
||||
* If an idle state has exit latency beyond
|
||||
* POWERNV_THRESHOLD_LATENCY_NS then don't use it
|
||||
|
|
Loading…
Reference in a new issue