This is a new generic nmi_watchdog implementation using the perf events infrastructure, as suggested by Ingo.

The implementation is simple: just create an in-kernel perf event and register an overflow handler to check for CPU lockups. I created a generic implementation that lives in kernel/ and a hardware-specific part that, for now, lives in arch/x86.

This approach has a number of advantages:

- It simplifies the x86 PMU implementation in the long run, in that it removes the hardcoded low-level PMU implementation that was the NMI watchdog before.
- It allows new NMI watchdog features to be added in a central place.
- It allows other architectures to enable the NMI watchdog, as long as they have perf events (that provide NMIs) implemented.
- It also allows for more graceful co-existence of existing perf events apps and the NMI watchdog - before these changes the relationship was exclusive. (The NMI watchdog will 'spend' a perf event when enabled. In later iterations we might be able to piggyback from an existing NMI event without having to allocate a hardware event for the NMI watchdog - turning this into a no-hardware-cost feature.)

As for compatibility, we'll keep the old NMI watchdog code as well until the new one can 100% replace it on all CPUs, old and new alike. That might take some time as the NMI watchdog has been ported to many CPU models.

I have done light testing to make sure the framework works correctly and it does.

v2: Set the correct timeout values based on the old nmi watchdog

Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: gorcunov@gmail.com
Cc: aris@redhat.com
Cc: peterz@infradead.org
LKML-Reference: <1265424425-31562-3-git-send-email-dzickus@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
114 lines
3 KiB
C
114 lines
3 KiB
C
/*
|
|
* HW NMI watchdog support
|
|
*
|
|
* started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
|
|
*
|
|
* Arch specific calls to support NMI watchdog
|
|
*
|
|
* Bits copied from original nmi.c file
|
|
*
|
|
*/
|
|
|
|
#include <asm/apic.h>
|
|
#include <linux/smp.h>
|
|
#include <linux/cpumask.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/percpu.h>
|
|
#include <linux/cpumask.h>
|
|
#include <linux/kernel_stat.h>
|
|
#include <asm/mce.h>
|
|
|
|
#include <linux/nmi.h>
|
|
#include <linux/module.h>
|
|
|
|
/* For reliability, we're prepared to waste bits here. */
|
|
static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
|
|
|
|
static DEFINE_PER_CPU(unsigned, last_irq_sum);
|
|
|
|
/*
|
|
* Take the local apic timer and PIT/HPET into account. We don't
|
|
* know which one is active, when we have highres/dyntick on
|
|
*/
|
|
static inline unsigned int get_timer_irqs(int cpu)
|
|
{
|
|
return per_cpu(irq_stat, cpu).apic_timer_irqs +
|
|
per_cpu(irq_stat, cpu).irq0_irqs;
|
|
}
|
|
|
|
/*
 * Tell whether a machine check is being handled right now.
 *
 * With CONFIG_X86_MCE this is 1 while the mce_entry counter is positive
 * and 0 otherwise; without CONFIG_X86_MCE it is always 0.
 */
static inline int mce_in_progress(void)
{
#ifdef CONFIG_X86_MCE
	return atomic_read(&mce_entry) > 0;
#else
	return 0;
#endif
}
|
|
|
|
int hw_nmi_is_cpu_stuck(struct pt_regs *regs)
|
|
{
|
|
unsigned int sum;
|
|
int cpu = smp_processor_id();
|
|
|
|
/* FIXME: cheap hack for this check, probably should get its own
|
|
* die_notifier handler
|
|
*/
|
|
if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
|
|
static DEFINE_SPINLOCK(lock); /* Serialise the printks */
|
|
|
|
spin_lock(&lock);
|
|
printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
|
|
show_regs(regs);
|
|
dump_stack();
|
|
spin_unlock(&lock);
|
|
cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
|
|
}
|
|
|
|
/* if we are doing an mce, just assume the cpu is not stuck */
|
|
/* Could check oops_in_progress here too, but it's safer not to */
|
|
if (mce_in_progress())
|
|
return 0;
|
|
|
|
/* We determine if the cpu is stuck by checking whether any
|
|
* interrupts have happened since we last checked. Of course
|
|
* an nmi storm could create false positives, but the higher
|
|
* level logic should account for that
|
|
*/
|
|
sum = get_timer_irqs(cpu);
|
|
if (__get_cpu_var(last_irq_sum) == sum) {
|
|
return 1;
|
|
} else {
|
|
__get_cpu_var(last_irq_sum) = sum;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
void arch_trigger_all_cpu_backtrace(void)
|
|
{
|
|
int i;
|
|
|
|
cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
|
|
|
|
printk(KERN_INFO "sending NMI to all CPUs:\n");
|
|
apic->send_IPI_all(NMI_VECTOR);
|
|
|
|
/* Wait for up to 10 seconds for all CPUs to do the backtrace */
|
|
for (i = 0; i < 10 * 1000; i++) {
|
|
if (cpumask_empty(to_cpumask(backtrace_mask)))
|
|
break;
|
|
mdelay(1);
|
|
}
|
|
}
|
|
|
|
/* STUB calls to mimic old nmi_watchdog behaviour */
|
|
unsigned int nmi_watchdog = NMI_NONE;
|
|
EXPORT_SYMBOL(nmi_watchdog);
|
|
atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
|
|
EXPORT_SYMBOL(nmi_active);
|
|
int nmi_watchdog_enabled;
|
|
int unknown_nmi_panic;
|
|
void cpu_nmi_set_wd_enabled(void) { return; }
|
|
void acpi_nmi_enable(void) { return; }
|
|
void acpi_nmi_disable(void) { return; }
|
|
void stop_apic_nmi_watchdog(void *unused) { return; }
|
|
void setup_apic_nmi_watchdog(void *unused) { return; }
|
|
int __init check_nmi_watchdog(void) { return 0; }
|