Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (39 commits)
  perf tools: Fix compile error on x86_64 Ubuntu
  perf report: Fix --stdio output alignment when --showcpuutilization used
  perf annotate: Get rid of field_sep check
  perf annotate: Fix usage string
  perf kmem: Fix a memory leak
  perf kmem: Add missing closedir() calls
  perf top: Add error message for EMFILE
  perf test: Change type of '-v' option to INCR
  perf script: Add missing closedir() calls
  tracing: Fix compile error when static ftrace is enabled
  recordmcount: Fix handling of elf64 big-endian objects.
  perf tools: Add const.h to MANIFEST to make perf-tar-src-pkg work again
  perf tools: Add support for guest/host-only profiling
  perf kvm: Do guest-only counting by default
  perf top: Don't update total_period on process_sample
  perf hists: Stop using 'self' for struct hist_entry
  perf hists: Rename total_session to total_period
  x86: Add counter when debug stack is used with interrupts enabled
  x86: Allow NMIs to hit breakpoints in i386
  x86: Keep current stack in NMI breakpoints
  ...
commit 83c2f912b4
29 changed files with 1269 additions and 475 deletions
@@ -2475,6 +2475,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted.

	stacktrace	[FTRACE]
			Enabled the stack tracer on boot up.

	stacktrace_filter=[function-list]
			[FTRACE] Limit the functions that the stack tracer
			will trace at boot up. function-list is a comma separated
			list of functions. This list can be changed at run
			time by the stack_trace_filter file in the debugfs
			tracing directory. Note, this enables stack tracing
			and the stacktrace above is not needed.

	sti=		[PARISC,HW]
			Format: <num>
			Set the STI (builtin display/keyboard on the HP-PARISC
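For example (the function names here are purely illustrative), a kernel booted with stacktrace_filter=schedule,do_IRQ would start the stack tracer limited to those two functions, and the list can later be rewritten at run time through the stack_trace_filter file in the debugfs tracing directory, which the stack-tracer hunks further down in this commit wire up.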
@@ -101,6 +101,28 @@ extern void aout_dump_debugregs(struct user *dump);

extern void hw_breakpoint_restore(void);

#ifdef CONFIG_X86_64
DECLARE_PER_CPU(int, debug_stack_usage);
static inline void debug_stack_usage_inc(void)
{
	__get_cpu_var(debug_stack_usage)++;
}
static inline void debug_stack_usage_dec(void)
{
	__get_cpu_var(debug_stack_usage)--;
}
int is_debug_stack(unsigned long addr);
void debug_stack_set_zero(void);
void debug_stack_reset(void);
#else /* !X86_64 */
static inline int is_debug_stack(unsigned long addr) { return 0; }
static inline void debug_stack_set_zero(void) { }
static inline void debug_stack_reset(void) { }
static inline void debug_stack_usage_inc(void) { }
static inline void debug_stack_usage_dec(void) { }
#endif /* X86_64 */

#endif /* __KERNEL__ */

#endif /* _ASM_X86_DEBUGREG_H */
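To make the new helpers' semantics concrete, here is a small, self-contained user-space model. It is only a sketch, with a single pretend CPU, an invented DEBUG_STKSZ value and a made-up stack address, but the test it performs is the same one the common.c hunk below (the one defining is_debug_stack()) adds for x86_64: the debug stack counts as busy while the usage counter is non-zero or while the queried address falls inside the per-CPU debug-stack range.

#include <stdio.h>

#define DEBUG_STKSZ 4096UL                 /* illustrative size only */

static int debug_stack_usage;              /* stand-in for the per-CPU counter */
static unsigned long debug_stack_addr = 0x10000UL;  /* pretend top of the debug stack */

static void debug_stack_usage_inc(void) { debug_stack_usage++; }
static void debug_stack_usage_dec(void) { debug_stack_usage--; }

static int is_debug_stack(unsigned long addr)
{
	/* Same test as the x86_64 implementation added in common.c: "in use"
	 * if the counter says so, or if addr lies in the range
	 * (debug_stack_addr - DEBUG_STKSZ, debug_stack_addr]. */
	return debug_stack_usage ||
	       (addr <= debug_stack_addr &&
	        addr > (debug_stack_addr - DEBUG_STKSZ));
}

int main(void)
{
	printf("%d\n", is_debug_stack(0x10000UL - 0x100));  /* 1: inside the range */
	printf("%d\n", is_debug_stack(0x20000UL));           /* 0: outside, counter 0 */
	debug_stack_usage_inc();                             /* e.g. around do_int3() */
	printf("%d\n", is_debug_stack(0x20000UL));           /* 1: counter non-zero */
	debug_stack_usage_dec();
	return 0;
}

In the kernel the counter is per-CPU and is bumped around do_int3() and do_debug() (see the traps.c hunks below), so an NMI arriving while a breakpoint handler has switched to the interrupt stack still treats the debug stack as occupied.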
|
|
@ -35,6 +35,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
|
|||
|
||||
extern struct desc_ptr idt_descr;
|
||||
extern gate_desc idt_table[];
|
||||
extern struct desc_ptr nmi_idt_descr;
|
||||
extern gate_desc nmi_idt_table[];
|
||||
|
||||
struct gdt_page {
|
||||
struct desc_struct gdt[GDT_ENTRIES];
|
||||
|
@ -307,6 +309,16 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
|
|||
desc->limit = (limit >> 16) & 0xf;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
static inline void set_nmi_gate(int gate, void *addr)
|
||||
{
|
||||
gate_desc s;
|
||||
|
||||
pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
|
||||
write_idt_entry(nmi_idt_table, gate, &s);
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline void _set_gate(int gate, unsigned type, void *addr,
|
||||
unsigned dpl, unsigned ist, unsigned seg)
|
||||
{
|
||||
|
|
|
@ -1021,6 +1021,8 @@ __setup("clearcpuid=", setup_disablecpuid);
|
|||
|
||||
#ifdef CONFIG_X86_64
|
||||
struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
|
||||
struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1,
|
||||
(unsigned long) nmi_idt_table };
|
||||
|
||||
DEFINE_PER_CPU_FIRST(union irq_stack_union,
|
||||
irq_stack_union) __aligned(PAGE_SIZE);
|
||||
|
@ -1085,6 +1087,26 @@ unsigned long kernel_eflags;
|
|||
*/
|
||||
DEFINE_PER_CPU(struct orig_ist, orig_ist);
|
||||
|
||||
static DEFINE_PER_CPU(unsigned long, debug_stack_addr);
|
||||
DEFINE_PER_CPU(int, debug_stack_usage);
|
||||
|
||||
int is_debug_stack(unsigned long addr)
|
||||
{
|
||||
return __get_cpu_var(debug_stack_usage) ||
|
||||
(addr <= __get_cpu_var(debug_stack_addr) &&
|
||||
addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
|
||||
}
|
||||
|
||||
void debug_stack_set_zero(void)
|
||||
{
|
||||
load_idt((const struct desc_ptr *)&nmi_idt_descr);
|
||||
}
|
||||
|
||||
void debug_stack_reset(void)
|
||||
{
|
||||
load_idt((const struct desc_ptr *)&idt_descr);
|
||||
}
|
||||
|
||||
#else /* CONFIG_X86_64 */
|
||||
|
||||
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
|
||||
|
@ -1212,6 +1234,8 @@ void __cpuinit cpu_init(void)
|
|||
estacks += exception_stack_sizes[v];
|
||||
oist->ist[v] = t->x86_tss.ist[v] =
|
||||
(unsigned long)estacks;
|
||||
if (v == DEBUG_STACK-1)
|
||||
per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1480,62 +1480,214 @@ ENTRY(error_exit)
|
|||
CFI_ENDPROC
|
||||
END(error_exit)
|
||||
|
||||
/*
|
||||
* Test if a given stack is an NMI stack or not.
|
||||
*/
|
||||
.macro test_in_nmi reg stack nmi_ret normal_ret
|
||||
cmpq %\reg, \stack
|
||||
ja \normal_ret
|
||||
subq $EXCEPTION_STKSZ, %\reg
|
||||
cmpq %\reg, \stack
|
||||
jb \normal_ret
|
||||
jmp \nmi_ret
|
||||
.endm
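In C terms the macro amounts to a closed-range check. The sketch below is only an illustration; the stack-size value is passed in as a parameter so the snippet stands on its own, whereas the macro hard-codes EXCEPTION_STKSZ.

/* 1 when sp lies within the stack whose top is 'top', i.e.
 * top - stksz <= sp <= top; mirrors the cmpq/ja, subq, cmpq/jb sequence. */
static int test_in_nmi(unsigned long top, unsigned long sp, unsigned long stksz)
{
	return sp <= top && sp >= top - stksz;
}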
|
||||
|
||||
/* runs on exception stack */
|
||||
ENTRY(nmi)
|
||||
INTR_FRAME
|
||||
PARAVIRT_ADJUST_EXCEPTION_FRAME
|
||||
pushq_cfi $-1
|
||||
/*
|
||||
* We allow breakpoints in NMIs. If a breakpoint occurs, then
|
||||
* the iretq it performs will take us out of NMI context.
|
||||
* This means that we can have nested NMIs where the next
|
||||
* NMI is using the top of the stack of the previous NMI. We
|
||||
* can't let it execute because the nested NMI will corrupt the
|
||||
* stack of the previous NMI. NMI handlers are not re-entrant
|
||||
* anyway.
|
||||
*
|
||||
* To handle this case we do the following:
|
||||
* Check a special location on the stack that contains
|
||||
* a variable that is set when NMIs are executing.
|
||||
* The interrupted task's stack is also checked to see if it
|
||||
* is an NMI stack.
|
||||
* If the variable is not set and the stack is not the NMI
|
||||
* stack then:
|
||||
* o Set the special variable on the stack
|
||||
* o Copy the interrupt frame into a "saved" location on the stack
|
||||
* o Copy the interrupt frame into a "copy" location on the stack
|
||||
* o Continue processing the NMI
|
||||
* If the variable is set or the previous stack is the NMI stack:
|
||||
* o Modify the "copy" location to jump to the repeat_nmi
|
||||
* o return back to the first NMI
|
||||
*
|
||||
* Now on exit of the first NMI, we first clear the stack variable
|
||||
* The NMI stack will tell any nested NMIs at that point that it is
|
||||
* nested. Then we pop the stack normally with iret, and if there was
|
||||
* a nested NMI that updated the copy interrupt stack frame, a
|
||||
* jump will be made to the repeat_nmi code that will handle the second
|
||||
* NMI.
|
||||
*/
|
||||
|
||||
/* Use %rdx as our temp variable throughout */
|
||||
pushq_cfi %rdx
|
||||
|
||||
/*
|
||||
* Check the special variable on the stack to see if NMIs are
|
||||
* executing.
|
||||
*/
|
||||
cmp $1, -8(%rsp)
|
||||
je nested_nmi
|
||||
|
||||
/*
|
||||
* Now test if the previous stack was an NMI stack.
|
||||
* We need the double check. We check the NMI stack to satisfy the
|
||||
* race when the first NMI clears the variable before returning.
|
||||
* We check the variable because the first NMI could be in a
|
||||
* breakpoint routine using a breakpoint stack.
|
||||
*/
|
||||
lea 6*8(%rsp), %rdx
|
||||
test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
|
||||
|
||||
nested_nmi:
|
||||
/*
|
||||
* Do nothing if we interrupted the fixup in repeat_nmi.
|
||||
* It's about to repeat the NMI handler, so we are fine
|
||||
* with ignoring this one.
|
||||
*/
|
||||
movq $repeat_nmi, %rdx
|
||||
cmpq 8(%rsp), %rdx
|
||||
ja 1f
|
||||
movq $end_repeat_nmi, %rdx
|
||||
cmpq 8(%rsp), %rdx
|
||||
ja nested_nmi_out
|
||||
|
||||
1:
|
||||
/* Set up the interrupted NMIs stack to jump to repeat_nmi */
|
||||
leaq -6*8(%rsp), %rdx
|
||||
movq %rdx, %rsp
|
||||
CFI_ADJUST_CFA_OFFSET 6*8
|
||||
pushq_cfi $__KERNEL_DS
|
||||
pushq_cfi %rdx
|
||||
pushfq_cfi
|
||||
pushq_cfi $__KERNEL_CS
|
||||
pushq_cfi $repeat_nmi
|
||||
|
||||
/* Put stack back */
|
||||
addq $(11*8), %rsp
|
||||
CFI_ADJUST_CFA_OFFSET -11*8
|
||||
|
||||
nested_nmi_out:
|
||||
popq_cfi %rdx
|
||||
|
||||
/* No need to check faults here */
|
||||
INTERRUPT_RETURN
|
||||
|
||||
first_nmi:
|
||||
/*
|
||||
* Because nested NMIs will use the pushed location that we
|
||||
* stored in rdx, we must keep that space available.
|
||||
* Here's what our stack frame will look like:
|
||||
* +-------------------------+
|
||||
* | original SS |
|
||||
* | original Return RSP |
|
||||
* | original RFLAGS |
|
||||
* | original CS |
|
||||
* | original RIP |
|
||||
* +-------------------------+
|
||||
* | temp storage for rdx |
|
||||
* +-------------------------+
|
||||
* | NMI executing variable |
|
||||
* +-------------------------+
|
||||
* | Saved SS |
|
||||
* | Saved Return RSP |
|
||||
* | Saved RFLAGS |
|
||||
* | Saved CS |
|
||||
* | Saved RIP |
|
||||
* +-------------------------+
|
||||
* | copied SS |
|
||||
* | copied Return RSP |
|
||||
* | copied RFLAGS |
|
||||
* | copied CS |
|
||||
* | copied RIP |
|
||||
* +-------------------------+
|
||||
* | pt_regs |
|
||||
* +-------------------------+
|
||||
*
|
||||
* The saved RIP is used to fix up the copied RIP that a nested
|
||||
* NMI may zero out. The original stack frame and the temp storage
|
||||
* is also used by nested NMIs and can not be trusted on exit.
|
||||
*/
|
||||
/* Set the NMI executing variable on the stack. */
|
||||
pushq_cfi $1
|
||||
|
||||
/* Copy the stack frame to the Saved frame */
|
||||
.rept 5
|
||||
pushq_cfi 6*8(%rsp)
|
||||
.endr
|
||||
|
||||
/* Make another copy, this one may be modified by nested NMIs */
|
||||
.rept 5
|
||||
pushq_cfi 4*8(%rsp)
|
||||
.endr
|
||||
|
||||
/* Do not pop rdx, nested NMIs will corrupt it */
|
||||
movq 11*8(%rsp), %rdx
|
||||
|
||||
/*
|
||||
* Everything below this point can be preempted by a nested
|
||||
* NMI if the first NMI took an exception. Repeated NMIs
|
||||
* caused by an exception and nested NMI will start here, and
|
||||
* can still be preempted by another NMI.
|
||||
*/
|
||||
restart_nmi:
|
||||
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
|
||||
subq $ORIG_RAX-R15, %rsp
|
||||
CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
|
||||
/*
|
||||
* Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit
|
||||
* as we should not be calling schedule in NMI context,
* even with normal interrupts enabled. An NMI should not be
|
||||
* setting NEED_RESCHED or anything that normal interrupts and
|
||||
* exceptions might do.
|
||||
*/
|
||||
call save_paranoid
|
||||
DEFAULT_FRAME 0
|
||||
/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
|
||||
movq %rsp,%rdi
|
||||
movq $-1,%rsi
|
||||
call do_nmi
|
||||
#ifdef CONFIG_TRACE_IRQFLAGS
|
||||
/* paranoidexit; without TRACE_IRQS_OFF */
|
||||
/* ebx: no swapgs flag */
|
||||
DISABLE_INTERRUPTS(CLBR_NONE)
|
||||
testl %ebx,%ebx /* swapgs needed? */
|
||||
jnz nmi_restore
|
||||
testl $3,CS(%rsp)
|
||||
jnz nmi_userspace
|
||||
nmi_swapgs:
|
||||
SWAPGS_UNSAFE_STACK
|
||||
nmi_restore:
|
||||
RESTORE_ALL 8
|
||||
/* Clear the NMI executing stack variable */
|
||||
movq $0, 10*8(%rsp)
|
||||
jmp irq_return
|
||||
nmi_userspace:
|
||||
GET_THREAD_INFO(%rcx)
|
||||
movl TI_flags(%rcx),%ebx
|
||||
andl $_TIF_WORK_MASK,%ebx
|
||||
jz nmi_swapgs
|
||||
movq %rsp,%rdi /* &pt_regs */
|
||||
call sync_regs
|
||||
movq %rax,%rsp /* switch stack for scheduling */
|
||||
testl $_TIF_NEED_RESCHED,%ebx
|
||||
jnz nmi_schedule
|
||||
movl %ebx,%edx /* arg3: thread flags */
|
||||
ENABLE_INTERRUPTS(CLBR_NONE)
|
||||
xorl %esi,%esi /* arg2: oldset */
|
||||
movq %rsp,%rdi /* arg1: &pt_regs */
|
||||
call do_notify_resume
|
||||
DISABLE_INTERRUPTS(CLBR_NONE)
|
||||
jmp nmi_userspace
|
||||
nmi_schedule:
|
||||
ENABLE_INTERRUPTS(CLBR_ANY)
|
||||
call schedule
|
||||
DISABLE_INTERRUPTS(CLBR_ANY)
|
||||
jmp nmi_userspace
|
||||
CFI_ENDPROC
|
||||
#else
|
||||
jmp paranoid_exit
|
||||
CFI_ENDPROC
|
||||
#endif
|
||||
END(nmi)
|
||||
|
||||
/*
|
||||
* If an NMI hit an iret because of an exception or breakpoint,
|
||||
* it can lose its NMI context, and a nested NMI may come in.
|
||||
* In that case, the nested NMI will change the preempted NMI's
|
||||
* stack to jump to here when it does the final iret.
|
||||
*/
|
||||
repeat_nmi:
|
||||
INTR_FRAME
|
||||
/* Update the stack variable to say we are still in NMI */
|
||||
movq $1, 5*8(%rsp)
|
||||
|
||||
/* copy the saved stack back to copy stack */
|
||||
.rept 5
|
||||
pushq_cfi 4*8(%rsp)
|
||||
.endr
|
||||
|
||||
jmp restart_nmi
|
||||
CFI_ENDPROC
|
||||
end_repeat_nmi:
|
||||
|
||||
ENTRY(ignore_sysret)
|
||||
CFI_STARTPROC
|
||||
mov $-ENOSYS,%eax
|
||||
|
|
|
@ -417,6 +417,10 @@ ENTRY(phys_base)
|
|||
ENTRY(idt_table)
|
||||
.skip IDT_ENTRIES * 16
|
||||
|
||||
.align L1_CACHE_BYTES
|
||||
ENTRY(nmi_idt_table)
|
||||
.skip IDT_ENTRIES * 16
|
||||
|
||||
__PAGE_ALIGNED_BSS
|
||||
.align PAGE_SIZE
|
||||
ENTRY(empty_zero_page)
|
||||
|
|
|
@ -405,9 +405,108 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
|
|||
unknown_nmi_error(reason, regs);
|
||||
}
|
||||
|
||||
/*
|
||||
* NMIs can hit breakpoints which will cause them to lose their
|
||||
* NMI context with the CPU when the breakpoint does an iret.
|
||||
*/
|
||||
#ifdef CONFIG_X86_32
|
||||
/*
|
||||
* For i386, NMIs use the same stack as the kernel, and we can
|
||||
* add a workaround to the iret problem in C. Simply have 3 states
|
||||
* the NMI can be in.
|
||||
*
|
||||
* 1) not running
|
||||
* 2) executing
|
||||
* 3) latched
|
||||
*
|
||||
* When no NMI is in progress, it is in the "not running" state.
|
||||
* When an NMI comes in, it goes into the "executing" state.
|
||||
* Normally, if another NMI is triggered, it does not interrupt
|
||||
* the running NMI and the HW will simply latch it so that when
|
||||
* the first NMI finishes, it will restart the second NMI.
|
||||
* (Note, the latch is binary, thus multiple NMIs triggering,
|
||||
* when one is running, are ignored. Only one NMI is restarted.)
|
||||
*
|
||||
* If an NMI hits a breakpoint that executes an iret, another
|
||||
* NMI can preempt it. We do not want to allow this new NMI
|
||||
* to run, but we want to execute it when the first one finishes.
|
||||
* We set the state to "latched", and the first NMI will perform
|
||||
* a cmpxchg on the state, and if it doesn't successfully
|
||||
* reset the state to "not running" it will restart the next
|
||||
* NMI.
|
||||
*/
|
||||
enum nmi_states {
|
||||
NMI_NOT_RUNNING,
|
||||
NMI_EXECUTING,
|
||||
NMI_LATCHED,
|
||||
};
|
||||
static DEFINE_PER_CPU(enum nmi_states, nmi_state);
|
||||
|
||||
#define nmi_nesting_preprocess(regs) \
|
||||
do { \
|
||||
if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) { \
|
||||
__get_cpu_var(nmi_state) = NMI_LATCHED; \
|
||||
return; \
|
||||
} \
|
||||
nmi_restart: \
|
||||
__get_cpu_var(nmi_state) = NMI_EXECUTING; \
|
||||
} while (0)
|
||||
|
||||
#define nmi_nesting_postprocess() \
|
||||
do { \
|
||||
if (cmpxchg(&__get_cpu_var(nmi_state), \
|
||||
NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING) \
|
||||
goto nmi_restart; \
|
||||
} while (0)
|
||||
#else /* x86_64 */
|
||||
/*
|
||||
* In x86_64 things are a bit more difficult. This has the same problem
|
||||
* where an NMI hitting a breakpoint that calls iret will remove the
|
||||
* NMI context, allowing a nested NMI to enter. What makes this more
|
||||
* difficult is that both NMIs and breakpoints have their own stack.
|
||||
* When a new NMI or breakpoint is executed, the stack is set to a fixed
|
||||
* point. If an NMI is nested, it will have its stack set at that same
|
||||
* fixed address that the first NMI had, and will start corrupting the
|
||||
* stack. This is handled in entry_64.S, but the same problem exists with
|
||||
* the breakpoint stack.
|
||||
*
|
||||
* If a breakpoint is being processed, and the debug stack is being used,
|
||||
* if an NMI comes in and also hits a breakpoint, the stack pointer
|
||||
* will be set to the same fixed address as the breakpoint that was
|
||||
* interrupted, causing that stack to be corrupted. To handle this case,
|
||||
* check if the stack that was interrupted is the debug stack, and if
|
||||
* so, change the IDT so that new breakpoints will use the current stack
|
||||
* and not switch to the fixed address. On return of the NMI, switch back
|
||||
* to the original IDT.
|
||||
*/
|
||||
static DEFINE_PER_CPU(int, update_debug_stack);
|
||||
|
||||
static inline void nmi_nesting_preprocess(struct pt_regs *regs)
|
||||
{
|
||||
/*
|
||||
* If we interrupted a breakpoint, it is possible that
|
||||
* the nmi handler will have breakpoints too. We need to
|
||||
* change the IDT such that breakpoints that happen here
|
||||
* continue to use the NMI stack.
|
||||
*/
|
||||
if (unlikely(is_debug_stack(regs->sp))) {
|
||||
debug_stack_set_zero();
|
||||
__get_cpu_var(update_debug_stack) = 1;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void nmi_nesting_postprocess(void)
|
||||
{
|
||||
if (unlikely(__get_cpu_var(update_debug_stack)))
|
||||
debug_stack_reset();
|
||||
}
|
||||
#endif
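The three-state scheme described above for CONFIG_X86_32 can be modelled in ordinary user-space C. The program below is only an illustration of the control flow, not kernel code: C11 atomics stand in for the per-CPU cmpxchg, a function call stands in for the nested NMI, and all names are invented. A nested arrival merely flips the state to LATCHED, so the outer handler's final compare-and-swap fails and it goes around once more.

#include <stdatomic.h>
#include <stdio.h>

enum nmi_states { NMI_NOT_RUNNING, NMI_EXECUTING, NMI_LATCHED };

static _Atomic int nmi_state = NMI_NOT_RUNNING;

/* Models nmi_nesting_preprocess() as seen by a second, nested NMI. */
static void nested_nmi(void)
{
	if (atomic_load(&nmi_state) != NMI_NOT_RUNNING) {
		atomic_store(&nmi_state, NMI_LATCHED);  /* remember it for later */
		return;
	}
	/* a genuine first-level NMI would fall through to the handler here */
}

static void do_nmi(void)
{
	int expected, runs = 0;

	do {
		atomic_store(&nmi_state, NMI_EXECUTING);  /* nmi_restart: */
		runs++;
		if (runs == 1)
			nested_nmi();  /* pretend a second NMI hit a breakpoint window */

		/* nmi_nesting_postprocess(): leave only if nothing was latched
		 * while we were running, otherwise loop and run again. */
		expected = NMI_EXECUTING;
	} while (!atomic_compare_exchange_strong(&nmi_state, &expected,
						 NMI_NOT_RUNNING));

	printf("handler body ran %d time(s)\n", runs);  /* prints 2 */
}

int main(void)
{
	do_nmi();
	return 0;
}

Collapsing any number of nested arrivals into one extra pass matches the hardware behaviour noted in the comment: the latch is binary, so only one pending NMI is ever replayed.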
|
||||
|
||||
dotraplinkage notrace __kprobes void
|
||||
do_nmi(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
nmi_nesting_preprocess(regs);
|
||||
|
||||
nmi_enter();
|
||||
|
||||
inc_irq_stat(__nmi_count);
|
||||
|
@ -416,6 +515,9 @@ do_nmi(struct pt_regs *regs, long error_code)
|
|||
default_do_nmi(regs);
|
||||
|
||||
nmi_exit();
|
||||
|
||||
/* On i386, may loop back to preprocess */
|
||||
nmi_nesting_postprocess();
|
||||
}
|
||||
|
||||
void stop_nmi(void)
|
||||
|
|
|
@ -311,9 +311,15 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
|
|||
== NOTIFY_STOP)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Let others (NMI) know that the debug stack is in use
|
||||
* as we may switch to the interrupt stack.
|
||||
*/
|
||||
debug_stack_usage_inc();
|
||||
preempt_conditional_sti(regs);
|
||||
do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
|
||||
preempt_conditional_cli(regs);
|
||||
debug_stack_usage_dec();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
@ -406,6 +412,12 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
|
|||
SIGTRAP) == NOTIFY_STOP)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Let others (NMI) know that the debug stack is in use
|
||||
* as we may switch to the interrupt stack.
|
||||
*/
|
||||
debug_stack_usage_inc();
|
||||
|
||||
/* It's safe to allow irq's after DR6 has been saved */
|
||||
preempt_conditional_sti(regs);
|
||||
|
||||
|
@ -413,6 +425,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
|
|||
handle_vm86_trap((struct kernel_vm86_regs *) regs,
|
||||
error_code, 1);
|
||||
preempt_conditional_cli(regs);
|
||||
debug_stack_usage_dec();
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -432,6 +445,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
|
|||
if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
|
||||
send_sigtrap(tsk, regs, error_code, si_code);
|
||||
preempt_conditional_cli(regs);
|
||||
debug_stack_usage_dec();
|
||||
|
||||
return;
|
||||
}
|
||||
|
@ -718,4 +732,10 @@ void __init trap_init(void)
|
|||
cpu_init();
|
||||
|
||||
x86_init.irqs.trap_init();
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
memcpy(&nmi_idt_table, &idt_table, IDT_ENTRIES * 16);
|
||||
set_nmi_gate(1, &debug);
|
||||
set_nmi_gate(3, &int3);
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -50,6 +50,11 @@
|
|||
# define inline inline __attribute__((always_inline))
|
||||
# define __inline__ __inline__ __attribute__((always_inline))
|
||||
# define __inline __inline __attribute__((always_inline))
|
||||
#else
|
||||
/* A lot of inline functions can cause havoc with function tracing */
|
||||
# define inline inline notrace
|
||||
# define __inline__ __inline__ notrace
|
||||
# define __inline __inline notrace
|
||||
#endif
|
||||
|
||||
#define __deprecated __attribute__((deprecated))
|
||||
|
|
|
@ -133,6 +133,8 @@ struct ftrace_func_command {
|
|||
int ftrace_arch_code_modify_prepare(void);
|
||||
int ftrace_arch_code_modify_post_process(void);
|
||||
|
||||
void ftrace_bug(int err, unsigned long ip);
|
||||
|
||||
struct seq_file;
|
||||
|
||||
struct ftrace_probe_ops {
|
||||
|
@ -161,7 +163,6 @@ extern int ftrace_text_reserved(void *start, void *end);
|
|||
|
||||
enum {
|
||||
FTRACE_FL_ENABLED = (1 << 30),
|
||||
FTRACE_FL_FREE = (1 << 31),
|
||||
};
|
||||
|
||||
#define FTRACE_FL_MASK (0x3UL << 30)
|
||||
|
@ -172,10 +173,7 @@ struct dyn_ftrace {
|
|||
unsigned long ip; /* address of mcount call-site */
|
||||
struct dyn_ftrace *freelist;
|
||||
};
|
||||
union {
|
||||
unsigned long flags;
|
||||
struct dyn_ftrace *newlist;
|
||||
};
|
||||
unsigned long flags;
|
||||
struct dyn_arch_ftrace arch;
|
||||
};
|
||||
|
||||
|
@ -190,6 +188,56 @@ void ftrace_set_global_notrace(unsigned char *buf, int len, int reset);
|
|||
int register_ftrace_command(struct ftrace_func_command *cmd);
|
||||
int unregister_ftrace_command(struct ftrace_func_command *cmd);
|
||||
|
||||
enum {
|
||||
FTRACE_UPDATE_CALLS = (1 << 0),
|
||||
FTRACE_DISABLE_CALLS = (1 << 1),
|
||||
FTRACE_UPDATE_TRACE_FUNC = (1 << 2),
|
||||
FTRACE_START_FUNC_RET = (1 << 3),
|
||||
FTRACE_STOP_FUNC_RET = (1 << 4),
|
||||
};
|
||||
|
||||
enum {
|
||||
FTRACE_UPDATE_IGNORE,
|
||||
FTRACE_UPDATE_MAKE_CALL,
|
||||
FTRACE_UPDATE_MAKE_NOP,
|
||||
};
|
||||
|
||||
enum {
|
||||
FTRACE_ITER_FILTER = (1 << 0),
|
||||
FTRACE_ITER_NOTRACE = (1 << 1),
|
||||
FTRACE_ITER_PRINTALL = (1 << 2),
|
||||
FTRACE_ITER_DO_HASH = (1 << 3),
|
||||
FTRACE_ITER_HASH = (1 << 4),
|
||||
FTRACE_ITER_ENABLED = (1 << 5),
|
||||
};
|
||||
|
||||
void arch_ftrace_update_code(int command);
|
||||
|
||||
struct ftrace_rec_iter;
|
||||
|
||||
struct ftrace_rec_iter *ftrace_rec_iter_start(void);
|
||||
struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter);
|
||||
struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter);
|
||||
|
||||
int ftrace_update_record(struct dyn_ftrace *rec, int enable);
|
||||
int ftrace_test_record(struct dyn_ftrace *rec, int enable);
|
||||
void ftrace_run_stop_machine(int command);
|
||||
int ftrace_location(unsigned long ip);
|
||||
|
||||
extern ftrace_func_t ftrace_trace_function;
|
||||
|
||||
int ftrace_regex_open(struct ftrace_ops *ops, int flag,
|
||||
struct inode *inode, struct file *file);
|
||||
ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf,
|
||||
size_t cnt, loff_t *ppos);
|
||||
ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf,
|
||||
size_t cnt, loff_t *ppos);
|
||||
loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int origin);
|
||||
int ftrace_regex_release(struct inode *inode, struct file *file);
|
||||
|
||||
void __init
|
||||
ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable);
|
||||
|
||||
/* defined in arch */
|
||||
extern int ftrace_ip_converted(unsigned long ip);
|
||||
extern int ftrace_dyn_arch_init(void *data);
|
||||
|
@ -284,6 +332,25 @@ static inline int ftrace_text_reserved(void *start, void *end)
|
|||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Again users of functions that have ftrace_ops may not
|
||||
* have them defined when ftrace is not enabled, but these
|
||||
* functions may still be called. Use a macro instead of inline.
|
||||
*/
|
||||
#define ftrace_regex_open(ops, flag, inod, file) ({ -ENODEV; })
|
||||
#define ftrace_set_early_filter(ops, buf, enable) do { } while (0)
|
||||
|
||||
static inline ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf,
|
||||
size_t cnt, loff_t *ppos) { return -ENODEV; }
|
||||
static inline ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf,
|
||||
size_t cnt, loff_t *ppos) { return -ENODEV; }
|
||||
static inline loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
|
||||
{
|
||||
return -ENODEV;
|
||||
}
|
||||
static inline int
|
||||
ftrace_regex_release(struct inode *inode, struct file *file) { return -ENODEV; }
|
||||
#endif /* CONFIG_DYNAMIC_FTRACE */
|
||||
|
||||
/* totally disable ftrace - can not re-enable after this */
|
||||
|
|
File diff suppressed because it is too large
|
@ -1738,11 +1738,121 @@ static int replace_system_preds(struct event_subsystem *system,
|
|||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static int create_filter_start(char *filter_str, bool set_str,
|
||||
struct filter_parse_state **psp,
|
||||
struct event_filter **filterp)
|
||||
{
|
||||
struct event_filter *filter;
|
||||
struct filter_parse_state *ps = NULL;
|
||||
int err = 0;
|
||||
|
||||
WARN_ON_ONCE(*psp || *filterp);
|
||||
|
||||
/* allocate everything, and if any fails, free all and fail */
|
||||
filter = __alloc_filter();
|
||||
if (filter && set_str)
|
||||
err = replace_filter_string(filter, filter_str);
|
||||
|
||||
ps = kzalloc(sizeof(*ps), GFP_KERNEL);
|
||||
|
||||
if (!filter || !ps || err) {
|
||||
kfree(ps);
|
||||
__free_filter(filter);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* we're committed to creating a new filter */
|
||||
*filterp = filter;
|
||||
*psp = ps;
|
||||
|
||||
parse_init(ps, filter_ops, filter_str);
|
||||
err = filter_parse(ps);
|
||||
if (err && set_str)
|
||||
append_filter_err(ps, filter);
|
||||
return err;
|
||||
}
|
||||
|
||||
static void create_filter_finish(struct filter_parse_state *ps)
|
||||
{
|
||||
if (ps) {
|
||||
filter_opstack_clear(ps);
|
||||
postfix_clear(ps);
|
||||
kfree(ps);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* create_filter - create a filter for a ftrace_event_call
|
||||
* @call: ftrace_event_call to create a filter for
|
||||
* @filter_str: filter string
|
||||
* @set_str: remember @filter_str and enable detailed error in filter
|
||||
* @filterp: out param for created filter (always updated on return)
|
||||
*
|
||||
* Creates a filter for @call with @filter_str. If @set_str is %true,
|
||||
* @filter_str is copied and recorded in the new filter.
|
||||
*
|
||||
* On success, returns 0 and *@filterp points to the new filter. On
|
||||
* failure, returns -errno and *@filterp may point to %NULL or to a new
|
||||
* filter. In the latter case, the returned filter contains error
|
||||
* information if @set_str is %true and the caller is responsible for
|
||||
* freeing it.
|
||||
*/
|
||||
static int create_filter(struct ftrace_event_call *call,
|
||||
char *filter_str, bool set_str,
|
||||
struct event_filter **filterp)
|
||||
{
|
||||
struct event_filter *filter = NULL;
|
||||
struct filter_parse_state *ps = NULL;
|
||||
int err;
|
||||
|
||||
err = create_filter_start(filter_str, set_str, &ps, &filter);
|
||||
if (!err) {
|
||||
err = replace_preds(call, filter, ps, filter_str, false);
|
||||
if (err && set_str)
|
||||
append_filter_err(ps, filter);
|
||||
}
|
||||
create_filter_finish(ps);
|
||||
|
||||
*filterp = filter;
|
||||
return err;
|
||||
}
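The ownership rule in the kernel-doc above (the out parameter is always updated, and even on failure the caller may receive a filter that only carries error text and must free it) is the part callers most easily get wrong. Below is a stand-alone toy analogue of that contract; every name in it is invented for illustration and only the ownership behaviour mirrors the documentation.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct toy_filter {
	char *error;                     /* like append_filter_err()'s output */
};

static int toy_create_filter(const char *str, struct toy_filter **filterp)
{
	struct toy_filter *f = calloc(1, sizeof(*f));

	*filterp = NULL;
	if (!f)
		return -ENOMEM;          /* out param stays NULL */

	if (strchr(str, '!')) {          /* pretend '!' is a parse error */
		f->error = strdup("syntax error near '!'");
		*filterp = f;            /* failure, but the filter is still handed out */
		return -EINVAL;
	}

	*filterp = f;                    /* success */
	return 0;
}

int main(void)
{
	struct toy_filter *f = NULL;
	int err = toy_create_filter("pid == 1 !", &f);

	if (err && f && f->error)        /* error details ride along on failure */
		fprintf(stderr, "filter error: %s\n", f->error);
	if (f) {                         /* caller frees in every outcome */
		free(f->error);
		free(f);
	}
	return err ? 1 : 0;
}

The reworked apply_event_filter() and ftrace_profile_set_filter() later in this diff follow the same pattern: check err, then either install or free whatever came back through the out parameter.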
|
||||
|
||||
/**
|
||||
* create_system_filter - create a filter for an event_subsystem
|
||||
* @system: event_subsystem to create a filter for
|
||||
* @filter_str: filter string
|
||||
* @filterp: out param for created filter (always updated on return)
|
||||
*
|
||||
* Identical to create_filter() except that it creates a subsystem filter
|
||||
* and always remembers @filter_str.
|
||||
*/
|
||||
static int create_system_filter(struct event_subsystem *system,
|
||||
char *filter_str, struct event_filter **filterp)
|
||||
{
|
||||
struct event_filter *filter = NULL;
|
||||
struct filter_parse_state *ps = NULL;
|
||||
int err;
|
||||
|
||||
err = create_filter_start(filter_str, true, &ps, &filter);
|
||||
if (!err) {
|
||||
err = replace_system_preds(system, ps, filter_str);
|
||||
if (!err) {
|
||||
/* System filters just show a default message */
|
||||
kfree(filter->filter_string);
|
||||
filter->filter_string = NULL;
|
||||
} else {
|
||||
append_filter_err(ps, filter);
|
||||
}
|
||||
}
|
||||
create_filter_finish(ps);
|
||||
|
||||
*filterp = filter;
|
||||
return err;
|
||||
}
|
||||
|
||||
int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
|
||||
{
|
||||
struct filter_parse_state *ps;
|
||||
struct event_filter *filter;
|
||||
struct event_filter *tmp;
|
||||
int err = 0;
|
||||
|
||||
mutex_lock(&event_mutex);
|
||||
|
@ -1759,49 +1869,30 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
|
|||
goto out_unlock;
|
||||
}
|
||||
|
||||
err = -ENOMEM;
|
||||
ps = kzalloc(sizeof(*ps), GFP_KERNEL);
|
||||
if (!ps)
|
||||
goto out_unlock;
|
||||
err = create_filter(call, filter_string, true, &filter);
|
||||
|
||||
filter = __alloc_filter();
|
||||
if (!filter) {
|
||||
kfree(ps);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
replace_filter_string(filter, filter_string);
|
||||
|
||||
parse_init(ps, filter_ops, filter_string);
|
||||
err = filter_parse(ps);
|
||||
if (err) {
|
||||
append_filter_err(ps, filter);
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = replace_preds(call, filter, ps, filter_string, false);
|
||||
if (err) {
|
||||
filter_disable(call);
|
||||
append_filter_err(ps, filter);
|
||||
} else
|
||||
call->flags |= TRACE_EVENT_FL_FILTERED;
|
||||
out:
|
||||
/*
|
||||
* Always swap the call filter with the new filter
|
||||
* even if there was an error. If there was an error
|
||||
* in the filter, we disable the filter and show the error
|
||||
* string
|
||||
*/
|
||||
tmp = call->filter;
|
||||
rcu_assign_pointer(call->filter, filter);
|
||||
if (tmp) {
|
||||
/* Make sure the call is done with the filter */
|
||||
synchronize_sched();
|
||||
__free_filter(tmp);
|
||||
if (filter) {
|
||||
struct event_filter *tmp = call->filter;
|
||||
|
||||
if (!err)
|
||||
call->flags |= TRACE_EVENT_FL_FILTERED;
|
||||
else
|
||||
filter_disable(call);
|
||||
|
||||
rcu_assign_pointer(call->filter, filter);
|
||||
|
||||
if (tmp) {
|
||||
/* Make sure the call is done with the filter */
|
||||
synchronize_sched();
|
||||
__free_filter(tmp);
|
||||
}
|
||||
}
|
||||
filter_opstack_clear(ps);
|
||||
postfix_clear(ps);
|
||||
kfree(ps);
|
||||
out_unlock:
|
||||
mutex_unlock(&event_mutex);
|
||||
|
||||
|
@ -1811,7 +1902,6 @@ out_unlock:
|
|||
int apply_subsystem_event_filter(struct event_subsystem *system,
|
||||
char *filter_string)
|
||||
{
|
||||
struct filter_parse_state *ps;
|
||||
struct event_filter *filter;
|
||||
int err = 0;
|
||||
|
||||
|
@ -1835,48 +1925,19 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
|
|||
goto out_unlock;
|
||||
}
|
||||
|
||||
err = -ENOMEM;
|
||||
ps = kzalloc(sizeof(*ps), GFP_KERNEL);
|
||||
if (!ps)
|
||||
goto out_unlock;
|
||||
|
||||
filter = __alloc_filter();
|
||||
if (!filter)
|
||||
goto out;
|
||||
|
||||
/* System filters just show a default message */
|
||||
kfree(filter->filter_string);
|
||||
filter->filter_string = NULL;
|
||||
|
||||
/*
|
||||
* No event actually uses the system filter
|
||||
* we can free it without synchronize_sched().
|
||||
*/
|
||||
__free_filter(system->filter);
|
||||
system->filter = filter;
|
||||
|
||||
parse_init(ps, filter_ops, filter_string);
|
||||
err = filter_parse(ps);
|
||||
if (err)
|
||||
goto err_filter;
|
||||
|
||||
err = replace_system_preds(system, ps, filter_string);
|
||||
if (err)
|
||||
goto err_filter;
|
||||
|
||||
out:
|
||||
filter_opstack_clear(ps);
|
||||
postfix_clear(ps);
|
||||
kfree(ps);
|
||||
err = create_system_filter(system, filter_string, &filter);
|
||||
if (filter) {
|
||||
/*
|
||||
* No event actually uses the system filter
|
||||
* we can free it without synchronize_sched().
|
||||
*/
|
||||
__free_filter(system->filter);
|
||||
system->filter = filter;
|
||||
}
|
||||
out_unlock:
|
||||
mutex_unlock(&event_mutex);
|
||||
|
||||
return err;
|
||||
|
||||
err_filter:
|
||||
replace_filter_string(filter, filter_string);
|
||||
append_filter_err(ps, system->filter);
|
||||
goto out;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PERF_EVENTS
|
||||
|
@ -1894,7 +1955,6 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
|
|||
{
|
||||
int err;
|
||||
struct event_filter *filter;
|
||||
struct filter_parse_state *ps;
|
||||
struct ftrace_event_call *call;
|
||||
|
||||
mutex_lock(&event_mutex);
|
||||
|
@ -1909,33 +1969,10 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
|
|||
if (event->filter)
|
||||
goto out_unlock;
|
||||
|
||||
filter = __alloc_filter();
|
||||
if (!filter) {
|
||||
err = PTR_ERR(filter);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
err = -ENOMEM;
|
||||
ps = kzalloc(sizeof(*ps), GFP_KERNEL);
|
||||
if (!ps)
|
||||
goto free_filter;
|
||||
|
||||
parse_init(ps, filter_ops, filter_str);
|
||||
err = filter_parse(ps);
|
||||
if (err)
|
||||
goto free_ps;
|
||||
|
||||
err = replace_preds(call, filter, ps, filter_str, false);
|
||||
err = create_filter(call, filter_str, false, &filter);
|
||||
if (!err)
|
||||
event->filter = filter;
|
||||
|
||||
free_ps:
|
||||
filter_opstack_clear(ps);
|
||||
postfix_clear(ps);
|
||||
kfree(ps);
|
||||
|
||||
free_filter:
|
||||
if (err)
|
||||
else
|
||||
__free_filter(filter);
|
||||
|
||||
out_unlock:
|
||||
|
@ -1954,43 +1991,6 @@ out_unlock:
|
|||
#define CREATE_TRACE_POINTS
|
||||
#include "trace_events_filter_test.h"
|
||||
|
||||
static int test_get_filter(char *filter_str, struct ftrace_event_call *call,
|
||||
struct event_filter **pfilter)
|
||||
{
|
||||
struct event_filter *filter;
|
||||
struct filter_parse_state *ps;
|
||||
int err = -ENOMEM;
|
||||
|
||||
filter = __alloc_filter();
|
||||
if (!filter)
|
||||
goto out;
|
||||
|
||||
ps = kzalloc(sizeof(*ps), GFP_KERNEL);
|
||||
if (!ps)
|
||||
goto free_filter;
|
||||
|
||||
parse_init(ps, filter_ops, filter_str);
|
||||
err = filter_parse(ps);
|
||||
if (err)
|
||||
goto free_ps;
|
||||
|
||||
err = replace_preds(call, filter, ps, filter_str, false);
|
||||
if (!err)
|
||||
*pfilter = filter;
|
||||
|
||||
free_ps:
|
||||
filter_opstack_clear(ps);
|
||||
postfix_clear(ps);
|
||||
kfree(ps);
|
||||
|
||||
free_filter:
|
||||
if (err)
|
||||
__free_filter(filter);
|
||||
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
#define DATA_REC(m, va, vb, vc, vd, ve, vf, vg, vh, nvisit) \
|
||||
{ \
|
||||
.filter = FILTER, \
|
||||
|
@ -2109,12 +2109,13 @@ static __init int ftrace_test_event_filter(void)
|
|||
struct test_filter_data_t *d = &test_filter_data[i];
|
||||
int err;
|
||||
|
||||
err = test_get_filter(d->filter, &event_ftrace_test_filter,
|
||||
&filter);
|
||||
err = create_filter(&event_ftrace_test_filter, d->filter,
|
||||
false, &filter);
|
||||
if (err) {
|
||||
printk(KERN_INFO
|
||||
"Failed to get filter for '%s', err %d\n",
|
||||
d->filter, err);
|
||||
__free_filter(filter);
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -13,6 +13,9 @@
|
|||
#include <linux/sysctl.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/fs.h>
|
||||
|
||||
#include <asm/setup.h>
|
||||
|
||||
#include "trace.h"
|
||||
|
||||
#define STACK_TRACE_ENTRIES 500
|
||||
|
@ -133,7 +136,6 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
|
|||
static struct ftrace_ops trace_ops __read_mostly =
|
||||
{
|
||||
.func = stack_trace_call,
|
||||
.flags = FTRACE_OPS_FL_GLOBAL,
|
||||
};
|
||||
|
||||
static ssize_t
|
||||
|
@ -311,6 +313,21 @@ static const struct file_operations stack_trace_fops = {
|
|||
.release = seq_release,
|
||||
};
|
||||
|
||||
static int
|
||||
stack_trace_filter_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
return ftrace_regex_open(&trace_ops, FTRACE_ITER_FILTER,
|
||||
inode, file);
|
||||
}
|
||||
|
||||
static const struct file_operations stack_trace_filter_fops = {
|
||||
.open = stack_trace_filter_open,
|
||||
.read = seq_read,
|
||||
.write = ftrace_filter_write,
|
||||
.llseek = ftrace_regex_lseek,
|
||||
.release = ftrace_regex_release,
|
||||
};
|
||||
|
||||
int
|
||||
stack_trace_sysctl(struct ctl_table *table, int write,
|
||||
void __user *buffer, size_t *lenp,
|
||||
|
@ -338,8 +355,13 @@ stack_trace_sysctl(struct ctl_table *table, int write,
|
|||
return ret;
|
||||
}
|
||||
|
||||
static char stack_trace_filter_buf[COMMAND_LINE_SIZE+1] __initdata;
|
||||
|
||||
static __init int enable_stacktrace(char *str)
|
||||
{
|
||||
if (strncmp(str, "_filter=", 8) == 0)
|
||||
strncpy(stack_trace_filter_buf, str+8, COMMAND_LINE_SIZE);
|
||||
|
||||
stack_tracer_enabled = 1;
|
||||
last_stack_tracer_enabled = 1;
|
||||
return 1;
|
||||
|
@ -358,6 +380,12 @@ static __init int stack_trace_init(void)
|
|||
trace_create_file("stack_trace", 0444, d_tracer,
|
||||
NULL, &stack_trace_fops);
|
||||
|
||||
trace_create_file("stack_trace_filter", 0444, d_tracer,
|
||||
NULL, &stack_trace_filter_fops);
|
||||
|
||||
if (stack_trace_filter_buf[0])
|
||||
ftrace_set_early_filter(&trace_ops, stack_trace_filter_buf, 1);
|
||||
|
||||
if (stack_tracer_enabled)
|
||||
register_ftrace_function(&trace_ops);
|
||||
|
||||
|
|
|
@ -462,7 +462,7 @@ __has_rel_mcount(Elf_Shdr const *const relhdr, /* is SHT_REL or SHT_RELA */
|
|||
succeed_file();
|
||||
}
|
||||
if (w(txthdr->sh_type) != SHT_PROGBITS ||
|
||||
!(w(txthdr->sh_flags) & SHF_EXECINSTR))
|
||||
!(_w(txthdr->sh_flags) & SHF_EXECINSTR))
|
||||
return NULL;
|
||||
return txtname;
|
||||
}
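The two middle lines show the old and new forms side by side: sh_flags is a 64-bit Elf64_Xword on elf64 objects, so reading it through the 32-bit w() accessor gave the wrong answer on big-endian files, which is presumably the breakage named in the "recordmcount: Fix handling of elf64 big-endian objects" entry in the merge log; _w() is the width-correct helper.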
|
||||
|
|
|
@ -21,6 +21,8 @@ EVENT MODIFIERS
|
|||
Events can optionally have a modifier by appending a colon and one or
|
||||
more modifiers. Modifiers allow the user to restrict when events are
|
||||
counted with 'u' for user-space, 'k' for kernel, 'h' for hypervisor.
|
||||
Additional modifiers are 'G' for guest counting (in KVM guests) and 'H'
|
||||
for host counting (not in KVM guests).
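For example, cycles:G counts only while a KVM guest is executing and cycles:H only while the host is, and either can be combined with the existing modifiers (cycles:Gk for guest kernel space, say); the parsing side of this lives in the parse_event_modifier() hunk further below.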
|
||||
|
||||
The 'p' modifier can be used for specifying how precise the instruction
|
||||
address should be. The 'p' modifier is currently only implemented for
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
tools/perf
|
||||
include/linux/const.h
|
||||
include/linux/perf_event.h
|
||||
include/linux/rbtree.h
|
||||
include/linux/list.h
|
||||
|
|
|
@ -235,7 +235,7 @@ out_delete:
|
|||
}
|
||||
|
||||
static const char * const annotate_usage[] = {
|
||||
"perf annotate [<options>] <command>",
|
||||
"perf annotate [<options>]",
|
||||
NULL
|
||||
};
|
||||
|
||||
|
@ -313,10 +313,5 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used)
|
|||
annotate.sym_hist_filter = argv[0];
|
||||
}
|
||||
|
||||
if (field_sep && *field_sep == '.') {
|
||||
pr_err("'.' is the only non valid --field-separator argument\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return __cmd_annotate(&annotate);
|
||||
}
|
||||
|
|
|
@ -108,7 +108,9 @@ static void setup_cpunode_map(void)
|
|||
continue;
|
||||
cpunode_map[cpu] = mem;
|
||||
}
|
||||
closedir(dir2);
|
||||
}
|
||||
closedir(dir1);
|
||||
}
|
||||
|
||||
static void insert_alloc_stat(unsigned long call_site, unsigned long ptr,
|
||||
|
@ -645,6 +647,7 @@ static int setup_sorting(struct list_head *sort_list, const char *arg)
|
|||
break;
|
||||
if (sort_dimension__add(tok, sort_list) < 0) {
|
||||
error("Unknown --sort key: '%s'", tok);
|
||||
free(str);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,9 +22,6 @@
|
|||
static const char *file_name;
|
||||
static char name_buffer[256];
|
||||
|
||||
bool perf_host = 1;
|
||||
bool perf_guest;
|
||||
|
||||
static const char * const kvm_usage[] = {
|
||||
"perf kvm [<options>] {top|record|report|diff|buildid-list}",
|
||||
NULL
|
||||
|
@ -107,7 +104,8 @@ static int __cmd_buildid_list(int argc, const char **argv)
|
|||
|
||||
int cmd_kvm(int argc, const char **argv, const char *prefix __used)
|
||||
{
|
||||
perf_host = perf_guest = 0;
|
||||
perf_host = 0;
|
||||
perf_guest = 1;
|
||||
|
||||
argc = parse_options(argc, argv, kvm_options, kvm_usage,
|
||||
PARSE_OPT_STOP_AT_NON_OPTION);
|
||||
|
|
|
@ -1018,13 +1018,17 @@ static char *get_script_path(const char *script_root, const char *suffix)
|
|||
__script_root = get_script_root(&script_dirent, suffix);
|
||||
if (__script_root && !strcmp(script_root, __script_root)) {
|
||||
free(__script_root);
|
||||
closedir(lang_dir);
|
||||
closedir(scripts_dir);
|
||||
snprintf(script_path, MAXPATHLEN, "%s/%s",
|
||||
lang_path, script_dirent.d_name);
|
||||
return strdup(script_path);
|
||||
}
|
||||
free(__script_root);
|
||||
}
|
||||
closedir(lang_dir);
|
||||
}
|
||||
closedir(scripts_dir);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
|
|
@ -1396,7 +1396,7 @@ int cmd_test(int argc, const char **argv, const char *prefix __used)
|
|||
NULL,
|
||||
};
|
||||
const struct option test_options[] = {
|
||||
OPT_INTEGER('v', "verbose", &verbose,
|
||||
OPT_INCR('v', "verbose", &verbose,
|
||||
"be more verbose (show symbol address, etc)"),
|
||||
OPT_END()
|
||||
};
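OPT_INCR makes each additional -v on the command line (for example, perf test -vv) bump the verbosity level, instead of expecting a numeric argument the way OPT_INTEGER did; that is what the "perf test: Change type of '-v' option to INCR" entry in the merge log refers to.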
|
||||
|
|
|
@ -235,7 +235,6 @@ static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
|
|||
if (he == NULL)
|
||||
return NULL;
|
||||
|
||||
evsel->hists.stats.total_period += sample->period;
|
||||
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
|
||||
return he;
|
||||
}
|
||||
|
@ -889,6 +888,10 @@ try_again:
|
|||
ui__warning("The %s event is not supported.\n",
|
||||
event_name(counter));
|
||||
goto out_err;
|
||||
} else if (err == EMFILE) {
|
||||
ui__warning("Too many events are opened.\n"
|
||||
"Try again after reducing the number of events\n");
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
ui__warning("The sys_perf_event_open() syscall "
|
||||
|
|
|
@ -111,8 +111,11 @@ int perf_evlist__add_default(struct perf_evlist *evlist)
|
|||
.type = PERF_TYPE_HARDWARE,
|
||||
.config = PERF_COUNT_HW_CPU_CYCLES,
|
||||
};
|
||||
struct perf_evsel *evsel = perf_evsel__new(&attr, 0);
|
||||
struct perf_evsel *evsel;
|
||||
|
||||
event_attr_init(&attr);
|
||||
|
||||
evsel = perf_evsel__new(&attr, 0);
|
||||
if (evsel == NULL)
|
||||
goto error;
|
||||
|
||||
|
|
|
@ -76,21 +76,21 @@ static void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
|
|||
}
|
||||
}
|
||||
|
||||
static void hist_entry__add_cpumode_period(struct hist_entry *self,
|
||||
static void hist_entry__add_cpumode_period(struct hist_entry *he,
|
||||
unsigned int cpumode, u64 period)
|
||||
{
|
||||
switch (cpumode) {
|
||||
case PERF_RECORD_MISC_KERNEL:
|
||||
self->period_sys += period;
|
||||
he->period_sys += period;
|
||||
break;
|
||||
case PERF_RECORD_MISC_USER:
|
||||
self->period_us += period;
|
||||
he->period_us += period;
|
||||
break;
|
||||
case PERF_RECORD_MISC_GUEST_KERNEL:
|
||||
self->period_guest_sys += period;
|
||||
he->period_guest_sys += period;
|
||||
break;
|
||||
case PERF_RECORD_MISC_GUEST_USER:
|
||||
self->period_guest_us += period;
|
||||
he->period_guest_us += period;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
|
@ -165,18 +165,18 @@ void hists__decay_entries_threaded(struct hists *hists,
|
|||
static struct hist_entry *hist_entry__new(struct hist_entry *template)
|
||||
{
|
||||
size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0;
|
||||
struct hist_entry *self = malloc(sizeof(*self) + callchain_size);
|
||||
struct hist_entry *he = malloc(sizeof(*he) + callchain_size);
|
||||
|
||||
if (self != NULL) {
|
||||
*self = *template;
|
||||
self->nr_events = 1;
|
||||
if (self->ms.map)
|
||||
self->ms.map->referenced = true;
|
||||
if (he != NULL) {
|
||||
*he = *template;
|
||||
he->nr_events = 1;
|
||||
if (he->ms.map)
|
||||
he->ms.map->referenced = true;
|
||||
if (symbol_conf.use_callchain)
|
||||
callchain_init(self->callchain);
|
||||
callchain_init(he->callchain);
|
||||
}
|
||||
|
||||
return self;
|
||||
return he;
|
||||
}
|
||||
|
||||
static void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h)
|
||||
|
@ -677,15 +677,16 @@ static size_t callchain__fprintf_flat(FILE *fp, struct callchain_node *self,
|
|||
return ret;
|
||||
}
|
||||
|
||||
static size_t hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self,
|
||||
u64 total_samples, int left_margin)
|
||||
static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
|
||||
u64 total_samples, int left_margin,
|
||||
FILE *fp)
|
||||
{
|
||||
struct rb_node *rb_node;
|
||||
struct callchain_node *chain;
|
||||
size_t ret = 0;
|
||||
u32 entries_printed = 0;
|
||||
|
||||
rb_node = rb_first(&self->sorted_chain);
|
||||
rb_node = rb_first(&he->sorted_chain);
|
||||
while (rb_node) {
|
||||
double percent;
|
||||
|
||||
|
@ -730,35 +731,35 @@ void hists__output_recalc_col_len(struct hists *hists, int max_rows)
|
|||
}
|
||||
}
|
||||
|
||||
static int hist_entry__pcnt_snprintf(struct hist_entry *self, char *s,
|
||||
static int hist_entry__pcnt_snprintf(struct hist_entry *he, char *s,
|
||||
size_t size, struct hists *pair_hists,
|
||||
bool show_displacement, long displacement,
|
||||
bool color, u64 session_total)
|
||||
bool color, u64 total_period)
|
||||
{
|
||||
u64 period, total, period_sys, period_us, period_guest_sys, period_guest_us;
|
||||
u64 nr_events;
|
||||
const char *sep = symbol_conf.field_sep;
|
||||
int ret;
|
||||
|
||||
if (symbol_conf.exclude_other && !self->parent)
|
||||
if (symbol_conf.exclude_other && !he->parent)
|
||||
return 0;
|
||||
|
||||
if (pair_hists) {
|
||||
period = self->pair ? self->pair->period : 0;
|
||||
nr_events = self->pair ? self->pair->nr_events : 0;
|
||||
period = he->pair ? he->pair->period : 0;
|
||||
nr_events = he->pair ? he->pair->nr_events : 0;
|
||||
total = pair_hists->stats.total_period;
|
||||
period_sys = self->pair ? self->pair->period_sys : 0;
|
||||
period_us = self->pair ? self->pair->period_us : 0;
|
||||
period_guest_sys = self->pair ? self->pair->period_guest_sys : 0;
|
||||
period_guest_us = self->pair ? self->pair->period_guest_us : 0;
|
||||
period_sys = he->pair ? he->pair->period_sys : 0;
|
||||
period_us = he->pair ? he->pair->period_us : 0;
|
||||
period_guest_sys = he->pair ? he->pair->period_guest_sys : 0;
|
||||
period_guest_us = he->pair ? he->pair->period_guest_us : 0;
|
||||
} else {
|
||||
period = self->period;
|
||||
nr_events = self->nr_events;
|
||||
total = session_total;
|
||||
period_sys = self->period_sys;
|
||||
period_us = self->period_us;
|
||||
period_guest_sys = self->period_guest_sys;
|
||||
period_guest_us = self->period_guest_us;
|
||||
period = he->period;
|
||||
nr_events = he->nr_events;
|
||||
total = total_period;
|
||||
period_sys = he->period_sys;
|
||||
period_us = he->period_us;
|
||||
period_guest_sys = he->period_guest_sys;
|
||||
period_guest_us = he->period_guest_us;
|
||||
}
|
||||
|
||||
if (total) {
|
||||
|
@ -812,8 +813,8 @@ static int hist_entry__pcnt_snprintf(struct hist_entry *self, char *s,
|
|||
|
||||
if (total > 0)
|
||||
old_percent = (period * 100.0) / total;
|
||||
if (session_total > 0)
|
||||
new_percent = (self->period * 100.0) / session_total;
|
||||
if (total_period > 0)
|
||||
new_percent = (he->period * 100.0) / total_period;
|
||||
|
||||
diff = new_percent - old_percent;
|
||||
|
||||
|
@ -862,9 +863,10 @@ int hist_entry__snprintf(struct hist_entry *he, char *s, size_t size,
|
|||
return ret;
|
||||
}
|
||||
|
||||
int hist_entry__fprintf(struct hist_entry *he, size_t size, struct hists *hists,
|
||||
struct hists *pair_hists, bool show_displacement,
|
||||
long displacement, FILE *fp, u64 session_total)
|
||||
static int hist_entry__fprintf(struct hist_entry *he, size_t size,
|
||||
struct hists *hists, struct hists *pair_hists,
|
||||
bool show_displacement, long displacement,
|
||||
u64 total_period, FILE *fp)
|
||||
{
|
||||
char bf[512];
|
||||
int ret;
|
||||
|
@ -874,14 +876,14 @@ int hist_entry__fprintf(struct hist_entry *he, size_t size, struct hists *hists,
|
|||
|
||||
ret = hist_entry__pcnt_snprintf(he, bf, size, pair_hists,
|
||||
show_displacement, displacement,
|
||||
true, session_total);
|
||||
true, total_period);
|
||||
hist_entry__snprintf(he, bf + ret, size - ret, hists);
|
||||
return fprintf(fp, "%s\n", bf);
|
||||
}
|
||||
|
||||
static size_t hist_entry__fprintf_callchain(struct hist_entry *self,
|
||||
struct hists *hists, FILE *fp,
|
||||
u64 session_total)
|
||||
static size_t hist_entry__fprintf_callchain(struct hist_entry *he,
|
||||
struct hists *hists,
|
||||
u64 total_period, FILE *fp)
|
||||
{
|
||||
int left_margin = 0;
|
||||
|
||||
|
@ -889,11 +891,10 @@ static size_t hist_entry__fprintf_callchain(struct hist_entry *self,
|
|||
struct sort_entry *se = list_first_entry(&hist_entry__sort_list,
|
||||
typeof(*se), list);
|
||||
left_margin = hists__col_len(hists, se->se_width_idx);
|
||||
left_margin -= thread__comm_len(self->thread);
|
||||
left_margin -= thread__comm_len(he->thread);
|
||||
}
|
||||
|
||||
return hist_entry_callchain__fprintf(fp, self, session_total,
|
||||
left_margin);
|
||||
return hist_entry_callchain__fprintf(he, total_period, left_margin, fp);
|
||||
}
|
||||
|
||||
size_t hists__fprintf(struct hists *hists, struct hists *pair,
|
||||
|
@ -903,6 +904,7 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
|
|||
struct sort_entry *se;
|
||||
struct rb_node *nd;
|
||||
size_t ret = 0;
|
||||
u64 total_period;
|
||||
unsigned long position = 1;
|
||||
long displacement = 0;
|
||||
unsigned int width;
|
||||
|
@ -917,6 +919,24 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
|
|||
|
||||
fprintf(fp, "# %s", pair ? "Baseline" : "Overhead");
|
||||
|
||||
if (symbol_conf.show_cpu_utilization) {
|
||||
if (sep) {
|
||||
ret += fprintf(fp, "%csys", *sep);
|
||||
ret += fprintf(fp, "%cus", *sep);
|
||||
if (perf_guest) {
|
||||
ret += fprintf(fp, "%cguest sys", *sep);
|
||||
ret += fprintf(fp, "%cguest us", *sep);
|
||||
}
|
||||
} else {
|
||||
ret += fprintf(fp, " sys ");
|
||||
ret += fprintf(fp, " us ");
|
||||
if (perf_guest) {
|
||||
ret += fprintf(fp, " guest sys ");
|
||||
ret += fprintf(fp, " guest us ");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (symbol_conf.show_nr_samples) {
|
||||
if (sep)
|
||||
fprintf(fp, "%cSamples", *sep);
|
||||
|
@ -931,24 +951,6 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
|
|||
ret += fprintf(fp, " Period ");
|
||||
}
|
||||
|
||||
if (symbol_conf.show_cpu_utilization) {
|
||||
if (sep) {
|
||||
ret += fprintf(fp, "%csys", *sep);
|
||||
ret += fprintf(fp, "%cus", *sep);
|
||||
if (perf_guest) {
|
||||
ret += fprintf(fp, "%cguest sys", *sep);
|
||||
ret += fprintf(fp, "%cguest us", *sep);
|
||||
}
|
||||
} else {
|
||||
ret += fprintf(fp, " sys ");
|
||||
ret += fprintf(fp, " us ");
|
||||
if (perf_guest) {
|
||||
ret += fprintf(fp, " guest sys ");
|
||||
ret += fprintf(fp, " guest us ");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (pair) {
|
||||
if (sep)
|
||||
ret += fprintf(fp, "%cDelta", *sep);
|
||||
|
@ -993,6 +995,8 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
|
|||
goto print_entries;
|
||||
|
||||
fprintf(fp, "# ........");
|
||||
if (symbol_conf.show_cpu_utilization)
|
||||
fprintf(fp, " ....... .......");
|
||||
if (symbol_conf.show_nr_samples)
|
||||
fprintf(fp, " ..........");
|
||||
if (symbol_conf.show_total_period)
|
||||
|
@ -1025,6 +1029,8 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
|
|||
goto out;
|
||||
|
||||
print_entries:
|
||||
total_period = hists->stats.total_period;
|
||||
|
||||
for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
|
||||
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
|
||||
|
||||
|
@ -1040,11 +1046,10 @@ print_entries:
|
|||
++position;
|
||||
}
|
||||
ret += hist_entry__fprintf(h, max_cols, hists, pair, show_displacement,
|
||||
displacement, fp, hists->stats.total_period);
|
||||
displacement, total_period, fp);
|
||||
|
||||
if (symbol_conf.use_callchain)
|
||||
ret += hist_entry__fprintf_callchain(h, hists, fp,
|
||||
hists->stats.total_period);
|
||||
ret += hist_entry__fprintf_callchain(h, hists, total_period, fp);
|
||||
if (max_rows && ++nr_rows >= max_rows)
|
||||
goto out;
|
||||
|
||||
|
|
|
@ -66,11 +66,8 @@ struct hists {
|
|||
struct hist_entry *__hists__add_entry(struct hists *self,
|
||||
struct addr_location *al,
|
||||
struct symbol *parent, u64 period);
|
||||
extern int64_t hist_entry__cmp(struct hist_entry *, struct hist_entry *);
|
||||
extern int64_t hist_entry__collapse(struct hist_entry *, struct hist_entry *);
|
||||
int hist_entry__fprintf(struct hist_entry *he, size_t size, struct hists *hists,
|
||||
struct hists *pair_hists, bool show_displacement,
|
||||
long displacement, FILE *fp, u64 session_total);
|
||||
int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
|
||||
int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
|
||||
int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size,
|
||||
struct hists *hists);
|
||||
void hist_entry__free(struct hist_entry *);
|
||||
|
|
|
@ -735,8 +735,8 @@ static int
|
|||
parse_event_modifier(const char **strp, struct perf_event_attr *attr)
|
||||
{
|
||||
const char *str = *strp;
|
||||
int exclude = 0;
|
||||
int eu = 0, ek = 0, eh = 0, precise = 0;
|
||||
int exclude = 0, exclude_GH = 0;
|
||||
int eu = 0, ek = 0, eh = 0, eH = 0, eG = 0, precise = 0;
|
||||
|
||||
if (!*str)
|
||||
return 0;
|
||||
|
@ -760,6 +760,14 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr)
|
|||
if (!exclude)
|
||||
exclude = eu = ek = eh = 1;
|
||||
eh = 0;
|
||||
} else if (*str == 'G') {
|
||||
if (!exclude_GH)
|
||||
exclude_GH = eG = eH = 1;
|
||||
eG = 0;
|
||||
} else if (*str == 'H') {
|
||||
if (!exclude_GH)
|
||||
exclude_GH = eG = eH = 1;
|
||||
eH = 0;
|
||||
} else if (*str == 'p') {
|
||||
precise++;
|
||||
} else
|
||||
|
@ -776,6 +784,8 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr)
|
|||
attr->exclude_kernel = ek;
|
||||
attr->exclude_hv = eh;
|
||||
attr->precise_ip = precise;
|
||||
attr->exclude_host = eH;
|
||||
attr->exclude_guest = eG;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -838,6 +848,7 @@ int parse_events(struct perf_evlist *evlist , const char *str, int unset __used)
|
|||
for (;;) {
|
||||
ostr = str;
|
||||
memset(&attr, 0, sizeof(attr));
|
||||
event_attr_init(&attr);
|
||||
ret = parse_event_symbols(evlist, &str, &attr);
|
||||
if (ret == EVT_FAILED)
|
||||
return -1;
|
||||
|
|
|
@ -18,7 +18,6 @@
|
|||
*
|
||||
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
*/
|
||||
#include <ctype.h>
|
||||
#include "util.h"
|
||||
#include <dirent.h>
|
||||
#include <mntent.h>
|
||||
|
|
|
@ -1,6 +1,21 @@
|
|||
#include "../perf.h"
|
||||
#include "util.h"
|
||||
#include <sys/mman.h>
|
||||
|
||||
/*
|
||||
* XXX We need to find a better place for these things...
|
||||
*/
|
||||
bool perf_host = true;
|
||||
bool perf_guest = true;
|
||||
|
||||
void event_attr_init(struct perf_event_attr *attr)
|
||||
{
|
||||
if (!perf_host)
|
||||
attr->exclude_host = 1;
|
||||
if (!perf_guest)
|
||||
attr->exclude_guest = 1;
|
||||
}
|
||||
|
||||
int mkdir_p(char *path, mode_t mode)
|
||||
{
|
||||
struct stat st;
|
||||
|
|
|
@ -242,6 +242,10 @@ int strtailcmp(const char *s1, const char *s2);
|
|||
unsigned long convert_unit(unsigned long value, char *unit);
|
||||
int readn(int fd, void *buf, size_t size);
|
||||
|
||||
struct perf_event_attr;
|
||||
|
||||
void event_attr_init(struct perf_event_attr *attr);
|
||||
|
||||
#define _STR(x) #x
|
||||
#define STR(x) _STR(x)
|
||||
|
||||
|
|