perf/x86/intel: Rework the large PEBS setup code
In order to allow optimizing perf_pmu_sched_task() we must ensure that
perf_sched_cb_{inc,dec}() are no longer called from NMI context; this means
that pmu::{start,stop}() can no longer use them. Prepare for this by
reworking the whole large PEBS setup code.

The current code relied on the cpuc->pebs_enabled state, however since that
reflects the current active state as per pmu::{start,stop}() we can no
longer rely on it.

Introduce two counters: cpuc->n_pebs and cpuc->n_large_pebs, which count the
total number of PEBS events and the number of PEBS events that have
FREERUNNING set, respectively. With this we can tell whether the current
setup requires a single-record interrupt threshold or can use a larger
buffer.

This also improves the code in that it re-enables the large threshold once
the PEBS event that required a single record gets removed.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 3f005e7de3
commit 09e61b4f78
3 changed files with 73 additions and 35 deletions
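Before the diff, a minimal stand-alone C sketch of the accounting scheme the message describes. This is illustrative only, not the kernel code: struct model, add_event(), del_event(), needs_sched_cb() and update_state() are invented stand-ins for cpu_hw_events, intel_pmu_pebs_add/del(), pebs_needs_sched_cb() and pebs_update_state() in the diff below.

/*
 * Stand-alone model (not kernel code): the sched_task callback and the
 * large interrupt threshold are wanted only while *every* PEBS event can
 * use the large buffer (FREERUNNING).
 */
#include <stdbool.h>
#include <stdio.h>

struct model {
	int n_pebs;		/* all PEBS events */
	int n_large_pebs;	/* PEBS events with FREERUNNING set */
	int sched_cb_refs;	/* stands in for perf_sched_cb_{inc,dec}() */
	bool large_threshold;	/* stands in for ds->pebs_interrupt_threshold */
};

static bool needs_sched_cb(struct model *m)
{
	return m->n_pebs && (m->n_pebs == m->n_large_pebs);
}

static void update_state(struct model *m, bool needed_cb)
{
	if (needed_cb != needs_sched_cb(m)) {
		if (!needed_cb)
			m->sched_cb_refs++;
		else
			m->sched_cb_refs--;

		m->large_threshold = (m->n_pebs == m->n_large_pebs);
	}
}

static void add_event(struct model *m, bool freerunning)
{
	bool needed_cb = needs_sched_cb(m);

	m->n_pebs++;
	if (freerunning)
		m->n_large_pebs++;
	update_state(m, needed_cb);
}

static void del_event(struct model *m, bool freerunning)
{
	bool needed_cb = needs_sched_cb(m);

	m->n_pebs--;
	if (freerunning)
		m->n_large_pebs--;
	update_state(m, needed_cb);
}

int main(void)
{
	struct model m = { 0 };

	add_event(&m, true);	/* one large-capable event: large threshold on */
	add_event(&m, false);	/* single-record event joins: back to threshold 1 */
	del_event(&m, false);	/* it is removed again: large threshold restored */

	printf("cb_refs=%d large_threshold=%d\n",
	       m.sched_cb_refs, m.large_threshold);
	return 0;
}

Run stand-alone this prints cb_refs=1 large_threshold=1, i.e. the large threshold comes back once the single-record event is gone, which is the behavioural improvement the message calls out.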
@@ -806,9 +806,55 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
 	return &emptyconstraint;
 }
 
-static inline bool pebs_is_enabled(struct cpu_hw_events *cpuc)
+/*
+ * We need the sched_task callback even for per-cpu events when we use
+ * the large interrupt threshold, such that we can provide PID and TID
+ * to PEBS samples.
+ */
+static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
 {
-	return (cpuc->pebs_enabled & ((1ULL << MAX_PEBS_EVENTS) - 1));
+	return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
+}
+
+static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
+{
+	struct debug_store *ds = cpuc->ds;
+	u64 threshold;
+
+	if (cpuc->n_pebs == cpuc->n_large_pebs) {
+		threshold = ds->pebs_absolute_maximum -
+			x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
+	} else {
+		threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
+	}
+
+	ds->pebs_interrupt_threshold = threshold;
+}
+
+static void
+pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu)
+{
+	if (needed_cb != pebs_needs_sched_cb(cpuc)) {
+		if (!needed_cb)
+			perf_sched_cb_inc(pmu);
+		else
+			perf_sched_cb_dec(pmu);
+
+		pebs_update_threshold(cpuc);
+	}
+}
+
+static void intel_pmu_pebs_add(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	bool needed_cb = pebs_needs_sched_cb(cpuc);
+
+	cpuc->n_pebs++;
+	if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
+		cpuc->n_large_pebs++;
+
+	pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
 }
 
 void intel_pmu_pebs_enable(struct perf_event *event)
@@ -816,12 +862,11 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 	struct debug_store *ds = cpuc->ds;
-	bool first_pebs;
-	u64 threshold;
+
+	intel_pmu_pebs_add(event);
 
 	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
-	first_pebs = !pebs_is_enabled(cpuc);
 	cpuc->pebs_enabled |= 1ULL << hwc->idx;
 
 	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
@@ -830,46 +875,34 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 		cpuc->pebs_enabled |= 1ULL << 63;
 
 	/*
-	 * When the event is constrained enough we can use a larger
-	 * threshold and run the event with less frequent PMI.
+	 * Use auto-reload if possible to save a MSR write in the PMI.
+	 * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD.
 	 */
-	if (hwc->flags & PERF_X86_EVENT_FREERUNNING) {
-		threshold = ds->pebs_absolute_maximum -
-			x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
-
-		if (first_pebs)
-			perf_sched_cb_inc(event->ctx->pmu);
-	} else {
-		threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
-
-		/*
-		 * If not all events can use larger buffer,
-		 * roll back to threshold = 1
-		 */
-		if (!first_pebs &&
-		    (ds->pebs_interrupt_threshold > threshold))
-			perf_sched_cb_dec(event->ctx->pmu);
-	}
-
-	/* Use auto-reload if possible to save a MSR write in the PMI */
 	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
 		ds->pebs_event_reset[hwc->idx] =
 			(u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
 	}
+}
 
-	if (first_pebs || ds->pebs_interrupt_threshold > threshold)
-		ds->pebs_interrupt_threshold = threshold;
+static void intel_pmu_pebs_del(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	bool needed_cb = pebs_needs_sched_cb(cpuc);
+
+	cpuc->n_pebs--;
+	if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
+		cpuc->n_large_pebs--;
+
+	pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
 }
 
 void intel_pmu_pebs_disable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
-	struct debug_store *ds = cpuc->ds;
-	bool large_pebs = ds->pebs_interrupt_threshold >
-		ds->pebs_buffer_base + x86_pmu.pebs_record_size;
 
-	if (large_pebs)
+	if (cpuc->n_pebs == cpuc->n_large_pebs)
 		intel_pmu_drain_pebs_buffer();
 
 	cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
@@ -879,13 +912,12 @@ void intel_pmu_pebs_disable(struct perf_event *event)
 	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
 		cpuc->pebs_enabled &= ~(1ULL << 63);
 
-	if (large_pebs && !pebs_is_enabled(cpuc))
-		perf_sched_cb_dec(event->ctx->pmu);
-
 	if (cpuc->enabled)
 		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
 
 	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
+
+	intel_pmu_pebs_del(event);
 }
 
 void intel_pmu_pebs_enable_all(void)

@@ -194,6 +194,8 @@ struct cpu_hw_events {
 	 */
 	struct debug_store	*ds;
 	u64			pebs_enabled;
+	int			n_pebs;
+	int			n_large_pebs;
 
 	/*
 	 * Intel LBR bits

@@ -2818,6 +2818,10 @@ void perf_sched_cb_inc(struct pmu *pmu)
 /*
  * This function provides the context switch callback to the lower code
  * layer. It is invoked ONLY when the context switch callback is enabled.
+ *
+ * This callback is relevant even to per-cpu events; for example multi event
+ * PEBS requires this to provide PID/TID information. This requires we flush
+ * all queued PEBS records before we context switch to a new task.
  */
 static void perf_pmu_sched_task(struct task_struct *prev,
 				struct task_struct *next,