intel_pstate: add sample time scaling
The PID assumes that samples are of equal time, which is not true for deferrable timers when the system goes idle. This causes the PID to take a long time to converge to the min P state and, depending on the pattern of the idle load, can make the P state appear stuck.

The hold-off value of three sample times before using the scaling gives a grace period to applications that have high performance requirements and spend a lot of time idle. The poster child for this behavior is the ffmpeg benchmark in the Phoronix test suite.

Cc: 3.14+ <stable@vger.kernel.org> # 3.14+
Signed-off-by: Dirk Brandewie <dirk.j.brandewie@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
parent f0fe3cd7e1
commit c4ee841f60

1 changed file with 17 additions and 1 deletion
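For orientation before the diff: the fix scales the observed busy value by (expected sample interval / actual interval) whenever the interval stretched past three nominal sample times. The stand-alone C sketch below illustrates that arithmetic only; the simplified 8.8 fixed-point helpers mimic the kernel's int_tofp()/mul_fp()/div_fp() (intel_pstate uses FRAC_BITS = 8), while scale_busy() and the example numbers are hypothetical and not part of the driver.

#include <stdint.h>
#include <stdio.h>

/* Simplified 8.8 fixed-point helpers, standing in for the kernel's
 * int_tofp()/mul_fp()/div_fp(). */
#define FRAC_BITS 8
static int32_t int_tofp(int32_t x) { return x << FRAC_BITS; }
static int32_t mul_fp(int32_t x, int32_t y) { return (x * y) >> FRAC_BITS; }
static int32_t div_fp(int32_t x, int32_t y)
{
        return (int32_t)(((int64_t)x << FRAC_BITS) / y);
}

/*
 * If the gap between two samples is more than three nominal sample periods
 * (the timer was deferred because the CPU was idle), scale the busy value
 * by expected-time / actual-time so the PID sees how idle the CPU really was.
 */
static int32_t scale_busy(int32_t core_busy_fp, uint32_t sample_time_us,
                          uint32_t duration_us)
{
        if (duration_us > sample_time_us * 3) {
                int32_t ratio = div_fp(int_tofp(sample_time_us),
                                       int_tofp(duration_us));
                core_busy_fp = mul_fp(core_busy_fp, ratio);
        }
        return core_busy_fp;
}

int main(void)
{
        /* Nominal 10 ms sample period, but the CPU slept for 80 ms. */
        int32_t busy = int_tofp(90);    /* 90% busy, in fixed point */
        int32_t scaled = scale_busy(busy, 10000, 80000);

        /* 90 * (10000 / 80000) = 11.25%, driving the PID toward min P state. */
        printf("scaled busy = %d/256 (~%.2f%%)\n", scaled, scaled / 256.0);
        return 0;
}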
drivers/cpufreq/intel_pstate.c

@@ -60,6 +60,7 @@ struct sample {
 	u64 aperf;
 	u64 mperf;
 	int freq;
+	ktime_t time;
 };
 
 struct pstate_data {
@@ -97,6 +98,7 @@ struct cpudata {
 	struct vid_data vid;
 	struct _pid pid;
 
+	ktime_t last_sample_time;
 	u64 prev_aperf;
 	u64 prev_mperf;
 	struct sample sample;
@@ -583,6 +585,8 @@ static inline void intel_pstate_sample(struct cpudata *cpu)
 	aperf = aperf >> FRAC_BITS;
 	mperf = mperf >> FRAC_BITS;
 
+	cpu->last_sample_time = cpu->sample.time;
+	cpu->sample.time = ktime_get();
 	cpu->sample.aperf = aperf;
 	cpu->sample.mperf = mperf;
 	cpu->sample.aperf -= cpu->prev_aperf;
@@ -605,12 +609,24 @@ static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
 
 static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
 {
-	int32_t core_busy, max_pstate, current_pstate;
+	int32_t core_busy, max_pstate, current_pstate, sample_ratio;
+	u32 duration_us;
+	u32 sample_time;
 
 	core_busy = cpu->sample.core_pct_busy;
 	max_pstate = int_tofp(cpu->pstate.max_pstate);
 	current_pstate = int_tofp(cpu->pstate.current_pstate);
 	core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
+
+	sample_time = (pid_params.sample_rate_ms * USEC_PER_MSEC);
+	duration_us = (u32) ktime_us_delta(cpu->sample.time,
+				cpu->last_sample_time);
+	if (duration_us > sample_time * 3) {
+		sample_ratio = div_fp(int_tofp(sample_time),
+				int_tofp(duration_us));
+		core_busy = mul_fp(core_busy, sample_ratio);
+	}
+
 	return core_busy;
 }
 
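Worked example of the new branch (assuming sample_rate_ms is 10, so sample_time = 10000 us): a 25000 us gap between samples stays under the 3x hold-off (30000 us), so core_busy is left untouched and bursty-but-idle loads such as the ffmpeg benchmark keep their full busy credit; a 100000 us gap exceeds the hold-off, so core_busy is multiplied by 10000/100000 = 0.1 and the PID converges to the minimum P state quickly instead of appearing stuck.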