sched/deadline: speed up SCHED_DEADLINE pushes with a push-heap
Data from tests confirmed that the original active load balancing logic scaled neither in the number of CPUs nor in the number of tasks (whereas sched_rt does). Here we provide a global data structure to keep track of deadlines of the running tasks in the system. The structure is composed of a bitmask showing the free CPUs and a max-heap, needed when the system is heavily loaded. The implementation and concurrent access scheme are kept simple by design. However, our measurements show that we can compete with sched_rt on large multi-CPU machines [1]. Only the push path is addressed; the extension to use this structure also for pull decisions is straightforward. However, we are currently evaluating different data structures (in order to decrease/avoid contention) that could possibly solve both problems. We are also going to re-run tests considering recent changes inside cpupri [2]. [1] http://retis.sssup.it/~jlelli/papers/Ospert11Lelli.pdf [2] http://www.spinics.net/lists/linux-rt-users/msg06778.html Signed-off-by: Juri Lelli <juri.lelli@gmail.com> Signed-off-by: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/r/1383831828-15501-14-git-send-email-juri.lelli@gmail.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent
332ac17ef5
commit
6bfd6d72f5
6 changed files with 269 additions and 40 deletions
|
@ -14,7 +14,7 @@ endif
|
|||
obj-y += core.o proc.o clock.o cputime.o
|
||||
obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
|
||||
obj-y += wait.o completion.o
|
||||
obj-$(CONFIG_SMP) += cpupri.o
|
||||
obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o
|
||||
obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
|
||||
obj-$(CONFIG_SCHEDSTATS) += stats.o
|
||||
obj-$(CONFIG_SCHED_DEBUG) += debug.o
|
||||
|
|
|
@ -5287,6 +5287,7 @@ static void free_rootdomain(struct rcu_head *rcu)
|
|||
struct root_domain *rd = container_of(rcu, struct root_domain, rcu);
|
||||
|
||||
cpupri_cleanup(&rd->cpupri);
|
||||
cpudl_cleanup(&rd->cpudl);
|
||||
free_cpumask_var(rd->dlo_mask);
|
||||
free_cpumask_var(rd->rto_mask);
|
||||
free_cpumask_var(rd->online);
|
||||
|
@ -5345,6 +5346,8 @@ static int init_rootdomain(struct root_domain *rd)
|
|||
goto free_dlo_mask;
|
||||
|
||||
init_dl_bw(&rd->dl_bw);
|
||||
if (cpudl_init(&rd->cpudl) != 0)
|
||||
goto free_dlo_mask;
|
||||
|
||||
if (cpupri_init(&rd->cpupri) != 0)
|
||||
goto free_rto_mask;
|
||||
|
|
216
kernel/sched/cpudeadline.c
Normal file
216
kernel/sched/cpudeadline.c
Normal file
|
@ -0,0 +1,216 @@
|
|||
/*
|
||||
* kernel/sched/cpudeadline.c
|
||||
*
|
||||
* Global CPU deadline management
|
||||
*
|
||||
* Author: Juri Lelli <j.lelli@sssup.it>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; version 2
|
||||
* of the License.
|
||||
*/
|
||||
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/kernel.h>
|
||||
#include "cpudeadline.h"
|
||||
|
||||
/* Array index of the heap node sitting directly above node @i. */
static inline int parent(int i)
{
	int above = i - 1;

	return above >> 1;
}
|
||||
|
||||
/* Array index of the first (left) child of heap node @i. */
static inline int left_child(int i)
{
	int doubled = i << 1;

	return doubled + 1;
}
|
||||
|
||||
/* Array index of the second (right) child of heap node @i. */
static inline int right_child(int i)
{
	int doubled = i << 1;

	return doubled + 2;
}
|
||||
|
||||
/*
 * Non-zero when deadline @a is earlier than deadline @b.
 *
 * The comparison looks at the sign of (a - b) reinterpreted as a signed
 * 64-bit value rather than comparing the raw unsigned values.
 */
static inline int dl_time_before(u64 a, u64 b)
{
	s64 delta = (s64)(a - b);

	return delta < 0;
}
|
||||
|
||||
void cpudl_exchange(struct cpudl *cp, int a, int b)
|
||||
{
|
||||
int cpu_a = cp->elements[a].cpu, cpu_b = cp->elements[b].cpu;
|
||||
|
||||
swap(cp->elements[a], cp->elements[b]);
|
||||
swap(cp->cpu_to_idx[cpu_a], cp->cpu_to_idx[cpu_b]);
|
||||
}
|
||||
|
||||
void cpudl_heapify(struct cpudl *cp, int idx)
|
||||
{
|
||||
int l, r, largest;
|
||||
|
||||
/* adapted from lib/prio_heap.c */
|
||||
while(1) {
|
||||
l = left_child(idx);
|
||||
r = right_child(idx);
|
||||
largest = idx;
|
||||
|
||||
if ((l < cp->size) && dl_time_before(cp->elements[idx].dl,
|
||||
cp->elements[l].dl))
|
||||
largest = l;
|
||||
if ((r < cp->size) && dl_time_before(cp->elements[largest].dl,
|
||||
cp->elements[r].dl))
|
||||
largest = r;
|
||||
if (largest == idx)
|
||||
break;
|
||||
|
||||
/* Push idx down the heap one level and bump one up */
|
||||
cpudl_exchange(cp, largest, idx);
|
||||
idx = largest;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * cpudl_change_key - replace the deadline stored in a heap slot
 * @cp: the cpudl max-heap context
 * @idx: heap slot to update; must be a live slot (0 <= idx < cp->size)
 * @new_dl: the new deadline for that slot
 *
 * If the new deadline is earlier than the stored one the slot is sifted
 * down via cpudl_heapify(); otherwise it is sifted up towards the root.
 */
void cpudl_change_key(struct cpudl *cp, int idx, u64 new_dl)
{
	/*
	 * idx is a heap position, not a CPU number, so the meaningful bound
	 * is the current heap size. Bail out instead of indexing elements[]
	 * with a bogus slot (this also covers IDX_INVALID).
	 */
	if (WARN_ON(idx < 0 || idx >= cp->size))
		return;

	if (dl_time_before(new_dl, cp->elements[idx].dl)) {
		cp->elements[idx].dl = new_dl;
		cpudl_heapify(cp, idx);
		return;
	}

	cp->elements[idx].dl = new_dl;
	while (idx > 0 && dl_time_before(cp->elements[parent(idx)].dl,
					 cp->elements[idx].dl)) {
		cpudl_exchange(cp, idx, parent(idx));
		idx = parent(idx);
	}
}
|
||||
|
||||
static inline int cpudl_maximum(struct cpudl *cp)
|
||||
{
|
||||
return cp->elements[0].cpu;
|
||||
}
|
||||
|
||||
/*
|
||||
* cpudl_find - find the best (later-dl) CPU in the system
|
||||
* @cp: the cpudl max-heap context
|
||||
* @p: the task
|
||||
* @later_mask: a mask to fill in with the selected CPUs (or NULL)
|
||||
*
|
||||
* Returns: int - best CPU (heap maximum if suitable)
|
||||
*/
|
||||
int cpudl_find(struct cpudl *cp, struct task_struct *p,
|
||||
struct cpumask *later_mask)
|
||||
{
|
||||
int best_cpu = -1;
|
||||
const struct sched_dl_entity *dl_se = &p->dl;
|
||||
|
||||
if (later_mask && cpumask_and(later_mask, cp->free_cpus,
|
||||
&p->cpus_allowed) && cpumask_and(later_mask,
|
||||
later_mask, cpu_active_mask)) {
|
||||
best_cpu = cpumask_any(later_mask);
|
||||
goto out;
|
||||
} else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&
|
||||
dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
|
||||
best_cpu = cpudl_maximum(cp);
|
||||
if (later_mask)
|
||||
cpumask_set_cpu(best_cpu, later_mask);
|
||||
}
|
||||
|
||||
out:
|
||||
WARN_ON(best_cpu > num_present_cpus() && best_cpu != -1);
|
||||
|
||||
return best_cpu;
|
||||
}
|
||||
|
||||
/*
|
||||
* cpudl_set - update the cpudl max-heap
|
||||
* @cp: the cpudl max-heap context
|
||||
* @cpu: the target cpu
|
||||
* @dl: the new earliest deadline for this cpu
|
||||
*
|
||||
* Notes: assumes cpu_rq(cpu)->lock is locked
|
||||
*
|
||||
* Returns: (void)
|
||||
*/
|
||||
void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid)
|
||||
{
|
||||
int old_idx, new_cpu;
|
||||
unsigned long flags;
|
||||
|
||||
WARN_ON(cpu > num_present_cpus());
|
||||
|
||||
raw_spin_lock_irqsave(&cp->lock, flags);
|
||||
old_idx = cp->cpu_to_idx[cpu];
|
||||
if (!is_valid) {
|
||||
/* remove item */
|
||||
if (old_idx == IDX_INVALID) {
|
||||
/*
|
||||
* Nothing to remove if old_idx was invalid.
|
||||
* This could happen if a rq_offline_dl is
|
||||
* called for a CPU without -dl tasks running.
|
||||
*/
|
||||
goto out;
|
||||
}
|
||||
new_cpu = cp->elements[cp->size - 1].cpu;
|
||||
cp->elements[old_idx].dl = cp->elements[cp->size - 1].dl;
|
||||
cp->elements[old_idx].cpu = new_cpu;
|
||||
cp->size--;
|
||||
cp->cpu_to_idx[new_cpu] = old_idx;
|
||||
cp->cpu_to_idx[cpu] = IDX_INVALID;
|
||||
while (old_idx > 0 && dl_time_before(
|
||||
cp->elements[parent(old_idx)].dl,
|
||||
cp->elements[old_idx].dl)) {
|
||||
cpudl_exchange(cp, old_idx, parent(old_idx));
|
||||
old_idx = parent(old_idx);
|
||||
}
|
||||
cpumask_set_cpu(cpu, cp->free_cpus);
|
||||
cpudl_heapify(cp, old_idx);
|
||||
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (old_idx == IDX_INVALID) {
|
||||
cp->size++;
|
||||
cp->elements[cp->size - 1].dl = 0;
|
||||
cp->elements[cp->size - 1].cpu = cpu;
|
||||
cp->cpu_to_idx[cpu] = cp->size - 1;
|
||||
cpudl_change_key(cp, cp->size - 1, dl);
|
||||
cpumask_clear_cpu(cpu, cp->free_cpus);
|
||||
} else {
|
||||
cpudl_change_key(cp, old_idx, dl);
|
||||
}
|
||||
|
||||
out:
|
||||
raw_spin_unlock_irqrestore(&cp->lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* cpudl_init - initialize the cpudl structure
|
||||
* @cp: the cpudl max-heap context
|
||||
*/
|
||||
int cpudl_init(struct cpudl *cp)
|
||||
{
|
||||
int i;
|
||||
|
||||
memset(cp, 0, sizeof(*cp));
|
||||
raw_spin_lock_init(&cp->lock);
|
||||
cp->size = 0;
|
||||
for (i = 0; i < NR_CPUS; i++)
|
||||
cp->cpu_to_idx[i] = IDX_INVALID;
|
||||
if (!alloc_cpumask_var(&cp->free_cpus, GFP_KERNEL))
|
||||
return -ENOMEM;
|
||||
cpumask_setall(cp->free_cpus);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* cpudl_cleanup - clean up the cpudl structure
|
||||
* @cp: the cpudl max-heap context
|
||||
*/
|
||||
void cpudl_cleanup(struct cpudl *cp)
|
||||
{
|
||||
/*
|
||||
* nothing to do for the moment
|
||||
*/
|
||||
}
|
33
kernel/sched/cpudeadline.h
Normal file
33
kernel/sched/cpudeadline.h
Normal file
|
@ -0,0 +1,33 @@
|
|||
#ifndef _LINUX_CPUDL_H
|
||||
#define _LINUX_CPUDL_H
|
||||
|
||||
#include <linux/sched.h>
|
||||
|
||||
/* cpu_to_idx[] value for a CPU that currently has no slot in the heap. */
#define IDX_INVALID (-1)
|
||||
|
||||
struct array_item {
|
||||
u64 dl;
|
||||
int cpu;
|
||||
};
|
||||
|
||||
struct cpudl {
|
||||
raw_spinlock_t lock;
|
||||
int size;
|
||||
int cpu_to_idx[NR_CPUS];
|
||||
struct array_item elements[NR_CPUS];
|
||||
cpumask_var_t free_cpus;
|
||||
};
|
||||
|
||||
|
||||
#ifdef CONFIG_SMP
int cpudl_find(struct cpudl *cp, struct task_struct *p,
	       struct cpumask *later_mask);
void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid);
int cpudl_init(struct cpudl *cp);
void cpudl_cleanup(struct cpudl *cp);
#else
/*
 * !SMP: no-op stubs. They accept the same argument lists as the functions
 * declared above so that a call site compiles in either configuration.
 */
#define cpudl_set(cp, cpu, dl, is_valid) do { } while (0)
#define cpudl_init(cp) do { } while (0)
#endif /* CONFIG_SMP */
|
||||
|
||||
#endif /* _LINUX_CPUDL_H */
|
|
@ -16,6 +16,8 @@
|
|||
*/
|
||||
#include "sched.h"
|
||||
|
||||
#include <linux/slab.h>
|
||||
|
||||
struct dl_bandwidth def_dl_bandwidth;
|
||||
|
||||
static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
|
||||
|
@ -640,6 +642,7 @@ static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
|
|||
*/
|
||||
dl_rq->earliest_dl.next = dl_rq->earliest_dl.curr;
|
||||
dl_rq->earliest_dl.curr = deadline;
|
||||
cpudl_set(&rq->rd->cpudl, rq->cpu, deadline, 1);
|
||||
} else if (dl_rq->earliest_dl.next == 0 ||
|
||||
dl_time_before(deadline, dl_rq->earliest_dl.next)) {
|
||||
/*
|
||||
|
@ -663,6 +666,7 @@ static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
|
|||
if (!dl_rq->dl_nr_running) {
|
||||
dl_rq->earliest_dl.curr = 0;
|
||||
dl_rq->earliest_dl.next = 0;
|
||||
cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);
|
||||
} else {
|
||||
struct rb_node *leftmost = dl_rq->rb_leftmost;
|
||||
struct sched_dl_entity *entry;
|
||||
|
@ -670,6 +674,7 @@ static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
|
|||
entry = rb_entry(leftmost, struct sched_dl_entity, rb_node);
|
||||
dl_rq->earliest_dl.curr = entry->deadline;
|
||||
dl_rq->earliest_dl.next = next_deadline(rq);
|
||||
cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline, 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -855,9 +860,6 @@ static void yield_task_dl(struct rq *rq)
|
|||
#ifdef CONFIG_SMP
|
||||
|
||||
static int find_later_rq(struct task_struct *task);
|
||||
static int latest_cpu_find(struct cpumask *span,
|
||||
struct task_struct *task,
|
||||
struct cpumask *later_mask);
|
||||
|
||||
static int
|
||||
select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
|
||||
|
@ -904,7 +906,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
|
|||
* let's hope p can move out.
|
||||
*/
|
||||
if (rq->curr->nr_cpus_allowed == 1 ||
|
||||
latest_cpu_find(rq->rd->span, rq->curr, NULL) == -1)
|
||||
cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1)
|
||||
return;
|
||||
|
||||
/*
|
||||
|
@ -912,7 +914,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
|
|||
* see if it is pushed or pulled somewhere else.
|
||||
*/
|
||||
if (p->nr_cpus_allowed != 1 &&
|
||||
latest_cpu_find(rq->rd->span, p, NULL) != -1)
|
||||
cpudl_find(&rq->rd->cpudl, p, NULL) != -1)
|
||||
return;
|
||||
|
||||
resched_task(rq->curr);
|
||||
|
@ -1085,39 +1087,6 @@ next_node:
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static int latest_cpu_find(struct cpumask *span,
|
||||
struct task_struct *task,
|
||||
struct cpumask *later_mask)
|
||||
{
|
||||
const struct sched_dl_entity *dl_se = &task->dl;
|
||||
int cpu, found = -1, best = 0;
|
||||
u64 max_dl = 0;
|
||||
|
||||
for_each_cpu(cpu, span) {
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
struct dl_rq *dl_rq = &rq->dl;
|
||||
|
||||
if (cpumask_test_cpu(cpu, &task->cpus_allowed) &&
|
||||
(!dl_rq->dl_nr_running || dl_time_before(dl_se->deadline,
|
||||
dl_rq->earliest_dl.curr))) {
|
||||
if (later_mask)
|
||||
cpumask_set_cpu(cpu, later_mask);
|
||||
if (!best && !dl_rq->dl_nr_running) {
|
||||
best = 1;
|
||||
found = cpu;
|
||||
} else if (!best &&
|
||||
dl_time_before(max_dl,
|
||||
dl_rq->earliest_dl.curr)) {
|
||||
max_dl = dl_rq->earliest_dl.curr;
|
||||
found = cpu;
|
||||
}
|
||||
} else if (later_mask)
|
||||
cpumask_clear_cpu(cpu, later_mask);
|
||||
}
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
|
||||
|
||||
static int find_later_rq(struct task_struct *task)
|
||||
|
@ -1134,7 +1103,8 @@ static int find_later_rq(struct task_struct *task)
|
|||
if (task->nr_cpus_allowed == 1)
|
||||
return -1;
|
||||
|
||||
best_cpu = latest_cpu_find(task_rq(task)->rd->span, task, later_mask);
|
||||
best_cpu = cpudl_find(&task_rq(task)->rd->cpudl,
|
||||
task, later_mask);
|
||||
if (best_cpu == -1)
|
||||
return -1;
|
||||
|
||||
|
@ -1510,6 +1480,9 @@ static void rq_online_dl(struct rq *rq)
|
|||
{
|
||||
if (rq->dl.overloaded)
|
||||
dl_set_overload(rq);
|
||||
|
||||
if (rq->dl.dl_nr_running > 0)
|
||||
cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr, 1);
|
||||
}
|
||||
|
||||
/* Assumes rq->lock is held */
|
||||
|
@ -1517,6 +1490,8 @@ static void rq_offline_dl(struct rq *rq)
|
|||
{
|
||||
if (rq->dl.overloaded)
|
||||
dl_clear_overload(rq);
|
||||
|
||||
cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);
|
||||
}
|
||||
|
||||
void init_sched_dl_class(void)
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include <linux/slab.h>
|
||||
|
||||
#include "cpupri.h"
|
||||
#include "cpudeadline.h"
|
||||
#include "cpuacct.h"
|
||||
|
||||
struct rq;
|
||||
|
@ -503,6 +504,7 @@ struct root_domain {
|
|||
cpumask_var_t dlo_mask;
|
||||
atomic_t dlo_count;
|
||||
struct dl_bw dl_bw;
|
||||
struct cpudl cpudl;
|
||||
|
||||
/*
|
||||
* The "RT overload" flag: it gets set if a CPU has more than
|
||||
|
|
Loading…
Reference in a new issue