sched: Remove get_online_cpus() usage
Remove get_online_cpus() usage from the scheduler; there are 4 sites that
use it:

 - sched_init_smp(); where it's completely superfluous since we're in
   'early' boot and there simply cannot be any hotplugging.

 - sched_getaffinity(); we already take a raw spinlock to protect the
   task cpus_allowed mask, this disables preemption and therefore
   also stabilizes cpu_online_mask as that's modified using
   stop_machine. However, switch to active mask for symmetry with
   sched_setaffinity()/set_cpus_allowed_ptr(). We guarantee active
   mask stability by inserting sync_rcu/sched() into _cpu_down.

 - sched_setaffinity(); we don't appear to need get_online_cpus()
   either, there are two sites where hotplug appears relevant:

    * cpuset_cpus_allowed(); for the !cpuset case we use possible_mask,
      for the cpuset case we hold task_lock, which is a spinlock and
      thus for mainline disables preemption (might cause pain on RT).

    * set_cpus_allowed_ptr(); holds all scheduler locks and thus has
      preemption properly disabled; also it already deals with hotplug
      races explicitly where it releases them.

 - migrate_swap(); we can make stop_two_cpus() do the heavy lifting for
   us with a little trickery. By adding a sync_sched/rcu() after the
   CPU_DOWN_PREPARE notifier we can provide preempt/rcu guarantees for
   cpu_active_mask. Use these to validate that both our cpus are active
   when queueing the stop work before we queue the stop_machine works
   for take_cpu_down().

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Link: http://lkml.kernel.org/r/20131011123820.GV3081@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent
746023159c
commit
6acce3ef84
3 changed files with 48 additions and 15 deletions
17
kernel/cpu.c
17
kernel/cpu.c
|
@ -308,6 +308,23 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
|
|||
}
|
||||
smpboot_park_threads(cpu);
|
||||
|
||||
/*
|
||||
* By now we've cleared cpu_active_mask, wait for all preempt-disabled
|
||||
* and RCU users of this state to go away such that all new such users
|
||||
* will observe it.
|
||||
*
|
||||
* For CONFIG_PREEMPT we have preemptible RCU and its sync_rcu() might
|
||||
* not imply sync_sched(), so explicitly call both.
|
||||
*/
|
||||
#ifdef CONFIG_PREEMPT
|
||||
synchronize_sched();
|
||||
#endif
|
||||
synchronize_rcu();
|
||||
|
||||
/*
|
||||
* So now all preempt/rcu users must observe !cpu_active().
|
||||
*/
|
||||
|
||||
err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
|
||||
if (err) {
|
||||
/* CPU didn't die: tell everyone. Can't complain. */
|
||||
|
|
|
@ -1085,8 +1085,6 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p)
|
|||
struct migration_swap_arg arg;
|
||||
int ret = -EINVAL;
|
||||
|
||||
get_online_cpus();
|
||||
|
||||
arg = (struct migration_swap_arg){
|
||||
.src_task = cur,
|
||||
.src_cpu = task_cpu(cur),
|
||||
|
@ -1097,6 +1095,10 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p)
|
|||
if (arg.src_cpu == arg.dst_cpu)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* These three tests are all lockless; this is OK since all of them
|
||||
* will be re-checked with proper locks held further down the line.
|
||||
*/
|
||||
if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu))
|
||||
goto out;
|
||||
|
||||
|
@ -1109,7 +1111,6 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p)
|
|||
ret = stop_two_cpus(arg.dst_cpu, arg.src_cpu, migrate_swap_stop, &arg);
|
||||
|
||||
out:
|
||||
put_online_cpus();
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -3710,7 +3711,6 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
|
|||
struct task_struct *p;
|
||||
int retval;
|
||||
|
||||
get_online_cpus();
|
||||
rcu_read_lock();
|
||||
|
||||
p = find_process_by_pid(pid);
|
||||
|
@ -3773,7 +3773,6 @@ out_free_cpus_allowed:
|
|||
free_cpumask_var(cpus_allowed);
|
||||
out_put_task:
|
||||
put_task_struct(p);
|
||||
put_online_cpus();
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
@ -3818,7 +3817,6 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
|
|||
unsigned long flags;
|
||||
int retval;
|
||||
|
||||
get_online_cpus();
|
||||
rcu_read_lock();
|
||||
|
||||
retval = -ESRCH;
|
||||
|
@ -3831,12 +3829,11 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
|
|||
goto out_unlock;
|
||||
|
||||
raw_spin_lock_irqsave(&p->pi_lock, flags);
|
||||
cpumask_and(mask, &p->cpus_allowed, cpu_online_mask);
|
||||
cpumask_and(mask, &p->cpus_allowed, cpu_active_mask);
|
||||
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
|
||||
|
||||
out_unlock:
|
||||
rcu_read_unlock();
|
||||
put_online_cpus();
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
@ -6494,14 +6491,17 @@ void __init sched_init_smp(void)
|
|||
|
||||
sched_init_numa();
|
||||
|
||||
get_online_cpus();
|
||||
/*
|
||||
* There's no userspace yet to cause hotplug operations; hence all the
|
||||
* cpu masks are stable and all blatant races in the below code cannot
|
||||
* happen.
|
||||
*/
|
||||
mutex_lock(&sched_domains_mutex);
|
||||
init_sched_domains(cpu_active_mask);
|
||||
cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
|
||||
if (cpumask_empty(non_isolated_cpus))
|
||||
cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
|
||||
mutex_unlock(&sched_domains_mutex);
|
||||
put_online_cpus();
|
||||
|
||||
hotcpu_notifier(sched_domains_numa_masks_update, CPU_PRI_SCHED_ACTIVE);
|
||||
hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE);
|
||||
|
|
|
@ -234,11 +234,13 @@ static void irq_cpu_stop_queue_work(void *arg)
|
|||
*/
|
||||
int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg)
|
||||
{
|
||||
int call_cpu;
|
||||
struct cpu_stop_done done;
|
||||
struct cpu_stop_work work1, work2;
|
||||
struct irq_cpu_stop_queue_work_info call_args;
|
||||
struct multi_stop_data msdata = {
|
||||
struct multi_stop_data msdata;
|
||||
|
||||
preempt_disable();
|
||||
msdata = (struct multi_stop_data){
|
||||
.fn = fn,
|
||||
.data = arg,
|
||||
.num_threads = 2,
|
||||
|
@ -261,17 +263,31 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
|
|||
cpu_stop_init_done(&done, 2);
|
||||
set_state(&msdata, MULTI_STOP_PREPARE);
|
||||
|
||||
/*
|
||||
* If we observe both CPUs active we know _cpu_down() cannot yet have
|
||||
* queued its stop_machine works and therefore ours will get executed
|
||||
* first. Or it's not either one of our CPUs that's getting unplugged,
|
||||
* in which case we don't care.
|
||||
*
|
||||
* This relies on the stopper workqueues to be FIFO.
|
||||
*/
|
||||
if (!cpu_active(cpu1) || !cpu_active(cpu2)) {
|
||||
preempt_enable();
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
/*
|
||||
* Queuing needs to be done by the lowest numbered CPU, to ensure
|
||||
* that works are always queued in the same order on every CPU.
|
||||
* This prevents deadlocks.
|
||||
*/
|
||||
call_cpu = min(cpu1, cpu2);
|
||||
|
||||
smp_call_function_single(call_cpu, &irq_cpu_stop_queue_work,
|
||||
smp_call_function_single(min(cpu1, cpu2),
|
||||
&irq_cpu_stop_queue_work,
|
||||
&call_args, 0);
|
||||
preempt_enable();
|
||||
|
||||
wait_for_completion(&done.completion);
|
||||
|
||||
return done.executed ? done.ret : -ENOENT;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue