rcu: Simplify curing of load woes
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 5d96d68d20f8b666be15c6e2e6ac1c86f2d0af47..0a8ec5b2e208eebe7eafb3c86f5cc8cd8f837539 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -95,7 +95,6 @@ static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
 DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
 DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu);
 DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
-static DEFINE_PER_CPU(wait_queue_head_t, rcu_cpu_wq);
 DEFINE_PER_CPU(char, rcu_cpu_has_work);
 static char rcu_kthreads_spawnable;
 
@@ -1476,7 +1475,7 @@ static void invoke_rcu_cpu_kthread(void)
                local_irq_restore(flags);
                return;
        }
-       wake_up(&__get_cpu_var(rcu_cpu_wq));
+       wake_up_process(__this_cpu_read(rcu_cpu_kthread_task));
        local_irq_restore(flags);
 }
 
@@ -1596,14 +1595,12 @@ static int rcu_cpu_kthread(void *arg)
        unsigned long flags;
        int spincnt = 0;
        unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu);
-       wait_queue_head_t *wqp = &per_cpu(rcu_cpu_wq, cpu);
        char work;
        char *workp = &per_cpu(rcu_cpu_has_work, cpu);
 
        for (;;) {
                *statusp = RCU_KTHREAD_WAITING;
-               wait_event_interruptible(*wqp,
-                                        *workp != 0 || kthread_should_stop());
+               rcu_wait(*workp != 0 || kthread_should_stop());
                local_bh_disable();
                if (rcu_cpu_kthread_should_stop(cpu)) {
                        local_bh_enable();
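
With the per-CPU wait queue removed, the kthread blocks directly and invoke_rcu_cpu_kthread() wakes it with a bare wake_up_process().  The rcu_wait() helper used above is introduced elsewhere in this series rather than in this file; a minimal sketch of the idiom it stands for, assuming the usual set_current_state()/schedule() pattern, might look like this:

/*
 * Sketch of the rcu_wait() idiom: sleep in TASK_INTERRUPTIBLE until
 * cond becomes true.  The condition is re-checked after the state
 * change, so a concurrent wake_up_process() cannot be lost.
 */
#define rcu_wait(cond)							\
do {									\
	for (;;) {							\
		set_current_state(TASK_INTERRUPTIBLE);			\
		if (cond)						\
			break;						\
		schedule();						\
	}								\
	__set_current_state(TASK_RUNNING);				\
} while (0)
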
@@ -1638,6 +1635,20 @@ static int rcu_cpu_kthread(void *arg)
  * to manipulate rcu_cpu_kthread_task.  There might be another CPU
  * attempting to access it during boot, but the locking in kthread_bind()
  * will enforce sufficient ordering.
+ *
+ * Please note that we cannot simply refuse to wake up the per-CPU
+ * kthread because kthreads are created in TASK_UNINTERRUPTIBLE state,
+ * which can result in softlockup complaints if the task ends up being
+ * idle for more than a couple of minutes.
+ *
+ * However, please note also that we cannot bind the per-CPU kthread to its
+ * CPU until that CPU is fully online.  We also cannot wait until the
+ * CPU is fully online before we create its per-CPU kthread, as this would
+ * deadlock the system when CPU notifiers tried waiting for grace
+ * periods.  So we bind the per-CPU kthread to its CPU only if the CPU
+ * is online.  If its CPU is not yet fully online, then the code in
+ * rcu_cpu_kthread() will wait until it is fully online, and then do
+ * the binding.
  */
 static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu)
 {
@@ -1650,13 +1661,14 @@ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu)
        t = kthread_create(rcu_cpu_kthread, (void *)(long)cpu, "rcuc%d", cpu);
        if (IS_ERR(t))
                return PTR_ERR(t);
-       kthread_bind(t, cpu);
+       if (cpu_online(cpu))
+               kthread_bind(t, cpu);
        per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu;
        WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL);
-       per_cpu(rcu_cpu_kthread_task, cpu) = t;
-       wake_up_process(t);
        sp.sched_priority = RCU_KTHREAD_PRIO;
        sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
+       per_cpu(rcu_cpu_kthread_task, cpu) = t;
+       wake_up_process(t); /* Get to TASK_INTERRUPTIBLE quickly. */
        return 0;
 }
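
As the comment above explains, this spawn path can run before the incoming CPU is online (at CPU_UP_PREPARE time), so kthread_bind() is now done only when the CPU is already up; otherwise the kthread binds itself once its CPU comes online.  The wake_up_process() is kept, but moved after the SCHED_FIFO setup and the publication of the task pointer, so the new kthread leaves TASK_UNINTERRUPTIBLE promptly (avoiding hung-task complaints) and becomes visible to the wake path only once fully initialized.  A rough, illustrative sketch of the deferred-binding check mentioned in the comment (an approximation, not the exact upstream body of rcu_cpu_kthread_should_stop()) could be:

/*
 * Illustrative sketch only: wait until this kthread's CPU is online and
 * the kthread is actually running there, migrating itself as needed.
 * Called with softirqs disabled, hence the enable/disable dance around
 * the sleep.
 */
static int rcu_cpu_kthread_should_stop(int cpu)
{
	while (cpu_is_offline(cpu) || smp_processor_id() != cpu) {
		if (kthread_should_stop())
			return 1;
		local_bh_enable();
		schedule_timeout_uninterruptible(1);
		if (!cpumask_equal(&current->cpus_allowed, cpumask_of(cpu)))
			set_cpus_allowed_ptr(current, cpumask_of(cpu));
		local_bh_disable();
	}
	return 0;
}
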
 
@@ -1677,8 +1689,7 @@ static int rcu_node_kthread(void *arg)
 
        for (;;) {
                rnp->node_kthread_status = RCU_KTHREAD_WAITING;
-               wait_event_interruptible(rnp->node_wq,
-                                        atomic_read(&rnp->wakemask) != 0);
+               rcu_wait(atomic_read(&rnp->wakemask) != 0);
                rnp->node_kthread_status = RCU_KTHREAD_RUNNING;
                raw_spin_lock_irqsave(&rnp->lock, flags);
                mask = atomic_xchg(&rnp->wakemask, 0);
@@ -1762,9 +1773,9 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp,
                raw_spin_lock_irqsave(&rnp->lock, flags);
                rnp->node_kthread_task = t;
                raw_spin_unlock_irqrestore(&rnp->lock, flags);
-               wake_up_process(t);
                sp.sched_priority = 99;
                sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
+               wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
        }
        return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index);
 }
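
The per-rcu_node kthread gets the same treatment: it sleeps in rcu_wait() on rnp->wakemask, drains all pending CPU bits at once with atomic_xchg(), and has its priority set before it is first woken.  The waker side mirrors the per-CPU case; a hypothetical helper (the name and exact form are illustrative, not upstream code) would look roughly like:

/*
 * Hypothetical waker-side sketch: record which CPU wants the node
 * kthread's attention, then wake the kthread directly.  The NULL check
 * covers the window before the kthread has been spawned.
 */
static void rcu_wake_one_node_kthread(struct rcu_node *rnp, struct rcu_data *rdp)
{
	atomic_or(rdp->grpmask, &rnp->wakemask);
	if (rnp->node_kthread_task != NULL)
		wake_up_process(rnp->node_kthread_task);
}
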
@@ -1779,21 +1790,16 @@ static int __init rcu_spawn_kthreads(void)
 
        rcu_kthreads_spawnable = 1;
        for_each_possible_cpu(cpu) {
-               init_waitqueue_head(&per_cpu(rcu_cpu_wq, cpu));
                per_cpu(rcu_cpu_has_work, cpu) = 0;
                if (cpu_online(cpu))
                        (void)rcu_spawn_one_cpu_kthread(cpu);
        }
        rnp = rcu_get_root(rcu_state);
-       init_waitqueue_head(&rnp->node_wq);
-       rcu_init_boost_waitqueue(rnp);
        (void)rcu_spawn_one_node_kthread(rcu_state, rnp);
-       if (NUM_RCU_NODES > 1)
-               rcu_for_each_leaf_node(rcu_state, rnp) {
-                       init_waitqueue_head(&rnp->node_wq);
-                       rcu_init_boost_waitqueue(rnp);
+       if (NUM_RCU_NODES > 1) {
+               rcu_for_each_leaf_node(rcu_state, rnp)
                        (void)rcu_spawn_one_node_kthread(rcu_state, rnp);
-               }
+       }
        return 0;
 }
 early_initcall(rcu_spawn_kthreads);
@@ -2197,14 +2203,14 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
        raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
 }
 
-static void __cpuinit rcu_online_cpu(int cpu)
+static void __cpuinit rcu_prepare_cpu(int cpu)
 {
        rcu_init_percpu_data(cpu, &rcu_sched_state, 0);
        rcu_init_percpu_data(cpu, &rcu_bh_state, 0);
        rcu_preempt_init_percpu_data(cpu);
 }
 
-static void __cpuinit rcu_online_kthreads(int cpu)
+static void __cpuinit rcu_prepare_kthreads(int cpu)
 {
        struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
        struct rcu_node *rnp = rdp->mynode;
@@ -2230,8 +2236,8 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
        switch (action) {
        case CPU_UP_PREPARE:
        case CPU_UP_PREPARE_FROZEN:
-               rcu_online_cpu(cpu);
-               rcu_online_kthreads(cpu);
+               rcu_prepare_cpu(cpu);
+               rcu_prepare_kthreads(cpu);
                break;
        case CPU_ONLINE:
        case CPU_DOWN_FAILED: