Merge tag 'trace-v4.14-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rosted...
 kernel/rcu/tree.c
index 51d4c3acf32d40dff46610919602909f3372fd27..0c44c7b42e6d50bbb67ae47cbe257a734020b16c 100644
@@ -97,9 +97,6 @@ struct rcu_state sname##_state = { \
        .gp_state = RCU_GP_IDLE, \
        .gpnum = 0UL - 300UL, \
        .completed = 0UL - 300UL, \
-       .orphan_lock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.orphan_lock), \
-       .orphan_pend = RCU_CBLIST_INITIALIZER(sname##_state.orphan_pend), \
-       .orphan_done = RCU_CBLIST_INITIALIZER(sname##_state.orphan_done), \
        .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
        .name = RCU_STATE_NAME(sname), \
        .abbr = sabbr, \
@@ -843,13 +840,9 @@ static void rcu_eqs_enter(bool user)
  */
 void rcu_idle_enter(void)
 {
-       unsigned long flags;
-
-       local_irq_save(flags);
+       RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_idle_enter() invoked with irqs enabled!!!");
        rcu_eqs_enter(false);
-       local_irq_restore(flags);
 }
-EXPORT_SYMBOL_GPL(rcu_idle_enter);
 
 #ifdef CONFIG_NO_HZ_FULL
 /**
@@ -862,7 +855,8 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter);
  */
 void rcu_user_enter(void)
 {
-       rcu_eqs_enter(1);
+       RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_user_enter() invoked with irqs enabled!!!");
+       rcu_eqs_enter(true);
 }
 #endif /* CONFIG_NO_HZ_FULL */
 
@@ -888,6 +882,11 @@ void rcu_irq_exit(void)
 
        RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_exit() invoked with irqs enabled!!!");
        rdtp = this_cpu_ptr(&rcu_dynticks);
+
+       /* Page faults can happen in NMI handlers, so check... */
+       if (READ_ONCE(rdtp->dynticks_nmi_nesting))
+               return;
+
        WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
                     rdtp->dynticks_nesting < 1);
        if (rdtp->dynticks_nesting <= 1) {
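
The early return added above is presumably for the case where a page fault is taken while running in an NMI handler: rcu_nmi_enter() has already marked RCU as watching (->dynticks_nmi_nesting != 0), so the nested rcu_irq_exit() from the fault path bails out instead of unbalancing ->dynticks_nesting.
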
@@ -955,8 +954,10 @@ static void rcu_eqs_exit(bool user)
        if (oldval & DYNTICK_TASK_NEST_MASK) {
                rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
        } else {
+               __this_cpu_inc(disable_rcu_irq_enter);
                rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
                rcu_eqs_exit_common(oldval, user);
+               __this_cpu_dec(disable_rcu_irq_enter);
        }
 }
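
The __this_cpu_inc()/__this_cpu_dec() pair added above brackets the window in which ->dynticks_nesting already claims non-idle while the dynticks counter still says idle. The assumed consumer is the tracepoint entry path of this kernel generation; a hedged sketch, where the helper names are assumptions and not part of this patch:

	if (rcu_irq_enter_disabled())	/* Reads disable_rcu_irq_enter. */
		return;			/* Skip rcu_irq_enter() mid-transition. */
	rcu_irq_enter_irqson();
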
 
@@ -979,7 +980,6 @@ void rcu_idle_exit(void)
        rcu_eqs_exit(false);
        local_irq_restore(flags);
 }
-EXPORT_SYMBOL_GPL(rcu_idle_exit);
 
 #ifdef CONFIG_NO_HZ_FULL
 /**
@@ -1020,6 +1020,11 @@ void rcu_irq_enter(void)
 
        RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_enter() invoked with irqs enabled!!!");
        rdtp = this_cpu_ptr(&rcu_dynticks);
+
+       /* Page faults can happen in NMI handlers, so check... */
+       if (READ_ONCE(rdtp->dynticks_nmi_nesting))
+               return;
+
        oldval = rdtp->dynticks_nesting;
        rdtp->dynticks_nesting++;
        WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
@@ -1358,12 +1363,13 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
        j = jiffies;
        gpa = READ_ONCE(rsp->gp_activity);
        if (j - gpa > 2 * HZ) {
-               pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x %s(%d) ->state=%#lx\n",
+               pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x %s(%d) ->state=%#lx ->cpu=%d\n",
                       rsp->name, j - gpa,
                       rsp->gpnum, rsp->completed,
                       rsp->gp_flags,
                       gp_state_getname(rsp->gp_state), rsp->gp_state,
-                      rsp->gp_kthread ? rsp->gp_kthread->state : ~0);
+                      rsp->gp_kthread ? rsp->gp_kthread->state : ~0,
+                      rsp->gp_kthread ? task_cpu(rsp->gp_kthread) : -1);
                if (rsp->gp_kthread) {
                        sched_show_task(rsp->gp_kthread);
                        wake_up_process(rsp->gp_kthread);
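
With the new ->cpu field, a starvation report would look roughly like the following (all values invented for illustration):

	rcu_sched kthread starved for 21033 jiffies! g7073 c7072 f0x1 RCU_GP_WAIT_FQS(3) ->state=0x1 ->cpu=5
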
@@ -2067,8 +2073,8 @@ static bool rcu_gp_init(struct rcu_state *rsp)
 }
 
 /*
- * Helper function for wait_event_interruptible_timeout() wakeup
- * at force-quiescent-state time.
+ * Helper function for swait_event_idle() wakeup at force-quiescent-state
+ * time.
  */
 static bool rcu_gp_fqs_check_wake(struct rcu_state *rsp, int *gfp)
 {
@@ -2206,9 +2212,8 @@ static int __noreturn rcu_gp_kthread(void *arg)
                                               READ_ONCE(rsp->gpnum),
                                               TPS("reqwait"));
                        rsp->gp_state = RCU_GP_WAIT_GPS;
-                       swait_event_interruptible(rsp->gp_wq,
-                                                READ_ONCE(rsp->gp_flags) &
-                                                RCU_GP_FLAG_INIT);
+                       swait_event_idle(rsp->gp_wq, READ_ONCE(rsp->gp_flags) &
+                                                    RCU_GP_FLAG_INIT);
                        rsp->gp_state = RCU_GP_DONE_GPS;
                        /* Locking provides needed memory barrier. */
                        if (rcu_gp_init(rsp))
@@ -2239,7 +2244,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
                                               READ_ONCE(rsp->gpnum),
                                               TPS("fqswait"));
                        rsp->gp_state = RCU_GP_WAIT_FQS;
-                       ret = swait_event_interruptible_timeout(rsp->gp_wq,
+                       ret = swait_event_idle_timeout(rsp->gp_wq,
                                        rcu_gp_fqs_check_wake(rsp, &gf), j);
                        rsp->gp_state = RCU_GP_DOING_FQS;
                        /* Locking provides needed memory barriers. */
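
The move from swait_event_interruptible*() to swait_event_idle*() presumably lets the grace-period kthread wait in TASK_IDLE: it never handles signals, so interruptible sleeps buy nothing, and its long waits stop inflating the load average.
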
@@ -2409,6 +2414,8 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
                        return;
                }
                WARN_ON_ONCE(oldmask); /* Any child must be all zeroed! */
+               WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1 &&
+                            rcu_preempt_blocked_readers_cgp(rnp));
                rnp->qsmask &= ~mask;
                trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum,
                                                 mask, rnp->qsmask, rnp->level,
@@ -2562,85 +2569,6 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
        rcu_report_qs_rdp(rdp->cpu, rsp, rdp);
 }
 
-/*
- * Send the specified CPU's RCU callbacks to the orphanage.  The
- * specified CPU must be offline, and the caller must hold the
- * ->orphan_lock.
- */
-static void
-rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
-                         struct rcu_node *rnp, struct rcu_data *rdp)
-{
-       lockdep_assert_held(&rsp->orphan_lock);
-
-       /* No-CBs CPUs do not have orphanable callbacks. */
-       if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) || rcu_is_nocb_cpu(rdp->cpu))
-               return;
-
-       /*
-        * Orphan the callbacks.  First adjust the counts.  This is safe
-        * because _rcu_barrier() excludes CPU-hotplug operations, so it
-        * cannot be running now.  Thus no memory barrier is required.
-        */
-       rdp->n_cbs_orphaned += rcu_segcblist_n_cbs(&rdp->cblist);
-       rcu_segcblist_extract_count(&rdp->cblist, &rsp->orphan_done);
-
-       /*
-        * Next, move those callbacks still needing a grace period to
-        * the orphanage, where some other CPU will pick them up.
-        * Some of the callbacks might have gone partway through a grace
-        * period, but that is too bad.  They get to start over because we
-        * cannot assume that grace periods are synchronized across CPUs.
-        */
-       rcu_segcblist_extract_pend_cbs(&rdp->cblist, &rsp->orphan_pend);
-
-       /*
-        * Then move the ready-to-invoke callbacks to the orphanage,
-        * where some other CPU will pick them up.  These will not be
-        * required to pass though another grace period: They are done.
-        */
-       rcu_segcblist_extract_done_cbs(&rdp->cblist, &rsp->orphan_done);
-
-       /* Finally, disallow further callbacks on this CPU.  */
-       rcu_segcblist_disable(&rdp->cblist);
-}
-
-/*
- * Adopt the RCU callbacks from the specified rcu_state structure's
- * orphanage.  The caller must hold the ->orphan_lock.
- */
-static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
-{
-       struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
-
-       lockdep_assert_held(&rsp->orphan_lock);
-
-       /* No-CBs CPUs are handled specially. */
-       if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) ||
-           rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags))
-               return;
-
-       /* Do the accounting first. */
-       rdp->n_cbs_adopted += rsp->orphan_done.len;
-       if (rsp->orphan_done.len_lazy != rsp->orphan_done.len)
-               rcu_idle_count_callbacks_posted();
-       rcu_segcblist_insert_count(&rdp->cblist, &rsp->orphan_done);
-
-       /*
-        * We do not need a memory barrier here because the only way we
-        * can get here if there is an rcu_barrier() in flight is if
-        * we are the task doing the rcu_barrier().
-        */
-
-       /* First adopt the ready-to-invoke callbacks, then the done ones. */
-       rcu_segcblist_insert_done_cbs(&rdp->cblist, &rsp->orphan_done);
-       WARN_ON_ONCE(rsp->orphan_done.head);
-       rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rsp->orphan_pend);
-       WARN_ON_ONCE(rsp->orphan_pend.head);
-       WARN_ON_ONCE(rcu_segcblist_empty(&rdp->cblist) !=
-                    !rcu_segcblist_n_cbs(&rdp->cblist));
-}
-
 /*
  * Trace the fact that this CPU is going offline.
  */
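
Net effect of removing rcu_send_cbs_to_orphanage() and rcu_adopt_orphan_cbs(): an offline CPU's callbacks no longer pass through the global ->orphan_lock-protected lists, but are merged straight onto a surviving CPU's list, roughly:

	rcu_advance_cbs(rsp, rnp_root, rdp);	/* Credit the dead CPU's CBs with elapsed GPs. */
	rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist);	/* Hand them to the current CPU. */

as rcu_migrate_callbacks(), added later in this diff, does.
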
@@ -2704,14 +2632,12 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
 
 /*
  * The CPU has been completely removed, and some other CPU is reporting
- * this fact from process context.  Do the remainder of the cleanup,
- * including orphaning the outgoing CPU's RCU callbacks, and also
- * adopting them.  There can only be one CPU hotplug operation at a time,
- * so no other CPU can be attempting to update rcu_cpu_kthread_task.
+ * this fact from process context.  Do the remainder of the cleanup.
+ * There can only be one CPU hotplug operation at a time, so no need for
+ * explicit locking.
  */
 static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 {
-       unsigned long flags;
        struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
        struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rdp & rnp. */
 
@@ -2720,18 +2646,6 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 
        /* Adjust any no-longer-needed kthreads. */
        rcu_boost_kthread_setaffinity(rnp, -1);
-
-       /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */
-       raw_spin_lock_irqsave(&rsp->orphan_lock, flags);
-       rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp);
-       rcu_adopt_orphan_cbs(rsp, flags);
-       raw_spin_unlock_irqrestore(&rsp->orphan_lock, flags);
-
-       WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 ||
-                 !rcu_segcblist_empty(&rdp->cblist),
-                 "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n",
-                 cpu, rcu_segcblist_n_cbs(&rdp->cblist),
-                 rcu_segcblist_first_cb(&rdp->cblist));
 }
 
 /*
@@ -3569,10 +3483,11 @@ static void rcu_barrier_callback(struct rcu_head *rhp)
        struct rcu_state *rsp = rdp->rsp;
 
        if (atomic_dec_and_test(&rsp->barrier_cpu_count)) {
-               _rcu_barrier_trace(rsp, "LastCB", -1, rsp->barrier_sequence);
+               _rcu_barrier_trace(rsp, TPS("LastCB"), -1,
+                                  rsp->barrier_sequence);
                complete(&rsp->barrier_completion);
        } else {
-               _rcu_barrier_trace(rsp, "CB", -1, rsp->barrier_sequence);
+               _rcu_barrier_trace(rsp, TPS("CB"), -1, rsp->barrier_sequence);
        }
 }
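
The bare string literals become TPS() arguments so they are emitted through the tracepoint-string machinery; TPS() in this file is presumably the usual tree.c shorthand for tracepoint_string(), which lets trace tooling resolve the recorded pointer back to "LastCB", "CB", and the other tags.
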
 
@@ -3584,14 +3499,15 @@ static void rcu_barrier_func(void *type)
        struct rcu_state *rsp = type;
        struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
 
-       _rcu_barrier_trace(rsp, "IRQ", -1, rsp->barrier_sequence);
+       _rcu_barrier_trace(rsp, TPS("IRQ"), -1, rsp->barrier_sequence);
        rdp->barrier_head.func = rcu_barrier_callback;
        debug_rcu_head_queue(&rdp->barrier_head);
        if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head, 0)) {
                atomic_inc(&rsp->barrier_cpu_count);
        } else {
                debug_rcu_head_unqueue(&rdp->barrier_head);
-               _rcu_barrier_trace(rsp, "IRQNQ", -1, rsp->barrier_sequence);
+               _rcu_barrier_trace(rsp, TPS("IRQNQ"), -1,
+                                  rsp->barrier_sequence);
        }
 }
 
@@ -3605,14 +3521,15 @@ static void _rcu_barrier(struct rcu_state *rsp)
        struct rcu_data *rdp;
        unsigned long s = rcu_seq_snap(&rsp->barrier_sequence);
 
-       _rcu_barrier_trace(rsp, "Begin", -1, s);
+       _rcu_barrier_trace(rsp, TPS("Begin"), -1, s);
 
        /* Take mutex to serialize concurrent rcu_barrier() requests. */
        mutex_lock(&rsp->barrier_mutex);
 
        /* Did someone else do our work for us? */
        if (rcu_seq_done(&rsp->barrier_sequence, s)) {
-               _rcu_barrier_trace(rsp, "EarlyExit", -1, rsp->barrier_sequence);
+               _rcu_barrier_trace(rsp, TPS("EarlyExit"), -1,
+                                  rsp->barrier_sequence);
                smp_mb(); /* caller's subsequent code after above check. */
                mutex_unlock(&rsp->barrier_mutex);
                return;
@@ -3620,7 +3537,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
 
        /* Mark the start of the barrier operation. */
        rcu_seq_start(&rsp->barrier_sequence);
-       _rcu_barrier_trace(rsp, "Inc1", -1, rsp->barrier_sequence);
+       _rcu_barrier_trace(rsp, TPS("Inc1"), -1, rsp->barrier_sequence);
 
        /*
         * Initialize the count to one rather than to zero in order to
@@ -3643,10 +3560,10 @@ static void _rcu_barrier(struct rcu_state *rsp)
                rdp = per_cpu_ptr(rsp->rda, cpu);
                if (rcu_is_nocb_cpu(cpu)) {
                        if (!rcu_nocb_cpu_needs_barrier(rsp, cpu)) {
-                               _rcu_barrier_trace(rsp, "OfflineNoCB", cpu,
+                               _rcu_barrier_trace(rsp, TPS("OfflineNoCB"), cpu,
                                                   rsp->barrier_sequence);
                        } else {
-                               _rcu_barrier_trace(rsp, "OnlineNoCB", cpu,
+                               _rcu_barrier_trace(rsp, TPS("OnlineNoCB"), cpu,
                                                   rsp->barrier_sequence);
                                smp_mb__before_atomic();
                                atomic_inc(&rsp->barrier_cpu_count);
@@ -3654,11 +3571,11 @@ static void _rcu_barrier(struct rcu_state *rsp)
                                           rcu_barrier_callback, rsp, cpu, 0);
                        }
                } else if (rcu_segcblist_n_cbs(&rdp->cblist)) {
-                       _rcu_barrier_trace(rsp, "OnlineQ", cpu,
+                       _rcu_barrier_trace(rsp, TPS("OnlineQ"), cpu,
                                           rsp->barrier_sequence);
                        smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
                } else {
-                       _rcu_barrier_trace(rsp, "OnlineNQ", cpu,
+                       _rcu_barrier_trace(rsp, TPS("OnlineNQ"), cpu,
                                           rsp->barrier_sequence);
                }
        }
@@ -3675,7 +3592,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
        wait_for_completion(&rsp->barrier_completion);
 
        /* Mark the end of the barrier operation. */
-       _rcu_barrier_trace(rsp, "Inc2", -1, rsp->barrier_sequence);
+       _rcu_barrier_trace(rsp, TPS("Inc2"), -1, rsp->barrier_sequence);
        rcu_seq_end(&rsp->barrier_sequence);
 
        /* Other rcu_barrier() invocations can now safely proceed. */
@@ -3777,8 +3694,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
         */
        rnp = rdp->mynode;
        raw_spin_lock_rcu_node(rnp);            /* irqs already disabled. */
-       if (!rdp->beenonline)
-               WRITE_ONCE(rsp->ncpus, READ_ONCE(rsp->ncpus) + 1);
        rdp->beenonline = true;  /* We have now been online. */
        rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */
        rdp->completed = rnp->completed;
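
The ->ncpus increment deleted here is not lost: it reappears in rcu_cpu_starting() below, where it is performed with smp_store_release() so expedited grace periods can sample ->ncpus without holding the node lock.
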
@@ -3882,6 +3797,8 @@ void rcu_cpu_starting(unsigned int cpu)
 {
        unsigned long flags;
        unsigned long mask;
+       int nbits;
+       unsigned long oldmask;
        struct rcu_data *rdp;
        struct rcu_node *rnp;
        struct rcu_state *rsp;
@@ -3892,9 +3809,15 @@ void rcu_cpu_starting(unsigned int cpu)
                mask = rdp->grpmask;
                raw_spin_lock_irqsave_rcu_node(rnp, flags);
                rnp->qsmaskinitnext |= mask;
+               oldmask = rnp->expmaskinitnext;
                rnp->expmaskinitnext |= mask;
+               oldmask ^= rnp->expmaskinitnext;
+               nbits = bitmap_weight(&oldmask, BITS_PER_LONG);
+               /* Allow lockless access for expedited grace periods. */
+               smp_store_release(&rsp->ncpus, rsp->ncpus + nbits); /* ^^^ */
                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
        }
+       smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
 }
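
A sketch of the lockless reader that the smp_store_release() above pairs with (assumed to live in the expedited grace-period code, which is not part of this diff):

	ncpus = smp_load_acquire(&rsp->ncpus);	/* Order against rcu_cpu_starting()'s release. */
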
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -3937,6 +3860,50 @@ void rcu_report_dead(unsigned int cpu)
        for_each_rcu_flavor(rsp)
                rcu_cleanup_dying_idle_cpu(cpu, rsp);
 }
+
+/* Migrate the dead CPU's callbacks to the current CPU. */
+static void rcu_migrate_callbacks(int cpu, struct rcu_state *rsp)
+{
+       unsigned long flags;
+       struct rcu_data *my_rdp;
+       struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
+       struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
+
+       if (rcu_is_nocb_cpu(cpu) || rcu_segcblist_empty(&rdp->cblist))
+               return;  /* No callbacks to migrate. */
+
+       local_irq_save(flags);
+       my_rdp = this_cpu_ptr(rsp->rda);
+       if (rcu_nocb_adopt_orphan_cbs(my_rdp, rdp, flags)) {
+               local_irq_restore(flags);
+               return;
+       }
+       raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */
+       rcu_advance_cbs(rsp, rnp_root, rdp); /* Leverage recent GPs. */
+       rcu_advance_cbs(rsp, rnp_root, my_rdp); /* Assign GP to pending CBs. */
+       rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist);
+       WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) !=
+                    !rcu_segcblist_n_cbs(&my_rdp->cblist));
+       raw_spin_unlock_irqrestore_rcu_node(rnp_root, flags);
+       WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 ||
+                 !rcu_segcblist_empty(&rdp->cblist),
+                 "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n",
+                 cpu, rcu_segcblist_n_cbs(&rdp->cblist),
+                 rcu_segcblist_first_cb(&rdp->cblist));
+}
+
+/*
+ * The outgoing CPU has just passed through the dying-idle state,
+ * and we are being invoked from the CPU that was IPIed to continue the
+ * offline operation.  We need to migrate the outgoing CPU's callbacks.
+ */
+void rcutree_migrate_callbacks(int cpu)
+{
+       struct rcu_state *rsp;
+
+       for_each_rcu_flavor(rsp)
+               rcu_migrate_callbacks(cpu, rsp);
+}
 #endif
 
 /*
@@ -4134,7 +4101,7 @@ static void __init rcu_init_geometry(void)
        if (rcu_fanout_leaf == RCU_FANOUT_LEAF &&
            nr_cpu_ids == NR_CPUS)
                return;
-       pr_info("RCU: Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%d\n",
+       pr_info("RCU: Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%u\n",
                rcu_fanout_leaf, nr_cpu_ids);
 
        /*