Merge branches 'doc.2021.01.06a', 'fixes.2021.01.04b', 'kfree_rcu.2021.01.04a', ...
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 40e5e3dd253e077cace9e8bd0edae8c71b9bf87d..0f4a6a3c057b0120be8ff35f3f40be6bde7fa3aa 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -83,6 +83,9 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
        .dynticks_nesting = 1,
        .dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE,
        .dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR),
+#ifdef CONFIG_RCU_NOCB_CPU
+       .cblist.flags = SEGCBLIST_SOFTIRQ_ONLY,
+#endif
 };
 static struct rcu_state rcu_state = {
        .level = { &rcu_state.node[0] },
@@ -100,8 +103,10 @@ static struct rcu_state rcu_state = {
 static bool dump_tree;
 module_param(dump_tree, bool, 0444);
 /* By default, use RCU_SOFTIRQ instead of rcuc kthreads. */
-static bool use_softirq = true;
+static bool use_softirq = !IS_ENABLED(CONFIG_PREEMPT_RT);
+#ifndef CONFIG_PREEMPT_RT
 module_param(use_softirq, bool, 0444);
+#endif
 /* Control rcu_node-tree auto-balancing at boot time. */
 static bool rcu_fanout_exact;
 module_param(rcu_fanout_exact, bool, 0444);
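/*
 * Hedged sketch, not part of the patch above (names are invented):
 * IS_ENABLED() folds to a compile-time 0/1, so the default value flips
 * with the Kconfig option, and hiding module_param() behind the same
 * option keeps the forced default from being overridden on the kernel
 * command line.  Assumes the usual <linux/module.h> include, as in this file.
 */
static bool my_knob = !IS_ENABLED(CONFIG_MY_FORCED_MODE);
#ifndef CONFIG_MY_FORCED_MODE
module_param(my_knob, bool, 0444);      /* boot-time-only parameter */
#endif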
@@ -1495,6 +1500,8 @@ static bool rcu_accelerate_cbs(struct rcu_node *rnp, struct rcu_data *rdp)
        if (!rcu_segcblist_pend_cbs(&rdp->cblist))
                return false;
 
+       trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCbPreAcc"));
+
        /*
         * Callbacks are often registered with incomplete grace-period
         * information.  Something about the fact that getting exact
@@ -1515,6 +1522,8 @@ static bool rcu_accelerate_cbs(struct rcu_node *rnp, struct rcu_data *rdp)
        else
                trace_rcu_grace_period(rcu_state.name, gp_seq_req, TPS("AccReadyCB"));
 
+       trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCbPostAcc"));
+
        return ret;
 }
 
@@ -1765,7 +1774,7 @@ static bool rcu_gp_init(void)
         * go offline later.  Please also refer to "Hotplug CPU" section
         * of RCU's Requirements documentation.
         */
-       rcu_state.gp_state = RCU_GP_ONOFF;
+       WRITE_ONCE(rcu_state.gp_state, RCU_GP_ONOFF);
        rcu_for_each_leaf_node(rnp) {
                smp_mb(); // Pair with barriers used when updating ->ofl_seq to odd values.
                firstseq = READ_ONCE(rnp->ofl_seq);
@@ -1831,7 +1840,7 @@ static bool rcu_gp_init(void)
         * The grace period cannot complete until the initialization
         * process finishes, because this kthread handles both.
         */
-       rcu_state.gp_state = RCU_GP_INIT;
+       WRITE_ONCE(rcu_state.gp_state, RCU_GP_INIT);
        rcu_for_each_node_breadth_first(rnp) {
                rcu_gp_slow(gp_init_delay);
                raw_spin_lock_irqsave_rcu_node(rnp, flags);
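/*
 * Hedged sketch, not part of the patch (show_gp_state() is invented):
 * the WRITE_ONCE() stores above assume that ->gp_state may be sampled
 * locklessly, for example by diagnostic or stall-warning code, so such
 * readers should pair them with READ_ONCE().
 */
static void show_gp_state(void)
{
        int gps = READ_ONCE(rcu_state.gp_state);        /* lockless sample */

        pr_info("rcu_state.gp_state: %d\n", gps);
}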
@@ -1930,17 +1939,22 @@ static void rcu_gp_fqs_loop(void)
        ret = 0;
        for (;;) {
                if (!ret) {
-                       rcu_state.jiffies_force_qs = jiffies + j;
+                       WRITE_ONCE(rcu_state.jiffies_force_qs, jiffies + j);
+                       /*
+                        * jiffies_force_qs before RCU_GP_WAIT_FQS state
+                        * update; required for stall checks.
+                        */
+                       smp_wmb();
                        WRITE_ONCE(rcu_state.jiffies_kick_kthreads,
                                   jiffies + (j ? 3 * j : 2));
                }
                trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq,
                                       TPS("fqswait"));
-               rcu_state.gp_state = RCU_GP_WAIT_FQS;
+               WRITE_ONCE(rcu_state.gp_state, RCU_GP_WAIT_FQS);
                ret = swait_event_idle_timeout_exclusive(
                                rcu_state.gp_wq, rcu_gp_fqs_check_wake(&gf), j);
                rcu_gp_torture_wait();
-               rcu_state.gp_state = RCU_GP_DOING_FQS;
+               WRITE_ONCE(rcu_state.gp_state, RCU_GP_DOING_FQS);
                /* Locking provides needed memory barriers. */
                /* If grace period done, leave loop. */
                if (!READ_ONCE(rnp->qsmask) &&
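/*
 * Hedged sketch of the ordering provided by the smp_wmb() above, using
 * an invented reader: if a lockless checker sees gp_state equal to
 * RCU_GP_WAIT_FQS, it must also see the updated jiffies_force_qs, so the
 * read side orders the two loads with smp_rmb().
 */
static bool gp_fqs_deadline_passed(void)
{
        if (READ_ONCE(rcu_state.gp_state) != RCU_GP_WAIT_FQS)
                return false;
        smp_rmb(); /* ->gp_state before ->jiffies_force_qs; pairs with smp_wmb(). */
        return time_after(jiffies, READ_ONCE(rcu_state.jiffies_force_qs));
}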
@@ -2054,7 +2068,7 @@ static void rcu_gp_cleanup(void)
        trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("end"));
        rcu_seq_end(&rcu_state.gp_seq);
        ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq);
-       rcu_state.gp_state = RCU_GP_IDLE;
+       WRITE_ONCE(rcu_state.gp_state, RCU_GP_IDLE);
        /* Check for GP requests since above loop. */
        rdp = this_cpu_ptr(&rcu_data);
        if (!needgp && ULONG_CMP_LT(rnp->gp_seq, rnp->gp_seq_needed)) {
@@ -2093,12 +2107,12 @@ static int __noreturn rcu_gp_kthread(void *unused)
                for (;;) {
                        trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq,
                                               TPS("reqwait"));
-                       rcu_state.gp_state = RCU_GP_WAIT_GPS;
+                       WRITE_ONCE(rcu_state.gp_state, RCU_GP_WAIT_GPS);
                        swait_event_idle_exclusive(rcu_state.gp_wq,
                                         READ_ONCE(rcu_state.gp_flags) &
                                         RCU_GP_FLAG_INIT);
                        rcu_gp_torture_wait();
-                       rcu_state.gp_state = RCU_GP_DONE_GPS;
+                       WRITE_ONCE(rcu_state.gp_state, RCU_GP_DONE_GPS);
                        /* Locking provides needed memory barrier. */
                        if (rcu_gp_init())
                                break;
@@ -2113,9 +2127,9 @@ static int __noreturn rcu_gp_kthread(void *unused)
                rcu_gp_fqs_loop();
 
                /* Handle grace-period end. */
-               rcu_state.gp_state = RCU_GP_CLEANUP;
+               WRITE_ONCE(rcu_state.gp_state, RCU_GP_CLEANUP);
                rcu_gp_cleanup();
-               rcu_state.gp_state = RCU_GP_CLEANED;
+               WRITE_ONCE(rcu_state.gp_state, RCU_GP_CLEANED);
        }
 }
 
@@ -2430,11 +2444,12 @@ int rcutree_dead_cpu(unsigned int cpu)
 static void rcu_do_batch(struct rcu_data *rdp)
 {
        int div;
+       bool __maybe_unused empty;
        unsigned long flags;
        const bool offloaded = rcu_segcblist_is_offloaded(&rdp->cblist);
        struct rcu_head *rhp;
        struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl);
-       long bl, count;
+       long bl, count = 0;
        long pending, tlimit = 0;
 
        /* If no callbacks are ready, just return. */
@@ -2471,14 +2486,18 @@ static void rcu_do_batch(struct rcu_data *rdp)
        rcu_segcblist_extract_done_cbs(&rdp->cblist, &rcl);
        if (offloaded)
                rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist);
+
+       trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCbDequeued"));
        rcu_nocb_unlock_irqrestore(rdp, flags);
 
        /* Invoke callbacks. */
        tick_dep_set_task(current, TICK_DEP_BIT_RCU);
        rhp = rcu_cblist_dequeue(&rcl);
+
        for (; rhp; rhp = rcu_cblist_dequeue(&rcl)) {
                rcu_callback_t f;
 
+               count++;
                debug_rcu_head_unqueue(rhp);
 
                rcu_lock_acquire(&rcu_callback_map);
@@ -2492,21 +2511,19 @@ static void rcu_do_batch(struct rcu_data *rdp)
 
                /*
                 * Stop only if limit reached and CPU has something to do.
-                * Note: The rcl structure counts down from zero.
                 */
-               if (-rcl.len >= bl && !offloaded &&
+               if (count >= bl && !offloaded &&
                    (need_resched() ||
                     (!is_idle_task(current) && !rcu_is_callbacks_kthread())))
                        break;
                if (unlikely(tlimit)) {
                        /* only call local_clock() every 32 callbacks */
-                       if (likely((-rcl.len & 31) || local_clock() < tlimit))
+                       if (likely((count & 31) || local_clock() < tlimit))
                                continue;
                        /* Exceeded the time limit, so leave. */
                        break;
                }
-               if (offloaded) {
-                       WARN_ON_ONCE(in_serving_softirq());
+               if (!in_serving_softirq()) {
                        local_bh_enable();
                        lockdep_assert_irqs_enabled();
                        cond_resched_tasks_rcu_qs();
@@ -2517,15 +2534,13 @@ static void rcu_do_batch(struct rcu_data *rdp)
 
        local_irq_save(flags);
        rcu_nocb_lock(rdp);
-       count = -rcl.len;
        rdp->n_cbs_invoked += count;
        trace_rcu_batch_end(rcu_state.name, count, !!rcl.head, need_resched(),
                            is_idle_task(current), rcu_is_callbacks_kthread());
 
        /* Update counts and requeue any remaining callbacks. */
        rcu_segcblist_insert_done_cbs(&rdp->cblist, &rcl);
-       smp_mb(); /* List handling before counting for rcu_barrier(). */
-       rcu_segcblist_insert_count(&rdp->cblist, &rcl);
+       rcu_segcblist_add_len(&rdp->cblist, -count);
 
        /* Reinstate batch limit if we have worked down the excess. */
        count = rcu_segcblist_n_cbs(&rdp->cblist);
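/*
 * Hedged sketch of the batch-limit logic above, with invented names:
 * count invoked items in a local variable (instead of negating a list
 * length that counts down from zero), stop at the batch limit (the real
 * code additionally requires that the CPU has other work to do), and
 * consult the clock only every 32nd item when a time limit is armed.
 */
struct my_item { struct my_item *next; };

static void my_invoke(struct my_item *item) { /* per-item work */ }

static long my_do_batch(struct my_item **list, long bl, u64 tlimit)
{
        struct my_item *item;
        long count = 0;

        while ((item = *list) != NULL) {
                *list = item->next;
                count++;
                my_invoke(item);
                if (count >= bl)
                        break;          /* batch limit reached */
                if (tlimit && !(count & 31) && local_clock() >= tlimit)
                        break;          /* time limit exceeded */
        }
        return count;   /* caller adjusts the shared count once, by -count */
}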
@@ -2543,9 +2558,12 @@ static void rcu_do_batch(struct rcu_data *rdp)
         * The following usually indicates a double call_rcu().  To track
         * this down, try building with CONFIG_DEBUG_OBJECTS_RCU_HEAD=y.
         */
-       WARN_ON_ONCE(count == 0 && !rcu_segcblist_empty(&rdp->cblist));
+       empty = rcu_segcblist_empty(&rdp->cblist);
+       WARN_ON_ONCE(count == 0 && !empty);
        WARN_ON_ONCE(!IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
-                    count != 0 && rcu_segcblist_empty(&rdp->cblist));
+                    count != 0 && empty);
+       WARN_ON_ONCE(count == 0 && rcu_segcblist_n_segment_cbs(&rdp->cblist) != 0);
+       WARN_ON_ONCE(!empty && rcu_segcblist_n_segment_cbs(&rdp->cblist) == 0);
 
        rcu_nocb_unlock_irqrestore(rdp, flags);
 
@@ -2566,6 +2584,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
 void rcu_sched_clock_irq(int user)
 {
        trace_rcu_utilization(TPS("Start scheduler-tick"));
+       lockdep_assert_irqs_disabled();
        raw_cpu_inc(rcu_data.ticks_this_gp);
        /* The load-acquire pairs with the store-release setting to true. */
        if (smp_load_acquire(this_cpu_ptr(&rcu_data.rcu_urgent_qs))) {
@@ -2579,6 +2598,7 @@ void rcu_sched_clock_irq(int user)
        rcu_flavor_sched_clock_irq(user);
        if (rcu_pending(user))
                invoke_rcu_core();
+       lockdep_assert_irqs_disabled();
 
        trace_rcu_utilization(TPS("End scheduler-tick"));
 }
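/*
 * Hedged sketch (my_timer_tick() is invented): lockdep_assert_irqs_disabled()
 * documents the required calling context and, on lockdep-enabled kernels,
 * splats if interrupts are unexpectedly enabled; otherwise it compiles away.
 */
static void my_timer_tick(void)
{
        lockdep_assert_irqs_disabled(); /* caller must have irqs off */
        /* ... per-tick work that relies on interrupts being disabled ... */
}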
@@ -2688,7 +2708,7 @@ static __latent_entropy void rcu_core(void)
        unsigned long flags;
        struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
        struct rcu_node *rnp = rdp->mynode;
-       const bool offloaded = rcu_segcblist_is_offloaded(&rdp->cblist);
+       const bool do_batch = !rcu_segcblist_completely_offloaded(&rdp->cblist);
 
        if (cpu_is_offline(smp_processor_id()))
                return;
@@ -2708,17 +2728,17 @@ static __latent_entropy void rcu_core(void)
 
        /* No grace period and unregistered callbacks? */
        if (!rcu_gp_in_progress() &&
-           rcu_segcblist_is_enabled(&rdp->cblist) && !offloaded) {
-               local_irq_save(flags);
+           rcu_segcblist_is_enabled(&rdp->cblist) && do_batch) {
+               rcu_nocb_lock_irqsave(rdp, flags);
                if (!rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
                        rcu_accelerate_cbs_unlocked(rnp, rdp);
-               local_irq_restore(flags);
+               rcu_nocb_unlock_irqrestore(rdp, flags);
        }
 
        rcu_check_gp_start_stall(rnp, rdp, rcu_jiffies_till_stall_check());
 
        /* If there are callbacks ready, invoke them. */
-       if (!offloaded && rcu_segcblist_ready_cbs(&rdp->cblist) &&
+       if (do_batch && rcu_segcblist_ready_cbs(&rdp->cblist) &&
            likely(READ_ONCE(rcu_scheduler_fully_active)))
                rcu_do_batch(rdp);
 
@@ -2941,6 +2961,7 @@ static void check_cb_ovld(struct rcu_data *rdp)
 static void
 __call_rcu(struct rcu_head *head, rcu_callback_t func)
 {
+       static atomic_t doublefrees;
        unsigned long flags;
        struct rcu_data *rdp;
        bool was_alldone;
@@ -2954,8 +2975,10 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func)
                 * Use rcu:rcu_callback trace event to find the previous
                 * time callback was passed to __call_rcu().
                 */
-               WARN_ONCE(1, "__call_rcu(): Double-freed CB %p->%pS()!!!\n",
-                         head, head->func);
+               if (atomic_inc_return(&doublefrees) < 4) {
+                       pr_err("%s(): Double-freed CB %p->%pS()!!!  ", __func__, head, head->func);
+                       mem_dump_obj(head);
+               }
                WRITE_ONCE(head->func, rcu_leak_callback);
                return;
        }
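/*
 * Hedged sketch of the rate-limiting idiom above (report_double_free()
 * is invented): a static atomic counter lets only the first few
 * occurrences of a rare error reach the log, so a storm of double frees
 * cannot flood it.
 */
static void report_double_free(void *obj)
{
        static atomic_t complaints;

        if (atomic_inc_return(&complaints) < 4) /* report the first three */
                pr_err("%s(): double free of %p\n", __func__, obj);
}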
@@ -2989,6 +3012,8 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func)
                trace_rcu_callback(rcu_state.name, head,
                                   rcu_segcblist_n_cbs(&rdp->cblist));
 
+       trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCBQueued"));
+
        /* Go handle any RCU core processing required. */
        if (unlikely(rcu_segcblist_is_offloaded(&rdp->cblist))) {
                __call_rcu_nocb_wake(rdp, was_alldone, flags); /* unlocks */
@@ -3498,6 +3523,7 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
                goto unlock_return;
        }
 
+       kasan_record_aux_stack(ptr);
        success = kvfree_call_rcu_add_ptr_to_bulk(krcp, ptr);
        if (!success) {
                run_page_cache_worker(krcp);
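/*
 * Hedged sketch (my_defer_free() and queue_free() are invented):
 * kasan_record_aux_stack() attaches the current call stack to the object
 * as auxiliary information, so a later KASAN report for that memory can
 * also show where its deferred free was requested; it is a no-op when
 * KASAN is not enabled.
 */
static void my_defer_free(void *obj, void (*queue_free)(void *))
{
        kasan_record_aux_stack(obj);    /* remember who requested the free */
        queue_free(obj);                /* hypothetical deferred-free path */
}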
@@ -3747,6 +3773,8 @@ static int rcu_pending(int user)
        struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
        struct rcu_node *rnp = rdp->mynode;
 
+       lockdep_assert_irqs_disabled();
+
        /* Check for CPU stalls, if enabled. */
        check_cpu_stall(rdp);
 
@@ -4001,12 +4029,18 @@ int rcutree_prepare_cpu(unsigned int cpu)
        rdp->qlen_last_fqs_check = 0;
        rdp->n_force_qs_snap = rcu_state.n_force_qs;
        rdp->blimit = blimit;
-       if (rcu_segcblist_empty(&rdp->cblist) && /* No early-boot CBs? */
-           !rcu_segcblist_is_offloaded(&rdp->cblist))
-               rcu_segcblist_init(&rdp->cblist);  /* Re-enable callbacks. */
        rdp->dynticks_nesting = 1;      /* CPU not up, no tearing. */
        rcu_dynticks_eqs_online();
        raw_spin_unlock_rcu_node(rnp);          /* irqs remain disabled. */
+       /*
+        * Lock in case the CB/GP kthreads are still around handling
+        * old callbacks (longer term we should flush all callbacks
+        * before completing CPU offline)
+        */
+       rcu_nocb_lock(rdp);
+       if (rcu_segcblist_empty(&rdp->cblist)) /* No early-boot CBs? */
+               rcu_segcblist_init(&rdp->cblist);  /* Re-enable callbacks. */
+       rcu_nocb_unlock(rdp);
 
        /*
         * Add CPU to leaf rcu_node pending-online bitmask.  Any needed
@@ -4159,6 +4193,9 @@ void rcu_report_dead(unsigned int cpu)
        struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
        struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rdp & rnp. */
 
+       // Do any dangling deferred wakeups.
+       do_nocb_deferred_wakeup(rdp);
+
        /* QS for any half-done expedited grace period. */
        preempt_disable();
        rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));