Merge branches 'doc.2021.01.06a', 'fixes.2021.01.04b', 'kfree_rcu.2021.01.04a', ...
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 40e5e3dd253e077cace9e8bd0edae8c71b9bf87d..0f4a6a3c057b0120be8ff35f3f40be6bde7fa3aa 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -83,6 +83,9 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
        .dynticks_nesting = 1,
        .dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE,
        .dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR),
+#ifdef CONFIG_RCU_NOCB_CPU
+       .cblist.flags = SEGCBLIST_SOFTIRQ_ONLY,
+#endif
 };
 static struct rcu_state rcu_state = {
        .level = { &rcu_state.node[0] },
@@ -100,8 +103,10 @@ static struct rcu_state rcu_state = {
 static bool dump_tree;
 module_param(dump_tree, bool, 0444);
 /* By default, use RCU_SOFTIRQ instead of rcuc kthreads. */
-static bool use_softirq = true;
+static bool use_softirq = !IS_ENABLED(CONFIG_PREEMPT_RT);
+#ifndef CONFIG_PREEMPT_RT
 module_param(use_softirq, bool, 0444);
+#endif
 /* Control rcu_node-tree auto-balancing at boot time. */
 static bool rcu_fanout_exact;
 module_param(rcu_fanout_exact, bool, 0444);
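/*
 * Hedged sketch, not part of the patch above (names are invented):
 * IS_ENABLED() folds to a compile-time 0/1, so the default value flips
 * with the Kconfig option, and hiding module_param() behind the same
 * option keeps the forced default from being overridden on the kernel
 * command line.  Assumes the usual <linux/module.h> include, as in this file.
 */
static bool my_knob = !IS_ENABLED(CONFIG_MY_FORCED_MODE);
#ifndef CONFIG_MY_FORCED_MODE
module_param(my_knob, bool, 0444);      /* boot-time-only parameter */
#endif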
@@ -1495,6 +1500,8 @@ static bool rcu_accelerate_cbs(struct rcu_node *rnp, struct rcu_data *rdp)
        if (!rcu_segcblist_pend_cbs(&rdp->cblist))
                return false;
 
+       trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCbPreAcc"));
+
        /*
         * Callbacks are often registered with incomplete grace-period
         * information.  Something about the fact that getting exact
@@ -1515,6 +1522,8 @@ static bool rcu_accelerate_cbs(struct rcu_node *rnp, struct rcu_data *rdp)
        else
                trace_rcu_grace_period(rcu_state.name, gp_seq_req, TPS("AccReadyCB"));
 
+       trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCbPostAcc"));
+
        return ret;
 }
 
@@ -1765,7 +1774,7 @@ static bool rcu_gp_init(void)
         * go offline later.  Please also refer to "Hotplug CPU" section
         * of RCU's Requirements documentation.
         */
-       rcu_state.gp_state = RCU_GP_ONOFF;
+       WRITE_ONCE(rcu_state.gp_state, RCU_GP_ONOFF);
        rcu_for_each_leaf_node(rnp) {
                smp_mb(); // Pair with barriers used when updating ->ofl_seq to odd values.
                firstseq = READ_ONCE(rnp->ofl_seq);
@@ -1831,7 +1840,7 @@ static bool rcu_gp_init(void)
         * The grace period cannot complete until the initialization
         * process finishes, because this kthread handles both.
         */
-       rcu_state.gp_state = RCU_GP_INIT;
+       WRITE_ONCE(rcu_state.gp_state, RCU_GP_INIT);
        rcu_for_each_node_breadth_first(rnp) {
                rcu_gp_slow(gp_init_delay);
                raw_spin_lock_irqsave_rcu_node(rnp, flags);
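/*
 * Hedged sketch, not part of the patch (show_gp_state() is invented):
 * the WRITE_ONCE() stores above assume that ->gp_state may be sampled
 * locklessly, for example by diagnostic or stall-warning code, so such
 * readers should pair them with READ_ONCE().
 */
static void show_gp_state(void)
{
        int gps = READ_ONCE(rcu_state.gp_state);        /* lockless sample */

        pr_info("rcu_state.gp_state: %d\n", gps);
}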
@@ -1930,17 +1939,22 @@ static void rcu_gp_fqs_loop(void)
        ret = 0;
        for (;;) {
                if (!ret) {
-                       rcu_state.jiffies_force_qs = jiffies + j;
+                       WRITE_ONCE(rcu_state.jiffies_force_qs, jiffies + j);
+                       /*
+                        * jiffies_force_qs before RCU_GP_WAIT_FQS state
+                        * update; required for stall checks.
+                        */
+                       smp_wmb();
                        WRITE_ONCE(rcu_state.jiffies_kick_kthreads,
                                   jiffies + (j ? 3 * j : 2));
                }
                trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq,
                                       TPS("fqswait"));
-               rcu_state.gp_state = RCU_GP_WAIT_FQS;
+               WRITE_ONCE(rcu_state.gp_state, RCU_GP_WAIT_FQS);
                ret = swait_event_idle_timeout_exclusive(
                                rcu_state.gp_wq, rcu_gp_fqs_check_wake(&gf), j);
                rcu_gp_torture_wait();
-               rcu_state.gp_state = RCU_GP_DOING_FQS;
+               WRITE_ONCE(rcu_state.gp_state, RCU_GP_DOING_FQS);
                /* Locking provides needed memory barriers. */
                /* If grace period done, leave loop. */
                if (!READ_ONCE(rnp->qsmask) &&
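/*
 * Hedged sketch of the ordering provided by the smp_wmb() above, using
 * an invented reader: if a lockless checker sees gp_state equal to
 * RCU_GP_WAIT_FQS, it must also see the updated jiffies_force_qs, so the
 * read side orders the two loads with smp_rmb().
 */
static bool gp_fqs_deadline_passed(void)
{
        if (READ_ONCE(rcu_state.gp_state) != RCU_GP_WAIT_FQS)
                return false;
        smp_rmb(); /* ->gp_state before ->jiffies_force_qs; pairs with smp_wmb(). */
        return time_after(jiffies, READ_ONCE(rcu_state.jiffies_force_qs));
}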
@@ -2054,7 +2068,7 @@ static void rcu_gp_cleanup(void)
        trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("end"));
        rcu_seq_end(&rcu_state.gp_seq);
        ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq);
-       rcu_state.gp_state = RCU_GP_IDLE;
+       WRITE_ONCE(rcu_state.gp_state, RCU_GP_IDLE);
        /* Check for GP requests since above loop. */
        rdp = this_cpu_ptr(&rcu_data);
        if (!needgp && ULONG_CMP_LT(rnp->gp_seq, rnp->gp_seq_needed)) {
@@ -2093,12 +2107,12 @@ static int __noreturn rcu_gp_kthread(void *unused)
                for (;;) {
                        trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq,
                                               TPS("reqwait"));
-                       rcu_state.gp_state = RCU_GP_WAIT_GPS;
+                       WRITE_ONCE(rcu_state.gp_state, RCU_GP_WAIT_GPS);
                        swait_event_idle_exclusive(rcu_state.gp_wq,
                                         READ_ONCE(rcu_state.gp_flags) &
                                         RCU_GP_FLAG_INIT);
                        rcu_gp_torture_wait();
-                       rcu_state.gp_state = RCU_GP_DONE_GPS;
+                       WRITE_ONCE(rcu_state.gp_state, RCU_GP_DONE_GPS);
                        /* Locking provides needed memory barrier. */
                        if (rcu_gp_init())
                                break;
@@ -2113,9 +2127,9 @@ static int __noreturn rcu_gp_kthread(void *unused)
                rcu_gp_fqs_loop();
 
                /* Handle grace-period end. */
-               rcu_state.gp_state = RCU_GP_CLEANUP;
+               WRITE_ONCE(rcu_state.gp_state, RCU_GP_CLEANUP);
                rcu_gp_cleanup();
-               rcu_state.gp_state = RCU_GP_CLEANED;
+               WRITE_ONCE(rcu_state.gp_state, RCU_GP_CLEANED);
        }
 }
 
@@ -2430,11 +2444,12 @@ int rcutree_dead_cpu(unsigned int cpu)
 static void rcu_do_batch(struct rcu_data *rdp)
 {
        int div;
+       bool __maybe_unused empty;
        unsigned long flags;
        const bool offloaded = rcu_segcblist_is_offloaded(&rdp->cblist);
        struct rcu_head *rhp;
        struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl);
-       long bl, count;
+       long bl, count = 0;
        long pending, tlimit = 0;
 
        /* If no callbacks are ready, just return. */
@@ -2471,14 +2486,18 @@ static void rcu_do_batch(struct rcu_data *rdp)
        rcu_segcblist_extract_done_cbs(&rdp->cblist, &rcl);
        if (offloaded)
                rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist);
+
+       trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCbDequeued"));
        rcu_nocb_unlock_irqrestore(rdp, flags);
 
        /* Invoke callbacks. */
        tick_dep_set_task(current, TICK_DEP_BIT_RCU);
        rhp = rcu_cblist_dequeue(&rcl);
+
        for (; rhp; rhp = rcu_cblist_dequeue(&rcl)) {
                rcu_callback_t f;
 
+               count++;
                debug_rcu_head_unqueue(rhp);
 
                rcu_lock_acquire(&rcu_callback_map);
@@ -2492,21 +2511,19 @@ static void rcu_do_batch(struct rcu_data *rdp)
 
                /*
                 * Stop only if limit reached and CPU has something to do.
-                * Note: The rcl structure counts down from zero.
                 */
-               if (-rcl.len >= bl && !offloaded &&
+               if (count >= bl && !offloaded &&
                    (need_resched() ||
                     (!is_idle_task(current) && !rcu_is_callbacks_kthread())))
                        break;
                if (unlikely(tlimit)) {
                        /* only call local_clock() every 32 callbacks */
-                       if (likely((-rcl.len & 31) || local_clock() < tlimit))
+                       if (likely((count & 31) || local_clock() < tlimit))
                                continue;
                        /* Exceeded the time limit, so leave. */
                        break;
                }
-               if (offloaded) {
-                       WARN_ON_ONCE(in_serving_softirq());
+               if (!in_serving_softirq()) {
                        local_bh_enable();
                        lockdep_assert_irqs_enabled();
                        cond_resched_tasks_rcu_qs();
@@ -2517,15 +2534,13 @@ static void rcu_do_batch(struct rcu_data *rdp)
 
        local_irq_save(flags);
        rcu_nocb_lock(rdp);
-       count = -rcl.len;
        rdp->n_cbs_invoked += count;
        trace_rcu_batch_end(rcu_state.name, count, !!rcl.head, need_resched(),
                            is_idle_task(current), rcu_is_callbacks_kthread());
 
        /* Update counts and requeue any remaining callbacks. */
        rcu_segcblist_insert_done_cbs(&rdp->cblist, &rcl);
-       smp_mb(); /* List handling before counting for rcu_barrier(). */
-       rcu_segcblist_insert_count(&rdp->cblist, &rcl);
+       rcu_segcblist_add_len(&rdp->cblist, -count);
 
        /* Reinstate batch limit if we have worked down the excess. */
        count = rcu_segcblist_n_cbs(&rdp->cblist);
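/*
 * Hedged sketch of the batch-limit logic above, with invented names:
 * count invoked items in a local variable (instead of negating a list
 * length that counts down from zero), stop at the batch limit (the real
 * code additionally requires that the CPU has other work to do), and
 * consult the clock only every 32nd item when a time limit is armed.
 */
struct my_item { struct my_item *next; };

static void my_invoke(struct my_item *item) { /* per-item work */ }

static long my_do_batch(struct my_item **list, long bl, u64 tlimit)
{
        struct my_item *item;
        long count = 0;

        while ((item = *list) != NULL) {
                *list = item->next;
                count++;
                my_invoke(item);
                if (count >= bl)
                        break;          /* batch limit reached */
                if (tlimit && !(count & 31) && local_clock() >= tlimit)
                        break;          /* time limit exceeded */
        }
        return count;   /* caller adjusts the shared count once, by -count */
}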
@@ -2543,9 +2558,12 @@ static void rcu_do_batch(struct rcu_data *rdp)
         * The following usually indicates a double call_rcu().  To track
         * this down, try building with CONFIG_DEBUG_OBJECTS_RCU_HEAD=y.
         */
-       WARN_ON_ONCE(count == 0 && !rcu_segcblist_empty(&rdp->cblist));
+       empty = rcu_segcblist_empty(&rdp->cblist);
+       WARN_ON_ONCE(count == 0 && !empty);
        WARN_ON_ONCE(!IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
-                    count != 0 && rcu_segcblist_empty(&rdp->cblist));
+                    count != 0 && empty);
+       WARN_ON_ONCE(count == 0 && rcu_segcblist_n_segment_cbs(&rdp->cblist) != 0);
+       WARN_ON_ONCE(!empty && rcu_segcblist_n_segment_cbs(&rdp->cblist) == 0);
 
        rcu_nocb_unlock_irqrestore(rdp, flags);
 
@@ -2566,6 +2584,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
 void rcu_sched_clock_irq(int user)
 {
        trace_rcu_utilization(TPS("Start scheduler-tick"));
+       lockdep_assert_irqs_disabled();
        raw_cpu_inc(rcu_data.ticks_this_gp);
        /* The load-acquire pairs with the store-release setting to true. */
        if (smp_load_acquire(this_cpu_ptr(&rcu_data.rcu_urgent_qs))) {
@@ -2579,6 +2598,7 @@ void rcu_sched_clock_irq(int user)
        rcu_flavor_sched_clock_irq(user);
        if (rcu_pending(user))
                invoke_rcu_core();
+       lockdep_assert_irqs_disabled();
 
        trace_rcu_utilization(TPS("End scheduler-tick"));
 }
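/*
 * Hedged sketch (my_timer_tick() is invented): lockdep_assert_irqs_disabled()
 * documents the required calling context and, on lockdep-enabled kernels,
 * splats if interrupts are unexpectedly enabled; otherwise it compiles away.
 */
static void my_timer_tick(void)
{
        lockdep_assert_irqs_disabled(); /* caller must have irqs off */
        /* ... per-tick work that relies on interrupts being disabled ... */
}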
@@ -2688,7 +2708,7 @@ static __latent_entropy void rcu_core(void)
        unsigned long flags;
        struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
        struct rcu_node *rnp = rdp->mynode;
-       const bool offloaded = rcu_segcblist_is_offloaded(&rdp->cblist);
+       const bool do_batch = !rcu_segcblist_completely_offloaded(&rdp->cblist);
 
        if (cpu_is_offline(smp_processor_id()))
                return;
@@ -2708,17 +2728,17 @@ static __latent_entropy void rcu_core(void)
 
        /* No grace period and unregistered callbacks? */
        if (!rcu_gp_in_progress() &&
-           rcu_segcblist_is_enabled(&rdp->cblist) && !offloaded) {
-               local_irq_save(flags);
+           rcu_segcblist_is_enabled(&rdp->cblist) && do_batch) {
+               rcu_nocb_lock_irqsave(rdp, flags);
                if (!rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
                        rcu_accelerate_cbs_unlocked(rnp, rdp);
-               local_irq_restore(flags);
+               rcu_nocb_unlock_irqrestore(rdp, flags);
        }
 
        rcu_check_gp_start_stall(rnp, rdp, rcu_jiffies_till_stall_check());
 
        /* If there are callbacks ready, invoke them. */
-       if (!offloaded && rcu_segcblist_ready_cbs(&rdp->cblist) &&
+       if (do_batch && rcu_segcblist_ready_cbs(&rdp->cblist) &&
            likely(READ_ONCE(rcu_scheduler_fully_active)))
                rcu_do_batch(rdp);
 
@@ -2941,6 +2961,7 @@ static void check_cb_ovld(struct rcu_data *rdp)
 static void
 __call_rcu(struct rcu_head *head, rcu_callback_t func)
 {
+       static atomic_t doublefrees;
        unsigned long flags;
        struct rcu_data *rdp;
        bool was_alldone;
@@ -2954,8 +2975,10 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func)
                 * Use rcu:rcu_callback trace event to find the previous
                 * time callback was passed to __call_rcu().
                 */
-               WARN_ONCE(1, "__call_rcu(): Double-freed CB %p->%pS()!!!\n",
-                         head, head->func);
+               if (atomic_inc_return(&doublefrees) < 4) {
+                       pr_err("%s(): Double-freed CB %p->%pS()!!!  ", __func__, head, head->func);
+                       mem_dump_obj(head);
+               }
                WRITE_ONCE(head->func, rcu_leak_callback);
                return;
        }
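/*
 * Hedged sketch of the rate-limiting idiom above (report_double_free()
 * is invented): a static atomic counter lets only the first few
 * occurrences of a rare error reach the log, so a storm of double frees
 * cannot flood it.
 */
static void report_double_free(void *obj)
{
        static atomic_t complaints;

        if (atomic_inc_return(&complaints) < 4) /* report the first three */
                pr_err("%s(): double free of %p\n", __func__, obj);
}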
@@ -2989,6 +3012,8 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func)
                trace_rcu_callback(rcu_state.name, head,
                                   rcu_segcblist_n_cbs(&rdp->cblist));
 
+       trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCBQueued"));
+
        /* Go handle any RCU core processing required. */
        if (unlikely(rcu_segcblist_is_offloaded(&rdp->cblist))) {
                __call_rcu_nocb_wake(rdp, was_alldone, flags); /* unlocks */
@@ -3498,6 +3523,7 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
                goto unlock_return;
        }
 
+       kasan_record_aux_stack(ptr);
        success = kvfree_call_rcu_add_ptr_to_bulk(krcp, ptr);
        if (!success) {
                run_page_cache_worker(krcp);
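/*
 * Hedged sketch (my_defer_free() and queue_free() are invented):
 * kasan_record_aux_stack() attaches the current call stack to the object
 * as auxiliary information, so a later KASAN report for that memory can
 * also show where its deferred free was requested; it is a no-op when
 * KASAN is not enabled.
 */
static void my_defer_free(void *obj, void (*queue_free)(void *))
{
        kasan_record_aux_stack(obj);    /* remember who requested the free */
        queue_free(obj);                /* hypothetical deferred-free path */
}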
@@ -3747,6 +3773,8 @@ static int rcu_pending(int user)
        struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
        struct rcu_node *rnp = rdp->mynode;
 
+       lockdep_assert_irqs_disabled();
+
        /* Check for CPU stalls, if enabled. */
        check_cpu_stall(rdp);
 
@@ -4001,12 +4029,18 @@ int rcutree_prepare_cpu(unsigned int cpu)
        rdp->qlen_last_fqs_check = 0;
        rdp->n_force_qs_snap = rcu_state.n_force_qs;
        rdp->blimit = blimit;
-       if (rcu_segcblist_empty(&rdp->cblist) && /* No early-boot CBs? */
-           !rcu_segcblist_is_offloaded(&rdp->cblist))
-               rcu_segcblist_init(&rdp->cblist);  /* Re-enable callbacks. */
        rdp->dynticks_nesting = 1;      /* CPU not up, no tearing. */
        rcu_dynticks_eqs_online();
        raw_spin_unlock_rcu_node(rnp);          /* irqs remain disabled. */
+       /*
+        * Lock in case the CB/GP kthreads are still around handling
+        * old callbacks (longer term we should flush all callbacks
+        * before completing CPU offline)
+        */
+       rcu_nocb_lock(rdp);
+       if (rcu_segcblist_empty(&rdp->cblist)) /* No early-boot CBs? */
+               rcu_segcblist_init(&rdp->cblist);  /* Re-enable callbacks. */
+       rcu_nocb_unlock(rdp);
 
        /*
         * Add CPU to leaf rcu_node pending-online bitmask.  Any needed
@@ -4159,6 +4193,9 @@ void rcu_report_dead(unsigned int cpu)
        struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
        struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rdp & rnp. */
 
+       // Do any dangling deferred wakeups.
+       do_nocb_deferred_wakeup(rdp);
+
        /* QS for any half-done expedited grace period. */
        preempt_disable();
        rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));