Merge branches 'doc.2021.01.06a', 'fixes.2021.01.04b', 'kfree_rcu.2021.01.04a', ...
[sfrench/cifs-2.6.git] / kernel / rcu / tree_stall.h
index 70d48c52fabc9947397c8a06eacde26c15c56722..475b26171b20fff053842f265fc5b1dc2d7fcd17 100644 (file)
@@ -266,6 +266,7 @@ static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags)
        struct task_struct *t;
        struct task_struct *ts[8];
 
+       lockdep_assert_irqs_disabled();
        if (!rcu_preempt_blocked_readers_cgp(rnp))
                return 0;
        pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):",
@@ -290,6 +291,7 @@ static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags)
                                ".q"[rscr.rs.b.need_qs],
                                ".e"[rscr.rs.b.exp_hint],
                                ".l"[rscr.on_blkd_list]);
+               lockdep_assert_irqs_disabled();
                put_task_struct(t);
                ndetected++;
        }
@@ -333,9 +335,12 @@ static void rcu_dump_cpu_stacks(void)
        rcu_for_each_leaf_node(rnp) {
                raw_spin_lock_irqsave_rcu_node(rnp, flags);
                for_each_leaf_node_possible_cpu(rnp, cpu)
-                       if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu))
-                               if (!trigger_single_cpu_backtrace(cpu))
+                       if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) {
+                               if (cpu_is_offline(cpu))
+                                       pr_err("Offline CPU %d blocking current GP.\n", cpu);
+                               else if (!trigger_single_cpu_backtrace(cpu))
                                        dump_cpu_task(cpu);
+                       }
                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
        }
 }
@@ -449,25 +454,66 @@ static void print_cpu_stall_info(int cpu)
 /* Complain about starvation of grace-period kthread.  */
 static void rcu_check_gp_kthread_starvation(void)
 {
+       int cpu;
        struct task_struct *gpk = rcu_state.gp_kthread;
        unsigned long j;
 
        if (rcu_is_gp_kthread_starving(&j)) {
+               cpu = gpk ? task_cpu(gpk) : -1; // -1 if ->gp_kthread is NULL.
                pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n",
                       rcu_state.name, j,
                       (long)rcu_seq_current(&rcu_state.gp_seq),
                       data_race(rcu_state.gp_flags),
                       gp_state_getname(rcu_state.gp_state), rcu_state.gp_state,
-                      gpk ? gpk->state : ~0, gpk ? task_cpu(gpk) : -1);
+                      gpk ? gpk->state : ~0, cpu);
                if (gpk) {
                        pr_err("\tUnless %s kthread gets sufficient CPU time, OOM is now expected behavior.\n", rcu_state.name);
                        pr_err("RCU grace-period kthread stack dump:\n");
                        sched_show_task(gpk);
+                       if (cpu >= 0) {
+                               if (cpu_is_offline(cpu)) { // Offline: only report it, no backtrace.
+                                       pr_err("RCU GP kthread last ran on offline CPU %d.\n", cpu);
+                               } else  {
+                                       pr_err("Stack dump where RCU GP kthread last ran:\n");
+                                       if (!trigger_single_cpu_backtrace(cpu)) // Fall back if no backtrace was triggered.
+                                               dump_cpu_task(cpu);
+                               }
+                       }
                        wake_up_process(gpk);
                }
        }
 }
 
+/* Complain about a missing wakeup of the GP kthread after its fqs wait timer expired. */
+static void rcu_check_gp_kthread_expired_fqs_timer(void)
+{
+       struct task_struct *gpk = rcu_state.gp_kthread;
+       short gp_state;
+       unsigned long jiffies_fqs;
+       int cpu;
+
+       /*
+        * Read .gp_state with acquire semantics before .jiffies_force_qs so the
+        * two reads are ordered; the matching smp_wmb() is in rcu_gp_fqs_loop().
+        */
+       gp_state = smp_load_acquire(&rcu_state.gp_state);
+       jiffies_fqs = READ_ONCE(rcu_state.jiffies_force_qs);
+
+       if (gp_state == RCU_GP_WAIT_FQS && // Still in the fqs wait state...
+           time_after(jiffies, jiffies_fqs + RCU_STALL_MIGHT_MIN) && // ...past .jiffies_force_qs by more than the margin...
+           gpk && !READ_ONCE(gpk->on_rq)) { // ...and the kthread exists with ->on_rq clear.
+               cpu = task_cpu(gpk); // CPU named in the timer-handling hint below.
+               pr_err("%s kthread timer wakeup didn't happen for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx\n",
+                      rcu_state.name, (jiffies - jiffies_fqs),
+                      (long)rcu_seq_current(&rcu_state.gp_seq),
+                      data_race(rcu_state.gp_flags),
+                      gp_state_getname(RCU_GP_WAIT_FQS), RCU_GP_WAIT_FQS,
+                      gpk->state);
+               pr_err("\tPossible timer handling issue on cpu=%d timer-softirq=%u\n",
+                      cpu, kstat_softirqs_cpu(TIMER_SOFTIRQ, cpu));
+       }
+}
+
 static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
 {
        int cpu;
@@ -478,6 +524,8 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
        struct rcu_node *rnp;
        long totqlen = 0;
 
+       lockdep_assert_irqs_disabled();
+
        /* Kick and suppress, if so configured. */
        rcu_stall_kick_kthreads();
        if (rcu_stall_is_suppressed())
@@ -499,6 +547,7 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
                                }
                }
                ndetected += rcu_print_task_stall(rnp, flags); // Releases rnp->lock.
+               lockdep_assert_irqs_disabled();
        }
 
        for_each_possible_cpu(cpu)
@@ -529,6 +578,7 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
                WRITE_ONCE(rcu_state.jiffies_stall,
                           jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
 
+       rcu_check_gp_kthread_expired_fqs_timer();
        rcu_check_gp_kthread_starvation();
 
        panic_on_rcu_stall();
@@ -544,6 +594,8 @@ static void print_cpu_stall(unsigned long gps)
        struct rcu_node *rnp = rcu_get_root();
        long totqlen = 0;
 
+       lockdep_assert_irqs_disabled();
+
        /* Kick and suppress, if so configured. */
        rcu_stall_kick_kthreads();
        if (rcu_stall_is_suppressed())
@@ -564,6 +616,7 @@ static void print_cpu_stall(unsigned long gps)
                jiffies - gps,
                (long)rcu_seq_current(&rcu_state.gp_seq), totqlen);
 
+       rcu_check_gp_kthread_expired_fqs_timer();
        rcu_check_gp_kthread_starvation();
 
        rcu_dump_cpu_stacks();
@@ -598,6 +651,7 @@ static void check_cpu_stall(struct rcu_data *rdp)
        unsigned long js;
        struct rcu_node *rnp;
 
+       lockdep_assert_irqs_disabled();
        if ((rcu_stall_is_suppressed() && !READ_ONCE(rcu_kick_kthreads)) ||
            !rcu_gp_in_progress())
                return;