Merge branches 'doc.2021.01.06a', 'fixes.2021.01.04b', 'kfree_rcu.2021.01.04a', ...
kernel/rcu/rcutorture.c
index 916ea4f66e4b2380c3cc7d3bd1c10bb0601cd3c9..99657ffa66887aba18dcead4ecb6209e943c7013 100644
@@ -85,6 +85,7 @@ torture_param(bool, gp_cond, false, "Use conditional/async GP wait primitives");
 torture_param(bool, gp_exp, false, "Use expedited GP wait primitives");
 torture_param(bool, gp_normal, false,
             "Use normal (non-expedited) GP wait primitives");
+torture_param(bool, gp_poll, false, "Use polling GP wait primitives");
 torture_param(bool, gp_sync, false, "Use synchronous GP wait primitives");
 torture_param(int, irqreader, 1, "Allow RCU readers from irq handlers");
 torture_param(int, leakpointer, 0, "Leak pointer dereferences from readers");
@@ -97,6 +98,8 @@ torture_param(int, object_debug, 0,
 torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
 torture_param(int, onoff_interval, 0,
             "Time between CPU hotplugs (jiffies), 0=disable");
+torture_param(int, nocbs_nthreads, 0, "Number of NOCB toggle threads, 0 to disable");
+torture_param(int, nocbs_toggle, 1000, "Time between toggling nocb state (ms)");
 torture_param(int, read_exit_delay, 13,
              "Delay between read-then-exit episodes (s)");
 torture_param(int, read_exit_burst, 16,
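
(For reference, these are ordinary rcutorture module parameters. A hypothetical invocation exercising the NOCB toggling added above might be "modprobe rcutorture nocbs_nthreads=2 nocbs_toggle=500"; gp_poll=1 additionally requires a flavor that supplies the polling primitives, which in this series is SRCU.)
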
@@ -127,10 +130,12 @@ static char *torture_type = "rcu";
 module_param(torture_type, charp, 0444);
 MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, srcu, ...)");
 
+static int nrealnocbers;
 static int nrealreaders;
 static struct task_struct *writer_task;
 static struct task_struct **fakewriter_tasks;
 static struct task_struct **reader_tasks;
+static struct task_struct **nocb_tasks;
 static struct task_struct *stats_task;
 static struct task_struct *fqs_task;
 static struct task_struct *boost_tasks[NR_CPUS];
@@ -142,11 +147,22 @@ static struct task_struct *read_exit_task;
 
 #define RCU_TORTURE_PIPE_LEN 10
 
+// Mailbox-like structure to check RCU global memory ordering.
+struct rcu_torture_reader_check {
+       unsigned long rtc_myloops;      // Number of loops by the owning reader.
+       int rtc_chkrdr;                 // Index of reader to be checked (-1 for none).
+       unsigned long rtc_chkloops;     // Checked reader's loop count at assignment.
+       int rtc_ready;                  // Set (after a GP) when the check may proceed.
+       struct rcu_torture_reader_check *rtc_assigner; // Record of our assigner, if any.
+} ____cacheline_internodealigned_in_smp;
+
+// Update-side data structure used to check RCU readers.
 struct rcu_torture {
        struct rcu_head rtort_rcu;
        int rtort_pipe_count;
        struct list_head rtort_free;
        int rtort_mbtest;
+       struct rcu_torture_reader_check *rtort_chkp; // Reader-check record awaiting this element's GP.
 };
 
 static LIST_HEAD(rcu_torture_freelist);
@@ -157,10 +173,13 @@ static DEFINE_SPINLOCK(rcu_torture_lock);
 static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count);
 static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_batch);
 static atomic_t rcu_torture_wcount[RCU_TORTURE_PIPE_LEN + 1];
+static struct rcu_torture_reader_check *rcu_torture_reader_mbchk;
 static atomic_t n_rcu_torture_alloc;
 static atomic_t n_rcu_torture_alloc_fail;
 static atomic_t n_rcu_torture_free;
 static atomic_t n_rcu_torture_mberror;
+static atomic_t n_rcu_torture_mbchk_fail;
+static atomic_t n_rcu_torture_mbchk_tries;
 static atomic_t n_rcu_torture_error;
 static long n_rcu_torture_barrier_error;
 static long n_rcu_torture_boost_ktrerror;
@@ -174,6 +193,8 @@ static unsigned long n_read_exits;
 static struct list_head rcu_torture_removed;
 static unsigned long shutdown_jiffies;
 static unsigned long start_gp_seq;
+static atomic_long_t n_nocb_offload;
+static atomic_long_t n_nocb_deoffload;
 
 static int rcu_torture_writer_state;
 #define RTWS_FIXED_DELAY       0
@@ -183,9 +204,11 @@ static int rcu_torture_writer_state;
 #define RTWS_EXP_SYNC          4
 #define RTWS_COND_GET          5
 #define RTWS_COND_SYNC         6
-#define RTWS_SYNC              7
-#define RTWS_STUTTER           8
-#define RTWS_STOPPING          9
+#define RTWS_POLL_GET          7
+#define RTWS_POLL_WAIT         8
+#define RTWS_SYNC              9
+#define RTWS_STUTTER           10
+#define RTWS_STOPPING          11
 static const char * const rcu_torture_writer_state_names[] = {
        "RTWS_FIXED_DELAY",
        "RTWS_DELAY",
@@ -194,6 +217,8 @@ static const char * const rcu_torture_writer_state_names[] = {
        "RTWS_EXP_SYNC",
        "RTWS_COND_GET",
        "RTWS_COND_SYNC",
+       "RTWS_POLL_GET",
+       "RTWS_POLL_WAIT",
        "RTWS_SYNC",
        "RTWS_STUTTER",
        "RTWS_STOPPING",
@@ -311,12 +336,15 @@ struct rcu_torture_ops {
        void (*deferred_free)(struct rcu_torture *p);
        void (*sync)(void);
        void (*exp_sync)(void);
-       unsigned long (*get_state)(void);
+       unsigned long (*get_gp_state)(void);
+       unsigned long (*start_gp_poll)(void);
+       bool (*poll_gp_state)(unsigned long oldstate);
        void (*cond_sync)(unsigned long oldstate);
        call_rcu_func_t call;
        void (*cb_barrier)(void);
        void (*fqs)(void);
        void (*stats)(void);
+       void (*gp_kthread_dbg)(void);
        int (*stall_dur)(void);
        int irq_capable;
        int can_boost;
@@ -385,7 +413,12 @@ static bool
 rcu_torture_pipe_update_one(struct rcu_torture *rp)
 {
        int i;
+       struct rcu_torture_reader_check *rtrcp = READ_ONCE(rp->rtort_chkp);
 
+       if (rtrcp) {
+               WRITE_ONCE(rp->rtort_chkp, NULL);
+               smp_store_release(&rtrcp->rtc_ready, 1); // Pair with smp_load_acquire().
+       }
        i = READ_ONCE(rp->rtort_pipe_count);
        if (i > RCU_TORTURE_PIPE_LEN)
                i = RCU_TORTURE_PIPE_LEN;
@@ -460,12 +493,13 @@ static struct rcu_torture_ops rcu_ops = {
        .deferred_free  = rcu_torture_deferred_free,
        .sync           = synchronize_rcu,
        .exp_sync       = synchronize_rcu_expedited,
-       .get_state      = get_state_synchronize_rcu,
+       .get_gp_state   = get_state_synchronize_rcu,
        .cond_sync      = cond_synchronize_rcu,
        .call           = call_rcu,
        .cb_barrier     = rcu_barrier,
        .fqs            = rcu_force_quiescent_state,
        .stats          = NULL,
+       .gp_kthread_dbg = show_rcu_gp_kthreads,
        .stall_dur      = rcu_jiffies_till_stall_check,
        .irq_capable    = 1,
        .can_boost      = rcu_can_boost(),
@@ -568,6 +602,21 @@ static void srcu_torture_synchronize(void)
        synchronize_srcu(srcu_ctlp);
 }
 
+static unsigned long srcu_torture_get_gp_state(void)
+{
+       return get_state_synchronize_srcu(srcu_ctlp);
+}
+
+static unsigned long srcu_torture_start_gp_poll(void)
+{
+       return start_poll_synchronize_srcu(srcu_ctlp);
+}
+
+static bool srcu_torture_poll_gp_state(unsigned long oldstate)
+{
+       return poll_state_synchronize_srcu(srcu_ctlp, oldstate);
+}
+
 static void srcu_torture_call(struct rcu_head *head,
                              rcu_callback_t func)
 {
@@ -599,6 +648,9 @@ static struct rcu_torture_ops srcu_ops = {
        .deferred_free  = srcu_torture_deferred_free,
        .sync           = srcu_torture_synchronize,
        .exp_sync       = srcu_torture_synchronize_expedited,
+       .get_gp_state   = srcu_torture_get_gp_state,
+       .start_gp_poll  = srcu_torture_start_gp_poll,
+       .poll_gp_state  = srcu_torture_poll_gp_state,
        .call           = srcu_torture_call,
        .cb_barrier     = srcu_torture_barrier,
        .stats          = srcu_torture_stats,
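
The three srcu_torture_* wrappers above map directly onto the new SRCU polling interfaces. As a minimal sketch of the underlying usage pattern (my_srcu is a hypothetical srcu_struct, not part of this patch):

	DEFINE_SRCU(my_srcu);

	static void example_srcu_poll(void)
	{
		unsigned long cookie;

		/* Snapshot the GP state, starting a grace period if needed. */
		cookie = start_poll_synchronize_srcu(&my_srcu);
		/* Poll until a full grace period has elapsed since the snapshot. */
		while (!poll_state_synchronize_srcu(&my_srcu, cookie))
			schedule_timeout_uninterruptible(1);
		/* All SRCU readers that began before the snapshot have finished. */
	}
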
@@ -693,6 +745,7 @@ static struct rcu_torture_ops tasks_ops = {
        .exp_sync       = synchronize_rcu_mult_test,
        .call           = call_rcu_tasks,
        .cb_barrier     = rcu_barrier_tasks,
+       .gp_kthread_dbg = show_rcu_tasks_classic_gp_kthread,
        .fqs            = NULL,
        .stats          = NULL,
        .irq_capable    = 1,
@@ -762,6 +815,7 @@ static struct rcu_torture_ops tasks_rude_ops = {
        .exp_sync       = synchronize_rcu_tasks_rude,
        .call           = call_rcu_tasks_rude,
        .cb_barrier     = rcu_barrier_tasks_rude,
+       .gp_kthread_dbg = show_rcu_tasks_rude_gp_kthread,
        .fqs            = NULL,
        .stats          = NULL,
        .irq_capable    = 1,
@@ -800,6 +854,7 @@ static struct rcu_torture_ops tasks_tracing_ops = {
        .exp_sync       = synchronize_rcu_tasks_trace,
        .call           = call_rcu_tasks_trace,
        .cb_barrier     = rcu_barrier_tasks_trace,
+       .gp_kthread_dbg = show_rcu_tasks_trace_gp_kthread,
        .fqs            = NULL,
        .stats          = NULL,
        .irq_capable    = 1,
@@ -912,7 +967,8 @@ static int rcu_torture_boost(void *arg)
                oldstarttime = boost_starttime;
                while (time_before(jiffies, oldstarttime)) {
                        schedule_timeout_interruptible(oldstarttime - jiffies);
-                       stutter_wait("rcu_torture_boost");
+                       if (stutter_wait("rcu_torture_boost"))
+                               sched_set_fifo_low(current);
                        if (torture_must_stop())
                                goto checkwait;
                }
@@ -932,7 +988,8 @@ static int rcu_torture_boost(void *arg)
                                                                 jiffies);
                                call_rcu_time = jiffies;
                        }
-                       stutter_wait("rcu_torture_boost");
+                       if (stutter_wait("rcu_torture_boost"))
+                               sched_set_fifo_low(current);
                        if (torture_must_stop())
                                goto checkwait;
                }
@@ -964,7 +1021,8 @@ static int rcu_torture_boost(void *arg)
                }
 
                /* Go do the stutter. */
-checkwait:     stutter_wait("rcu_torture_boost");
+checkwait:     if (stutter_wait("rcu_torture_boost"))
+                       sched_set_fifo_low(current);
        } while (!torture_must_stop());
 
        /* Clean up and exit. */
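
(The stutter_wait() changes above follow a pattern repeated throughout this patch: stutter_wait() returns true if it actually paused the caller, and while pausing it moves the task to SCHED_NORMAL at MAX_NICE, so RT kthreads such as the booster re-assert their priority with sched_set_fifo_low() and ordinary kthreads restore their saved nice value with sched_set_normal(current, oldnice).)
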
@@ -987,6 +1045,7 @@ rcu_torture_fqs(void *arg)
 {
        unsigned long fqs_resume_time;
        int fqs_burst_remaining;
+       int oldnice = task_nice(current);
 
        VERBOSE_TOROUT_STRING("rcu_torture_fqs task started");
        do {
@@ -1002,46 +1061,33 @@ rcu_torture_fqs(void *arg)
                        udelay(fqs_holdoff);
                        fqs_burst_remaining -= fqs_holdoff;
                }
-               stutter_wait("rcu_torture_fqs");
+               if (stutter_wait("rcu_torture_fqs"))
+                       sched_set_normal(current, oldnice);
        } while (!torture_must_stop());
        torture_kthread_stopping("rcu_torture_fqs");
        return 0;
 }
 
+// Used by writers to randomly choose from the available grace-period
+// primitives.  The only purpose of the initialization is to size the array.
+static int synctype[] = { RTWS_DEF_FREE, RTWS_EXP_SYNC, RTWS_COND_GET, RTWS_POLL_GET, RTWS_SYNC };
+static int nsynctypes;
+
 /*
- * RCU torture writer kthread.  Repeatedly substitutes a new structure
- * for that pointed to by rcu_torture_current, freeing the old structure
- * after a series of grace periods (the "pipeline").
+ * Determine which grace-period primitives are available.
  */
-static int
-rcu_torture_writer(void *arg)
+static void rcu_torture_write_types(void)
 {
-       bool can_expedite = !rcu_gp_is_expedited() && !rcu_gp_is_normal();
-       int expediting = 0;
-       unsigned long gp_snap;
        bool gp_cond1 = gp_cond, gp_exp1 = gp_exp, gp_normal1 = gp_normal;
-       bool gp_sync1 = gp_sync;
-       int i;
-       struct rcu_torture *rp;
-       struct rcu_torture *old_rp;
-       static DEFINE_TORTURE_RANDOM(rand);
-       int synctype[] = { RTWS_DEF_FREE, RTWS_EXP_SYNC,
-                          RTWS_COND_GET, RTWS_SYNC };
-       int nsynctypes = 0;
-
-       VERBOSE_TOROUT_STRING("rcu_torture_writer task started");
-       if (!can_expedite)
-               pr_alert("%s" TORTURE_FLAG
-                        " GP expediting controlled from boot/sysfs for %s.\n",
-                        torture_type, cur_ops->name);
+       bool gp_poll1 = gp_poll, gp_sync1 = gp_sync;
 
        /* Initialize synctype[] array.  If none set, take default. */
-       if (!gp_cond1 && !gp_exp1 && !gp_normal1 && !gp_sync1)
-               gp_cond1 = gp_exp1 = gp_normal1 = gp_sync1 = true;
-       if (gp_cond1 && cur_ops->get_state && cur_ops->cond_sync) {
+       if (!gp_cond1 && !gp_exp1 && !gp_normal1 && !gp_poll1 && !gp_sync1)
+               gp_cond1 = gp_exp1 = gp_normal1 = gp_poll1 = gp_sync1 = true;
+       if (gp_cond1 && cur_ops->get_gp_state && cur_ops->cond_sync) {
                synctype[nsynctypes++] = RTWS_COND_GET;
                pr_info("%s: Testing conditional GPs.\n", __func__);
-       } else if (gp_cond && (!cur_ops->get_state || !cur_ops->cond_sync)) {
+       } else if (gp_cond && (!cur_ops->get_gp_state || !cur_ops->cond_sync)) {
                pr_alert("%s: gp_cond without primitives.\n", __func__);
        }
        if (gp_exp1 && cur_ops->exp_sync) {
@@ -1056,12 +1102,46 @@ rcu_torture_writer(void *arg)
        } else if (gp_normal && !cur_ops->deferred_free) {
                pr_alert("%s: gp_normal without primitives.\n", __func__);
        }
+       if (gp_poll1 && cur_ops->start_gp_poll && cur_ops->poll_gp_state) {
+               synctype[nsynctypes++] = RTWS_POLL_GET;
+               pr_info("%s: Testing polling GPs.\n", __func__);
+       } else if (gp_poll && (!cur_ops->start_gp_poll || !cur_ops->poll_gp_state)) {
+               pr_alert("%s: gp_poll without primitives.\n", __func__);
+       }
        if (gp_sync1 && cur_ops->sync) {
                synctype[nsynctypes++] = RTWS_SYNC;
                pr_info("%s: Testing normal GPs.\n", __func__);
        } else if (gp_sync && !cur_ops->sync) {
                pr_alert("%s: gp_sync without primitives.\n", __func__);
        }
+}
+
+/*
+ * RCU torture writer kthread.  Repeatedly substitutes a new structure
+ * for that pointed to by rcu_torture_current, freeing the old structure
+ * after a series of grace periods (the "pipeline").
+ */
+static int
+rcu_torture_writer(void *arg)
+{
+       bool boot_ended;
+       bool can_expedite = !rcu_gp_is_expedited() && !rcu_gp_is_normal();
+       unsigned long cookie;
+       int expediting = 0;
+       unsigned long gp_snap;
+       int i;
+       int idx;
+       int oldnice = task_nice(current);
+       struct rcu_torture *rp;
+       struct rcu_torture *old_rp;
+       static DEFINE_TORTURE_RANDOM(rand);
+       bool stutter_waited;
+
+       VERBOSE_TOROUT_STRING("rcu_torture_writer task started");
+       if (!can_expedite)
+               pr_alert("%s" TORTURE_FLAG
+                        " GP expediting controlled from boot/sysfs for %s.\n",
+                        torture_type, cur_ops->name);
        if (WARN_ONCE(nsynctypes == 0,
                      "rcu_torture_writer: No update-side primitives.\n")) {
                /*
@@ -1075,7 +1155,7 @@ rcu_torture_writer(void *arg)
 
        do {
                rcu_torture_writer_state = RTWS_FIXED_DELAY;
-               schedule_timeout_uninterruptible(1);
+               torture_hrtimeout_us(500, 1000, &rand);
                rp = rcu_torture_alloc();
                if (rp == NULL)
                        continue;
@@ -1095,6 +1175,18 @@ rcu_torture_writer(void *arg)
                        atomic_inc(&rcu_torture_wcount[i]);
                        WRITE_ONCE(old_rp->rtort_pipe_count,
                                   old_rp->rtort_pipe_count + 1);
+                       if (cur_ops->get_gp_state && cur_ops->poll_gp_state) {
+                               idx = cur_ops->readlock();
+                               cookie = cur_ops->get_gp_state();
+                               WARN_ONCE(rcu_torture_writer_state != RTWS_DEF_FREE &&
+                                         cur_ops->poll_gp_state(cookie),
+                                         "%s: Cookie check 1 failed %s(%d) %lu->%lu\n",
+                                         __func__,
+                                         rcu_torture_writer_state_getname(),
+                                         rcu_torture_writer_state,
+                                         cookie, cur_ops->get_gp_state());
+                               cur_ops->readunlock(idx);
+                       }
                        switch (synctype[torture_random(&rand) % nsynctypes]) {
                        case RTWS_DEF_FREE:
                                rcu_torture_writer_state = RTWS_DEF_FREE;
@@ -1107,15 +1199,21 @@ rcu_torture_writer(void *arg)
                                break;
                        case RTWS_COND_GET:
                                rcu_torture_writer_state = RTWS_COND_GET;
-                               gp_snap = cur_ops->get_state();
-                               i = torture_random(&rand) % 16;
-                               if (i != 0)
-                                       schedule_timeout_interruptible(i);
-                               udelay(torture_random(&rand) % 1000);
+                               gp_snap = cur_ops->get_gp_state();
+                               torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand);
                                rcu_torture_writer_state = RTWS_COND_SYNC;
                                cur_ops->cond_sync(gp_snap);
                                rcu_torture_pipe_update(old_rp);
                                break;
+                       case RTWS_POLL_GET:
+                               rcu_torture_writer_state = RTWS_POLL_GET;
+                               gp_snap = cur_ops->start_gp_poll();
+                               rcu_torture_writer_state = RTWS_POLL_WAIT;
+                               while (!cur_ops->poll_gp_state(gp_snap))
+                                       torture_hrtimeout_jiffies(torture_random(&rand) % 16,
+                                                                 &rand);
+                               rcu_torture_pipe_update(old_rp);
+                               break;
                        case RTWS_SYNC:
                                rcu_torture_writer_state = RTWS_SYNC;
                                cur_ops->sync();
@@ -1125,6 +1223,14 @@ rcu_torture_writer(void *arg)
                                WARN_ON_ONCE(1);
                                break;
                        }
+                       if (cur_ops->get_gp_state && cur_ops->poll_gp_state)
+                               WARN_ONCE(rcu_torture_writer_state != RTWS_DEF_FREE &&
+                                         !cur_ops->poll_gp_state(cookie),
+                                         "%s: Cookie check 2 failed %s(%d) %lu->%lu\n",
+                                         __func__,
+                                         rcu_torture_writer_state_getname(),
+                                         rcu_torture_writer_state,
+                                         cookie, cur_ops->get_gp_state());
                }
                WRITE_ONCE(rcu_torture_current_version,
                           rcu_torture_current_version + 1);
@@ -1143,11 +1249,13 @@ rcu_torture_writer(void *arg)
                                       !rcu_gp_is_normal();
                }
                rcu_torture_writer_state = RTWS_STUTTER;
-               if (stutter_wait("rcu_torture_writer") &&
+               boot_ended = rcu_inkernel_boot_has_ended();
+               stutter_waited = stutter_wait("rcu_torture_writer");
+               if (stutter_waited &&
                    !READ_ONCE(rcu_fwd_cb_nodelay) &&
                    !cur_ops->slow_gps &&
                    !torture_must_stop() &&
-                   rcu_inkernel_boot_has_ended())
+                   boot_ended)
                        for (i = 0; i < ARRAY_SIZE(rcu_tortures); i++)
                                if (list_empty(&rcu_tortures[i].rtort_free) &&
                                    rcu_access_pointer(rcu_torture_current) !=
@@ -1155,6 +1263,8 @@ rcu_torture_writer(void *arg)
                                        rcu_ftrace_dump(DUMP_ALL);
                                        WARN(1, "%s: rtort_pipe_count: %d\n", __func__, rcu_tortures[i].rtort_pipe_count);
                                }
+               if (stutter_waited)
+                       sched_set_normal(current, oldnice);
        } while (!torture_must_stop());
        rcu_torture_current = NULL;  // Let stats task know that we are done.
        /* Reset expediting back to unexpedited. */
@@ -1179,26 +1289,43 @@ rcu_torture_writer(void *arg)
 static int
 rcu_torture_fakewriter(void *arg)
 {
+       unsigned long gp_snap;
        DEFINE_TORTURE_RANDOM(rand);
 
        VERBOSE_TOROUT_STRING("rcu_torture_fakewriter task started");
        set_user_nice(current, MAX_NICE);
 
        do {
-               schedule_timeout_uninterruptible(1 + torture_random(&rand)%10);
-               udelay(torture_random(&rand) & 0x3ff);
+               torture_hrtimeout_jiffies(torture_random(&rand) % 10, &rand);
                if (cur_ops->cb_barrier != NULL &&
                    torture_random(&rand) % (nfakewriters * 8) == 0) {
                        cur_ops->cb_barrier();
-               } else if (gp_normal == gp_exp) {
-                       if (cur_ops->sync && torture_random(&rand) & 0x80)
-                               cur_ops->sync();
-                       else if (cur_ops->exp_sync)
+               } else {
+                       switch (synctype[torture_random(&rand) % nsynctypes]) {
+                       case RTWS_DEF_FREE:
+                               break;
+                       case RTWS_EXP_SYNC:
                                cur_ops->exp_sync();
-               } else if (gp_normal && cur_ops->sync) {
-                       cur_ops->sync();
-               } else if (cur_ops->exp_sync) {
-                       cur_ops->exp_sync();
+                               break;
+                       case RTWS_COND_GET:
+                               gp_snap = cur_ops->get_gp_state();
+                               torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand);
+                               cur_ops->cond_sync(gp_snap);
+                               break;
+                       case RTWS_POLL_GET:
+                               gp_snap = cur_ops->start_gp_poll();
+                               while (!cur_ops->poll_gp_state(gp_snap)) {
+                                       torture_hrtimeout_jiffies(torture_random(&rand) % 16,
+                                                                 &rand);
+                               }
+                               break;
+                       case RTWS_SYNC:
+                               cur_ops->sync();
+                               break;
+                       default:
+                               WARN_ON_ONCE(1);
+                               break;
+                       }
                }
                stutter_wait("rcu_torture_fakewriter");
        } while (!torture_must_stop());
@@ -1212,6 +1339,62 @@ static void rcu_torture_timer_cb(struct rcu_head *rhp)
        kfree(rhp);
 }
 
+// Set up and carry out testing of RCU's global memory ordering.
+static void rcu_torture_reader_do_mbchk(long myid, struct rcu_torture *rtp,
+                                       struct torture_random_state *trsp)
+{
+       unsigned long loops;
+       int noc = torture_num_online_cpus();
+       int rdrchked;
+       int rdrchker;
+       struct rcu_torture_reader_check *rtrcp; // Me.
+       struct rcu_torture_reader_check *rtrcp_assigner; // Assigned us to do checking.
+       struct rcu_torture_reader_check *rtrcp_chked; // Reader being checked.
+       struct rcu_torture_reader_check *rtrcp_chker; // Reader doing checking when not me.
+
+       if (myid < 0)
+               return; // Don't try this from timer handlers.
+
+       // Increment my counter.
+       rtrcp = &rcu_torture_reader_mbchk[myid];
+       WRITE_ONCE(rtrcp->rtc_myloops, rtrcp->rtc_myloops + 1);
+
+       // Attempt to assign someone else some checking work.
+       rdrchked = torture_random(trsp) % nrealreaders;
+       rtrcp_chked = &rcu_torture_reader_mbchk[rdrchked];
+       rdrchker = torture_random(trsp) % nrealreaders;
+       rtrcp_chker = &rcu_torture_reader_mbchk[rdrchker];
+       if (rdrchked != myid && rdrchked != rdrchker && noc >= rdrchked && noc >= rdrchker &&
+           smp_load_acquire(&rtrcp->rtc_chkrdr) < 0 && // Pairs with smp_store_release below.
+           !READ_ONCE(rtp->rtort_chkp) &&
+           !smp_load_acquire(&rtrcp_chker->rtc_assigner)) { // Pairs with smp_store_release below.
+               rtrcp->rtc_chkloops = READ_ONCE(rtrcp_chked->rtc_myloops);
+               WARN_ON_ONCE(rtrcp->rtc_chkrdr >= 0);
+               rtrcp->rtc_chkrdr = rdrchked;
+               WARN_ON_ONCE(rtrcp->rtc_ready); // This gets set after the grace period ends.
+               if (cmpxchg_relaxed(&rtrcp_chker->rtc_assigner, NULL, rtrcp) ||
+                   cmpxchg_relaxed(&rtp->rtort_chkp, NULL, rtrcp))
+                       (void)cmpxchg_relaxed(&rtrcp_chker->rtc_assigner, rtrcp, NULL); // Back out.
+       }
+
+       // If assigned some completed work, do it!
+       rtrcp_assigner = READ_ONCE(rtrcp->rtc_assigner);
+       if (!rtrcp_assigner || !smp_load_acquire(&rtrcp_assigner->rtc_ready))
+               return; // No work or work not yet ready.
+       rdrchked = rtrcp_assigner->rtc_chkrdr;
+       if (WARN_ON_ONCE(rdrchked < 0))
+               return;
+       rtrcp_chked = &rcu_torture_reader_mbchk[rdrchked];
+       loops = READ_ONCE(rtrcp_chked->rtc_myloops);
+       atomic_inc(&n_rcu_torture_mbchk_tries);
+       if (ULONG_CMP_LT(loops, rtrcp_assigner->rtc_chkloops))
+               atomic_inc(&n_rcu_torture_mbchk_fail);
+       rtrcp_assigner->rtc_chkloops = loops + ULONG_MAX / 2;
+       rtrcp_assigner->rtc_ready = 0;
+       smp_store_release(&rtrcp->rtc_assigner, NULL); // Someone else can assign us work.
+       smp_store_release(&rtrcp_assigner->rtc_chkrdr, -1); // Assigner can again assign.
+}
+
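+// The handshake above relies on release-acquire pairing: the update side
+// publishes rtc_ready with smp_store_release() once the grace period has
+// ended (see rcu_torture_pipe_update_one() earlier in this diff), and the
+// checking reader consumes it with smp_load_acquire().  In isolation, and
+// with hypothetical names, the pairing looks like this sketch:
+//
+//	static unsigned long check_data;
+//	static int check_ready;
+//
+//	static void publisher(unsigned long snap)	// Update side, after GP.
+//	{
+//		check_data = snap;
+//		smp_store_release(&check_ready, 1);	// Orders check_data first.
+//	}
+//
+//	static bool consumer(unsigned long *snap)	// Checking reader.
+//	{
+//		if (!smp_load_acquire(&check_ready))	// Pairs with release.
+//			return false;
+//		*snap = check_data;	// Cannot observe stale check_data.
+//		return true;
+//	}
+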
 /*
  * Do one extension of an RCU read-side critical section using the
  * current reader state in readstate (set to zero for initial entry
@@ -1347,8 +1530,9 @@ rcutorture_loop_extend(int *readstate, struct torture_random_state *trsp,
  * no data to read.  Can be invoked both from process context and
  * from a timer handler.
  */
-static bool rcu_torture_one_read(struct torture_random_state *trsp)
+static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid)
 {
+       unsigned long cookie;
        int i;
        unsigned long started;
        unsigned long completed;
@@ -1364,6 +1548,8 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp)
        WARN_ON_ONCE(!rcu_is_watching());
        newstate = rcutorture_extend_mask(readstate, trsp);
        rcutorture_one_extend(&readstate, newstate, trsp, rtrsp++);
+       if (cur_ops->get_gp_state && cur_ops->poll_gp_state)
+               cookie = cur_ops->get_gp_state();
        started = cur_ops->get_gp_seq();
        ts = rcu_trace_clock_local();
        p = rcu_dereference_check(rcu_torture_current,
@@ -1379,6 +1565,7 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp)
        }
        if (p->rtort_mbtest == 0)
                atomic_inc(&n_rcu_torture_mberror);
+       rcu_torture_reader_do_mbchk(myid, p, trsp);
        rtrsp = rcutorture_loop_extend(&readstate, trsp, rtrsp);
        preempt_disable();
        pipe_count = READ_ONCE(p->rtort_pipe_count);
@@ -1400,6 +1587,13 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp)
        }
        __this_cpu_inc(rcu_torture_batch[completed]);
        preempt_enable();
+       if (cur_ops->get_gp_state && cur_ops->poll_gp_state)
+               WARN_ONCE(cur_ops->poll_gp_state(cookie),
+                         "%s: Cookie check 3 failed %s(%d) %lu->%lu\n",
+                         __func__,
+                         rcu_torture_writer_state_getname(),
+                         rcu_torture_writer_state,
+                         cookie, cur_ops->get_gp_state());
        rcutorture_one_extend(&readstate, 0, trsp, rtrsp);
        WARN_ON_ONCE(readstate & RCUTORTURE_RDR_MASK);
        // This next splat is expected behavior if leakpointer, especially
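
"Cookie check 3" above encodes a simple invariant: a grace-period cookie taken inside a read-side critical section must not report completion while that critical section is still running, because the reader blocks the grace period the whole time. A sketch of the invariant in SRCU terms (my_srcu again hypothetical):

	static void example_cookie_invariant(void)
	{
		unsigned long cookie;
		int idx;

		idx = srcu_read_lock(&my_srcu);
		cookie = get_state_synchronize_srcu(&my_srcu);
		/* ... read-side accesses ... */
		/* No full GP can have elapsed while this reader was active. */
		WARN_ON_ONCE(poll_state_synchronize_srcu(&my_srcu, cookie));
		srcu_read_unlock(&my_srcu, idx);
	}
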
@@ -1428,7 +1622,7 @@ static DEFINE_TORTURE_RANDOM_PERCPU(rcu_torture_timer_rand);
 static void rcu_torture_timer(struct timer_list *unused)
 {
        atomic_long_inc(&n_rcu_torture_timers);
-       (void)rcu_torture_one_read(this_cpu_ptr(&rcu_torture_timer_rand));
+       (void)rcu_torture_one_read(this_cpu_ptr(&rcu_torture_timer_rand), -1);
 
        /* Test call_rcu() invocation from interrupt handler. */
        if (cur_ops->call) {
@@ -1464,13 +1658,13 @@ rcu_torture_reader(void *arg)
                        if (!timer_pending(&t))
                                mod_timer(&t, jiffies + 1);
                }
-               if (!rcu_torture_one_read(&rand) && !torture_must_stop())
+               if (!rcu_torture_one_read(&rand, myid) && !torture_must_stop())
                        schedule_timeout_interruptible(HZ);
                if (time_after(jiffies, lastsleep) && !torture_must_stop()) {
-                       schedule_timeout_interruptible(1);
+                       torture_hrtimeout_us(500, 1000, &rand);
                        lastsleep = jiffies + 10;
                }
-               while (num_online_cpus() < mynumonline && !torture_must_stop())
+               while (torture_num_online_cpus() < mynumonline && !torture_must_stop())
                        schedule_timeout_interruptible(HZ / 5);
                stutter_wait("rcu_torture_reader");
        } while (!torture_must_stop());
@@ -1483,6 +1677,53 @@ rcu_torture_reader(void *arg)
        return 0;
 }
 
+/*
+ * Randomly toggle CPUs' callback-offload state.  This uses hrtimers to
+ * increase race probabilities and fuzzes the interval between toggling.
+ */
+static int rcu_nocb_toggle(void *arg)
+{
+       int cpu;
+       int maxcpu = -1;
+       int oldnice = task_nice(current);
+       long r;
+       DEFINE_TORTURE_RANDOM(rand);
+       ktime_t toggle_delay;
+       unsigned long toggle_fuzz;
+       ktime_t toggle_interval = ms_to_ktime(nocbs_toggle);
+
+       VERBOSE_TOROUT_STRING("rcu_nocb_toggle task started");
+       while (!rcu_inkernel_boot_has_ended())
+               schedule_timeout_interruptible(HZ / 10);
+       for_each_online_cpu(cpu)
+               maxcpu = cpu;
+       WARN_ON(maxcpu < 0);
+       if (toggle_interval > ULONG_MAX)
+               toggle_fuzz = ULONG_MAX >> 3;
+       else
+               toggle_fuzz = toggle_interval >> 3;
+       if (toggle_fuzz <= 0)
+               toggle_fuzz = NSEC_PER_USEC;
+       do {
+               r = torture_random(&rand);
+               cpu = (r >> 4) % (maxcpu + 1);
+               if (r & 0x1) {
+                       rcu_nocb_cpu_offload(cpu);
+                       atomic_long_inc(&n_nocb_offload);
+               } else {
+                       rcu_nocb_cpu_deoffload(cpu);
+                       atomic_long_inc(&n_nocb_deoffload);
+               }
+               toggle_delay = torture_random(&rand) % toggle_fuzz + toggle_interval;
+               set_current_state(TASK_INTERRUPTIBLE);
+               schedule_hrtimeout(&toggle_delay, HRTIMER_MODE_REL);
+               if (stutter_wait("rcu_nocb_toggle"))
+                       sched_set_normal(current, oldnice);
+       } while (!torture_must_stop());
+       torture_kthread_stopping("rcu_nocb_toggle");
+       return 0;
+}
+
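+// For reference, a sketch of the runtime toggling interface this kthread
+// drives (not part of this patch; both calls return 0 on success):
+//
+//	static int example_toggle(int cpu)
+//	{
+//		int ret;
+//
+//		ret = rcu_nocb_cpu_deoffload(cpu); // Callbacks to softirq.
+//		if (ret)
+//			return ret;
+//		return rcu_nocb_cpu_offload(cpu);  // Callbacks to rcuo kthreads.
+//	}
+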
 /*
  * Print torture statistics.  Caller must ensure that there is only
  * one call to this function at a given time!!!  This is normally
@@ -1524,8 +1765,9 @@ rcu_torture_stats_print(void)
                atomic_read(&n_rcu_torture_alloc),
                atomic_read(&n_rcu_torture_alloc_fail),
                atomic_read(&n_rcu_torture_free));
-       pr_cont("rtmbe: %d rtbe: %ld rtbke: %ld rtbre: %ld ",
+       pr_cont("rtmbe: %d rtmbkf: %d/%d rtbe: %ld rtbke: %ld rtbre: %ld ",
                atomic_read(&n_rcu_torture_mberror),
+               atomic_read(&n_rcu_torture_mbchk_fail), atomic_read(&n_rcu_torture_mbchk_tries),
                n_rcu_torture_barrier_error,
                n_rcu_torture_boost_ktrerror,
                n_rcu_torture_boost_rterror);
@@ -1538,16 +1780,20 @@ rcu_torture_stats_print(void)
                data_race(n_barrier_successes),
                data_race(n_barrier_attempts),
                data_race(n_rcu_torture_barrier_error));
-       pr_cont("read-exits: %ld\n", data_race(n_read_exits));
+       pr_cont("read-exits: %ld ", data_race(n_read_exits)); // Statistic.
+       pr_cont("nocb-toggles: %ld:%ld\n",
+               atomic_long_read(&n_nocb_offload), atomic_long_read(&n_nocb_deoffload));
 
        pr_alert("%s%s ", torture_type, TORTURE_FLAG);
        if (atomic_read(&n_rcu_torture_mberror) ||
+           atomic_read(&n_rcu_torture_mbchk_fail) ||
            n_rcu_torture_barrier_error || n_rcu_torture_boost_ktrerror ||
            n_rcu_torture_boost_rterror || n_rcu_torture_boost_failure ||
            i > 1) {
                pr_cont("%s", "!!! ");
                atomic_inc(&n_rcu_torture_error);
                WARN_ON_ONCE(atomic_read(&n_rcu_torture_mberror));
+               WARN_ON_ONCE(atomic_read(&n_rcu_torture_mbchk_fail));
                WARN_ON_ONCE(n_rcu_torture_barrier_error);  // rcu_barrier()
                WARN_ON_ONCE(n_rcu_torture_boost_ktrerror); // no boost kthread
                WARN_ON_ONCE(n_rcu_torture_boost_rterror); // can't set RT prio
@@ -1594,7 +1840,8 @@ rcu_torture_stats_print(void)
                        sched_show_task(wtp);
                        splatted = true;
                }
-               show_rcu_gp_kthreads();
+               if (cur_ops->gp_kthread_dbg)
+                       cur_ops->gp_kthread_dbg();
                rcu_ftrace_dump(DUMP_ALL);
        }
        rtcv_snap = rcu_torture_current_version;
@@ -1631,7 +1878,8 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag)
                 "stall_cpu_block=%d "
                 "n_barrier_cbs=%d "
                 "onoff_interval=%d onoff_holdoff=%d "
-                "read_exit_delay=%d read_exit_burst=%d\n",
+                "read_exit_delay=%d read_exit_burst=%d "
+                "nocbs_nthreads=%d nocbs_toggle=%d\n",
                 torture_type, tag, nrealreaders, nfakewriters,
                 stat_interval, verbose, test_no_idle_hz, shuffle_interval,
                 stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter,
@@ -1641,7 +1889,8 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag)
                 stall_cpu_block,
                 n_barrier_cbs,
                 onoff_interval, onoff_holdoff,
-                read_exit_delay, read_exit_burst);
+                read_exit_delay, read_exit_burst,
+                nocbs_nthreads, nocbs_toggle);
 }
 
 static int rcutorture_booster_cleanup(unsigned int cpu)
@@ -1913,7 +2162,9 @@ static void rcu_torture_fwd_prog_nr(struct rcu_fwd *rfp,
        unsigned long stopat;
        static DEFINE_TORTURE_RANDOM(trs);
 
-       if  (cur_ops->call && cur_ops->sync && cur_ops->cb_barrier) {
+       if (!cur_ops->sync)
+               return; // Cannot do need_resched() forward progress testing without ->sync.
+       if (cur_ops->call && cur_ops->cb_barrier) {
                init_rcu_head_on_stack(&fcs.rh);
                selfpropcb = true;
        }
@@ -2103,6 +2354,7 @@ static struct notifier_block rcutorture_oom_nb = {
 /* Carry out grace-period forward-progress testing. */
 static int rcu_torture_fwd_prog(void *args)
 {
+       int oldnice = task_nice(current);
        struct rcu_fwd *rfp = args;
        int tested = 0;
        int tested_tries = 0;
@@ -2121,7 +2373,8 @@ static int rcu_torture_fwd_prog(void *args)
                        rcu_torture_fwd_prog_cr(rfp);
 
                /* Avoid slow periods, better to test when busy. */
-               stutter_wait("rcu_torture_fwd_prog");
+               if (stutter_wait("rcu_torture_fwd_prog"))
+                       sched_set_normal(current, oldnice);
        } while (!torture_must_stop());
        /* Short runs might not contain a valid forward-progress attempt. */
        WARN_ON(!tested && tested_tries >= 5);
@@ -2137,8 +2390,8 @@ static int __init rcu_torture_fwd_prog_init(void)
 
        if (!fwd_progress)
                return 0; /* Not requested, so don't do it. */
-       if (!cur_ops->stall_dur || cur_ops->stall_dur() <= 0 ||
-           cur_ops == &rcu_busted_ops) {
+       if ((!cur_ops->sync && !cur_ops->call) ||
+           !cur_ops->stall_dur || cur_ops->stall_dur() <= 0 || cur_ops == &rcu_busted_ops) {
                VERBOSE_TOROUT_STRING("rcu_torture_fwd_prog_init: Disabled, unsupported by RCU flavor under test");
                return 0;
        }
@@ -2372,7 +2625,7 @@ static int rcu_torture_read_exit_child(void *trsp_in)
        // Minimize time between reading and exiting.
        while (!kthread_should_stop())
                schedule_timeout_uninterruptible(1);
-       (void)rcu_torture_one_read(trsp);
+       (void)rcu_torture_one_read(trsp, -1);
        return 0;
 }
 
@@ -2472,25 +2725,35 @@ rcu_torture_cleanup(void)
                return;
        }
 
-       show_rcu_gp_kthreads();
+       if (cur_ops->gp_kthread_dbg)
+               cur_ops->gp_kthread_dbg();
        rcu_torture_read_exit_cleanup();
        rcu_torture_barrier_cleanup();
        rcu_torture_fwd_prog_cleanup();
        torture_stop_kthread(rcu_torture_stall, stall_task);
        torture_stop_kthread(rcu_torture_writer, writer_task);
 
+       if (nocb_tasks) {
+               for (i = 0; i < nrealnocbers; i++)
+                       torture_stop_kthread(rcu_nocb_toggle, nocb_tasks[i]);
+               kfree(nocb_tasks);
+               nocb_tasks = NULL;
+       }
+
        if (reader_tasks) {
                for (i = 0; i < nrealreaders; i++)
                        torture_stop_kthread(rcu_torture_reader,
                                             reader_tasks[i]);
                kfree(reader_tasks);
+               reader_tasks = NULL;
        }
+       kfree(rcu_torture_reader_mbchk);
+       rcu_torture_reader_mbchk = NULL;
 
        if (fakewriter_tasks) {
-               for (i = 0; i < nfakewriters; i++) {
+               for (i = 0; i < nfakewriters; i++)
                        torture_stop_kthread(rcu_torture_fakewriter,
                                             fakewriter_tasks[i]);
-               }
                kfree(fakewriter_tasks);
                fakewriter_tasks = NULL;
        }
@@ -2583,6 +2846,7 @@ static void rcu_test_debug_objects(void)
 #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
        struct rcu_head rh1;
        struct rcu_head rh2;
+       struct rcu_head *rhp = kmalloc(sizeof(*rhp), GFP_KERNEL);
 
        init_rcu_head_on_stack(&rh1);
        init_rcu_head_on_stack(&rh2);
@@ -2595,6 +2859,10 @@ static void rcu_test_debug_objects(void)
        local_irq_disable(); /* Make it harder to start a new grace period. */
        call_rcu(&rh2, rcu_torture_leak_cb);
        call_rcu(&rh2, rcu_torture_err_cb); /* Duplicate callback. */
+       if (rhp) {
+               call_rcu(rhp, rcu_torture_leak_cb);
+               call_rcu(rhp, rcu_torture_err_cb); /* Another duplicate callback. */
+       }
        local_irq_enable();
        rcu_read_unlock();
        preempt_enable();
@@ -2647,7 +2915,6 @@ rcu_torture_init(void)
                for (i = 0; i < ARRAY_SIZE(torture_ops); i++)
                        pr_cont(" %s", torture_ops[i]->name);
                pr_cont("\n");
-               WARN_ON(!IS_MODULE(CONFIG_RCU_TORTURE_TEST));
                firsterr = -EINVAL;
                cur_ops = NULL;
                goto unwind;
@@ -2690,6 +2957,8 @@ rcu_torture_init(void)
        atomic_set(&n_rcu_torture_alloc_fail, 0);
        atomic_set(&n_rcu_torture_free, 0);
        atomic_set(&n_rcu_torture_mberror, 0);
+       atomic_set(&n_rcu_torture_mbchk_fail, 0);
+       atomic_set(&n_rcu_torture_mbchk_tries, 0);
        atomic_set(&n_rcu_torture_error, 0);
        n_rcu_torture_barrier_error = 0;
        n_rcu_torture_boost_ktrerror = 0;
@@ -2709,6 +2978,7 @@ rcu_torture_init(void)
 
        /* Start up the kthreads. */
 
+       rcu_torture_write_types();
        firsterr = torture_create_kthread(rcu_torture_writer, NULL,
                                          writer_task);
        if (firsterr)
@@ -2731,17 +3001,40 @@ rcu_torture_init(void)
        }
        reader_tasks = kcalloc(nrealreaders, sizeof(reader_tasks[0]),
                               GFP_KERNEL);
-       if (reader_tasks == NULL) {
+       rcu_torture_reader_mbchk = kcalloc(nrealreaders, sizeof(*rcu_torture_reader_mbchk),
+                                          GFP_KERNEL);
+       if (!reader_tasks || !rcu_torture_reader_mbchk) {
                VERBOSE_TOROUT_ERRSTRING("out of memory");
                firsterr = -ENOMEM;
                goto unwind;
        }
        for (i = 0; i < nrealreaders; i++) {
+               rcu_torture_reader_mbchk[i].rtc_chkrdr = -1;
                firsterr = torture_create_kthread(rcu_torture_reader, (void *)i,
                                                  reader_tasks[i]);
                if (firsterr)
                        goto unwind;
        }
+       nrealnocbers = nocbs_nthreads;
+       if (WARN_ON(nrealnocbers < 0))
+               nrealnocbers = 1;
+       if (WARN_ON(nocbs_toggle < 0))
+               nocbs_toggle = HZ;
+       if (nrealnocbers > 0) {
+               nocb_tasks = kcalloc(nrealnocbers, sizeof(nocb_tasks[0]), GFP_KERNEL);
+               if (nocb_tasks == NULL) {
+                       VERBOSE_TOROUT_ERRSTRING("out of memory");
+                       firsterr = -ENOMEM;
+                       goto unwind;
+               }
+       } else {
+               nocb_tasks = NULL;
+       }
+       for (i = 0; i < nrealnocbers; i++) {
+               firsterr = torture_create_kthread(rcu_nocb_toggle, NULL, nocb_tasks[i]);
+               if (firsterr)
+                       goto unwind;
+       }
        if (stat_interval > 0) {
                firsterr = torture_create_kthread(rcu_torture_stats, NULL,
                                                  stats_task);
@@ -2815,6 +3108,10 @@ rcu_torture_init(void)
 unwind:
        torture_init_end();
        rcu_torture_cleanup();
+       if (shutdown_secs) {
+               WARN_ON(!IS_MODULE(CONFIG_RCU_TORTURE_TEST));
+               kernel_power_off();
+       }
        return firsterr;
 }