raid5-ppl: PPL support for disks with write-back cache enabled
[sfrench/cifs-2.6.git] / drivers / md / raid5.c
index 928e24a071338ab6e1fe7668c8caa42b2803ccfe..50d01144b80535e2e937c16021cdeeba632b201e 100644 (file)
@@ -55,7 +55,6 @@
 #include <linux/ratelimit.h>
 #include <linux/nodemask.h>
 #include <linux/flex_array.h>
-#include <linux/sched/signal.h>
 
 #include <trace/events/block.h>
 #include <linux/list_sort.h>
@@ -63,7 +62,7 @@
 #include "md.h"
 #include "raid5.h"
 #include "raid0.h"
-#include "bitmap.h"
+#include "md-bitmap.h"
 #include "raid5-log.h"
 
 #define UNSUPPORTED_MDDEV_FLAGS        (1L << MD_FAILFAST_SUPPORTED)
@@ -1818,8 +1817,11 @@ static void ops_complete_reconstruct(void *stripe_head_ref)
                struct r5dev *dev = &sh->dev[i];
 
                if (dev->written || i == pd_idx || i == qd_idx) {
-                       if (!discard && !test_bit(R5_SkipCopy, &dev->flags))
+                       if (!discard && !test_bit(R5_SkipCopy, &dev->flags)) {
                                set_bit(R5_UPTODATE, &dev->flags);
+                               if (test_bit(STRIPE_EXPAND_READY, &sh->state))
+                                       set_bit(R5_Expanded, &dev->flags);
+                       }
                        if (fua)
                                set_bit(R5_WantFUA, &dev->flags);
                        if (sync)
@@ -2675,13 +2677,13 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
        pr_debug("raid456: error called\n");
 
        spin_lock_irqsave(&conf->device_lock, flags);
+       set_bit(Faulty, &rdev->flags);
        clear_bit(In_sync, &rdev->flags);
        mddev->degraded = raid5_calc_degraded(conf);
        spin_unlock_irqrestore(&conf->device_lock, flags);
        set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 
        set_bit(Blocked, &rdev->flags);
-       set_bit(Faulty, &rdev->flags);
        set_mask_bits(&mddev->sb_flags, 0,
                      BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING));
        pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n"
@@ -5561,7 +5563,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
        bool do_flush = false;
 
        if (unlikely(bi->bi_opf & REQ_PREFLUSH)) {
-               int ret = r5l_handle_flush_request(conf->log, bi);
+               int ret = log_handle_flush_request(conf, bi);
 
                if (ret == 0)
                        return true;
@@ -5682,28 +5684,6 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
                                goto retry;
                        }
 
-                       if (rw == WRITE &&
-                           logical_sector >= mddev->suspend_lo &&
-                           logical_sector < mddev->suspend_hi) {
-                               raid5_release_stripe(sh);
-                               /* As the suspend_* range is controlled by
-                                * userspace, we want an interruptible
-                                * wait.
-                                */
-                               prepare_to_wait(&conf->wait_for_overlap,
-                                               &w, TASK_INTERRUPTIBLE);
-                               if (logical_sector >= mddev->suspend_lo &&
-                                   logical_sector < mddev->suspend_hi) {
-                                       sigset_t full, old;
-                                       sigfillset(&full);
-                                       sigprocmask(SIG_BLOCK, &full, &old);
-                                       schedule();
-                                       sigprocmask(SIG_SETMASK, &old, NULL);
-                                       do_prepare = true;
-                               }
-                               goto retry;
-                       }
-
                        if (test_bit(STRIPE_EXPANDING, &sh->state) ||
                            !add_stripe_bio(sh, bi, dd_idx, rw, previous)) {
                                /* Stripe is busy expanding or
@@ -5758,6 +5738,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
         */
        struct r5conf *conf = mddev->private;
        struct stripe_head *sh;
+       struct md_rdev *rdev;
        sector_t first_sector, last_sector;
        int raid_disks = conf->previous_raid_disks;
        int data_disks = raid_disks - conf->max_degraded;
@@ -5880,6 +5861,15 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
                        return 0;
                mddev->reshape_position = conf->reshape_progress;
                mddev->curr_resync_completed = sector_nr;
+               if (!mddev->reshape_backwards)
+                       /* Can update recovery_offset */
+                       rdev_for_each(rdev, mddev)
+                               if (rdev->raid_disk >= 0 &&
+                                   !test_bit(Journal, &rdev->flags) &&
+                                   !test_bit(In_sync, &rdev->flags) &&
+                                   rdev->recovery_offset < sector_nr)
+                                       rdev->recovery_offset = sector_nr;
+
                conf->reshape_checkpoint = jiffies;
                set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
                md_wakeup_thread(mddev->thread);
@@ -5978,6 +5968,14 @@ finish:
                        goto ret;
                mddev->reshape_position = conf->reshape_progress;
                mddev->curr_resync_completed = sector_nr;
+               if (!mddev->reshape_backwards)
+                       /* Can update recovery_offset */
+                       rdev_for_each(rdev, mddev)
+                               if (rdev->raid_disk >= 0 &&
+                                   !test_bit(Journal, &rdev->flags) &&
+                                   !test_bit(In_sync, &rdev->flags) &&
+                                   rdev->recovery_offset < sector_nr)
+                                       rdev->recovery_offset = sector_nr;
                conf->reshape_checkpoint = jiffies;
                set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
                md_wakeup_thread(mddev->thread);
@@ -6072,7 +6070,7 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_n
         */
        rcu_read_lock();
        for (i = 0; i < conf->raid_disks; i++) {
-               struct md_rdev *rdev = ACCESS_ONCE(conf->disks[i].rdev);
+               struct md_rdev *rdev = READ_ONCE(conf->disks[i].rdev);
 
                if (rdev == NULL || test_bit(Faulty, &rdev->flags))
                        still_degraded = 1;
@@ -6170,7 +6168,7 @@ static int handle_active_stripes(struct r5conf *conf, int group,
                                break;
                if (i == NR_STRIPE_HASH_LOCKS) {
                        spin_unlock_irq(&conf->device_lock);
-                       r5l_flush_stripe_to_raid(conf->log);
+                       log_flush_stripe_to_raid(conf);
                        spin_lock_irq(&conf->device_lock);
                        return batch_size;
                }
@@ -7156,6 +7154,13 @@ static int raid5_run(struct mddev *mddev)
                        min_offset_diff = diff;
        }
 
+       if ((test_bit(MD_HAS_JOURNAL, &mddev->flags) || journal_dev) &&
+           (mddev->bitmap_info.offset || mddev->bitmap_info.file)) {
+               pr_notice("md/raid:%s: array cannot have both journal and bitmap\n",
+                         mdname(mddev));
+               return -EINVAL;
+       }
+
        if (mddev->reshape_position != MaxSector) {
                /* Check that we can continue the reshape.
                 * Difficulties arise if the stripe we would write to
@@ -7958,6 +7963,7 @@ static void end_reshape(struct r5conf *conf)
 {
 
        if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) {
+               struct md_rdev *rdev;
 
                spin_lock_irq(&conf->device_lock);
                conf->previous_raid_disks = conf->raid_disks;
@@ -7965,6 +7971,11 @@ static void end_reshape(struct r5conf *conf)
                smp_wmb();
                conf->reshape_progress = MaxSector;
                conf->mddev->reshape_position = MaxSector;
+               rdev_for_each(rdev, conf->mddev)
+                       if (rdev->raid_disk >= 0 &&
+                           !test_bit(Journal, &rdev->flags) &&
+                           !test_bit(In_sync, &rdev->flags))
+                               rdev->recovery_offset = MaxSector;
                spin_unlock_irq(&conf->device_lock);
                wake_up(&conf->wait_for_overlap);
 
@@ -8020,16 +8031,12 @@ static void raid5_finish_reshape(struct mddev *mddev)
        }
 }
 
-static void raid5_quiesce(struct mddev *mddev, int state)
+static void raid5_quiesce(struct mddev *mddev, int quiesce)
 {
        struct r5conf *conf = mddev->private;
 
-       switch(state) {
-       case 2: /* resume for a suspend */
-               wake_up(&conf->wait_for_overlap);
-               break;
-
-       case 1: /* stop all writes */
+       if (quiesce) {
+               /* stop all writes */
                lock_all_device_hash_locks_irq(conf);
                /* '2' tells resync/reshape to pause so that all
                 * active stripes can drain
@@ -8045,17 +8052,15 @@ static void raid5_quiesce(struct mddev *mddev, int state)
                unlock_all_device_hash_locks_irq(conf);
                /* allow reshape to continue */
                wake_up(&conf->wait_for_overlap);
-               break;
-
-       case 0: /* re-enable writes */
+       } else {
+               /* re-enable writes */
                lock_all_device_hash_locks_irq(conf);
                conf->quiesce = 0;
                wake_up(&conf->wait_for_quiescent);
                wake_up(&conf->wait_for_overlap);
                unlock_all_device_hash_locks_irq(conf);
-               break;
        }
-       r5l_quiesce(conf->log, state);
+       log_quiesce(conf, quiesce);
 }
 
 static void *raid45_takeover_raid0(struct mddev *mddev, int level)
@@ -8359,6 +8364,13 @@ static int raid5_change_consistency_policy(struct mddev *mddev, const char *buf)
        return err;
 }
 
+static int raid5_start(struct mddev *mddev) /* installed as .start in the raid4, raid5 and raid6 personalities */
+{
+       struct r5conf *conf = mddev->private;
+
+       return r5l_start(conf->log); /* status from r5l_start() is handed back unchanged */
+}
+
 static struct md_personality raid6_personality =
 {
        .name           = "raid6",
@@ -8366,6 +8378,7 @@ static struct md_personality raid6_personality =
        .owner          = THIS_MODULE,
        .make_request   = raid5_make_request,
        .run            = raid5_run,
+       .start          = raid5_start,
        .free           = raid5_free,
        .status         = raid5_status,
        .error_handler  = raid5_error,
@@ -8390,6 +8403,7 @@ static struct md_personality raid5_personality =
        .owner          = THIS_MODULE,
        .make_request   = raid5_make_request,
        .run            = raid5_run,
+       .start          = raid5_start,
        .free           = raid5_free,
        .status         = raid5_status,
        .error_handler  = raid5_error,
@@ -8415,6 +8429,7 @@ static struct md_personality raid4_personality =
        .owner          = THIS_MODULE,
        .make_request   = raid5_make_request,
        .run            = raid5_run,
+       .start          = raid5_start,
        .free           = raid5_free,
        .status         = raid5_status,
        .error_handler  = raid5_error,