Merge tag 'usb-4.12-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb
[sfrench/cifs-2.6.git] / drivers / md / raid5.c
index f8055a7abb4b053fd74f5502e648cecdf105349e..9c4f7659f8b1337c99cfd0ab5070012e3f658849 100644 (file)
@@ -233,11 +233,15 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
                        if (test_bit(R5_InJournal, &sh->dev[i].flags))
                                injournal++;
        /*
-        * When quiesce in r5c write back, set STRIPE_HANDLE for stripes with
-        * data in journal, so they are not released to cached lists
+        * In the following cases, the stripe cannot be released to cached
+        * lists. Therefore, we make the stripe write out and set
+        * STRIPE_HANDLE:
+        *   1. when quiesce in r5c write back;
+        *   2. when resync is requested fot the stripe.
         */
-       if (conf->quiesce && r5c_is_writeback(conf->log) &&
-           !test_bit(STRIPE_HANDLE, &sh->state) && injournal != 0) {
+       if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) ||
+           (conf->quiesce && r5c_is_writeback(conf->log) &&
+            !test_bit(STRIPE_HANDLE, &sh->state) && injournal != 0)) {
                if (test_bit(STRIPE_R5C_CACHING, &sh->state))
                        r5c_make_stripe_write_out(sh);
                set_bit(STRIPE_HANDLE, &sh->state);
@@ -2689,7 +2693,7 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
                bdevname(rdev->bdev, b),
                mdname(mddev),
                conf->raid_disks - mddev->degraded);
-       r5c_update_on_rdev_error(mddev);
+       r5c_update_on_rdev_error(mddev, rdev);
 }
 
 /*
@@ -3050,6 +3054,11 @@ sector_t raid5_compute_blocknr(struct stripe_head *sh, int i, int previous)
  *      When LOG_CRITICAL, stripes with injournal == 0 will be sent to
  *      no_space_stripes list.
  *
+ *   3. during journal failure
+ *      In journal failure, we try to flush all cached data to raid disks
+ *      based on data in stripe cache. The array is read-only to upper
+ *      layers, so we would skip all pending writes.
+ *
  */
 static inline bool delay_towrite(struct r5conf *conf,
                                 struct r5dev *dev,
@@ -3063,6 +3072,9 @@ static inline bool delay_towrite(struct r5conf *conf,
        if (test_bit(R5C_LOG_CRITICAL, &conf->cache_state) &&
            s->injournal > 0)
                return true;
+       /* case 3 above */
+       if (s->log_failed && s->injournal)
+               return true;
        return false;
 }
 
@@ -4648,8 +4660,13 @@ static void handle_stripe(struct stripe_head *sh)
 
        if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) && !sh->batch_head) {
                spin_lock(&sh->stripe_lock);
-               /* Cannot process 'sync' concurrently with 'discard' */
-               if (!test_bit(STRIPE_DISCARD, &sh->state) &&
+               /*
+                * Cannot process 'sync' concurrently with 'discard'.
+                * Flush data in r5cache before 'sync'.
+                */
+               if (!test_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state) &&
+                   !test_bit(STRIPE_R5C_FULL_STRIPE, &sh->state) &&
+                   !test_bit(STRIPE_DISCARD, &sh->state) &&
                    test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
                        set_bit(STRIPE_SYNCING, &sh->state);
                        clear_bit(STRIPE_INSYNC, &sh->state);
@@ -4696,10 +4713,15 @@ static void handle_stripe(struct stripe_head *sh)
               " to_write=%d failed=%d failed_num=%d,%d\n",
               s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
               s.failed_num[0], s.failed_num[1]);
-       /* check if the array has lost more than max_degraded devices and,
+       /*
+        * check if the array has lost more than max_degraded devices and,
         * if so, some requests might need to be failed.
+        *
+        * When journal device failed (log_failed), we will only process
+        * the stripe if there is data need write to raid disks
         */
-       if (s.failed > conf->max_degraded || s.log_failed) {
+       if (s.failed > conf->max_degraded ||
+           (s.log_failed && s.injournal == 0)) {
                sh->check_state = 0;
                sh->reconstruct_state = 0;
                break_stripe_batch_list(sh, 0);
@@ -5272,8 +5294,10 @@ static struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group)
        struct stripe_head *sh, *tmp;
        struct list_head *handle_list = NULL;
        struct r5worker_group *wg;
-       bool second_try = !r5c_is_writeback(conf->log);
-       bool try_loprio = test_bit(R5C_LOG_TIGHT, &conf->cache_state);
+       bool second_try = !r5c_is_writeback(conf->log) &&
+               !r5l_log_disk_error(conf);
+       bool try_loprio = test_bit(R5C_LOG_TIGHT, &conf->cache_state) ||
+               r5l_log_disk_error(conf);
 
 again:
        wg = NULL;
@@ -7521,7 +7545,9 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
                 * neilb: there is no locking about new writes here,
                 * so this cannot be safe.
                 */
-               if (atomic_read(&conf->active_stripes)) {
+               if (atomic_read(&conf->active_stripes) ||
+                   atomic_read(&conf->r5c_cached_full_stripes) ||
+                   atomic_read(&conf->r5c_cached_partial_stripes)) {
                        return -EBUSY;
                }
                log_exit(conf);