raid10: improve random reads performance
author Tomasz Majchrzak <tomasz.majchrzak@intel.com>
Fri, 24 Jun 2016 12:20:16 +0000 (14:20 +0200)
committer Shaohua Li <shli@fb.com>
Tue, 19 Jul 2016 22:20:28 +0000 (15:20 -0700)
RAID10 random read performance is lower than expected due to excessive spinlock
utilisation, which is required mostly for rebuild/resync. Simplify allow_barrier,
as it sits in the IO path and encounters a lot of unnecessary lock congestion.

As allow_barrier just takes a lock in order to decrement a counter, convert the
counter (nr_pending) into an atomic variable and remove the spinlock. There is
also congestion around wake_up (it takes a lock internally), so call it only
when it's really needed. As wake_up is no longer called on every completion,
ensure the process waiting to raise a barrier is still notified when there are
no more waiting IOs.
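
The scheme is easier to see outside the kernel. What follows is a minimal
userspace sketch of the pattern, not the kernel code itself: C11 atomics and a
pthread condition variable stand in for atomic_t and the wait_barrier
waitqueue, and all names (start_io, end_io, wait_for_idle) are illustrative.
The point is that the hot IO path touches only an atomic counter, while the
lock and the wakeup survive only on the rare barrier/freeze path.

    #include <stdatomic.h>
    #include <pthread.h>

    struct conf {
            atomic_int      nr_pending;     /* in-flight IOs, lock-free */
            atomic_int      freeze_pending; /* a freeze is waiting; the kernel
                                             * patch reads a plain int here */
            pthread_mutex_t lock;           /* stands in for resync_lock */
            pthread_cond_t  wait_barrier;   /* stands in for the waitqueue */
    };

    /* IO submission path: no lock, just an atomic increment. */
    static void start_io(struct conf *c)
    {
            atomic_fetch_add(&c->nr_pending, 1);
    }

    /*
     * IO completion path. The old allow_barrier took the spinlock and
     * issued an unconditional wake_up; now we signal only when the last
     * IO drains or a freeze is waiting for an exact pending count.
     */
    static void end_io(struct conf *c)
    {
            if (atomic_fetch_sub(&c->nr_pending, 1) == 1 ||
                atomic_load(&c->freeze_pending)) {
                    pthread_mutex_lock(&c->lock);
                    pthread_cond_broadcast(&c->wait_barrier);
                    pthread_mutex_unlock(&c->lock);
            }
    }

    /* Barrier/freeze path: rare, so it can afford the lock and the sleep. */
    static void wait_for_idle(struct conf *c)
    {
            pthread_mutex_lock(&c->lock);
            atomic_fetch_add(&c->freeze_pending, 1);
            while (atomic_load(&c->nr_pending) != 0)
                    pthread_cond_wait(&c->wait_barrier, &c->lock);
            atomic_fetch_sub(&c->freeze_pending, 1);
            pthread_mutex_unlock(&c->lock);
    }

No wakeup is lost in this sketch: a completion that drops the counter to zero
must acquire the mutex before signalling, so it cannot fire until the waiter
has either observed zero itself or gone to sleep inside pthread_cond_wait.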

Signed-off-by: Tomasz Majchrzak <tomasz.majchrzak@intel.com>
Signed-off-by: Shaohua Li <shli@fb.com>
drivers/md/raid10.c
drivers/md/raid10.h

index f7f3c8a634195a7804e08fb887c0de51b763760c..cb1d88709ff07888c8d1ecbec39c610a82f136a3 100644 (file)
@@ -905,7 +905,7 @@ static void raise_barrier(struct r10conf *conf, int force)
 
        /* Now wait for all pending IO to complete */
        wait_event_lock_irq(conf->wait_barrier,
-                           !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
+                           !atomic_read(&conf->nr_pending) && conf->barrier < RESYNC_DEPTH,
                            conf->resync_lock);
 
        spin_unlock_irq(&conf->resync_lock);
@@ -936,23 +936,23 @@ static void wait_barrier(struct r10conf *conf)
                 */
                wait_event_lock_irq(conf->wait_barrier,
                                    !conf->barrier ||
-                                   (conf->nr_pending &&
+                                   (atomic_read(&conf->nr_pending) &&
                                     current->bio_list &&
                                     !bio_list_empty(current->bio_list)),
                                    conf->resync_lock);
                conf->nr_waiting--;
+               if (!conf->nr_waiting)
+                       wake_up(&conf->wait_barrier);
        }
-       conf->nr_pending++;
+       atomic_inc(&conf->nr_pending);
        spin_unlock_irq(&conf->resync_lock);
 }
 
 static void allow_barrier(struct r10conf *conf)
 {
-       unsigned long flags;
-       spin_lock_irqsave(&conf->resync_lock, flags);
-       conf->nr_pending--;
-       spin_unlock_irqrestore(&conf->resync_lock, flags);
-       wake_up(&conf->wait_barrier);
+       if ((atomic_dec_and_test(&conf->nr_pending)) ||
+                       (conf->array_freeze_pending))
+               wake_up(&conf->wait_barrier);
 }
 
 static void freeze_array(struct r10conf *conf, int extra)
@@ -970,13 +970,15 @@ static void freeze_array(struct r10conf *conf, int extra)
         * we continue.
         */
        spin_lock_irq(&conf->resync_lock);
+       conf->array_freeze_pending++;
        conf->barrier++;
        conf->nr_waiting++;
        wait_event_lock_irq_cmd(conf->wait_barrier,
-                               conf->nr_pending == conf->nr_queued+extra,
+                               atomic_read(&conf->nr_pending) == conf->nr_queued+extra,
                                conf->resync_lock,
                                flush_pending_writes(conf));
 
+       conf->array_freeze_pending--;
        spin_unlock_irq(&conf->resync_lock);
 }
 
@@ -3542,6 +3544,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
 
        spin_lock_init(&conf->resync_lock);
        init_waitqueue_head(&conf->wait_barrier);
+       atomic_set(&conf->nr_pending, 0);
 
        conf->thread = md_register_thread(raid10d, mddev, "raid10");
        if (!conf->thread)
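
A subtlety behind the raid10.h change below: once allow_barrier stops taking
resync_lock, freeze_array could sleep waiting for nr_pending to drain to
nr_queued+extra while the decrements silently skip the wakeup. The new
array_freeze_pending counter (the freeze_pending flag plays the same role in
the sketch above) is raised for the duration of that wait, so every
allow_barrier in that window issues a wake_up and the freezer re-evaluates
its condition.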
index 6fc2c75759bf28eacaa2d93bbbb4041fb10bbd6d..18ec1f7a98bf77c98da88eaaa7ff0a95da2ac30a 100644 (file)
@@ -64,10 +64,11 @@ struct r10conf {
        int                     pending_count;
 
        spinlock_t              resync_lock;
-       int                     nr_pending;
+       atomic_t                nr_pending;
        int                     nr_waiting;
        int                     nr_queued;
        int                     barrier;
+       int                     array_freeze_pending;
        sector_t                next_resync;
        int                     fullsync;  /* set to 1 if a full sync is needed,
                                            * (fresh device added).