wait: add wait_event_lock_irq() interface
author Lukas Czerner <lczerner@redhat.com>
Fri, 30 Nov 2012 10:42:40 +0000 (11:42 +0100)
committer Jens Axboe <axboe@kernel.dk>
Fri, 30 Nov 2012 10:47:57 +0000 (11:47 +0100)
New wait_event{_interruptible}_lock_irq{_cmd} macros added. This commit
moves the private wait_event_lock_irq() macro from MD to the regular wait
includes and introduces a new macro, wait_event_lock_irq_cmd(), to replace
the old, ugly method of omitting the cmd parameter. It also converts MD to
use the new macros and introduces the _interruptible_ variants.

The new interface is meant for cases where one has a special lock
protecting the data structures used in the condition, or where one also
needs to invoke "cmd" before the process is put to sleep.

All new macros are expected to be called with the lock taken. The lock
is released before sleeping and is reacquired afterwards, so the macros
return with the lock held.

Note to DM: IMO this should also fix a theoretical race on the waitqueue
when wait_event_lock_irq() and wait_event() are used simultaneously,
caused by the lack of locking around setting the current state and
removing the entry from the wait queue.

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Cc: Neil Brown <neilb@suse.de>
Cc: David Howells <dhowells@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
drivers/md/md.c
drivers/md/md.h
drivers/md/raid1.c
drivers/md/raid10.c
drivers/md/raid5.c
include/linux/wait.h

index 9ab768acfb623f8bbb13870e5f65150a60ecad07..7e513a38cec7e481ecd7367056f1f4662925b052 100644 (file)
@@ -452,7 +452,7 @@ void md_flush_request(struct mddev *mddev, struct bio *bio)
        spin_lock_irq(&mddev->write_lock);
        wait_event_lock_irq(mddev->sb_wait,
                            !mddev->flush_bio,
-                           mddev->write_lock, /*nothing*/);
+                           mddev->write_lock);
        mddev->flush_bio = bio;
        spin_unlock_irq(&mddev->write_lock);
 
index af443ab868db910f2db2d35d53ffacca701f1e60..1e2fc3d9c74c5665b7910f76f4e3c86780c0768e 100644 (file)
@@ -551,32 +551,6 @@ struct md_thread {
 
 #define THREAD_WAKEUP  0
 
-#define __wait_event_lock_irq(wq, condition, lock, cmd)                \
-do {                                                                   \
-       wait_queue_t __wait;                                            \
-       init_waitqueue_entry(&__wait, current);                         \
-                                                                       \
-       add_wait_queue(&wq, &__wait);                                   \
-       for (;;) {                                                      \
-               set_current_state(TASK_UNINTERRUPTIBLE);                \
-               if (condition)                                          \
-                       break;                                          \
-               spin_unlock_irq(&lock);                                 \
-               cmd;                                                    \
-               schedule();                                             \
-               spin_lock_irq(&lock);                                   \
-       }                                                               \
-       current->state = TASK_RUNNING;                                  \
-       remove_wait_queue(&wq, &__wait);                                \
-} while (0)
-
-#define wait_event_lock_irq(wq, condition, lock, cmd)                  \
-do {                                                                   \
-       if (condition)                                                  \
-               break;                                                  \
-       __wait_event_lock_irq(wq, condition, lock, cmd);                \
-} while (0)
-
 static inline void safe_put_page(struct page *p)
 {
        if (p) put_page(p);
index 8034fbd6190ce647ec2feb6a281e4bb32b406f8e..534dd74a2da07afdda3ae5a1f4446eeea79054b6 100644 (file)
@@ -822,7 +822,7 @@ static void raise_barrier(struct r1conf *conf)
 
        /* Wait until no block IO is waiting */
        wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting,
-                           conf->resync_lock);
+                           conf->resync_lock);
 
        /* block any new IO from starting */
        conf->barrier++;
@@ -830,7 +830,7 @@ static void raise_barrier(struct r1conf *conf)
        /* Now wait for all pending IO to complete */
        wait_event_lock_irq(conf->wait_barrier,
                            !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
-                           conf->resync_lock);
+                           conf->resync_lock);
 
        spin_unlock_irq(&conf->resync_lock);
 }
@@ -864,8 +864,7 @@ static void wait_barrier(struct r1conf *conf)
                                    (conf->nr_pending &&
                                     current->bio_list &&
                                     !bio_list_empty(current->bio_list)),
-                                   conf->resync_lock,
-                       );
+                                   conf->resync_lock);
                conf->nr_waiting--;
        }
        conf->nr_pending++;
@@ -898,10 +897,10 @@ static void freeze_array(struct r1conf *conf)
        spin_lock_irq(&conf->resync_lock);
        conf->barrier++;
        conf->nr_waiting++;
-       wait_event_lock_irq(conf->wait_barrier,
-                           conf->nr_pending == conf->nr_queued+1,
-                           conf->resync_lock,
-                           flush_pending_writes(conf));
+       wait_event_lock_irq_cmd(conf->wait_barrier,
+                               conf->nr_pending == conf->nr_queued+1,
+                               conf->resync_lock,
+                               flush_pending_writes(conf));
        spin_unlock_irq(&conf->resync_lock);
 }
 static void unfreeze_array(struct r1conf *conf)
index 906ccbd0f7dcdc6710869c5b990f9375d020d370..9a08f621b27d49a497cbe2b62d678d3d212d122f 100644 (file)
@@ -952,7 +952,7 @@ static void raise_barrier(struct r10conf *conf, int force)
 
        /* Wait until no block IO is waiting (unless 'force') */
        wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
-                           conf->resync_lock);
+                           conf->resync_lock);
 
        /* block any new IO from starting */
        conf->barrier++;
@@ -960,7 +960,7 @@ static void raise_barrier(struct r10conf *conf, int force)
        /* Now wait for all pending IO to complete */
        wait_event_lock_irq(conf->wait_barrier,
                            !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
-                           conf->resync_lock);
+                           conf->resync_lock);
 
        spin_unlock_irq(&conf->resync_lock);
 }
@@ -993,8 +993,7 @@ static void wait_barrier(struct r10conf *conf)
                                    (conf->nr_pending &&
                                     current->bio_list &&
                                     !bio_list_empty(current->bio_list)),
-                                   conf->resync_lock,
-                       );
+                                   conf->resync_lock);
                conf->nr_waiting--;
        }
        conf->nr_pending++;
@@ -1027,10 +1026,10 @@ static void freeze_array(struct r10conf *conf)
        spin_lock_irq(&conf->resync_lock);
        conf->barrier++;
        conf->nr_waiting++;
-       wait_event_lock_irq(conf->wait_barrier,
-                           conf->nr_pending == conf->nr_queued+1,
-                           conf->resync_lock,
-                           flush_pending_writes(conf));
+       wait_event_lock_irq_cmd(conf->wait_barrier,
+                               conf->nr_pending == conf->nr_queued+1,
+                               conf->resync_lock,
+                               flush_pending_writes(conf));
 
        spin_unlock_irq(&conf->resync_lock);
 }
index c5439dce0295078ecf82094af5474a649284ce61..2bf617d6f4fd43dbe1a35384334564a62f339ed3 100644 (file)
@@ -466,7 +466,7 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
        do {
                wait_event_lock_irq(conf->wait_for_stripe,
                                    conf->quiesce == 0 || noquiesce,
-                                   conf->device_lock, /* nothing */);
+                                   conf->device_lock);
                sh = __find_stripe(conf, sector, conf->generation - previous);
                if (!sh) {
                        if (!conf->inactive_blocked)
@@ -480,8 +480,7 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
                                                    (atomic_read(&conf->active_stripes)
                                                     < (conf->max_nr_stripes *3/4)
                                                     || !conf->inactive_blocked),
-                                                   conf->device_lock,
-                                                   );
+                                                   conf->device_lock);
                                conf->inactive_blocked = 0;
                        } else
                                init_stripe(sh, sector, previous);
@@ -1646,8 +1645,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
                spin_lock_irq(&conf->device_lock);
                wait_event_lock_irq(conf->wait_for_stripe,
                                    !list_empty(&conf->inactive_list),
-                                   conf->device_lock,
-                                   );
+                                   conf->device_lock);
                osh = get_free_stripe(conf);
                spin_unlock_irq(&conf->device_lock);
                atomic_set(&nsh->count, 1);
@@ -4000,7 +3998,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
                spin_lock_irq(&conf->device_lock);
                wait_event_lock_irq(conf->wait_for_stripe,
                                    conf->quiesce == 0,
-                                   conf->device_lock, /* nothing */);
+                                   conf->device_lock);
                atomic_inc(&conf->active_aligned_reads);
                spin_unlock_irq(&conf->device_lock);
 
@@ -6088,7 +6086,7 @@ static void raid5_quiesce(struct mddev *mddev, int state)
                wait_event_lock_irq(conf->wait_for_stripe,
                                    atomic_read(&conf->active_stripes) == 0 &&
                                    atomic_read(&conf->active_aligned_reads) == 0,
-                                   conf->device_lock, /* nothing */);
+                                   conf->device_lock);
                conf->quiesce = 1;
                spin_unlock_irq(&conf->device_lock);
                /* allow reshape to continue */
index 168dfe122dd3816c86785a32ce642d3427c79593..7cb64d4b499d21263feba5f3abf09bd600c5f8dc 100644 (file)
@@ -550,6 +550,170 @@ do {                                                                      \
        __ret;                                                          \
 })
 
+
+#define __wait_event_lock_irq(wq, condition, lock, cmd)                        \
+do {                                                                   \
+       DEFINE_WAIT(__wait);                                            \
+                                                                       \
+       for (;;) {                                                      \
+               prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE);    \
+               if (condition)                                          \
+                       break;                                          \
+               spin_unlock_irq(&lock);                                 \
+               cmd;                                                    \
+               schedule();                                             \
+               spin_lock_irq(&lock);                                   \
+       }                                                               \
+       finish_wait(&wq, &__wait);                                      \
+} while (0)
+
+/**
+ * wait_event_lock_irq_cmd - sleep until a condition gets true. The
+ *                          condition is checked under the lock. This
+ *                          is expected to be called with the lock
+ *                          taken.
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @lock: a locked spinlock_t, which will be released before cmd
+ *       and schedule() and reacquired afterwards.
+ * @cmd: a command which is invoked outside the critical section before
+ *      sleep
+ *
+ * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
+ * @condition evaluates to true. The @condition is checked each time
+ * the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * This is supposed to be called while holding the lock. The lock is
+ * dropped before invoking the cmd and going to sleep and is reacquired
+ * afterwards.
+ */
+#define wait_event_lock_irq_cmd(wq, condition, lock, cmd)              \
+do {                                                                   \
+       if (condition)                                                  \
+               break;                                                  \
+       __wait_event_lock_irq(wq, condition, lock, cmd);                \
+} while (0)
+
+/**
+ * wait_event_lock_irq - sleep until a condition gets true. The
+ *                      condition is checked under the lock. This
+ *                      is expected to be called with the lock
+ *                      taken.
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @lock: a locked spinlock_t, which will be released before schedule()
+ *       and reacquired afterwards.
+ *
+ * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
+ * @condition evaluates to true. The @condition is checked each time
+ * the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * This is supposed to be called while holding the lock. The lock is
+ * dropped before going to sleep and is reacquired afterwards.
+ */
+#define wait_event_lock_irq(wq, condition, lock)                       \
+do {                                                                   \
+       if (condition)                                                  \
+               break;                                                  \
+       __wait_event_lock_irq(wq, condition, lock, );                   \
+} while (0)
+
+
+#define __wait_event_interruptible_lock_irq(wq, condition,             \
+                                           lock, ret, cmd)             \
+do {                                                                   \
+       DEFINE_WAIT(__wait);                                            \
+                                                                       \
+       for (;;) {                                                      \
+               prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);      \
+               if (condition)                                          \
+                       break;                                          \
+               if (signal_pending(current)) {                          \
+                       ret = -ERESTARTSYS;                             \
+                       break;                                          \
+               }                                                       \
+               spin_unlock_irq(&lock);                                 \
+               cmd;                                                    \
+               schedule();                                             \
+               spin_lock_irq(&lock);                                   \
+       }                                                               \
+       finish_wait(&wq, &__wait);                                      \
+} while (0)
+
+/**
+ * wait_event_interruptible_lock_irq_cmd - sleep until a condition gets true.
+ *             The condition is checked under the lock. This is expected to
+ *             be called with the lock taken.
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @lock: a locked spinlock_t, which will be released before cmd and
+ *       schedule() and reacquired afterwards.
+ * @cmd: a command which is invoked outside the critical section before
+ *      sleep
+ *
+ * The process is put to sleep (TASK_INTERRUPTIBLE) until the
+ * @condition evaluates to true or a signal is received. The @condition is
+ * checked each time the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * This is supposed to be called while holding the lock. The lock is
+ * dropped before invoking the cmd and going to sleep and is reacquired
+ * afterwards.
+ *
+ * The macro will return -ERESTARTSYS if it was interrupted by a signal
+ * and 0 if @condition evaluated to true.
+ */
+#define wait_event_interruptible_lock_irq_cmd(wq, condition, lock, cmd)        \
+({                                                                     \
+       int __ret = 0;                                                  \
+                                                                       \
+       if (!(condition))                                               \
+               __wait_event_interruptible_lock_irq(wq, condition,      \
+                                                   lock, __ret, cmd);  \
+       __ret;                                                          \
+})
+
+/**
+ * wait_event_interruptible_lock_irq - sleep until a condition gets true.
+ *             The condition is checked under the lock. This is expected
+ *             to be called with the lock taken.
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @lock: a locked spinlock_t, which will be released before schedule()
+ *       and reacquired afterwards.
+ *
+ * The process is put to sleep (TASK_INTERRUPTIBLE) until the
+ * @condition evaluates to true or signal is received. The @condition is
+ * checked each time the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * This is supposed to be called while holding the lock. The lock is
+ * dropped before going to sleep and is reacquired afterwards.
+ *
+ * The macro will return -ERESTARTSYS if it was interrupted by a signal
+ * and 0 if @condition evaluated to true.
+ */
+#define wait_event_interruptible_lock_irq(wq, condition, lock)         \
+({                                                                     \
+       int __ret = 0;                                                  \
+                                                                       \
+       if (!(condition))                                               \
+               __wait_event_interruptible_lock_irq(wq, condition,      \
+                                                   lock, __ret, );     \
+       __ret;                                                          \
+})
+
+
 /*
  * These are the old interfaces to sleep waiting for an event.
  * They are racy.  DO NOT use them, use the wait_event* interfaces above.