Merge branch 'for-linus' of git://neil.brown.name/md
Author:     Linus Torvalds <torvalds@linux-foundation.org>
AuthorDate: Tue, 10 Aug 2010 22:38:19 +0000 (15:38 -0700)
Commit:     Chris Metcalf <cmetcalf@tilera.com>
CommitDate: Wed, 11 Aug 2010 14:30:35 +0000 (10:30 -0400)
* 'for-linus' of git://neil.brown.name/md: (24 commits)
  md: clean up do_md_stop
  md: fix another deadlock with removing sysfs attributes.
  md: move revalidate_disk() back outside open_mutex
  md/raid10: fix deadlock with unaligned read during resync
  md/bitmap: separate out loading a bitmap from initialising the structures.
  md/bitmap: prepare for storing write-intent-bitmap via dm-dirty-log.
  md/bitmap: optimise scanning of empty bitmaps.
  md/bitmap: clean up plugging calls.
  md/bitmap: reduce dependence on sysfs.
  md/bitmap: white space clean up and similar.
  md/raid5: export raid5 unplugging interface.
  md/plug: optionally use plugger to unplug an array during resync/recovery.
  md/raid5: add simple plugging infrastructure.
  md/raid5: export is_congested test
  raid5: Don't set read-ahead when there is no queue
  md: add support for raising dm events.
  md: export various start/stop interfaces
  md: split out md_rdev_init
  md: be more careful setting MD_CHANGE_CLEAN
  md/raid5: ensure we create a unique name for kmem_cache when mddev has no gendisk
  ...

drivers/md/md.c
drivers/md/md.h
drivers/md/raid10.c
drivers/md/raid5.c

diff --combined drivers/md/md.c
index 700c96edf9b2589811bb5cbeb1567a1f156e64f9,d44efb267a69fdc0ad05e3ea6ec6c3e7b41a3dc0..11567c7999a243d3d95ebecdb623c26de49705ce
@@@ -36,7 -36,6 +36,7 @@@
  #include <linux/blkdev.h>
  #include <linux/sysctl.h>
  #include <linux/seq_file.h>
 +#include <linux/smp_lock.h>
  #include <linux/buffer_head.h> /* for invalidate_bdev */
  #include <linux/poll.h>
  #include <linux/ctype.h>
@@@ -262,7 -261,7 +262,7 @@@ static int md_make_request(struct reque
   * Once ->stop is called and completes, the module will be completely
   * unused.
   */
- static void mddev_suspend(mddev_t *mddev)
+ void mddev_suspend(mddev_t *mddev)
  {
        BUG_ON(mddev->suspended);
        mddev->suspended = 1;
        wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
        mddev->pers->quiesce(mddev, 1);
  }
+ EXPORT_SYMBOL_GPL(mddev_suspend);
  
- static void mddev_resume(mddev_t *mddev)
+ void mddev_resume(mddev_t *mddev)
  {
        mddev->suspended = 0;
        wake_up(&mddev->sb_wait);
        mddev->pers->quiesce(mddev, 0);
  }
+ EXPORT_SYMBOL_GPL(mddev_resume);
  
  int mddev_congested(mddev_t *mddev, int bits)
  {
@@@ -354,7 -355,7 +356,7 @@@ static void md_submit_barrier(struct wo
                /* an empty barrier - all done */
                bio_endio(bio, 0);
        else {
 -              bio->bi_rw &= ~(1<<BIO_RW_BARRIER);
 +              bio->bi_rw &= ~REQ_HARDBARRIER;
                if (mddev->pers->make_request(mddev, bio))
                        generic_make_request(bio);
                mddev->barrier = POST_REQUEST_BARRIER;
@@@ -385,6 -386,51 +387,51 @@@ void md_barrier_request(mddev_t *mddev
  }
  EXPORT_SYMBOL(md_barrier_request);
  
+ /* Support for plugging.
+  * This mirrors the plugging support in request_queue, but does not
+  * require having a whole queue
+  */
+ static void plugger_work(struct work_struct *work)
+ {
+       struct plug_handle *plug =
+               container_of(work, struct plug_handle, unplug_work);
+       plug->unplug_fn(plug);
+ }
+ static void plugger_timeout(unsigned long data)
+ {
+       struct plug_handle *plug = (void *)data;
+       kblockd_schedule_work(NULL, &plug->unplug_work);
+ }
+ void plugger_init(struct plug_handle *plug,
+                 void (*unplug_fn)(struct plug_handle *))
+ {
+       plug->unplug_flag = 0;
+       plug->unplug_fn = unplug_fn;
+       init_timer(&plug->unplug_timer);
+       plug->unplug_timer.function = plugger_timeout;
+       plug->unplug_timer.data = (unsigned long)plug;
+       INIT_WORK(&plug->unplug_work, plugger_work);
+ }
+ EXPORT_SYMBOL_GPL(plugger_init);
+ void plugger_set_plug(struct plug_handle *plug)
+ {
+       if (!test_and_set_bit(PLUGGED_FLAG, &plug->unplug_flag))
+               mod_timer(&plug->unplug_timer, jiffies + msecs_to_jiffies(3)+1);
+ }
+ EXPORT_SYMBOL_GPL(plugger_set_plug);
+ int plugger_remove_plug(struct plug_handle *plug)
+ {
+       if (test_and_clear_bit(PLUGGED_FLAG, &plug->unplug_flag)) {
+               del_timer(&plug->unplug_timer);
+               return 1;
+       } else
+               return 0;
+ }
+ EXPORT_SYMBOL_GPL(plugger_remove_plug);
  static inline mddev_t *mddev_get(mddev_t *mddev)
  {
        atomic_inc(&mddev->active);
@@@ -417,7 -463,7 +464,7 @@@ static void mddev_put(mddev_t *mddev
        spin_unlock(&all_mddevs_lock);
  }
  
- static void mddev_init(mddev_t *mddev)
+ void mddev_init(mddev_t *mddev)
  {
        mutex_init(&mddev->open_mutex);
        mutex_init(&mddev->reconfig_mutex);
        mddev->resync_max = MaxSector;
        mddev->level = LEVEL_NONE;
  }
+ EXPORT_SYMBOL_GPL(mddev_init);
  
  static mddev_t * mddev_find(dev_t unit)
  {
@@@ -533,25 -580,31 +581,31 @@@ static void mddev_unlock(mddev_t * mdde
                 * an access to the files will try to take reconfig_mutex
                 * while holding the file unremovable, which leads to
                 * a deadlock.
-                * So hold open_mutex instead - we are allowed to take
-                * it while holding reconfig_mutex, and md_run can
-                * use it to wait for the remove to complete.
+                * So set sysfs_active while the remove is happening,
+                * and anything else which might set ->to_remove or may
+                * otherwise change the sysfs namespace will fail with
+                * -EBUSY if sysfs_active is still set.
+                * We set sysfs_active under reconfig_mutex and elsewhere
+                * test it under the same mutex to ensure its correct value
+                * is seen.
                 */
                struct attribute_group *to_remove = mddev->to_remove;
                mddev->to_remove = NULL;
-               mutex_lock(&mddev->open_mutex);
+               mddev->sysfs_active = 1;
                mutex_unlock(&mddev->reconfig_mutex);
  
-               if (to_remove != &md_redundancy_group)
-                       sysfs_remove_group(&mddev->kobj, to_remove);
-               if (mddev->pers == NULL ||
-                   mddev->pers->sync_request == NULL) {
-                       sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
-                       if (mddev->sysfs_action)
-                               sysfs_put(mddev->sysfs_action);
-                       mddev->sysfs_action = NULL;
+               if (mddev->kobj.sd) {
+                       if (to_remove != &md_redundancy_group)
+                               sysfs_remove_group(&mddev->kobj, to_remove);
+                       if (mddev->pers == NULL ||
+                           mddev->pers->sync_request == NULL) {
+                               sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
+                               if (mddev->sysfs_action)
+                                       sysfs_put(mddev->sysfs_action);
+                               mddev->sysfs_action = NULL;
+                       }
                }
-               mutex_unlock(&mddev->open_mutex);
+               mddev->sysfs_active = 0;
        } else
                mutex_unlock(&mddev->reconfig_mutex);
  
@@@ -676,11 -729,11 +730,11 @@@ void md_super_write(mddev_t *mddev, mdk
         * if zero is reached.
         * If an error occurred, call md_error
         *
 -       * As we might need to resubmit the request if BIO_RW_BARRIER
 +       * As we might need to resubmit the request if REQ_HARDBARRIER
         * causes ENOTSUPP, we allocate a spare bio...
         */
        struct bio *bio = bio_alloc(GFP_NOIO, 1);
 -      int rw = (1<<BIO_RW) | (1<<BIO_RW_SYNCIO) | (1<<BIO_RW_UNPLUG);
 +      int rw = REQ_WRITE | REQ_SYNC | REQ_UNPLUG;
  
        bio->bi_bdev = rdev->bdev;
        bio->bi_sector = sector;
        atomic_inc(&mddev->pending_writes);
        if (!test_bit(BarriersNotsupp, &rdev->flags)) {
                struct bio *rbio;
 -              rw |= (1<<BIO_RW_BARRIER);
 +              rw |= REQ_HARDBARRIER;
                rbio = bio_clone(bio, GFP_NOIO);
                rbio->bi_private = bio;
                rbio->bi_end_io = super_written_barrier;
@@@ -737,7 -790,7 +791,7 @@@ int sync_page_io(struct block_device *b
        struct completion event;
        int ret;
  
 -      rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
 +      rw |= REQ_SYNC | REQ_UNPLUG;
  
        bio->bi_bdev = bdev;
        bio->bi_sector = sector;
@@@ -1812,11 -1865,9 +1866,9 @@@ static int bind_rdev_to_array(mdk_rdev_
                goto fail;
  
        ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
-       if ((err = sysfs_create_link(&rdev->kobj, ko, "block"))) {
-               kobject_del(&rdev->kobj);
-               goto fail;
-       }
-       rdev->sysfs_state = sysfs_get_dirent(rdev->kobj.sd, NULL, "state");
+       if (sysfs_create_link(&rdev->kobj, ko, "block"))
+               /* failure here is OK */;
+       rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
  
        list_add_rcu(&rdev->same_set, &mddev->disks);
        bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
@@@ -2335,8 -2386,8 +2387,8 @@@ state_store(mdk_rdev_t *rdev, const cha
                set_bit(In_sync, &rdev->flags);
                err = 0;
        }
-       if (!err && rdev->sysfs_state)
-               sysfs_notify_dirent(rdev->sysfs_state);
+       if (!err)
+               sysfs_notify_dirent_safe(rdev->sysfs_state);
        return err ? err : len;
  }
  static struct rdev_sysfs_entry rdev_state =
@@@ -2431,14 -2482,10 +2483,10 @@@ slot_store(mdk_rdev_t *rdev, const cha
                        rdev->raid_disk = -1;
                        return err;
                } else
-                       sysfs_notify_dirent(rdev->sysfs_state);
+                       sysfs_notify_dirent_safe(rdev->sysfs_state);
                sprintf(nm, "rd%d", rdev->raid_disk);
                if (sysfs_create_link(&rdev->mddev->kobj, &rdev->kobj, nm))
-                       printk(KERN_WARNING
-                              "md: cannot register "
-                              "%s for %s\n",
-                              nm, mdname(rdev->mddev));
+                       /* failure here is OK */;
                /* don't wakeup anyone, leave that to userspace. */
        } else {
                if (slot >= rdev->mddev->raid_disks)
                clear_bit(Faulty, &rdev->flags);
                clear_bit(WriteMostly, &rdev->flags);
                set_bit(In_sync, &rdev->flags);
-               sysfs_notify_dirent(rdev->sysfs_state);
+               sysfs_notify_dirent_safe(rdev->sysfs_state);
        }
        return len;
  }
@@@ -2696,6 -2743,24 +2744,24 @@@ static struct kobj_type rdev_ktype = 
        .default_attrs  = rdev_default_attrs,
  };
  
+ void md_rdev_init(mdk_rdev_t *rdev)
+ {
+       rdev->desc_nr = -1;
+       rdev->saved_raid_disk = -1;
+       rdev->raid_disk = -1;
+       rdev->flags = 0;
+       rdev->data_offset = 0;
+       rdev->sb_events = 0;
+       rdev->last_read_error.tv_sec  = 0;
+       rdev->last_read_error.tv_nsec = 0;
+       atomic_set(&rdev->nr_pending, 0);
+       atomic_set(&rdev->read_errors, 0);
+       atomic_set(&rdev->corrected_errors, 0);
+       INIT_LIST_HEAD(&rdev->same_set);
+       init_waitqueue_head(&rdev->blocked_wait);
+ }
+ EXPORT_SYMBOL_GPL(md_rdev_init);
  /*
   * Import a device. If 'super_format' >= 0, then sanity check the superblock
   *
@@@ -2719,6 -2784,7 +2785,7 @@@ static mdk_rdev_t *md_import_device(dev
                return ERR_PTR(-ENOMEM);
        }
  
+       md_rdev_init(rdev);
        if ((err = alloc_disk_sb(rdev)))
                goto abort_free;
  
  
        kobject_init(&rdev->kobj, &rdev_ktype);
  
-       rdev->desc_nr = -1;
-       rdev->saved_raid_disk = -1;
-       rdev->raid_disk = -1;
-       rdev->flags = 0;
-       rdev->data_offset = 0;
-       rdev->sb_events = 0;
-       rdev->last_read_error.tv_sec  = 0;
-       rdev->last_read_error.tv_nsec = 0;
-       atomic_set(&rdev->nr_pending, 0);
-       atomic_set(&rdev->read_errors, 0);
-       atomic_set(&rdev->corrected_errors, 0);
        size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
        if (!size) {
                printk(KERN_WARNING 
                }
        }
  
-       INIT_LIST_HEAD(&rdev->same_set);
-       init_waitqueue_head(&rdev->blocked_wait);
        return rdev;
  
  abort_free:
@@@ -2961,7 -3012,9 +3013,9 @@@ level_store(mddev_t *mddev, const char 
         *  - new personality will access other array.
         */
  
-       if (mddev->sync_thread || mddev->reshape_position != MaxSector)
+       if (mddev->sync_thread ||
+           mddev->reshape_position != MaxSector ||
+           mddev->sysfs_active)
                return -EBUSY;
  
        if (!mddev->pers->quiesce) {
@@@ -3438,7 -3491,7 +3492,7 @@@ array_state_store(mddev_t *mddev, cons
        if (err)
                return err;
        else {
-               sysfs_notify_dirent(mddev->sysfs_state);
+               sysfs_notify_dirent_safe(mddev->sysfs_state);
                return len;
        }
  }
@@@ -3736,7 -3789,7 +3790,7 @@@ action_store(mddev_t *mddev, const cha
        }
        set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
        md_wakeup_thread(mddev->thread);
-       sysfs_notify_dirent(mddev->sysfs_action);
+       sysfs_notify_dirent_safe(mddev->sysfs_action);
        return len;
  }
  
@@@ -4282,13 -4335,14 +4336,14 @@@ static int md_alloc(dev_t dev, char *na
                       disk->disk_name);
                error = 0;
        }
-       if (sysfs_create_group(&mddev->kobj, &md_bitmap_group))
+       if (mddev->kobj.sd &&
+           sysfs_create_group(&mddev->kobj, &md_bitmap_group))
                printk(KERN_DEBUG "pointless warning\n");
   abort:
        mutex_unlock(&disks_mutex);
-       if (!error) {
+       if (!error && mddev->kobj.sd) {
                kobject_uevent(&mddev->kobj, KOBJ_ADD);
-               mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, NULL, "array_state");
+               mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
        }
        mddev_put(mddev);
        return error;
@@@ -4326,14 -4380,14 +4381,14 @@@ static void md_safemode_timeout(unsigne
        if (!atomic_read(&mddev->writes_pending)) {
                mddev->safemode = 1;
                if (mddev->external)
-                       sysfs_notify_dirent(mddev->sysfs_state);
+                       sysfs_notify_dirent_safe(mddev->sysfs_state);
        }
        md_wakeup_thread(mddev->thread);
  }
  
  static int start_dirty_degraded;
  
- static int md_run(mddev_t *mddev)
+ int md_run(mddev_t *mddev)
  {
        int err;
        mdk_rdev_t *rdev;
  
        if (mddev->pers)
                return -EBUSY;
-       /* These two calls synchronise us with the
-        * sysfs_remove_group calls in mddev_unlock,
-        * so they must have completed.
-        */
-       mutex_lock(&mddev->open_mutex);
-       mutex_unlock(&mddev->open_mutex);
+       /* Cannot run until previous stop completes properly */
+       if (mddev->sysfs_active)
+               return -EBUSY;
  
        /*
         * Analyze all RAID superblock(s)
                                return -EINVAL;
                        }
                }
-               sysfs_notify_dirent(rdev->sysfs_state);
+               sysfs_notify_dirent_safe(rdev->sysfs_state);
        }
  
        spin_lock(&pers_lock);
                return err;
        }
        if (mddev->pers->sync_request) {
-               if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
+               if (mddev->kobj.sd &&
+                   sysfs_create_group(&mddev->kobj, &md_redundancy_group))
                        printk(KERN_WARNING
                               "md: cannot register extra attributes for %s\n",
                               mdname(mddev));
-               mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, NULL, "sync_action");
+               mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
        } else if (mddev->ro == 2) /* auto-readonly not meaningful */
                mddev->ro = 0;
  
                        char nm[20];
                        sprintf(nm, "rd%d", rdev->raid_disk);
                        if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm))
-                               printk("md: cannot register %s for %s\n",
-                                      nm, mdname(mddev));
+                               /* failure here is OK */;
                }
        
        set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
        md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
  
        md_new_event(mddev);
-       sysfs_notify_dirent(mddev->sysfs_state);
-       if (mddev->sysfs_action)
-               sysfs_notify_dirent(mddev->sysfs_action);
+       sysfs_notify_dirent_safe(mddev->sysfs_state);
+       sysfs_notify_dirent_safe(mddev->sysfs_action);
        sysfs_notify(&mddev->kobj, NULL, "degraded");
        return 0;
  }
+ EXPORT_SYMBOL_GPL(md_run);
  
  static int do_md_run(mddev_t *mddev)
  {
        err = md_run(mddev);
        if (err)
                goto out;
+       err = bitmap_load(mddev);
+       if (err) {
+               bitmap_destroy(mddev);
+               goto out;
+       }
        set_capacity(mddev->gendisk, mddev->array_sectors);
        revalidate_disk(mddev->gendisk);
        kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
@@@ -4574,7 -4628,7 +4629,7 @@@ static int restart_array(mddev_t *mddev
        set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
        md_wakeup_thread(mddev->thread);
        md_wakeup_thread(mddev->sync_thread);
-       sysfs_notify_dirent(mddev->sysfs_state);
+       sysfs_notify_dirent_safe(mddev->sysfs_state);
        return 0;
  }
  
@@@ -4645,9 -4699,10 +4700,10 @@@ static void md_clean(mddev_t *mddev
        mddev->bitmap_info.chunksize = 0;
        mddev->bitmap_info.daemon_sleep = 0;
        mddev->bitmap_info.max_write_behind = 0;
+       mddev->plug = NULL;
  }
  
- static void md_stop_writes(mddev_t *mddev)
+ void md_stop_writes(mddev_t *mddev)
  {
        if (mddev->sync_thread) {
                set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
                md_update_sb(mddev, 1);
        }
  }
+ EXPORT_SYMBOL_GPL(md_stop_writes);
  
- static void md_stop(mddev_t *mddev)
+ void md_stop(mddev_t *mddev)
  {
-       md_stop_writes(mddev);
        mddev->pers->stop(mddev);
        if (mddev->pers->sync_request && mddev->to_remove == NULL)
                mddev->to_remove = &md_redundancy_group;
        mddev->pers = NULL;
        clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
  }
+ EXPORT_SYMBOL_GPL(md_stop);
  
  static int md_set_readonly(mddev_t *mddev, int is_open)
  {
                mddev->ro = 1;
                set_disk_ro(mddev->gendisk, 1);
                clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-               sysfs_notify_dirent(mddev->sysfs_state);
+               sysfs_notify_dirent_safe(mddev->sysfs_state);
                err = 0;        
        }
  out:
   */
  static int do_md_stop(mddev_t * mddev, int mode, int is_open)
  {
-       int err = 0;
        struct gendisk *disk = mddev->gendisk;
        mdk_rdev_t *rdev;
  
        mutex_lock(&mddev->open_mutex);
-       if (atomic_read(&mddev->openers) > is_open) {
+       if (atomic_read(&mddev->openers) > is_open ||
+           mddev->sysfs_active) {
                printk("md: %s still in use.\n",mdname(mddev));
-               err = -EBUSY;
-       } else if (mddev->pers) {
+               mutex_unlock(&mddev->open_mutex);
+               return -EBUSY;
+       }
  
+       if (mddev->pers) {
                if (mddev->ro)
                        set_disk_ro(disk, 0);
  
+               md_stop_writes(mddev);
                md_stop(mddev);
                mddev->queue->merge_bvec_fn = NULL;
                mddev->queue->unplug_fn = NULL;
                mddev->queue->backing_dev_info.congested_fn = NULL;
  
                /* tell userspace to handle 'inactive' */
-               sysfs_notify_dirent(mddev->sysfs_state);
+               sysfs_notify_dirent_safe(mddev->sysfs_state);
  
                list_for_each_entry(rdev, &mddev->disks, same_set)
                        if (rdev->raid_disk >= 0) {
                        }
  
                set_capacity(disk, 0);
+               mutex_unlock(&mddev->open_mutex);
                revalidate_disk(disk);
  
                if (mddev->ro)
                        mddev->ro = 0;
-               
-               err = 0;
-       }
-       mutex_unlock(&mddev->open_mutex);
-       if (err)
-               return err;
+       } else
+               mutex_unlock(&mddev->open_mutex);
        /*
         * Free resources if final stop
         */
        if (mode == 0) {
                printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
  
                bitmap_destroy(mddev);
                kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
                if (mddev->hold_active == UNTIL_STOP)
                        mddev->hold_active = 0;
        }
-       err = 0;
        blk_integrity_unregister(disk);
        md_new_event(mddev);
-       sysfs_notify_dirent(mddev->sysfs_state);
-       return err;
+       sysfs_notify_dirent_safe(mddev->sysfs_state);
+       return 0;
  }
  
  #ifndef MODULE
@@@ -5139,7 -5191,7 +5192,7 @@@ static int add_new_disk(mddev_t * mddev
                if (err)
                        export_rdev(rdev);
                else
-                       sysfs_notify_dirent(rdev->sysfs_state);
+                       sysfs_notify_dirent_safe(rdev->sysfs_state);
  
                md_update_sb(mddev, 1);
                if (mddev->degraded)
@@@ -5332,8 -5384,11 +5385,11 @@@ static int set_bitmap_file(mddev_t *mdd
        err = 0;
        if (mddev->pers) {
                mddev->pers->quiesce(mddev, 1);
-               if (fd >= 0)
+               if (fd >= 0) {
                        err = bitmap_create(mddev);
+                       if (!err)
+                               err = bitmap_load(mddev);
+               }
                if (fd < 0 || err) {
                        bitmap_destroy(mddev);
                        fd = -1; /* make sure to put the file */
@@@ -5582,6 -5637,8 +5638,8 @@@ static int update_array_info(mddev_t *m
                                mddev->bitmap_info.default_offset;
                        mddev->pers->quiesce(mddev, 1);
                        rv = bitmap_create(mddev);
+                       if (!rv)
+                               rv = bitmap_load(mddev);
                        if (rv)
                                bitmap_destroy(mddev);
                        mddev->pers->quiesce(mddev, 0);
@@@ -5814,7 -5871,7 +5872,7 @@@ static int md_ioctl(struct block_devic
        if (_IOC_TYPE(cmd) == MD_MAJOR && mddev->ro && mddev->pers) {
                if (mddev->ro == 2) {
                        mddev->ro = 0;
-                       sysfs_notify_dirent(mddev->sysfs_state);
+                       sysfs_notify_dirent_safe(mddev->sysfs_state);
                        set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
                        md_wakeup_thread(mddev->thread);
                } else {
@@@ -5903,7 -5960,6 +5961,7 @@@ static int md_open(struct block_device 
        mddev_t *mddev = mddev_find(bdev->bd_dev);
        int err;
  
 +      lock_kernel();
        if (mddev->gendisk != bdev->bd_disk) {
                /* we are racing with mddev_put which is discarding this
                 * bd_disk.
                /* Wait until bdev->bd_disk is definitely gone */
                flush_scheduled_work();
                /* Then retry the open from the top */
 +              unlock_kernel();
                return -ERESTARTSYS;
        }
        BUG_ON(mddev != bdev->bd_disk->private_data);
  
        check_disk_size_change(mddev->gendisk, bdev);
   out:
 +      unlock_kernel();
        return err;
  }
  
@@@ -5935,10 -5989,8 +5993,10 @@@ static int md_release(struct gendisk *d
        mddev_t *mddev = disk->private_data;
  
        BUG_ON(!mddev);
 +      lock_kernel();
        atomic_dec(&mddev->openers);
        mddev_put(mddev);
 +      unlock_kernel();
  
        return 0;
  }
@@@ -6065,10 -6117,12 +6123,12 @@@ void md_error(mddev_t *mddev, mdk_rdev_
        mddev->pers->error_handler(mddev,rdev);
        if (mddev->degraded)
                set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
-       sysfs_notify_dirent(rdev->sysfs_state);
+       sysfs_notify_dirent_safe(rdev->sysfs_state);
        set_bit(MD_RECOVERY_INTR, &mddev->recovery);
        set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
        md_wakeup_thread(mddev->thread);
+       if (mddev->event_work.func)
+               schedule_work(&mddev->event_work);
        md_new_event_inintr(mddev);
  }
  
@@@ -6526,7 -6580,7 +6586,7 @@@ void md_write_start(mddev_t *mddev, str
                spin_unlock_irq(&mddev->write_lock);
        }
        if (did_change)
-               sysfs_notify_dirent(mddev->sysfs_state);
+               sysfs_notify_dirent_safe(mddev->sysfs_state);
        wait_event(mddev->sb_wait,
                   !test_bit(MD_CHANGE_CLEAN, &mddev->flags) &&
                   !test_bit(MD_CHANGE_PENDING, &mddev->flags));
@@@ -6569,7 -6623,7 +6629,7 @@@ int md_allow_write(mddev_t *mddev
                        mddev->safemode = 1;
                spin_unlock_irq(&mddev->write_lock);
                md_update_sb(mddev, 0);
-               sysfs_notify_dirent(mddev->sysfs_state);
+               sysfs_notify_dirent_safe(mddev->sysfs_state);
        } else
                spin_unlock_irq(&mddev->write_lock);
  
  }
  EXPORT_SYMBOL_GPL(md_allow_write);
  
+ void md_unplug(mddev_t *mddev)
+ {
+       if (mddev->queue)
+               blk_unplug(mddev->queue);
+       if (mddev->plug)
+               mddev->plug->unplug_fn(mddev->plug);
+ }
  #define SYNC_MARKS    10
  #define       SYNC_MARK_STEP  (3*HZ)
  void md_do_sync(mddev_t *mddev)
                     >= mddev->resync_max - mddev->curr_resync_completed
                            )) {
                        /* time to update curr_resync_completed */
-                       blk_unplug(mddev->queue);
+                       md_unplug(mddev);
                        wait_event(mddev->recovery_wait,
                                   atomic_read(&mddev->recovery_active) == 0);
                        mddev->curr_resync_completed =
                                mddev->curr_resync;
-                       set_bit(MD_CHANGE_CLEAN, &mddev->flags);
+                       if (mddev->persistent)
+                               set_bit(MD_CHANGE_CLEAN, &mddev->flags);
                        sysfs_notify(&mddev->kobj, NULL, "sync_completed");
                }
  
                 * about not overloading the IO subsystem. (things like an
                 * e2fsck being done on the RAID array should execute fast)
                 */
-               blk_unplug(mddev->queue);
+               md_unplug(mddev);
                cond_resched();
  
                currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2
         * this also signals 'finished resyncing' to md_stop
         */
   out:
-       blk_unplug(mddev->queue);
+       md_unplug(mddev);
  
        wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
  
@@@ -6956,10 -7019,7 +7025,7 @@@ static int remove_and_add_spares(mddev_
                                        sprintf(nm, "rd%d", rdev->raid_disk);
                                        if (sysfs_create_link(&mddev->kobj,
                                                              &rdev->kobj, nm))
-                                               printk(KERN_WARNING
-                                                      "md: cannot register "
-                                                      "%s for %s\n",
-                                                      nm, mdname(mddev));
+                                               /* failure here is OK */;
                                        spares++;
                                        md_new_event(mddev);
                                        set_bit(MD_CHANGE_DEVS, &mddev->flags);
@@@ -7052,7 -7112,7 +7118,7 @@@ void md_check_recovery(mddev_t *mddev
                                mddev->safemode = 0;
                        spin_unlock_irq(&mddev->write_lock);
                        if (did_change)
-                               sysfs_notify_dirent(mddev->sysfs_state);
+                               sysfs_notify_dirent_safe(mddev->sysfs_state);
                }
  
                if (mddev->flags)
                        mddev->recovery = 0;
                        /* flag recovery needed just to double check */
                        set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
-                       sysfs_notify_dirent(mddev->sysfs_action);
+                       sysfs_notify_dirent_safe(mddev->sysfs_action);
                        md_new_event(mddev);
                        goto unlock;
                }
                                mddev->recovery = 0;
                        } else
                                md_wakeup_thread(mddev->sync_thread);
-                       sysfs_notify_dirent(mddev->sysfs_action);
+                       sysfs_notify_dirent_safe(mddev->sysfs_action);
                        md_new_event(mddev);
                }
        unlock:
                        if (test_and_clear_bit(MD_RECOVERY_RECOVER,
                                               &mddev->recovery))
                                if (mddev->sysfs_action)
-                                       sysfs_notify_dirent(mddev->sysfs_action);
+                                       sysfs_notify_dirent_safe(mddev->sysfs_action);
                }
                mddev_unlock(mddev);
        }
  
  void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
  {
-       sysfs_notify_dirent(rdev->sysfs_state);
+       sysfs_notify_dirent_safe(rdev->sysfs_state);
        wait_event_timeout(rdev->blocked_wait,
                           !test_bit(Blocked, &rdev->flags),
                           msecs_to_jiffies(5000));
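
The hunks above convert several static md entry points into exported GPL symbols (mddev_suspend/mddev_resume, mddev_init, md_run, md_stop_writes, md_stop) so that an external driver such as dm can drive an array without going through md's ioctl paths. A minimal sketch of the intended lifecycle, using only interfaces exported in this diff — the demo_* wrappers are hypothetical and not part of the patch:

#include "md.h"	/* exported prototypes added at the end of md.h below */

/* hypothetical embedding driver: bring an mddev up and tear it down
 * without touching md's char-device/ioctl interface */
static int demo_array_start(mddev_t *mddev)
{
	mddev_init(mddev);		/* init mutexes, lists, defaults */
	/* ... caller attaches rdevs and selects a personality here ... */
	return md_run(mddev);		/* -EBUSY while a previous stop's
					 * sysfs deletes are still active */
}

static void demo_array_stop(mddev_t *mddev)
{
	md_stop_writes(mddev);		/* freeze recovery, stop the sync
					 * thread, write the superblock */
	md_stop(mddev);			/* tear down the personality */
}

/* quiesce around a reconfiguration, pairing the suspend/resume
 * exports from the top of this diff */
static void demo_array_reconfigure(mddev_t *mddev)
{
	mddev_suspend(mddev);		/* waits for active_io to drain */
	/* ... safe to change personality state here ... */
	mddev_resume(mddev);
}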
diff --combined drivers/md/md.h
index fc56e0f21c80ac9774d8271dff26129f70bb9c77,6f797eceae317225f354a5d4b7c947fc41d52a5c..a953fe2808ae7ef17b1046b6cff6bc6276c1b8c1
  typedef struct mddev_s mddev_t;
  typedef struct mdk_rdev_s mdk_rdev_t;
  
+ /* generic plugging support - like that provided with request_queue,
+  * but does not require a request_queue
+  */
+ struct plug_handle {
+       void                    (*unplug_fn)(struct plug_handle *);
+       struct timer_list       unplug_timer;
+       struct work_struct      unplug_work;
+       unsigned long           unplug_flag;
+ };
+ #define       PLUGGED_FLAG 1
+ void plugger_init(struct plug_handle *plug,
+                 void (*unplug_fn)(struct plug_handle *));
+ void plugger_set_plug(struct plug_handle *plug);
+ int plugger_remove_plug(struct plug_handle *plug);
+ static inline void plugger_flush(struct plug_handle *plug)
+ {
+       del_timer_sync(&plug->unplug_timer);
+       cancel_work_sync(&plug->unplug_work);
+ }
  /*
   * MD's 'extended' device
   */
@@@ -67,7 -87,7 +87,7 @@@ struct mdk_rdev_
  #define       Faulty          1               /* device is known to have a fault */
  #define       In_sync         2               /* device is in_sync with rest of array */
  #define       WriteMostly     4               /* Avoid reading if at all possible */
 -#define       BarriersNotsupp 5               /* BIO_RW_BARRIER is not supported */
 +#define       BarriersNotsupp 5               /* REQ_HARDBARRIER is not supported */
  #define       AllReserved     6               /* If whole device is reserved for
                                         * one array */
  #define       AutoDetected    7               /* added by auto-detect */
@@@ -125,6 -145,10 +145,10 @@@ struct mddev_
        int                             suspended;
        atomic_t                        active_io;
        int                             ro;
+       int                             sysfs_active; /* set when sysfs deletes
+                                                      * are happening, so run/
+                                                      * takeover/stop are not safe
+                                                      */
  
        struct gendisk                  *gendisk;
  
                                                         * fails.  Only supported
                                                         */
        struct bio                      *biolist;       /* bios that need to be retried
 -                                                       * because BIO_RW_BARRIER is not supported
 +                                                       * because REQ_HARDBARRIER is not supported
                                                         */
  
        atomic_t                        recovery_active; /* blocks scheduled, but not written */
                                                         * hot-adding a bitmap.  It should
                                                         * eventually be settable by sysfs.
                                                         */
+               /* When md is serving under dm, it might use a
+                * dirty_log to store the bits.
+                */
+               struct dm_dirty_log *log;
                struct mutex            mutex;
                unsigned long           chunksize;
-               unsigned long           daemon_sleep; /* how many seconds between updates? */
+               unsigned long           daemon_sleep; /* how many jiffies between updates? */
                unsigned long           max_write_behind; /* write-behind mode */
                int                     external;
        } bitmap_info;
        struct list_head                all_mddevs;
  
        struct attribute_group          *to_remove;
+       struct plug_handle              *plug; /* if used by personality */
        /* Generic barrier handling.
         * If there is a pending barrier request, all other
         * writes are blocked while the devices are flushed.
        struct bio *barrier;
        atomic_t flush_pending;
        struct work_struct barrier_work;
+       struct work_struct event_work;  /* used by dm to report failure event */
  };
  
  
@@@ -382,6 -414,18 +414,18 @@@ struct md_sysfs_entry 
  };
  extern struct attribute_group md_bitmap_group;
  
+ static inline struct sysfs_dirent *sysfs_get_dirent_safe(struct sysfs_dirent *sd, char *name)
+ {
+       if (sd)
+               return sysfs_get_dirent(sd, NULL, name);
+       return sd;
+ }
+ static inline void sysfs_notify_dirent_safe(struct sysfs_dirent *sd)
+ {
+       if (sd)
+               sysfs_notify_dirent(sd);
+ }
  static inline char * mdname (mddev_t * mddev)
  {
        return mddev->gendisk ? mddev->gendisk->disk_name : "mdX";
@@@ -474,5 -518,14 +518,14 @@@ extern int md_integrity_register(mddev_
  extern void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
  extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale);
  extern void restore_bitmap_write_access(struct file *file);
+ extern void md_unplug(mddev_t *mddev);
+ extern void mddev_init(mddev_t *mddev);
+ extern int md_run(mddev_t *mddev);
+ extern void md_stop(mddev_t *mddev);
+ extern void md_stop_writes(mddev_t *mddev);
+ extern void md_rdev_init(mdk_rdev_t *rdev);
  
+ extern void mddev_suspend(mddev_t *mddev);
+ extern void mddev_resume(mddev_t *mddev);
  #endif /* _MD_MD_H */
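
The plug_handle interface declared above gives personalities request_queue-style plugging without requiring a queue. A sketch of how a personality might wire it up, mirroring raid5's use later in this merge — the demo_* names are hypothetical:

#include "md.h"		/* struct plug_handle and the plugger_* helpers */

struct demo_conf {
	mddev_t			*mddev;
	struct plug_handle	plug;
};

/* Runs from the plugger's timer/work roughly 3ms after the first
 * plugger_set_plug(); flushes whatever was batched while plugged. */
static void demo_unplug(struct plug_handle *plug)
{
	struct demo_conf *conf = container_of(plug, struct demo_conf, plug);

	if (plugger_remove_plug(&conf->plug))
		md_wakeup_thread(conf->mddev->thread);
}

static void demo_setup(struct demo_conf *conf)
{
	plugger_init(&conf->plug, demo_unplug);
	conf->mddev->plug = &conf->plug;	/* lets md_unplug() reach us */
}

/* Call when deferring IO: arms the unplug timer unless already plugged. */
static void demo_defer_io(struct demo_conf *conf)
{
	plugger_set_plug(&conf->plug);
}

static void demo_teardown(struct demo_conf *conf)
{
	plugger_flush(&conf->plug);	/* sync timer and work before freeing */
}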
diff --combined drivers/md/raid10.c
index 62ecb6650fd023ce10c5dc619be058fa480f8d91,d1d6891263469f5c84be025c004e411ed93f2171..a88aeb5198c76a6c3a5ed58693d71f751ae975a7
@@@ -799,12 -799,12 +799,12 @@@ static int make_request(mddev_t *mddev
        int i;
        int chunk_sects = conf->chunk_mask + 1;
        const int rw = bio_data_dir(bio);
 -      const bool do_sync = bio_rw_flagged(bio, BIO_RW_SYNCIO);
 +      const bool do_sync = (bio->bi_rw & REQ_SYNC);
        struct bio_list bl;
        unsigned long flags;
        mdk_rdev_t *blocked_rdev;
  
 -      if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
 +      if (unlikely(bio->bi_rw & REQ_HARDBARRIER)) {
                md_barrier_request(mddev, bio);
                return 0;
        }
                 */
                bp = bio_split(bio,
                               chunk_sects - (bio->bi_sector & (chunk_sects - 1)) );
+               /* Each of these 'make_request' calls will call 'wait_barrier'.
+                * If the first succeeds but the second blocks due to the resync
+                * thread raising the barrier, we will deadlock because the
+                * IO to the underlying device will be queued in generic_make_request
+                * and will never complete, so will never reduce nr_pending.
+                * So increment nr_waiting here so no new raise_barriers will
+                * succeed, and so the second wait_barrier cannot block.
+                */
+               spin_lock_irq(&conf->resync_lock);
+               conf->nr_waiting++;
+               spin_unlock_irq(&conf->resync_lock);
                if (make_request(mddev, &bp->bio1))
                        generic_make_request(&bp->bio1);
                if (make_request(mddev, &bp->bio2))
                        generic_make_request(&bp->bio2);
  
+               spin_lock_irq(&conf->resync_lock);
+               conf->nr_waiting--;
+               wake_up(&conf->wait_barrier);
+               spin_unlock_irq(&conf->resync_lock);
                bio_pair_release(bp);
                return 0;
        bad_map:
                        mirror->rdev->data_offset;
                read_bio->bi_bdev = mirror->rdev->bdev;
                read_bio->bi_end_io = raid10_end_read_request;
 -              read_bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO);
 +              read_bio->bi_rw = READ | do_sync;
                read_bio->bi_private = r10_bio;
  
                generic_make_request(read_bio);
                        conf->mirrors[d].rdev->data_offset;
                mbio->bi_bdev = conf->mirrors[d].rdev->bdev;
                mbio->bi_end_io = raid10_end_write_request;
 -              mbio->bi_rw = WRITE | (do_sync << BIO_RW_SYNCIO);
 +              mbio->bi_rw = WRITE | do_sync;
                mbio->bi_private = r10_bio;
  
                atomic_inc(&r10_bio->remaining);
@@@ -1716,7 -1734,7 +1734,7 @@@ static void raid10d(mddev_t *mddev
                                raid_end_bio_io(r10_bio);
                                bio_put(bio);
                        } else {
 -                              const bool do_sync = bio_rw_flagged(r10_bio->master_bio, BIO_RW_SYNCIO);
 +                              const bool do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC);
                                bio_put(bio);
                                rdev = conf->mirrors[mirror].rdev;
                                if (printk_ratelimit())
                                bio->bi_sector = r10_bio->devs[r10_bio->read_slot].addr
                                        + rdev->data_offset;
                                bio->bi_bdev = rdev->bdev;
 -                              bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO);
 +                              bio->bi_rw = READ | do_sync;
                                bio->bi_private = r10_bio;
                                bio->bi_end_io = raid10_end_read_request;
                                unplug = 1;
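
The conf->nr_waiting manipulation added around the split-bio submission above is subtle. A walkthrough of the deadlock it prevents, abridged from the wait_barrier()/raise_barrier() pair in raid10.c (the real functions also handle plugging and RESYNC_DEPTH):

/*
 * wait_barrier():  while (conf->barrier) { nr_waiting++; sleep; nr_waiting--; }
 *                  conf->nr_pending++;
 * raise_barrier(): wait until !conf->nr_waiting;
 *                  conf->barrier++;
 *                  wait until !conf->nr_pending;
 *
 * Without the fix, for a read split across a chunk boundary:
 *
 *   make_request(&bp->bio1)  wait_barrier() passes, nr_pending++;
 *                            generic_make_request() only queues the bio on
 *                            current->bio_list - it is not submitted until
 *                            the outer make_request() returns, so
 *                            nr_pending cannot drop yet
 *   resync thread            raise_barrier() sees nr_waiting == 0, sets
 *                            conf->barrier, then waits for nr_pending == 0
 *   make_request(&bp->bio2)  wait_barrier() blocks on conf->barrier
 *                            -> neither side can make progress
 *
 * Holding nr_waiting raised across both halves keeps raise_barrier() from
 * passing its first wait, so the second wait_barrier() cannot block.
 */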
diff --combined drivers/md/raid5.c
index 20ac2f14376a1417552982697cde2b303af7afd2,e30a809cbea0f2a4b7b13b20c6d1d483e63985f4..866d4b5a144c465daf21e439b9b0e6ef36571d6a
@@@ -201,11 -201,11 +201,11 @@@ static void __release_stripe(raid5_conf
                if (test_bit(STRIPE_HANDLE, &sh->state)) {
                        if (test_bit(STRIPE_DELAYED, &sh->state)) {
                                list_add_tail(&sh->lru, &conf->delayed_list);
-                               blk_plug_device(conf->mddev->queue);
+                               plugger_set_plug(&conf->plug);
                        } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
                                   sh->bm_seq - conf->seq_write > 0) {
                                list_add_tail(&sh->lru, &conf->bitmap_list);
-                               blk_plug_device(conf->mddev->queue);
+                               plugger_set_plug(&conf->plug);
                        } else {
                                clear_bit(STRIPE_BIT_DELAY, &sh->state);
                                list_add_tail(&sh->lru, &conf->handle_list);
@@@ -434,7 -434,6 +434,6 @@@ static int has_failed(raid5_conf_t *con
  }
  
  static void unplug_slaves(mddev_t *mddev);
- static void raid5_unplug_device(struct request_queue *q);
  
  static struct stripe_head *
  get_active_stripe(raid5_conf_t *conf, sector_t sector,
                                                     < (conf->max_nr_stripes *3/4)
                                                     || !conf->inactive_blocked),
                                                    conf->device_lock,
-                                                   raid5_unplug_device(conf->mddev->queue)
+                                                   md_raid5_unplug_device(conf)
                                        );
                                conf->inactive_blocked = 0;
                        } else
@@@ -1337,10 -1336,14 +1336,14 @@@ static int grow_stripes(raid5_conf_t *c
        struct kmem_cache *sc;
        int devs = max(conf->raid_disks, conf->previous_raid_disks);
  
-       sprintf(conf->cache_name[0],
-               "raid%d-%s", conf->level, mdname(conf->mddev));
-       sprintf(conf->cache_name[1],
-               "raid%d-%s-alt", conf->level, mdname(conf->mddev));
+       if (conf->mddev->gendisk)
+               sprintf(conf->cache_name[0],
+                       "raid%d-%s", conf->level, mdname(conf->mddev));
+       else
+               sprintf(conf->cache_name[0],
+                       "raid%d-%p", conf->level, conf->mddev);
+       sprintf(conf->cache_name[1], "%s-alt", conf->cache_name[0]);
        conf->active_name = 0;
        sc = kmem_cache_create(conf->cache_name[conf->active_name],
                               sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
@@@ -3614,7 -3617,7 +3617,7 @@@ static void raid5_activate_delayed(raid
                        list_add_tail(&sh->lru, &conf->hold_list);
                }
        } else
-               blk_plug_device(conf->mddev->queue);
+               plugger_set_plug(&conf->plug);
  }
  
  static void activate_bit_delay(raid5_conf_t *conf)
@@@ -3655,36 -3658,44 +3658,44 @@@ static void unplug_slaves(mddev_t *mdde
        rcu_read_unlock();
  }
  
- static void raid5_unplug_device(struct request_queue *q)
+ void md_raid5_unplug_device(raid5_conf_t *conf)
  {
-       mddev_t *mddev = q->queuedata;
-       raid5_conf_t *conf = mddev->private;
        unsigned long flags;
  
        spin_lock_irqsave(&conf->device_lock, flags);
  
-       if (blk_remove_plug(q)) {
+       if (plugger_remove_plug(&conf->plug)) {
                conf->seq_flush++;
                raid5_activate_delayed(conf);
        }
-       md_wakeup_thread(mddev->thread);
+       md_wakeup_thread(conf->mddev->thread);
  
        spin_unlock_irqrestore(&conf->device_lock, flags);
  
-       unplug_slaves(mddev);
+       unplug_slaves(conf->mddev);
  }
+ EXPORT_SYMBOL_GPL(md_raid5_unplug_device);
  
- static int raid5_congested(void *data, int bits)
+ static void raid5_unplug(struct plug_handle *plug)
+ {
+       raid5_conf_t *conf = container_of(plug, raid5_conf_t, plug);
+       md_raid5_unplug_device(conf);
+ }
+ static void raid5_unplug_queue(struct request_queue *q)
+ {
+       mddev_t *mddev = q->queuedata;
+       md_raid5_unplug_device(mddev->private);
+ }
+ int md_raid5_congested(mddev_t *mddev, int bits)
  {
-       mddev_t *mddev = data;
        raid5_conf_t *conf = mddev->private;
  
        /* No difference between reads and writes.  Just check
         * how busy the stripe_cache is
         */
  
-       if (mddev_congested(mddev, bits))
-               return 1;
        if (conf->inactive_blocked)
                return 1;
        if (conf->quiesce)
  
        return 0;
  }
+ EXPORT_SYMBOL_GPL(md_raid5_congested);
+ static int raid5_congested(void *data, int bits)
+ {
+       mddev_t *mddev = data;
+       return mddev_congested(mddev, bits) ||
+               md_raid5_congested(mddev, bits);
+ }
  
  /* We want read requests to align with chunks where possible,
   * but write requests don't need to.
@@@ -3958,7 -3978,7 +3978,7 @@@ static int make_request(mddev_t *mddev
        const int rw = bio_data_dir(bi);
        int remaining;
  
 -      if (unlikely(bio_rw_flagged(bi, BIO_RW_BARRIER))) {
 +      if (unlikely(bi->bi_rw & REQ_HARDBARRIER)) {
                /* Drain all pending writes.  We only really need
                 * to ensure they have been submitted, but this is
                 * easier.
                                 * add failed due to overlap.  Flush everything
                                 * and wait a while
                                 */
-                               raid5_unplug_device(mddev->queue);
+                               md_raid5_unplug_device(conf);
                                release_stripe(sh);
                                schedule();
                                goto retry;
@@@ -4566,23 -4586,15 +4586,15 @@@ raid5_show_stripe_cache_size(mddev_t *m
                return 0;
  }
  
- static ssize_t
- raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len)
+ int
+ raid5_set_cache_size(mddev_t *mddev, int size)
  {
        raid5_conf_t *conf = mddev->private;
-       unsigned long new;
        int err;
  
-       if (len >= PAGE_SIZE)
+       if (size <= 16 || size > 32768)
                return -EINVAL;
-       if (!conf)
-               return -ENODEV;
-       if (strict_strtoul(page, 10, &new))
-               return -EINVAL;
-       if (new <= 16 || new > 32768)
-               return -EINVAL;
-       while (new < conf->max_nr_stripes) {
+       while (size < conf->max_nr_stripes) {
                if (drop_one_stripe(conf))
                        conf->max_nr_stripes--;
                else
        err = md_allow_write(mddev);
        if (err)
                return err;
-       while (new > conf->max_nr_stripes) {
+       while (size > conf->max_nr_stripes) {
                if (grow_one_stripe(conf))
                        conf->max_nr_stripes++;
                else break;
        }
+       return 0;
+ }
+ EXPORT_SYMBOL(raid5_set_cache_size);
+ static ssize_t
+ raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len)
+ {
+       raid5_conf_t *conf = mddev->private;
+       unsigned long new;
+       int err;
+       if (len >= PAGE_SIZE)
+               return -EINVAL;
+       if (!conf)
+               return -ENODEV;
+       if (strict_strtoul(page, 10, &new))
+               return -EINVAL;
+       err = raid5_set_cache_size(mddev, new);
+       if (err)
+               return err;
        return len;
  }
  
@@@ -4958,7 -4991,7 +4991,7 @@@ static int only_parity(int raid_disk, i
  static int run(mddev_t *mddev)
  {
        raid5_conf_t *conf;
-       int working_disks = 0, chunk_size;
+       int working_disks = 0;
        int dirty_parity_disks = 0;
        mdk_rdev_t *rdev;
        sector_t reshape_offset = 0;
                                                        "reshape");
        }
  
-       /* read-ahead size must cover two whole stripes, which is
-        * 2 * (datadisks) * chunksize where 'n' is the number of raid devices
-        */
-       {
-               int data_disks = conf->previous_raid_disks - conf->max_degraded;
-               int stripe = data_disks *
-                       ((mddev->chunk_sectors << 9) / PAGE_SIZE);
-               if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
-                       mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
-       }
  
        /* Ok, everything is just fine now */
        if (mddev->to_remove == &raid5_attrs_group)
                mddev->to_remove = NULL;
-       else if (sysfs_create_group(&mddev->kobj, &raid5_attrs_group))
+       else if (mddev->kobj.sd &&
+           sysfs_create_group(&mddev->kobj, &raid5_attrs_group))
                printk(KERN_WARNING
-                      "md/raid:%s: failed to create sysfs attributes.\n",
+                      "raid5: failed to create sysfs attributes for %s\n",
                       mdname(mddev));
+       md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
  
-       mddev->queue->queue_lock = &conf->device_lock;
+       plugger_init(&conf->plug, raid5_unplug);
+       mddev->plug = &conf->plug;
+       if (mddev->queue) {
+               int chunk_size;
+               /* read-ahead size must cover two whole stripes, which
+                * is 2 * (datadisks) * chunksize, where 'datadisks' is
+                * the number of raid devices minus the parity devices
+                */
+               int data_disks = conf->previous_raid_disks - conf->max_degraded;
+               int stripe = data_disks *
+                       ((mddev->chunk_sectors << 9) / PAGE_SIZE);
+               if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
+                       mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
  
-       mddev->queue->unplug_fn = raid5_unplug_device;
-       mddev->queue->backing_dev_info.congested_data = mddev;
-       mddev->queue->backing_dev_info.congested_fn = raid5_congested;
+               blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec);
  
-       md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
+               mddev->queue->backing_dev_info.congested_data = mddev;
+               mddev->queue->backing_dev_info.congested_fn = raid5_congested;
+               mddev->queue->queue_lock = &conf->device_lock;
+               mddev->queue->unplug_fn = raid5_unplug_queue;
  
-       blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec);
-       chunk_size = mddev->chunk_sectors << 9;
-       blk_queue_io_min(mddev->queue, chunk_size);
-       blk_queue_io_opt(mddev->queue, chunk_size *
-                        (conf->raid_disks - conf->max_degraded));
+               chunk_size = mddev->chunk_sectors << 9;
+               blk_queue_io_min(mddev->queue, chunk_size);
+               blk_queue_io_opt(mddev->queue, chunk_size *
+                                (conf->raid_disks - conf->max_degraded));
  
-       list_for_each_entry(rdev, &mddev->disks, same_set)
-               disk_stack_limits(mddev->gendisk, rdev->bdev,
-                                 rdev->data_offset << 9);
+               list_for_each_entry(rdev, &mddev->disks, same_set)
+                       disk_stack_limits(mddev->gendisk, rdev->bdev,
+                                         rdev->data_offset << 9);
+       }
  
        return 0;
  abort:
@@@ -5200,8 -5238,9 +5238,9 @@@ static int stop(mddev_t *mddev
  
        md_unregister_thread(mddev->thread);
        mddev->thread = NULL;
-       mddev->queue->backing_dev_info.congested_fn = NULL;
-       blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
+       if (mddev->queue)
+               mddev->queue->backing_dev_info.congested_fn = NULL;
+       plugger_flush(&conf->plug); /* the unplug fn references 'conf'*/
        free_conf(conf);
        mddev->private = NULL;
        mddev->to_remove = &raid5_attrs_group;
@@@ -5545,10 -5584,7 +5584,7 @@@ static int raid5_start_reshape(mddev_t 
                                sprintf(nm, "rd%d", rdev->raid_disk);
                                if (sysfs_create_link(&mddev->kobj,
                                                      &rdev->kobj, nm))
-                                       printk(KERN_WARNING
-                                              "md/raid:%s: failed to create "
-                                              " link %s\n",
-                                              mdname(mddev), nm);
+                                       /* Failure here is OK */;
                        } else
                                break;
                }
@@@ -5603,7 -5639,7 +5639,7 @@@ static void end_reshape(raid5_conf_t *c
                /* read-ahead size must cover two whole stripes, which is
                 * 2 * (datadisks) * chunksize where 'n' is the number of raid devices
                 */
-               {
+               if (conf->mddev->queue) {
                        int data_disks = conf->raid_disks - conf->max_degraded;
                        int stripe = data_disks * ((conf->chunk_sectors << 9)
                                                   / PAGE_SIZE);
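
raid5_set_cache_size() is split out of the sysfs store routine and exported above so that callers other than the stripe_cache_size attribute can resize the stripe cache. A hedged usage sketch — demo_resize_stripe_cache() is hypothetical:

/* hypothetical in-kernel caller of the newly exported resize */
static int demo_resize_stripe_cache(mddev_t *mddev, int new_size)
{
	int err;

	/* raid5_set_cache_size() rejects new_size <= 16 or > 32768 with
	 * -EINVAL and can propagate an error from md_allow_write(); a grow
	 * that fails to allocate simply stops early and still returns 0. */
	err = raid5_set_cache_size(mddev, new_size);
	if (err)
		printk(KERN_WARNING
		       "demo: stripe cache resize to %d failed: %d\n",
		       new_size, err);
	return err;
}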