static LIST_HEAD(pers_list);
static DEFINE_SPINLOCK(pers_lock);
+static struct kobj_type md_ktype;
+
struct md_cluster_operations *md_cluster_ops;
EXPORT_SYMBOL(md_cluster_ops);
struct module *md_cluster_mod;
mddev->sync_speed_max : sysctl_speed_limit_max;
}
+static void * flush_info_alloc(gfp_t gfp_flags, void *data)
+{
+ return kzalloc(sizeof(struct flush_info), gfp_flags);
+}
+static void flush_info_free(void *flush_info, void *data)
+{
+ kfree(flush_info);
+}
+
+static void * flush_bio_alloc(gfp_t gfp_flags, void *data)
+{
+ return kzalloc(sizeof(struct flush_bio), gfp_flags);
+}
+static void flush_bio_free(void *flush_bio, void *data)
+{
+ kfree(flush_bio);
+}
+
static struct ctl_table_header *raid_table_header;
static struct ctl_table raid_table[] = {
*/
static bool create_on_open = true;
-/* bio_clone_mddev
- * like bio_clone_bioset, but with a local bio set
- */
-
struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
struct mddev *mddev)
{
struct bio *b;
- if (!mddev || !mddev->bio_set)
+ if (!mddev || !bioset_initialized(&mddev->bio_set))
return bio_alloc(gfp_mask, nr_iovecs);
- b = bio_alloc_bioset(gfp_mask, nr_iovecs, mddev->bio_set);
+ b = bio_alloc_bioset(gfp_mask, nr_iovecs, &mddev->bio_set);
if (!b)
return NULL;
return b;
static struct bio *md_bio_alloc_sync(struct mddev *mddev)
{
- if (!mddev || !mddev->sync_set)
+ if (!mddev || !bioset_initialized(&mddev->sync_set))
return bio_alloc(GFP_NOIO, 1);
- return bio_alloc_bioset(GFP_NOIO, 1, mddev->sync_set);
+ return bio_alloc_bioset(GFP_NOIO, 1, &mddev->sync_set);
}
/*
static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
{
const int rw = bio_data_dir(bio);
+ const int sgrp = op_stat_group(bio_op(bio));
struct mddev *mddev = q->queuedata;
unsigned int sectors;
int cpu;
md_handle_request(mddev, bio);
cpu = part_stat_lock();
- part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
- part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
+ part_stat_inc(cpu, &mddev->gendisk->part0, ios[sgrp]);
+ part_stat_add(cpu, &mddev->gendisk->part0, sectors[sgrp], sectors);
part_stat_unlock();
return BLK_QC_T_NONE;
/*
* Generic flush handling for md
*/
+static void submit_flushes(struct work_struct *ws)
+{
+ struct flush_info *fi = container_of(ws, struct flush_info, flush_work);
+ struct mddev *mddev = fi->mddev;
+ struct bio *bio = fi->bio;
-static void md_end_flush(struct bio *bio)
+ bio->bi_opf &= ~REQ_PREFLUSH;
+ md_handle_request(mddev, bio);
+
+ mempool_free(fi, mddev->flush_pool);
+}
+
+static void md_end_flush(struct bio *fbio)
{
- struct md_rdev *rdev = bio->bi_private;
- struct mddev *mddev = rdev->mddev;
+ struct flush_bio *fb = fbio->bi_private;
+ struct md_rdev *rdev = fb->rdev;
+ struct flush_info *fi = fb->fi;
+ struct bio *bio = fi->bio;
+ struct mddev *mddev = fi->mddev;
rdev_dec_pending(rdev, mddev);
- if (atomic_dec_and_test(&mddev->flush_pending)) {
- /* The pre-request flush has finished */
- queue_work(md_wq, &mddev->flush_work);
+ if (atomic_dec_and_test(&fi->flush_pending)) {
+ if (bio->bi_iter.bi_size == 0)
+ /* an empty barrier - all done */
+ bio_endio(bio);
+ else {
+ INIT_WORK(&fi->flush_work, submit_flushes);
+ queue_work(md_wq, &fi->flush_work);
+ }
}
- bio_put(bio);
-}
-static void md_submit_flush_data(struct work_struct *ws);
+ mempool_free(fb, mddev->flush_bio_pool);
+ bio_put(fbio);
+}
-static void submit_flushes(struct work_struct *ws)
+void md_flush_request(struct mddev *mddev, struct bio *bio)
{
- struct mddev *mddev = container_of(ws, struct mddev, flush_work);
struct md_rdev *rdev;
+ struct flush_info *fi;
+
+ fi = mempool_alloc(mddev->flush_pool, GFP_NOIO);
+
+ fi->bio = bio;
+ fi->mddev = mddev;
+ atomic_set(&fi->flush_pending, 1);
- INIT_WORK(&mddev->flush_work, md_submit_flush_data);
- atomic_set(&mddev->flush_pending, 1);
rcu_read_lock();
rdev_for_each_rcu(rdev, mddev)
if (rdev->raid_disk >= 0 &&
* we reclaim rcu_read_lock
*/
struct bio *bi;
+ struct flush_bio *fb;
atomic_inc(&rdev->nr_pending);
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
+
+ fb = mempool_alloc(mddev->flush_bio_pool, GFP_NOIO);
+ fb->fi = fi;
+ fb->rdev = rdev;
+
bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
- bi->bi_end_io = md_end_flush;
- bi->bi_private = rdev;
bio_set_dev(bi, rdev->bdev);
+ bi->bi_end_io = md_end_flush;
+ bi->bi_private = fb;
bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
- atomic_inc(&mddev->flush_pending);
+
+ atomic_inc(&fi->flush_pending);
submit_bio(bi);
+
rcu_read_lock();
rdev_dec_pending(rdev, mddev);
}
rcu_read_unlock();
- if (atomic_dec_and_test(&mddev->flush_pending))
- queue_work(md_wq, &mddev->flush_work);
-}
-
-static void md_submit_flush_data(struct work_struct *ws)
-{
- struct mddev *mddev = container_of(ws, struct mddev, flush_work);
- struct bio *bio = mddev->flush_bio;
- /*
- * must reset flush_bio before calling into md_handle_request to avoid a
- * deadlock, because other bios passed md_handle_request suspend check
- * could wait for this and below md_handle_request could wait for those
- * bios because of suspend check
- */
- mddev->flush_bio = NULL;
- wake_up(&mddev->sb_wait);
-
- if (bio->bi_iter.bi_size == 0)
- /* an empty barrier - all done */
- bio_endio(bio);
- else {
- bio->bi_opf &= ~REQ_PREFLUSH;
- md_handle_request(mddev, bio);
+ if (atomic_dec_and_test(&fi->flush_pending)) {
+ if (bio->bi_iter.bi_size == 0)
+ /* an empty barrier - all done */
+ bio_endio(bio);
+ else {
+ INIT_WORK(&fi->flush_work, submit_flushes);
+ queue_work(md_wq, &fi->flush_work);
+ }
}
}
-
-void md_flush_request(struct mddev *mddev, struct bio *bio)
-{
- spin_lock_irq(&mddev->lock);
- wait_event_lock_irq(mddev->sb_wait,
- !mddev->flush_bio,
- mddev->lock);
- mddev->flush_bio = bio;
- spin_unlock_irq(&mddev->lock);
-
- INIT_WORK(&mddev->flush_work, submit_flushes);
- queue_work(md_wq, &mddev->flush_work);
-}
EXPORT_SYMBOL(md_flush_request);
static inline struct mddev *mddev_get(struct mddev *mddev)
static void mddev_put(struct mddev *mddev)
{
- struct bio_set *bs = NULL, *sync_bs = NULL;
-
if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
return;
if (!mddev->raid_disks && list_empty(&mddev->disks) &&
/* Array is not configured at all, and not held active,
* so destroy it */
list_del_init(&mddev->all_mddevs);
- bs = mddev->bio_set;
- sync_bs = mddev->sync_set;
- mddev->bio_set = NULL;
- mddev->sync_set = NULL;
- if (mddev->gendisk) {
- /* We did a probe so need to clean up. Call
- * queue_work inside the spinlock so that
- * flush_workqueue() after mddev_find will
- * succeed in waiting for the work to be done.
- */
- INIT_WORK(&mddev->del_work, mddev_delayed_delete);
- queue_work(md_misc_wq, &mddev->del_work);
- } else
- kfree(mddev);
+
+ /*
+ * Call queue_work inside the spinlock so that
+ * flush_workqueue() after mddev_find will succeed in waiting
+ * for the work to be done.
+ */
+ INIT_WORK(&mddev->del_work, mddev_delayed_delete);
+ queue_work(md_misc_wq, &mddev->del_work);
}
spin_unlock(&all_mddevs_lock);
- if (bs)
- bioset_free(bs);
- if (sync_bs)
- bioset_free(sync_bs);
}
static void md_safemode_timeout(struct timer_list *t);
void mddev_init(struct mddev *mddev)
{
+ kobject_init(&mddev->kobj, &md_ktype);
mutex_init(&mddev->open_mutex);
mutex_init(&mddev->reconfig_mutex);
mutex_init(&mddev->bitmap_info.mutex);
atomic_set(&mddev->openers, 0);
atomic_set(&mddev->active_io, 0);
spin_lock_init(&mddev->lock);
- atomic_set(&mddev->flush_pending, 0);
init_waitqueue_head(&mddev->sb_wait);
init_waitqueue_head(&mddev->recovery_wait);
mddev->reshape_position = MaxSector;
bdev_get_integrity(reference->bdev));
pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
- if (bioset_integrity_create(mddev->bio_set, BIO_POOL_SIZE)) {
+ if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE)) {
pr_err("md: failed to create integrity pool for %s\n",
mdname(mddev));
return -EINVAL;
err = 0;
}
} else if (cmd_match(buf, "re-add")) {
- if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1)) {
+ if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1) &&
+ rdev->saved_raid_disk >= 0) {
/* clear_bit is performed _after_ all the devices
* have their local Faulty bit cleared. If any writes
* happen in the meantime in the local node, they
put_disk(mddev->gendisk);
percpu_ref_exit(&mddev->writes_pending);
+ bioset_exit(&mddev->bio_set);
+ bioset_exit(&mddev->sync_set);
kfree(mddev);
}
mutex_lock(&mddev->open_mutex);
add_disk(disk);
- error = kobject_init_and_add(&mddev->kobj, &md_ktype,
- &disk_to_dev(disk)->kobj, "%s", "md");
+ error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md");
if (error) {
/* This isn't possible, but as kobject_init_and_add is marked
* __must_check, we must do something with the result
sysfs_notify_dirent_safe(rdev->sysfs_state);
}
- if (mddev->bio_set == NULL) {
- mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
- if (!mddev->bio_set)
- return -ENOMEM;
+ if (!bioset_initialized(&mddev->bio_set)) {
+ err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
+ if (err)
+ return err;
+ }
+ if (!bioset_initialized(&mddev->sync_set)) {
+ err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
+ if (err)
+ return err;
+ }
+ if (mddev->flush_pool == NULL) {
+ mddev->flush_pool = mempool_create(NR_FLUSH_INFOS, flush_info_alloc,
+ flush_info_free, mddev);
+ if (!mddev->flush_pool) {
+ err = -ENOMEM;
+ goto abort;
+ }
}
- if (mddev->sync_set == NULL) {
- mddev->sync_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
- if (!mddev->sync_set) {
+ if (mddev->flush_bio_pool == NULL) {
+ mddev->flush_bio_pool = mempool_create(NR_FLUSH_BIOS, flush_bio_alloc,
+ flush_bio_free, mddev);
+ if (!mddev->flush_bio_pool) {
err = -ENOMEM;
goto abort;
}
return 0;
abort:
- if (mddev->bio_set) {
- bioset_free(mddev->bio_set);
- mddev->bio_set = NULL;
+ if (mddev->flush_bio_pool) {
+ mempool_destroy(mddev->flush_bio_pool);
+ mddev->flush_bio_pool = NULL;
}
- if (mddev->sync_set) {
- bioset_free(mddev->sync_set);
- mddev->sync_set = NULL;
+ if (mddev->flush_pool){
+ mempool_destroy(mddev->flush_pool);
+ mddev->flush_pool = NULL;
}
return err;
* This is called from dm-raid
*/
__md_stop(mddev);
- if (mddev->bio_set) {
- bioset_free(mddev->bio_set);
- mddev->bio_set = NULL;
+ if (mddev->flush_bio_pool) {
+ mempool_destroy(mddev->flush_bio_pool);
+ mddev->flush_bio_pool = NULL;
}
- if (mddev->sync_set) {
- bioset_free(mddev->sync_set);
- mddev->sync_set = NULL;
+ if (mddev->flush_pool) {
+ mempool_destroy(mddev->flush_pool);
+ mddev->flush_pool = NULL;
}
+ bioset_exit(&mddev->bio_set);
+ bioset_exit(&mddev->sync_set);
}
EXPORT_SYMBOL_GPL(md_stop);
char b[BDEVNAME_SIZE];
struct md_rdev *rdev;
+ if (!mddev->pers)
+ return -ENODEV;
+
rdev = find_rdev(mddev, dev);
if (!rdev)
return -ENXIO;
resync -= atomic_read(&mddev->recovery_active);
if (resync == 0) {
+ if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery)) {
+ struct md_rdev *rdev;
+
+ rdev_for_each(rdev, mddev)
+ if (rdev->raid_disk >= 0 &&
+ !test_bit(Faulty, &rdev->flags) &&
+ rdev->recovery_offset != MaxSector &&
+ rdev->recovery_offset) {
+ seq_printf(seq, "\trecover=REMOTE");
+ return 1;
+ }
+ if (mddev->reshape_position != MaxSector)
+ seq_printf(seq, "\treshape=REMOTE");
+ else
+ seq_printf(seq, "\tresync=REMOTE");
+ return 1;
+ }
if (mddev->recovery_cp < MaxSector) {
seq_printf(seq, "\tresync=PENDING");
return 1;
rcu_read_lock();
rdev_for_each_rcu(rdev, mddev) {
struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
- curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
- (int)part_stat_read(&disk->part0, sectors[1]) -
+ curr_events = (int)part_stat_read_accum(&disk->part0, sectors) -
atomic_read(&disk->sync_io);
/* sync IO will cause sync_io to increase before the disk_stats
* as sync_io is counted when a request starts, and
if (mddev->pers->hot_remove_disk(
mddev, rdev) == 0) {
sysfs_unlink_rdev(mddev, rdev);
+ rdev->saved_raid_disk = rdev->raid_disk;
rdev->raid_disk = -1;
removed++;
}