Merge git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 3 Apr 2009 17:02:45 +0000 (10:02 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 3 Apr 2009 17:02:45 +0000 (10:02 -0700)
* git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm: (36 commits)
  dm: set queue ordered mode
  dm: move wait queue declaration
  dm: merge pushback and deferred bio lists
  dm: allow uninterruptible wait for pending io
  dm: merge __flush_deferred_io into caller
  dm: move bio_io_error into __split_and_process_bio
  dm: rename __split_bio
  dm: remove unnecessary struct dm_wq_req
  dm: remove unnecessary work queue context field
  dm: remove unnecessary work queue type field
  dm: bio list add bio_list_add_head
  dm snapshot: persistent fix dtr cleanup
  dm snapshot: move status to exception store
  dm snapshot: move ctr parsing to exception store
  dm snapshot: use DMEMIT macro for status
  dm snapshot: remove dm_snap header
  dm snapshot: remove dm_snap header use
  dm exception store: move cow pointer
  dm exception store: move chunk_fields
  dm exception store: move dm_target pointer
  ...

19 files changed:
drivers/md/dm-bio-list.h
drivers/md/dm-bio-record.h
drivers/md/dm-crypt.c
drivers/md/dm-exception-store.c
drivers/md/dm-exception-store.h
drivers/md/dm-io.c
drivers/md/dm-log.c
drivers/md/dm-path-selector.c
drivers/md/dm-raid1.c
drivers/md/dm-snap-persistent.c
drivers/md/dm-snap-transient.c
drivers/md/dm-snap.c
drivers/md/dm-snap.h [deleted file]
drivers/md/dm-table.c
drivers/md/dm-target.c
drivers/md/dm.c
drivers/md/dm.h
include/linux/device-mapper.h
include/linux/dm-dirty-log.h

index d4509be0fe67f78ecb91c0a5981d28ab9edd31cc..345098b4ca77ac77400c00e7598f2398bec04a06 100644 (file)
@@ -52,6 +52,16 @@ static inline void bio_list_add(struct bio_list *bl, struct bio *bio)
        bl->tail = bio;
 }
 
+/*
+ * Insert @bio at the front of @bl.  If the list has no tail yet, the
+ * bio becomes the tail as well.
+ */
+static inline void bio_list_add_head(struct bio_list *bl, struct bio *bio)
+{
+       struct bio *old_head = bl->head;
+
+       if (!bl->tail)
+               bl->tail = bio;
+
+       bio->bi_next = old_head;
+       bl->head = bio;
+}
+
 static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2)
 {
        if (!bl2->head)
index d3ec217847d68c9ca2b6d20ca4e9d2eb9cefc3f3..3a8cfa2645c72f6539170f2ab2d3242bb4a6fa58 100644 (file)
  * functions in this file help the target record and restore the
  * original bio state.
  */
+
+/*
+ * Saved copy of the two bio_vec fields (bv_len, bv_offset) that
+ * dm_bio_record() stores and dm_bio_restore() writes back.
+ *
+ * When PAGE_SIZE is below 65536 both fields are kept as 16-bit values,
+ * otherwise as plain unsigned.
+ * NOTE(review): presumably the narrow form is used to keep the
+ * per-bio array of these records small - confirm.
+ */
+struct dm_bio_vec_details {
+#if PAGE_SIZE < 65536
+       __u16 bv_len;
+       __u16 bv_offset;
+#else
+       unsigned bv_len;
+       unsigned bv_offset;
+#endif
+};
+
 struct dm_bio_details {
        sector_t bi_sector;
        struct block_device *bi_bdev;
        unsigned int bi_size;
        unsigned short bi_idx;
        unsigned long bi_flags;
+       struct dm_bio_vec_details bi_io_vec[BIO_MAX_PAGES];
 };
 
+/*
+ * Save the state of @bio into @bd: bi_sector, bi_bdev, bi_size, bi_idx
+ * and bi_flags, plus bv_len/bv_offset for each of the first
+ * bio->bi_vcnt entries of bio->bi_io_vec.
+ */
 static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio)
 {
+       unsigned i;
+
        bd->bi_sector = bio->bi_sector;
        bd->bi_bdev = bio->bi_bdev;
        bd->bi_size = bio->bi_size;
        bd->bi_idx = bio->bi_idx;
        bd->bi_flags = bio->bi_flags;
+
+       /* Only length and offset of each vector entry are recorded. */
+       for (i = 0; i < bio->bi_vcnt; i++) {
+               bd->bi_io_vec[i].bv_len = bio->bi_io_vec[i].bv_len;
+               bd->bi_io_vec[i].bv_offset = bio->bi_io_vec[i].bv_offset;
+       }
 }
 
+/*
+ * Copy the state saved in @bd back into @bio: bi_sector, bi_bdev,
+ * bi_size, bi_idx and bi_flags, then bv_len/bv_offset of each vector
+ * entry.
+ */
 static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio)
 {
+       unsigned i;
+
        bio->bi_sector = bd->bi_sector;
        bio->bi_bdev = bd->bi_bdev;
        bio->bi_size = bd->bi_size;
        bio->bi_idx = bd->bi_idx;
        bio->bi_flags = bd->bi_flags;
+
+       /*
+        * The loop is bounded by the bio's current bi_vcnt; bi_vcnt
+        * itself is not one of the fields restored here.
+        */
+       for (i = 0; i < bio->bi_vcnt; i++) {
+               bio->bi_io_vec[i].bv_len = bd->bi_io_vec[i].bv_len;
+               bio->bi_io_vec[i].bv_offset = bd->bi_io_vec[i].bv_offset;
+       }
 }
 
 #endif
index bfefd079a9557b24c4d9d4f91646c1d5f2661942..53394e863c749db9444ab4ae4780b06a61afb8f1 100644 (file)
@@ -1156,8 +1156,7 @@ bad_ivmode:
        crypto_free_ablkcipher(tfm);
 bad_cipher:
        /* Must zero key material before freeing */
-       memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8));
-       kfree(cc);
+       kzfree(cc);
        return -EINVAL;
 }
 
@@ -1183,8 +1182,7 @@ static void crypt_dtr(struct dm_target *ti)
        dm_put_device(ti, cc->dev);
 
        /* Must zero key material before freeing */
-       memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8));
-       kfree(cc);
+       kzfree(cc);
 }
 
 static int crypt_map(struct dm_target *ti, struct bio *bio,
index dccbfb0e010fafc7b3470f06de29132b18124e57..a2e26c24214150cac23e92acf1391a212d60271b 100644 (file)
@@ -7,6 +7,7 @@
 
 #include "dm-exception-store.h"
 
+#include <linux/ctype.h>
 #include <linux/mm.h>
 #include <linux/pagemap.h>
 #include <linux/vmalloc.h>
 
 #define DM_MSG_PREFIX "snapshot exception stores"
 
+static LIST_HEAD(_exception_store_types);
+static DEFINE_SPINLOCK(_lock);
+
+/*
+ * Walk the registered exception store types and return the entry whose
+ * name equals @name, or NULL if there is none.  Callers in this file
+ * hold _lock around the call.
+ */
+static struct dm_exception_store_type *__find_exception_store_type(const char *name)
+{
+       struct dm_exception_store_type *candidate;
+       struct dm_exception_store_type *match = NULL;
+
+       list_for_each_entry(candidate, &_exception_store_types, list) {
+               if (strcmp(name, candidate->name) == 0) {
+                       match = candidate;
+                       break;
+               }
+       }
+
+       return match;
+}
+
+/*
+ * Look up @name under _lock and take a reference on the owning module.
+ * Returns NULL when the type is not registered or the module reference
+ * cannot be obtained.
+ */
+static struct dm_exception_store_type *_get_exception_store_type(const char *name)
+{
+       struct dm_exception_store_type *found;
+
+       spin_lock(&_lock);
+
+       found = __find_exception_store_type(name);
+       if (found) {
+               if (!try_module_get(found->module))
+                       found = NULL;
+       }
+
+       spin_unlock(&_lock);
+
+       return found;
+}
+
+/*
+ * get_type
+ * @type_name
+ *
+ * Retrieve the dm_exception_store_type registered under 'type_name'.
+ * If it is not registered yet, try to load a module that provides it.
+ *
+ * Exception store modules are named "dm-exstore-" followed by the
+ * type name, and one module may register several types.  The first
+ * request is for "dm-exstore-<type_name>"; whenever the request fails
+ * or the type is still not registered afterwards, the module name is
+ * truncated at its last '-' and the request is repeated.  The lookup
+ * itself always uses the full, untruncated 'type_name'.
+ *
+ * For example, if type_name was "clustered-shared", it would search
+ * 'dm-exstore-clustered-shared' then 'dm-exstore-clustered'.
+ *
+ * The shorter "dm-exstore-" prefix was chosen over
+ * "dm-exception-store-" to keep module names manageable.  A module
+ * must follow this naming convention to be autoloaded.
+ *
+ * Returns: dm_exception_store_type* on success (with a module
+ * reference taken by _get_exception_store_type()), NULL on failure
+ */
+static struct dm_exception_store_type *get_type(const char *type_name)
+{
+       char *p, *type_name_dup;
+       struct dm_exception_store_type *type;
+
+       /* Fast path: the type is already registered. */
+       type = _get_exception_store_type(type_name);
+       if (type)
+               return type;
+
+       /* Work on a private copy; the truncation below modifies it. */
+       type_name_dup = kstrdup(type_name, GFP_KERNEL);
+       if (!type_name_dup) {
+               DMERR("No memory left to attempt load for \"%s\"", type_name);
+               return NULL;
+       }
+
+       /*
+        * 'type' is NULL on entry.  It is only assigned when
+        * request_module() returned 0, because of the short-circuit ||.
+        */
+       while (request_module("dm-exstore-%s", type_name_dup) ||
+              !(type = _get_exception_store_type(type_name))) {
+               p = strrchr(type_name_dup, '-');
+               if (!p)
+                       break;
+               p[0] = '\0';
+       }
+
+       if (!type)
+               DMWARN("Module for exstore type \"%s\" not found.", type_name);
+
+       kfree(type_name_dup);
+
+       return type;
+}
+
+/*
+ * Drop the module reference held for @type.  The reference is released
+ * while holding _lock.
+ */
+static void put_type(struct dm_exception_store_type *type)
+{
+       struct module *owner;
+
+       spin_lock(&_lock);
+       owner = type->module;
+       module_put(owner);
+       spin_unlock(&_lock);
+}
+
+/*
+ * Add @type to the list of known exception store types.
+ *
+ * Returns 0 on success, or -EEXIST if a type with the same name is
+ * already on the list.
+ */
+int dm_exception_store_type_register(struct dm_exception_store_type *type)
+{
+       spin_lock(&_lock);
+
+       if (__find_exception_store_type(type->name)) {
+               spin_unlock(&_lock);
+               return -EEXIST;
+       }
+
+       list_add(&type->list, &_exception_store_types);
+       spin_unlock(&_lock);
+
+       return 0;
+}
+EXPORT_SYMBOL(dm_exception_store_type_register);
+
+/*
+ * Remove @type from the list of known exception store types.
+ *
+ * Returns 0 on success, or -EINVAL if no type with that name is
+ * currently registered.
+ */
+int dm_exception_store_type_unregister(struct dm_exception_store_type *type)
+{
+       int r = -EINVAL;
+
+       spin_lock(&_lock);
+       if (__find_exception_store_type(type->name)) {
+               list_del(&type->list);
+               r = 0;
+       }
+       spin_unlock(&_lock);
+
+       return r;
+}
+EXPORT_SYMBOL(dm_exception_store_type_unregister);
+
+/*
+ * Round @n up to the next multiple of @size.  @size must be a power
+ * of 2 so that (size - 1) can serve as a bit mask.
+ */
+static ulong round_up(ulong n, ulong size)
+{
+       const ulong mask = size - 1;
+
+       return (n + mask) & ~mask;
+}
+
+/*
+ * Parse @chunk_size_arg as a decimal number and store the resulting
+ * chunk_size, chunk_mask and chunk_shift in @store.
+ *
+ * Returns 0 on success.  On failure returns -EINVAL and points *@error
+ * at a message describing the problem.
+ */
+static int set_chunk_size(struct dm_exception_store *store,
+                         const char *chunk_size_arg, char **error)
+{
+       unsigned long sectors;
+       char *end;
+
+       sectors = simple_strtoul(chunk_size_arg, &end, 10);
+       if (*chunk_size_arg == '\0' || *end != '\0') {
+               *error = "Invalid chunk size";
+               return -EINVAL;
+       }
+
+       /* Zero is accepted and leaves all three chunk fields cleared. */
+       if (sectors == 0) {
+               store->chunk_size = 0;
+               store->chunk_mask = 0;
+               store->chunk_shift = 0;
+               return 0;
+       }
+
+       /*
+        * The chunk size has to be a multiple of the page size
+        * (PAGE_SIZE >> 9 expresses a page in 512-byte units), so
+        * quietly round it up when it is not.
+        */
+       sectors = round_up(sectors, PAGE_SIZE >> 9);
+
+       /* Only power-of-2 chunk sizes are allowed */
+       if (!is_power_of_2(sectors)) {
+               *error = "Chunk size is not a power of 2";
+               return -EINVAL;
+       }
+
+       /* The chunk must also cover whole blocks of the COW device */
+       if (sectors % (bdev_hardsect_size(store->cow->bdev) >> 9)) {
+               *error = "Chunk size is not a multiple of device blocksize";
+               return -EINVAL;
+       }
+
+       store->chunk_size = sectors;
+       store->chunk_mask = sectors - 1;
+       store->chunk_shift = ffs(sectors) - 1;
+
+       return 0;
+}
+
+/*
+ * Build an exception store for target @ti from the table arguments.
+ *
+ * argv[0] names the COW device.  argv[1] is the persistent flag: its
+ * first character must be 'P' or 'N' (either case), and the whole
+ * string is passed to get_type() as the store type name.  argv[2] is
+ * the chunk size handed to set_chunk_size().
+ *
+ * On success returns 0, stores the new object in *@store and sets
+ * *@args_used to 3.  On failure returns a negative errno, sets
+ * ti->error and releases everything acquired up to that point.
+ */
+int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
+                             unsigned *args_used,
+                             struct dm_exception_store **store)
+{
+       int r = 0;
+       struct dm_exception_store_type *type;
+       struct dm_exception_store *tmp_store;
+       char persistent;
+
+       if (argc < 3) {
+               ti->error = "Insufficient exception store arguments";
+               return -EINVAL;
+       }
+
+       tmp_store = kmalloc(sizeof(*tmp_store), GFP_KERNEL);
+       if (!tmp_store) {
+               ti->error = "Exception store allocation failed";
+               return -ENOMEM;
+       }
+
+       persistent = toupper(*argv[1]);
+       if (persistent != 'P' && persistent != 'N') {
+               ti->error = "Persistent flag is not P or N";
+               /* tmp_store is already allocated; free it on the way out. */
+               r = -EINVAL;
+               goto bad_type;
+       }
+
+       type = get_type(argv[1]);
+       if (!type) {
+               ti->error = "Exception store type not recognised";
+               r = -EINVAL;
+               goto bad_type;
+       }
+
+       tmp_store->type = type;
+       tmp_store->ti = ti;
+
+       r = dm_get_device(ti, argv[0], 0, 0,
+                         FMODE_READ | FMODE_WRITE, &tmp_store->cow);
+       if (r) {
+               ti->error = "Cannot get COW device";
+               goto bad_cow;
+       }
+
+       r = set_chunk_size(tmp_store, argv[2], &ti->error);
+       if (r) {
+               /* The COW device is held by now, so it must be put too. */
+               goto bad_ctr;
+       }
+
+       r = type->ctr(tmp_store, 0, NULL);
+       if (r) {
+               ti->error = "Exception store type constructor failed";
+               goto bad_ctr;
+       }
+
+       *args_used = 3;
+       *store = tmp_store;
+       return 0;
+
+       /* Unwind in reverse order of acquisition. */
+bad_ctr:
+       dm_put_device(ti, tmp_store->cow);
+bad_cow:
+       put_type(type);
+bad_type:
+       kfree(tmp_store);
+       return r;
+}
+EXPORT_SYMBOL(dm_exception_store_create);
+
+/*
+ * Tear down @store: run the type's destructor, put the COW device,
+ * drop the type via put_type(), then free the store itself.  This is
+ * the reverse of the order in which dm_exception_store_create()
+ * acquires them.
+ */
+void dm_exception_store_destroy(struct dm_exception_store *store)
+{
+       store->type->dtr(store);
+       dm_put_device(store->ti, store->cow);
+       put_type(store->type);
+       kfree(store);
+}
+EXPORT_SYMBOL(dm_exception_store_destroy);
+
 int dm_exception_store_init(void)
 {
        int r;
index bb9f33d5daa2169127d96fd9ebfcbe41d1cc819b..0a2e6e7f67b3c5b6ac4c4cbd7dc2ca6811eaa30c 100644 (file)
@@ -37,11 +37,18 @@ struct dm_snap_exception {
  * Abstraction to handle the meta/layout of exception stores (the
  * COW device).
  */
-struct dm_exception_store {
+struct dm_exception_store;
+struct dm_exception_store_type {
+       const char *name;
+       struct module *module;
+
+       int (*ctr) (struct dm_exception_store *store,
+                   unsigned argc, char **argv);
+
        /*
         * Destroys this object when you've finished with it.
         */
-       void (*destroy) (struct dm_exception_store *store);
+       void (*dtr) (struct dm_exception_store *store);
 
        /*
         * The target shouldn't read the COW device until this is
@@ -72,8 +79,9 @@ struct dm_exception_store {
         */
        void (*drop_snapshot) (struct dm_exception_store *store);
 
-       int (*status) (struct dm_exception_store *store, status_type_t status,
-                      char *result, unsigned int maxlen);
+       unsigned (*status) (struct dm_exception_store *store,
+                           status_type_t status, char *result,
+                           unsigned maxlen);
 
        /*
         * Return how full the snapshot is.
@@ -82,7 +90,21 @@ struct dm_exception_store {
                               sector_t *numerator,
                               sector_t *denominator);
 
-       struct dm_snapshot *snap;
+       /* For internal device-mapper use only. */
+       struct list_head list;
+};
+
+struct dm_exception_store {
+       struct dm_exception_store_type *type;
+       struct dm_target *ti;
+
+       struct dm_dev *cow;
+
+       /* Size of data blocks saved - must be a power of 2 */
+       chunk_t chunk_size;
+       chunk_t chunk_mask;
+       chunk_t chunk_shift;
+
        void *context;
 };
 
@@ -129,6 +151,28 @@ static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
 
 #  endif
 
+/*
+ * Return the number of sectors in the device: the byte size held in
+ * bdev->bd_inode->i_size shifted right by SECTOR_SHIFT.
+ */
+static inline sector_t get_dev_size(struct block_device *bdev)
+{
+       return bdev->bd_inode->i_size >> SECTOR_SHIFT;
+}
+
+/*
+ * Convert @sector to a chunk number for @store: the bits selected by
+ * store->chunk_mask are cleared, then the result is shifted right by
+ * store->chunk_shift.
+ */
+static inline chunk_t sector_to_chunk(struct dm_exception_store *store,
+                                     sector_t sector)
+{
+       return (sector & ~store->chunk_mask) >> store->chunk_shift;
+}
+
+int dm_exception_store_type_register(struct dm_exception_store_type *type);
+int dm_exception_store_type_unregister(struct dm_exception_store_type *type);
+
+int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
+                             unsigned *args_used,
+                             struct dm_exception_store **store);
+void dm_exception_store_destroy(struct dm_exception_store *store);
+
 int dm_exception_store_init(void);
 void dm_exception_store_exit(void);
 
@@ -141,8 +185,4 @@ void dm_persistent_snapshot_exit(void);
 int dm_transient_snapshot_init(void);
 void dm_transient_snapshot_exit(void);
 
-int dm_create_persistent(struct dm_exception_store *store);
-
-int dm_create_transient(struct dm_exception_store *store);
-
 #endif /* _LINUX_DM_EXCEPTION_STORE */
index 36e2b5e46a6b644f0e9902b57b29c0666541d733..e73aabd61cd78abdbc63996704c177e6aea399d1 100644 (file)
@@ -370,16 +370,13 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
        while (1) {
                set_current_state(TASK_UNINTERRUPTIBLE);
 
-               if (!atomic_read(&io.count) || signal_pending(current))
+               if (!atomic_read(&io.count))
                        break;
 
                io_schedule();
        }
        set_current_state(TASK_RUNNING);
 
-       if (atomic_read(&io.count))
-               return -EINTR;
-
        if (error_bits)
                *error_bits = io.error_bits;
 
index 737961f275c196f9b6aa41957dd08c72457c597b..be233bc4d91787a2729ef8ca740a946ddc782332 100644 (file)
 
 #define DM_MSG_PREFIX "dirty region log"
 
-struct dm_dirty_log_internal {
-       struct dm_dirty_log_type *type;
-
-       struct list_head list;
-       long use;
-};
-
 static LIST_HEAD(_log_types);
 static DEFINE_SPINLOCK(_lock);
 
-static struct dm_dirty_log_internal *__find_dirty_log_type(const char *name)
+static struct dm_dirty_log_type *__find_dirty_log_type(const char *name)
 {
-       struct dm_dirty_log_internal *log_type;
+       struct dm_dirty_log_type *log_type;
 
        list_for_each_entry(log_type, &_log_types, list)
-               if (!strcmp(name, log_type->type->name))
+               if (!strcmp(name, log_type->name))
                        return log_type;
 
        return NULL;
 }
 
-static struct dm_dirty_log_internal *_get_dirty_log_type(const char *name)
+static struct dm_dirty_log_type *_get_dirty_log_type(const char *name)
 {
-       struct dm_dirty_log_internal *log_type;
+       struct dm_dirty_log_type *log_type;
 
        spin_lock(&_lock);
 
        log_type = __find_dirty_log_type(name);
-       if (log_type) {
-               if (!log_type->use && !try_module_get(log_type->type->module))
-                       log_type = NULL;
-               else
-                       log_type->use++;
-       }
+       if (log_type && !try_module_get(log_type->module))
+               log_type = NULL;
 
        spin_unlock(&_lock);
 
@@ -76,14 +65,14 @@ static struct dm_dirty_log_internal *_get_dirty_log_type(const char *name)
 static struct dm_dirty_log_type *get_type(const char *type_name)
 {
        char *p, *type_name_dup;
-       struct dm_dirty_log_internal *log_type;
+       struct dm_dirty_log_type *log_type;
 
        if (!type_name)
                return NULL;
 
        log_type = _get_dirty_log_type(type_name);
        if (log_type)
-               return log_type->type;
+               return log_type;
 
        type_name_dup = kstrdup(type_name, GFP_KERNEL);
        if (!type_name_dup) {
@@ -105,56 +94,33 @@ static struct dm_dirty_log_type *get_type(const char *type_name)
 
        kfree(type_name_dup);
 
-       return log_type ? log_type->type : NULL;
+       return log_type;
 }
 
 static void put_type(struct dm_dirty_log_type *type)
 {
-       struct dm_dirty_log_internal *log_type;
-
        if (!type)
                return;
 
        spin_lock(&_lock);
-       log_type = __find_dirty_log_type(type->name);
-       if (!log_type)
+       if (!__find_dirty_log_type(type->name))
                goto out;
 
-       if (!--log_type->use)
-               module_put(type->module);
-
-       BUG_ON(log_type->use < 0);
+       module_put(type->module);
 
 out:
        spin_unlock(&_lock);
 }
 
-static struct dm_dirty_log_internal *_alloc_dirty_log_type(struct dm_dirty_log_type *type)
-{
-       struct dm_dirty_log_internal *log_type = kzalloc(sizeof(*log_type),
-                                                        GFP_KERNEL);
-
-       if (log_type)
-               log_type->type = type;
-
-       return log_type;
-}
-
 int dm_dirty_log_type_register(struct dm_dirty_log_type *type)
 {
-       struct dm_dirty_log_internal *log_type = _alloc_dirty_log_type(type);
        int r = 0;
 
-       if (!log_type)
-               return -ENOMEM;
-
        spin_lock(&_lock);
        if (!__find_dirty_log_type(type->name))
-               list_add(&log_type->list, &_log_types);
-       else {
-               kfree(log_type);
+               list_add(&type->list, &_log_types);
+       else
                r = -EEXIST;
-       }
        spin_unlock(&_lock);
 
        return r;
@@ -163,25 +129,16 @@ EXPORT_SYMBOL(dm_dirty_log_type_register);
 
 int dm_dirty_log_type_unregister(struct dm_dirty_log_type *type)
 {
-       struct dm_dirty_log_internal *log_type;
-
        spin_lock(&_lock);
 
-       log_type = __find_dirty_log_type(type->name);
-       if (!log_type) {
+       if (!__find_dirty_log_type(type->name)) {
                spin_unlock(&_lock);
                return -EINVAL;
        }
 
-       if (log_type->use) {
-               spin_unlock(&_lock);
-               return -ETXTBSY;
-       }
-
-       list_del(&log_type->list);
+       list_del(&type->list);
 
        spin_unlock(&_lock);
-       kfree(log_type);
 
        return 0;
 }
index 96ea226155b10e3dbb560b3faa8c2211ec11f41f..42c04f04a0c4c84e28bb1cd05e56a19370d5e565 100644 (file)
@@ -17,9 +17,7 @@
 
 struct ps_internal {
        struct path_selector_type pst;
-
        struct list_head list;
-       long use;
 };
 
 #define pst_to_psi(__pst) container_of((__pst), struct ps_internal, pst)
@@ -45,12 +43,8 @@ static struct ps_internal *get_path_selector(const char *name)
 
        down_read(&_ps_lock);
        psi = __find_path_selector_type(name);
-       if (psi) {
-               if ((psi->use == 0) && !try_module_get(psi->pst.module))
-                       psi = NULL;
-               else
-                       psi->use++;
-       }
+       if (psi && !try_module_get(psi->pst.module))
+               psi = NULL;
        up_read(&_ps_lock);
 
        return psi;
@@ -84,11 +78,7 @@ void dm_put_path_selector(struct path_selector_type *pst)
        if (!psi)
                goto out;
 
-       if (--psi->use == 0)
-               module_put(psi->pst.module);
-
-       BUG_ON(psi->use < 0);
-
+       module_put(psi->pst.module);
 out:
        up_read(&_ps_lock);
 }
@@ -136,11 +126,6 @@ int dm_unregister_path_selector(struct path_selector_type *pst)
                return -EINVAL;
        }
 
-       if (psi->use) {
-               up_write(&_ps_lock);
-               return -ETXTBSY;
-       }
-
        list_del(&psi->list);
 
        up_write(&_ps_lock);
index 4d6bc101962e1965a1b98e0cc1e87363be4bcc3a..536ef0bef154e507aae4f738c01c52e7d166d798 100644 (file)
@@ -145,6 +145,8 @@ struct dm_raid1_read_record {
        struct dm_bio_details details;
 };
 
+static struct kmem_cache *_dm_raid1_read_record_cache;
+
 /*
  * Every mirror should look like this one.
  */
@@ -586,6 +588,9 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
        int state;
        struct bio *bio;
        struct bio_list sync, nosync, recover, *this_list = NULL;
+       struct bio_list requeue;
+       struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
+       region_t region;
 
        if (!writes->head)
                return;
@@ -596,10 +601,18 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
        bio_list_init(&sync);
        bio_list_init(&nosync);
        bio_list_init(&recover);
+       bio_list_init(&requeue);
 
        while ((bio = bio_list_pop(writes))) {
-               state = dm_rh_get_state(ms->rh,
-                                       dm_rh_bio_to_region(ms->rh, bio), 1);
+               region = dm_rh_bio_to_region(ms->rh, bio);
+
+               if (log->type->is_remote_recovering &&
+                   log->type->is_remote_recovering(log, region)) {
+                       bio_list_add(&requeue, bio);
+                       continue;
+               }
+
+               state = dm_rh_get_state(ms->rh, region, 1);
                switch (state) {
                case DM_RH_CLEAN:
                case DM_RH_DIRTY:
@@ -618,6 +631,16 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
                bio_list_add(this_list, bio);
        }
 
+       /*
+        * Add bios that are delayed due to remote recovery
+        * back on to the write queue
+        */
+       if (unlikely(requeue.head)) {
+               spin_lock_irq(&ms->lock);
+               bio_list_merge(&ms->writes, &requeue);
+               spin_unlock_irq(&ms->lock);
+       }
+
        /*
         * Increment the pending counts for any regions that will
         * be written to (writes to recover regions are going to
@@ -764,9 +787,9 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors,
        atomic_set(&ms->suspend, 0);
        atomic_set(&ms->default_mirror, DEFAULT_MIRROR);
 
-       len = sizeof(struct dm_raid1_read_record);
-       ms->read_record_pool = mempool_create_kmalloc_pool(MIN_READ_RECORDS,
-                                                          len);
+       ms->read_record_pool = mempool_create_slab_pool(MIN_READ_RECORDS,
+                                               _dm_raid1_read_record_cache);
+
        if (!ms->read_record_pool) {
                ti->error = "Error creating mirror read_record_pool";
                kfree(ms);
@@ -1279,16 +1302,31 @@ static int __init dm_mirror_init(void)
 {
        int r;
 
+       _dm_raid1_read_record_cache = KMEM_CACHE(dm_raid1_read_record, 0);
+       if (!_dm_raid1_read_record_cache) {
+               DMERR("Can't allocate dm_raid1_read_record cache");
+               r = -ENOMEM;
+               goto bad_cache;
+       }
+
        r = dm_register_target(&mirror_target);
-       if (r < 0)
+       if (r < 0) {
                DMERR("Failed to register mirror target");
+               goto bad_target;
+       }
+
+       return 0;
 
+bad_target:
+       kmem_cache_destroy(_dm_raid1_read_record_cache);
+bad_cache:
        return r;
 }
 
 static void __exit dm_mirror_exit(void)
 {
        dm_unregister_target(&mirror_target);
+       kmem_cache_destroy(_dm_raid1_read_record_cache);
 }
 
 /* Module hooks */
index 936b34e0959fdd5cdeaa6eddc51dee888fb05447..e75c6dd76a9adfb3b374aa9cc6d48a2db14596be 100644 (file)
@@ -6,7 +6,6 @@
  */
 
 #include "dm-exception-store.h"
-#include "dm-snap.h"
 
 #include <linux/mm.h>
 #include <linux/pagemap.h>
@@ -89,7 +88,7 @@ struct commit_callback {
  * The top level structure for a persistent exception store.
  */
 struct pstore {
-       struct dm_snapshot *snap;       /* up pointer to my snapshot */
+       struct dm_exception_store *store;
        int version;
        int valid;
        uint32_t exceptions_per_area;
@@ -141,7 +140,7 @@ static int alloc_area(struct pstore *ps)
        int r = -ENOMEM;
        size_t len;
 
-       len = ps->snap->chunk_size << SECTOR_SHIFT;
+       len = ps->store->chunk_size << SECTOR_SHIFT;
 
        /*
         * Allocate the chunk_size block of memory that will hold
@@ -163,9 +162,12 @@ static int alloc_area(struct pstore *ps)
 
 static void free_area(struct pstore *ps)
 {
-       vfree(ps->area);
+       if (ps->area)
+               vfree(ps->area);
        ps->area = NULL;
-       vfree(ps->zero_area);
+
+       if (ps->zero_area)
+               vfree(ps->zero_area);
        ps->zero_area = NULL;
 }
 
@@ -189,9 +191,9 @@ static void do_metadata(struct work_struct *work)
 static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata)
 {
        struct dm_io_region where = {
-               .bdev = ps->snap->cow->bdev,
-               .sector = ps->snap->chunk_size * chunk,
-               .count = ps->snap->chunk_size,
+               .bdev = ps->store->cow->bdev,
+               .sector = ps->store->chunk_size * chunk,
+               .count = ps->store->chunk_size,
        };
        struct dm_io_request io_req = {
                .bi_rw = rw,
@@ -247,15 +249,15 @@ static int area_io(struct pstore *ps, int rw)
 
 static void zero_memory_area(struct pstore *ps)
 {
-       memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
+       memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT);
 }
 
 static int zero_disk_area(struct pstore *ps, chunk_t area)
 {
        struct dm_io_region where = {
-               .bdev = ps->snap->cow->bdev,
-               .sector = ps->snap->chunk_size * area_location(ps, area),
-               .count = ps->snap->chunk_size,
+               .bdev = ps->store->cow->bdev,
+               .sector = ps->store->chunk_size * area_location(ps, area),
+               .count = ps->store->chunk_size,
        };
        struct dm_io_request io_req = {
                .bi_rw = WRITE,
@@ -278,15 +280,15 @@ static int read_header(struct pstore *ps, int *new_snapshot)
        /*
         * Use default chunk size (or hardsect_size, if larger) if none supplied
         */
-       if (!ps->snap->chunk_size) {
-               ps->snap->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
-                   bdev_hardsect_size(ps->snap->cow->bdev) >> 9);
-               ps->snap->chunk_mask = ps->snap->chunk_size - 1;
-               ps->snap->chunk_shift = ffs(ps->snap->chunk_size) - 1;
+       if (!ps->store->chunk_size) {
+               ps->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
+                   bdev_hardsect_size(ps->store->cow->bdev) >> 9);
+               ps->store->chunk_mask = ps->store->chunk_size - 1;
+               ps->store->chunk_shift = ffs(ps->store->chunk_size) - 1;
                chunk_size_supplied = 0;
        }
 
-       ps->io_client = dm_io_client_create(sectors_to_pages(ps->snap->
+       ps->io_client = dm_io_client_create(sectors_to_pages(ps->store->
                                                             chunk_size));
        if (IS_ERR(ps->io_client))
                return PTR_ERR(ps->io_client);
@@ -317,22 +319,22 @@ static int read_header(struct pstore *ps, int *new_snapshot)
        ps->version = le32_to_cpu(dh->version);
        chunk_size = le32_to_cpu(dh->chunk_size);
 
-       if (!chunk_size_supplied || ps->snap->chunk_size == chunk_size)
+       if (!chunk_size_supplied || ps->store->chunk_size == chunk_size)
                return 0;
 
        DMWARN("chunk size %llu in device metadata overrides "
               "table chunk size of %llu.",
               (unsigned long long)chunk_size,
-              (unsigned long long)ps->snap->chunk_size);
+              (unsigned long long)ps->store->chunk_size);
 
        /* We had a bogus chunk_size. Fix stuff up. */
        free_area(ps);
 
-       ps->snap->chunk_size = chunk_size;
-       ps->snap->chunk_mask = chunk_size - 1;
-       ps->snap->chunk_shift = ffs(chunk_size) - 1;
+       ps->store->chunk_size = chunk_size;
+       ps->store->chunk_mask = chunk_size - 1;
+       ps->store->chunk_shift = ffs(chunk_size) - 1;
 
-       r = dm_io_client_resize(sectors_to_pages(ps->snap->chunk_size),
+       r = dm_io_client_resize(sectors_to_pages(ps->store->chunk_size),
                                ps->io_client);
        if (r)
                return r;
@@ -349,13 +351,13 @@ static int write_header(struct pstore *ps)
 {
        struct disk_header *dh;
 
-       memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
+       memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT);
 
        dh = (struct disk_header *) ps->area;
        dh->magic = cpu_to_le32(SNAP_MAGIC);
        dh->valid = cpu_to_le32(ps->valid);
        dh->version = cpu_to_le32(ps->version);
-       dh->chunk_size = cpu_to_le32(ps->snap->chunk_size);
+       dh->chunk_size = cpu_to_le32(ps->store->chunk_size);
 
        return chunk_io(ps, 0, WRITE, 1);
 }
@@ -474,18 +476,25 @@ static struct pstore *get_info(struct dm_exception_store *store)
 static void persistent_fraction_full(struct dm_exception_store *store,
                                     sector_t *numerator, sector_t *denominator)
 {
-       *numerator = get_info(store)->next_free * store->snap->chunk_size;
-       *denominator = get_dev_size(store->snap->cow->bdev);
+       *numerator = get_info(store)->next_free * store->chunk_size;
+       *denominator = get_dev_size(store->cow->bdev);
 }
 
-static void persistent_destroy(struct dm_exception_store *store)
+static void persistent_dtr(struct dm_exception_store *store)
 {
        struct pstore *ps = get_info(store);
 
        destroy_workqueue(ps->metadata_wq);
-       dm_io_client_destroy(ps->io_client);
-       vfree(ps->callbacks);
+
+       /* Created in read_header */
+       if (ps->io_client)
+               dm_io_client_destroy(ps->io_client);
        free_area(ps);
+
+       /* Allocated in persistent_read_metadata */
+       if (ps->callbacks)
+               vfree(ps->callbacks);
+
        kfree(ps);
 }
 
@@ -507,7 +516,7 @@ static int persistent_read_metadata(struct dm_exception_store *store,
        /*
         * Now we know correct chunk_size, complete the initialisation.
         */
-       ps->exceptions_per_area = (ps->snap->chunk_size << SECTOR_SHIFT) /
+       ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) /
                                  sizeof(struct disk_exception);
        ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
                        sizeof(*ps->callbacks));
@@ -564,10 +573,10 @@ static int persistent_prepare_exception(struct dm_exception_store *store,
        struct pstore *ps = get_info(store);
        uint32_t stride;
        chunk_t next_free;
-       sector_t size = get_dev_size(store->snap->cow->bdev);
+       sector_t size = get_dev_size(store->cow->bdev);
 
        /* Is there enough room ? */
-       if (size < ((ps->next_free + 1) * store->snap->chunk_size))
+       if (size < ((ps->next_free + 1) * store->chunk_size))
                return -ENOSPC;
 
        e->new_chunk = ps->next_free;
@@ -656,16 +665,17 @@ static void persistent_drop_snapshot(struct dm_exception_store *store)
                DMWARN("write header failed");
 }
 
-int dm_create_persistent(struct dm_exception_store *store)
+static int persistent_ctr(struct dm_exception_store *store,
+                         unsigned argc, char **argv)
 {
        struct pstore *ps;
 
        /* allocate the pstore */
-       ps = kmalloc(sizeof(*ps), GFP_KERNEL);
+       ps = kzalloc(sizeof(*ps), GFP_KERNEL);
        if (!ps)
                return -ENOMEM;
 
-       ps->snap = store->snap;
+       ps->store = store;
        ps->valid = 1;
        ps->version = SNAPSHOT_DISK_VERSION;
        ps->area = NULL;
@@ -683,22 +693,77 @@ int dm_create_persistent(struct dm_exception_store *store)
                return -ENOMEM;
        }
 
-       store->destroy = persistent_destroy;
-       store->read_metadata = persistent_read_metadata;
-       store->prepare_exception = persistent_prepare_exception;
-       store->commit_exception = persistent_commit_exception;
-       store->drop_snapshot = persistent_drop_snapshot;
-       store->fraction_full = persistent_fraction_full;
        store->context = ps;
 
        return 0;
 }
 
+static unsigned persistent_status(struct dm_exception_store *store,
+                                 status_type_t status, char *result,
+                                 unsigned maxlen)
+{
+       unsigned sz = 0;
+
+       switch (status) {
+       case STATUSTYPE_INFO:
+               break;
+       case STATUSTYPE_TABLE:
+               DMEMIT(" %s P %llu", store->cow->name,
+                      (unsigned long long)store->chunk_size);
+       }
+
+       return sz;
+}
+
+static struct dm_exception_store_type _persistent_type = {
+       .name = "persistent",
+       .module = THIS_MODULE,
+       .ctr = persistent_ctr,
+       .dtr = persistent_dtr,
+       .read_metadata = persistent_read_metadata,
+       .prepare_exception = persistent_prepare_exception,
+       .commit_exception = persistent_commit_exception,
+       .drop_snapshot = persistent_drop_snapshot,
+       .fraction_full = persistent_fraction_full,
+       .status = persistent_status,
+};
+
+static struct dm_exception_store_type _persistent_compat_type = {
+       .name = "P",
+       .module = THIS_MODULE,
+       .ctr = persistent_ctr,
+       .dtr = persistent_dtr,
+       .read_metadata = persistent_read_metadata,
+       .prepare_exception = persistent_prepare_exception,
+       .commit_exception = persistent_commit_exception,
+       .drop_snapshot = persistent_drop_snapshot,
+       .fraction_full = persistent_fraction_full,
+       .status = persistent_status,
+};
+
 int dm_persistent_snapshot_init(void)
 {
-       return 0;
+       int r;
+
+       r = dm_exception_store_type_register(&_persistent_type);
+       if (r) {
+               DMERR("Unable to register persistent exception store type");
+               return r;
+       }
+
+       r = dm_exception_store_type_register(&_persistent_compat_type);
+       if (r) {
+               DMERR("Unable to register old-style persistent exception "
+                     "store type");
+               dm_exception_store_type_unregister(&_persistent_type);
+               return r;
+       }
+
+       return r;
 }
 
 void dm_persistent_snapshot_exit(void)
 {
+       dm_exception_store_type_unregister(&_persistent_type);
+       dm_exception_store_type_unregister(&_persistent_compat_type);
 }
index 7f6e2e6dcb0ddcfa78ba071af928dac0f5401682..cde5aa558e6d77b7d1130777762f8aec84c86ab1 100644 (file)
@@ -6,7 +6,6 @@
  */
 
 #include "dm-exception-store.h"
-#include "dm-snap.h"
 
 #include <linux/mm.h>
 #include <linux/pagemap.h>
@@ -23,7 +22,7 @@ struct transient_c {
        sector_t next_free;
 };
 
-static void transient_destroy(struct dm_exception_store *store)
+static void transient_dtr(struct dm_exception_store *store)
 {
        kfree(store->context);
 }
@@ -39,14 +38,14 @@ static int transient_read_metadata(struct dm_exception_store *store,
 static int transient_prepare_exception(struct dm_exception_store *store,
                                       struct dm_snap_exception *e)
 {
-       struct transient_c *tc = (struct transient_c *) store->context;
-       sector_t size = get_dev_size(store->snap->cow->bdev);
+       struct transient_c *tc = store->context;
+       sector_t size = get_dev_size(store->cow->bdev);
 
-       if (size < (tc->next_free + store->snap->chunk_size))
+       if (size < (tc->next_free + store->chunk_size))
                return -1;
 
-       e->new_chunk = sector_to_chunk(store->snap, tc->next_free);
-       tc->next_free += store->snap->chunk_size;
+       e->new_chunk = sector_to_chunk(store, tc->next_free);
+       tc->next_free += store->chunk_size;
 
        return 0;
 }
@@ -64,20 +63,14 @@ static void transient_fraction_full(struct dm_exception_store *store,
                                    sector_t *numerator, sector_t *denominator)
 {
        *numerator = ((struct transient_c *) store->context)->next_free;
-       *denominator = get_dev_size(store->snap->cow->bdev);
+       *denominator = get_dev_size(store->cow->bdev);
 }
 
-int dm_create_transient(struct dm_exception_store *store)
+static int transient_ctr(struct dm_exception_store *store,
+                        unsigned argc, char **argv)
 {
        struct transient_c *tc;
 
-       store->destroy = transient_destroy;
-       store->read_metadata = transient_read_metadata;
-       store->prepare_exception = transient_prepare_exception;
-       store->commit_exception = transient_commit_exception;
-       store->drop_snapshot = NULL;
-       store->fraction_full = transient_fraction_full;
-
        tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL);
        if (!tc)
                return -ENOMEM;
@@ -88,11 +81,70 @@ int dm_create_transient(struct dm_exception_store *store)
        return 0;
 }
 
+static unsigned transient_status(struct dm_exception_store *store,
+                                status_type_t status, char *result,
+                                unsigned maxlen)
+{
+       unsigned sz = 0;
+
+       switch (status) {
+       case STATUSTYPE_INFO:
+               break;
+       case STATUSTYPE_TABLE:
+               DMEMIT(" %s N %llu", store->cow->name,
+                      (unsigned long long)store->chunk_size);
+       }
+
+       return sz;
+}
+
+static struct dm_exception_store_type _transient_type = {
+       .name = "transient",
+       .module = THIS_MODULE,
+       .ctr = transient_ctr,
+       .dtr = transient_dtr,
+       .read_metadata = transient_read_metadata,
+       .prepare_exception = transient_prepare_exception,
+       .commit_exception = transient_commit_exception,
+       .fraction_full = transient_fraction_full,
+       .status = transient_status,
+};
+
+static struct dm_exception_store_type _transient_compat_type = {
+       .name = "N",
+       .module = THIS_MODULE,
+       .ctr = transient_ctr,
+       .dtr = transient_dtr,
+       .read_metadata = transient_read_metadata,
+       .prepare_exception = transient_prepare_exception,
+       .commit_exception = transient_commit_exception,
+       .fraction_full = transient_fraction_full,
+       .status = transient_status,
+};
+
 int dm_transient_snapshot_init(void)
 {
-       return 0;
+       int r;
+
+       r = dm_exception_store_type_register(&_transient_type);
+       if (r) {
+               DMWARN("Unable to register transient exception store type");
+               return r;
+       }
+
+       r = dm_exception_store_type_register(&_transient_compat_type);
+       if (r) {
+               DMWARN("Unable to register old-style transient "
+                      "exception store type");
+               dm_exception_store_type_unregister(&_transient_type);
+               return r;
+       }
+
+       return r;
 }
 
 void dm_transient_snapshot_exit(void)
 {
+       dm_exception_store_type_unregister(&_transient_type);
+       dm_exception_store_type_unregister(&_transient_compat_type);
 }
index 65ff82ff124e29a3c7cfedf276df89646fcbc2ac..981a0413068f8028c90e7da039cf5d48fcc27b9b 100644 (file)
@@ -7,7 +7,6 @@
  */
 
 #include <linux/blkdev.h>
-#include <linux/ctype.h>
 #include <linux/device-mapper.h>
 #include <linux/delay.h>
 #include <linux/fs.h>
@@ -20,9 +19,9 @@
 #include <linux/vmalloc.h>
 #include <linux/log2.h>
 #include <linux/dm-kcopyd.h>
+#include <linux/workqueue.h>
 
 #include "dm-exception-store.h"
-#include "dm-snap.h"
 #include "dm-bio-list.h"
 
 #define DM_MSG_PREFIX "snapshots"
  */
 #define MIN_IOS 256
 
+#define DM_TRACKED_CHUNK_HASH_SIZE     16
+#define DM_TRACKED_CHUNK_HASH(x)       ((unsigned long)(x) & \
+                                        (DM_TRACKED_CHUNK_HASH_SIZE - 1))
+
+struct exception_table {
+       uint32_t hash_mask;
+       unsigned hash_shift;
+       struct list_head *table;
+};
+
+struct dm_snapshot {
+       struct rw_semaphore lock;
+
+       struct dm_dev *origin;
+
+       /* List of snapshots per Origin */
+       struct list_head list;
+
+       /* You can't use a snapshot if this is 0 (e.g. if full) */
+       int valid;
+
+       /* Origin writes don't trigger exceptions until this is set */
+       int active;
+
+       mempool_t *pending_pool;
+
+       atomic_t pending_exceptions_count;
+
+       struct exception_table pending;
+       struct exception_table complete;
+
+       /*
+        * pe_lock protects all pending_exception operations and access
+        * as well as the snapshot_bios list.
+        */
+       spinlock_t pe_lock;
+
+       /* The on disk metadata handler */
+       struct dm_exception_store *store;
+
+       struct dm_kcopyd_client *kcopyd_client;
+
+       /* Queue of snapshot writes for ksnapd to flush */
+       struct bio_list queued_bios;
+       struct work_struct queued_bios_work;
+
+       /* Chunks with outstanding reads */
+       mempool_t *tracked_chunk_pool;
+       spinlock_t tracked_chunk_lock;
+       struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
+};
+
 static struct workqueue_struct *ksnapd;
 static void flush_queued_bios(struct work_struct *work);
 
+static sector_t chunk_to_sector(struct dm_exception_store *store,
+                               chunk_t chunk)
+{
+       return chunk << store->chunk_shift;
+}
+
+static int bdev_equal(struct block_device *lhs, struct block_device *rhs)
+{
+       /*
+        * There is only ever one instance of a particular block
+        * device so we can compare pointers safely.
+        */
+       return lhs == rhs;
+}
+
 struct dm_snap_pending_exception {
        struct dm_snap_exception e;
 
@@ -476,11 +542,11 @@ static int init_hash_tables(struct dm_snapshot *s)
         * Calculate based on the size of the original volume or
         * the COW volume...
         */
-       cow_dev_size = get_dev_size(s->cow->bdev);
+       cow_dev_size = get_dev_size(s->store->cow->bdev);
        origin_dev_size = get_dev_size(s->origin->bdev);
        max_buckets = calc_max_buckets();
 
-       hash_size = min(origin_dev_size, cow_dev_size) >> s->chunk_shift;
+       hash_size = min(origin_dev_size, cow_dev_size) >> s->store->chunk_shift;
        hash_size = min(hash_size, max_buckets);
 
        hash_size = rounddown_pow_of_two(hash_size);
@@ -504,58 +570,6 @@ static int init_hash_tables(struct dm_snapshot *s)
        return 0;
 }
 
-/*
- * Round a number up to the nearest 'size' boundary.  size must
- * be a power of 2.
- */
-static ulong round_up(ulong n, ulong size)
-{
-       size--;
-       return (n + size) & ~size;
-}
-
-static int set_chunk_size(struct dm_snapshot *s, const char *chunk_size_arg,
-                         char **error)
-{
-       unsigned long chunk_size;
-       char *value;
-
-       chunk_size = simple_strtoul(chunk_size_arg, &value, 10);
-       if (*chunk_size_arg == '\0' || *value != '\0') {
-               *error = "Invalid chunk size";
-               return -EINVAL;
-       }
-
-       if (!chunk_size) {
-               s->chunk_size = s->chunk_mask = s->chunk_shift = 0;
-               return 0;
-       }
-
-       /*
-        * Chunk size must be multiple of page size.  Silently
-        * round up if it's not.
-        */
-       chunk_size = round_up(chunk_size, PAGE_SIZE >> 9);
-
-       /* Check chunk_size is a power of 2 */
-       if (!is_power_of_2(chunk_size)) {
-               *error = "Chunk size is not a power of 2";
-               return -EINVAL;
-       }
-
-       /* Validate the chunk size against the device block size */
-       if (chunk_size % (bdev_hardsect_size(s->cow->bdev) >> 9)) {
-               *error = "Chunk size is not a multiple of device blocksize";
-               return -EINVAL;
-       }
-
-       s->chunk_size = chunk_size;
-       s->chunk_mask = chunk_size - 1;
-       s->chunk_shift = ffs(chunk_size) - 1;
-
-       return 0;
-}
-
 /*
  * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size>
  */
@@ -564,91 +578,68 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
        struct dm_snapshot *s;
        int i;
        int r = -EINVAL;
-       char persistent;
        char *origin_path;
-       char *cow_path;
+       struct dm_exception_store *store;
+       unsigned args_used;
 
        if (argc != 4) {
                ti->error = "requires exactly 4 arguments";
                r = -EINVAL;
-               goto bad1;
+               goto bad_args;
        }
 
        origin_path = argv[0];
-       cow_path = argv[1];
-       persistent = toupper(*argv[2]);
+       argv++;
+       argc--;
 
-       if (persistent != 'P' && persistent != 'N') {
-               ti->error = "Persistent flag is not P or N";
+       r = dm_exception_store_create(ti, argc, argv, &args_used, &store);
+       if (r) {
+               ti->error = "Couldn't create exception store";
                r = -EINVAL;
-               goto bad1;
+               goto bad_args;
        }
 
+       argv += args_used;
+       argc -= args_used;
+
        s = kmalloc(sizeof(*s), GFP_KERNEL);
-       if (s == NULL) {
+       if (!s) {
                ti->error = "Cannot allocate snapshot context private "
                    "structure";
                r = -ENOMEM;
-               goto bad1;
+               goto bad_snap;
        }
 
        r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin);
        if (r) {
                ti->error = "Cannot get origin device";
-               goto bad2;
-       }
-
-       r = dm_get_device(ti, cow_path, 0, 0,
-                         FMODE_READ | FMODE_WRITE, &s->cow);
-       if (r) {
-               dm_put_device(ti, s->origin);
-               ti->error = "Cannot get COW device";
-               goto bad2;
+               goto bad_origin;
        }
 
-       r = set_chunk_size(s, argv[3], &ti->error);
-       if (r)
-               goto bad3;
-
-       s->type = persistent;
-
+       s->store = store;
        s->valid = 1;
        s->active = 0;
        atomic_set(&s->pending_exceptions_count, 0);
        init_rwsem(&s->lock);
        spin_lock_init(&s->pe_lock);
-       s->ti = ti;
 
        /* Allocate hash table for COW data */
        if (init_hash_tables(s)) {
                ti->error = "Unable to allocate hash table space";
                r = -ENOMEM;
-               goto bad3;
-       }
-
-       s->store.snap = s;
-
-       if (persistent == 'P')
-               r = dm_create_persistent(&s->store);
-       else
-               r = dm_create_transient(&s->store);
-
-       if (r) {
-               ti->error = "Couldn't create exception store";
-               r = -EINVAL;
-               goto bad4;
+               goto bad_hash_tables;
        }
 
        r = dm_kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client);
        if (r) {
                ti->error = "Could not create kcopyd client";
-               goto bad5;
+               goto bad_kcopyd;
        }
 
        s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache);
        if (!s->pending_pool) {
                ti->error = "Could not allocate mempool for pending exceptions";
-               goto bad6;
+               goto bad_pending_pool;
        }
 
        s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS,
@@ -665,7 +656,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
        spin_lock_init(&s->tracked_chunk_lock);
 
        /* Metadata must only be loaded into one table at once */
-       r = s->store.read_metadata(&s->store, dm_add_exception, (void *)s);
+       r = s->store->type->read_metadata(s->store, dm_add_exception,
+                                         (void *)s);
        if (r < 0) {
                ti->error = "Failed to read snapshot metadata";
                goto bad_load_and_register;
@@ -686,34 +678,33 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
        }
 
        ti->private = s;
-       ti->split_io = s->chunk_size;
+       ti->split_io = s->store->chunk_size;
 
        return 0;
 
- bad_load_and_register:
+bad_load_and_register:
        mempool_destroy(s->tracked_chunk_pool);
 
- bad_tracked_chunk_pool:
+bad_tracked_chunk_pool:
        mempool_destroy(s->pending_pool);
 
- bad6:
+bad_pending_pool:
        dm_kcopyd_client_destroy(s->kcopyd_client);
 
- bad5:
-       s->store.destroy(&s->store);
-
- bad4:
+bad_kcopyd:
        exit_exception_table(&s->pending, pending_cache);
        exit_exception_table(&s->complete, exception_cache);
 
- bad3:
-       dm_put_device(ti, s->cow);
+bad_hash_tables:
        dm_put_device(ti, s->origin);
 
- bad2:
+bad_origin:
        kfree(s);
 
- bad1:
+bad_snap:
+       dm_exception_store_destroy(store);
+
+bad_args:
        return r;
 }
 
@@ -724,8 +715,6 @@ static void __free_exceptions(struct dm_snapshot *s)
 
        exit_exception_table(&s->pending, pending_cache);
        exit_exception_table(&s->complete, exception_cache);
-
-       s->store.destroy(&s->store);
 }
 
 static void snapshot_dtr(struct dm_target *ti)
@@ -761,7 +750,8 @@ static void snapshot_dtr(struct dm_target *ti)
        mempool_destroy(s->pending_pool);
 
        dm_put_device(ti, s->origin);
-       dm_put_device(ti, s->cow);
+
+       dm_exception_store_destroy(s->store);
 
        kfree(s);
 }
@@ -820,12 +810,12 @@ static void __invalidate_snapshot(struct dm_snapshot *s, int err)
        else if (err == -ENOMEM)
                DMERR("Invalidating snapshot: Unable to allocate exception.");
 
-       if (s->store.drop_snapshot)
-               s->store.drop_snapshot(&s->store);
+       if (s->store->type->drop_snapshot)
+               s->store->type->drop_snapshot(s->store);
 
        s->valid = 0;
 
-       dm_table_event(s->ti->table);
+       dm_table_event(s->store->ti->table);
 }
 
 static void get_pending_exception(struct dm_snap_pending_exception *pe)
@@ -943,8 +933,8 @@ static void copy_callback(int read_err, unsigned long write_err, void *context)
 
        else
                /* Update the metadata if we are persistent */
-               s->store.commit_exception(&s->store, &pe->e, commit_callback,
-                                         pe);
+               s->store->type->commit_exception(s->store, &pe->e,
+                                                commit_callback, pe);
 }
 
 /*
@@ -960,11 +950,11 @@ static void start_copy(struct dm_snap_pending_exception *pe)
        dev_size = get_dev_size(bdev);
 
        src.bdev = bdev;
-       src.sector = chunk_to_sector(s, pe->e.old_chunk);
-       src.count = min(s->chunk_size, dev_size - src.sector);
+       src.sector = chunk_to_sector(s->store, pe->e.old_chunk);
+       src.count = min(s->store->chunk_size, dev_size - src.sector);
 
-       dest.bdev = s->cow->bdev;
-       dest.sector = chunk_to_sector(s, pe->e.new_chunk);
+       dest.bdev = s->store->cow->bdev;
+       dest.sector = chunk_to_sector(s->store, pe->e.new_chunk);
        dest.count = src.count;
 
        /* Hand over to kcopyd */
@@ -972,6 +962,17 @@ static void start_copy(struct dm_snap_pending_exception *pe)
                    &src, 1, &dest, 0, copy_callback, pe);
 }
 
+static struct dm_snap_pending_exception *
+__lookup_pending_exception(struct dm_snapshot *s, chunk_t chunk)
+{
+       struct dm_snap_exception *e = lookup_exception(&s->pending, chunk);
+
+       if (!e)
+               return NULL;
+
+       return container_of(e, struct dm_snap_pending_exception, e);
+}
+
 /*
  * Looks to see if this snapshot already has a pending exception
  * for this chunk, otherwise it allocates a new one and inserts
@@ -981,40 +982,15 @@ static void start_copy(struct dm_snap_pending_exception *pe)
  * this.
  */
 static struct dm_snap_pending_exception *
-__find_pending_exception(struct dm_snapshot *s, struct bio *bio)
+__find_pending_exception(struct dm_snapshot *s,
+                        struct dm_snap_pending_exception *pe, chunk_t chunk)
 {
-       struct dm_snap_exception *e;
-       struct dm_snap_pending_exception *pe;
-       chunk_t chunk = sector_to_chunk(s, bio->bi_sector);
+       struct dm_snap_pending_exception *pe2;
 
-       /*
-        * Is there a pending exception for this already ?
-        */
-       e = lookup_exception(&s->pending, chunk);
-       if (e) {
-               /* cast the exception to a pending exception */
-               pe = container_of(e, struct dm_snap_pending_exception, e);
-               goto out;
-       }
-
-       /*
-        * Create a new pending exception, we don't want
-        * to hold the lock while we do this.
-        */
-       up_write(&s->lock);
-       pe = alloc_pending_exception(s);
-       down_write(&s->lock);
-
-       if (!s->valid) {
-               free_pending_exception(pe);
-               return NULL;
-       }
-
-       e = lookup_exception(&s->pending, chunk);
-       if (e) {
+       pe2 = __lookup_pending_exception(s, chunk);
+       if (pe2) {
                free_pending_exception(pe);
-               pe = container_of(e, struct dm_snap_pending_exception, e);
-               goto out;
+               return pe2;
        }
 
        pe->e.old_chunk = chunk;
@@ -1024,7 +1000,7 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio)
        atomic_set(&pe->ref_count, 0);
        pe->started = 0;
 
-       if (s->store.prepare_exception(&s->store, &pe->e)) {
+       if (s->store->type->prepare_exception(s->store, &pe->e)) {
                free_pending_exception(pe);
                return NULL;
        }
@@ -1032,17 +1008,18 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio)
        get_pending_exception(pe);
        insert_exception(&s->pending, &pe->e);
 
- out:
        return pe;
 }
 
 static void remap_exception(struct dm_snapshot *s, struct dm_snap_exception *e,
                            struct bio *bio, chunk_t chunk)
 {
-       bio->bi_bdev = s->cow->bdev;
-       bio->bi_sector = chunk_to_sector(s, dm_chunk_number(e->new_chunk) +
-                        (chunk - e->old_chunk)) +
-                        (bio->bi_sector & s->chunk_mask);
+       bio->bi_bdev = s->store->cow->bdev;
+       bio->bi_sector = chunk_to_sector(s->store,
+                                        dm_chunk_number(e->new_chunk) +
+                                        (chunk - e->old_chunk)) +
+                                        (bio->bi_sector &
+                                         s->store->chunk_mask);
 }
 
 static int snapshot_map(struct dm_target *ti, struct bio *bio,
@@ -1054,7 +1031,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
        chunk_t chunk;
        struct dm_snap_pending_exception *pe = NULL;
 
-       chunk = sector_to_chunk(s, bio->bi_sector);
+       chunk = sector_to_chunk(s->store, bio->bi_sector);
 
        /* Full snapshots are not usable */
        /* To get here the table must be live so s->active is always set. */
@@ -1083,11 +1060,31 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
         * writeable.
         */
        if (bio_rw(bio) == WRITE) {
-               pe = __find_pending_exception(s, bio);
+               pe = __lookup_pending_exception(s, chunk);
                if (!pe) {
-                       __invalidate_snapshot(s, -ENOMEM);
-                       r = -EIO;
-                       goto out_unlock;
+                       up_write(&s->lock);
+                       pe = alloc_pending_exception(s);
+                       down_write(&s->lock);
+
+                       if (!s->valid) {
+                               free_pending_exception(pe);
+                               r = -EIO;
+                               goto out_unlock;
+                       }
+
+                       e = lookup_exception(&s->complete, chunk);
+                       if (e) {
+                               free_pending_exception(pe);
+                               remap_exception(s, e, bio, chunk);
+                               goto out_unlock;
+                       }
+
+                       pe = __find_pending_exception(s, pe, chunk);
+                       if (!pe) {
+                               __invalidate_snapshot(s, -ENOMEM);
+                               r = -EIO;
+                               goto out_unlock;
+                       }
                }
 
                remap_exception(s, &pe->e, bio, chunk);
@@ -1137,24 +1134,25 @@ static void snapshot_resume(struct dm_target *ti)
 static int snapshot_status(struct dm_target *ti, status_type_t type,
                           char *result, unsigned int maxlen)
 {
+       unsigned sz = 0;
        struct dm_snapshot *snap = ti->private;
 
        switch (type) {
        case STATUSTYPE_INFO:
                if (!snap->valid)
-                       snprintf(result, maxlen, "Invalid");
+                       DMEMIT("Invalid");
                else {
-                       if (snap->store.fraction_full) {
+                       if (snap->store->type->fraction_full) {
                                sector_t numerator, denominator;
-                               snap->store.fraction_full(&snap->store,
-                                                         &numerator,
-                                                         &denominator);
-                               snprintf(result, maxlen, "%llu/%llu",
-                                       (unsigned long long)numerator,
-                                       (unsigned long long)denominator);
+                               snap->store->type->fraction_full(snap->store,
+                                                                &numerator,
+                                                                &denominator);
+                               DMEMIT("%llu/%llu",
+                                      (unsigned long long)numerator,
+                                      (unsigned long long)denominator);
                        }
                        else
-                               snprintf(result, maxlen, "Unknown");
+                               DMEMIT("Unknown");
                }
                break;
 
@@ -1164,10 +1162,9 @@ static int snapshot_status(struct dm_target *ti, status_type_t type,
                 * to make private copies if the output is to
                 * make sense.
                 */
-               snprintf(result, maxlen, "%s %s %c %llu",
-                        snap->origin->name, snap->cow->name,
-                        snap->type,
-                        (unsigned long long)snap->chunk_size);
+               DMEMIT("%s", snap->origin->name);
+               snap->store->type->status(snap->store, type, result + sz,
+                                         maxlen - sz);
                break;
        }
 
@@ -1196,14 +1193,14 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
                        goto next_snapshot;
 
                /* Nothing to do if writing beyond end of snapshot */
-               if (bio->bi_sector >= dm_table_get_size(snap->ti->table))
+               if (bio->bi_sector >= dm_table_get_size(snap->store->ti->table))
                        goto next_snapshot;
 
                /*
                 * Remember, different snapshots can have
                 * different chunk sizes.
                 */
-               chunk = sector_to_chunk(snap, bio->bi_sector);
+               chunk = sector_to_chunk(snap->store, bio->bi_sector);
 
                /*
                 * Check exception table to see if block
@@ -1217,10 +1214,28 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
                if (e)
                        goto next_snapshot;
 
-               pe = __find_pending_exception(snap, bio);
+               pe = __lookup_pending_exception(snap, chunk);
                if (!pe) {
-                       __invalidate_snapshot(snap, -ENOMEM);
-                       goto next_snapshot;
+                       up_write(&snap->lock);
+                       pe = alloc_pending_exception(snap);
+                       down_write(&snap->lock);
+
+                       if (!snap->valid) {
+                               free_pending_exception(pe);
+                               goto next_snapshot;
+                       }
+
+                       e = lookup_exception(&snap->complete, chunk);
+                       if (e) {
+                               free_pending_exception(pe);
+                               goto next_snapshot;
+                       }
+
+                       pe = __find_pending_exception(snap, pe, chunk);
+                       if (!pe) {
+                               __invalidate_snapshot(snap, -ENOMEM);
+                               goto next_snapshot;
+                       }
                }
 
                if (!primary_pe) {
@@ -1360,7 +1375,8 @@ static void origin_resume(struct dm_target *ti)
        o = __lookup_origin(dev->bdev);
        if (o)
                list_for_each_entry (snap, &o->snapshots, list)
-                       chunk_size = min_not_zero(chunk_size, snap->chunk_size);
+                       chunk_size = min_not_zero(chunk_size,
+                                                 snap->store->chunk_size);
        up_read(&_origins_lock);
 
        ti->split_io = chunk_size;
diff --git a/drivers/md/dm-snap.h b/drivers/md/dm-snap.h
deleted file mode 100644 (file)
index d9e62b4..0000000
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
- *
- * This file is released under the GPL.
- */
-
-#ifndef DM_SNAPSHOT_H
-#define DM_SNAPSHOT_H
-
-#include <linux/device-mapper.h>
-#include "dm-exception-store.h"
-#include "dm-bio-list.h"
-#include <linux/blkdev.h>
-#include <linux/workqueue.h>
-
-struct exception_table {
-       uint32_t hash_mask;
-       unsigned hash_shift;
-       struct list_head *table;
-};
-
-#define DM_TRACKED_CHUNK_HASH_SIZE     16
-#define DM_TRACKED_CHUNK_HASH(x)       ((unsigned long)(x) & \
-                                        (DM_TRACKED_CHUNK_HASH_SIZE - 1))
-
-struct dm_snapshot {
-       struct rw_semaphore lock;
-       struct dm_target *ti;
-
-       struct dm_dev *origin;
-       struct dm_dev *cow;
-
-       /* List of snapshots per Origin */
-       struct list_head list;
-
-       /* Size of data blocks saved - must be a power of 2 */
-       chunk_t chunk_size;
-       chunk_t chunk_mask;
-       chunk_t chunk_shift;
-
-       /* You can't use a snapshot if this is 0 (e.g. if full) */
-       int valid;
-
-       /* Origin writes don't trigger exceptions until this is set */
-       int active;
-
-       /* Used for display of table */
-       char type;
-
-       mempool_t *pending_pool;
-
-       atomic_t pending_exceptions_count;
-
-       struct exception_table pending;
-       struct exception_table complete;
-
-       /*
-        * pe_lock protects all pending_exception operations and access
-        * as well as the snapshot_bios list.
-        */
-       spinlock_t pe_lock;
-
-       /* The on disk metadata handler */
-       struct dm_exception_store store;
-
-       struct dm_kcopyd_client *kcopyd_client;
-
-       /* Queue of snapshot writes for ksnapd to flush */
-       struct bio_list queued_bios;
-       struct work_struct queued_bios_work;
-
-       /* Chunks with outstanding reads */
-       mempool_t *tracked_chunk_pool;
-       spinlock_t tracked_chunk_lock;
-       struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
-};
-
-/*
- * Return the number of sectors in the device.
- */
-static inline sector_t get_dev_size(struct block_device *bdev)
-{
-       return bdev->bd_inode->i_size >> SECTOR_SHIFT;
-}
-
-static inline chunk_t sector_to_chunk(struct dm_snapshot *s, sector_t sector)
-{
-       return (sector & ~s->chunk_mask) >> s->chunk_shift;
-}
-
-static inline sector_t chunk_to_sector(struct dm_snapshot *s, chunk_t chunk)
-{
-       return chunk << s->chunk_shift;
-}
-
-static inline int bdev_equal(struct block_device *lhs, struct block_device *rhs)
-{
-       /*
-        * There is only ever one instance of a particular block
-        * device so we can compare pointers safely.
-        */
-       return lhs == rhs;
-}
-
-#endif
index 2fd66c30f7f8b02fd699f7b3c5cab52bd6f4d89d..e8361b191b9b223baef941bfc14212dc052d127f 100644 (file)
@@ -399,28 +399,30 @@ static int check_device_area(struct dm_dev_internal *dd, sector_t start,
 }
 
 /*
- * This upgrades the mode on an already open dm_dev.  Being
+ * This upgrades the mode on an already open dm_dev, being
  * careful to leave things as they were if we fail to reopen the
- * device.
+ * device and not to touch the existing bdev field in case
+ * it is accessed concurrently inside dm_table_any_congested().
  */
 static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
                        struct mapped_device *md)
 {
        int r;
-       struct dm_dev_internal dd_copy;
-       dev_t dev = dd->dm_dev.bdev->bd_dev;
+       struct dm_dev_internal dd_new, dd_old;
 
-       dd_copy = *dd;
+       dd_new = dd_old = *dd;
+
+       dd_new.dm_dev.mode |= new_mode;
+       dd_new.dm_dev.bdev = NULL;
+
+       r = open_dev(&dd_new, dd->dm_dev.bdev->bd_dev, md);
+       if (r)
+               return r;
 
        dd->dm_dev.mode |= new_mode;
-       dd->dm_dev.bdev = NULL;
-       r = open_dev(dd, dev, md);
-       if (!r)
-               close_dev(&dd_copy, md);
-       else
-               *dd = dd_copy;
+       close_dev(&dd_old, md);
 
-       return r;
+       return 0;
 }
 
 /*
index 7decf10006e41cf9b3372544fa60eff54d195923..04feccf2a997947968260029419e7813cb222553 100644 (file)
 
 #define DM_MSG_PREFIX "target"
 
-struct tt_internal {
-       struct target_type tt;
-
-       struct list_head list;
-       long use;
-};
-
 static LIST_HEAD(_targets);
 static DECLARE_RWSEM(_lock);
 
 #define DM_MOD_NAME_SIZE 32
 
-static inline struct tt_internal *__find_target_type(const char *name)
+static inline struct target_type *__find_target_type(const char *name)
 {
-       struct tt_internal *ti;
+       struct target_type *tt;
 
-       list_for_each_entry (ti, &_targets, list)
-               if (!strcmp(name, ti->tt.name))
-                       return ti;
+       list_for_each_entry(tt, &_targets, list)
+               if (!strcmp(name, tt->name))
+                       return tt;
 
        return NULL;
 }
 
-static struct tt_internal *get_target_type(const char *name)
+static struct target_type *get_target_type(const char *name)
 {
-       struct tt_internal *ti;
+       struct target_type *tt;
 
        down_read(&_lock);
 
-       ti = __find_target_type(name);
-       if (ti) {
-               if ((ti->use == 0) && !try_module_get(ti->tt.module))
-                       ti = NULL;
-               else
-                       ti->use++;
-       }
+       tt = __find_target_type(name);
+       if (tt && !try_module_get(tt->module))
+               tt = NULL;
 
        up_read(&_lock);
-       return ti;
+       return tt;
 }
 
 static void load_module(const char *name)
@@ -62,92 +51,59 @@ static void load_module(const char *name)
 
 struct target_type *dm_get_target_type(const char *name)
 {
-       struct tt_internal *ti = get_target_type(name);
+       struct target_type *tt = get_target_type(name);
 
-       if (!ti) {
+       if (!tt) {
                load_module(name);
-               ti = get_target_type(name);
+               tt = get_target_type(name);
        }
 
-       return ti ? &ti->tt : NULL;
+       return tt;
 }
 
-void dm_put_target_type(struct target_type *t)
+void dm_put_target_type(struct target_type *tt)
 {
-       struct tt_internal *ti = (struct tt_internal *) t;
-
        down_read(&_lock);
-       if (--ti->use == 0)
-               module_put(ti->tt.module);
-
-       BUG_ON(ti->use < 0);
+       module_put(tt->module);
        up_read(&_lock);
-
-       return;
-}
-
-static struct tt_internal *alloc_target(struct target_type *t)
-{
-       struct tt_internal *ti = kzalloc(sizeof(*ti), GFP_KERNEL);
-
-       if (ti)
-               ti->tt = *t;
-
-       return ti;
 }
 
-
 int dm_target_iterate(void (*iter_func)(struct target_type *tt,
                                        void *param), void *param)
 {
-       struct tt_internal *ti;
+       struct target_type *tt;
 
        down_read(&_lock);
-       list_for_each_entry (ti, &_targets, list)
-               iter_func(&ti->tt, param);
+       list_for_each_entry(tt, &_targets, list)
+               iter_func(tt, param);
        up_read(&_lock);
 
        return 0;
 }
 
-int dm_register_target(struct target_type *t)
+int dm_register_target(struct target_type *tt)
 {
        int rv = 0;
-       struct tt_internal *ti = alloc_target(t);
-
-       if (!ti)
-               return -ENOMEM;
 
        down_write(&_lock);
-       if (__find_target_type(t->name))
+       if (__find_target_type(tt->name))
                rv = -EEXIST;
        else
-               list_add(&ti->list, &_targets);
+               list_add(&tt->list, &_targets);
 
        up_write(&_lock);
-       if (rv)
-               kfree(ti);
        return rv;
 }
 
-void dm_unregister_target(struct target_type *t)
+void dm_unregister_target(struct target_type *tt)
 {
-       struct tt_internal *ti;
-
        down_write(&_lock);
-       if (!(ti = __find_target_type(t->name))) {
-               DMCRIT("Unregistering unrecognised target: %s", t->name);
-               BUG();
-       }
-
-       if (ti->use) {
-               DMCRIT("Attempt to unregister target still in use: %s",
-                      t->name);
+       if (!__find_target_type(tt->name)) {
+               DMCRIT("Unregistering unrecognised target: %s", tt->name);
                BUG();
        }
 
-       list_del(&ti->list);
-       kfree(ti);
+       list_del(&tt->list);
 
        up_write(&_lock);
 }
@@ -156,17 +112,17 @@ void dm_unregister_target(struct target_type *t)
  * io-err: always fails an io, useful for bringing
  * up LVs that have holes in them.
  */
-static int io_err_ctr(struct dm_target *ti, unsigned int argc, char **args)
+static int io_err_ctr(struct dm_target *tt, unsigned int argc, char **args)
 {
        return 0;
 }
 
-static void io_err_dtr(struct dm_target *ti)
+static void io_err_dtr(struct dm_target *tt)
 {
        /* empty */
 }
 
-static int io_err_map(struct dm_target *ti, struct bio *bio,
+static int io_err_map(struct dm_target *tt, struct bio *bio,
                      union map_info *map_context)
 {
        return -EIO;
index 8d40f27cce894e4a3c489a7d84d321fb39b363d6..788ba96a6256aaed6de8625d306cc4d5e33a82f9 100644 (file)
@@ -99,19 +99,9 @@ union map_info *dm_get_mapinfo(struct bio *bio)
 /*
  * Work processed by per-device workqueue.
  */
-struct dm_wq_req {
-       enum {
-               DM_WQ_FLUSH_DEFERRED,
-       } type;
-       struct work_struct work;
-       struct mapped_device *md;
-       void *context;
-};
-
 struct mapped_device {
        struct rw_semaphore io_lock;
        struct mutex suspend_lock;
-       spinlock_t pushback_lock;
        rwlock_t map_lock;
        atomic_t holders;
        atomic_t open_count;
@@ -129,8 +119,9 @@ struct mapped_device {
         */
        atomic_t pending;
        wait_queue_head_t wait;
+       struct work_struct work;
        struct bio_list deferred;
-       struct bio_list pushback;
+       spinlock_t deferred_lock;
 
        /*
         * Processing queue (flush/barriers)
@@ -453,7 +444,9 @@ static int queue_io(struct mapped_device *md, struct bio *bio)
                return 1;
        }
 
+       spin_lock_irq(&md->deferred_lock);
        bio_list_add(&md->deferred, bio);
+       spin_unlock_irq(&md->deferred_lock);
 
        up_write(&md->io_lock);
        return 0;               /* deferred successfully */
@@ -537,16 +530,14 @@ static void dec_pending(struct dm_io *io, int error)
                if (io->error == DM_ENDIO_REQUEUE) {
                        /*
                         * Target requested pushing back the I/O.
-                        * This must be handled before the sleeper on
-                        * suspend queue merges the pushback list.
                         */
-                       spin_lock_irqsave(&md->pushback_lock, flags);
+                       spin_lock_irqsave(&md->deferred_lock, flags);
                        if (__noflush_suspending(md))
-                               bio_list_add(&md->pushback, io->bio);
+                               bio_list_add(&md->deferred, io->bio);
                        else
                                /* noflush suspend was interrupted. */
                                io->error = -EIO;
-                       spin_unlock_irqrestore(&md->pushback_lock, flags);
+                       spin_unlock_irqrestore(&md->deferred_lock, flags);
                }
 
                end_io_acct(io);
@@ -834,20 +825,22 @@ static int __clone_and_map(struct clone_info *ci)
 }
 
 /*
- * Split the bio into several clones.
+ * Split the bio into several clones and submit it to targets.
  */
-static int __split_bio(struct mapped_device *md, struct bio *bio)
+static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
 {
        struct clone_info ci;
        int error = 0;
 
        ci.map = dm_get_table(md);
-       if (unlikely(!ci.map))
-               return -EIO;
+       if (unlikely(!ci.map)) {
+               bio_io_error(bio);
+               return;
+       }
        if (unlikely(bio_barrier(bio) && !dm_table_barrier_ok(ci.map))) {
                dm_table_put(ci.map);
                bio_endio(bio, -EOPNOTSUPP);
-               return 0;
+               return;
        }
        ci.md = md;
        ci.bio = bio;
@@ -867,8 +860,6 @@ static int __split_bio(struct mapped_device *md, struct bio *bio)
        /* drop the extra reference count */
        dec_pending(ci.io, error);
        dm_table_put(ci.map);
-
-       return 0;
 }
 /*-----------------------------------------------------------------
  * CRUD END
@@ -959,8 +950,9 @@ static int dm_request(struct request_queue *q, struct bio *bio)
                down_read(&md->io_lock);
        }
 
-       r = __split_bio(md, bio);
+       __split_and_process_bio(md, bio);
        up_read(&md->io_lock);
+       return 0;
 
 out_req:
        if (r < 0)
@@ -1074,6 +1066,8 @@ out:
 
 static struct block_device_operations dm_blk_dops;
 
+static void dm_wq_work(struct work_struct *work);
+
 /*
  * Allocate and initialise a blank device with a given minor.
  */
@@ -1101,7 +1095,7 @@ static struct mapped_device *alloc_dev(int minor)
 
        init_rwsem(&md->io_lock);
        mutex_init(&md->suspend_lock);
-       spin_lock_init(&md->pushback_lock);
+       spin_lock_init(&md->deferred_lock);
        rwlock_init(&md->map_lock);
        atomic_set(&md->holders, 1);
        atomic_set(&md->open_count, 0);
@@ -1118,6 +1112,7 @@ static struct mapped_device *alloc_dev(int minor)
        md->queue->backing_dev_info.congested_fn = dm_any_congested;
        md->queue->backing_dev_info.congested_data = md;
        blk_queue_make_request(md->queue, dm_request);
+       blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN, NULL);
        blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
        md->queue->unplug_fn = dm_unplug_all;
        blk_queue_merge_bvec(md->queue, dm_merge_bvec);
@@ -1140,6 +1135,7 @@ static struct mapped_device *alloc_dev(int minor)
 
        atomic_set(&md->pending, 0);
        init_waitqueue_head(&md->wait);
+       INIT_WORK(&md->work, dm_wq_work);
        init_waitqueue_head(&md->eventq);
 
        md->disk->major = _major;
@@ -1379,18 +1375,24 @@ void dm_put(struct mapped_device *md)
 }
 EXPORT_SYMBOL_GPL(dm_put);
 
-static int dm_wait_for_completion(struct mapped_device *md)
+static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
 {
        int r = 0;
+       DECLARE_WAITQUEUE(wait, current);
+
+       dm_unplug_all(md->queue);
+
+       add_wait_queue(&md->wait, &wait);
 
        while (1) {
-               set_current_state(TASK_INTERRUPTIBLE);
+               set_current_state(interruptible);
 
                smp_mb();
                if (!atomic_read(&md->pending))
                        break;
 
-               if (signal_pending(current)) {
+               if (interruptible == TASK_INTERRUPTIBLE &&
+                   signal_pending(current)) {
                        r = -EINTR;
                        break;
                }
@@ -1399,67 +1401,40 @@ static int dm_wait_for_completion(struct mapped_device *md)
        }
        set_current_state(TASK_RUNNING);
 
+       remove_wait_queue(&md->wait, &wait);
+
        return r;
 }
 
 /*
  * Process the deferred bios
  */
-static void __flush_deferred_io(struct mapped_device *md)
+static void dm_wq_work(struct work_struct *work)
 {
+       struct mapped_device *md = container_of(work, struct mapped_device,
+                                               work);
        struct bio *c;
 
-       while ((c = bio_list_pop(&md->deferred))) {
-               if (__split_bio(md, c))
-                       bio_io_error(c);
-       }
-
-       clear_bit(DMF_BLOCK_IO, &md->flags);
-}
+       down_write(&md->io_lock);
 
-static void __merge_pushback_list(struct mapped_device *md)
-{
-       unsigned long flags;
+next_bio:
+       spin_lock_irq(&md->deferred_lock);
+       c = bio_list_pop(&md->deferred);
+       spin_unlock_irq(&md->deferred_lock);
 
-       spin_lock_irqsave(&md->pushback_lock, flags);
-       clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
-       bio_list_merge_head(&md->deferred, &md->pushback);
-       bio_list_init(&md->pushback);
-       spin_unlock_irqrestore(&md->pushback_lock, flags);
-}
+       if (c) {
+               __split_and_process_bio(md, c);
+               goto next_bio;
+       }
 
-static void dm_wq_work(struct work_struct *work)
-{
-       struct dm_wq_req *req = container_of(work, struct dm_wq_req, work);
-       struct mapped_device *md = req->md;
+       clear_bit(DMF_BLOCK_IO, &md->flags);
 
-       down_write(&md->io_lock);
-       switch (req->type) {
-       case DM_WQ_FLUSH_DEFERRED:
-               __flush_deferred_io(md);
-               break;
-       default:
-               DMERR("dm_wq_work: unrecognised work type %d", req->type);
-               BUG();
-       }
        up_write(&md->io_lock);
 }
 
-static void dm_wq_queue(struct mapped_device *md, int type, void *context,
-                       struct dm_wq_req *req)
-{
-       req->type = type;
-       req->md = md;
-       req->context = context;
-       INIT_WORK(&req->work, dm_wq_work);
-       queue_work(md->wq, &req->work);
-}
-
-static void dm_queue_flush(struct mapped_device *md, int type, void *context)
+static void dm_queue_flush(struct mapped_device *md)
 {
-       struct dm_wq_req req;
-
-       dm_wq_queue(md, type, context, &req);
+       queue_work(md->wq, &md->work);
        flush_workqueue(md->wq);
 }
 
@@ -1534,7 +1509,6 @@ static void unlock_fs(struct mapped_device *md)
 int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 {
        struct dm_table *map = NULL;
-       DECLARE_WAITQUEUE(wait, current);
        int r = 0;
        int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0;
        int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0;
@@ -1584,28 +1558,22 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
        down_write(&md->io_lock);
        set_bit(DMF_BLOCK_IO, &md->flags);
 
-       add_wait_queue(&md->wait, &wait);
        up_write(&md->io_lock);
 
-       /* unplug */
-       if (map)
-               dm_table_unplug_all(map);
-
        /*
         * Wait for the already-mapped ios to complete.
         */
-       r = dm_wait_for_completion(md);
+       r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE);
 
        down_write(&md->io_lock);
-       remove_wait_queue(&md->wait, &wait);
 
        if (noflush)
-               __merge_pushback_list(md);
+               clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
        up_write(&md->io_lock);
 
        /* were we interrupted ? */
        if (r < 0) {
-               dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL);
+               dm_queue_flush(md);
 
                unlock_fs(md);
                goto out; /* pushback list is already flushed, so skip flush */
@@ -1645,7 +1613,7 @@ int dm_resume(struct mapped_device *md)
        if (r)
                goto out;
 
-       dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL);
+       dm_queue_flush(md);
 
        unlock_fs(md);
 
index 20194e000c5afcda9e10965d17c945b0df5d7171..b48397c0abbd44b253c37ce56db3647f7513eb9f 100644 (file)
@@ -60,7 +60,7 @@ int dm_table_barrier_ok(struct dm_table *t);
 int dm_target_init(void);
 void dm_target_exit(void);
 struct target_type *dm_get_target_type(const char *name);
-void dm_put_target_type(struct target_type *t);
+void dm_put_target_type(struct target_type *tt);
 int dm_target_iterate(void (*iter_func)(struct target_type *tt,
                                        void *param), void *param);
 
index 8209e08969f9e4265e4fbbcd0c00b5a68467cad5..66ec05a5795558cef24450388e67ffd6bf0743f3 100644 (file)
@@ -139,6 +139,9 @@ struct target_type {
        dm_ioctl_fn ioctl;
        dm_merge_fn merge;
        dm_busy_fn busy;
+
+       /* For internal device-mapper use. */
+       struct list_head list;
 };
 
 struct io_restrictions {
index 600c5fb2daad4c231b53a4654316469895af9b0c..5e8b11d88f6f891a5c56328ca54ecb0fa4e2ce57 100644 (file)
@@ -28,6 +28,9 @@ struct dm_dirty_log_type {
        const char *name;
        struct module *module;
 
+       /* For internal device-mapper use */
+       struct list_head list;
+
        int (*ctr)(struct dm_dirty_log *log, struct dm_target *ti,
                   unsigned argc, char **argv);
        void (*dtr)(struct dm_dirty_log *log);
@@ -113,6 +116,16 @@ struct dm_dirty_log_type {
         */
        int (*status)(struct dm_dirty_log *log, status_type_t status_type,
                      char *result, unsigned maxlen);
+
+       /*
+        * is_remote_recovering is necessary for cluster mirroring. It provides
+        * a way to detect recovery on another node, so we aren't writing
+        * concurrently.  This function is likely to block (when a cluster log
+        * is used).
+        *
+        * Returns: 0, 1
+        */
+       int (*is_remote_recovering)(struct dm_dirty_log *log, region_t region);
 };
 
 int dm_dirty_log_type_register(struct dm_dirty_log_type *type);