[PATCH] md: allow md intent bitmap to be stored near the superblock.
author NeilBrown <neilb@cse.unsw.edu.au>
Wed, 22 Jun 2005 00:17:27 +0000 (17:17 -0700)
committer Linus Torvalds <torvalds@ppc970.osdl.org>
Wed, 22 Jun 2005 02:07:47 +0000 (19:07 -0700)
This provides an alternative to storing the bitmap in a separate file.  The
bitmap can be stored at a given offset from the superblock.  Obviously the
creator of the array must make sure this doesn't intersect with data....
Placing the bitmap after the superblock is good for version-0.90 superblocks.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
drivers/md/bitmap.c
drivers/md/md.c
include/linux/raid/bitmap.h
include/linux/raid/md.h
include/linux/raid/md_k.h
include/linux/raid/md_p.h

index 204564dc6a0d9f111d73bfe04e00a5154122b89e..030d6861051ad3ce2a861a23859563692f8742aa 100644 (file)
@@ -116,7 +116,7 @@ static unsigned char *bitmap_alloc_page(struct bitmap *bitmap)
        if (!page)
                printk("%s: bitmap_alloc_page FAILED\n", bmname(bitmap));
        else
-               printk("%s: bitmap_alloc_page: allocated page at %p\n",
+               PRINTK("%s: bitmap_alloc_page: allocated page at %p\n",
                        bmname(bitmap), page);
        return page;
 }
@@ -258,13 +258,61 @@ char *file_path(struct file *file, char *buf, int count)
  * basic page I/O operations
  */
 
+/* IO operations when bitmap is stored near all superblocks */
+/*
+ * Read bitmap page 'index' from an in-sync, non-faulty device, trying each
+ * candidate once.  Returns the page, or ERR_PTR(-ENOMEM) / ERR_PTR(-EIO).
+ */
+static struct page *read_sb_page(mddev_t *mddev, long offset, unsigned long index)
+{
+       mdk_rdev_t *rdev;
+       struct list_head *tmp;
+       struct page *page = alloc_page(GFP_KERNEL);
+       sector_t target;
+
+       if (!page)
+               return ERR_PTR(-ENOMEM);
+       ITERATE_RDEV(mddev, rdev, tmp) {
+               if (!rdev->in_sync || rdev->faulty)
+                       continue;
+               target = (rdev->sb_offset << 1) + offset + index * (PAGE_SIZE/512);
+               if (sync_page_io(rdev->bdev, target, PAGE_SIZE, page, READ)) {
+                       page->index = index;
+                       return page;
+               }
+       }
+       __free_page(page);      /* no device could supply it: don't leak the page */
+       return ERR_PTR(-EIO);
+}
+
+/* Queue 'page' for writing on each in-sync, non-faulty device; optionally wait. */
+static int write_sb_page(mddev_t *mddev, long offset, struct page *page, int wait)
+{
+       mdk_rdev_t *dev;
+       struct list_head *pos;
+
+       ITERATE_RDEV(mddev, dev, pos) {
+               if (!dev->in_sync || dev->faulty)
+                       continue;
+               md_super_write(mddev, dev,
+                              (dev->sb_offset<<1) + offset + page->index * (PAGE_SIZE/512),
+                              PAGE_SIZE, page);
+       }
+       if (wait)       /* block until mddev->pending_writes reads as zero */
+               wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
+       return 0;
+}
+
 /*
- * write out a page
+ * write out a page to a file
  */
 static int write_page(struct bitmap *bitmap, struct page *page, int wait)
 {
        int ret = -ENOMEM;
 
+       if (bitmap->file == NULL)
+               return write_sb_page(bitmap->mddev, bitmap->offset, page, wait);
+
        lock_page(page);
 
        ret = page->mapping->a_ops->prepare_write(NULL, page, 0, PAGE_SIZE);
@@ -394,7 +442,12 @@ static int bitmap_read_sb(struct bitmap *bitmap)
        int err = -EINVAL;
 
        /* page 0 is the superblock, read it... */
-       bitmap->sb_page = read_page(bitmap->file, 0, &bytes_read);
+       if (bitmap->file)
+               bitmap->sb_page = read_page(bitmap->file, 0, &bytes_read);
+       else {
+               bitmap->sb_page = read_sb_page(bitmap->mddev, bitmap->offset, 0);
+               bytes_read = PAGE_SIZE;
+       }
        if (IS_ERR(bitmap->sb_page)) {
                err = PTR_ERR(bitmap->sb_page);
                bitmap->sb_page = NULL;
@@ -625,14 +678,16 @@ static void bitmap_file_kick(struct bitmap *bitmap)
        bitmap_mask_state(bitmap, BITMAP_STALE, MASK_SET);
        bitmap_update_sb(bitmap);
 
-       path = kmalloc(PAGE_SIZE, GFP_KERNEL);
-       if (path)
-               ptr = file_path(bitmap->file, path, PAGE_SIZE);
+       if (bitmap->file) {
+               path = kmalloc(PAGE_SIZE, GFP_KERNEL);
+               if (path)
+                       ptr = file_path(bitmap->file, path, PAGE_SIZE);
 
-       printk(KERN_ALERT "%s: kicking failed bitmap file %s from array!\n",
-               bmname(bitmap), ptr ? ptr : "");
+               printk(KERN_ALERT "%s: kicking failed bitmap file %s from array!\n",
+                      bmname(bitmap), ptr ? ptr : "");
 
-       kfree(path);
+               kfree(path);
+       }
 
        bitmap_file_put(bitmap);
 
@@ -676,7 +731,7 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
        void *kaddr;
        unsigned long chunk = block >> CHUNK_BLOCK_SHIFT(bitmap);
 
-       if (!bitmap->file || !bitmap->filemap) {
+       if (!bitmap->filemap) {
                return;
        }
 
@@ -715,7 +770,7 @@ int bitmap_unplug(struct bitmap *bitmap)
         * flushed out to disk */
        for (i = 0; i < bitmap->file_pages; i++) {
                spin_lock_irqsave(&bitmap->lock, flags);
-               if (!bitmap->file || !bitmap->filemap) {
+               if (!bitmap->filemap) {
                        spin_unlock_irqrestore(&bitmap->lock, flags);
                        return 0;
                }
@@ -732,11 +787,15 @@ int bitmap_unplug(struct bitmap *bitmap)
                                return 1;
        }
        if (wait) { /* if any writes were performed, we need to wait on them */
-               spin_lock_irq(&bitmap->write_lock);
-               wait_event_lock_irq(bitmap->write_wait,
-                       list_empty(&bitmap->complete_pages), bitmap->write_lock,
-                       wake_up_process(bitmap->writeback_daemon->tsk));
-               spin_unlock_irq(&bitmap->write_lock);
+               if (bitmap->file) {
+                       spin_lock_irq(&bitmap->write_lock);
+                       wait_event_lock_irq(bitmap->write_wait,
+                                           list_empty(&bitmap->complete_pages), bitmap->write_lock,
+                                           wake_up_process(bitmap->writeback_daemon->tsk));
+                       spin_unlock_irq(&bitmap->write_lock);
+               } else
+                       wait_event(bitmap->mddev->sb_wait,
+                                  atomic_read(&bitmap->mddev->pending_writes)==0);
        }
        return 0;
 }
@@ -764,7 +823,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, int in_sync)
        chunks = bitmap->chunks;
        file = bitmap->file;
 
-       BUG_ON(!file);
+       BUG_ON(!file && !bitmap->offset);
 
 #if INJECT_FAULTS_3
        outofdate = 1;
@@ -779,7 +838,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, int in_sync)
 
        num_pages = (bytes + sizeof(bitmap_super_t) + PAGE_SIZE - 1) / PAGE_SIZE;
 
-       if (i_size_read(file->f_mapping->host) < bytes + sizeof(bitmap_super_t)) {
+       if (file && i_size_read(file->f_mapping->host) < bytes + sizeof(bitmap_super_t)) {
                printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
                        bmname(bitmap),
                        (unsigned long) i_size_read(file->f_mapping->host),
@@ -816,14 +875,18 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, int in_sync)
                                 */
                                page = bitmap->sb_page;
                                offset = sizeof(bitmap_super_t);
-                       } else {
+                       } else if (file) {
                                page = read_page(file, index, &dummy);
-                               if (IS_ERR(page)) { /* read error */
-                                       ret = PTR_ERR(page);
-                                       goto out;
-                               }
+                               offset = 0;
+                       } else {
+                               page = read_sb_page(bitmap->mddev, bitmap->offset, index);
                                offset = 0;
                        }
+                       if (IS_ERR(page)) { /* read error */
+                               ret = PTR_ERR(page);
+                               goto out;
+                       }
+
                        oldindex = index;
                        oldpage = page;
                        kmap(page);
@@ -874,6 +937,19 @@ out:
        return ret;
 }
 
+/* Flag every bitmap page as needing a write; no I/O is issued from here. */
+void bitmap_write_all(struct bitmap *bitmap)
+{
+       unsigned long chunks = bitmap->chunks;
+       /* one bit per chunk, rounded up to whole bytes, plus the superblock */
+       unsigned long total = (chunks + 7) / 8 + sizeof(bitmap_super_t);
+       unsigned long pages = (total + PAGE_SIZE - 1) / PAGE_SIZE;
+       unsigned long pg;
+
+       for (pg = 0; pg < pages; pg++)
+               bitmap->filemap_attr[pg] |= BITMAP_PAGE_NEEDWRITE;
+}
+
 
 static void bitmap_count_page(struct bitmap *bitmap, sector_t offset, int inc)
 {
@@ -913,7 +989,7 @@ int bitmap_daemon_work(struct bitmap *bitmap)
        for (j = 0; j < bitmap->chunks; j++) {
                bitmap_counter_t *bmc;
                spin_lock_irqsave(&bitmap->lock, flags);
-               if (!bitmap->file || !bitmap->filemap) {
+               if (!bitmap->filemap) {
                        /* error or shutdown */
                        spin_unlock_irqrestore(&bitmap->lock, flags);
                        break;
@@ -1072,6 +1148,7 @@ static int bitmap_start_daemon(struct bitmap *bitmap, mdk_thread_t **ptr,
 
        spin_lock_irqsave(&bitmap->lock, flags);
        *ptr = NULL;
+
        if (!bitmap->file) /* no need for daemon if there's no backing file */
                goto out_unlock;
 
@@ -1416,9 +1493,11 @@ int bitmap_create(mddev_t *mddev)
 
        BUG_ON(sizeof(bitmap_super_t) != 256);
 
-       if (!file) /* bitmap disabled, nothing to do */
+       if (!file && !mddev->bitmap_offset) /* bitmap disabled, nothing to do */
                return 0;
 
+       BUG_ON(file && mddev->bitmap_offset);
+
        bitmap = kmalloc(sizeof(*bitmap), GFP_KERNEL);
        if (!bitmap)
                return -ENOMEM;
@@ -1438,7 +1517,8 @@ int bitmap_create(mddev_t *mddev)
                return -ENOMEM;
 
        bitmap->file = file;
-       get_file(file);
+       bitmap->offset = mddev->bitmap_offset;
+       if (file) get_file(file);
        /* read superblock from bitmap file (this sets bitmap->chunksize) */
        err = bitmap_read_sb(bitmap);
        if (err)
index 7075bebb7f37015ce09ecc37d7aabd557ebfe82b..fde8acfac32037cd9dcec3ee20743adb84e4cc2e 100644 (file)
@@ -337,7 +337,7 @@ static int bi_complete(struct bio *bio, unsigned int bytes_done, int error)
        return 0;
 }
 
-static int sync_page_io(struct block_device *bdev, sector_t sector, int size,
+int sync_page_io(struct block_device *bdev, sector_t sector, int size,
                   struct page *page, int rw)
 {
        struct bio *bio = bio_alloc(GFP_NOIO, 1);
@@ -609,6 +609,17 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
                memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
 
                mddev->max_disks = MD_SB_DISKS;
+
+               if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
+                   mddev->bitmap_file == NULL) {
+                       if (mddev->level != 1) {
+                               /* FIXME use a better test */
+                               printk(KERN_WARNING "md: bitmaps only support for raid1\n");
+                               return -EINVAL;
+                       }
+                       mddev->bitmap_offset = (MD_SB_BYTES >> 9);
+               }
+
        } else if (mddev->pers == NULL) {
                /* Insist on good event counter while assembling */
                __u64 ev1 = md_event(sb);
@@ -702,6 +713,9 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
        sb->layout = mddev->layout;
        sb->chunk_size = mddev->chunk_size;
 
+       if (mddev->bitmap && mddev->bitmap_file == NULL)
+               sb->state |= (1<<MD_SB_BITMAP_PRESENT);
+
        sb->disks[0].state = (1<<MD_DISK_REMOVED);
        ITERATE_RDEV(mddev,rdev2,tmp) {
                mdp_disk_t *d;
@@ -898,6 +912,15 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
                memcpy(mddev->uuid, sb->set_uuid, 16);
 
                mddev->max_disks =  (4096-256)/2;
+
+               if ((le32_to_cpu(sb->feature_map) & 1) &&
+                   mddev->bitmap_file == NULL ) {
+                       if (mddev->level != 1) {
+                               printk(KERN_WARNING "md: bitmaps only supported for raid1\n");
+                               return -EINVAL;
+                       }
+                       mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset);
+               }
        } else if (mddev->pers == NULL) {
                /* Insist of good event counter while assembling */
                __u64 ev1 = le64_to_cpu(sb->events);
@@ -960,6 +983,11 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
        else
                sb->resync_offset = cpu_to_le64(0);
 
+       if (mddev->bitmap && mddev->bitmap_file == NULL) {
+               sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
+               sb->feature_map = cpu_to_le32(1);
+       }
+
        max_dev = 0;
        ITERATE_RDEV(mddev,rdev2,tmp)
                if (rdev2->desc_nr+1 > max_dev)
@@ -2406,7 +2434,8 @@ static int set_bitmap_file(mddev_t *mddev, int fd)
                        mdname(mddev));
                fput(mddev->bitmap_file);
                mddev->bitmap_file = NULL;
-       }
+       } else
+               mddev->bitmap_offset = 0; /* file overrides offset */
        return err;
 }
 
@@ -3774,6 +3803,13 @@ void md_check_recovery(mddev_t *mddev)
                        set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
                        if (!spares)
                                set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+                       if (spares && mddev->bitmap && ! mddev->bitmap->file) {
+                               /* We are adding a device or devices to an array
+                                * which has the bitmap stored on all devices.
+                                * So make sure all bitmap pages get written
+                                */
+                               bitmap_write_all(mddev->bitmap);
+                       }
                        mddev->sync_thread = md_register_thread(md_do_sync,
                                                                mddev,
                                                                "%s_resync");
index cfe60cfc8f3d25e6d2848f4e5d58d07447160ef5..e24b74b11150d2239a8187a8d61f01e824249cf1 100644 (file)
@@ -217,6 +217,7 @@ struct bitmap {
        /* bitmap spinlock */
        spinlock_t lock;
 
+       long offset; /* offset from superblock if file is NULL */
        struct file *file; /* backing disk file */
        struct page *sb_page; /* cached copy of the bitmap file superblock */
        struct page **filemap; /* list of cache pages for the file */
@@ -255,6 +256,7 @@ void bitmap_print_sb(struct bitmap *bitmap);
 int bitmap_update_sb(struct bitmap *bitmap);
 
 int  bitmap_setallbits(struct bitmap *bitmap);
+void bitmap_write_all(struct bitmap *bitmap);
 
 /* these are exported */
 int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors);
index 75f41d8faed2c5bafb9dd347b535e22356d00581..ffa316ce4dc834aa3ab4a4e8967e95a47c0b5c49 100644 (file)
  */
 #define MD_MAJOR_VERSION                0
 #define MD_MINOR_VERSION                90
-#define MD_PATCHLEVEL_VERSION           1
+/*
+ * MD_PATCHLEVEL_VERSION indicates kernel functionality.
+ * >=1 means different superblock formats are selectable using SET_ARRAY_INFO
+ *     and major_version/minor_version accordingly
+ * >=2 means that Internal bitmaps are supported by setting MD_SB_BITMAP_PRESENT
+ *     in the super status byte
+ */
+#define MD_PATCHLEVEL_VERSION           2
 
 extern int register_md_personality (int p_num, mdk_personality_t *p);
 extern int unregister_md_personality (int p_num);
@@ -78,6 +85,12 @@ extern void md_unplug_mddev(mddev_t *mddev);
 
 extern void md_print_devices (void);
 
+extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
+                          sector_t sector, int size, struct page *page);
+extern int sync_page_io(struct block_device *bdev, sector_t sector, int size,
+                       struct page *page, int rw);
+
+
 #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
 
 #endif 
index 3e977025cf434a93b5044970c73cff149e2ffc7e..a3725b57fb7d973c11cfd7dc01773bb3b4e0e8b6 100644 (file)
@@ -273,6 +273,10 @@ struct mddev_s
 
        struct bitmap                   *bitmap; /* the bitmap for the device */
        struct file                     *bitmap_file; /* the bitmap file */
+       long                            bitmap_offset; /* offset from superblock of
+                                                       * start of bitmap. May be
+                                                       * negative, but not '0'
+                                                       */
 
        struct list_head                all_mddevs;
 };
index 8ba95d67329f0b6189d80795ee807f62e8ae17b7..8e592a25a8b5a56470572db493da783b9052c203 100644 (file)
@@ -96,6 +96,7 @@ typedef struct mdp_device_descriptor_s {
 #define MD_SB_CLEAN            0
 #define MD_SB_ERRORS           1
 
+#define        MD_SB_BITMAP_PRESENT    8 /* bitmap may be present nearby */
 typedef struct mdp_superblock_s {
        /*
         * Constant generic information
@@ -184,7 +185,7 @@ struct mdp_superblock_1 {
        /* constant array information - 128 bytes */
        __u32   magic;          /* MD_SB_MAGIC: 0xa92b4efc - little endian */
        __u32   major_version;  /* 1 */
-       __u32   feature_map;    /* 0 for now */
+       __u32   feature_map;    /* bit 0 set if 'bitmap_offset' is meaningful */
        __u32   pad0;           /* always set to 0 when writing */
 
        __u8    set_uuid[16];   /* user-space generated. */
@@ -197,6 +198,10 @@ struct mdp_superblock_1 {
 
        __u32   chunksize;      /* in 512byte sectors */
        __u32   raid_disks;
+       __u32   bitmap_offset;  /* sectors after start of superblock that bitmap starts
+                                * NOTE: signed, so bitmap can be before superblock
+                                * only meaningful of feature_map[0] is set.
+                                */
        __u8    pad1[128-96];   /* set to 0 when written */
 
        /* constant this-device information - 64 bytes */