Merge tag 'md/4.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md
[sfrench/cifs-2.6.git] / drivers / md / raid5-ppl.c
index 1e237c40d6fa26c816825b6f99631ce76bd0e8db..cd026c88f7efa7e1bf2367a4a6f41ab5336b451e 100644 (file)
@@ -87,6 +87,8 @@
  * The current io_unit accepting new stripes is always at the end of the list.
  */
 
+#define PPL_SPACE_SIZE (128 * 1024) /* bytes; per-io_unit entry_space cap in multi-PPL mode */
+
 struct ppl_conf {
        struct mddev *mddev;
 
@@ -122,6 +124,10 @@ struct ppl_log {
                                         * always at the end of io_list */
        spinlock_t io_list_lock;
        struct list_head io_list;       /* all io_units of this log */
+
+       sector_t next_io_sector;
+       unsigned int entry_space;
+       bool use_multippl;
 };
 
 #define PPL_IO_INLINE_BVECS 32
@@ -264,13 +270,12 @@ static int ppl_log_stripe(struct ppl_log *log, struct stripe_head *sh)
        int i;
        sector_t data_sector = 0;
        int data_disks = 0;
-       unsigned int entry_space = (log->rdev->ppl.size << 9) - PPL_HEADER_SIZE;
        struct r5conf *conf = sh->raid_conf;
 
        pr_debug("%s: stripe: %llu\n", __func__, (unsigned long long)sh->sector);
 
        /* check if current io_unit is full */
-       if (io && (io->pp_size == entry_space ||
+       if (io && (io->pp_size == log->entry_space ||
                   io->entries_count == PPL_HDR_MAX_ENTRIES)) {
                pr_debug("%s: add io_unit blocked by seq: %llu\n",
                         __func__, io->seq);
@@ -451,12 +456,25 @@ static void ppl_submit_iounit(struct ppl_io_unit *io)
        pplhdr->entries_count = cpu_to_le32(io->entries_count);
        pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PPL_HEADER_SIZE));
 
+       /* Rewind the buffer if current PPL is larger than remaining space */
+       if (log->use_multippl &&
+           log->rdev->ppl.sector + log->rdev->ppl.size - log->next_io_sector <
+           (PPL_HEADER_SIZE + io->pp_size) >> 9)
+               log->next_io_sector = log->rdev->ppl.sector;
+
+
        bio->bi_end_io = ppl_log_endio;
        bio->bi_opf = REQ_OP_WRITE | REQ_FUA;
        bio_set_dev(bio, log->rdev->bdev);
-       bio->bi_iter.bi_sector = log->rdev->ppl.sector;
+       bio->bi_iter.bi_sector = log->next_io_sector;
        bio_add_page(bio, io->header_page, PAGE_SIZE, 0);
 
+       pr_debug("%s: log->current_io_sector: %llu\n", __func__,
+           (unsigned long long)log->next_io_sector);
+
+       if (log->use_multippl)
+               log->next_io_sector += (PPL_HEADER_SIZE + io->pp_size) >> 9;
+
        list_for_each_entry(sh, &io->stripe_list, log_list) {
                /* entries for full stripe writes have no partial parity */
                if (test_bit(STRIPE_FULL_WRITE, &sh->state))
@@ -813,12 +831,14 @@ out:
        return ret;
 }
 
-static int ppl_recover(struct ppl_log *log, struct ppl_header *pplhdr)
+static int ppl_recover(struct ppl_log *log, struct ppl_header *pplhdr,
+                      sector_t offset)
 {
        struct ppl_conf *ppl_conf = log->ppl_conf;
        struct md_rdev *rdev = log->rdev;
        struct mddev *mddev = rdev->mddev;
-       sector_t ppl_sector = rdev->ppl.sector + (PPL_HEADER_SIZE >> 9);
+       sector_t ppl_sector = rdev->ppl.sector + offset +
+                             (PPL_HEADER_SIZE >> 9);
        struct page *page;
        int i;
        int ret = 0;
@@ -902,6 +922,9 @@ static int ppl_write_empty_header(struct ppl_log *log)
                return -ENOMEM;
 
        pplhdr = page_address(page);
+       /* zero out PPL space to avoid collision with old PPLs */
+       blkdev_issue_zeroout(rdev->bdev, rdev->ppl.sector,
+                           log->rdev->ppl.size, GFP_NOIO, 0);
        memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED);
        pplhdr->signature = cpu_to_le32(log->ppl_conf->signature);
        pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PAGE_SIZE));
@@ -922,63 +945,110 @@ static int ppl_load_distributed(struct ppl_log *log)
        struct ppl_conf *ppl_conf = log->ppl_conf;
        struct md_rdev *rdev = log->rdev;
        struct mddev *mddev = rdev->mddev;
-       struct page *page;
-       struct ppl_header *pplhdr;
+       struct page *page, *page2, *tmp;
+       struct ppl_header *pplhdr = NULL, *prev_pplhdr = NULL;
        u32 crc, crc_stored;
        u32 signature;
-       int ret = 0;
+       int ret = 0, i;
+       sector_t pplhdr_offset = 0, prev_pplhdr_offset = 0;
 
        pr_debug("%s: disk: %d\n", __func__, rdev->raid_disk);
-
-       /* read PPL header */
+       /* read PPL headers, find the most recent one */
        page = alloc_page(GFP_KERNEL);
        if (!page)
                return -ENOMEM;
 
-       if (!sync_page_io(rdev, rdev->ppl.sector - rdev->data_offset,
-                         PAGE_SIZE, page, REQ_OP_READ, 0, false)) {
-               md_error(mddev, rdev);
-               ret = -EIO;
-               goto out;
+       page2 = alloc_page(GFP_KERNEL);
+       if (!page2) {
+               __free_page(page);
+               return -ENOMEM;
        }
-       pplhdr = page_address(page);
 
-       /* check header validity */
-       crc_stored = le32_to_cpu(pplhdr->checksum);
-       pplhdr->checksum = 0;
-       crc = ~crc32c_le(~0, pplhdr, PAGE_SIZE);
+       /* searching ppl area for latest ppl */
+       while (pplhdr_offset < rdev->ppl.size - (PPL_HEADER_SIZE >> 9)) {
+               if (!sync_page_io(rdev,
+                                 rdev->ppl.sector - rdev->data_offset +
+                                 pplhdr_offset, PAGE_SIZE, page, REQ_OP_READ,
+                                 0, false)) {
+                       md_error(mddev, rdev);
+                       ret = -EIO;
+                       /* if not able to read - don't recover any PPL */
+                       pplhdr = NULL;
+                       break;
+               }
+               pplhdr = page_address(page);
+
+               /* check header validity */
+               crc_stored = le32_to_cpu(pplhdr->checksum);
+               pplhdr->checksum = 0;
+               crc = ~crc32c_le(~0, pplhdr, PAGE_SIZE);
+
+               if (crc_stored != crc) {
+                       pr_debug("%s: ppl header crc does not match: stored: 0x%x calculated: 0x%x (offset: %llu)\n",
+                                __func__, crc_stored, crc,
+                                (unsigned long long)pplhdr_offset);
+                       pplhdr = prev_pplhdr;
+                       pplhdr_offset = prev_pplhdr_offset;
+                       break;
+               }
 
-       if (crc_stored != crc) {
-               pr_debug("%s: ppl header crc does not match: stored: 0x%x calculated: 0x%x\n",
-                        __func__, crc_stored, crc);
-               ppl_conf->mismatch_count++;
-               goto out;
-       }
+               signature = le32_to_cpu(pplhdr->signature);
 
-       signature = le32_to_cpu(pplhdr->signature);
+               if (mddev->external) {
+                       /*
+                        * For external metadata the header signature is set and
+                        * validated in userspace.
+                        */
+                       ppl_conf->signature = signature;
+               } else if (ppl_conf->signature != signature) {
+                       pr_debug("%s: ppl header signature does not match: stored: 0x%x configured: 0x%x (offset: %llu)\n",
+                                __func__, signature, ppl_conf->signature,
+                                (unsigned long long)pplhdr_offset);
+                       pplhdr = prev_pplhdr;
+                       pplhdr_offset = prev_pplhdr_offset;
+                       break;
+               }
 
-       if (mddev->external) {
-               /*
-                * For external metadata the header signature is set and
-                * validated in userspace.
-                */
-               ppl_conf->signature = signature;
-       } else if (ppl_conf->signature != signature) {
-               pr_debug("%s: ppl header signature does not match: stored: 0x%x configured: 0x%x\n",
-                        __func__, signature, ppl_conf->signature);
-               ppl_conf->mismatch_count++;
-               goto out;
+               if (prev_pplhdr && le64_to_cpu(prev_pplhdr->generation) >
+                   le64_to_cpu(pplhdr->generation)) {
+                       /* previous was newest */
+                       pplhdr = prev_pplhdr;
+                       pplhdr_offset = prev_pplhdr_offset;
+                       break;
+               }
+
+               prev_pplhdr_offset = pplhdr_offset;
+               prev_pplhdr = pplhdr;
+
+               tmp = page;
+               page = page2;
+               page2 = tmp;
+
+               /* calculate next potential ppl offset */
+               for (i = 0; i < le32_to_cpu(pplhdr->entries_count); i++)
+                       pplhdr_offset +=
+                           le32_to_cpu(pplhdr->entries[i].pp_size) >> 9;
+               pplhdr_offset += PPL_HEADER_SIZE >> 9;
        }
 
+       /* no valid ppl found */
+       if (!pplhdr)
+               ppl_conf->mismatch_count++;
+       else
+               pr_debug("%s: latest PPL found at offset: %llu, with generation: %llu\n",
+                   __func__, (unsigned long long)pplhdr_offset,
+                   le64_to_cpu(pplhdr->generation));
+
        /* attempt to recover from log if we are starting a dirty array */
-       if (!mddev->pers && mddev->recovery_cp != MaxSector)
-               ret = ppl_recover(log, pplhdr);
-out:
+       if (pplhdr && !mddev->pers && mddev->recovery_cp != MaxSector)
+               ret = ppl_recover(log, pplhdr, pplhdr_offset);
+
        /* write empty header if we are starting the array */
        if (!ret && !mddev->pers)
                ret = ppl_write_empty_header(log);
 
        __free_page(page);
+       __free_page(page2);
 
        pr_debug("%s: return: %d mismatch_count: %d recovered_entries: %d\n",
                 __func__, ret, ppl_conf->mismatch_count,
@@ -1031,6 +1101,7 @@ static int ppl_load(struct ppl_conf *ppl_conf)
 static void __ppl_exit_log(struct ppl_conf *ppl_conf)
 {
        clear_bit(MD_HAS_PPL, &ppl_conf->mddev->flags);
+       clear_bit(MD_HAS_MULTIPLE_PPLS, &ppl_conf->mddev->flags);
 
        kfree(ppl_conf->child_logs);
 
@@ -1099,6 +1170,22 @@ static int ppl_validate_rdev(struct md_rdev *rdev)
        return 0;
 }
 
+static void ppl_init_child_log(struct ppl_log *log, struct md_rdev *rdev)
+{ /* Initialise log's multi-PPL mode, entry_space and next_io_sector from rdev's PPL area. */
+       if ((rdev->ppl.size << 9) >= (PPL_SPACE_SIZE +
+                                     PPL_HEADER_SIZE) * 2) { /* area fits >= 2 (header + PPL_SPACE_SIZE) slots */
+               log->use_multippl = true;
+               set_bit(MD_HAS_MULTIPLE_PPLS, /* flag is set on the whole mddev, not per device */
+                       &log->ppl_conf->mddev->flags);
+               log->entry_space = PPL_SPACE_SIZE; /* cap each io_unit's partial parity at PPL_SPACE_SIZE */
+       } else { /* NOTE(review): this branch reads log->rdev, not rdev - assumes caller set log->rdev = rdev; confirm */
+               log->use_multippl = false;
+               log->entry_space = (log->rdev->ppl.size << 9) -
+                                  PPL_HEADER_SIZE; /* single PPL: the whole area minus one header */
+       }
+       log->next_io_sector = rdev->ppl.sector; /* in both modes the first write goes to the start of the area */
+}
+
 int ppl_init_log(struct r5conf *conf)
 {
        struct ppl_conf *ppl_conf;
@@ -1196,6 +1283,7 @@ int ppl_init_log(struct r5conf *conf)
                        q = bdev_get_queue(rdev->bdev);
                        if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
                                need_cache_flush = true;
+                       ppl_init_child_log(log, rdev);
                }
        }
 
@@ -1261,6 +1349,7 @@ int ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add)
                if (!ret) {
                        log->rdev = rdev;
                        ret = ppl_write_empty_header(log);
+                       ppl_init_child_log(log, rdev);
                }
        } else {
                log->rdev = NULL;