btrfs: drop constant param from btrfs_release_extent_buffer_page
[sfrench/cifs-2.6.git] / fs / btrfs / extent_io.c
index c7648f53f63dddd892d6fa4f64d0f6b5058a99c5..638e1a5b00e24c0dc6cc85f7e2d014ac93759481 100644 (file)
@@ -1962,26 +1962,7 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
                SetPageUptodate(page);
 }
 
-/*
- * When IO fails, either with EIO or csum verification fails, we
- * try other mirrors that might have a good copy of the data.  This
- * io_failure_record is used to record state as we go through all the
- * mirrors.  If another mirror has good data, the page is set up to date
- * and things continue.  If a good mirror can't be found, the original
- * bio end_io callback is called to indicate things have failed.
- */
-struct io_failure_record {
-       struct page *page;
-       u64 start;
-       u64 len;
-       u64 logical;
-       unsigned long bio_flags;
-       int this_mirror;
-       int failed_mirror;
-       int in_validation;
-};
-
-static int free_io_failure(struct inode *inode, struct io_failure_record *rec)
+int free_io_failure(struct inode *inode, struct io_failure_record *rec)
 {
        int ret;
        int err = 0;
@@ -2014,10 +1995,10 @@ static int free_io_failure(struct inode *inode, struct io_failure_record *rec)
  * currently, there can be no more than two copies of every data bit. thus,
  * exactly one rewrite is required.
  */
-int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
-                       u64 length, u64 logical, struct page *page,
-                       int mirror_num)
+int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
+                     struct page *page, unsigned int pg_offset, int mirror_num)
 {
+       struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
        struct bio *bio;
        struct btrfs_device *dev;
        u64 map_length = 0;
@@ -2055,7 +2036,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
                return -EIO;
        }
        bio->bi_bdev = dev->bdev;
-       bio_add_page(bio, page, length, start - page_offset(page));
+       bio_add_page(bio, page, length, pg_offset);
 
        if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) {
                /* try to remap that extent elsewhere? */
@@ -2065,10 +2046,9 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
        }
 
        printk_ratelimited_in_rcu(KERN_INFO
-                       "BTRFS: read error corrected: ino %lu off %llu "
-                   "(dev %s sector %llu)\n", page->mapping->host->i_ino,
-                   start, rcu_str_deref(dev->name), sector);
-
+                                 "BTRFS: read error corrected: ino %llu off %llu (dev %s sector %llu)\n",
+                                 btrfs_ino(inode), start,
+                                 rcu_str_deref(dev->name), sector);
        bio_put(bio);
        return 0;
 }
@@ -2085,8 +2065,10 @@ int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
 
        for (i = 0; i < num_pages; i++) {
                struct page *p = extent_buffer_page(eb, i);
-               ret = repair_io_failure(root->fs_info, start, PAGE_CACHE_SIZE,
-                                       start, p, mirror_num);
+
+               ret = repair_io_failure(root->fs_info->btree_inode, start,
+                                       PAGE_CACHE_SIZE, start, p,
+                                       start - page_offset(p), mirror_num);
                if (ret)
                        break;
                start += PAGE_CACHE_SIZE;
@@ -2099,12 +2081,12 @@ int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
  * each time an IO finishes, we do a fast check in the IO failure tree
  * to see if we need to process or clean up an io_failure_record
  */
-static int clean_io_failure(u64 start, struct page *page)
+int clean_io_failure(struct inode *inode, u64 start, struct page *page,
+                    unsigned int pg_offset)
 {
        u64 private;
        u64 private_failure;
        struct io_failure_record *failrec;
-       struct inode *inode = page->mapping->host;
        struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
        struct extent_state *state;
        int num_copies;
@@ -2144,9 +2126,9 @@ static int clean_io_failure(u64 start, struct page *page)
                num_copies = btrfs_num_copies(fs_info, failrec->logical,
                                              failrec->len);
                if (num_copies > 1)  {
-                       repair_io_failure(fs_info, start, failrec->len,
+                       repair_io_failure(inode, start, failrec->len,
                                          failrec->logical, page,
-                                         failrec->failed_mirror);
+                                         pg_offset, failrec->failed_mirror);
                }
        }
 
@@ -2157,39 +2139,57 @@ out:
 }
 
 /*
- * this is a generic handler for readpage errors (default
- * readpage_io_failed_hook). if other copies exist, read those and write back
- * good data to the failed position. does not investigate in remapping the
- * failed extent elsewhere, hoping the device will be smart enough to do this as
- * needed
+ * Can be called when:
+ * - the extent lock is held
+ * - under an ordered extent
+ * - the inode is being freed
  */
+void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end)
+{
+       struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
+       struct io_failure_record *failrec;
+       struct extent_state *state, *next;
 
-static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
-                             struct page *page, u64 start, u64 end,
-                             int failed_mirror)
+       if (RB_EMPTY_ROOT(&failure_tree->state))
+               return;
+       /* Free every failure record found in [start, end] under the tree lock. */
+       spin_lock(&failure_tree->lock);
+       state = find_first_extent_bit_state(failure_tree, start, EXTENT_DIRTY);
+       while (state) {
+               if (state->start > end)
+                       break;
+
+               ASSERT(state->end <= end);
+               /* Fetch the successor first; this state is freed just below. */
+               next = next_state(state);
+               /* private is a u64 elsewhere in this file: cast via unsigned long. */
+               failrec = (struct io_failure_record *)(unsigned long)state->private;
+               free_extent_state(state);
+               kfree(failrec);
+
+               state = next;
+       }
+       spin_unlock(&failure_tree->lock);
+}
+
+int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
+                               struct io_failure_record **failrec_ret)
 {
-       struct io_failure_record *failrec = NULL;
+       struct io_failure_record *failrec;
        u64 private;
        struct extent_map *em;
-       struct inode *inode = page->mapping->host;
        struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
        struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
-       struct bio *bio;
-       struct btrfs_io_bio *btrfs_failed_bio;
-       struct btrfs_io_bio *btrfs_bio;
-       int num_copies;
        int ret;
-       int read_mode;
        u64 logical;
 
-       BUG_ON(failed_bio->bi_rw & REQ_WRITE);
-
        ret = get_state_private(failure_tree, start, &private);
        if (ret) {
                failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
                if (!failrec)
                        return -ENOMEM;
+
                failrec->start = start;
                failrec->len = end - start + 1;
                failrec->this_mirror = 0;
@@ -2209,11 +2209,11 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
                        em = NULL;
                }
                read_unlock(&em_tree->lock);
-
                if (!em) {
                        kfree(failrec);
                        return -EIO;
                }
+
                logical = start - em->start;
                logical = em->block_start + logical;
                if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
@@ -2222,8 +2222,10 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
                        extent_set_compress_type(&failrec->bio_flags,
                                                 em->compress_type);
                }
-               pr_debug("bio_readpage_error: (new) logical=%llu, start=%llu, "
-                        "len=%llu\n", logical, start, failrec->len);
+
+               pr_debug("Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu\n",
+                        logical, start, failrec->len);
+
                failrec->logical = logical;
                free_extent_map(em);
 
@@ -2243,8 +2245,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
                }
        } else {
                failrec = (struct io_failure_record *)(unsigned long)private;
-               pr_debug("bio_readpage_error: (found) logical=%llu, "
-                        "start=%llu, len=%llu, validation=%d\n",
+               pr_debug("Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d\n",
                         failrec->logical, failrec->start, failrec->len,
                         failrec->in_validation);
                /*
@@ -2253,6 +2254,17 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
                 * clean_io_failure() clean all those errors at once.
                 */
        }
+
+       *failrec_ret = failrec;
+
+       return 0;
+}
+
+int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio,
+                          struct io_failure_record *failrec, int failed_mirror)
+{
+       int num_copies;
+
        num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info,
                                      failrec->logical, failrec->len);
        if (num_copies == 1) {
@@ -2261,10 +2273,9 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
                 * all the retry and error correction code that follows. no
                 * matter what the error is, it is very likely to persist.
                 */
-               pr_debug("bio_readpage_error: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n",
+               pr_debug("Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n",
                         num_copies, failrec->this_mirror, failed_mirror);
-               free_io_failure(inode, failrec);
-               return -EIO;
+               return 0;
        }
 
        /*
@@ -2284,7 +2295,6 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
                BUG_ON(failrec->in_validation);
                failrec->in_validation = 1;
                failrec->this_mirror = failed_mirror;
-               read_mode = READ_SYNC | REQ_FAILFAST_DEV;
        } else {
                /*
                 * we're ready to fulfill a) and b) alongside. get a good copy
@@ -2300,25 +2310,36 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
                failrec->this_mirror++;
                if (failrec->this_mirror == failed_mirror)
                        failrec->this_mirror++;
-               read_mode = READ_SYNC;
        }
 
        if (failrec->this_mirror > num_copies) {
-               pr_debug("bio_readpage_error: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n",
+               pr_debug("Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n",
                         num_copies, failrec->this_mirror, failed_mirror);
-               free_io_failure(inode, failrec);
-               return -EIO;
+               return 0;
        }
 
+       return 1;
+}
+
+
+struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
+                                   struct io_failure_record *failrec,
+                                   struct page *page, int pg_offset, int icsum,
+                                   bio_end_io_t *endio_func, void *data)
+{
+       struct bio *bio;
+       struct btrfs_io_bio *btrfs_failed_bio;
+       struct btrfs_io_bio *btrfs_bio;
+
        bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
-       if (!bio) {
-               free_io_failure(inode, failrec);
-               return -EIO;
-       }
-       bio->bi_end_io = failed_bio->bi_end_io;
+       if (!bio)
+               return NULL;
+
+       bio->bi_end_io = endio_func;
        bio->bi_iter.bi_sector = failrec->logical >> 9;
        bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
        bio->bi_iter.bi_size = 0;
+       bio->bi_private = data;
 
        btrfs_failed_bio = btrfs_io_bio(failed_bio);
        if (btrfs_failed_bio->csum) {
@@ -2327,17 +2348,64 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
 
                btrfs_bio = btrfs_io_bio(bio);
                btrfs_bio->csum = btrfs_bio->csum_inline;
-               phy_offset >>= inode->i_sb->s_blocksize_bits;
-               phy_offset *= csum_size;
-               memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + phy_offset,
+               icsum *= csum_size;
+               memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + icsum,
                       csum_size);
        }
 
-       bio_add_page(bio, page, failrec->len, start - page_offset(page));
+       bio_add_page(bio, page, failrec->len, pg_offset);
+
+       return bio;
+}
+
+/*
+ * this is a generic handler for readpage errors (default
+ * readpage_io_failed_hook). if other copies exist, read those and write back
+ * good data to the failed position. does not investigate in remapping the
+ * failed extent elsewhere, hoping the device will be smart enough to do this as
+ * needed
+ */
+
+static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
+                             struct page *page, u64 start, u64 end,
+                             int failed_mirror)
+{
+       struct io_failure_record *failrec;
+       struct inode *inode = page->mapping->host;
+       struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
+       struct bio *bio;
+       int read_mode;
+       int ret;
+
+       BUG_ON(failed_bio->bi_rw & REQ_WRITE);
+
+       ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
+       if (ret)
+               return ret;
+
+       ret = btrfs_check_repairable(inode, failed_bio, failrec, failed_mirror);
+       if (!ret) {
+               free_io_failure(inode, failrec);
+               return -EIO;
+       }
+
+       if (failed_bio->bi_vcnt > 1)
+               read_mode = READ_SYNC | REQ_FAILFAST_DEV;
+       else
+               read_mode = READ_SYNC;
 
-       pr_debug("bio_readpage_error: submitting new read[%#x] to "
-                "this_mirror=%d, num_copies=%d, in_validation=%d\n", read_mode,
-                failrec->this_mirror, num_copies, failrec->in_validation);
+       phy_offset >>= inode->i_sb->s_blocksize_bits;
+       bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
+                                     start - page_offset(page),
+                                     (int)phy_offset, failed_bio->bi_end_io,
+                                     NULL);
+       if (!bio) {
+               free_io_failure(inode, failrec);
+               return -EIO;
+       }
+
+       pr_debug("Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d\n",
+                read_mode, failrec->this_mirror, failrec->in_validation);
 
        ret = tree->ops->submit_bio_hook(inode, read_mode, bio,
                                         failrec->this_mirror,
@@ -2505,7 +2573,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
                        if (ret)
                                uptodate = 0;
                        else
-                               clean_io_failure(start, page);
+                               clean_io_failure(inode, start, page, 0);
                }
 
                if (likely(uptodate))
@@ -4382,19 +4450,16 @@ int extent_buffer_under_io(struct extent_buffer *eb)
 /*
  * Helper for releasing extent buffer page.
  */
-static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
-                                               unsigned long start_idx)
+static void btrfs_release_extent_buffer_page(struct extent_buffer *eb)
 {
        unsigned long index;
-       unsigned long num_pages;
        struct page *page;
        int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
 
        BUG_ON(extent_buffer_under_io(eb));
 
-       num_pages = num_extent_pages(eb->start, eb->len);
-       index = start_idx + num_pages;
-       if (start_idx >= index)
+       index = num_extent_pages(eb->start, eb->len);
+       if (index == 0)
                return;
 
        do {
@@ -4430,7 +4495,7 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
                        /* One for when we alloced the page */
                        page_cache_release(page);
                }
-       } while (index != start_idx);
+       } while (index != 0);
 }
 
 /*
@@ -4438,7 +4503,7 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
  */
 static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
 {
-       btrfs_release_extent_buffer_page(eb, 0);
+       btrfs_release_extent_buffer_page(eb); /* release eb's pages, then free eb below */
        __free_extent_buffer(eb);
 }
 
@@ -4795,7 +4860,7 @@ static int release_extent_buffer(struct extent_buffer *eb)
                }
 
                /* Should be safe to release our pages at this point */
-               btrfs_release_extent_buffer_page(eb, 0);
+               btrfs_release_extent_buffer_page(eb);
                call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
                return 1;
        }