btrfs: get fs_info from eb in repair_eb_io_failure
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6a2a2a9517058b429b286557fd0a2b1a512c833d..9c5b87bc0813c8e3a8fe64ef2f8755180d84ee23 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -17,6 +17,7 @@
 #include <linux/semaphore.h>
 #include <linux/error-injection.h>
 #include <linux/crc32c.h>
+#include <linux/sched/mm.h>
 #include <asm/unaligned.h>
 #include "ctree.h"
 #include "disk-io.h"
@@ -259,15 +260,12 @@ void btrfs_csum_final(u32 crc, u8 *result)
 }
 
 /*
- * compute the csum for a btree block, and either verify it or write it
- * into the csum field of the block.
+ * Compute the csum of a btree block and store the result in the provided buffer.
+ *
+ * Returns error if the extent buffer cannot be mapped.
  */
-static int csum_tree_block(struct btrfs_fs_info *fs_info,
-                          struct extent_buffer *buf,
-                          int verify)
+static int csum_tree_block(struct extent_buffer *buf, u8 *result)
 {
-       u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
-       char result[BTRFS_CSUM_SIZE];
        unsigned long len;
        unsigned long cur_len;
        unsigned long offset = BTRFS_CSUM_SIZE;
@@ -287,7 +285,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
                 */
                err = map_private_extent_buffer(buf, offset, 32,
                                        &kaddr, &map_start, &map_len);
-               if (err)
+               if (WARN_ON(err))
                        return err;
                cur_len = min(len, map_len - (offset - map_start));
                crc = btrfs_csum_data(kaddr + offset - map_start,
@@ -299,23 +297,6 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
 
        btrfs_csum_final(crc, result);
 
-       if (verify) {
-               if (memcmp_extent_buffer(buf, result, 0, csum_size)) {
-                       u32 val;
-                       u32 found = 0;
-                       memcpy(&found, result, csum_size);
-
-                       read_extent_buffer(buf, &val, 0, csum_size);
-                       btrfs_warn_rl(fs_info,
-                               "%s checksum verify failed on %llu wanted %X found %X level %d",
-                               fs_info->sb->s_id, buf->start,
-                               val, found, btrfs_header_level(buf));
-                       return -EUCLEAN;
-               }
-       } else {
-               write_extent_buffer(buf, result, 0, csum_size);
-       }
-
        return 0;
 }
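
With the verify/write logic split out, csum_tree_block() only fills a caller-provided BTRFS_CSUM_SIZE buffer; the two call sites updated later in this diff decide what to do with it. A minimal sketch of that caller contract, using the btrfs helpers shown in the hunks below (write_path is an illustrative stand-in, not a real variable):

	u8 result[BTRFS_CSUM_SIZE];
	u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);

	if (csum_tree_block(eb, result))
		return -EINVAL;		/* extent buffer could not be mapped */

	if (write_path)			/* csum_dirty_buffer(): stamp the csum into the block */
		write_extent_buffer(eb, result, 0, csum_size);
	else if (memcmp_extent_buffer(eb, result, 0, csum_size))
		return -EUCLEAN;	/* btree_readpage_end_io_hook(): on-disk csum mismatch */
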
 
@@ -341,7 +322,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
 
        if (need_lock) {
                btrfs_tree_read_lock(eb);
-               btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+               btrfs_set_lock_blocking_read(eb);
        }
 
        lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1,
@@ -413,9 +394,9 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
        return ret;
 }
 
-static int verify_level_key(struct btrfs_fs_info *fs_info,
-                           struct extent_buffer *eb, int level,
-                           struct btrfs_key *first_key, u64 parent_transid)
+int btrfs_verify_level_key(struct btrfs_fs_info *fs_info,
+                          struct extent_buffer *eb, int level,
+                          struct btrfs_key *first_key, u64 parent_transid)
 {
        int found_level;
        struct btrfs_key found_key;
@@ -423,12 +404,11 @@ static int verify_level_key(struct btrfs_fs_info *fs_info,
 
        found_level = btrfs_header_level(eb);
        if (found_level != level) {
-#ifdef CONFIG_BTRFS_DEBUG
-               WARN_ON(1);
+               WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
+                    KERN_ERR "BTRFS: tree level check failed\n");
                btrfs_err(fs_info,
 "tree level mismatch detected, bytenr=%llu level expected=%u has=%u",
                          eb->start, level, found_level);
-#endif
                return -EIO;
        }
 
@@ -449,9 +429,9 @@ static int verify_level_key(struct btrfs_fs_info *fs_info,
                btrfs_item_key_to_cpu(eb, &found_key, 0);
        ret = btrfs_comp_cpu_keys(first_key, &found_key);
 
-#ifdef CONFIG_BTRFS_DEBUG
        if (ret) {
-               WARN_ON(1);
+               WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
+                    KERN_ERR "BTRFS: tree first key check failed\n");
                btrfs_err(fs_info,
 "tree first key mismatch detected, bytenr=%llu parent_transid=%llu key expected=(%llu,%u,%llu) has=(%llu,%u,%llu)",
                          eb->start, parent_transid, first_key->objectid,
@@ -459,7 +439,6 @@ static int verify_level_key(struct btrfs_fs_info *fs_info,
                          found_key.objectid, found_key.type,
                          found_key.offset);
        }
-#endif
        return ret;
 }
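
Both #ifdef CONFIG_BTRFS_DEBUG blocks above become WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG), ...): since IS_ENABLED() folds to a compile-time 0 or 1, the backtrace-producing WARN fires only on debug builds, while the btrfs_err() report is now emitted unconditionally. The shape of the pattern in isolation (message shortened):

	if (found_level != level) {
		/* splat a backtrace only when CONFIG_BTRFS_DEBUG=y */
		WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
		     KERN_ERR "BTRFS: tree level check failed\n");
		/* the detailed error is always logged */
		btrfs_err(fs_info, "tree level mismatch detected, bytenr=%llu",
			  eb->start);
		return -EIO;
	}
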
 
@@ -492,8 +471,8 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
                        if (verify_parent_transid(io_tree, eb,
                                                   parent_transid, 0))
                                ret = -EIO;
-                       else if (verify_level_key(fs_info, eb, level,
-                                                 first_key, parent_transid))
+                       else if (btrfs_verify_level_key(fs_info, eb, level,
+                                               first_key, parent_transid))
                                ret = -EUCLEAN;
                        else
                                break;
@@ -518,7 +497,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
        }
 
        if (failed && !ret && failed_mirror)
-               repair_eb_io_failure(fs_info, eb, failed_mirror);
+               btrfs_repair_eb_io_failure(eb, failed_mirror);
 
        return ret;
 }
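
The rename to btrfs_repair_eb_io_failure() also drops the fs_info argument, matching the commit in the page title: struct extent_buffer carries its own fs_info pointer, so the helper can derive it. A hedged sketch of that plumbing only, with the repair logic elided (it is not part of this hunk):

int btrfs_repair_eb_io_failure(struct extent_buffer *eb, int mirror_num)
{
	struct btrfs_fs_info *fs_info = eb->fs_info;	/* was a separate parameter */

	/* repair of eb->start .. eb->start + eb->len - 1 proceeds as before,
	 * using fs_info taken from the buffer rather than from the caller */
	return 0;
}
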
@@ -532,6 +511,8 @@ static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct page *page)
 {
        u64 start = page_offset(page);
        u64 found_start;
+       u8 result[BTRFS_CSUM_SIZE];
+       u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
        struct extent_buffer *eb;
 
        eb = (struct extent_buffer *)page->private;
@@ -551,7 +532,11 @@ static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct page *page)
        ASSERT(memcmp_extent_buffer(eb, fs_info->fs_devices->metadata_uuid,
                        btrfs_header_fsid(), BTRFS_FSID_SIZE) == 0);
 
-       return csum_tree_block(fs_info, eb, 0);
+       if (csum_tree_block(eb, result))
+               return -EINVAL;
+
+       write_extent_buffer(eb, result, 0, csum_size);
+       return 0;
 }
 
 static int check_tree_block_fsid(struct btrfs_fs_info *fs_info,
@@ -594,7 +579,9 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
        struct extent_buffer *eb;
        struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
        struct btrfs_fs_info *fs_info = root->fs_info;
+       u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
        int ret = 0;
+       u8 result[BTRFS_CSUM_SIZE];
        int reads_done;
 
        if (!page->private)
@@ -641,10 +628,25 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
        btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
                                       eb, found_level);
 
-       ret = csum_tree_block(fs_info, eb, 1);
+       ret = csum_tree_block(eb, result);
        if (ret)
                goto err;
 
+       if (memcmp_extent_buffer(eb, result, 0, csum_size)) {
+               u32 val;
+               u32 found = 0;
+
+               memcpy(&found, result, csum_size);
+
+               read_extent_buffer(eb, &val, 0, csum_size);
+               btrfs_warn_rl(fs_info,
+               "%s checksum verify failed on %llu wanted %x found %x level %d",
+                             fs_info->sb->s_id, eb->start,
+                             val, found, btrfs_header_level(eb));
+               ret = -EUCLEAN;
+               goto err;
+       }
+
        /*
         * If this is a leaf block and it is corrupt, set the corrupt bit so
         * that we don't try and read the other copies of this block, just
@@ -660,6 +662,10 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
 
        if (!ret)
                set_extent_buffer_uptodate(eb);
+       else
+               btrfs_err(fs_info,
+                         "block=%llu read time tree block corruption detected",
+                         eb->start);
 err:
        if (reads_done &&
            test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
@@ -832,9 +838,10 @@ static blk_status_t btree_csum_one_bio(struct bio *bio)
        struct bio_vec *bvec;
        struct btrfs_root *root;
        int i, ret = 0;
+       struct bvec_iter_all iter_all;
 
        ASSERT(!bio_flagged(bio, BIO_CLONED));
-       bio_for_each_segment_all(bvec, bio, i) {
+       bio_for_each_segment_all(bvec, bio, i, iter_all) {
                root = BTRFS_I(bvec->bv_page->mapping->host)->root;
                ret = csum_dirty_buffer(root->fs_info, bvec->bv_page);
                if (ret)
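
bio_for_each_segment_all() now takes an extra struct bvec_iter_all that the caller declares on the stack, which is why iter_all appears in this hunk. A minimal sketch of the updated loop shape as used in btree_csum_one_bio() (root and ret declared as in the function above):

	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;
	int i;

	ASSERT(!bio_flagged(bio, BIO_CLONED));
	bio_for_each_segment_all(bvec, bio, i, iter_all) {
		/* one btree page per segment; csum it in place */
		root = BTRFS_I(bvec->bv_page->mapping->host)->root;
		ret = csum_dirty_buffer(root->fs_info, bvec->bv_page);
		if (ret)
			break;
	}
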
@@ -1016,13 +1023,18 @@ void readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr)
 {
        struct extent_buffer *buf = NULL;
        struct inode *btree_inode = fs_info->btree_inode;
+       int ret;
 
        buf = btrfs_find_create_tree_block(fs_info, bytenr);
        if (IS_ERR(buf))
                return;
-       read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
-                                buf, WAIT_NONE, 0);
-       free_extent_buffer(buf);
+
+       ret = read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf,
+                       WAIT_NONE, 0);
+       if (ret < 0)
+               free_extent_buffer_stale(buf);
+       else
+               free_extent_buffer(buf);
 }
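
On a failed readahead the buffer is now released with free_extent_buffer_stale(), which marks the eb stale so it is dropped from the buffer cache instead of being handed back to the next reader; a successful (or merely queued) read keeps the plain free_extent_buffer() refcount drop. The same treatment is applied to the error paths in reada_tree_block_flagged() just below. The pattern in isolation:

	ret = read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf,
				       WAIT_NONE, 0);
	if (ret < 0)
		free_extent_buffer_stale(buf);	/* failed: evict from the buffer cache */
	else
		free_extent_buffer(buf);	/* submitted fine: normal ref drop */
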
 
 int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
@@ -1042,12 +1054,12 @@ int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
        ret = read_extent_buffer_pages(io_tree, buf, WAIT_PAGE_LOCK,
                                       mirror_num);
        if (ret) {
-               free_extent_buffer(buf);
+               free_extent_buffer_stale(buf);
                return ret;
        }
 
        if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) {
-               free_extent_buffer(buf);
+               free_extent_buffer_stale(buf);
                return -EIO;
        } else if (extent_buffer_uptodate(buf)) {
                *eb = buf;
@@ -1066,19 +1078,6 @@ struct extent_buffer *btrfs_find_create_tree_block(
        return alloc_extent_buffer(fs_info, bytenr);
 }
 
-
-int btrfs_write_tree_block(struct extent_buffer *buf)
-{
-       return filemap_fdatawrite_range(buf->pages[0]->mapping, buf->start,
-                                       buf->start + buf->len - 1);
-}
-
-void btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
-{
-       filemap_fdatawait_range(buf->pages[0]->mapping,
-                               buf->start, buf->start + buf->len - 1);
-}
-
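
btrfs_write_tree_block() and btrfs_wait_tree_block_writeback() were one-line wrappers; after their removal callers open-code the filemap calls over the buffer's byte range. The equivalent inline form, taken directly from the removed bodies:

	/* was btrfs_write_tree_block(buf) */
	filemap_fdatawrite_range(buf->pages[0]->mapping, buf->start,
				 buf->start + buf->len - 1);

	/* was btrfs_wait_tree_block_writeback(buf) */
	filemap_fdatawait_range(buf->pages[0]->mapping, buf->start,
				buf->start + buf->len - 1);
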
 /*
  * Read tree block at logical address @bytenr and do basic but critical
  * verification.
@@ -1101,7 +1100,7 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
        ret = btree_read_extent_buffer_pages(fs_info, buf, parent_transid,
                                             level, first_key);
        if (ret) {
-               free_extent_buffer(buf);
+               free_extent_buffer_stale(buf);
                return ERR_PTR(ret);
        }
        return buf;
@@ -1120,7 +1119,7 @@ void clean_tree_block(struct btrfs_fs_info *fs_info,
                                                 -buf->len,
                                                 fs_info->dirty_metadata_batch);
                        /* ugh, clear_extent_buffer_dirty needs to lock the page */
-                       btrfs_set_lock_blocking(buf);
+                       btrfs_set_lock_blocking_write(buf);
                        clear_extent_buffer_dirty(buf);
                }
        }
@@ -1175,6 +1174,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
        INIT_LIST_HEAD(&root->delalloc_root);
        INIT_LIST_HEAD(&root->ordered_extents);
        INIT_LIST_HEAD(&root->ordered_root);
+       INIT_LIST_HEAD(&root->reloc_dirty_list);
        INIT_LIST_HEAD(&root->logged_list[0]);
        INIT_LIST_HEAD(&root->logged_list[1]);
        spin_lock_init(&root->inode_lock);
@@ -1205,7 +1205,8 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
        root->log_transid_committed = -1;
        root->last_log_commit = 0;
        if (!dummy)
-               extent_io_tree_init(&root->dirty_log_pages, NULL);
+               extent_io_tree_init(fs_info, &root->dirty_log_pages,
+                                   IO_TREE_ROOT_DIRTY_LOG_PAGES, NULL);
 
        memset(&root->root_key, 0, sizeof(root->root_key));
        memset(&root->root_item, 0, sizeof(root->root_item));
@@ -1218,6 +1219,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
        root->anon_dev = 0;
 
        spin_lock_init(&root->root_item_lock);
+       btrfs_qgroup_init_swapped_blocks(&root->swapped_blocks);
 }
 
 static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info,
@@ -1258,10 +1260,17 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
        struct btrfs_root *tree_root = fs_info->tree_root;
        struct btrfs_root *root;
        struct btrfs_key key;
+       unsigned int nofs_flag;
        int ret = 0;
        uuid_le uuid = NULL_UUID_LE;
 
+       /*
+        * We're holding a transaction handle, so use a NOFS memory allocation
+        * context to avoid deadlock if reclaim happens.
+        */
+       nofs_flag = memalloc_nofs_save();
        root = btrfs_alloc_root(fs_info, GFP_KERNEL);
+       memalloc_nofs_restore(nofs_flag);
        if (!root)
                return ERR_PTR(-ENOMEM);
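
This is what the new <linux/sched/mm.h> include at the top of the file is for: memalloc_nofs_save()/memalloc_nofs_restore() open a scope in which allocations are implicitly treated as GFP_NOFS even when GFP_KERNEL is passed, so direct reclaim cannot re-enter the filesystem while the transaction handle is held. The scoped pattern in isolation:

#include <linux/sched/mm.h>

	unsigned int nofs_flag;

	nofs_flag = memalloc_nofs_save();	/* everything below allocates as if GFP_NOFS */
	root = btrfs_alloc_root(fs_info, GFP_KERNEL);
	memalloc_nofs_restore(nofs_flag);	/* back to the previous allocation context */
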
 
@@ -1707,9 +1716,7 @@ static int cleaner_kthread(void *arg)
                        goto sleep;
                }
 
-               mutex_lock(&fs_info->cleaner_delayed_iput_mutex);
                btrfs_run_delayed_iputs(fs_info);
-               mutex_unlock(&fs_info->cleaner_delayed_iput_mutex);
 
                again = btrfs_clean_one_deleted_snapshot(root);
                mutex_unlock(&fs_info->cleaner_mutex);
@@ -2101,7 +2108,7 @@ static void btrfs_init_scrub(struct btrfs_fs_info *fs_info)
        atomic_set(&fs_info->scrubs_paused, 0);
        atomic_set(&fs_info->scrub_cancel_req, 0);
        init_waitqueue_head(&fs_info->scrub_pause_wait);
-       fs_info->scrub_workers_refcnt = 0;
+       refcount_set(&fs_info->scrub_workers_refcnt, 0);
 }
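
scrub_workers_refcnt becomes a refcount_t, which warns on overflow and on increments from zero. The scrub call sites themselves are not part of this diff, but a counter initialised this way is typically driven roughly as sketched below (shape is illustrative only):

	refcount_set(&fs_info->scrub_workers_refcnt, 0);	/* as in btrfs_init_scrub() */

	/* first scrub brings the worker threads up; refcount_t forbids 0 -> 1 via inc */
	if (refcount_read(&fs_info->scrub_workers_refcnt) == 0) {
		/* ... allocate the scrub workqueues ... */
		refcount_set(&fs_info->scrub_workers_refcnt, 1);
	} else {
		refcount_inc(&fs_info->scrub_workers_refcnt);
	}

	/* last scrub finishing tears them down again */
	if (refcount_dec_and_test(&fs_info->scrub_workers_refcnt)) {
		/* ... destroy the scrub workqueues ... */
	}
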
 
 static void btrfs_init_balance(struct btrfs_fs_info *fs_info)
@@ -2129,8 +2136,9 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
        inode->i_mapping->a_ops = &btree_aops;
 
        RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
-       extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode);
-       BTRFS_I(inode)->io_tree.track_uptodate = 0;
+       extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree,
+                           IO_TREE_INODE_IO, inode);
+       BTRFS_I(inode)->io_tree.track_uptodate = false;
        extent_map_tree_init(&BTRFS_I(inode)->extent_tree);
 
        BTRFS_I(inode)->io_tree.ops = &btree_extent_io_ops;
@@ -2666,7 +2674,6 @@ int open_ctree(struct super_block *sb,
        mutex_init(&fs_info->delete_unused_bgs_mutex);
        mutex_init(&fs_info->reloc_mutex);
        mutex_init(&fs_info->delalloc_root_mutex);
-       mutex_init(&fs_info->cleaner_delayed_iput_mutex);
        seqlock_init(&fs_info->profiles_lock);
 
        INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
@@ -2688,6 +2695,7 @@ int open_ctree(struct super_block *sb,
        atomic_set(&fs_info->defrag_running, 0);
        atomic_set(&fs_info->qgroup_op_seq, 0);
        atomic_set(&fs_info->reada_works_cnt, 0);
+       atomic_set(&fs_info->nr_delayed_iputs, 0);
        atomic64_set(&fs_info->tree_mod_seq, 0);
        fs_info->sb = sb;
        fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE;
@@ -2739,8 +2747,10 @@ int open_ctree(struct super_block *sb,
        fs_info->block_group_cache_tree = RB_ROOT;
        fs_info->first_logical_byte = (u64)-1;
 
-       extent_io_tree_init(&fs_info->freed_extents[0], NULL);
-       extent_io_tree_init(&fs_info->freed_extents[1], NULL);
+       extent_io_tree_init(fs_info, &fs_info->freed_extents[0],
+                           IO_TREE_FS_INFO_FREED_EXTENTS0, NULL);
+       extent_io_tree_init(fs_info, &fs_info->freed_extents[1],
+                           IO_TREE_FS_INFO_FREED_EXTENTS1, NULL);
        fs_info->pinned_extents = &fs_info->freed_extents[0];
        set_bit(BTRFS_FS_BARRIER, &fs_info->flags);
 
@@ -2765,6 +2775,7 @@ int open_ctree(struct super_block *sb,
        init_waitqueue_head(&fs_info->transaction_wait);
        init_waitqueue_head(&fs_info->transaction_blocked_wait);
        init_waitqueue_head(&fs_info->async_submit_wait);
+       init_waitqueue_head(&fs_info->delayed_iputs_wait);
 
        INIT_LIST_HEAD(&fs_info->pinned_chunks);
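
nr_delayed_iputs (initialised a few hunks up) and this delayed_iputs_wait waitqueue work as a pair: producers bump the counter when queueing a delayed iput and wake the queue once it has run, so waiters can block until the count reaches zero, which is also why the cleaner_delayed_iput_mutex could be dropped above. A minimal consumer sketch; the helper name is hypothetical, not something added by this diff:

/* Hypothetical helper: block until all queued delayed iputs have run. */
static void example_wait_delayed_iputs(struct btrfs_fs_info *fs_info)
{
	wait_event(fs_info->delayed_iputs_wait,
		   atomic_read(&fs_info->nr_delayed_iputs) == 0);
}
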
 
@@ -2948,7 +2959,7 @@ int open_ctree(struct super_block *sb,
        sb->s_bdi->congested_fn = btrfs_congested_fn;
        sb->s_bdi->congested_data = fs_info;
        sb->s_bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK;
-       sb->s_bdi->ra_pages = VM_MAX_READAHEAD * SZ_1K / PAGE_SIZE;
+       sb->s_bdi->ra_pages = VM_READAHEAD_PAGES;
        sb->s_bdi->ra_pages *= btrfs_super_num_devices(disk_super);
        sb->s_bdi->ra_pages = max(sb->s_bdi->ra_pages, SZ_4M / PAGE_SIZE);
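
VM_READAHEAD_PAGES replaces the open-coded VM_MAX_READAHEAD * SZ_1K / PAGE_SIZE; both express the default 128 KiB readahead window in pages, so the computed value should be unchanged. Worked out for 4 KiB pages, assuming the mm.h definitions of the constants at the time:

	/*
	 * old: VM_MAX_READAHEAD * SZ_1K / PAGE_SIZE = 128 * 1024 / 4096 = 32 pages
	 * new: VM_READAHEAD_PAGES = SZ_128K / PAGE_SIZE = 131072 / 4096 = 32 pages
	 */
	sb->s_bdi->ra_pages = VM_READAHEAD_PAGES;
	sb->s_bdi->ra_pages *= btrfs_super_num_devices(disk_super);	/* scale by device count */
	sb->s_bdi->ra_pages = max(sb->s_bdi->ra_pages, SZ_4M / PAGE_SIZE);	/* floor of 4 MiB */
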
 
@@ -4238,16 +4249,9 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 
                head = rb_entry(node, struct btrfs_delayed_ref_head,
                                href_node);
-               if (!mutex_trylock(&head->mutex)) {
-                       refcount_inc(&head->refs);
-                       spin_unlock(&delayed_refs->lock);
-
-                       mutex_lock(&head->mutex);
-                       mutex_unlock(&head->mutex);
-                       btrfs_put_delayed_ref_head(head);
-                       spin_lock(&delayed_refs->lock);
+               if (btrfs_delayed_ref_lock(delayed_refs, head))
                        continue;
-               }
+
                spin_lock(&head->lock);
                while ((n = rb_first_cached(&head->ref_tree)) != NULL) {
                        ref = rb_entry(n, struct btrfs_delayed_ref_node,
@@ -4263,12 +4267,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
                if (head->must_insert_reserved)
                        pin_bytes = true;
                btrfs_free_delayed_extent_op(head->extent_op);
-               delayed_refs->num_heads--;
-               if (head->processing == 0)
-                       delayed_refs->num_heads_ready--;
-               atomic_dec(&delayed_refs->num_entries);
-               rb_erase_cached(&head->href_node, &delayed_refs->href_root);
-               RB_CLEAR_NODE(&head->href_node);
+               btrfs_delete_ref_head(delayed_refs, head);
                spin_unlock(&head->lock);
                spin_unlock(&delayed_refs->lock);
                mutex_unlock(&head->mutex);
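
The open-coded head locking and bookkeeping is replaced with the shared delayed-ref helpers. Judging from the call site, btrfs_delayed_ref_lock() wraps the trylock/sleep/re-lock dance that was removed and returns nonzero when the loop should skip the head, while btrfs_delete_ref_head() centralises the accounting. A sketch of the latter, assembled purely from the bookkeeping lines deleted above (the real helper lives in delayed-ref.c and may differ in detail):

void btrfs_delete_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
			   struct btrfs_delayed_ref_head *head)
{
	delayed_refs->num_heads--;
	if (head->processing == 0)
		delayed_refs->num_heads_ready--;
	atomic_dec(&delayed_refs->num_entries);
	rb_erase_cached(&head->href_node, &delayed_refs->href_root);
	RB_CLEAR_NODE(&head->href_node);
}
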