btrfs: rename submit callbacks and drop double underscores
[sfrench/cifs-2.6.git] / fs / btrfs / disk-io.c
index a8ecccfc36ded1d9470deb0d78f77bcdacb6d0cf..156116655a32f788ab581b2db2a8a1fb770f38aa 100644 (file)
 #include <linux/ratelimit.h>
 #include <linux/uuid.h>
 #include <linux/semaphore.h>
+#include <linux/error-injection.h>
+#include <linux/crc32c.h>
 #include <asm/unaligned.h>
 #include "ctree.h"
 #include "disk-io.h"
-#include "hash.h"
 #include "transaction.h"
 #include "btrfs_inode.h"
 #include "volumes.h"
@@ -61,7 +62,8 @@
                                 BTRFS_HEADER_FLAG_RELOC |\
                                 BTRFS_SUPER_FLAG_ERROR |\
                                 BTRFS_SUPER_FLAG_SEEDING |\
-                                BTRFS_SUPER_FLAG_METADUMP)
+                                BTRFS_SUPER_FLAG_METADUMP |\
+                                BTRFS_SUPER_FLAG_METADUMP_V2)
 
 static const struct extent_io_ops btree_extent_io_ops;
 static void end_workqueue_fn(struct btrfs_work *work);
@@ -108,7 +110,7 @@ int __init btrfs_end_io_wq_init(void)
        return 0;
 }
 
-void btrfs_end_io_wq_exit(void)
+void __cold btrfs_end_io_wq_exit(void)
 {
        kmem_cache_destroy(btrfs_end_io_wq_cache);
 }
@@ -122,8 +124,8 @@ struct async_submit_bio {
        void *private_data;
        struct btrfs_fs_info *fs_info;
        struct bio *bio;
-       extent_submit_bio_hook_t *submit_bio_start;
-       extent_submit_bio_hook_t *submit_bio_done;
+       extent_submit_bio_start_t *submit_bio_start;
+       extent_submit_bio_done_t *submit_bio_done;
        int mirror_num;
        unsigned long bio_flags;
        /*
@@ -220,7 +222,7 @@ void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb,
  * extents on the btree inode are pretty simple, there's one extent
  * that covers the entire device
  */
-static struct extent_map *btree_get_extent(struct btrfs_inode *inode,
+struct extent_map *btree_get_extent(struct btrfs_inode *inode,
                struct page *page, size_t pg_offset, u64 start, u64 len,
                int create)
 {
@@ -268,7 +270,7 @@ out:
 
 u32 btrfs_csum_data(const char *data, u32 seed, size_t len)
 {
-       return btrfs_crc32c(seed, data, len);
+       return crc32c(seed, data, len);
 }
 
 void btrfs_csum_final(u32 crc, u8 *result)
@@ -285,7 +287,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
                           int verify)
 {
        u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
-       char *result = NULL;
+       char result[BTRFS_CSUM_SIZE];
        unsigned long len;
        unsigned long cur_len;
        unsigned long offset = BTRFS_CSUM_SIZE;
@@ -294,7 +296,6 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
        unsigned long map_len;
        int err;
        u32 crc = ~(u32)0;
-       unsigned long inline_result;
 
        len = buf->len - offset;
        while (len > 0) {
@@ -308,13 +309,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
                len -= cur_len;
                offset += cur_len;
        }
-       if (csum_size > sizeof(inline_result)) {
-               result = kzalloc(csum_size, GFP_NOFS);
-               if (!result)
-                       return -ENOMEM;
-       } else {
-               result = (char *)&inline_result;
-       }
+       memset(result, 0, BTRFS_CSUM_SIZE);
 
        btrfs_csum_final(crc, result);
 
@@ -329,15 +324,12 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
                                "%s checksum verify failed on %llu wanted %X found %X level %d",
                                fs_info->sb->s_id, buf->start,
                                val, found, btrfs_header_level(buf));
-                       if (result != (char *)&inline_result)
-                               kfree(result);
                        return -EUCLEAN;
                }
        } else {
                write_extent_buffer(buf, result, 0, csum_size);
        }
-       if (result != (char *)&inline_result)
-               kfree(result);
+
        return 0;
 }
 
@@ -391,7 +383,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
                clear_extent_buffer_uptodate(eb);
 out:
        unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1,
-                            &cached_state, GFP_NOFS);
+                            &cached_state);
        if (need_lock)
                btrfs_tree_read_unlock_blocking(eb);
        return ret;
@@ -455,7 +447,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
        io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
        while (1) {
                ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE,
-                                              btree_get_extent, mirror_num);
+                                              mirror_num);
                if (!ret) {
                        if (!verify_parent_transid(io_tree, eb,
                                                   parent_transid, 0))
@@ -610,12 +602,12 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
         * that we don't try and read the other copies of this block, just
         * return -EIO.
         */
-       if (found_level == 0 && btrfs_check_leaf_full(root, eb)) {
+       if (found_level == 0 && btrfs_check_leaf_full(fs_info, eb)) {
                set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
                ret = -EIO;
        }
 
-       if (found_level > 0 && btrfs_check_node(root, eb))
+       if (found_level > 0 && btrfs_check_node(fs_info, eb))
                ret = -EIO;
 
        if (!ret)
@@ -718,14 +710,6 @@ blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
        return 0;
 }
 
-unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info)
-{
-       unsigned long limit = min_t(unsigned long,
-                                   info->thread_pool_size,
-                                   info->fs_devices->open_devices);
-       return 256 * limit;
-}
-
 static void run_one_async_start(struct btrfs_work *work)
 {
        struct async_submit_bio *async;
@@ -733,7 +717,6 @@ static void run_one_async_start(struct btrfs_work *work)
 
        async = container_of(work, struct  async_submit_bio, work);
        ret = async->submit_bio_start(async->private_data, async->bio,
-                                     async->mirror_num, async->bio_flags,
                                      async->bio_offset);
        if (ret)
                async->status = ret;
@@ -752,8 +735,7 @@ static void run_one_async_done(struct btrfs_work *work)
                return;
        }
 
-       async->submit_bio_done(async->private_data, async->bio, async->mirror_num,
-                              async->bio_flags, async->bio_offset);
+       async->submit_bio_done(async->private_data, async->bio, async->mirror_num);
 }
 
 static void run_one_async_free(struct btrfs_work *work)
@@ -767,8 +749,8 @@ static void run_one_async_free(struct btrfs_work *work)
 blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
                                 int mirror_num, unsigned long bio_flags,
                                 u64 bio_offset, void *private_data,
-                                extent_submit_bio_hook_t *submit_bio_start,
-                                extent_submit_bio_hook_t *submit_bio_done)
+                                extent_submit_bio_start_t *submit_bio_start,
+                                extent_submit_bio_done_t *submit_bio_done)
 {
        struct async_submit_bio *async;
 
@@ -815,8 +797,7 @@ static blk_status_t btree_csum_one_bio(struct bio *bio)
        return errno_to_blk_status(ret);
 }
 
-static blk_status_t __btree_submit_bio_start(void *private_data, struct bio *bio,
-                                            int mirror_num, unsigned long bio_flags,
+static blk_status_t btree_submit_bio_start(void *private_data, struct bio *bio,
                                             u64 bio_offset)
 {
        /*
@@ -826,9 +807,8 @@ static blk_status_t __btree_submit_bio_start(void *private_data, struct bio *bio
        return btree_csum_one_bio(bio);
 }
 
-static blk_status_t __btree_submit_bio_done(void *private_data, struct bio *bio,
-                                           int mirror_num, unsigned long bio_flags,
-                                           u64 bio_offset)
+static blk_status_t btree_submit_bio_done(void *private_data, struct bio *bio,
+                                           int mirror_num)
 {
        struct inode *inode = private_data;
        blk_status_t ret;
@@ -887,8 +867,8 @@ static blk_status_t btree_submit_bio_hook(void *private_data, struct bio *bio,
                 */
                ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, 0,
                                          bio_offset, private_data,
-                                         __btree_submit_bio_start,
-                                         __btree_submit_bio_done);
+                                         btree_submit_bio_start,
+                                         btree_submit_bio_done);
        }
 
        if (ret)
@@ -1012,7 +992,7 @@ void readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr)
        if (IS_ERR(buf))
                return;
        read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
-                                buf, WAIT_NONE, btree_get_extent, 0);
+                                buf, WAIT_NONE, 0);
        free_extent_buffer(buf);
 }
 
@@ -1031,7 +1011,7 @@ int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
        set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags);
 
        ret = read_extent_buffer_pages(io_tree, buf, WAIT_PAGE_LOCK,
-                                      btree_get_extent, mirror_num);
+                                      mirror_num);
        if (ret) {
                free_extent_buffer(buf);
                return ret;
@@ -1243,7 +1223,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
        struct btrfs_root *root;
        struct btrfs_key key;
        int ret = 0;
-       uuid_le uuid;
+       uuid_le uuid = NULL_UUID_LE;
 
        root = btrfs_alloc_root(fs_info, GFP_KERNEL);
        if (!root)
@@ -1284,7 +1264,8 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
        btrfs_set_root_used(&root->root_item, leaf->len);
        btrfs_set_root_last_snapshot(&root->root_item, 0);
        btrfs_set_root_dirid(&root->root_item, 0);
-       uuid_le_gen(&uuid);
+       if (is_fstree(objectid))
+               uuid_le_gen(&uuid);
        memcpy(root->root_item.uuid, uuid.b, BTRFS_UUID_SIZE);
        root->root_item.drop_level = 0;
 
@@ -1815,12 +1796,10 @@ sleep:
                if (unlikely(test_bit(BTRFS_FS_STATE_ERROR,
                                      &fs_info->fs_state)))
                        btrfs_cleanup_transaction(fs_info);
-               set_current_state(TASK_INTERRUPTIBLE);
                if (!kthread_should_stop() &&
                                (!btrfs_transaction_blocked(fs_info) ||
                                 cannot_commit))
-                       schedule_timeout(delay);
-               __set_current_state(TASK_RUNNING);
+                       schedule_timeout_interruptible(delay);
        } while (!kthread_should_stop());
        return 0;
 }
@@ -2190,7 +2169,7 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
 static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
                struct btrfs_fs_devices *fs_devices)
 {
-       int max_active = fs_info->thread_pool_size;
+       u32 max_active = fs_info->thread_pool_size;
        unsigned int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;
 
        fs_info->workers =
@@ -2411,7 +2390,7 @@ int open_ctree(struct super_block *sb,
        int err = -EINVAL;
        int num_backups_tried = 0;
        int backup_index = 0;
-       int max_active;
+       u32 max_active;
        int clear_free_space_tree = 0;
 
        tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
@@ -2775,10 +2754,10 @@ int open_ctree(struct super_block *sb,
        }
 
        /*
-        * keep the device that is marked to be the target device for the
-        * dev_replace procedure
+        * Keep the devid that is marked to be the target device for the
+        * device replace procedure
         */
-       btrfs_close_extra_devices(fs_devices, 0);
+       btrfs_free_extra_devids(fs_devices, 0);
 
        if (!fs_devices->latest_bdev) {
                btrfs_err(fs_info, "failed to read devices");
@@ -2841,7 +2820,7 @@ retry_root_backup:
                goto fail_block_groups;
        }
 
-       btrfs_close_extra_devices(fs_devices, 1);
+       btrfs_free_extra_devids(fs_devices, 1);
 
        ret = btrfs_sysfs_add_fsid(fs_devices, NULL);
        if (ret) {
@@ -2875,7 +2854,7 @@ retry_root_backup:
                goto fail_sysfs;
        }
 
-       if (!sb_rdonly(sb) && !btrfs_check_rw_degradable(fs_info)) {
+       if (!sb_rdonly(sb) && !btrfs_check_rw_degradable(fs_info, NULL)) {
                btrfs_warn(fs_info,
                "writeable mount is not allowed due to too many missing devices");
                goto fail_sysfs;
@@ -3123,6 +3102,7 @@ recovery_tree_root:
                goto fail_block_groups;
        goto retry_root_backup;
 }
+ALLOW_ERROR_INJECTION(open_ctree, ERRNO);
 
 static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
 {
@@ -3296,6 +3276,7 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
        struct buffer_head *bh;
        int i;
        int errors = 0;
+       bool primary_failed = false;
        u64 bytenr;
 
        if (max_mirrors == 0)
@@ -3312,11 +3293,16 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
                                      BTRFS_SUPER_INFO_SIZE);
                if (!bh) {
                        errors++;
+                       if (i == 0)
+                               primary_failed = true;
                        continue;
                }
                wait_on_buffer(bh);
-               if (!buffer_uptodate(bh))
+               if (!buffer_uptodate(bh)) {
                        errors++;
+                       if (i == 0)
+                               primary_failed = true;
+               }
 
                /* drop our reference */
                brelse(bh);
@@ -3325,6 +3311,13 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
                brelse(bh);
        }
 
+       /* Primary super block (mirror 0) failed: log it and force an error return */
+       if (primary_failed) {
+               btrfs_err(device->fs_info, "error writing primary super block to device %llu",
+                         device->devid);
+               return -1;
+       }
+
        return errors < i ? 0 : -1;
 }
 
@@ -3357,7 +3350,7 @@ static void write_dev_flush(struct btrfs_device *device)
        bio->bi_private = &device->flush_wait;
 
        btrfsic_submit_bio(bio);
-       device->flush_bio_sent = 1;
+       set_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state);
 }
 
 /*
@@ -3367,10 +3360,10 @@ static blk_status_t wait_dev_flush(struct btrfs_device *device)
 {
        struct bio *bio = device->flush_bio;
 
-       if (!device->flush_bio_sent)
+       if (!test_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state))
                return BLK_STS_OK;
 
-       device->flush_bio_sent = 0;
+       clear_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state);
        wait_for_completion_io(&device->flush_wait);
 
        return bio->bi_status;
@@ -3378,7 +3371,7 @@ static blk_status_t wait_dev_flush(struct btrfs_device *device)
 
 static int check_barrier_error(struct btrfs_fs_info *fs_info)
 {
-       if (!btrfs_check_rw_degradable(fs_info))
+       if (!btrfs_check_rw_degradable(fs_info, NULL))
                return -EIO;
        return 0;
 }
@@ -3394,14 +3387,16 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
        int errors_wait = 0;
        blk_status_t ret;
 
+       lockdep_assert_held(&info->fs_devices->device_list_mutex);
        /* send down all the barriers */
        head = &info->fs_devices->devices;
-       list_for_each_entry_rcu(dev, head, dev_list) {
-               if (dev->missing)
+       list_for_each_entry(dev, head, dev_list) {
+               if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
                        continue;
                if (!dev->bdev)
                        continue;
-               if (!dev->in_fs_metadata || !dev->writeable)
+               if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
+                   !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
                        continue;
 
                write_dev_flush(dev);
@@ -3409,14 +3404,15 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
        }
 
        /* wait for all the barriers */
-       list_for_each_entry_rcu(dev, head, dev_list) {
-               if (dev->missing)
+       list_for_each_entry(dev, head, dev_list) {
+               if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
                        continue;
                if (!dev->bdev) {
                        errors_wait++;
                        continue;
                }
-               if (!dev->in_fs_metadata || !dev->writeable)
+               if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
+                   !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
                        continue;
 
                ret = wait_dev_flush(dev);
@@ -3508,12 +3504,13 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
                }
        }
 
-       list_for_each_entry_rcu(dev, head, dev_list) {
+       list_for_each_entry(dev, head, dev_list) {
                if (!dev->bdev) {
                        total_errors++;
                        continue;
                }
-               if (!dev->in_fs_metadata || !dev->writeable)
+               if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
+                   !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
                        continue;
 
                btrfs_set_stack_device_generation(dev_item, 0);
@@ -3549,10 +3546,11 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
        }
 
        total_errors = 0;
-       list_for_each_entry_rcu(dev, head, dev_list) {
+       list_for_each_entry(dev, head, dev_list) {
                if (!dev->bdev)
                        continue;
-               if (!dev->in_fs_metadata || !dev->writeable)
+               if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
+                   !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
                        continue;
 
                ret = wait_dev_supers(dev, max_mirrors);
@@ -3852,7 +3850,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
         * So here we should only check item pointers, not item data.
         */
        if (btrfs_header_level(buf) == 0 &&
-           btrfs_check_leaf_relaxed(root, buf)) {
+           btrfs_check_leaf_relaxed(fs_info, buf)) {
                btrfs_print_leaf(buf);
                ASSERT(0);
        }
@@ -3910,9 +3908,11 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info)
                btrfs_err(fs_info, "no valid FS found");
                ret = -EINVAL;
        }
-       if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP)
-               btrfs_warn(fs_info, "unrecognized super flag: %llu",
+       if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP) {
+               btrfs_err(fs_info, "unrecognized or unsupported super flag: %llu",
                                btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP);
+               ret = -EINVAL;
+       }
        if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
                btrfs_err(fs_info, "tree_root level too big: %d >= %d",
                                btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
@@ -4313,11 +4313,6 @@ void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans,
                cache = list_first_entry(&cur_trans->dirty_bgs,
                                         struct btrfs_block_group_cache,
                                         dirty_list);
-               if (!cache) {
-                       btrfs_err(fs_info, "orphan block group dirty_bgs list");
-                       spin_unlock(&cur_trans->dirty_bgs_lock);
-                       return;
-               }
 
                if (!list_empty(&cache->io_list)) {
                        spin_unlock(&cur_trans->dirty_bgs_lock);
@@ -4337,14 +4332,14 @@ void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans,
        }
        spin_unlock(&cur_trans->dirty_bgs_lock);
 
+       /*
+        * Refer to the definition of io_bgs member for details why it's safe
+        * to use it without any locking
+        */
        while (!list_empty(&cur_trans->io_bgs)) {
                cache = list_first_entry(&cur_trans->io_bgs,
                                         struct btrfs_block_group_cache,
                                         io_list);
-               if (!cache) {
-                       btrfs_err(fs_info, "orphan block group on io_bgs list");
-                       return;
-               }
 
                list_del_init(&cache->io_list);
                spin_lock(&cache->lock);