#include <linux/ratelimit.h>
#include <linux/uuid.h>
#include <linux/semaphore.h>
+#include <linux/error-injection.h>
+#include <linux/crc32c.h>
#include <asm/unaligned.h>
#include "ctree.h"
#include "disk-io.h"
-#include "hash.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "volumes.h"
BTRFS_HEADER_FLAG_RELOC |\
BTRFS_SUPER_FLAG_ERROR |\
BTRFS_SUPER_FLAG_SEEDING |\
- BTRFS_SUPER_FLAG_METADUMP)
+ BTRFS_SUPER_FLAG_METADUMP |\
+ BTRFS_SUPER_FLAG_METADUMP_V2)
static const struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
return 0;
}
/*
 * Tear down the end-io workqueue cache: destroys btrfs_end_io_wq_cache via
 * kmem_cache_destroy(). Takes no arguments and returns nothing.
 * This hunk only adds the __cold annotation to the definition.
 */
-void btrfs_end_io_wq_exit(void)
+void __cold btrfs_end_io_wq_exit(void)
{
kmem_cache_destroy(btrfs_end_io_wq_cache);
}
void *private_data;
struct btrfs_fs_info *fs_info;
struct bio *bio;
- extent_submit_bio_hook_t *submit_bio_start;
- extent_submit_bio_hook_t *submit_bio_done;
+ extent_submit_bio_start_t *submit_bio_start;
+ extent_submit_bio_done_t *submit_bio_done;
int mirror_num;
unsigned long bio_flags;
/*
* extents on the btree inode are pretty simple, there's one extent
* that covers the entire device
*/
-static struct extent_map *btree_get_extent(struct btrfs_inode *inode,
+struct extent_map *btree_get_extent(struct btrfs_inode *inode,
struct page *page, size_t pg_offset, u64 start, u64 len,
int create)
{
/*
 * Checksum @len bytes starting at @data, using @seed as the starting value,
 * and return the resulting 32-bit value.
 *
 * This hunk replaces the call to btrfs_crc32c() with a direct call to
 * crc32c(), passing the same arguments in the same order.
 */
u32 btrfs_csum_data(const char *data, u32 seed, size_t len)
{
- return btrfs_crc32c(seed, data, len);
+ return crc32c(seed, data, len);
}
void btrfs_csum_final(u32 crc, u8 *result)
int verify)
{
u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
- char *result = NULL;
+ char result[BTRFS_CSUM_SIZE];
unsigned long len;
unsigned long cur_len;
unsigned long offset = BTRFS_CSUM_SIZE;
unsigned long map_len;
int err;
u32 crc = ~(u32)0;
- unsigned long inline_result;
len = buf->len - offset;
while (len > 0) {
len -= cur_len;
offset += cur_len;
}
- if (csum_size > sizeof(inline_result)) {
- result = kzalloc(csum_size, GFP_NOFS);
- if (!result)
- return -ENOMEM;
- } else {
- result = (char *)&inline_result;
- }
+ memset(result, 0, BTRFS_CSUM_SIZE);
btrfs_csum_final(crc, result);
"%s checksum verify failed on %llu wanted %X found %X level %d",
fs_info->sb->s_id, buf->start,
val, found, btrfs_header_level(buf));
- if (result != (char *)&inline_result)
- kfree(result);
return -EUCLEAN;
}
} else {
write_extent_buffer(buf, result, 0, csum_size);
}
- if (result != (char *)&inline_result)
- kfree(result);
+
return 0;
}
clear_extent_buffer_uptodate(eb);
out:
unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1,
- &cached_state, GFP_NOFS);
+ &cached_state);
if (need_lock)
btrfs_tree_read_unlock_blocking(eb);
return ret;
io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
while (1) {
ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE,
- btree_get_extent, mirror_num);
+ mirror_num);
if (!ret) {
if (!verify_parent_transid(io_tree, eb,
parent_transid, 0))
* that we don't try and read the other copies of this block, just
* return -EIO.
*/
- if (found_level == 0 && btrfs_check_leaf_full(root, eb)) {
+ if (found_level == 0 && btrfs_check_leaf_full(fs_info, eb)) {
set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
ret = -EIO;
}
- if (found_level > 0 && btrfs_check_node(root, eb))
+ if (found_level > 0 && btrfs_check_node(fs_info, eb))
ret = -EIO;
if (!ret)
return 0;
}
-unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info)
-{
- unsigned long limit = min_t(unsigned long,
- info->thread_pool_size,
- info->fs_devices->open_devices);
- return 256 * limit;
-}
-
static void run_one_async_start(struct btrfs_work *work)
{
struct async_submit_bio *async;
async = container_of(work, struct async_submit_bio, work);
ret = async->submit_bio_start(async->private_data, async->bio,
- async->mirror_num, async->bio_flags,
async->bio_offset);
if (ret)
async->status = ret;
return;
}
- async->submit_bio_done(async->private_data, async->bio, async->mirror_num,
- async->bio_flags, async->bio_offset);
+ async->submit_bio_done(async->private_data, async->bio, async->mirror_num);
}
static void run_one_async_free(struct btrfs_work *work)
blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset, void *private_data,
- extent_submit_bio_hook_t *submit_bio_start,
- extent_submit_bio_hook_t *submit_bio_done)
+ extent_submit_bio_start_t *submit_bio_start,
+ extent_submit_bio_done_t *submit_bio_done)
{
struct async_submit_bio *async;
return errno_to_blk_status(ret);
}
-static blk_status_t __btree_submit_bio_start(void *private_data, struct bio *bio,
- int mirror_num, unsigned long bio_flags,
+static blk_status_t btree_submit_bio_start(void *private_data, struct bio *bio,
u64 bio_offset)
{
/*
return btree_csum_one_bio(bio);
}
-static blk_status_t __btree_submit_bio_done(void *private_data, struct bio *bio,
- int mirror_num, unsigned long bio_flags,
- u64 bio_offset)
+static blk_status_t btree_submit_bio_done(void *private_data, struct bio *bio,
+ int mirror_num)
{
struct inode *inode = private_data;
blk_status_t ret;
*/
ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, 0,
bio_offset, private_data,
- __btree_submit_bio_start,
- __btree_submit_bio_done);
+ btree_submit_bio_start,
+ btree_submit_bio_done);
}
if (ret)
if (IS_ERR(buf))
return;
read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
- buf, WAIT_NONE, btree_get_extent, 0);
+ buf, WAIT_NONE, 0);
free_extent_buffer(buf);
}
set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags);
ret = read_extent_buffer_pages(io_tree, buf, WAIT_PAGE_LOCK,
- btree_get_extent, mirror_num);
+ mirror_num);
if (ret) {
free_extent_buffer(buf);
return ret;
struct btrfs_root *root;
struct btrfs_key key;
int ret = 0;
- uuid_le uuid;
+ uuid_le uuid = NULL_UUID_LE;
root = btrfs_alloc_root(fs_info, GFP_KERNEL);
if (!root)
btrfs_set_root_used(&root->root_item, leaf->len);
btrfs_set_root_last_snapshot(&root->root_item, 0);
btrfs_set_root_dirid(&root->root_item, 0);
- uuid_le_gen(&uuid);
+ if (is_fstree(objectid))
+ uuid_le_gen(&uuid);
memcpy(root->root_item.uuid, uuid.b, BTRFS_UUID_SIZE);
root->root_item.drop_level = 0;
if (unlikely(test_bit(BTRFS_FS_STATE_ERROR,
&fs_info->fs_state)))
btrfs_cleanup_transaction(fs_info);
- set_current_state(TASK_INTERRUPTIBLE);
if (!kthread_should_stop() &&
(!btrfs_transaction_blocked(fs_info) ||
cannot_commit))
- schedule_timeout(delay);
- __set_current_state(TASK_RUNNING);
+ schedule_timeout_interruptible(delay);
} while (!kthread_should_stop());
return 0;
}
static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
struct btrfs_fs_devices *fs_devices)
{
- int max_active = fs_info->thread_pool_size;
+ u32 max_active = fs_info->thread_pool_size;
unsigned int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;
fs_info->workers =
int err = -EINVAL;
int num_backups_tried = 0;
int backup_index = 0;
- int max_active;
+ u32 max_active;
int clear_free_space_tree = 0;
tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
}
/*
- * keep the device that is marked to be the target device for the
- * dev_replace procedure
+ * Keep the devid that is marked to be the target device for the
+ * device replace procedure
*/
- btrfs_close_extra_devices(fs_devices, 0);
+ btrfs_free_extra_devids(fs_devices, 0);
if (!fs_devices->latest_bdev) {
btrfs_err(fs_info, "failed to read devices");
goto fail_block_groups;
}
- btrfs_close_extra_devices(fs_devices, 1);
+ btrfs_free_extra_devids(fs_devices, 1);
ret = btrfs_sysfs_add_fsid(fs_devices, NULL);
if (ret) {
goto fail_sysfs;
}
- if (!sb_rdonly(sb) && !btrfs_check_rw_degradable(fs_info)) {
+ if (!sb_rdonly(sb) && !btrfs_check_rw_degradable(fs_info, NULL)) {
btrfs_warn(fs_info,
"writeable mount is not allowed due to too many missing devices");
goto fail_sysfs;
goto fail_block_groups;
goto retry_root_backup;
}
+ALLOW_ERROR_INJECTION(open_ctree, ERRNO);
static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
{
struct buffer_head *bh;
int i;
int errors = 0;
+ bool primary_failed = false;
u64 bytenr;
if (max_mirrors == 0)
BTRFS_SUPER_INFO_SIZE);
if (!bh) {
errors++;
+ if (i == 0)
+ primary_failed = true;
continue;
}
wait_on_buffer(bh);
- if (!buffer_uptodate(bh))
+ if (!buffer_uptodate(bh)) {
errors++;
+ if (i == 0)
+ primary_failed = true;
+ }
/* drop our reference */
brelse(bh);
brelse(bh);
}
+ /* Primary super block failed: log the error and force an error return */
+ if (primary_failed) {
+ btrfs_err(device->fs_info, "error writing primary super block to device %llu",
+ device->devid);
+ return -1;
+ }
+
return errors < i ? 0 : -1;
}
bio->bi_private = &device->flush_wait;
btrfsic_submit_bio(bio);
- device->flush_bio_sent = 1;
+ set_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state);
}
/*
{
struct bio *bio = device->flush_bio;
- if (!device->flush_bio_sent)
+ if (!test_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state))
return BLK_STS_OK;
- device->flush_bio_sent = 0;
+ clear_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state);
wait_for_completion_io(&device->flush_wait);
return bio->bi_status;
/*
 * Map the result of btrfs_check_rw_degradable() to an errno.
 *
 * Returns -EIO when btrfs_check_rw_degradable() returns false for @fs_info,
 * and 0 otherwise. This hunk adds a second argument, passed as NULL, to the
 * btrfs_check_rw_degradable() call.
 */
static int check_barrier_error(struct btrfs_fs_info *fs_info)
{
- if (!btrfs_check_rw_degradable(fs_info))
+ if (!btrfs_check_rw_degradable(fs_info, NULL))
return -EIO;
return 0;
}
int errors_wait = 0;
blk_status_t ret;
+ lockdep_assert_held(&info->fs_devices->device_list_mutex);
/* send down all the barriers */
head = &info->fs_devices->devices;
- list_for_each_entry_rcu(dev, head, dev_list) {
- if (dev->missing)
+ list_for_each_entry(dev, head, dev_list) {
+ if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
continue;
if (!dev->bdev)
continue;
- if (!dev->in_fs_metadata || !dev->writeable)
+ if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
+ !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
continue;
write_dev_flush(dev);
}
/* wait for all the barriers */
- list_for_each_entry_rcu(dev, head, dev_list) {
- if (dev->missing)
+ list_for_each_entry(dev, head, dev_list) {
+ if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
continue;
if (!dev->bdev) {
errors_wait++;
continue;
}
- if (!dev->in_fs_metadata || !dev->writeable)
+ if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
+ !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
continue;
ret = wait_dev_flush(dev);
}
}
- list_for_each_entry_rcu(dev, head, dev_list) {
+ list_for_each_entry(dev, head, dev_list) {
if (!dev->bdev) {
total_errors++;
continue;
}
- if (!dev->in_fs_metadata || !dev->writeable)
+ if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
+ !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
continue;
btrfs_set_stack_device_generation(dev_item, 0);
}
total_errors = 0;
- list_for_each_entry_rcu(dev, head, dev_list) {
+ list_for_each_entry(dev, head, dev_list) {
if (!dev->bdev)
continue;
- if (!dev->in_fs_metadata || !dev->writeable)
+ if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
+ !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
continue;
ret = wait_dev_supers(dev, max_mirrors);
* So here we should only check item pointers, not item data.
*/
if (btrfs_header_level(buf) == 0 &&
- btrfs_check_leaf_relaxed(root, buf)) {
+ btrfs_check_leaf_relaxed(fs_info, buf)) {
btrfs_print_leaf(buf);
ASSERT(0);
}
btrfs_err(fs_info, "no valid FS found");
ret = -EINVAL;
}
- if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP)
- btrfs_warn(fs_info, "unrecognized super flag: %llu",
+ if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP) {
+ btrfs_err(fs_info, "unrecognized or unsupported super flag: %llu",
btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP);
+ ret = -EINVAL;
+ }
if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
btrfs_err(fs_info, "tree_root level too big: %d >= %d",
btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
cache = list_first_entry(&cur_trans->dirty_bgs,
struct btrfs_block_group_cache,
dirty_list);
- if (!cache) {
- btrfs_err(fs_info, "orphan block group dirty_bgs list");
- spin_unlock(&cur_trans->dirty_bgs_lock);
- return;
- }
if (!list_empty(&cache->io_list)) {
spin_unlock(&cur_trans->dirty_bgs_lock);
}
spin_unlock(&cur_trans->dirty_bgs_lock);
+ /*
+ * Refer to the definition of the io_bgs member for details on why it's
+ * safe to use it without any locking
+ */
while (!list_empty(&cur_trans->io_bgs)) {
cache = list_first_entry(&cur_trans->io_bgs,
struct btrfs_block_group_cache,
io_list);
- if (!cache) {
- btrfs_err(fs_info, "orphan block group on io_bgs list");
- return;
- }
list_del_init(&cache->io_list);
spin_lock(&cache->lock);