return 0;
found_start = btrfs_header_bytenr(eb);
+
+ if (test_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags)) {
+ WARN_ON(found_start != 0);
+ return 0;
+ }
+
/*
* Please do not consolidate these warnings into a single if.
* It is useful to know what went wrong.
return ret;
}
+static int validate_subpage_buffer(struct page *page, u64 start, u64 end,
+ int mirror)
+{
+ struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
+ struct extent_buffer *eb;
+ bool reads_done;
+ int ret = 0;
+
+ /*
+ * We don't allow bio merge for subpage metadata read, so we should
+ * only get one eb for each endio hook.
+ */
+ ASSERT(end == start + fs_info->nodesize - 1);
+ ASSERT(PagePrivate(page));
+
+ eb = find_extent_buffer(fs_info, start);
+ /*
+ * When we are reading one tree block, eb must have been inserted into
+ * the radix tree. If not, something is wrong.
+ */
+ ASSERT(eb);
+
+ reads_done = atomic_dec_and_test(&eb->io_pages);
+ /* Subpage read must finish in page read */
+ ASSERT(reads_done);
+
+ eb->read_mirror = mirror;
+ if (test_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags)) {
+ ret = -EIO;
+ goto err;
+ }
+ ret = validate_extent_buffer(eb);
+ if (ret < 0)
+ goto err;
+
+ if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
+ btree_readahead_hook(eb, ret);
+
+ set_extent_buffer_uptodate(eb);
+
+ free_extent_buffer(eb);
+ return ret;
+err:
+ /*
+ * end_bio_extent_readpage decrements io_pages in case of error,
+ * make sure it has something to decrement.
+ */
+ atomic_inc(&eb->io_pages);
+ clear_extent_buffer_uptodate(eb);
+ free_extent_buffer(eb);
+ return ret;
+}
+
int btrfs_validate_metadata_buffer(struct btrfs_io_bio *io_bio,
struct page *page, u64 start, u64 end,
int mirror)
int reads_done;
ASSERT(page->private);
+
+ if (btrfs_sb(page->mapping->host->i_sb)->sectorsize < PAGE_SIZE)
+ return validate_subpage_buffer(page, start, end, mirror);
+
eb = (struct extent_buffer *)page->private;
/*
fs_info = end_io_wq->info;
end_io_wq->status = bio->bi_status;
- if (bio_op(bio) == REQ_OP_WRITE) {
+ if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA)
wq = fs_info->endio_meta_write_workers;
else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE)
static int check_async_write(struct btrfs_fs_info *fs_info,
struct btrfs_inode *bi)
{
+ if (btrfs_is_zoned(fs_info))
+ return 0;
if (atomic_read(&bi->sync_writers))
return 0;
if (test_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags))
int async = check_async_write(fs_info, BTRFS_I(inode));
blk_status_t ret;
- if (bio_op(bio) != REQ_OP_WRITE) {
+ if (btrfs_op(bio) != BTRFS_MAP_WRITE) {
/*
* called for a read, do the setup so that checksum validation
* can happen in the async kernel threads
root->orphan_cleanup_state = 0;
root->last_trans = 0;
- root->highest_objectid = 0;
+ root->free_objectid = 0;
root->nr_delalloc_inodes = 0;
root->nr_ordered_extents = 0;
root->inode_tree = RB_ROOT;
struct btrfs_fs_info *fs_info)
{
struct btrfs_root *root;
- struct extent_buffer *leaf;
root = btrfs_alloc_root(fs_info, BTRFS_TREE_LOG_OBJECTID, GFP_NOFS);
if (!root)
root->root_key.type = BTRFS_ROOT_ITEM_KEY;
root->root_key.offset = BTRFS_TREE_LOG_OBJECTID;
+ return root;
+}
+
+int btrfs_alloc_log_tree_node(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
+{
+ struct extent_buffer *leaf;
+
/*
* DON'T set SHAREABLE bit for log trees.
*
leaf = btrfs_alloc_tree_block(trans, root, 0, BTRFS_TREE_LOG_OBJECTID,
NULL, 0, 0, 0, BTRFS_NESTING_NORMAL);
- if (IS_ERR(leaf)) {
- btrfs_put_root(root);
- return ERR_CAST(leaf);
- }
+ if (IS_ERR(leaf))
+ return PTR_ERR(leaf);
root->node = leaf;
btrfs_mark_buffer_dirty(root->node);
btrfs_tree_unlock(root->node);
- return root;
+
+ return 0;
}
int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
log_root = alloc_log_tree(trans, fs_info);
if (IS_ERR(log_root))
return PTR_ERR(log_root);
+
+ if (!btrfs_is_zoned(fs_info)) {
+ int ret = btrfs_alloc_log_tree_node(trans, log_root);
+
+ if (ret) {
+ btrfs_put_root(log_root);
+ return ret;
+ }
+ }
+
WARN_ON(fs_info->log_root_tree);
fs_info->log_root_tree = log_root;
return 0;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_root *log_root;
struct btrfs_inode_item *inode_item;
+ int ret;
log_root = alloc_log_tree(trans, fs_info);
if (IS_ERR(log_root))
return PTR_ERR(log_root);
+ ret = btrfs_alloc_log_tree_node(trans, log_root);
+ if (ret) {
+ btrfs_put_root(log_root);
+ return ret;
+ }
+
log_root->last_trans = trans->transid;
log_root->root_key.offset = root->root_key.objectid;
}
mutex_lock(&root->objectid_mutex);
- ret = btrfs_find_highest_objectid(root,
- &root->highest_objectid);
+ ret = btrfs_init_root_free_objectid(root);
if (ret) {
mutex_unlock(&root->objectid_mutex);
goto fail;
}
- ASSERT(root->highest_objectid <= BTRFS_LAST_FREE_OBJECTID);
+ ASSERT(root->free_objectid <= BTRFS_LAST_FREE_OBJECTID);
mutex_unlock(&root->objectid_mutex);
{
percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
percpu_counter_destroy(&fs_info->delalloc_bytes);
- percpu_counter_destroy(&fs_info->dio_bytes);
+ percpu_counter_destroy(&fs_info->ordered_bytes);
percpu_counter_destroy(&fs_info->dev_replace.bio_counter);
btrfs_free_csum_hash(fs_info);
btrfs_free_stripe_hash_table(fs_info);
btrfs_err(fs_info, "invalid sectorsize %llu", sectorsize);
ret = -EINVAL;
}
- /* Only PAGE SIZE is supported yet */
- if (sectorsize != PAGE_SIZE) {
+
+ /*
+ * For 4K page size, we only support 4K sector size.
+ * For 64K page size, we support read-write for 64K sector size, and
+ * read-only for 4K sector size.
+ */
+ if ((PAGE_SIZE == SZ_4K && sectorsize != PAGE_SIZE) ||
+ (PAGE_SIZE == SZ_64K && (sectorsize != SZ_4K &&
+ sectorsize != SZ_64K))) {
btrfs_err(fs_info,
- "sectorsize %llu not supported yet, only support %lu",
+ "sectorsize %llu not yet supported for page size %lu",
sectorsize, PAGE_SIZE);
ret = -EINVAL;
}
+
if (!is_power_of_2(nodesize) || nodesize < sectorsize ||
nodesize > BTRFS_MAX_METADATA_BLOCKSIZE) {
btrfs_err(fs_info, "invalid nodesize %llu", nodesize);
* No need to hold btrfs_root::objectid_mutex since the fs
* hasn't been fully initialised and we are the only user
*/
- ret = btrfs_find_highest_objectid(tree_root,
- &tree_root->highest_objectid);
+ ret = btrfs_init_root_free_objectid(tree_root);
if (ret < 0) {
handle_error = true;
continue;
}
- ASSERT(tree_root->highest_objectid <= BTRFS_LAST_FREE_OBJECTID);
+ ASSERT(tree_root->free_objectid <= BTRFS_LAST_FREE_OBJECTID);
ret = btrfs_read_roots(fs_info);
if (ret < 0) {
spin_lock_init(&fs_info->super_lock);
spin_lock_init(&fs_info->buffer_lock);
spin_lock_init(&fs_info->unused_bgs_lock);
+ spin_lock_init(&fs_info->treelog_bg_lock);
rwlock_init(&fs_info->tree_mod_log_lock);
mutex_init(&fs_info->unused_bg_unpin_mutex);
mutex_init(&fs_info->delete_unused_bgs_mutex);
mutex_init(&fs_info->reloc_mutex);
mutex_init(&fs_info->delalloc_root_mutex);
+ mutex_init(&fs_info->zoned_meta_io_lock);
seqlock_init(&fs_info->profiles_lock);
INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
sb->s_blocksize = BTRFS_BDEV_BLOCKSIZE;
sb->s_blocksize_bits = blksize_bits(BTRFS_BDEV_BLOCKSIZE);
- ret = percpu_counter_init(&fs_info->dio_bytes, 0, GFP_KERNEL);
+ ret = percpu_counter_init(&fs_info->ordered_bytes, 0, GFP_KERNEL);
if (ret)
return ret;
if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
btrfs_info(fs_info, "has skinny extents");
- fs_info->zoned = (features & BTRFS_FEATURE_INCOMPAT_ZONED);
-
/*
* flag our filesystem as having big metadata blocks if
* they are bigger than the page size
goto fail_alloc;
}
+ /* For 4K sector size support, it's only read-only */
+ if (PAGE_SIZE == SZ_64K && sectorsize == SZ_4K) {
+ if (!sb_rdonly(sb) || btrfs_super_log_root(disk_super)) {
+ btrfs_err(fs_info,
+ "subpage sectorsize %u only supported read-only for page size %lu",
+ sectorsize, PAGE_SIZE);
+ err = -EINVAL;
+ goto fail_alloc;
+ }
+ }
+
ret = btrfs_init_workqueues(fs_info, fs_devices);
if (ret) {
err = ret;
if (ret)
goto fail_tree_roots;
+ /*
+ * Get zone type information of zoned block devices. This will also
+ * handle emulation of a zoned filesystem if a regular device has the
+ * zoned incompat feature flag set.
+ */
+ ret = btrfs_get_dev_zone_info_all_devices(fs_info);
+ if (ret) {
+ btrfs_err(fs_info,
+ "zoned: failed to read device zone info: %d",
+ ret);
+ goto fail_block_groups;
+ }
+
/*
* If we have a uuid root and we're not being told to rescan we need to
* check the generation here so we can set the
cancel_work_sync(&fs_info->async_reclaim_work);
cancel_work_sync(&fs_info->async_data_reclaim_work);
+ cancel_work_sync(&fs_info->preempt_reclaim_work);
/* Cancel or finish ongoing discard work */
btrfs_discard_cleanup(fs_info);
percpu_counter_sum(&fs_info->delalloc_bytes));
}
- if (percpu_counter_sum(&fs_info->dio_bytes))
+ if (percpu_counter_sum(&fs_info->ordered_bytes))
btrfs_info(fs_info, "at unmount dio bytes count %lld",
- percpu_counter_sum(&fs_info->dio_bytes));
+ percpu_counter_sum(&fs_info->ordered_bytes));
btrfs_sysfs_remove_mounted(fs_info);
btrfs_sysfs_remove_fsid(fs_info->fs_devices);
EXTENT_DIRTY);
btrfs_destroy_pinned_extent(fs_info, &cur_trans->pinned_extents);
+ btrfs_free_redirty_list(cur_trans);
+
cur_trans->state =TRANS_STATE_COMPLETED;
wake_up(&cur_trans->commit_wait);
}
return 0;
}
-int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid)
+int btrfs_init_root_free_objectid(struct btrfs_root *root)
{
struct btrfs_path *path;
int ret;
slot = path->slots[0] - 1;
l = path->nodes[0];
btrfs_item_key_to_cpu(l, &found_key, slot);
- *objectid = max_t(u64, found_key.objectid,
- BTRFS_FIRST_FREE_OBJECTID - 1);
+ root->free_objectid = max_t(u64, found_key.objectid + 1,
+ BTRFS_FIRST_FREE_OBJECTID);
} else {
- *objectid = BTRFS_FIRST_FREE_OBJECTID - 1;
+ root->free_objectid = BTRFS_FIRST_FREE_OBJECTID;
}
ret = 0;
error:
return ret;
}
-int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid)
+int btrfs_get_free_objectid(struct btrfs_root *root, u64 *objectid)
{
int ret;
mutex_lock(&root->objectid_mutex);
- if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) {
+ if (unlikely(root->free_objectid >= BTRFS_LAST_FREE_OBJECTID)) {
btrfs_warn(root->fs_info,
"the objectid of root %llu reaches its highest value",
root->root_key.objectid);
goto out;
}
- *objectid = ++root->highest_objectid;
+ *objectid = root->free_objectid++;
ret = 0;
out:
mutex_unlock(&root->objectid_mutex);