#include "free-space-cache.h"
#include "inode-map.h"
#include "backref.h"
-#include "hash.h"
#include "props.h"
#include "qgroup.h"
#include "dedupe.h"
};
static int btrfs_setsize(struct inode *inode, struct iattr *attr);
-static int btrfs_truncate(struct inode *inode);
+static int btrfs_truncate(struct inode *inode, bool skip_writeback);
static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
static noinline int cow_file_range(struct inode *inode,
struct page *locked_page,
* does the checks required to make sure the data is small enough
* to fit as an inline extent.
*/
-static noinline int cow_file_range_inline(struct btrfs_root *root,
- struct inode *inode, u64 start,
+static noinline int cow_file_range_inline(struct inode *inode, u64 start,
u64 end, size_t compressed_size,
int compress_type,
struct page **compressed_pages)
{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_trans_handle *trans;
u64 isize = i_size_read(inode);
int *num_added)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_root *root = BTRFS_I(inode)->root;
u64 blocksize = fs_info->sectorsize;
u64 actual_end;
u64 isize = i_size_read(inode);
/* we didn't compress the entire range, try
* to make an uncompressed inline extent.
*/
- ret = cow_file_range_inline(root, inode, start, end,
- 0, BTRFS_COMPRESS_NONE, NULL);
+ ret = cow_file_range_inline(inode, start, end, 0,
+ BTRFS_COMPRESS_NONE, NULL);
} else {
/* try making a compressed inline extent */
- ret = cow_file_range_inline(root, inode, start, end,
+ ret = cow_file_range_inline(inode, start, end,
total_compressed,
compress_type, pages);
}
u64 alloc_hint = 0;
u64 num_bytes;
unsigned long ram_size;
- u64 disk_num_bytes;
u64 cur_alloc_size = 0;
u64 blocksize = fs_info->sectorsize;
struct btrfs_key ins;
num_bytes = ALIGN(end - start + 1, blocksize);
num_bytes = max(blocksize, num_bytes);
- disk_num_bytes = num_bytes;
+ ASSERT(num_bytes <= btrfs_super_total_bytes(fs_info->super_copy));
inode_should_defrag(BTRFS_I(inode), start, end, num_bytes, SZ_64K);
if (start == 0) {
/* lets try to make an inline extent */
- ret = cow_file_range_inline(root, inode, start, end, 0,
- BTRFS_COMPRESS_NONE, NULL);
+ ret = cow_file_range_inline(inode, start, end, 0,
+ BTRFS_COMPRESS_NONE, NULL);
if (ret == 0) {
/*
* We use DO_ACCOUNTING here because we need the
}
}
- BUG_ON(disk_num_bytes >
- btrfs_super_total_bytes(fs_info->super_copy));
-
alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
btrfs_drop_extent_cache(BTRFS_I(inode), start,
start + num_bytes - 1, 0);
- while (disk_num_bytes > 0) {
- cur_alloc_size = disk_num_bytes;
+ while (num_bytes > 0) {
+ cur_alloc_size = num_bytes;
ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
fs_info->sectorsize, 0, alloc_hint,
&ins, 1, 1);
delalloc_end, locked_page,
EXTENT_LOCKED | EXTENT_DELALLOC,
page_ops);
- if (disk_num_bytes < cur_alloc_size)
- disk_num_bytes = 0;
+ if (num_bytes < cur_alloc_size)
+ num_bytes = 0;
else
- disk_num_bytes -= cur_alloc_size;
- num_bytes -= cur_alloc_size;
+ num_bytes -= cur_alloc_size;
alloc_hint = ins.objectid + ins.offset;
start += cur_alloc_size;
extent_reserved = false;
list_del(&sums->list);
kfree(sums);
}
+ if (ret < 0)
+ return ret;
return 1;
}
goto out_check;
if (btrfs_extent_readonly(fs_info, disk_bytenr))
goto out_check;
- if (btrfs_cross_ref_exist(root, ino,
- found_key.offset -
- extent_offset, disk_bytenr))
+ ret = btrfs_cross_ref_exist(root, ino,
+ found_key.offset -
+ extent_offset, disk_bytenr);
+ if (ret) {
+ /*
+ * ret could be -EIO if the above fails to read
+ * metadata.
+ */
+ if (ret < 0) {
+ if (cow_start != (u64)-1)
+ cur_offset = cow_start;
+ goto error;
+ }
+
+ WARN_ON_ONCE(nolock);
goto out_check;
+ }
disk_bytenr += extent_offset;
disk_bytenr += cur_offset - found_key.offset;
num_bytes = min(end + 1, extent_end) - cur_offset;
* this ensure that csum for a given extent are
* either valid or do not exist.
*/
- if (csum_exist_in_range(fs_info, disk_bytenr,
- num_bytes)) {
+ ret = csum_exist_in_range(fs_info, disk_bytenr,
+ num_bytes);
+ if (ret) {
if (!nolock)
btrfs_end_write_no_snapshotting(root);
+
+ /*
+ * ret could be -EIO if the above fails to read
+ * metadata.
+ */
+ if (ret < 0) {
+ if (cow_start != (u64)-1)
+ cur_offset = cow_start;
+ goto error;
+ }
+ WARN_ON_ONCE(nolock);
goto out_check;
}
if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr)) {
* are inserted into the btree
*/
static blk_status_t __btrfs_submit_bio_start(void *private_data, struct bio *bio,
- int mirror_num, unsigned long bio_flags,
u64 bio_offset)
{
struct inode *inode = private_data;
* are inserted into the btree
*/
static blk_status_t __btrfs_submit_bio_done(void *private_data, struct bio *bio,
- int mirror_num, unsigned long bio_flags,
- u64 bio_offset)
+ int mirror_num)
{
struct inode *inode = private_data;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
start, (size_t)(end - start + 1));
}
+/*
+ * btrfs_add_delayed_iput - perform a delayed iput on @inode
+ *
+ * @inode: The inode we want to perform iput on
+ *
+ * This function uses the generic vfs_inode::i_count to track whether we should
+ * just decrement it (in case it's > 1) or if this is the last iput then link
+ * the inode to the delayed iput machinery. Delayed iputs are processed at
+ * transaction commit time/superblock commit/cleaner kthread.
+ */
void btrfs_add_delayed_iput(struct inode *inode)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
return;
spin_lock(&fs_info->delayed_iput_lock);
- if (binode->delayed_iput_count == 0) {
- ASSERT(list_empty(&binode->delayed_iput));
- list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs);
- } else {
- binode->delayed_iput_count++;
- }
+ ASSERT(list_empty(&binode->delayed_iput));
+ list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs);
spin_unlock(&fs_info->delayed_iput_lock);
}
inode = list_first_entry(&fs_info->delayed_iputs,
struct btrfs_inode, delayed_iput);
- if (inode->delayed_iput_count) {
- inode->delayed_iput_count--;
- list_move_tail(&inode->delayed_iput,
- &fs_info->delayed_iputs);
- } else {
- list_del_init(&inode->delayed_iput);
- }
+ list_del_init(&inode->delayed_iput);
spin_unlock(&fs_info->delayed_iput_lock);
iput(&inode->vfs_inode);
spin_lock(&fs_info->delayed_iput_lock);
return -ENOMEM;
}
- spin_lock(&root->orphan_lock);
- if (!root->orphan_block_rsv) {
- root->orphan_block_rsv = block_rsv;
- } else if (block_rsv) {
- btrfs_free_block_rsv(fs_info, block_rsv);
- block_rsv = NULL;
- }
-
if (!test_and_set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
&inode->runtime_flags)) {
#if 0
insert = 1;
#endif
insert = 1;
- atomic_inc(&root->orphan_inodes);
}
if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
&inode->runtime_flags))
reserve = 1;
+
+ spin_lock(&root->orphan_lock);
+ /* If someone has created ->orphan_block_rsv, be happy to use it. */
+ if (!root->orphan_block_rsv) {
+ root->orphan_block_rsv = block_rsv;
+ } else if (block_rsv) {
+ btrfs_free_block_rsv(fs_info, block_rsv);
+ block_rsv = NULL;
+ }
+
+ if (insert)
+ atomic_inc(&root->orphan_inodes);
spin_unlock(&root->orphan_lock);
/* grab metadata reservation from transaction handle */
goto out;
}
- ret = btrfs_truncate(inode);
+ ret = btrfs_truncate(inode, false);
if (ret)
btrfs_orphan_del(NULL, BTRFS_I(inode));
} else {
inode_dio_wait(inode);
btrfs_inode_resume_unlocked_dio(BTRFS_I(inode));
- ret = btrfs_truncate(inode);
+ ret = btrfs_truncate(inode, newsize == oldsize);
if (ret && inode->i_nlink) {
int err;
/*
* this returns the key found in the dir entry in the location pointer.
- * If no dir entries were found, location->objectid is 0.
+ * If no dir entries were found, returns -ENOENT.
+ * If found a corrupted location in dir entry, returns -EUCLEAN.
*/
static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
struct btrfs_key *location)
di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(BTRFS_I(dir)),
name, namelen, 0);
- if (IS_ERR(di))
+ if (!di) {
+ ret = -ENOENT;
+ goto out;
+ }
+ if (IS_ERR(di)) {
ret = PTR_ERR(di);
-
- if (IS_ERR_OR_NULL(di))
- goto out_err;
+ goto out;
+ }
btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
if (location->type != BTRFS_INODE_ITEM_KEY &&
location->type != BTRFS_ROOT_ITEM_KEY) {
+ ret = -EUCLEAN;
btrfs_warn(root->fs_info,
"%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location(%llu %u %llu))",
__func__, name, btrfs_ino(BTRFS_I(dir)),
location->objectid, location->type, location->offset);
- goto out_err;
}
out:
btrfs_free_path(path);
return ret;
-out_err:
- location->objectid = 0;
- goto out;
}
/*
if (ret < 0)
return ERR_PTR(ret);
- if (location.objectid == 0)
- return ERR_PTR(-ENOENT);
-
if (location.type == BTRFS_INODE_ITEM_KEY) {
inode = btrfs_iget(dir->i_sb, &location, root, NULL);
return inode;
return ret;
}
-bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end)
-{
- struct radix_tree_root *root = &inode->i_mapping->page_tree;
- bool found = false;
- void **pagep = NULL;
- struct page *page = NULL;
- unsigned long start_idx;
- unsigned long end_idx;
-
- start_idx = start >> PAGE_SHIFT;
-
- /*
- * end is the last byte in the last page. end == start is legal
- */
- end_idx = end >> PAGE_SHIFT;
-
- rcu_read_lock();
-
- /* Most of the code in this while loop is lifted from
- * find_get_page. It's been modified to begin searching from a
- * page and return just the first page found in that range. If the
- * found idx is less than or equal to the end idx then we know that
- * a page exists. If no pages are found or if those pages are
- * outside of the range then we're fine (yay!) */
- while (page == NULL &&
- radix_tree_gang_lookup_slot(root, &pagep, NULL, start_idx, 1)) {
- page = radix_tree_deref_slot(pagep);
- if (unlikely(!page))
- break;
-
- if (radix_tree_exception(page)) {
- if (radix_tree_deref_retry(page)) {
- page = NULL;
- continue;
- }
- /*
- * Otherwise, shmem/tmpfs must be storing a swap entry
- * here as an exceptional entry: so return it without
- * attempting to raise page count.
- */
- page = NULL;
- break; /* TODO: Is this relevant for this use case? */
- }
-
- if (!page_cache_get_speculative(page)) {
- page = NULL;
- continue;
- }
-
- /*
- * Has the page moved?
- * This is part of the lockless pagecache protocol. See
- * include/linux/pagemap.h for details.
- */
- if (unlikely(page != *pagep)) {
- put_page(page);
- page = NULL;
- }
- }
-
- if (page) {
- if (page->index <= end_idx)
- found = true;
- put_page(page);
- }
-
- rcu_read_unlock();
- return found;
-}
-
static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
struct extent_state **cached_state, int writing)
{
* get stale data.
*/
if (!ordered &&
- (!writing ||
- !btrfs_page_exists_in_range(inode, lockstart, lockend)))
+ (!writing || !filemap_range_has_page(inode->i_mapping,
+ lockstart, lockend)))
break;
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
}
static blk_status_t __btrfs_submit_bio_start_direct_io(void *private_data,
- struct bio *bio, int mirror_num,
- unsigned long bio_flags, u64 offset)
+ struct bio *bio, u64 offset)
{
struct inode *inode = private_data;
blk_status_t ret;
err = dip->subio_endio(dip->inode, btrfs_io_bio(bio), err);
if (err) {
- dip->errors = 1;
-
/*
- * before atomic variable goto zero, we must make sure
- * dip->errors is perceived to be set.
+ * We want to perceive the errors flag being set before
+ * decrementing the reference count. We don't need a barrier
+ * since atomic operations with a return value are fully
+ * ordered as per atomic_t.txt
*/
- smp_mb__before_atomic();
+ dip->errors = 1;
}
/* if there are more bios still pending for this dio, just exit */
out_err:
dip->errors = 1;
/*
- * before atomic variable goto zero, we must
- * make sure dip->errors is perceived to be set.
+ * Before atomic variable goto zero, we must make sure dip->errors is
+ * perceived to be set. This ordering is ensured by the fact that an
+ * atomic operations with a return value are fully ordered as per
+ * atomic_t.txt
*/
- smp_mb__before_atomic();
if (atomic_dec_and_test(&dip->pending_bios))
bio_io_error(dip->orig_bio);
return ret;
}
-static int btrfs_truncate(struct inode *inode)
+static int btrfs_truncate(struct inode *inode, bool skip_writeback)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 mask = fs_info->sectorsize - 1;
u64 min_size = btrfs_calc_trunc_metadata_size(fs_info, 1);
- ret = btrfs_wait_ordered_range(inode, inode->i_size & (~mask),
- (u64)-1);
- if (ret)
- return ret;
+ if (!skip_writeback) {
+ ret = btrfs_wait_ordered_range(inode, inode->i_size & (~mask),
+ (u64)-1);
+ if (ret)
+ return ret;
+ }
/*
* Yes ladies and gentlemen, this is indeed ugly. The fact is we have
ei->dir_index = 0;
ei->last_unlink_trans = 0;
ei->last_log_commit = 0;
- ei->delayed_iput_count = 0;
spin_lock_init(&ei->lock);
ei->outstanding_extents = 0;
inode_init_once(&ei->vfs_inode);
}
-void btrfs_destroy_cachep(void)
+void __cold btrfs_destroy_cachep(void)
{
/*
* Make sure all delayed rcu free inodes are flushed before we