From: Linus Torvalds Date: Mon, 30 Sep 2019 17:25:24 +0000 (-0700) Subject: Merge tag 'for-5.4-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave... X-Git-Tag: v5.4-rc1~1 X-Git-Url: http://git.samba.org/samba.git/?p=sfrench%2Fcifs-2.6.git;a=commitdiff_plain;h=bb48a59135926ece9b1361e8b96b33fc658830bc;hp=80b29b6b8cd7479a67f5e338195dbc121b30c879 Merge tag 'for-5.4-rc1-tag' of git://git./linux/kernel/git/kdave/linux Pull btrfs fixes from David Sterba: "A bunch of fixes that accumulated in recent weeks, mostly material for stable. Summary: - fix for regression from 5.3 that prevents to use balance convert with single profile - qgroup fixes: rescan race, accounting leak with multiple writers, potential leak after io failure recovery - fix for use after free in relocation (reported by KASAN) - other error handling fixups" * tag 'for-5.4-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: btrfs: qgroup: Fix reserved data space leak if we have multiple reserve calls btrfs: qgroup: Fix the wrong target io_tree when freeing reserved data space btrfs: Fix a regression which we can't convert to SINGLE profile btrfs: relocation: fix use-after-free on dead relocation roots Btrfs: fix race setting up and completing qgroup rescan workers Btrfs: fix missing error return if writeback for extent buffer never started btrfs: adjust dirty_metadata_bytes after writeback failure of extent buffer Btrfs: fix selftests failure due to uninitialized i_mode in test inodes --- diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 7b32b6af322d..cceaf05aada2 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3745,11 +3745,20 @@ err_unlock: static void set_btree_ioerr(struct page *page) { struct extent_buffer *eb = (struct extent_buffer *)page->private; + struct btrfs_fs_info *fs_info; SetPageError(page); if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) return; + /* + * If we error out, we should add back the dirty_metadata_bytes + * to make it consistent. + */ + fs_info = eb->fs_info; + percpu_counter_add_batch(&fs_info->dirty_metadata_bytes, + eb->len, fs_info->dirty_metadata_batch); + /* * If writeback for a btree extent that doesn't belong to a log tree * failed, increment the counter transaction->eb_write_errors. @@ -3986,6 +3995,10 @@ retry: if (!ret) { free_extent_buffer(eb); continue; + } else if (ret < 0) { + done = 1; + free_extent_buffer(eb); + break; } ret = write_one_eb(eb, wbc, &epd); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 8d3bd799ac7d..c4bb69941c77 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -3166,9 +3166,6 @@ out: btrfs_free_path(path); mutex_lock(&fs_info->qgroup_rescan_lock); - if (!btrfs_fs_closing(fs_info)) - fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; - if (err > 0 && fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) { fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; @@ -3184,16 +3181,30 @@ out: trans = btrfs_start_transaction(fs_info->quota_root, 1); if (IS_ERR(trans)) { err = PTR_ERR(trans); + trans = NULL; btrfs_err(fs_info, "fail to start transaction for status update: %d", err); - goto done; } - ret = update_qgroup_status_item(trans); - if (ret < 0) { - err = ret; - btrfs_err(fs_info, "fail to update qgroup status: %d", err); + + mutex_lock(&fs_info->qgroup_rescan_lock); + if (!btrfs_fs_closing(fs_info)) + fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; + if (trans) { + ret = update_qgroup_status_item(trans); + if (ret < 0) { + err = ret; + btrfs_err(fs_info, "fail to update qgroup status: %d", + err); + } } + fs_info->qgroup_rescan_running = false; + complete_all(&fs_info->qgroup_rescan_completion); + mutex_unlock(&fs_info->qgroup_rescan_lock); + + if (!trans) + return; + btrfs_end_transaction(trans); if (btrfs_fs_closing(fs_info)) { @@ -3204,12 +3215,6 @@ out: } else { btrfs_err(fs_info, "qgroup scan failed with %d", err); } - -done: - mutex_lock(&fs_info->qgroup_rescan_lock); - fs_info->qgroup_rescan_running = false; - mutex_unlock(&fs_info->qgroup_rescan_lock); - complete_all(&fs_info->qgroup_rescan_completion); } /* @@ -3437,6 +3442,9 @@ cleanup: while ((unode = ulist_next(&reserved->range_changed, &uiter))) clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val, unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL); + /* Also free data bytes of already reserved one */ + btrfs_qgroup_free_refroot(root->fs_info, root->root_key.objectid, + orig_reserved, BTRFS_QGROUP_RSV_DATA); extent_changeset_release(reserved); return ret; } @@ -3481,7 +3489,7 @@ static int qgroup_free_reserved_data(struct inode *inode, * EXTENT_QGROUP_RESERVED, we won't double free. * So not need to rush. */ - ret = clear_record_extent_bits(&BTRFS_I(inode)->io_failure_tree, + ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, free_start, free_start + free_len - 1, EXTENT_QGROUP_RESERVED, &changeset); if (ret < 0) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 2f0e25afa486..00504657b602 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1435,6 +1435,13 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, int clear_rsv = 0; int ret; + /* + * The subvolume has reloc tree but the swap is finished, no need to + * create/update the dead reloc tree + */ + if (test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state)) + return 0; + if (root->reloc_root) { reloc_root = root->reloc_root; reloc_root->last_trans = trans->transid; @@ -2187,7 +2194,6 @@ static int clean_dirty_subvols(struct reloc_control *rc) /* Merged subvolume, cleanup its reloc root */ struct btrfs_root *reloc_root = root->reloc_root; - clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state); list_del_init(&root->reloc_dirty_list); root->reloc_root = NULL; if (reloc_root) { @@ -2196,6 +2202,7 @@ static int clean_dirty_subvols(struct reloc_control *rc) if (ret2 < 0 && !ret) ret = ret2; } + clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state); btrfs_put_fs_root(root); } else { /* Orphan reloc tree, just clean it up */ diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index b5e80563efaa..99fe9bf3fdac 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -52,7 +52,13 @@ static struct file_system_type test_type = { struct inode *btrfs_new_test_inode(void) { - return new_inode(test_mnt->mnt_sb); + struct inode *inode; + + inode = new_inode(test_mnt->mnt_sb); + if (inode) + inode_init_owner(inode, NULL, S_IFREG); + + return inode; } static int btrfs_init_test_fs(void) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a324480bc88b..cdd7af424033 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4063,7 +4063,13 @@ int btrfs_balance(struct btrfs_fs_info *fs_info, } num_devices = btrfs_num_devices(fs_info); - allowed = 0; + + /* + * SINGLE profile on-disk has no profile bit, but in-memory we have a + * special bit for it, to make it easier to distinguish. Thus we need + * to set it manually, or balance would refuse the profile. + */ + allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE; for (i = 0; i < ARRAY_SIZE(btrfs_raid_array); i++) if (num_devices >= btrfs_raid_array[i].devs_min) allowed |= btrfs_raid_array[i].bg_flag;