btrfs: fix fsfreeze hang caused by delayed iputs deal
authorWang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
Mon, 1 Aug 2016 05:28:08 +0000 (13:28 +0800)
committerChris Mason <clm@fb.com>
Thu, 25 Aug 2016 10:58:26 +0000 (03:58 -0700)
When running fstests generic/068, sometimes we got below deadlock:
  xfs_io          D ffff8800331dbb20     0  6697   6693 0x00000080
  ffff8800331dbb20 ffff88007acfc140 ffff880034d895c0 ffff8800331dc000
  ffff880032d243e8 fffffffeffffffff ffff880032d24400 0000000000000001
  ffff8800331dbb38 ffffffff816a9045 ffff880034d895c0 ffff8800331dbba8
  Call Trace:
  [<ffffffff816a9045>] schedule+0x35/0x80
  [<ffffffff816abab2>] rwsem_down_read_failed+0xf2/0x140
  [<ffffffff8118f5e1>] ? __filemap_fdatawrite_range+0xd1/0x100
  [<ffffffff8134f978>] call_rwsem_down_read_failed+0x18/0x30
  [<ffffffffa06631fc>] ? btrfs_alloc_block_rsv+0x2c/0xb0 [btrfs]
  [<ffffffff810d32b5>] percpu_down_read+0x35/0x50
  [<ffffffff81217dfc>] __sb_start_write+0x2c/0x40
  [<ffffffffa067f5d5>] start_transaction+0x2a5/0x4d0 [btrfs]
  [<ffffffffa067f857>] btrfs_join_transaction+0x17/0x20 [btrfs]
  [<ffffffffa068ba34>] btrfs_evict_inode+0x3c4/0x5d0 [btrfs]
  [<ffffffff81230a1a>] evict+0xba/0x1a0
  [<ffffffff812316b6>] iput+0x196/0x200
  [<ffffffffa06851d0>] btrfs_run_delayed_iputs+0x70/0xc0 [btrfs]
  [<ffffffffa067f1d8>] btrfs_commit_transaction+0x928/0xa80 [btrfs]
  [<ffffffffa0646df0>] btrfs_freeze+0x30/0x40 [btrfs]
  [<ffffffff81218040>] freeze_super+0xf0/0x190
  [<ffffffff81229275>] do_vfs_ioctl+0x4a5/0x5c0
  [<ffffffff81003176>] ? do_audit_syscall_entry+0x66/0x70
  [<ffffffff810038cf>] ? syscall_trace_enter_phase1+0x11f/0x140
  [<ffffffff81229409>] SyS_ioctl+0x79/0x90
  [<ffffffff81003c12>] do_syscall_64+0x62/0x110
  [<ffffffff816acbe1>] entry_SYSCALL64_slow_path+0x25/0x25

>From this warning, freeze_super() already holds SB_FREEZE_FS, but
btrfs_freeze() will call btrfs_commit_transaction() again, if
btrfs_commit_transaction() finds that it has delayed iputs to handle,
it'll start_transaction(), which will try to get SB_FREEZE_FS lock
again, then deadlock occurs.

The root cause is that in btrfs, sync_filesystem(sb) does not make
sure all metadata is updated. There still maybe some codes adding
delayed iputs, see below sample race window:

         CPU1                                  |         CPU2
|-> freeze_super()                             |
    |-> sync_filesystem(sb);                   |
    |                                          |-> cleaner_kthread()
    |                                          |   |-> btrfs_delete_unused_bgs()
    |                                          |       |-> btrfs_remove_chunk()
    |                                          |           |-> btrfs_remove_block_group()
    |                                          |               |-> btrfs_add_delayed_iput()
    |                                          |
    |-> sb->s_writers.frozen = SB_FREEZE_FS;   |
    |-> sb_wait_write(sb, SB_FREEZE_FS);       |
    |   acquire SB_FREEZE_FS lock.             |
    |                                          |
    |-> btrfs_freeze()                         |
        |-> btrfs_commit_transaction()         |
            |-> btrfs_run_delayed_iputs()      |
            |   will handle delayed iputs,     |
            |   that means start_transaction() |
            |   will be called, which will try |
            |   to get SB_FREEZE_FS lock.      |

To fix this issue, introduce a "int fs_frozen" to record internally whether
fs has been frozen. If fs has been frozen, we can not handle delayed iputs.

Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
Reviewed-by: David Sterba <dsterba@suse.com>
[ add comment to btrfs_freeze ]
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/super.c
fs/btrfs/transaction.c

index 09cdff0d58e879cec6706a3856ec8b314cba6a92..ec4154faab6121a4fd6f3cd3d0c6182f9b7c74dd 100644 (file)
@@ -1080,6 +1080,8 @@ struct btrfs_fs_info {
        struct list_head pinned_chunks;
 
        int creating_free_space_tree;
+       /* Used to record internally whether fs has been frozen */
+       int fs_frozen;
 };
 
 struct btrfs_subvolume_writers {
index 7857f64e1cae23e3b87f1f2f7b8e792ff371ced5..17062223fac31e1a42872cdebc6fb67bd88c0d1c 100644 (file)
@@ -2631,6 +2631,7 @@ int open_ctree(struct super_block *sb,
        atomic_set(&fs_info->qgroup_op_seq, 0);
        atomic_set(&fs_info->reada_works_cnt, 0);
        atomic64_set(&fs_info->tree_mod_seq, 0);
+       fs_info->fs_frozen = 0;
        fs_info->sb = sb;
        fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE;
        fs_info->metadata_ratio = 0;
index 864ce334f696c31badf2919b07d503416545ded6..4071fe2bd0981a547f983ebeaa944f3179d97659 100644 (file)
@@ -2241,6 +2241,13 @@ static int btrfs_freeze(struct super_block *sb)
        struct btrfs_trans_handle *trans;
        struct btrfs_root *root = btrfs_sb(sb)->tree_root;
 
+       root->fs_info->fs_frozen = 1;
+       /*
+        * We don't need a barrier here, we'll wait for any transaction that
+        * could be in progress on other threads (and do delayed iputs that
+        * we want to avoid on a frozen filesystem), or do the commit
+        * ourselves.
+        */
        trans = btrfs_attach_transaction_barrier(root);
        if (IS_ERR(trans)) {
                /* no transaction, don't bother */
@@ -2251,6 +2258,14 @@ static int btrfs_freeze(struct super_block *sb)
        return btrfs_commit_transaction(trans, root);
 }
 
+static int btrfs_unfreeze(struct super_block *sb)
+{
+       struct btrfs_root *root = btrfs_sb(sb)->tree_root;
+
+       root->fs_info->fs_frozen = 0;
+       return 0;
+}
+
 static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
@@ -2299,6 +2314,7 @@ static const struct super_operations btrfs_super_ops = {
        .statfs         = btrfs_statfs,
        .remount_fs     = btrfs_remount,
        .freeze_fs      = btrfs_freeze,
+       .unfreeze_fs    = btrfs_unfreeze,
 };
 
 static const struct file_operations btrfs_ctl_fops = {
index 9cca0a72196180986b440865221a8aafa5659315..95d41919d034ef69647b6983dfac27f87f17e130 100644 (file)
@@ -2278,8 +2278,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
        kmem_cache_free(btrfs_trans_handle_cachep, trans);
 
+       /*
+        * If fs has been frozen, we can not handle delayed iputs, otherwise
+        * it'll result in deadlock about SB_FREEZE_FS.
+        */
        if (current != root->fs_info->transaction_kthread &&
-           current != root->fs_info->cleaner_kthread)
+           current != root->fs_info->cleaner_kthread &&
+           !root->fs_info->fs_frozen)
                btrfs_run_delayed_iputs(root);
 
        return ret;