Btrfs: fix file corruption after snapshotting due to mix of buffered/DIO writes

author Filipe Manana <fdmanana@suse.com>

Wed, 27 Feb 2019 13:42:30 +0000 (13:42 +0000)

committer David Sterba <dsterba@suse.com>

Wed, 13 Mar 2019 16:13:48 +0000 (17:13 +0100)
author Filipe Manana <fdmanana@suse.com>
Wed, 27 Feb 2019 13:42:30 +0000 (13:42 +0000)
committer David Sterba <dsterba@suse.com>
Wed, 13 Mar 2019 16:13:48 +0000 (17:13 +0100)
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c

index acdad6d658f54bda7cf9c379867d212a41d1c24b..e4e665f422fc4c87b05181211b73ca2097e3b7c2 100644 (file)
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1886,8 +1886,10 @@ static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans)
         }
  }
  
         }
  }
  
-static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
+static inline int btrfs_start_delalloc_flush(struct btrfs_trans_handle *trans)
  {
  {
+       struct btrfs_fs_info *fs_info = trans->fs_info;
+
         /*
          * We use writeback_inodes_sb here because if we used
          * btrfs_start_delalloc_roots we would deadlock with fs freeze.
         /*
          * We use writeback_inodes_sb here because if we used
          * btrfs_start_delalloc_roots we would deadlock with fs freeze.
@@ -1897,15 +1899,50 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
          * from already being in a transaction and our join_transaction doesn't
          * have to re-take the fs freeze lock.
          */
          * from already being in a transaction and our join_transaction doesn't
          * have to re-take the fs freeze lock.
          */
-       if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
+       if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) {
                 writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC);
                 writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC);
+       } else {
+               struct btrfs_pending_snapshot *pending;
+               struct list_head *head = &trans->transaction->pending_snapshots;
+
+               /*
+                * Flush delalloc for any root that is going to be snapshotted.
+                * This is done to avoid a corrupted version of files, in the
+                * snapshots, that had both buffered and direct IO writes (even
+                * if they were done sequentially) due to an unordered update of
+                * the inode's size on disk.
+                */
+               list_for_each_entry(pending, head, list) {
+                       int ret;
+
+                       ret = btrfs_start_delalloc_snapshot(pending->root);
+                       if (ret)
+                               return ret;
+               }
+       }
         return 0;
  }
  
         return 0;
  }
  
-static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
+static inline void btrfs_wait_delalloc_flush(struct btrfs_trans_handle *trans)
  {
  {
-       if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
+       struct btrfs_fs_info *fs_info = trans->fs_info;
+
+       if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) {
                 btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
                 btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
+       } else {
+               struct btrfs_pending_snapshot *pending;
+               struct list_head *head = &trans->transaction->pending_snapshots;
+
+               /*
+                * Wait for any delalloc that we started previously for the roots
+                * that are going to be snapshotted. This is to avoid a corrupted
+                * version of files in the snapshots that had both buffered and
+                * direct IO writes (even if they were done sequentially).
+                */
+               list_for_each_entry(pending, head, list)
+                       btrfs_wait_ordered_extents(pending->root,
+                                                  U64_MAX, 0, U64_MAX);
+       }
  }
  
  int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
  }
  
  int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
@@ -2023,7 +2060,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
  
         extwriter_counter_dec(cur_trans, trans->type);
  
  
         extwriter_counter_dec(cur_trans, trans->type);
  
-       ret = btrfs_start_delalloc_flush(fs_info);
+       ret = btrfs_start_delalloc_flush(trans);
         if (ret)
                 goto cleanup_transaction;
  
         if (ret)
                 goto cleanup_transaction;
  
@@ -2039,7 +2076,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
         if (ret)
                 goto cleanup_transaction;
  
         if (ret)
                 goto cleanup_transaction;
  
-       btrfs_wait_delalloc_flush(fs_info);
+       btrfs_wait_delalloc_flush(trans);
  
         btrfs_scrub_pause(fs_info);
         /*
  
         btrfs_scrub_pause(fs_info);
         /*
author	Filipe Manana <fdmanana@suse.com>
	Wed, 27 Feb 2019 13:42:30 +0000 (13:42 +0000)
committer	David Sterba <dsterba@suse.com>
	Wed, 13 Mar 2019 16:13:48 +0000 (17:13 +0100)