Btrfs: fix fsync xattr loss in the fast fsync path
[sfrench/cifs-2.6.git] / fs / btrfs / tree-log.c
index d04968374e9d8bd5b118024d53ea0a5bfa6d0bd2..66f87156882f17e2b4028aa475d63a10f2949123 100644 (file)
@@ -3881,12 +3881,6 @@ static int wait_ordered_extents(struct btrfs_trans_handle *trans,
                                     &ordered->flags))
                        continue;
 
-               if (ordered->csum_bytes_left) {
-                       btrfs_start_ordered_extent(inode, ordered, 0);
-                       wait_event(ordered->wait,
-                                  ordered->csum_bytes_left == 0);
-               }
-
                list_for_each_entry(sum, &ordered->list, list) {
                        ret = btrfs_csum_file_blocks(trans, log, sum);
                        if (ret)
@@ -4123,6 +4117,86 @@ static int logged_inode_size(struct btrfs_root *log, struct inode *inode,
        return 0;
 }
 
+/*
+ * At the moment we always log all xattrs. This is to figure out at log replay
+ * time which xattrs must have their deletion replayed. If a xattr is missing
+ * in the log tree and exists in the fs/subvol tree, we delete it. This is
+ * because if a xattr is deleted, the inode is fsynced and a power failure
+ * happens, causing the log to be replayed the next time the fs is mounted,
+ * we want the xattr to not exist anymore (same behaviour as other filesystems
+ * with a journal, ext3/4, xfs, f2fs, etc).
+ */
+static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root,
+                               struct inode *inode,
+                               struct btrfs_path *path,
+                               struct btrfs_path *dst_path)
+{
+       int ret;
+       struct btrfs_key key;
+       const u64 ino = btrfs_ino(inode);
+       int ins_nr = 0;
+       int start_slot = 0;
+
+       key.objectid = ino;
+       key.type = BTRFS_XATTR_ITEM_KEY;
+       key.offset = 0;
+
+       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+       if (ret < 0)
+               return ret;
+
+       while (true) {
+               int slot = path->slots[0];
+               struct extent_buffer *leaf = path->nodes[0];
+               int nritems = btrfs_header_nritems(leaf);
+
+               if (slot >= nritems) {
+                       if (ins_nr > 0) {
+                               u64 last_extent = 0;
+
+                               ret = copy_items(trans, inode, dst_path, path,
+                                                &last_extent, start_slot,
+                                                ins_nr, 1, 0);
+                               /* can't be 1, extent items aren't processed */
+                               ASSERT(ret <= 0);
+                               if (ret < 0)
+                                       return ret;
+                               ins_nr = 0;
+                       }
+                       ret = btrfs_next_leaf(root, path);
+                       if (ret < 0)
+                               return ret;
+                       else if (ret > 0)
+                               break;
+                       continue;
+               }
+
+               btrfs_item_key_to_cpu(leaf, &key, slot);
+               if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY)
+                       break;
+
+               if (ins_nr == 0)
+                       start_slot = slot;
+               ins_nr++;
+               path->slots[0]++;
+               cond_resched();
+       }
+       if (ins_nr > 0) {
+               u64 last_extent = 0;
+
+               ret = copy_items(trans, inode, dst_path, path,
+                                &last_extent, start_slot,
+                                ins_nr, 1, 0);
+               /* can't be 1, extent items aren't processed */
+               ASSERT(ret <= 0);
+               if (ret < 0)
+                       return ret;
+       }
+
+       return 0;
+}
+
 /* log a single inode in the tree log.
  * At least one parent directory for this inode must exist in the tree
  * or be logged already.
@@ -4161,6 +4235,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
        u64 ino = btrfs_ino(inode);
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
        u64 logged_isize = 0;
+       bool need_log_inode_item = true;
 
        path = btrfs_alloc_path();
        if (!path)
@@ -4269,11 +4344,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
                } else {
                        if (inode_only == LOG_INODE_ALL)
                                fast_search = true;
-                       ret = log_inode_item(trans, log, dst_path, inode);
-                       if (ret) {
-                               err = ret;
-                               goto out_unlock;
-                       }
                        goto log_extents;
                }
 
@@ -4296,6 +4366,28 @@ again:
                if (min_key.type > max_key.type)
                        break;
 
+               if (min_key.type == BTRFS_INODE_ITEM_KEY)
+                       need_log_inode_item = false;
+
+               /* Skip xattrs, we log them later with btrfs_log_all_xattrs() */
+               if (min_key.type == BTRFS_XATTR_ITEM_KEY) {
+                       if (ins_nr == 0)
+                               goto next_slot;
+                       ret = copy_items(trans, inode, dst_path, path,
+                                        &last_extent, ins_start_slot,
+                                        ins_nr, inode_only, logged_isize);
+                       if (ret < 0) {
+                               err = ret;
+                               goto out_unlock;
+                       }
+                       ins_nr = 0;
+                       if (ret) {
+                               btrfs_release_path(path);
+                               continue;
+                       }
+                       goto next_slot;
+               }
+
                src = path->nodes[0];
                if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
                        ins_nr++;
@@ -4363,9 +4455,19 @@ next_slot:
                ins_nr = 0;
        }
 
+       btrfs_release_path(path);
+       btrfs_release_path(dst_path);
+       err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path);
+       if (err)
+               goto out_unlock;
 log_extents:
        btrfs_release_path(path);
        btrfs_release_path(dst_path);
+       if (need_log_inode_item) {
+               err = log_inode_item(trans, log, dst_path, inode);
+               if (err)
+                       goto out_unlock;
+       }
        if (fast_search) {
                /*
                 * Some ordered extents started by fsync might have completed