btrfs: use kvzalloc for EXTENT_SAME temporary data

[sfrench/cifs-2.6.git] / fs / btrfs / ioctl.c
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c

index 632e26d6f7ce0cd26284b0e39c140c7f09a1f3d2..aeef6cd8aaeb242feba6b409582d97b11ac83c0e 100644 (file)
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -93,20 +93,22 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
                        int no_time_update);
  
  /* Mask out flags that are inappropriate for the given type of inode. */
-static unsigned int btrfs_mask_flags(umode_t mode, unsigned int flags)
+static unsigned int btrfs_mask_fsflags_for_type(struct inode *inode,
+               unsigned int flags)
  {
-       if (S_ISDIR(mode))
+       if (S_ISDIR(inode->i_mode))
                 return flags;
-       else if (S_ISREG(mode))
+       else if (S_ISREG(inode->i_mode))
                 return flags & ~FS_DIRSYNC_FL;
         else
                 return flags & (FS_NODUMP_FL | FS_NOATIME_FL);
  }
  
  /*
- * Export inode flags to the format expected by the FS_IOC_GETFLAGS ioctl.
+ * Export internal inode flags to the format expected by the FS_IOC_GETFLAGS
+ * ioctl.
   */
-static unsigned int btrfs_flags_to_ioctl(unsigned int flags)
+static unsigned int btrfs_inode_flags_to_fsflags(unsigned int flags)
  {
         unsigned int iflags = 0;
  
@@ -136,20 +138,20 @@ static unsigned int btrfs_flags_to_ioctl(unsigned int flags)
  /*
   * Update inode->i_flags based on the btrfs internal flags.
   */
-void btrfs_update_iflags(struct inode *inode)
+void btrfs_sync_inode_flags_to_i_flags(struct inode *inode)
  {
-       struct btrfs_inode *ip = BTRFS_I(inode);
+       struct btrfs_inode *binode = BTRFS_I(inode);
         unsigned int new_fl = 0;
  
-       if (ip->flags & BTRFS_INODE_SYNC)
+       if (binode->flags & BTRFS_INODE_SYNC)
                 new_fl |= S_SYNC;
-       if (ip->flags & BTRFS_INODE_IMMUTABLE)
+       if (binode->flags & BTRFS_INODE_IMMUTABLE)
                 new_fl |= S_IMMUTABLE;
-       if (ip->flags & BTRFS_INODE_APPEND)
+       if (binode->flags & BTRFS_INODE_APPEND)
                 new_fl |= S_APPEND;
-       if (ip->flags & BTRFS_INODE_NOATIME)
+       if (binode->flags & BTRFS_INODE_NOATIME)
                 new_fl |= S_NOATIME;
-       if (ip->flags & BTRFS_INODE_DIRSYNC)
+       if (binode->flags & BTRFS_INODE_DIRSYNC)
                 new_fl |= S_DIRSYNC;
  
         set_mask_bits(&inode->i_flags,
@@ -159,15 +161,16 @@ void btrfs_update_iflags(struct inode *inode)
  
  static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
  {
-       struct btrfs_inode *ip = BTRFS_I(file_inode(file));
-       unsigned int flags = btrfs_flags_to_ioctl(ip->flags);
+       struct btrfs_inode *binode = BTRFS_I(file_inode(file));
+       unsigned int flags = btrfs_inode_flags_to_fsflags(binode->flags);
  
         if (copy_to_user(arg, &flags, sizeof(flags)))
                 return -EFAULT;
         return 0;
  }
  
-static int check_flags(unsigned int flags)
+/* Check if @flags are a supported and valid set of FS_*_FL flags */
+static int check_fsflags(unsigned int flags)
  {
         if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
                       FS_NOATIME_FL | FS_NODUMP_FL | \
@@ -186,13 +189,13 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
  {
         struct inode *inode = file_inode(file);
         struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-       struct btrfs_inode *ip = BTRFS_I(inode);
-       struct btrfs_root *root = ip->root;
+       struct btrfs_inode *binode = BTRFS_I(inode);
+       struct btrfs_root *root = binode->root;
         struct btrfs_trans_handle *trans;
-       unsigned int flags, oldflags;
+       unsigned int fsflags, old_fsflags;
         int ret;
-       u64 ip_oldflags;
-       unsigned int i_oldflags;
+       u64 old_flags;
+       unsigned int old_i_flags;
         umode_t mode;
  
         if (!inode_owner_or_capable(inode))
@@ -201,10 +204,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
         if (btrfs_root_readonly(root))
                 return -EROFS;
  
-       if (copy_from_user(&flags, arg, sizeof(flags)))
+       if (copy_from_user(&fsflags, arg, sizeof(fsflags)))
                 return -EFAULT;
  
-       ret = check_flags(flags);
+       ret = check_fsflags(fsflags);
         if (ret)
                 return ret;
  
@@ -214,44 +217,44 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
  
         inode_lock(inode);
  
-       ip_oldflags = ip->flags;
-       i_oldflags = inode->i_flags;
+       old_flags = binode->flags;
+       old_i_flags = inode->i_flags;
         mode = inode->i_mode;
  
-       flags = btrfs_mask_flags(inode->i_mode, flags);
-       oldflags = btrfs_flags_to_ioctl(ip->flags);
-       if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
+       fsflags = btrfs_mask_fsflags_for_type(inode, fsflags);
+       old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags);
+       if ((fsflags ^ old_fsflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
                 if (!capable(CAP_LINUX_IMMUTABLE)) {
                         ret = -EPERM;
                         goto out_unlock;
                 }
         }
  
-       if (flags & FS_SYNC_FL)
-               ip->flags |= BTRFS_INODE_SYNC;
+       if (fsflags & FS_SYNC_FL)
+               binode->flags |= BTRFS_INODE_SYNC;
         else
-               ip->flags &= ~BTRFS_INODE_SYNC;
-       if (flags & FS_IMMUTABLE_FL)
-               ip->flags |= BTRFS_INODE_IMMUTABLE;
+               binode->flags &= ~BTRFS_INODE_SYNC;
+       if (fsflags & FS_IMMUTABLE_FL)
+               binode->flags |= BTRFS_INODE_IMMUTABLE;
         else
-               ip->flags &= ~BTRFS_INODE_IMMUTABLE;
-       if (flags & FS_APPEND_FL)
-               ip->flags |= BTRFS_INODE_APPEND;
+               binode->flags &= ~BTRFS_INODE_IMMUTABLE;
+       if (fsflags & FS_APPEND_FL)
+               binode->flags |= BTRFS_INODE_APPEND;
         else
-               ip->flags &= ~BTRFS_INODE_APPEND;
-       if (flags & FS_NODUMP_FL)
-               ip->flags |= BTRFS_INODE_NODUMP;
+               binode->flags &= ~BTRFS_INODE_APPEND;
+       if (fsflags & FS_NODUMP_FL)
+               binode->flags |= BTRFS_INODE_NODUMP;
         else
-               ip->flags &= ~BTRFS_INODE_NODUMP;
-       if (flags & FS_NOATIME_FL)
-               ip->flags |= BTRFS_INODE_NOATIME;
+               binode->flags &= ~BTRFS_INODE_NODUMP;
+       if (fsflags & FS_NOATIME_FL)
+               binode->flags |= BTRFS_INODE_NOATIME;
         else
-               ip->flags &= ~BTRFS_INODE_NOATIME;
-       if (flags & FS_DIRSYNC_FL)
-               ip->flags |= BTRFS_INODE_DIRSYNC;
+               binode->flags &= ~BTRFS_INODE_NOATIME;
+       if (fsflags & FS_DIRSYNC_FL)
+               binode->flags |= BTRFS_INODE_DIRSYNC;
         else
-               ip->flags &= ~BTRFS_INODE_DIRSYNC;
-       if (flags & FS_NOCOW_FL) {
+               binode->flags &= ~BTRFS_INODE_DIRSYNC;
+       if (fsflags & FS_NOCOW_FL) {
                 if (S_ISREG(mode)) {
                         /*
                          * It's safe to turn csums off here, no extents exist.
@@ -259,10 +262,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
                          * status of the file and will not set it.
                          */
                         if (inode->i_size == 0)
-                               ip->flags |= BTRFS_INODE_NODATACOW
-                                          | BTRFS_INODE_NODATASUM;
+                               binode->flags |= BTRFS_INODE_NODATACOW
+                                             | BTRFS_INODE_NODATASUM;
                 } else {
-                       ip->flags |= BTRFS_INODE_NODATACOW;
+                       binode->flags |= BTRFS_INODE_NODATACOW;
                 }
         } else {
                 /*
@@ -270,10 +273,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
                  */
                 if (S_ISREG(mode)) {
                         if (inode->i_size == 0)
-                               ip->flags &= ~(BTRFS_INODE_NODATACOW
+                               binode->flags &= ~(BTRFS_INODE_NODATACOW
                                              | BTRFS_INODE_NODATASUM);
                 } else {
-                       ip->flags &= ~BTRFS_INODE_NODATACOW;
+                       binode->flags &= ~BTRFS_INODE_NODATACOW;
                 }
         }
  
@@ -282,18 +285,18 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
          * flag may be changed automatically if compression code won't make
          * things smaller.
          */
-       if (flags & FS_NOCOMP_FL) {
-               ip->flags &= ~BTRFS_INODE_COMPRESS;
-               ip->flags |= BTRFS_INODE_NOCOMPRESS;
+       if (fsflags & FS_NOCOMP_FL) {
+               binode->flags &= ~BTRFS_INODE_COMPRESS;
+               binode->flags |= BTRFS_INODE_NOCOMPRESS;
  
                 ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);
                 if (ret && ret != -ENODATA)
                         goto out_drop;
-       } else if (flags & FS_COMPR_FL) {
+       } else if (fsflags & FS_COMPR_FL) {
                 const char *comp;
  
-               ip->flags |= BTRFS_INODE_COMPRESS;
-               ip->flags &= ~BTRFS_INODE_NOCOMPRESS;
+               binode->flags |= BTRFS_INODE_COMPRESS;
+               binode->flags &= ~BTRFS_INODE_NOCOMPRESS;
  
                 comp = btrfs_compress_type2str(fs_info->compress_type);
                 if (!comp || comp[0] == 0)
@@ -308,7 +311,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
                 ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);
                 if (ret && ret != -ENODATA)
                         goto out_drop;
-               ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
+               binode->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
         }
  
         trans = btrfs_start_transaction(root, 1);
@@ -317,7 +320,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
                 goto out_drop;
         }
  
-       btrfs_update_iflags(inode);
+       btrfs_sync_inode_flags_to_i_flags(inode);
         inode_inc_iversion(inode);
         inode->i_ctime = current_time(inode);
         ret = btrfs_update_inode(trans, root, inode);
@@ -325,8 +328,8 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
         btrfs_end_transaction(trans);
   out_drop:
         if (ret) {
-               ip->flags = ip_oldflags;
-               inode->i_flags = i_oldflags;
+               binode->flags = old_flags;
+               inode->i_flags = old_i_flags;
         }
  
   out_unlock:
@@ -335,6 +338,148 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
         return ret;
  }
  
+/*
+ * Map the btrfs internal inode flags in @flags to the FS_XFLAG_* bits that
+ * the FS_IOC_FSGETXATTR ioctl reports. Only APPEND, IMMUTABLE, NOATIME,
+ * NODUMP and SYNC have a counterpart; every other internal flag is silently
+ * left out of the result.
+ */
+static unsigned int btrfs_inode_flags_to_xflags(unsigned int flags)
+{
+       return ((flags & BTRFS_INODE_APPEND) ? FS_XFLAG_APPEND : 0) |
+              ((flags & BTRFS_INODE_IMMUTABLE) ? FS_XFLAG_IMMUTABLE : 0) |
+              ((flags & BTRFS_INODE_NOATIME) ? FS_XFLAG_NOATIME : 0) |
+              ((flags & BTRFS_INODE_NODUMP) ? FS_XFLAG_NODUMP : 0) |
+              ((flags & BTRFS_INODE_SYNC) ? FS_XFLAG_SYNC : 0);
+}
+
+/*
+ * Verify that @flags contains only FS_XFLAG_* bits that btrfs can store.
+ * Returns 0 if so, -EOPNOTSUPP if any other bit is set.
+ */
+static int check_xflags(unsigned int flags)
+{
+       const unsigned int supported = FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE |
+                                      FS_XFLAG_NOATIME | FS_XFLAG_NODUMP |
+                                      FS_XFLAG_SYNC;
+
+       return (flags & ~supported) ? -EOPNOTSUPP : 0;
+}
+
+/*
+ * Report the inode's flags to userspace as a struct fsxattr. Only fsx_xflags
+ * is filled in (from the btrfs internal inode flags); all other members are
+ * returned as zero.
+ *
+ * Returns 0 on success or -EFAULT if the result cannot be copied to @arg.
+ */
+static int btrfs_ioctl_fsgetxattr(struct file *file, void __user *arg)
+{
+       struct fsxattr fa;
+
+       /* Clear the whole structure so every byte copied out is defined */
+       memset(&fa, 0, sizeof(fa));
+       fa.fsx_xflags = btrfs_inode_flags_to_xflags(
+                               BTRFS_I(file_inode(file))->flags);
+
+       return copy_to_user(arg, &fa, sizeof(fa)) ? -EFAULT : 0;
+}
+
+/*
+ * Apply the xflags of a userspace struct fsxattr to the btrfs internal inode
+ * flags and write the inode back in a transaction.
+ *
+ * Only the flags accepted by check_xflags() are supported, and fsx_extsize,
+ * fsx_projid and fsx_cowextsize must all be zero; otherwise -EOPNOTSUPP is
+ * returned. Other errors: -EPERM (caller is not owner/capable, or lacks
+ * CAP_LINUX_IMMUTABLE when append/immutable is involved), -EROFS (read-only
+ * root), -EFAULT (bad @arg), or whatever the write/transaction helpers
+ * return. If anything fails after the flags were modified, binode->flags and
+ * inode->i_flags are restored to their previous values.
+ */
+static int btrfs_ioctl_fssetxattr(struct file *file, void __user *arg)
+{
+       struct inode *inode = file_inode(file);
+       struct btrfs_inode *binode = BTRFS_I(inode);
+       struct btrfs_root *root = binode->root;
+       struct btrfs_trans_handle *trans;
+       struct fsxattr fa;
+       /* Same width as in btrfs_ioctl_setflags() so the restore is lossless */
+       u64 old_flags;
+       unsigned int old_i_flags;
+       int ret = 0;
+
+       if (!inode_owner_or_capable(inode))
+               return -EPERM;
+
+       if (btrfs_root_readonly(root))
+               return -EROFS;
+
+       memset(&fa, 0, sizeof(fa));
+       if (copy_from_user(&fa, arg, sizeof(fa)))
+               return -EFAULT;
+
+       ret = check_xflags(fa.fsx_xflags);
+       if (ret)
+               return ret;
+
+       /* Only xflags can be set here, the other fsxattr items must be 0 */
+       if (fa.fsx_extsize != 0 || fa.fsx_projid != 0 || fa.fsx_cowextsize != 0)
+               return -EOPNOTSUPP;
+
+       ret = mnt_want_write_file(file);
+       if (ret)
+               return ret;
+
+       inode_lock(inode);
+
+       /* Snapshot both flag words so the error path can roll them back */
+       old_flags = binode->flags;
+       old_i_flags = inode->i_flags;
+
+       /* We need the capabilities to change append-only or immutable inode */
+       if (((old_flags & (BTRFS_INODE_APPEND | BTRFS_INODE_IMMUTABLE)) ||
+            (fa.fsx_xflags & (FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE))) &&
+           !capable(CAP_LINUX_IMMUTABLE)) {
+               ret = -EPERM;
+               goto out_unlock;
+       }
+
+       if (fa.fsx_xflags & FS_XFLAG_SYNC)
+               binode->flags |= BTRFS_INODE_SYNC;
+       else
+               binode->flags &= ~BTRFS_INODE_SYNC;
+       if (fa.fsx_xflags & FS_XFLAG_IMMUTABLE)
+               binode->flags |= BTRFS_INODE_IMMUTABLE;
+       else
+               binode->flags &= ~BTRFS_INODE_IMMUTABLE;
+       if (fa.fsx_xflags & FS_XFLAG_APPEND)
+               binode->flags |= BTRFS_INODE_APPEND;
+       else
+               binode->flags &= ~BTRFS_INODE_APPEND;
+       if (fa.fsx_xflags & FS_XFLAG_NODUMP)
+               binode->flags |= BTRFS_INODE_NODUMP;
+       else
+               binode->flags &= ~BTRFS_INODE_NODUMP;
+       if (fa.fsx_xflags & FS_XFLAG_NOATIME)
+               binode->flags |= BTRFS_INODE_NOATIME;
+       else
+               binode->flags &= ~BTRFS_INODE_NOATIME;
+
+       /* 1 item for the inode */
+       trans = btrfs_start_transaction(root, 1);
+       if (IS_ERR(trans)) {
+               ret = PTR_ERR(trans);
+               goto out_unlock;
+       }
+
+       btrfs_sync_inode_flags_to_i_flags(inode);
+       inode_inc_iversion(inode);
+       inode->i_ctime = current_time(inode);
+       ret = btrfs_update_inode(trans, root, inode);
+
+       btrfs_end_transaction(trans);
+
+out_unlock:
+       if (ret) {
+               binode->flags = old_flags;
+               inode->i_flags = old_i_flags;
+       }
+
+       inode_unlock(inode);
+       mnt_drop_write_file(file);
+
+       return ret;
+}
+
  static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
  {
         struct inode *inode = file_inode(file);
@@ -640,7 +785,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
         wait_event(root->subv_writers->wait,
                    percpu_counter_sum(&root->subv_writers->counter) == 0);
  
-       ret = btrfs_start_delalloc_inodes(root, 0);
+       ret = btrfs_start_delalloc_inodes(root);
         if (ret)
                 goto dec_and_free;
  
@@ -1457,7 +1602,6 @@ static noinline int btrfs_ioctl_resize(struct file *file,
                 return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
         }
  
-       mutex_lock(&fs_info->volume_mutex);
         vol_args = memdup_user(arg, sizeof(*vol_args));
         if (IS_ERR(vol_args)) {
                 ret = PTR_ERR(vol_args);
@@ -1565,7 +1709,6 @@ static noinline int btrfs_ioctl_resize(struct file *file,
  out_free:
         kfree(vol_args);
  out:
-       mutex_unlock(&fs_info->volume_mutex);
         clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
         mnt_drop_write_file(file);
         return ret;
@@ -1832,60 +1975,6 @@ out:
         return ret;
  }
  
-/*
- * helper to check if the subvolume references other subvolumes
- */
-static noinline int may_destroy_subvol(struct btrfs_root *root)
-{
-       struct btrfs_fs_info *fs_info = root->fs_info;
-       struct btrfs_path *path;
-       struct btrfs_dir_item *di;
-       struct btrfs_key key;
-       u64 dir_id;
-       int ret;
-
-       path = btrfs_alloc_path();
-       if (!path)
-               return -ENOMEM;
-
-       /* Make sure this root isn't set as the default subvol */
-       dir_id = btrfs_super_root_dir(fs_info->super_copy);
-       di = btrfs_lookup_dir_item(NULL, fs_info->tree_root, path,
-                                  dir_id, "default", 7, 0);
-       if (di && !IS_ERR(di)) {
-               btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
-               if (key.objectid == root->root_key.objectid) {
-                       ret = -EPERM;
-                       btrfs_err(fs_info,
-                                 "deleting default subvolume %llu is not allowed",
-                                 key.objectid);
-                       goto out;
-               }
-               btrfs_release_path(path);
-       }
-
-       key.objectid = root->root_key.objectid;
-       key.type = BTRFS_ROOT_REF_KEY;
-       key.offset = (u64)-1;
-
-       ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
-       if (ret < 0)
-               goto out;
-       BUG_ON(ret == 0);
-
-       ret = 0;
-       if (path->slots[0] > 0) {
-               path->slots[0]--;
-               btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
-               if (key.objectid == root->root_key.objectid &&
-                   key.type == BTRFS_ROOT_REF_KEY)
-                       ret = -ENOTEMPTY;
-       }
-out:
-       btrfs_free_path(path);
-       return ret;
-}
-
  static noinline int key_in_sk(struct btrfs_key *key,
                               struct btrfs_ioctl_search_key *sk)
  {
@@ -2309,12 +2398,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
         struct btrfs_root *root = BTRFS_I(dir)->root;
         struct btrfs_root *dest = NULL;
         struct btrfs_ioctl_vol_args *vol_args;
-       struct btrfs_trans_handle *trans;
-       struct btrfs_block_rsv block_rsv;
-       u64 root_flags;
-       u64 qgroup_reserved;
         int namelen;
-       int ret;
         int err = 0;
  
         if (!S_ISDIR(dir->i_mode))
@@ -2398,133 +2482,11 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
         }
  
         inode_lock(inode);
-
-       /*
-        * Don't allow to delete a subvolume with send in progress. This is
-        * inside the i_mutex so the error handling that has to drop the bit
-        * again is not run concurrently.
-        */
-       spin_lock(&dest->root_item_lock);
-       root_flags = btrfs_root_flags(&dest->root_item);
-       if (dest->send_in_progress == 0) {
-               btrfs_set_root_flags(&dest->root_item,
-                               root_flags | BTRFS_ROOT_SUBVOL_DEAD);
-               spin_unlock(&dest->root_item_lock);
-       } else {
-               spin_unlock(&dest->root_item_lock);
-               btrfs_warn(fs_info,
-                          "Attempt to delete subvolume %llu during send",
-                          dest->root_key.objectid);
-               err = -EPERM;
-               goto out_unlock_inode;
-       }
-
-       down_write(&fs_info->subvol_sem);
-
-       err = may_destroy_subvol(dest);
-       if (err)
-               goto out_up_write;
-
-       btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
-       /*
-        * One for dir inode, two for dir entries, two for root
-        * ref/backref.
-        */
-       err = btrfs_subvolume_reserve_metadata(root, &block_rsv,
-                                              5, &qgroup_reserved, true);
-       if (err)
-               goto out_up_write;
-
-       trans = btrfs_start_transaction(root, 0);
-       if (IS_ERR(trans)) {
-               err = PTR_ERR(trans);
-               goto out_release;
-       }
-       trans->block_rsv = &block_rsv;
-       trans->bytes_reserved = block_rsv.size;
-
-       btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));
-
-       ret = btrfs_unlink_subvol(trans, root, dir,
-                               dest->root_key.objectid,
-                               dentry->d_name.name,
-                               dentry->d_name.len);
-       if (ret) {
-               err = ret;
-               btrfs_abort_transaction(trans, ret);
-               goto out_end_trans;
-       }
-
-       btrfs_record_root_in_trans(trans, dest);
-
-       memset(&dest->root_item.drop_progress, 0,
-               sizeof(dest->root_item.drop_progress));
-       dest->root_item.drop_level = 0;
-       btrfs_set_root_refs(&dest->root_item, 0);
-
-       if (!test_and_set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &dest->state)) {
-               ret = btrfs_insert_orphan_item(trans,
-                                       fs_info->tree_root,
-                                       dest->root_key.objectid);
-               if (ret) {
-                       btrfs_abort_transaction(trans, ret);
-                       err = ret;
-                       goto out_end_trans;
-               }
-       }
-
-       ret = btrfs_uuid_tree_rem(trans, fs_info, dest->root_item.uuid,
-                                 BTRFS_UUID_KEY_SUBVOL,
-                                 dest->root_key.objectid);
-       if (ret && ret != -ENOENT) {
-               btrfs_abort_transaction(trans, ret);
-               err = ret;
-               goto out_end_trans;
-       }
-       if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) {
-               ret = btrfs_uuid_tree_rem(trans, fs_info,
-                                         dest->root_item.received_uuid,
-                                         BTRFS_UUID_KEY_RECEIVED_SUBVOL,
-                                         dest->root_key.objectid);
-               if (ret && ret != -ENOENT) {
-                       btrfs_abort_transaction(trans, ret);
-                       err = ret;
-                       goto out_end_trans;
-               }
-       }
-
-out_end_trans:
-       trans->block_rsv = NULL;
-       trans->bytes_reserved = 0;
-       ret = btrfs_end_transaction(trans);
-       if (ret && !err)
-               err = ret;
-       inode->i_flags |= S_DEAD;
-out_release:
-       btrfs_subvolume_release_metadata(fs_info, &block_rsv);
-out_up_write:
-       up_write(&fs_info->subvol_sem);
-       if (err) {
-               spin_lock(&dest->root_item_lock);
-               root_flags = btrfs_root_flags(&dest->root_item);
-               btrfs_set_root_flags(&dest->root_item,
-                               root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
-               spin_unlock(&dest->root_item_lock);
-       }
-out_unlock_inode:
+       err = btrfs_delete_subvolume(dir, dentry);
         inode_unlock(inode);
-       if (!err) {
-               d_invalidate(dentry);
-               btrfs_invalidate_inodes(dest);
+       if (!err)
                 d_delete(dentry);
-               ASSERT(dest->send_in_progress == 0);
  
-               /* the last ref */
-               if (dest->ino_cache_inode) {
-                       iput(dest->ino_cache_inode);
-                       dest->ino_cache_inode = NULL;
-               }
-       }
  out_dput:
         dput(dentry);
  out_unlock_dir:
@@ -2613,7 +2575,6 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg)
         if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags))
                 return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
  
-       mutex_lock(&fs_info->volume_mutex);
         vol_args = memdup_user(arg, sizeof(*vol_args));
         if (IS_ERR(vol_args)) {
                 ret = PTR_ERR(vol_args);
@@ -2628,7 +2589,6 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg)
  
         kfree(vol_args);
  out:
-       mutex_unlock(&fs_info->volume_mutex);
         clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
         return ret;
  }
@@ -2954,8 +2914,6 @@ static void btrfs_cmp_data_free(struct cmp_pages *cmp)
                         put_page(pg);
                 }
         }
-       kfree(cmp->src_pages);
-       kfree(cmp->dst_pages);
  }
  
  static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,
@@ -2964,40 +2922,14 @@ static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,
  {
         int ret;
         int num_pages = PAGE_ALIGN(len) >> PAGE_SHIFT;
-       struct page **src_pgarr, **dst_pgarr;
  
-       /*
-        * We must gather up all the pages before we initiate our
-        * extent locking. We use an array for the page pointers. Size
-        * of the array is bounded by len, which is in turn bounded by
-        * BTRFS_MAX_DEDUPE_LEN.
-        */
-       src_pgarr = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
-       dst_pgarr = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
-       if (!src_pgarr || !dst_pgarr) {
-               kfree(src_pgarr);
-               kfree(dst_pgarr);
-               return -ENOMEM;
-       }
         cmp->num_pages = num_pages;
-       cmp->src_pages = src_pgarr;
-       cmp->dst_pages = dst_pgarr;
  
-       /*
-        * If deduping ranges in the same inode, locking rules make it mandatory
-        * to always lock pages in ascending order to avoid deadlocks with
-        * concurrent tasks (such as starting writeback/delalloc).
-        */
-       if (src == dst && dst_loff < loff) {
-               swap(src_pgarr, dst_pgarr);
-               swap(loff, dst_loff);
-       }
-
-       ret = gather_extent_pages(src, src_pgarr, cmp->num_pages, loff);
+       ret = gather_extent_pages(src, cmp->src_pages, num_pages, loff);
         if (ret)
                 goto out;
  
-       ret = gather_extent_pages(dst, dst_pgarr, cmp->num_pages, dst_loff);
+       ret = gather_extent_pages(dst, cmp->dst_pages, num_pages, dst_loff);
  
  out:
         if (ret)
@@ -3067,31 +2999,23 @@ static int extent_same_check_offsets(struct inode *inode, u64 off, u64 *plen,
         return 0;
  }
  
-static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
-                            struct inode *dst, u64 dst_loff)
+static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen,
+                                  struct inode *dst, u64 dst_loff,
+                                  struct cmp_pages *cmp)
  {
         int ret;
         u64 len = olen;
-       struct cmp_pages cmp;
         bool same_inode = (src == dst);
         u64 same_lock_start = 0;
         u64 same_lock_len = 0;
  
-       if (len == 0)
-               return 0;
-
-       if (same_inode)
-               inode_lock(src);
-       else
-               btrfs_double_inode_lock(src, dst);
-
         ret = extent_same_check_offsets(src, loff, &len, olen);
         if (ret)
-               goto out_unlock;
+               return ret;
  
         ret = extent_same_check_offsets(dst, dst_loff, &len, olen);
         if (ret)
-               goto out_unlock;
+               return ret;
  
         if (same_inode) {
                 /*
@@ -3108,32 +3032,21 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
                  * allow an unaligned length so long as it ends at
                  * i_size.
                  */
-               if (len != olen) {
-                       ret = -EINVAL;
-                       goto out_unlock;
-               }
+               if (len != olen)
+                       return -EINVAL;
  
                 /* Check for overlapping ranges */
-               if (dst_loff + len > loff && dst_loff < loff + len) {
-                       ret = -EINVAL;
-                       goto out_unlock;
-               }
+               if (dst_loff + len > loff && dst_loff < loff + len)
+                       return -EINVAL;
  
                 same_lock_start = min_t(u64, loff, dst_loff);
                 same_lock_len = max_t(u64, loff, dst_loff) + len - same_lock_start;
         }
  
-       /* don't make the dst file partly checksummed */
-       if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
-           (BTRFS_I(dst)->flags & BTRFS_INODE_NODATASUM)) {
-               ret = -EINVAL;
-               goto out_unlock;
-       }
-
  again:
-       ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp);
+       ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, cmp);
         if (ret)
-               goto out_unlock;
+               return ret;
  
         if (same_inode)
                 ret = lock_extent_range(src, same_lock_start, same_lock_len,
@@ -3154,7 +3067,7 @@ again:
                  * Ranges in the io trees already unlocked. Now unlock all
                  * pages before waiting for all IO to complete.
                  */
-               btrfs_cmp_data_free(&cmp);
+               btrfs_cmp_data_free(cmp);
                 if (same_inode) {
                         btrfs_wait_ordered_range(src, same_lock_start,
                                                  same_lock_len);
@@ -3167,12 +3080,12 @@ again:
         ASSERT(ret == 0);
         if (WARN_ON(ret)) {
                 /* ranges in the io trees already unlocked */
-               btrfs_cmp_data_free(&cmp);
+               btrfs_cmp_data_free(cmp);
                 return ret;
         }
  
         /* pass original length for comparison so we stay within i_size */
-       ret = btrfs_cmp_data(olen, &cmp);
+       ret = btrfs_cmp_data(olen, cmp);
         if (ret == 0)
                 ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1);
  
@@ -3182,18 +3095,91 @@ again:
         else
                 btrfs_double_extent_unlock(src, loff, dst, dst_loff, len);
  
-       btrfs_cmp_data_free(&cmp);
+       btrfs_cmp_data_free(cmp);
+
+       return ret;
+}
+
+#define BTRFS_MAX_DEDUPE_LEN   SZ_16M
+
+/*
+ * Deduplicate @olen bytes at offset @loff of @src against offset @dst_loff
+ * of @dst.
+ *
+ * The range is split into BTRFS_MAX_DEDUPE_LEN sized chunks plus a final
+ * tail; each piece is handed to btrfs_extent_same_range(). One pair of page
+ * pointer arrays is allocated up front and reused for every piece. The inode
+ * lock(s) are held across the whole operation.
+ *
+ * Returns 0 on success (including @olen == 0), -EINVAL if exactly one of the
+ * two inodes has BTRFS_INODE_NODATASUM set, -ENOMEM if the page arrays cannot
+ * be allocated, or the first error from btrfs_extent_same_range().
+ */
+static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
+                            struct inode *dst, u64 dst_loff)
+{
+       int ret;
+       struct cmp_pages cmp;
+       int num_pages = PAGE_ALIGN(BTRFS_MAX_DEDUPE_LEN) >> PAGE_SHIFT;
+       bool same_inode = (src == dst);
+       u64 i, tail_len, chunk_count;
+
+       if (olen == 0)
+               return 0;
+
+       if (same_inode)
+               inode_lock(src);
+       else
+               btrfs_double_inode_lock(src, dst);
+
+       /*
+        * don't make the dst file partly checksummed; the page arrays are not
+        * allocated yet, so bail out past the kvfree() calls
+        */
+       if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
+           (BTRFS_I(dst)->flags & BTRFS_INODE_NODATASUM)) {
+               ret = -EINVAL;
+               goto out_unlock;
+       }
+
+       tail_len = olen % BTRFS_MAX_DEDUPE_LEN;
+       chunk_count = div_u64(olen, BTRFS_MAX_DEDUPE_LEN);
+       /* Less than one full chunk: size the arrays for the tail only */
+       if (chunk_count == 0)
+               num_pages = PAGE_ALIGN(tail_len) >> PAGE_SHIFT;
+
+       /*
+        * If deduping ranges in the same inode, locking rules make it
+        * mandatory to always lock pages in ascending order to avoid deadlocks
+        * with concurrent tasks (such as starting writeback/delalloc).
+        */
+       if (same_inode && dst_loff < loff)
+               swap(loff, dst_loff);
+
+       /*
+        * We must gather up all the pages before we initiate our extent
+        * locking. We use an array for the page pointers. Size of the array is
+        * bounded by len, which is in turn bounded by BTRFS_MAX_DEDUPE_LEN.
+        */
+       cmp.src_pages = kvmalloc_array(num_pages, sizeof(struct page *),
+                                      GFP_KERNEL | __GFP_ZERO);
+       cmp.dst_pages = kvmalloc_array(num_pages, sizeof(struct page *),
+                                      GFP_KERNEL | __GFP_ZERO);
+       if (!cmp.src_pages || !cmp.dst_pages) {
+               ret = -ENOMEM;
+               goto out_free;
+       }
+
+       for (i = 0; i < chunk_count; i++) {
+               ret = btrfs_extent_same_range(src, loff, BTRFS_MAX_DEDUPE_LEN,
+                                             dst, dst_loff, &cmp);
+               if (ret)
+                       goto out_free;
+
+               loff += BTRFS_MAX_DEDUPE_LEN;
+               dst_loff += BTRFS_MAX_DEDUPE_LEN;
+       }
+
+       if (tail_len > 0)
+               ret = btrfs_extent_same_range(src, loff, tail_len, dst,
+                                             dst_loff, &cmp);
+
+out_free:
+       /* kvfree() accepts NULL, so a half-failed allocation is handled too */
+       kvfree(cmp.src_pages);
+       kvfree(cmp.dst_pages);
+
out_unlock:
         if (same_inode)
                 inode_unlock(src);
         else
                 btrfs_double_inode_unlock(src, dst);
  
         return ret;
  }
  
-#define BTRFS_MAX_DEDUPE_LEN   SZ_16M
-
  ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
                                 struct file *dst_file, u64 dst_loff)
  {
@@ -3202,9 +3188,6 @@ ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
         u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
         ssize_t res;
  
-       if (olen > BTRFS_MAX_DEDUPE_LEN)
-               olen = BTRFS_MAX_DEDUPE_LEN;
-
         if (WARN_ON_ONCE(bs < PAGE_SIZE)) {
                 /*
                  * Btrfs does not support blocksize < page_size. As a
@@ -4007,8 +3990,8 @@ out:
         return ret;
  }
  
-void btrfs_get_block_group_info(struct list_head *groups_list,
-                               struct btrfs_ioctl_space_info *space)
+static void get_block_group_info(struct list_head *groups_list,
+                                struct btrfs_ioctl_space_info *space)
  {
         struct btrfs_block_group_cache *block_group;
  
@@ -4124,8 +4107,8 @@ static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
                 down_read(&info->groups_sem);
                 for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
                         if (!list_empty(&info->block_groups[c])) {
-                               btrfs_get_block_group_info(
-                                       &info->block_groups[c], &space);
+                               get_block_group_info(&info->block_groups[c],
+                                                    &space);
                                 memcpy(dest, &space, sizeof(space));
                                 dest++;
                                 space_args.total_spaces++;
@@ -4490,14 +4473,14 @@ out_loi:
         return ret;
  }
  
-void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
+void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
                                struct btrfs_ioctl_balance_args *bargs)
  {
         struct btrfs_balance_control *bctl = fs_info->balance_ctl;
  
         bargs->flags = bctl->flags;
  
-       if (atomic_read(&fs_info->balance_running))
+       if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags))
                 bargs->state |= BTRFS_BALANCE_STATE_RUNNING;
         if (atomic_read(&fs_info->balance_pause_req))
                 bargs->state |= BTRFS_BALANCE_STATE_PAUSE_REQ;
@@ -4508,13 +4491,9 @@ void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
         memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta));
         memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys));
  
-       if (lock) {
-               spin_lock(&fs_info->balance_lock);
-               memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
-               spin_unlock(&fs_info->balance_lock);
-       } else {
-               memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
-       }
+       spin_lock(&fs_info->balance_lock);
+       memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
+       spin_unlock(&fs_info->balance_lock);
  }
  
  static long btrfs_ioctl_balance(struct file *file, void __user *arg)
@@ -4535,7 +4514,6 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
  
  again:
         if (!test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
-               mutex_lock(&fs_info->volume_mutex);
                 mutex_lock(&fs_info->balance_mutex);
                 need_unlock = true;
                 goto locked;
@@ -4550,21 +4528,22 @@ again:
         mutex_lock(&fs_info->balance_mutex);
         if (fs_info->balance_ctl) {
                 /* this is either (2) or (3) */
-               if (!atomic_read(&fs_info->balance_running)) {
+               if (!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
                         mutex_unlock(&fs_info->balance_mutex);
-                       if (!mutex_trylock(&fs_info->volume_mutex))
-                               goto again;
+                       /*
+                        * The lock was released to allow other waiters to
+                        * continue; retake it and reexamine the status.
+                        */
                         mutex_lock(&fs_info->balance_mutex);
  
                         if (fs_info->balance_ctl &&
-                           !atomic_read(&fs_info->balance_running)) {
+                           !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
                                 /* this is (3) */
                                 need_unlock = false;
                                 goto locked;
                         }
  
                         mutex_unlock(&fs_info->balance_mutex);
-                       mutex_unlock(&fs_info->volume_mutex);
                         goto again;
                 } else {
                         /* this is (2) */
@@ -4617,7 +4596,6 @@ locked:
                 goto out_bargs;
         }
  
-       bctl->fs_info = fs_info;
         if (arg) {
                 memcpy(&bctl->data, &bargs->data, sizeof(bctl->data));
                 memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta));
@@ -4636,14 +4614,14 @@ locked:
  
  do_balance:
         /*
-        * Ownership of bctl and filesystem flag BTRFS_FS_EXCL_OP
-        * goes to to btrfs_balance.  bctl is freed in __cancel_balance,
-        * or, if restriper was paused all the way until unmount, in
-        * free_fs_info.  The flag is cleared in __cancel_balance.
+        * Ownership of bctl and filesystem flag BTRFS_FS_EXCL_OP goes to
+        * btrfs_balance.  bctl is freed in reset_balance_state, or, if
+        * restriper was paused all the way until unmount, in free_fs_info.
+        * The flag should be cleared after reset_balance_state.
          */
         need_unlock = false;
  
-       ret = btrfs_balance(bctl, bargs);
+       ret = btrfs_balance(fs_info, bctl, bargs);
         bctl = NULL;
  
         if (arg) {
@@ -4657,7 +4635,6 @@ out_bargs:
         kfree(bargs);
  out_unlock:
         mutex_unlock(&fs_info->balance_mutex);
-       mutex_unlock(&fs_info->volume_mutex);
         if (need_unlock)
                 clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
  out:
@@ -4701,7 +4678,7 @@ static long btrfs_ioctl_balance_progress(struct btrfs_fs_info *fs_info,
                 goto out;
         }
  
-       update_ioctl_balance_args(fs_info, 1, bargs);
+       btrfs_update_ioctl_balance_args(fs_info, bargs);
  
         if (copy_to_user(arg, bargs, sizeof(*bargs)))
                 ret = -EFAULT;
@@ -5497,7 +5474,7 @@ long btrfs_ioctl(struct file *file, unsigned int
         case BTRFS_IOC_SYNC: {
                 int ret;
  
-               ret = btrfs_start_delalloc_roots(fs_info, 0, -1);
+               ret = btrfs_start_delalloc_roots(fs_info, -1);
                 if (ret)
                         return ret;
                 ret = btrfs_sync_fs(inode->i_sb, 1);
@@ -5565,6 +5542,10 @@ long btrfs_ioctl(struct file *file, unsigned int
                 return btrfs_ioctl_get_features(file, argp);
         case BTRFS_IOC_SET_FEATURES:
                 return btrfs_ioctl_set_features(file, argp);
+       case FS_IOC_FSGETXATTR:
+               return btrfs_ioctl_fsgetxattr(file, argp);
+       case FS_IOC_FSSETXATTR:
+               return btrfs_ioctl_fssetxattr(file, argp);
         }
  
         return -ENOTTY;