btrfs: use kvzalloc for EXTENT_SAME temporary data
[sfrench/cifs-2.6.git] / fs / btrfs / ioctl.c
index 592ef10a66046d1151d67bfc2e96b64f56823afa..aeef6cd8aaeb242feba6b409582d97b11ac83c0e 100644 (file)
@@ -93,20 +93,22 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
                       int no_time_update);
 
 /* Mask out flags that are inappropriate for the given type of inode. */
-static unsigned int btrfs_mask_flags(umode_t mode, unsigned int flags)
+static unsigned int btrfs_mask_fsflags_for_type(struct inode *inode,
+               unsigned int flags)
 {
-       if (S_ISDIR(mode))
+       if (S_ISDIR(inode->i_mode))
                return flags;
-       else if (S_ISREG(mode))
+       else if (S_ISREG(inode->i_mode))
                return flags & ~FS_DIRSYNC_FL;
        else
                return flags & (FS_NODUMP_FL | FS_NOATIME_FL);
 }
 
 /*
- * Export inode flags to the format expected by the FS_IOC_GETFLAGS ioctl.
+ * Export internal inode flags to the format expected by the FS_IOC_GETFLAGS
+ * ioctl.
  */
-static unsigned int btrfs_flags_to_ioctl(unsigned int flags)
+static unsigned int btrfs_inode_flags_to_fsflags(unsigned int flags)
 {
        unsigned int iflags = 0;
 
@@ -136,20 +138,20 @@ static unsigned int btrfs_flags_to_ioctl(unsigned int flags)
 /*
  * Update inode->i_flags based on the btrfs internal flags.
  */
-void btrfs_update_iflags(struct inode *inode)
+void btrfs_sync_inode_flags_to_i_flags(struct inode *inode)
 {
-       struct btrfs_inode *ip = BTRFS_I(inode);
+       struct btrfs_inode *binode = BTRFS_I(inode);
        unsigned int new_fl = 0;
 
-       if (ip->flags & BTRFS_INODE_SYNC)
+       if (binode->flags & BTRFS_INODE_SYNC)
                new_fl |= S_SYNC;
-       if (ip->flags & BTRFS_INODE_IMMUTABLE)
+       if (binode->flags & BTRFS_INODE_IMMUTABLE)
                new_fl |= S_IMMUTABLE;
-       if (ip->flags & BTRFS_INODE_APPEND)
+       if (binode->flags & BTRFS_INODE_APPEND)
                new_fl |= S_APPEND;
-       if (ip->flags & BTRFS_INODE_NOATIME)
+       if (binode->flags & BTRFS_INODE_NOATIME)
                new_fl |= S_NOATIME;
-       if (ip->flags & BTRFS_INODE_DIRSYNC)
+       if (binode->flags & BTRFS_INODE_DIRSYNC)
                new_fl |= S_DIRSYNC;
 
        set_mask_bits(&inode->i_flags,
@@ -159,15 +161,16 @@ void btrfs_update_iflags(struct inode *inode)
 
 static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
 {
-       struct btrfs_inode *ip = BTRFS_I(file_inode(file));
-       unsigned int flags = btrfs_flags_to_ioctl(ip->flags);
+       struct btrfs_inode *binode = BTRFS_I(file_inode(file));
+       unsigned int flags = btrfs_inode_flags_to_fsflags(binode->flags);
 
        if (copy_to_user(arg, &flags, sizeof(flags)))
                return -EFAULT;
        return 0;
 }
 
-static int check_flags(unsigned int flags)
+/* Check if @flags are a supported and valid set of FS_*_FL flags */
+static int check_fsflags(unsigned int flags)
 {
        if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
                      FS_NOATIME_FL | FS_NODUMP_FL | \
@@ -186,13 +189,13 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
 {
        struct inode *inode = file_inode(file);
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-       struct btrfs_inode *ip = BTRFS_I(inode);
-       struct btrfs_root *root = ip->root;
+       struct btrfs_inode *binode = BTRFS_I(inode);
+       struct btrfs_root *root = binode->root;
        struct btrfs_trans_handle *trans;
-       unsigned int flags, oldflags;
+       unsigned int fsflags, old_fsflags;
        int ret;
-       u64 ip_oldflags;
-       unsigned int i_oldflags;
+       u64 old_flags;
+       unsigned int old_i_flags;
        umode_t mode;
 
        if (!inode_owner_or_capable(inode))
@@ -201,10 +204,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
        if (btrfs_root_readonly(root))
                return -EROFS;
 
-       if (copy_from_user(&flags, arg, sizeof(flags)))
+       if (copy_from_user(&fsflags, arg, sizeof(fsflags)))
                return -EFAULT;
 
-       ret = check_flags(flags);
+       ret = check_fsflags(fsflags);
        if (ret)
                return ret;
 
@@ -214,44 +217,44 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
 
        inode_lock(inode);
 
-       ip_oldflags = ip->flags;
-       i_oldflags = inode->i_flags;
+       old_flags = binode->flags;
+       old_i_flags = inode->i_flags;
        mode = inode->i_mode;
 
-       flags = btrfs_mask_flags(inode->i_mode, flags);
-       oldflags = btrfs_flags_to_ioctl(ip->flags);
-       if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
+       fsflags = btrfs_mask_fsflags_for_type(inode, fsflags);
+       old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags);
+       if ((fsflags ^ old_fsflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
                if (!capable(CAP_LINUX_IMMUTABLE)) {
                        ret = -EPERM;
                        goto out_unlock;
                }
        }
 
-       if (flags & FS_SYNC_FL)
-               ip->flags |= BTRFS_INODE_SYNC;
+       if (fsflags & FS_SYNC_FL)
+               binode->flags |= BTRFS_INODE_SYNC;
        else
-               ip->flags &= ~BTRFS_INODE_SYNC;
-       if (flags & FS_IMMUTABLE_FL)
-               ip->flags |= BTRFS_INODE_IMMUTABLE;
+               binode->flags &= ~BTRFS_INODE_SYNC;
+       if (fsflags & FS_IMMUTABLE_FL)
+               binode->flags |= BTRFS_INODE_IMMUTABLE;
        else
-               ip->flags &= ~BTRFS_INODE_IMMUTABLE;
-       if (flags & FS_APPEND_FL)
-               ip->flags |= BTRFS_INODE_APPEND;
+               binode->flags &= ~BTRFS_INODE_IMMUTABLE;
+       if (fsflags & FS_APPEND_FL)
+               binode->flags |= BTRFS_INODE_APPEND;
        else
-               ip->flags &= ~BTRFS_INODE_APPEND;
-       if (flags & FS_NODUMP_FL)
-               ip->flags |= BTRFS_INODE_NODUMP;
+               binode->flags &= ~BTRFS_INODE_APPEND;
+       if (fsflags & FS_NODUMP_FL)
+               binode->flags |= BTRFS_INODE_NODUMP;
        else
-               ip->flags &= ~BTRFS_INODE_NODUMP;
-       if (flags & FS_NOATIME_FL)
-               ip->flags |= BTRFS_INODE_NOATIME;
+               binode->flags &= ~BTRFS_INODE_NODUMP;
+       if (fsflags & FS_NOATIME_FL)
+               binode->flags |= BTRFS_INODE_NOATIME;
        else
-               ip->flags &= ~BTRFS_INODE_NOATIME;
-       if (flags & FS_DIRSYNC_FL)
-               ip->flags |= BTRFS_INODE_DIRSYNC;
+               binode->flags &= ~BTRFS_INODE_NOATIME;
+       if (fsflags & FS_DIRSYNC_FL)
+               binode->flags |= BTRFS_INODE_DIRSYNC;
        else
-               ip->flags &= ~BTRFS_INODE_DIRSYNC;
-       if (flags & FS_NOCOW_FL) {
+               binode->flags &= ~BTRFS_INODE_DIRSYNC;
+       if (fsflags & FS_NOCOW_FL) {
                if (S_ISREG(mode)) {
                        /*
                         * It's safe to turn csums off here, no extents exist.
@@ -259,10 +262,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
                         * status of the file and will not set it.
                         */
                        if (inode->i_size == 0)
-                               ip->flags |= BTRFS_INODE_NODATACOW
-                                          | BTRFS_INODE_NODATASUM;
+                               binode->flags |= BTRFS_INODE_NODATACOW
+                                             | BTRFS_INODE_NODATASUM;
                } else {
-                       ip->flags |= BTRFS_INODE_NODATACOW;
+                       binode->flags |= BTRFS_INODE_NODATACOW;
                }
        } else {
                /*
@@ -270,10 +273,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
                 */
                if (S_ISREG(mode)) {
                        if (inode->i_size == 0)
-                               ip->flags &= ~(BTRFS_INODE_NODATACOW
+                               binode->flags &= ~(BTRFS_INODE_NODATACOW
                                             | BTRFS_INODE_NODATASUM);
                } else {
-                       ip->flags &= ~BTRFS_INODE_NODATACOW;
+                       binode->flags &= ~BTRFS_INODE_NODATACOW;
                }
        }
 
@@ -282,18 +285,18 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
         * flag may be changed automatically if compression code won't make
         * things smaller.
         */
-       if (flags & FS_NOCOMP_FL) {
-               ip->flags &= ~BTRFS_INODE_COMPRESS;
-               ip->flags |= BTRFS_INODE_NOCOMPRESS;
+       if (fsflags & FS_NOCOMP_FL) {
+               binode->flags &= ~BTRFS_INODE_COMPRESS;
+               binode->flags |= BTRFS_INODE_NOCOMPRESS;
 
                ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);
                if (ret && ret != -ENODATA)
                        goto out_drop;
-       } else if (flags & FS_COMPR_FL) {
+       } else if (fsflags & FS_COMPR_FL) {
                const char *comp;
 
-               ip->flags |= BTRFS_INODE_COMPRESS;
-               ip->flags &= ~BTRFS_INODE_NOCOMPRESS;
+               binode->flags |= BTRFS_INODE_COMPRESS;
+               binode->flags &= ~BTRFS_INODE_NOCOMPRESS;
 
                comp = btrfs_compress_type2str(fs_info->compress_type);
                if (!comp || comp[0] == 0)
@@ -308,7 +311,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
                ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);
                if (ret && ret != -ENODATA)
                        goto out_drop;
-               ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
+               binode->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
        }
 
        trans = btrfs_start_transaction(root, 1);
@@ -317,7 +320,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
                goto out_drop;
        }
 
-       btrfs_update_iflags(inode);
+       btrfs_sync_inode_flags_to_i_flags(inode);
        inode_inc_iversion(inode);
        inode->i_ctime = current_time(inode);
        ret = btrfs_update_inode(trans, root, inode);
@@ -325,8 +328,8 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
        btrfs_end_transaction(trans);
  out_drop:
        if (ret) {
-               ip->flags = ip_oldflags;
-               inode->i_flags = i_oldflags;
+               binode->flags = old_flags;
+               inode->i_flags = old_i_flags;
        }
 
  out_unlock:
@@ -335,6 +338,148 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
        return ret;
 }
 
+/*
+ * Convert btrfs internal inode flag bits into the FS_XFLAG_* bits that the
+ * FS_IOC_FSGETXATTR ioctl reports in struct fsxattr. Only APPEND, IMMUTABLE,
+ * NOATIME, NODUMP and SYNC have a counterpart here; any other bit set in
+ * @flags is silently dropped.
+ *
+ * Returns the resulting FS_XFLAG_* mask, 0 when none of the mapped bits is
+ * set.
+ */
+static unsigned int btrfs_inode_flags_to_xflags(unsigned int flags)
+{
+       unsigned int out = 0;
+
+       /* Each mapped bit contributes its FS_XFLAG_* twin or nothing. */
+       out |= (flags & BTRFS_INODE_APPEND) ? FS_XFLAG_APPEND : 0;
+       out |= (flags & BTRFS_INODE_IMMUTABLE) ? FS_XFLAG_IMMUTABLE : 0;
+       out |= (flags & BTRFS_INODE_NOATIME) ? FS_XFLAG_NOATIME : 0;
+       out |= (flags & BTRFS_INODE_NODUMP) ? FS_XFLAG_NODUMP : 0;
+       out |= (flags & BTRFS_INODE_SYNC) ? FS_XFLAG_SYNC : 0;
+
+       return out;
+}
+
+/* Return 0 if @flags has only supported FS_XFLAG_* bits, else -EOPNOTSUPP */
+static int check_xflags(unsigned int flags)
+{
+       const unsigned int known = FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE |
+               FS_XFLAG_NOATIME | FS_XFLAG_NODUMP | FS_XFLAG_SYNC;
+
+       return (flags & ~known) ? -EOPNOTSUPP : 0;
+}
+
+/*
+ * Fill a zeroed struct fsxattr with the xflags derived from the inode's btrfs
+ * flags and copy it to @arg. Returns 0, or -EFAULT if the copy-out fails.
+ */
+static int btrfs_ioctl_fsgetxattr(struct file *file, void __user *arg)
+{
+       struct inode *inode = file_inode(file);
+       struct fsxattr xattr;
+
+       /* Clear the whole struct first; only fsx_xflags is filled in below. */
+       memset(&xattr, 0, sizeof(xattr));
+       xattr.fsx_xflags = btrfs_inode_flags_to_xflags(BTRFS_I(inode)->flags);
+
+       if (copy_to_user(arg, &xattr, sizeof(xattr)) != 0)
+               return -EFAULT;
+       return 0;
+}
+
+/*
+ * Apply the xflags of a user-supplied struct fsxattr to the inode's btrfs
+ * flags, then call btrfs_update_inode() inside a transaction. Returns 0 or a
+ * negative errno; once the old flags were saved, a failure restores them.
+ */
+static int btrfs_ioctl_fssetxattr(struct file *file, void __user *arg)
+{
+       struct inode *inode = file_inode(file);
+       struct btrfs_inode *binode = BTRFS_I(inode);
+       struct btrfs_root *root = binode->root;
+       struct btrfs_trans_handle *trans;
+       struct fsxattr fa;
+       unsigned int saved_flags, saved_i_flags, xflags;
+       int ret;
+
+       if (!inode_owner_or_capable(inode))
+               return -EPERM;
+       if (btrfs_root_readonly(root))
+               return -EROFS;
+
+       memset(&fa, 0, sizeof(fa));
+       if (copy_from_user(&fa, arg, sizeof(fa)))
+               return -EFAULT;
+       xflags = fa.fsx_xflags;
+
+       ret = check_xflags(xflags);
+       if (ret)
+               return ret;
+       if (fa.fsx_extsize != 0 || fa.fsx_projid != 0 || fa.fsx_cowextsize != 0)
+               return -EOPNOTSUPP;
+
+       ret = mnt_want_write_file(file);
+       if (ret)
+               return ret;
+
+       inode_lock(inode);
+       saved_flags = binode->flags;
+       saved_i_flags = inode->i_flags;
+
+       /* Append-only/immutable, current or requested, needs the capability */
+       if ((saved_flags & (BTRFS_INODE_APPEND | BTRFS_INODE_IMMUTABLE)) ||
+           (xflags & (FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE))) {
+               if (!capable(CAP_LINUX_IMMUTABLE)) {
+                       ret = -EPERM;
+                       goto out_unlock;
+               }
+       }
+
+       if (!(xflags & FS_XFLAG_SYNC))
+               binode->flags &= ~BTRFS_INODE_SYNC;
+       else
+               binode->flags |= BTRFS_INODE_SYNC;
+       if (!(xflags & FS_XFLAG_IMMUTABLE))
+               binode->flags &= ~BTRFS_INODE_IMMUTABLE;
+       else
+               binode->flags |= BTRFS_INODE_IMMUTABLE;
+       if (!(xflags & FS_XFLAG_APPEND))
+               binode->flags &= ~BTRFS_INODE_APPEND;
+       else
+               binode->flags |= BTRFS_INODE_APPEND;
+       if (!(xflags & FS_XFLAG_NODUMP))
+               binode->flags &= ~BTRFS_INODE_NODUMP;
+       else
+               binode->flags |= BTRFS_INODE_NODUMP;
+       if (!(xflags & FS_XFLAG_NOATIME))
+               binode->flags &= ~BTRFS_INODE_NOATIME;
+       else
+               binode->flags |= BTRFS_INODE_NOATIME;
+
+       /* One item is reserved, for the inode */
+       trans = btrfs_start_transaction(root, 1);
+       if (IS_ERR(trans)) {
+               ret = PTR_ERR(trans);
+               goto out_unlock;
+       }
+
+       btrfs_sync_inode_flags_to_i_flags(inode);
+       inode_inc_iversion(inode);
+       inode->i_ctime = current_time(inode);
+       ret = btrfs_update_inode(trans, root, inode);
+       btrfs_end_transaction(trans);
+
+out_unlock:
+       if (ret) {
+               binode->flags = saved_flags;
+               inode->i_flags = saved_i_flags;
+       }
+       inode_unlock(inode);
+       mnt_drop_write_file(file);
+       return ret;
+}
+
 static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
 {
        struct inode *inode = file_inode(file);
@@ -640,7 +785,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
        wait_event(root->subv_writers->wait,
                   percpu_counter_sum(&root->subv_writers->counter) == 0);
 
-       ret = btrfs_start_delalloc_inodes(root, 0);
+       ret = btrfs_start_delalloc_inodes(root);
        if (ret)
                goto dec_and_free;
 
@@ -1457,7 +1602,6 @@ static noinline int btrfs_ioctl_resize(struct file *file,
                return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
        }
 
-       mutex_lock(&fs_info->volume_mutex);
        vol_args = memdup_user(arg, sizeof(*vol_args));
        if (IS_ERR(vol_args)) {
                ret = PTR_ERR(vol_args);
@@ -1565,7 +1709,6 @@ static noinline int btrfs_ioctl_resize(struct file *file,
 out_free:
        kfree(vol_args);
 out:
-       mutex_unlock(&fs_info->volume_mutex);
        clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
        mnt_drop_write_file(file);
        return ret;
@@ -2255,12 +2398,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
        struct btrfs_root *root = BTRFS_I(dir)->root;
        struct btrfs_root *dest = NULL;
        struct btrfs_ioctl_vol_args *vol_args;
-       struct btrfs_trans_handle *trans;
-       struct btrfs_block_rsv block_rsv;
-       u64 root_flags;
-       u64 qgroup_reserved;
        int namelen;
-       int ret;
        int err = 0;
 
        if (!S_ISDIR(dir->i_mode))
@@ -2344,133 +2482,11 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
        }
 
        inode_lock(inode);
-
-       /*
-        * Don't allow to delete a subvolume with send in progress. This is
-        * inside the i_mutex so the error handling that has to drop the bit
-        * again is not run concurrently.
-        */
-       spin_lock(&dest->root_item_lock);
-       root_flags = btrfs_root_flags(&dest->root_item);
-       if (dest->send_in_progress == 0) {
-               btrfs_set_root_flags(&dest->root_item,
-                               root_flags | BTRFS_ROOT_SUBVOL_DEAD);
-               spin_unlock(&dest->root_item_lock);
-       } else {
-               spin_unlock(&dest->root_item_lock);
-               btrfs_warn(fs_info,
-                          "Attempt to delete subvolume %llu during send",
-                          dest->root_key.objectid);
-               err = -EPERM;
-               goto out_unlock_inode;
-       }
-
-       down_write(&fs_info->subvol_sem);
-
-       err = may_destroy_subvol(dest);
-       if (err)
-               goto out_up_write;
-
-       btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
-       /*
-        * One for dir inode, two for dir entries, two for root
-        * ref/backref.
-        */
-       err = btrfs_subvolume_reserve_metadata(root, &block_rsv,
-                                              5, &qgroup_reserved, true);
-       if (err)
-               goto out_up_write;
-
-       trans = btrfs_start_transaction(root, 0);
-       if (IS_ERR(trans)) {
-               err = PTR_ERR(trans);
-               goto out_release;
-       }
-       trans->block_rsv = &block_rsv;
-       trans->bytes_reserved = block_rsv.size;
-
-       btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));
-
-       ret = btrfs_unlink_subvol(trans, root, dir,
-                               dest->root_key.objectid,
-                               dentry->d_name.name,
-                               dentry->d_name.len);
-       if (ret) {
-               err = ret;
-               btrfs_abort_transaction(trans, ret);
-               goto out_end_trans;
-       }
-
-       btrfs_record_root_in_trans(trans, dest);
-
-       memset(&dest->root_item.drop_progress, 0,
-               sizeof(dest->root_item.drop_progress));
-       dest->root_item.drop_level = 0;
-       btrfs_set_root_refs(&dest->root_item, 0);
-
-       if (!test_and_set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &dest->state)) {
-               ret = btrfs_insert_orphan_item(trans,
-                                       fs_info->tree_root,
-                                       dest->root_key.objectid);
-               if (ret) {
-                       btrfs_abort_transaction(trans, ret);
-                       err = ret;
-                       goto out_end_trans;
-               }
-       }
-
-       ret = btrfs_uuid_tree_rem(trans, fs_info, dest->root_item.uuid,
-                                 BTRFS_UUID_KEY_SUBVOL,
-                                 dest->root_key.objectid);
-       if (ret && ret != -ENOENT) {
-               btrfs_abort_transaction(trans, ret);
-               err = ret;
-               goto out_end_trans;
-       }
-       if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) {
-               ret = btrfs_uuid_tree_rem(trans, fs_info,
-                                         dest->root_item.received_uuid,
-                                         BTRFS_UUID_KEY_RECEIVED_SUBVOL,
-                                         dest->root_key.objectid);
-               if (ret && ret != -ENOENT) {
-                       btrfs_abort_transaction(trans, ret);
-                       err = ret;
-                       goto out_end_trans;
-               }
-       }
-
-out_end_trans:
-       trans->block_rsv = NULL;
-       trans->bytes_reserved = 0;
-       ret = btrfs_end_transaction(trans);
-       if (ret && !err)
-               err = ret;
-       inode->i_flags |= S_DEAD;
-out_release:
-       btrfs_subvolume_release_metadata(fs_info, &block_rsv);
-out_up_write:
-       up_write(&fs_info->subvol_sem);
-       if (err) {
-               spin_lock(&dest->root_item_lock);
-               root_flags = btrfs_root_flags(&dest->root_item);
-               btrfs_set_root_flags(&dest->root_item,
-                               root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
-               spin_unlock(&dest->root_item_lock);
-       }
-out_unlock_inode:
+       err = btrfs_delete_subvolume(dir, dentry);
        inode_unlock(inode);
-       if (!err) {
-               d_invalidate(dentry);
-               btrfs_invalidate_inodes(dest);
+       if (!err)
                d_delete(dentry);
-               ASSERT(dest->send_in_progress == 0);
 
-               /* the last ref */
-               if (dest->ino_cache_inode) {
-                       iput(dest->ino_cache_inode);
-                       dest->ino_cache_inode = NULL;
-               }
-       }
 out_dput:
        dput(dentry);
 out_unlock_dir:
@@ -2559,7 +2575,6 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg)
        if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags))
                return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
 
-       mutex_lock(&fs_info->volume_mutex);
        vol_args = memdup_user(arg, sizeof(*vol_args));
        if (IS_ERR(vol_args)) {
                ret = PTR_ERR(vol_args);
@@ -2574,7 +2589,6 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg)
 
        kfree(vol_args);
 out:
-       mutex_unlock(&fs_info->volume_mutex);
        clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
        return ret;
 }
@@ -2900,8 +2914,6 @@ static void btrfs_cmp_data_free(struct cmp_pages *cmp)
                        put_page(pg);
                }
        }
-       kfree(cmp->src_pages);
-       kfree(cmp->dst_pages);
 }
 
 static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,
@@ -2910,40 +2922,14 @@ static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,
 {
        int ret;
        int num_pages = PAGE_ALIGN(len) >> PAGE_SHIFT;
-       struct page **src_pgarr, **dst_pgarr;
 
-       /*
-        * We must gather up all the pages before we initiate our
-        * extent locking. We use an array for the page pointers. Size
-        * of the array is bounded by len, which is in turn bounded by
-        * BTRFS_MAX_DEDUPE_LEN.
-        */
-       src_pgarr = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
-       dst_pgarr = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
-       if (!src_pgarr || !dst_pgarr) {
-               kfree(src_pgarr);
-               kfree(dst_pgarr);
-               return -ENOMEM;
-       }
        cmp->num_pages = num_pages;
-       cmp->src_pages = src_pgarr;
-       cmp->dst_pages = dst_pgarr;
-
-       /*
-        * If deduping ranges in the same inode, locking rules make it mandatory
-        * to always lock pages in ascending order to avoid deadlocks with
-        * concurrent tasks (such as starting writeback/delalloc).
-        */
-       if (src == dst && dst_loff < loff) {
-               swap(src_pgarr, dst_pgarr);
-               swap(loff, dst_loff);
-       }
 
-       ret = gather_extent_pages(src, src_pgarr, cmp->num_pages, loff);
+       ret = gather_extent_pages(src, cmp->src_pages, num_pages, loff);
        if (ret)
                goto out;
 
-       ret = gather_extent_pages(dst, dst_pgarr, cmp->num_pages, dst_loff);
+       ret = gather_extent_pages(dst, cmp->dst_pages, num_pages, dst_loff);
 
 out:
        if (ret)
@@ -3013,31 +2999,23 @@ static int extent_same_check_offsets(struct inode *inode, u64 off, u64 *plen,
        return 0;
 }
 
-static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
-                            struct inode *dst, u64 dst_loff)
+static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen,
+                                  struct inode *dst, u64 dst_loff,
+                                  struct cmp_pages *cmp)
 {
        int ret;
        u64 len = olen;
-       struct cmp_pages cmp;
        bool same_inode = (src == dst);
        u64 same_lock_start = 0;
        u64 same_lock_len = 0;
 
-       if (len == 0)
-               return 0;
-
-       if (same_inode)
-               inode_lock(src);
-       else
-               btrfs_double_inode_lock(src, dst);
-
        ret = extent_same_check_offsets(src, loff, &len, olen);
        if (ret)
-               goto out_unlock;
+               return ret;
 
        ret = extent_same_check_offsets(dst, dst_loff, &len, olen);
        if (ret)
-               goto out_unlock;
+               return ret;
 
        if (same_inode) {
                /*
@@ -3054,32 +3032,21 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
                 * allow an unaligned length so long as it ends at
                 * i_size.
                 */
-               if (len != olen) {
-                       ret = -EINVAL;
-                       goto out_unlock;
-               }
+               if (len != olen)
+                       return -EINVAL;
 
                /* Check for overlapping ranges */
-               if (dst_loff + len > loff && dst_loff < loff + len) {
-                       ret = -EINVAL;
-                       goto out_unlock;
-               }
+               if (dst_loff + len > loff && dst_loff < loff + len)
+                       return -EINVAL;
 
                same_lock_start = min_t(u64, loff, dst_loff);
                same_lock_len = max_t(u64, loff, dst_loff) + len - same_lock_start;
        }
 
-       /* don't make the dst file partly checksummed */
-       if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
-           (BTRFS_I(dst)->flags & BTRFS_INODE_NODATASUM)) {
-               ret = -EINVAL;
-               goto out_unlock;
-       }
-
 again:
-       ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp);
+       ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, cmp);
        if (ret)
-               goto out_unlock;
+               return ret;
 
        if (same_inode)
                ret = lock_extent_range(src, same_lock_start, same_lock_len,
@@ -3100,7 +3067,7 @@ again:
                 * Ranges in the io trees already unlocked. Now unlock all
                 * pages before waiting for all IO to complete.
                 */
-               btrfs_cmp_data_free(&cmp);
+               btrfs_cmp_data_free(cmp);
                if (same_inode) {
                        btrfs_wait_ordered_range(src, same_lock_start,
                                                 same_lock_len);
@@ -3113,12 +3080,12 @@ again:
        ASSERT(ret == 0);
        if (WARN_ON(ret)) {
                /* ranges in the io trees already unlocked */
-               btrfs_cmp_data_free(&cmp);
+               btrfs_cmp_data_free(cmp);
                return ret;
        }
 
        /* pass original length for comparison so we stay within i_size */
-       ret = btrfs_cmp_data(olen, &cmp);
+       ret = btrfs_cmp_data(olen, cmp);
        if (ret == 0)
                ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1);
 
@@ -3128,18 +3095,91 @@ again:
        else
                btrfs_double_extent_unlock(src, loff, dst, dst_loff, len);
 
-       btrfs_cmp_data_free(&cmp);
+       btrfs_cmp_data_free(cmp);
+
+       return ret;
+}
+
+#define BTRFS_MAX_DEDUPE_LEN   SZ_16M
+
+/*
+ * Dedupe @olen bytes at @loff in @src against @dst_loff in @dst. The range
+ * is processed in BTRFS_MAX_DEDUPE_LEN chunks plus a tail, all sharing one
+ * pair of page pointer arrays. Returns 0 or a negative errno.
+ */
+static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
+                            struct inode *dst, u64 dst_loff)
+{
+       int ret;
+       struct cmp_pages cmp = { .src_pages = NULL, .dst_pages = NULL };
+       int num_pages = PAGE_ALIGN(BTRFS_MAX_DEDUPE_LEN) >> PAGE_SHIFT;
+       bool same_inode = (src == dst);
+       u64 i, tail_len, chunk_count;
+
+       if (olen == 0)
+               return 0;
+
+       if (same_inode)
+               inode_lock(src);
+       else
+               btrfs_double_inode_lock(src, dst);
+
+       /* don't make the dst file partly checksummed */
+       if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
+           (BTRFS_I(dst)->flags & BTRFS_INODE_NODATASUM)) {
+               ret = -EINVAL;
+               goto out_unlock;
+       }
+
+       tail_len = olen % BTRFS_MAX_DEDUPE_LEN;
+       chunk_count = div_u64(olen, BTRFS_MAX_DEDUPE_LEN);
+       if (chunk_count == 0)
+               num_pages = PAGE_ALIGN(tail_len) >> PAGE_SHIFT;
+
+       /*
+        * Within one inode pages must be locked in ascending order to avoid
+        * deadlocks with concurrent tasks (such as writeback/delalloc).
+        */
+       if (same_inode && dst_loff < loff)
+               swap(loff, dst_loff);
+
+       /* One pair of page pointer arrays is shared by all chunks below. */
+       cmp.src_pages = kvmalloc_array(num_pages, sizeof(struct page *),
+                                      GFP_KERNEL | __GFP_ZERO);
+       cmp.dst_pages = kvmalloc_array(num_pages, sizeof(struct page *),
+                                      GFP_KERNEL | __GFP_ZERO);
+       if (!cmp.src_pages || !cmp.dst_pages) {
+               ret = -ENOMEM;
+               goto out_unlock;
+       }
+
+       for (i = 0; i < chunk_count; i++) {
+               ret = btrfs_extent_same_range(src, loff, BTRFS_MAX_DEDUPE_LEN,
+                                             dst, dst_loff, &cmp);
+               if (ret)
+                       goto out_unlock;
+
+               loff += BTRFS_MAX_DEDUPE_LEN;
+               dst_loff += BTRFS_MAX_DEDUPE_LEN;
+       }
+
+       if (tail_len > 0)
+               ret = btrfs_extent_same_range(src, loff, tail_len, dst,
+                                             dst_loff, &cmp);
+
 out_unlock:
        if (same_inode)
                inode_unlock(src);
        else
                btrfs_double_inode_unlock(src, dst);
 
+       /* cmp's pointers start out NULL; kvfree(NULL) is a no-op. */
+       kvfree(cmp.src_pages);
+       kvfree(cmp.dst_pages);
+
        return ret;
 }
 
-#define BTRFS_MAX_DEDUPE_LEN   SZ_16M
-
 ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
                                struct file *dst_file, u64 dst_loff)
 {
@@ -3148,9 +3188,6 @@ ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
        u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
        ssize_t res;
 
-       if (olen > BTRFS_MAX_DEDUPE_LEN)
-               olen = BTRFS_MAX_DEDUPE_LEN;
-
        if (WARN_ON_ONCE(bs < PAGE_SIZE)) {
                /*
                 * Btrfs does not support blocksize < page_size. As a
@@ -4436,14 +4473,14 @@ out_loi:
        return ret;
 }
 
-void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
+void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
                               struct btrfs_ioctl_balance_args *bargs)
 {
        struct btrfs_balance_control *bctl = fs_info->balance_ctl;
 
        bargs->flags = bctl->flags;
 
-       if (atomic_read(&fs_info->balance_running))
+       if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags))
                bargs->state |= BTRFS_BALANCE_STATE_RUNNING;
        if (atomic_read(&fs_info->balance_pause_req))
                bargs->state |= BTRFS_BALANCE_STATE_PAUSE_REQ;
@@ -4454,13 +4491,9 @@ void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
        memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta));
        memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys));
 
-       if (lock) {
-               spin_lock(&fs_info->balance_lock);
-               memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
-               spin_unlock(&fs_info->balance_lock);
-       } else {
-               memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
-       }
+       spin_lock(&fs_info->balance_lock);
+       memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
+       spin_unlock(&fs_info->balance_lock);
 }
 
 static long btrfs_ioctl_balance(struct file *file, void __user *arg)
@@ -4481,7 +4514,6 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
 
 again:
        if (!test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
-               mutex_lock(&fs_info->volume_mutex);
                mutex_lock(&fs_info->balance_mutex);
                need_unlock = true;
                goto locked;
@@ -4496,21 +4528,22 @@ again:
        mutex_lock(&fs_info->balance_mutex);
        if (fs_info->balance_ctl) {
                /* this is either (2) or (3) */
-               if (!atomic_read(&fs_info->balance_running)) {
+               if (!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
                        mutex_unlock(&fs_info->balance_mutex);
-                       if (!mutex_trylock(&fs_info->volume_mutex))
-                               goto again;
+                       /*
+                        * Lock released to allow other waiters to continue;
+                        * the status is reexamined once we retake it.
+                        */
                        mutex_lock(&fs_info->balance_mutex);
 
                        if (fs_info->balance_ctl &&
-                           !atomic_read(&fs_info->balance_running)) {
+                           !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
                                /* this is (3) */
                                need_unlock = false;
                                goto locked;
                        }
 
                        mutex_unlock(&fs_info->balance_mutex);
-                       mutex_unlock(&fs_info->volume_mutex);
                        goto again;
                } else {
                        /* this is (2) */
@@ -4563,7 +4596,6 @@ locked:
                goto out_bargs;
        }
 
-       bctl->fs_info = fs_info;
        if (arg) {
                memcpy(&bctl->data, &bargs->data, sizeof(bctl->data));
                memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta));
@@ -4582,14 +4614,14 @@ locked:
 
 do_balance:
        /*
-        * Ownership of bctl and filesystem flag BTRFS_FS_EXCL_OP
-        * goes to to btrfs_balance.  bctl is freed in __cancel_balance,
-        * or, if restriper was paused all the way until unmount, in
-        * free_fs_info.  The flag is cleared in __cancel_balance.
+        * Ownership of bctl and filesystem flag BTRFS_FS_EXCL_OP goes to
+        * btrfs_balance.  bctl is freed in reset_balance_state, or, if
+        * restriper was paused all the way until unmount, in free_fs_info.
+        * The flag should be cleared after reset_balance_state.
         */
        need_unlock = false;
 
-       ret = btrfs_balance(bctl, bargs);
+       ret = btrfs_balance(fs_info, bctl, bargs);
        bctl = NULL;
 
        if (arg) {
@@ -4603,7 +4635,6 @@ out_bargs:
        kfree(bargs);
 out_unlock:
        mutex_unlock(&fs_info->balance_mutex);
-       mutex_unlock(&fs_info->volume_mutex);
        if (need_unlock)
                clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
 out:
@@ -4647,7 +4678,7 @@ static long btrfs_ioctl_balance_progress(struct btrfs_fs_info *fs_info,
                goto out;
        }
 
-       update_ioctl_balance_args(fs_info, 1, bargs);
+       btrfs_update_ioctl_balance_args(fs_info, bargs);
 
        if (copy_to_user(arg, bargs, sizeof(*bargs)))
                ret = -EFAULT;
@@ -5443,7 +5474,7 @@ long btrfs_ioctl(struct file *file, unsigned int
        case BTRFS_IOC_SYNC: {
                int ret;
 
-               ret = btrfs_start_delalloc_roots(fs_info, 0, -1);
+               ret = btrfs_start_delalloc_roots(fs_info, -1);
                if (ret)
                        return ret;
                ret = btrfs_sync_fs(inode->i_sb, 1);
@@ -5511,6 +5542,10 @@ long btrfs_ioctl(struct file *file, unsigned int
                return btrfs_ioctl_get_features(file, argp);
        case BTRFS_IOC_SET_FEATURES:
                return btrfs_ioctl_set_features(file, argp);
+       case FS_IOC_FSGETXATTR:
+               return btrfs_ioctl_fsgetxattr(file, argp);
+       case FS_IOC_FSSETXATTR:
+               return btrfs_ioctl_fssetxattr(file, argp);
        }
 
        return -ENOTTY;