* may be used to exclude some operations from running concurrently without any
* modifications to the list (see write_all_supers)
*
- * volume_mutex
- * ------------
- * coarse lock owned by a mounted filesystem; used to exclude some operations
- * that cannot run in parallel and affect the higher-level properties of the
- * filesystem like: device add/deleting/resize/replace, or balance
- *
* balance_mutex
* -------------
* protects balance structures (status, state) and context accessed from
return fs_devs;
}
-static void free_device(struct btrfs_device *device)
+void btrfs_free_device(struct btrfs_device *device)
{
rcu_string_free(device->name);
bio_put(device->flush_bio);
device = list_entry(fs_devices->devices.next,
struct btrfs_device, dev_list);
list_del(&device->dev_list);
- free_device(device);
+ btrfs_free_device(device);
}
kfree(fs_devices);
}
/*
* Returns a pointer to a new btrfs_device on success; ERR_PTR() on error.
* Returned struct is not linked onto any lists and must be destroyed using
- * free_device.
+ * btrfs_free_device.
*/
static struct btrfs_device *__alloc_device(void)
{
} else {
fs_devs->num_devices--;
list_del(&dev->dev_list);
- free_device(dev);
+ btrfs_free_device(dev);
}
}
}
name = rcu_string_strdup(path, GFP_NOFS);
if (!name) {
- free_device(device);
+ btrfs_free_device(device);
return ERR_PTR(-ENOMEM);
}
rcu_assign_pointer(device->name, name);
name = rcu_string_strdup(orig_dev->name->str,
GFP_KERNEL);
if (!name) {
- free_device(device);
+ btrfs_free_device(device);
goto error;
}
rcu_assign_pointer(device->name, name);
}
list_del_init(&device->dev_list);
fs_devices->num_devices--;
- free_device(device);
+ btrfs_free_device(device);
}
if (fs_devices->seed) {
struct btrfs_device *device;
device = container_of(head, struct btrfs_device, rcu);
- free_device(device);
+ btrfs_free_device(device);
}
static void btrfs_close_bdev(struct btrfs_device *device)
u64 num_devices;
int ret = 0;
- mutex_lock(&fs_info->volume_mutex);
mutex_lock(&uuid_mutex);
num_devices = fs_devices->num_devices;
out:
mutex_unlock(&uuid_mutex);
- mutex_unlock(&fs_info->volume_mutex);
return ret;
error_undo:
struct btrfs_device *tmp;
devices = &fs_info->fs_devices->devices;
- /*
- * It is safe to read the devices since the volume_mutex
- * is held by the caller.
- */
list_for_each_entry(tmp, devices, dev_list) {
if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
&tmp->dev_state) && !tmp->bdev) {
if (trans)
btrfs_end_transaction(trans);
error_free_device:
- free_device(device);
+ btrfs_free_device(device);
error:
blkdev_put(bdev, FMODE_EXCL);
if (seeding_dev && !unlocked) {
return ret;
}
-int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
- const char *device_path,
- struct btrfs_device *srcdev,
- struct btrfs_device **device_out)
-{
- struct btrfs_device *device;
- struct block_device *bdev;
- struct list_head *devices;
- struct rcu_string *name;
- u64 devid = BTRFS_DEV_REPLACE_DEVID;
- int ret = 0;
-
- *device_out = NULL;
- if (fs_info->fs_devices->seeding) {
- btrfs_err(fs_info, "the filesystem is a seed filesystem!");
- return -EINVAL;
- }
-
- bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
- fs_info->bdev_holder);
- if (IS_ERR(bdev)) {
- btrfs_err(fs_info, "target device %s is invalid!", device_path);
- return PTR_ERR(bdev);
- }
-
- filemap_write_and_wait(bdev->bd_inode->i_mapping);
-
- devices = &fs_info->fs_devices->devices;
- list_for_each_entry(device, devices, dev_list) {
- if (device->bdev == bdev) {
- btrfs_err(fs_info,
- "target device is in the filesystem!");
- ret = -EEXIST;
- goto error;
- }
- }
-
-
- if (i_size_read(bdev->bd_inode) <
- btrfs_device_get_total_bytes(srcdev)) {
- btrfs_err(fs_info,
- "target device is smaller than source device!");
- ret = -EINVAL;
- goto error;
- }
-
-
- device = btrfs_alloc_device(NULL, &devid, NULL);
- if (IS_ERR(device)) {
- ret = PTR_ERR(device);
- goto error;
- }
-
- name = rcu_string_strdup(device_path, GFP_KERNEL);
- if (!name) {
- free_device(device);
- ret = -ENOMEM;
- goto error;
- }
- rcu_assign_pointer(device->name, name);
-
- mutex_lock(&fs_info->fs_devices->device_list_mutex);
- set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
- device->generation = 0;
- device->io_width = fs_info->sectorsize;
- device->io_align = fs_info->sectorsize;
- device->sector_size = fs_info->sectorsize;
- device->total_bytes = btrfs_device_get_total_bytes(srcdev);
- device->disk_total_bytes = btrfs_device_get_disk_total_bytes(srcdev);
- device->bytes_used = btrfs_device_get_bytes_used(srcdev);
- device->commit_total_bytes = srcdev->commit_total_bytes;
- device->commit_bytes_used = device->bytes_used;
- device->fs_info = fs_info;
- device->bdev = bdev;
- set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
- set_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
- device->mode = FMODE_EXCL;
- device->dev_stats_valid = 1;
- set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
- device->fs_devices = fs_info->fs_devices;
- list_add(&device->dev_list, &fs_info->fs_devices->devices);
- fs_info->fs_devices->num_devices++;
- fs_info->fs_devices->open_devices++;
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
-
- *device_out = device;
- return ret;
-
-error:
- blkdev_put(bdev, FMODE_EXCL);
- return ret;
-}
-
static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
struct btrfs_device *device)
{
}
/*
- * Should be called with both balance and volume mutexes held to
- * serialize other volume operations (add_dev/rm_dev/resize) with
- * restriper. Same goes for unset_balance_control.
+ * Should be called with balance mutex held to protect against checking the
+ * balance status or progress. Same goes for reset_balance_state.
*/
static void set_balance_control(struct btrfs_balance_control *bctl)
{
spin_unlock(&fs_info->balance_lock);
}
-static void unset_balance_control(struct btrfs_fs_info *fs_info)
+/*
+ * Clear the balance status in fs_info and delete the balance item from disk.
+ */
+static void reset_balance_state(struct btrfs_fs_info *fs_info)
{
struct btrfs_balance_control *bctl = fs_info->balance_ctl;
+ int ret;
BUG_ON(!fs_info->balance_ctl);
spin_unlock(&fs_info->balance_lock);
kfree(bctl);
+ ret = del_balance_item(fs_info);
+ if (ret)
+ btrfs_handle_fs_error(fs_info, ret, NULL);
}
/*
atomic_read(&fs_info->balance_cancel_req) == 0);
}
-static void __cancel_balance(struct btrfs_fs_info *fs_info)
-{
- int ret;
-
- unset_balance_control(fs_info);
- ret = del_balance_item(fs_info);
- if (ret)
- btrfs_handle_fs_error(fs_info, ret, NULL);
-
- clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
-}
-
/* Non-zero return value signifies invalidity */
static inline int validate_convert_profile(struct btrfs_balance_args *bctl_arg,
u64 allowed)
}
/*
- * Should be called with both balance and volume mutexes held
+ * Should be called with balance mutexe held
*/
int btrfs_balance(struct btrfs_balance_control *bctl,
struct btrfs_ioctl_balance_args *bargs)
spin_unlock(&fs_info->balance_lock);
}
- atomic_inc(&fs_info->balance_running);
+ ASSERT(!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
+ set_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags);
mutex_unlock(&fs_info->balance_mutex);
ret = __btrfs_balance(fs_info);
mutex_lock(&fs_info->balance_mutex);
- atomic_dec(&fs_info->balance_running);
+ clear_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags);
if (bargs) {
memset(bargs, 0, sizeof(*bargs));
- update_ioctl_balance_args(fs_info, 0, bargs);
+ btrfs_update_ioctl_balance_args(fs_info, bargs);
}
if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
balance_need_close(fs_info)) {
- __cancel_balance(fs_info);
+ reset_balance_state(fs_info);
+ clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
}
wake_up(&fs_info->balance_wait_q);
return ret;
out:
if (bctl->flags & BTRFS_BALANCE_RESUME)
- __cancel_balance(fs_info);
- else {
+ reset_balance_state(fs_info);
+ else
kfree(bctl);
- clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
- }
+ clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
+
return ret;
}
struct btrfs_fs_info *fs_info = data;
int ret = 0;
- mutex_lock(&fs_info->volume_mutex);
mutex_lock(&fs_info->balance_mutex);
-
if (fs_info->balance_ctl) {
btrfs_info(fs_info, "continuing balance");
ret = btrfs_balance(fs_info->balance_ctl, NULL);
}
-
mutex_unlock(&fs_info->balance_mutex);
- mutex_unlock(&fs_info->volume_mutex);
return ret;
}
{
struct task_struct *tsk;
- spin_lock(&fs_info->balance_lock);
+ mutex_lock(&fs_info->balance_mutex);
if (!fs_info->balance_ctl) {
- spin_unlock(&fs_info->balance_lock);
+ mutex_unlock(&fs_info->balance_mutex);
return 0;
}
- spin_unlock(&fs_info->balance_lock);
+ mutex_unlock(&fs_info->balance_mutex);
if (btrfs_test_opt(fs_info, SKIP_BALANCE)) {
btrfs_info(fs_info, "force skipping balance");
btrfs_balance_sys(leaf, item, &disk_bargs);
btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
- WARN_ON(test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags));
+ /*
+ * This should never happen, as the paused balance state is recovered
+ * during mount without any chance of other exclusive ops to collide.
+ *
+ * This gives the exclusive op status to balance and keeps in paused
+ * state until user intervention (cancel or umount). If the ownership
+ * cannot be assigned, show a message but do not fail. The balance
+ * is in a paused state and must have fs_info::balance_ctl properly
+ * set up.
+ */
+ if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags))
+ btrfs_warn(fs_info,
+ "cannot set exclusive op status to balance, resume manually");
- mutex_lock(&fs_info->volume_mutex);
mutex_lock(&fs_info->balance_mutex);
-
set_balance_control(bctl);
-
mutex_unlock(&fs_info->balance_mutex);
- mutex_unlock(&fs_info->volume_mutex);
out:
btrfs_free_path(path);
return ret;
return -ENOTCONN;
}
- if (atomic_read(&fs_info->balance_running)) {
+ if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
atomic_inc(&fs_info->balance_pause_req);
mutex_unlock(&fs_info->balance_mutex);
wait_event(fs_info->balance_wait_q,
- atomic_read(&fs_info->balance_running) == 0);
+ !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
mutex_lock(&fs_info->balance_mutex);
/* we are good with balance_ctl ripped off from under us */
- BUG_ON(atomic_read(&fs_info->balance_running));
+ BUG_ON(test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
atomic_dec(&fs_info->balance_pause_req);
} else {
ret = -ENOTCONN;
int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
{
- if (sb_rdonly(fs_info->sb))
- return -EROFS;
-
mutex_lock(&fs_info->balance_mutex);
if (!fs_info->balance_ctl) {
mutex_unlock(&fs_info->balance_mutex);
return -ENOTCONN;
}
+ /*
+ * A paused balance with the item stored on disk can be resumed at
+ * mount time if the mount is read-write. Otherwise it's still paused
+ * and we must not allow cancelling as it deletes the item.
+ */
+ if (sb_rdonly(fs_info->sb)) {
+ mutex_unlock(&fs_info->balance_mutex);
+ return -EROFS;
+ }
+
atomic_inc(&fs_info->balance_cancel_req);
/*
* if we are running just wait and return, balance item is
* deleted in btrfs_balance in this case
*/
- if (atomic_read(&fs_info->balance_running)) {
+ if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
mutex_unlock(&fs_info->balance_mutex);
wait_event(fs_info->balance_wait_q,
- atomic_read(&fs_info->balance_running) == 0);
+ !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
mutex_lock(&fs_info->balance_mutex);
} else {
- /* __cancel_balance needs volume_mutex */
mutex_unlock(&fs_info->balance_mutex);
- mutex_lock(&fs_info->volume_mutex);
+ /*
+ * Lock released to allow other waiters to continue, we'll
+ * reexamine the status again.
+ */
mutex_lock(&fs_info->balance_mutex);
- if (fs_info->balance_ctl)
- __cancel_balance(fs_info);
-
- mutex_unlock(&fs_info->volume_mutex);
+ if (fs_info->balance_ctl) {
+ reset_balance_state(fs_info);
+ clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
+ }
}
- BUG_ON(fs_info->balance_ctl || atomic_read(&fs_info->balance_running));
+ BUG_ON(fs_info->balance_ctl ||
+ test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
atomic_dec(&fs_info->balance_cancel_req);
mutex_unlock(&fs_info->balance_mutex);
return 0;
*
* Return: a pointer to a new &struct btrfs_device on success; ERR_PTR()
* on error. Returned struct is not linked onto any lists and must be
- * destroyed with free_device.
+ * destroyed with btrfs_free_device.
*/
struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
const u64 *devid,
ret = find_next_devid(fs_info, &tmp);
if (ret) {
- free_device(dev);
+ btrfs_free_device(dev);
return ERR_PTR(ret);
}
}