* device_list_mutex
* chunk_mutex
* balance_mutex
+ *
+ *
+ * Exclusive operations, BTRFS_FS_EXCL_OP
+ * ======================================
+ *
+ * Maintains the exclusivity of the following operations that apply to the
+ * whole filesystem and cannot run in parallel.
+ *
+ * - Balance (*)
+ * - Device add
+ * - Device remove
+ * - Device replace (*)
+ * - Resize
+ *
+ * The device operations (as above) can be in one of the following states:
+ *
+ * - Running state
+ * - Paused state
+ * - Completed state
+ *
+ * Only device operations marked with (*) can go into the Paused state, for
+ * one of the following reasons:
+ *
+ * - ioctl (only Balance can be Paused through ioctl)
+ * - filesystem remounted as read-only
+ * - filesystem unmounted and mounted as read-only
+ * - system power-cycle and filesystem mounted as read-only
+ * - filesystem or device errors leading to forced read-only
+ *
+ * BTRFS_FS_EXCL_OP flag is set and cleared using atomic operations.
+ * While an operation is in the Paused state, the BTRFS_FS_EXCL_OP flag
+ * remains set.
+ * A device operation in Paused or Running state can be canceled or resumed
+ * either by ioctl (Balance only) or when remounted as read-write.
+ * BTRFS_FS_EXCL_OP flag is cleared when the device operation is canceled or
+ * completed.
*/
DEFINE_MUTEX(uuid_mutex);
INIT_LIST_HEAD(&fs_devs->devices);
INIT_LIST_HEAD(&fs_devs->resized_devices);
INIT_LIST_HEAD(&fs_devs->alloc_list);
- INIT_LIST_HEAD(&fs_devs->list);
+ INIT_LIST_HEAD(&fs_devs->fs_list);
if (fsid)
memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE);
return fs_devs;
}
-static void free_device(struct btrfs_device *device)
+void btrfs_free_device(struct btrfs_device *device)
{
rcu_string_free(device->name);
bio_put(device->flush_bio);
device = list_entry(fs_devices->devices.next,
struct btrfs_device, dev_list);
list_del(&device->dev_list);
- free_device(device);
+ btrfs_free_device(device);
}
kfree(fs_devices);
}
while (!list_empty(&fs_uuids)) {
fs_devices = list_entry(fs_uuids.next,
- struct btrfs_fs_devices, list);
- list_del(&fs_devices->list);
+ struct btrfs_fs_devices, fs_list);
+ list_del(&fs_devices->fs_list);
free_fs_devices(fs_devices);
}
}
/*
* Returns a pointer to a new btrfs_device on success; ERR_PTR() on error.
* Returned struct is not linked onto any lists and must be destroyed using
- * free_device.
+ * btrfs_free_device.
*/
static struct btrfs_device *__alloc_device(void)
{
static struct btrfs_device *find_device(struct btrfs_fs_devices *fs_devices,
u64 devid, const u8 *uuid)
{
- struct list_head *head = &fs_devices->devices;
struct btrfs_device *dev;
- list_for_each_entry(dev, head, dev_list) {
+ list_for_each_entry(dev, &fs_devices->devices, dev_list) {
if (dev->devid == devid &&
(!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) {
return dev;
{
struct btrfs_fs_devices *fs_devices;
- list_for_each_entry(fs_devices, &fs_uuids, list) {
+ list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0)
return fs_devices;
}
struct btrfs_fs_devices *fs_devs, *tmp_fs_devs;
struct btrfs_device *dev, *tmp_dev;
- list_for_each_entry_safe(fs_devs, tmp_fs_devs, &fs_uuids, list) {
+ list_for_each_entry_safe(fs_devs, tmp_fs_devs, &fs_uuids, fs_list) {
if (fs_devs->opened)
continue;
/* delete the stale device */
if (fs_devs->num_devices == 1) {
btrfs_sysfs_remove_fsid(fs_devs);
- list_del(&fs_devs->list);
+ list_del(&fs_devs->fs_list);
free_fs_devices(fs_devs);
break;
} else {
fs_devs->num_devices--;
list_del(&dev->dev_list);
- free_device(dev);
+ btrfs_free_device(dev);
}
}
}
if (IS_ERR(fs_devices))
return ERR_CAST(fs_devices);
- list_add(&fs_devices->list, &fs_uuids);
+ list_add(&fs_devices->fs_list, &fs_uuids);
device = NULL;
} else {
name = rcu_string_strdup(path, GFP_NOFS);
if (!name) {
- free_device(device);
+ btrfs_free_device(device);
return ERR_PTR(-ENOMEM);
}
rcu_assign_pointer(device->name, name);
name = rcu_string_strdup(orig_dev->name->str,
GFP_KERNEL);
if (!name) {
- free_device(device);
+ btrfs_free_device(device);
goto error;
}
rcu_assign_pointer(device->name, name);
}
list_del_init(&device->dev_list);
fs_devices->num_devices--;
- free_device(device);
+ btrfs_free_device(device);
}
if (fs_devices->seed) {
struct btrfs_device *device;
device = container_of(head, struct btrfs_device, rcu);
- free_device(device);
+ btrfs_free_device(device);
}
static void btrfs_close_bdev(struct btrfs_device *device)
new_device->fs_devices = device->fs_devices;
}
-static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
+static int close_fs_devices(struct btrfs_fs_devices *fs_devices)
{
struct btrfs_device *device, *tmp;
struct list_head pending_put;
int ret;
mutex_lock(&uuid_mutex);
- ret = __btrfs_close_devices(fs_devices);
+ ret = close_fs_devices(fs_devices);
if (!fs_devices->opened) {
seed_devices = fs_devices->seed;
fs_devices->seed = NULL;
while (seed_devices) {
fs_devices = seed_devices;
seed_devices = fs_devices->seed;
- __btrfs_close_devices(fs_devices);
+ close_fs_devices(fs_devices);
free_fs_devices(fs_devices);
}
return ret;
}
-static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
+static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
fmode_t flags, void *holder)
{
- struct list_head *head = &fs_devices->devices;
struct btrfs_device *device;
struct btrfs_device *latest_dev = NULL;
int ret = 0;
flags |= FMODE_EXCL;
- list_for_each_entry(device, head, dev_list) {
+ list_for_each_entry(device, &fs_devices->devices, dev_list) {
/* Just open everything we can; ignore failures here */
if (btrfs_open_one_device(fs_devices, device, flags, holder))
continue;
ret = 0;
} else {
list_sort(NULL, &fs_devices->devices, devid_cmp);
- ret = __btrfs_open_devices(fs_devices, flags, holder);
+ ret = open_fs_devices(fs_devices, flags, holder);
}
mutex_unlock(&uuid_mutex);
return ret;
{
struct btrfs_device *device;
struct btrfs_fs_devices *cur_devices;
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
u64 num_devices;
int ret = 0;
- mutex_lock(&fs_info->volume_mutex);
mutex_lock(&uuid_mutex);
- num_devices = fs_info->fs_devices->num_devices;
+ num_devices = fs_devices->num_devices;
btrfs_dev_replace_read_lock(&fs_info->dev_replace);
if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
WARN_ON(num_devices < 1);
*/
cur_devices = device->fs_devices;
- mutex_lock(&fs_info->fs_devices->device_list_mutex);
+ mutex_lock(&fs_devices->device_list_mutex);
list_del_rcu(&device->dev_list);
device->fs_devices->num_devices--;
if (device->bdev) {
device->fs_devices->open_devices--;
/* remove sysfs entry */
- btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
+ btrfs_sysfs_rm_device_link(fs_devices, device);
}
num_devices = btrfs_super_num_devices(fs_info->super_copy) - 1;
btrfs_set_super_num_devices(fs_info->super_copy, num_devices);
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+ mutex_unlock(&fs_devices->device_list_mutex);
/*
* at this point, the device is zero sized and detached from
call_rcu(&device->rcu, free_device_rcu);
if (cur_devices->open_devices == 0) {
- struct btrfs_fs_devices *fs_devices;
- fs_devices = fs_info->fs_devices;
while (fs_devices) {
if (fs_devices->seed == cur_devices) {
fs_devices->seed = cur_devices->seed;
fs_devices = fs_devices->seed;
}
cur_devices->seed = NULL;
- __btrfs_close_devices(cur_devices);
+ close_fs_devices(cur_devices);
free_fs_devices(cur_devices);
}
out:
mutex_unlock(&uuid_mutex);
- mutex_unlock(&fs_info->volume_mutex);
return ret;
error_undo:
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
mutex_lock(&fs_info->chunk_mutex);
list_add(&device->dev_alloc_list,
- &fs_info->fs_devices->alloc_list);
+ &fs_devices->alloc_list);
device->fs_devices->rw_devices++;
mutex_unlock(&fs_info->chunk_mutex);
}
tmp_fs_devices = tmp_fs_devices->seed;
}
fs_devices->seed = NULL;
- __btrfs_close_devices(fs_devices);
+ close_fs_devices(fs_devices);
free_fs_devices(fs_devices);
}
}
struct btrfs_device *tmp;
devices = &fs_info->fs_devices->devices;
- /*
- * It is safe to read the devices since the volume_mutex
- * is held by the caller.
- */
list_for_each_entry(tmp, devices, dev_list) {
if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
&tmp->dev_state) && !tmp->bdev) {
return PTR_ERR(old_devices);
}
- list_add(&old_devices->list, &fs_uuids);
+ list_add(&old_devices->fs_list, &fs_uuids);
memcpy(seed_devices, fs_devices, sizeof(*seed_devices));
seed_devices->opened = 1;
if (trans)
btrfs_end_transaction(trans);
error_free_device:
- free_device(device);
+ btrfs_free_device(device);
error:
blkdev_put(bdev, FMODE_EXCL);
if (seeding_dev && !unlocked) {
return ret;
}
-int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
- const char *device_path,
- struct btrfs_device *srcdev,
- struct btrfs_device **device_out)
-{
- struct btrfs_device *device;
- struct block_device *bdev;
- struct list_head *devices;
- struct rcu_string *name;
- u64 devid = BTRFS_DEV_REPLACE_DEVID;
- int ret = 0;
-
- *device_out = NULL;
- if (fs_info->fs_devices->seeding) {
- btrfs_err(fs_info, "the filesystem is a seed filesystem!");
- return -EINVAL;
- }
-
- bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
- fs_info->bdev_holder);
- if (IS_ERR(bdev)) {
- btrfs_err(fs_info, "target device %s is invalid!", device_path);
- return PTR_ERR(bdev);
- }
-
- filemap_write_and_wait(bdev->bd_inode->i_mapping);
-
- devices = &fs_info->fs_devices->devices;
- list_for_each_entry(device, devices, dev_list) {
- if (device->bdev == bdev) {
- btrfs_err(fs_info,
- "target device is in the filesystem!");
- ret = -EEXIST;
- goto error;
- }
- }
-
-
- if (i_size_read(bdev->bd_inode) <
- btrfs_device_get_total_bytes(srcdev)) {
- btrfs_err(fs_info,
- "target device is smaller than source device!");
- ret = -EINVAL;
- goto error;
- }
-
-
- device = btrfs_alloc_device(NULL, &devid, NULL);
- if (IS_ERR(device)) {
- ret = PTR_ERR(device);
- goto error;
- }
-
- name = rcu_string_strdup(device_path, GFP_KERNEL);
- if (!name) {
- free_device(device);
- ret = -ENOMEM;
- goto error;
- }
- rcu_assign_pointer(device->name, name);
-
- mutex_lock(&fs_info->fs_devices->device_list_mutex);
- set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
- device->generation = 0;
- device->io_width = fs_info->sectorsize;
- device->io_align = fs_info->sectorsize;
- device->sector_size = fs_info->sectorsize;
- device->total_bytes = btrfs_device_get_total_bytes(srcdev);
- device->disk_total_bytes = btrfs_device_get_disk_total_bytes(srcdev);
- device->bytes_used = btrfs_device_get_bytes_used(srcdev);
- device->commit_total_bytes = srcdev->commit_total_bytes;
- device->commit_bytes_used = device->bytes_used;
- device->fs_info = fs_info;
- device->bdev = bdev;
- set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
- set_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
- device->mode = FMODE_EXCL;
- device->dev_stats_valid = 1;
- set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
- device->fs_devices = fs_info->fs_devices;
- list_add(&device->dev_list, &fs_info->fs_devices->devices);
- fs_info->fs_devices->num_devices++;
- fs_info->fs_devices->open_devices++;
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
-
- *device_out = device;
- return ret;
-
-error:
- blkdev_put(bdev, FMODE_EXCL);
- return ret;
-}
-
static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
struct btrfs_device *device)
{
/*
* Should be called with both balance and volume mutexes held to
* serialize other volume operations (add_dev/rm_dev/resize) with
- * restriper. Same goes for unset_balance_control.
+ * restriper. Same goes for reset_balance_state.
*/
static void set_balance_control(struct btrfs_balance_control *bctl)
{
spin_unlock(&fs_info->balance_lock);
}
-static void unset_balance_control(struct btrfs_fs_info *fs_info)
+/*
+ * Clear the balance status in fs_info and delete the balance item from disk.
+ */
+static void reset_balance_state(struct btrfs_fs_info *fs_info)
{
struct btrfs_balance_control *bctl = fs_info->balance_ctl;
+ int ret;
BUG_ON(!fs_info->balance_ctl);
spin_unlock(&fs_info->balance_lock);
kfree(bctl);
+ ret = del_balance_item(fs_info);
+ if (ret)
+ btrfs_handle_fs_error(fs_info, ret, NULL);
}
/*
atomic_read(&fs_info->balance_cancel_req) == 0);
}
-static void __cancel_balance(struct btrfs_fs_info *fs_info)
-{
- int ret;
-
- unset_balance_control(fs_info);
- ret = del_balance_item(fs_info);
- if (ret)
- btrfs_handle_fs_error(fs_info, ret, NULL);
-
- clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
-}
-
/* Non-zero return value signifies invalidity */
static inline int validate_convert_profile(struct btrfs_balance_args *bctl_arg,
u64 allowed)
if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
balance_need_close(fs_info)) {
- __cancel_balance(fs_info);
+ reset_balance_state(fs_info);
+ clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
}
wake_up(&fs_info->balance_wait_q);
return ret;
out:
if (bctl->flags & BTRFS_BALANCE_RESUME)
- __cancel_balance(fs_info);
- else {
+ reset_balance_state(fs_info);
+ else
kfree(bctl);
- clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
- }
+ clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
+
return ret;
}
btrfs_balance_sys(leaf, item, &disk_bargs);
btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
- WARN_ON(test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags));
+ /*
+ * The flag should never be already set here, as the paused balance state
+ * is recovered during mount without any chance of other exclusive ops to
+ * collide.
+ *
+ * This gives the exclusive op status to balance and keeps it in paused
+ * state until user intervention (cancel or umount). If the ownership
+ * cannot be assigned, show a message but do not fail. The balance
+ * is in a paused state and must have fs_info::balance_ctl properly
+ * set up.
+ */
+ if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags))
+ btrfs_warn(fs_info,
+ "cannot set exclusive op status to balance, resume manually");
mutex_lock(&fs_info->volume_mutex);
mutex_lock(&fs_info->balance_mutex);
atomic_read(&fs_info->balance_running) == 0);
mutex_lock(&fs_info->balance_mutex);
} else {
- /* __cancel_balance needs volume_mutex */
+ /* reset_balance_state needs volume_mutex */
mutex_unlock(&fs_info->balance_mutex);
mutex_lock(&fs_info->volume_mutex);
mutex_lock(&fs_info->balance_mutex);
- if (fs_info->balance_ctl)
- __cancel_balance(fs_info);
+ if (fs_info->balance_ctl) {
+ reset_balance_state(fs_info);
+ clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
+ }
mutex_unlock(&fs_info->volume_mutex);
}
*
* Return: a pointer to a new &struct btrfs_device on success; ERR_PTR()
* on error. Returned struct is not linked onto any lists and must be
- * destroyed with free_device.
+ * destroyed with btrfs_free_device.
*/
struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
const u64 *devid,
ret = find_next_devid(fs_info, &tmp);
if (ret) {
- free_device(dev);
+ btrfs_free_device(dev);
return ERR_PTR(ret);
}
}
if (IS_ERR(fs_devices))
return fs_devices;
- ret = __btrfs_open_devices(fs_devices, FMODE_READ,
- fs_info->bdev_holder);
+ ret = open_fs_devices(fs_devices, FMODE_READ, fs_info->bdev_holder);
if (ret) {
free_fs_devices(fs_devices);
fs_devices = ERR_PTR(ret);
}
if (!fs_devices->seeding) {
- __btrfs_close_devices(fs_devices);
+ close_fs_devices(fs_devices);
free_fs_devices(fs_devices);
fs_devices = ERR_PTR(-EINVAL);
goto out;