spin_lock_init(&dev->io_lock);
- spin_lock_init(&dev->reada_lock);
atomic_set(&dev->reada_in_flight, 0);
atomic_set(&dev->dev_stats_ccnt, 0);
btrfs_device_data_ordered_init(dev);
run_scheduled_bios(device);
}
-
-static void btrfs_free_stale_device(struct btrfs_device *cur_dev)
+/*
+ * Search and remove all stale (devices which are not mounted) devices.
+ * When both inputs are NULL, it will search and release all stale devices.
+ * path: Optional. When provided will it release all unmounted devices
+ * matching this path only.
+ * skip_dev: Optional. Will skip this device when searching for the stale
+ * devices.
+ */
+static void btrfs_free_stale_devices(const char *path,
+ struct btrfs_device *skip_dev)
{
- struct btrfs_fs_devices *fs_devs;
- struct btrfs_device *dev;
-
- if (!cur_dev->name)
- return;
+ struct btrfs_fs_devices *fs_devs, *tmp_fs_devs;
+ struct btrfs_device *dev, *tmp_dev;
- list_for_each_entry(fs_devs, &fs_uuids, list) {
- int del = 1;
+ list_for_each_entry_safe(fs_devs, tmp_fs_devs, &fs_uuids, list) {
if (fs_devs->opened)
continue;
- if (fs_devs->seeding)
- continue;
- list_for_each_entry(dev, &fs_devs->devices, dev_list) {
+ list_for_each_entry_safe(dev, tmp_dev,
+ &fs_devs->devices, dev_list) {
+ int not_found = 0;
- if (dev == cur_dev)
+ if (skip_dev && skip_dev == dev)
continue;
- if (!dev->name)
+ if (path && !dev->name)
continue;
- /*
- * Todo: This won't be enough. What if the same device
- * comes back (with new uuid and) with its mapper path?
- * But for now, this does help as mostly an admin will
- * either use mapper or non mapper path throughout.
- */
rcu_read_lock();
- del = strcmp(rcu_str_deref(dev->name),
- rcu_str_deref(cur_dev->name));
+ if (path)
+ not_found = strcmp(rcu_str_deref(dev->name),
+ path);
rcu_read_unlock();
- if (!del)
- break;
- }
+ if (not_found)
+ continue;
- if (!del) {
/* delete the stale device */
if (fs_devs->num_devices == 1) {
btrfs_sysfs_remove_fsid(fs_devs);
list_del(&fs_devs->list);
free_fs_devices(fs_devs);
+ break;
} else {
fs_devs->num_devices--;
list_del(&dev->dev_list);
free_device(dev);
}
- break;
}
}
}
* Add new device to list of registered devices
*
* Returns:
- * 1 - first time device is seen
- * 0 - device already known
- * < 0 - error
+ * device pointer which was just added or updated when successful
+ * error pointer when failed
*/
-static noinline int device_list_add(const char *path,
- struct btrfs_super_block *disk_super,
- u64 devid, struct btrfs_fs_devices **fs_devices_ret)
+static noinline struct btrfs_device *device_list_add(const char *path,
+ struct btrfs_super_block *disk_super)
{
struct btrfs_device *device;
struct btrfs_fs_devices *fs_devices;
struct rcu_string *name;
- int ret = 0;
u64 found_transid = btrfs_super_generation(disk_super);
+ u64 devid = btrfs_stack_device_id(&disk_super->dev_item);
fs_devices = find_fsid(disk_super->fsid);
if (!fs_devices) {
fs_devices = alloc_fs_devices(disk_super->fsid);
if (IS_ERR(fs_devices))
- return PTR_ERR(fs_devices);
+ return ERR_CAST(fs_devices);
list_add(&fs_devices->list, &fs_uuids);
if (!device) {
if (fs_devices->opened)
- return -EBUSY;
+ return ERR_PTR(-EBUSY);
device = btrfs_alloc_device(NULL, &devid,
disk_super->dev_item.uuid);
if (IS_ERR(device)) {
/* we can safely leave the fs_devices entry around */
- return PTR_ERR(device);
+ return device;
}
name = rcu_string_strdup(path, GFP_NOFS);
if (!name) {
free_device(device);
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
}
rcu_assign_pointer(device->name, name);
fs_devices->num_devices++;
mutex_unlock(&fs_devices->device_list_mutex);
- ret = 1;
device->fs_devices = fs_devices;
+ btrfs_free_stale_devices(path, device);
+
+ if (disk_super->label[0])
+ pr_info("BTRFS: device label %s devid %llu transid %llu %s\n",
+ disk_super->label, devid, found_transid, path);
+ else
+ pr_info("BTRFS: device fsid %pU devid %llu transid %llu %s\n",
+ disk_super->fsid, devid, found_transid, path);
+
} else if (!device->name || strcmp(device->name->str, path)) {
/*
* When FS is already mounted.
* with larger generation number or the last-in if
* generation are equal.
*/
- return -EEXIST;
+ return ERR_PTR(-EEXIST);
}
name = rcu_string_strdup(path, GFP_NOFS);
if (!name)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
rcu_string_free(device->name);
rcu_assign_pointer(device->name, name);
if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) {
if (!fs_devices->opened)
device->generation = found_transid;
- /*
- * if there is new btrfs on an already registered device,
- * then remove the stale device entry.
- */
- if (ret > 0)
- btrfs_free_stale_device(device);
-
- *fs_devices_ret = fs_devices;
+ fs_devices->total_devices = btrfs_super_num_devices(disk_super);
- return ret;
+ return device;
}
static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
struct btrfs_fs_devices **fs_devices_ret)
{
struct btrfs_super_block *disk_super;
+ struct btrfs_device *device;
struct block_device *bdev;
struct page *page;
- int ret = -EINVAL;
- u64 devid;
- u64 transid;
- u64 total_devices;
+ int ret = 0;
u64 bytenr;
/*
goto error;
}
- if (btrfs_read_disk_super(bdev, bytenr, &page, &disk_super))
+ if (btrfs_read_disk_super(bdev, bytenr, &page, &disk_super)) {
+ ret = -EINVAL;
goto error_bdev_put;
-
- devid = btrfs_stack_device_id(&disk_super->dev_item);
- transid = btrfs_super_generation(disk_super);
- total_devices = btrfs_super_num_devices(disk_super);
-
- ret = device_list_add(path, disk_super, devid, fs_devices_ret);
- if (ret > 0) {
- if (disk_super->label[0]) {
- pr_info("BTRFS: device label %s ", disk_super->label);
- } else {
- pr_info("BTRFS: device fsid %pU ", disk_super->fsid);
- }
-
- pr_cont("devid %llu transid %llu %s\n", devid, transid, path);
- ret = 0;
}
- if (!ret && fs_devices_ret)
- (*fs_devices_ret)->total_devices = total_devices;
+
+ device = device_list_add(path, disk_super);
+ if (IS_ERR(device))
+ ret = PTR_ERR(device);
+ else
+ *fs_devices_ret = device->fs_devices;
btrfs_release_disk_super(page);
return ret;
}
+/*
+ * return 1 : allocate a data chunk successfully,
+ * return <0: errors during allocating a data chunk,
+ * return 0 : no need to allocate a data chunk.
+ */
+static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info,
+ u64 chunk_offset)
+{
+ struct btrfs_block_group_cache *cache;
+ u64 bytes_used;
+ u64 chunk_type;
+
+ cache = btrfs_lookup_block_group(fs_info, chunk_offset);
+ ASSERT(cache);
+ chunk_type = cache->flags;
+ btrfs_put_block_group(cache);
+
+ if (chunk_type & BTRFS_BLOCK_GROUP_DATA) {
+ spin_lock(&fs_info->data_sinfo->lock);
+ bytes_used = fs_info->data_sinfo->bytes_used;
+ spin_unlock(&fs_info->data_sinfo->lock);
+
+ if (!bytes_used) {
+ struct btrfs_trans_handle *trans;
+ int ret;
+
+ trans = btrfs_join_transaction(fs_info->tree_root);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ ret = btrfs_force_chunk_alloc(trans, fs_info,
+ BTRFS_BLOCK_GROUP_DATA);
+ btrfs_end_transaction(trans);
+ if (ret < 0)
+ return ret;
+
+ return 1;
+ }
+ }
+ return 0;
+}
+
static int insert_balance_item(struct btrfs_fs_info *fs_info,
struct btrfs_balance_control *bctl)
{
u32 count_meta = 0;
u32 count_sys = 0;
int chunk_reserved = 0;
- u64 bytes_used = 0;
/* step one make some room on all the devices */
devices = &fs_info->fs_devices->devices;
goto loop;
}
- ASSERT(fs_info->data_sinfo);
- spin_lock(&fs_info->data_sinfo->lock);
- bytes_used = fs_info->data_sinfo->bytes_used;
- spin_unlock(&fs_info->data_sinfo->lock);
-
- if ((chunk_type & BTRFS_BLOCK_GROUP_DATA) &&
- !chunk_reserved && !bytes_used) {
- trans = btrfs_start_transaction(chunk_root, 0);
- if (IS_ERR(trans)) {
- mutex_unlock(&fs_info->delete_unused_bgs_mutex);
- ret = PTR_ERR(trans);
- goto error;
- }
-
- ret = btrfs_force_chunk_alloc(trans, fs_info,
- BTRFS_BLOCK_GROUP_DATA);
- btrfs_end_transaction(trans);
+ if (!chunk_reserved) {
+ /*
+ * We may be relocating the only data chunk we have,
+ * which could potentially end up with losing data's
+ * raid profile, so lets allocate an empty one in
+ * advance.
+ */
+ ret = btrfs_may_alloc_data_chunk(fs_info,
+ found_key.offset);
if (ret < 0) {
mutex_unlock(&fs_info->delete_unused_bgs_mutex);
goto error;
+ } else if (ret == 1) {
+ chunk_reserved = 1;
}
- chunk_reserved = 1;
}
ret = btrfs_relocate_chunk(fs_info, found_key.offset);
chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
btrfs_release_path(path);
+ /*
+ * We may be relocating the only data chunk we have,
+ * which could potentially end up with losing data's
+ * raid profile, so lets allocate an empty one in
+ * advance.
+ */
+ ret = btrfs_may_alloc_data_chunk(fs_info, chunk_offset);
+ if (ret < 0) {
+ mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+ goto done;
+ }
+
ret = btrfs_relocate_chunk(fs_info, chunk_offset);
mutex_unlock(&fs_info->delete_unused_bgs_mutex);
if (ret && ret != -ENOSPC)
ndevs = min(ndevs, devs_max);
/*
- * the primary goal is to maximize the number of stripes, so use as many
- * devices as possible, even if the stripes are not maximum sized.
+ * The primary goal is to maximize the number of stripes, so use as
+ * many devices as possible, even if the stripes are not maximum sized.
+ *
+ * The DUP profile stores more than one stripe per device, the
+ * max_avail is the total size so we have to adjust.
*/
- stripe_size = devices_info[ndevs-1].max_avail;
+ stripe_size = div_u64(devices_info[ndevs - 1].max_avail, dev_stripes);
num_stripes = ndevs * dev_stripes;
/*
stripe_size = devices_info[ndevs-1].max_avail;
}
- stripe_size = div_u64(stripe_size, dev_stripes);
-
/* align to BTRFS_STRIPE_LEN */
stripe_size = round_down(stripe_size, BTRFS_STRIPE_LEN);
else if (map->type & BTRFS_BLOCK_GROUP_RAID5)
ret = 2;
else if (map->type & BTRFS_BLOCK_GROUP_RAID6)
- ret = 3;
+ /*
+ * There could be two corrupted data stripes, we need
+ * to loop retry in order to rebuild the correct data.
+ *
+ * Fail a stripe at a time on every retry except the
+ * stripe under reconstruction.
+ */
+ ret = map->num_stripes;
else
ret = 1;
free_extent_map(em);