* devices.
*/
static void btrfs_free_stale_devices(const char *path,
- struct btrfs_device *skip_dev)
+ struct btrfs_device *skip_device) /* device that is never freed here; may be NULL */
{
- struct btrfs_fs_devices *fs_devs, *tmp_fs_devs;
- struct btrfs_device *dev, *tmp_dev;
+ struct btrfs_fs_devices *fs_devices, *tmp_fs_devices;
+ struct btrfs_device *device, *tmp_device;
- list_for_each_entry_safe(fs_devs, tmp_fs_devs, &fs_uuids, fs_list) {
-
- if (fs_devs->opened)
+ list_for_each_entry_safe(fs_devices, tmp_fs_devices, &fs_uuids, fs_list) { /* _safe: the entry may be unlinked and freed below */
+ mutex_lock(&fs_devices->device_list_mutex); /* held while this entry's device list is walked and pruned */
+ if (fs_devices->opened) { /* opened fs_devices are left untouched */
+ mutex_unlock(&fs_devices->device_list_mutex);
continue;
+ }
- list_for_each_entry_safe(dev, tmp_dev,
- &fs_devs->devices, dev_list) {
+ list_for_each_entry_safe(device, tmp_device,
+ &fs_devices->devices, dev_list) {
int not_found = 0;
- if (skip_dev && skip_dev == dev)
+ if (skip_device && skip_device == device) /* keep the device the caller asked to skip */
continue;
- if (path && !dev->name)
+ if (path && !device->name) /* a device without a name cannot be compared against @path */
continue;
rcu_read_lock();
if (path)
- not_found = strcmp(rcu_str_deref(dev->name),
+ not_found = strcmp(rcu_str_deref(device->name),
path);
rcu_read_unlock();
if (not_found)
continue;
/* delete the stale device */
- if (fs_devs->num_devices == 1) {
- btrfs_sysfs_remove_fsid(fs_devs);
- list_del(&fs_devs->fs_list);
- free_fs_devices(fs_devs);
+ fs_devices->num_devices--; /* with a NULL @path not_found stays 0, so every non-skipped device reaches this point */
+ list_del(&device->dev_list);
+ btrfs_free_device(device);
+
+ if (fs_devices->num_devices == 0) /* nothing left on this list, stop scanning it */
break;
- } else {
- fs_devs->num_devices--;
- list_del(&dev->dev_list);
- btrfs_free_device(dev);
- }
+ }
+ mutex_unlock(&fs_devices->device_list_mutex); /* dropped before the fs_devices itself may be freed */
+ if (fs_devices->num_devices == 0) { /* emptied entry: remove its sysfs fsid, unlink from fs_uuids, free it */
+ btrfs_sysfs_remove_fsid(fs_devices);
+ list_del(&fs_devices->fs_list);
+ free_fs_devices(fs_devices);
}
}
}
{
int ret;
- mutex_lock(&uuid_mutex);
+ lockdep_assert_held(&uuid_mutex);
+
mutex_lock(&fs_devices->device_list_mutex);
if (fs_devices->opened) {
fs_devices->opened++;
ret = open_fs_devices(fs_devices, flags, holder);
}
mutex_unlock(&fs_devices->device_list_mutex);
- mutex_unlock(&uuid_mutex);
return ret;
}
* and we are not allowed to call set_blocksize during the scan. The superblock
* is read via pagecache
*/
-int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
- struct btrfs_fs_devices **fs_devices_ret)
+struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
+ void *holder)
{
struct btrfs_super_block *disk_super;
bool new_device_added = false;
- struct btrfs_device *device;
+ struct btrfs_device *device = NULL;
struct block_device *bdev;
struct page *page;
- int ret = 0;
u64 bytenr;
+ lockdep_assert_held(&uuid_mutex);
+
/*
* we would like to check all the supers, but that would make
* a btrfs mount succeed after a mkfs from a different FS.
bdev = blkdev_get_by_path(path, flags, holder);
if (IS_ERR(bdev))
- return PTR_ERR(bdev);
+ return ERR_CAST(bdev);
if (btrfs_read_disk_super(bdev, bytenr, &page, &disk_super)) {
- ret = -EINVAL;
+ device = ERR_PTR(-EINVAL);
goto error_bdev_put;
}
- mutex_lock(&uuid_mutex);
device = device_list_add(path, disk_super, &new_device_added);
- if (IS_ERR(device)) {
- ret = PTR_ERR(device);
- } else {
- *fs_devices_ret = device->fs_devices;
+ if (!IS_ERR(device)) {
if (new_device_added)
btrfs_free_stale_devices(path, device);
}
- mutex_unlock(&uuid_mutex);
btrfs_release_disk_super(page);
error_bdev_put:
blkdev_put(bdev, flags);
- return ret;
+ return device;
}
static int contains_pending_extent(struct btrfs_transaction *transaction,
* the btrfs_device struct should be fully filled in
*/
static int btrfs_add_dev_item(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_device *device)
{
- struct btrfs_root *root = fs_info->chunk_root;
int ret;
struct btrfs_path *path;
struct btrfs_dev_item *dev_item;
key.type = BTRFS_DEV_ITEM_KEY;
key.offset = device->devid;
- ret = btrfs_insert_empty_item(trans, root, path, &key,
- sizeof(*dev_item));
+ ret = btrfs_insert_empty_item(trans, trans->fs_info->chunk_root, path,
+ &key, sizeof(*dev_item));
if (ret)
goto out;
ptr = btrfs_device_uuid(dev_item);
write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
ptr = btrfs_device_fsid(dev_item);
- write_extent_buffer(leaf, fs_info->fsid, ptr, BTRFS_FSID_SIZE);
+ write_extent_buffer(leaf, trans->fs_info->fsid, ptr, BTRFS_FSID_SIZE);
btrfs_mark_buffer_dirty(leaf);
ret = 0;
* where this function called, there should be always be another device (or
* this_dev) which is active.
*/
-void btrfs_assign_next_active_device(struct btrfs_fs_info *fs_info,
- struct btrfs_device *device, struct btrfs_device *this_dev)
+void btrfs_assign_next_active_device(struct btrfs_device *device,
+ struct btrfs_device *this_dev)
{
+ struct btrfs_fs_info *fs_info = device->fs_info;
struct btrfs_device *next_device;
if (this_dev)
if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
cur_devices->missing_devices--;
- btrfs_assign_next_active_device(fs_info, device, NULL);
+ btrfs_assign_next_active_device(device, NULL);
if (device->bdev) {
cur_devices->open_devices--;
goto out;
}
-void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info,
- struct btrfs_device *srcdev)
+void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev)
{
struct btrfs_fs_devices *fs_devices;
- lockdep_assert_held(&fs_info->fs_devices->device_list_mutex);
+ lockdep_assert_held(&srcdev->fs_info->fs_devices->device_list_mutex);
/*
* in case of fs with no seed, srcdev->fs_devices will point
}
}
-void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
- struct btrfs_device *tgtdev)
+void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev)
{
- struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+ struct btrfs_fs_devices *fs_devices = tgtdev->fs_info->fs_devices;
WARN_ON(!tgtdev);
mutex_lock(&fs_devices->device_list_mutex);
fs_devices->num_devices--;
- btrfs_assign_next_active_device(fs_info, tgtdev, NULL);
+ btrfs_assign_next_active_device(tgtdev, NULL);
list_del_rcu(&tgtdev->dev_list);
INIT_LIST_HEAD(&seed_devices->alloc_list);
mutex_init(&seed_devices->device_list_mutex);
- mutex_lock(&fs_info->fs_devices->device_list_mutex);
+ mutex_lock(&fs_devices->device_list_mutex);
list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
synchronize_rcu);
list_for_each_entry(device, &seed_devices->devices, dev_list)
generate_random_uuid(fs_devices->fsid);
memcpy(fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+ mutex_unlock(&fs_devices->device_list_mutex);
super_flags = btrfs_super_flags(disk_super) &
~BTRFS_SUPER_FLAG_SEEDING;
}
}
- ret = btrfs_add_dev_item(trans, fs_info, device);
+ ret = btrfs_add_dev_item(trans, device);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto error_sysfs;
return btrfs_update_device(trans, device);
}
-static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 chunk_offset)
+static int btrfs_free_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *root = fs_info->chunk_root;
int ret;
struct btrfs_path *path;
return em;
}
-int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 chunk_offset)
+int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct extent_map *em;
struct map_lookup *map;
u64 dev_extent_len = 0;
}
mutex_unlock(&fs_devices->device_list_mutex);
- ret = btrfs_free_chunk(trans, fs_info, chunk_offset);
+ ret = btrfs_free_chunk(trans, chunk_offset);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
* step two, delete the device extents and the
* chunk tree entries
*/
- ret = btrfs_remove_chunk(trans, fs_info, chunk_offset);
+ ret = btrfs_remove_chunk(trans, chunk_offset);
btrfs_end_transaction(trans);
return ret;
}
}
int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- u64 chunk_offset, u64 chunk_size)
+ u64 chunk_offset, u64 chunk_size)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *extent_root = fs_info->extent_root;
struct btrfs_root *chunk_root = fs_info->chunk_root;
struct btrfs_key key;
btrfs_io_bio(bio)->stripe_index = dev_nr;
bio->bi_end_io = btrfs_end_bio;
bio->bi_iter.bi_sector = physical >> 9;
-#ifdef DEBUG
- {
- struct rcu_string *name;
-
- rcu_read_lock();
- name = rcu_dereference(dev->name);
- btrfs_debug(fs_info,
- "btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
- bio_op(bio), bio->bi_opf,
- (u64)bio->bi_iter.bi_sector,
- (u_long)dev->bdev->bd_dev, name->str, dev->devid,
- bio->bi_iter.bi_size);
- rcu_read_unlock();
- }
-#endif
+ btrfs_debug_in_rcu(fs_info,
+ "btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
+ bio_op(bio), bio->bi_opf, (u64)bio->bi_iter.bi_sector,
+ (u_long)dev->bdev->bd_dev, rcu_str_deref(dev->name), dev->devid,
+ bio->bi_iter.bi_size);
bio_set_dev(bio, dev->bdev);
btrfs_bio_counter_inc_noblocked(fs_info);
u16 num_stripes;
u16 sub_stripes;
u64 type;
+ u64 features;
+ bool mixed = false;
length = btrfs_chunk_length(leaf, chunk);
stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
btrfs_chunk_type(leaf, chunk));
return -EIO;
}
+
+ if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) {
+ btrfs_err(fs_info, "missing chunk type flag: 0x%llx", type);
+ return -EIO;
+ }
+
+ if ((type & BTRFS_BLOCK_GROUP_SYSTEM) &&
+ (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) {
+ btrfs_err(fs_info,
+ "system chunk with data or metadata type: 0x%llx", type);
+ return -EIO;
+ }
+
+ features = btrfs_super_incompat_flags(fs_info->super_copy);
+ if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
+ mixed = true;
+
+ if (!mixed) {
+ if ((type & BTRFS_BLOCK_GROUP_METADATA) &&
+ (type & BTRFS_BLOCK_GROUP_DATA)) {
+ btrfs_err(fs_info,
+ "mixed chunk type in non-mixed mode: 0x%llx", type);
+ return -EIO;
+ }
+ }
+
if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
(type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) ||
(type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
map->type = btrfs_chunk_type(leaf, chunk);
map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
+ map->verified_stripes = 0;
for (i = 0; i < num_stripes; i++) {
map->stripes[i].physical =
btrfs_stripe_offset_nr(leaf, chunk, i);
}
static int update_dev_stat_item(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_device *device)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *dev_root = fs_info->dev_root;
struct btrfs_path *path;
struct btrfs_key key;
*/
smp_rmb();
- ret = update_dev_stat_item(trans, fs_info, device);
+ ret = update_dev_stat_item(trans, device);
if (!ret)
atomic_sub(stats_cnt, &device->dev_stats_ccnt);
}
fs_devices = fs_devices->seed;
}
}
+
+/*
+ * Space multiplier of a block group profile.
+ *
+ * @flags: block group flags
+ *
+ * Returns 2 when @flags carries any of the DUP, RAID1 or RAID10 profile
+ * bits, and 1 for everything else.
+ */
+int btrfs_bg_type_to_factor(u64 flags)
+{
+	const u64 doubled = BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
+			    BTRFS_BLOCK_GROUP_RAID10;
+
+	return (flags & doubled) ? 2 : 1;
+}
+
+
+/*
+ * Compute the length each dev extent of a chunk is expected to have.
+ *
+ * @type:        chunk type flags; the profile bits decide how many of the
+ *               stripes carry data
+ * @chunk_len:   length of the chunk
+ * @num_stripes: number of stripes of the chunk
+ *
+ * RAID5 gives up one stripe and RAID6 two; for every other profile the
+ * stripe count is divided by the profile's ncopies.
+ *
+ * Returns @chunk_len divided by the number of data stripes, or 0 when the
+ * stripe count leaves no data stripe at all (e.g. fewer stripes than
+ * copies).  Such a chunk is inconsistent; returning 0 instead of dividing
+ * by zero lets the caller's length comparison reject it.
+ */
+static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes)
+{
+	int index = btrfs_bg_flags_to_raid_index(type);
+	int ncopies = btrfs_raid_array[index].ncopies;
+	int data_stripes;
+
+	switch (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+	case BTRFS_BLOCK_GROUP_RAID5:
+		data_stripes = num_stripes - 1;
+		break;
+	case BTRFS_BLOCK_GROUP_RAID6:
+		data_stripes = num_stripes - 2;
+		break;
+	default:
+		data_stripes = num_stripes / ncopies;
+		break;
+	}
+
+	/* Never hand a zero (or negative) divisor to div_u64() */
+	if (data_stripes <= 0)
+		return 0;
+
+	return div_u64(chunk_len, data_stripes);
+}
+
+/*
+ * Check a single dev extent against the chunk mapping it points to.
+ *
+ * @fs_info:         filesystem whose chunk mapping tree is consulted
+ * @chunk_offset:    chunk offset recorded in the dev extent item
+ * @devid:           device the dev extent belongs to
+ * @physical_offset: start of the dev extent on that device
+ * @physical_len:    length of the dev extent
+ *
+ * The dev extent is accepted when a chunk mapping exists at @chunk_offset,
+ * its calculated stripe length equals @physical_len, and one of its stripes
+ * sits on @devid at @physical_offset.  Each accepted dev extent bumps the
+ * mapping's verified_stripes counter, which may not exceed num_stripes.
+ *
+ * Returns 0 on success, -EUCLEAN on any mismatch.
+ */
+static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
+				 u64 chunk_offset, u64 devid,
+				 u64 physical_offset, u64 physical_len)
+{
+	struct extent_map_tree *tree = &fs_info->mapping_tree.map_tree;
+	struct extent_map *em;
+	struct map_lookup *map;
+	u64 expected_len;
+	int ret = -EUCLEAN;
+	int idx;
+
+	read_lock(&tree->lock);
+	em = lookup_extent_mapping(tree, chunk_offset, 1);
+	read_unlock(&tree->lock);
+
+	if (!em) {
+		btrfs_err(fs_info,
+"dev extent physical offset %llu on devid %llu doesn't have corresponding chunk",
+			  physical_offset, devid);
+		goto out;
+	}
+
+	map = em->map_lookup;
+	expected_len = calc_stripe_length(map->type, em->len, map->num_stripes);
+	if (physical_len != expected_len) {
+		btrfs_err(fs_info,
+"dev extent physical offset %llu on devid %llu length doesn't match chunk %llu, have %llu expect %llu",
+			  physical_offset, devid, em->start, physical_len,
+			  expected_len);
+		goto out;
+	}
+
+	/* Find the stripe this dev extent backs */
+	for (idx = 0; idx < map->num_stripes; idx++) {
+		if (map->stripes[idx].dev->devid != devid)
+			continue;
+		if (map->stripes[idx].physical != physical_offset)
+			continue;
+		break;
+	}
+
+	if (idx >= map->num_stripes) {
+		btrfs_err(fs_info,
+	"dev extent physical offset %llu devid %llu has no corresponding chunk",
+			  physical_offset, devid);
+		goto out;
+	}
+
+	if (map->verified_stripes >= map->num_stripes) {
+		btrfs_err(fs_info,
+			  "too many dev extents for chunk %llu found",
+			  em->start);
+		goto out;
+	}
+
+	map->verified_stripes++;
+	ret = 0;
+out:
+	/* em is NULL when the lookup failed; it is passed on unchanged */
+	free_extent_map(em);
+	return ret;
+}
+
+/*
+ * Walk every chunk mapping under the tree's read lock and report the first
+ * one whose verified_stripes count differs from its num_stripes.
+ *
+ * Returns 0 when all mappings are fully verified, -EUCLEAN otherwise.
+ */
+static int verify_chunk_dev_extent_mapping(struct btrfs_fs_info *fs_info)
+{
+	struct extent_map_tree *tree = &fs_info->mapping_tree.map_tree;
+	struct rb_node *cur;
+	int ret = 0;
+
+	read_lock(&tree->lock);
+	cur = rb_first(&tree->map);
+	while (cur) {
+		struct extent_map *em = rb_entry(cur, struct extent_map,
+						 rb_node);
+		struct map_lookup *map = em->map_lookup;
+
+		if (map->num_stripes != map->verified_stripes) {
+			btrfs_err(fs_info,
+			"chunk %llu has missing dev extent, have %d expect %d",
+				  em->start, map->verified_stripes,
+				  map->num_stripes);
+			ret = -EUCLEAN;
+			break;
+		}
+		cur = rb_next(cur);
+	}
+	read_unlock(&tree->lock);
+
+	return ret;
+}
+
+/*
+ * Check every dev extent item of the device tree against the chunk
+ * mappings, then confirm that no chunk is left without its dev extents.
+ * An inconsistency here would make later chunk allocation/free misbehave.
+ *
+ * NOTE: The whole device tree is walked, which should be about as large as
+ * the chunk tree, so mount time increases slightly.
+ *
+ * Returns 0 on success, -ENOMEM when no path can be allocated, -EUCLEAN
+ * when there is no item to check or a check fails, or the negative error
+ * of the tree search/iteration.
+ */
+int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_root *dev_root = fs_info->dev_root;
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	int ret = 0;
+
+	key.objectid = 1;
+	key.type = BTRFS_DEV_EXTENT_KEY;
+	key.offset = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+	path->reada = READA_FORWARD;
+
+	ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+
+	/* The search may land past the last item of the leaf */
+	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+		ret = btrfs_next_item(dev_root, path);
+		if (ret) {
+			/* Positive: no dev extents at all, which is not good */
+			if (ret > 0)
+				ret = -EUCLEAN;
+			goto out;
+		}
+	}
+
+	for (;;) {
+		struct extent_buffer *eb = path->nodes[0];
+		struct btrfs_dev_extent *item;
+		int slot = path->slots[0];
+		u64 chunk_offset;
+		u64 len;
+
+		btrfs_item_key_to_cpu(eb, &key, slot);
+		if (key.type != BTRFS_DEV_EXTENT_KEY)
+			break;
+
+		item = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
+		chunk_offset = btrfs_dev_extent_chunk_offset(eb, item);
+		len = btrfs_dev_extent_length(eb, item);
+
+		/* key.objectid is the devid, key.offset the physical offset */
+		ret = verify_one_dev_extent(fs_info, chunk_offset,
+					    key.objectid, key.offset, len);
+		if (ret < 0)
+			goto out;
+
+		ret = btrfs_next_item(dev_root, path);
+		if (ret < 0)
+			goto out;
+		/* Positive means end of tree; ret is overwritten below */
+		if (ret > 0)
+			break;
+	}
+
+	/* Ensure all chunks have corresponding dev extents */
+	ret = verify_chunk_dev_extent_mapping(fs_info);
+out:
+	btrfs_free_path(path);
+	return ret;
+}