Merge tag 'for-6.8-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 22 Jan 2024 21:29:42 +0000 (13:29 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 22 Jan 2024 21:29:42 +0000 (13:29 -0800)
Pull btrfs fixes from David Sterba:

 - zoned mode fixes:
     - fix slowdown when writing a large file sequentially by looking up
       block groups with enough space faster
     - locking fixes when activating a zone

 - new mount API fixes:
     - preserve mount options for a ro/rw mount of the same subvolume

 - scrub fixes:
     - fix use-after-free in case the chunk length is not aligned to
       64K; this does not happen normally but has been reported on
       images converted from ext4
     - similar alignment check was missing with raid-stripe-tree

 - subvolume deletion fixes:
     - prevent calling ioctl on already deleted subvolume
     - properly track the flag marking a deleted subvolume

 - in subpage mode, fix decompression of an inline extent (zlib, lzo,
   zstd)

 - fix crash when starting writeback on a folio; after integration with
   recent MM changes, writeback needs to be started conditionally

 - reject unknown flags in defrag ioctl

 - error handling, API fixes, minor warning fixes

* tag 'for-6.8-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: scrub: limit RST scrub to chunk boundary
  btrfs: scrub: avoid use-after-free when chunk length is not 64K aligned
  btrfs: don't unconditionally call folio_start_writeback in subpage
  btrfs: use the original mount's mount options for the legacy reconfigure
  btrfs: don't warn if discard range is not aligned to sector
  btrfs: tree-checker: fix inline ref size in error messages
  btrfs: zstd: fix and simplify the inline extent decompression
  btrfs: lzo: fix and simplify the inline extent decompression
  btrfs: zlib: fix and simplify the inline extent decompression
  btrfs: defrag: reject unknown flags of btrfs_ioctl_defrag_range_args
  btrfs: avoid copying BTRFS_ROOT_SUBVOL_DEAD flag to snapshot of subvolume being deleted
  btrfs: don't abort filesystem when attempting to snapshot deleted subvolume
  btrfs: zoned: fix lock ordering in btrfs_zone_activate()
  btrfs: fix unbalanced unlock of mapping_tree_lock
  btrfs: ref-verify: free ref cache before clearing mount opt
  btrfs: fix kvcalloc() arguments order in btrfs_ioctl_send()
  btrfs: zoned: optimize hint byte for zoned allocator
  btrfs: zoned: factor out prepare_allocation_zoned()

1  2 
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/subpage.c
fs/btrfs/super.c
fs/btrfs/zoned.c

diff --combined fs/btrfs/inode.c
index 809b11472a806c92ef9ad4454d354a9460a51b7b,7bcc1c03437a842785cd3e29720b664688a6d0e1..1eb93d3962aac4608cda0255ea31d7e53dbc8da2
@@@ -4458,6 -4458,8 +4458,8 @@@ int btrfs_delete_subvolume(struct btrfs
        u64 root_flags;
        int ret;
  
+       down_write(&fs_info->subvol_sem);
        /*
         * Don't allow to delete a subvolume with send in progress. This is
         * inside the inode lock so the error handling that has to drop the bit
                btrfs_warn(fs_info,
                           "attempt to delete subvolume %llu during send",
                           dest->root_key.objectid);
-               return -EPERM;
+               ret = -EPERM;
+               goto out_up_write;
        }
        if (atomic_read(&dest->nr_swapfiles)) {
                spin_unlock(&dest->root_item_lock);
                btrfs_warn(fs_info,
                           "attempt to delete subvolume %llu with active swapfile",
                           root->root_key.objectid);
-               return -EPERM;
+               ret = -EPERM;
+               goto out_up_write;
        }
        root_flags = btrfs_root_flags(&dest->root_item);
        btrfs_set_root_flags(&dest->root_item,
                             root_flags | BTRFS_ROOT_SUBVOL_DEAD);
        spin_unlock(&dest->root_item_lock);
  
-       down_write(&fs_info->subvol_sem);
        ret = may_destroy_subvol(dest);
        if (ret)
-               goto out_up_write;
+               goto out_undead;
  
        btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
        /*
         */
        ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
        if (ret)
-               goto out_up_write;
+               goto out_undead;
  
        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans)) {
@@@ -4563,15 -4565,17 +4565,17 @@@ out_end_trans
        inode->i_flags |= S_DEAD;
  out_release:
        btrfs_subvolume_release_metadata(root, &block_rsv);
- out_up_write:
-       up_write(&fs_info->subvol_sem);
+ out_undead:
        if (ret) {
                spin_lock(&dest->root_item_lock);
                root_flags = btrfs_root_flags(&dest->root_item);
                btrfs_set_root_flags(&dest->root_item,
                                root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
                spin_unlock(&dest->root_item_lock);
-       } else {
+       }
+ out_up_write:
+       up_write(&fs_info->subvol_sem);
+       if (!ret) {
                d_invalidate(dentry);
                btrfs_prune_dentries(dest);
                ASSERT(dest->send_in_progress == 0);
@@@ -10973,7 -10977,7 +10977,7 @@@ static const struct address_space_opera
        .release_folio  = btrfs_release_folio,
        .migrate_folio  = btrfs_migrate_folio,
        .dirty_folio    = filemap_dirty_folio,
 -      .error_remove_page = generic_error_remove_page,
 +      .error_remove_folio = generic_error_remove_folio,
        .swap_activate  = btrfs_swap_activate,
        .swap_deactivate = btrfs_swap_deactivate,
  };
diff --combined fs/btrfs/ioctl.c
index 41b479861b3c767bb582920db56ea442c8f7f381,5d42319b43f2ddea959f56e23dff8be5eb907ae0..dfed9dd9c2d75b8205531b030c220b42820e77ce
@@@ -790,6 -790,9 +790,9 @@@ static int create_snapshot(struct btrfs
                return -EOPNOTSUPP;
        }
  
+       if (btrfs_root_refs(&root->root_item) == 0)
+               return -ENOENT;
        if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
                return -EINVAL;
  
@@@ -1290,15 -1293,6 +1293,15 @@@ static noinline int __btrfs_ioctl_snap_
                         * are limited to own subvolumes only
                         */
                        ret = -EPERM;
 +              } else if (btrfs_ino(BTRFS_I(src_inode)) != BTRFS_FIRST_FREE_OBJECTID) {
 +                      /*
 +                       * Snapshots must be made with the src_inode referring
 +                       * to the subvolume inode, otherwise the permission
 +                       * checking above is useless because we may have
 +                       * permission on a lower directory but not the subvol
 +                       * itself.
 +                       */
 +                      ret = -EINVAL;
                } else {
                        ret = btrfs_mksnapshot(&file->f_path, idmap,
                                               name, namelen,
@@@ -2608,6 -2602,10 +2611,10 @@@ static int btrfs_ioctl_defrag(struct fi
                                ret = -EFAULT;
                                goto out;
                        }
+                       if (range.flags & ~BTRFS_DEFRAG_RANGE_FLAGS_SUPP) {
+                               ret = -EOPNOTSUPP;
+                               goto out;
+                       }
                        /* compression requires us to start the IO */
                        if ((range.flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
                                range.flags |= BTRFS_DEFRAG_RANGE_START_IO;
@@@ -4533,29 -4531,29 +4540,29 @@@ static int btrfs_ioctl_encoded_write(st
        if (ret < 0)
                goto out_acct;
  
 -      file_start_write(file);
 -
        if (iov_iter_count(&iter) == 0) {
                ret = 0;
 -              goto out_end_write;
 +              goto out_iov;
        }
        pos = args.offset;
        ret = rw_verify_area(WRITE, file, &pos, args.len);
        if (ret < 0)
 -              goto out_end_write;
 +              goto out_iov;
  
        init_sync_kiocb(&kiocb, file);
        ret = kiocb_set_rw_flags(&kiocb, 0);
        if (ret)
 -              goto out_end_write;
 +              goto out_iov;
        kiocb.ki_pos = pos;
  
 +      file_start_write(file);
 +
        ret = btrfs_do_write_iter(&kiocb, &iter, &args);
        if (ret > 0)
                fsnotify_modify(file);
  
 -out_end_write:
        file_end_write(file);
 +out_iov:
        kfree(iov);
  out_acct:
        if (ret > 0)
diff --combined fs/btrfs/subpage.c
index 93511d54abf8280bc6778a17b5fa75a28d3585c1,277dd6d312ee31353a32f5513ac87fd2d5ebf18e..0e49dab8dad2480243f4d32e6ee934c0f2b35b67
@@@ -197,7 -197,7 +197,7 @@@ void btrfs_folio_inc_eb_refs(const stru
                return;
  
        ASSERT(folio_test_private(folio) && folio->mapping);
 -      lockdep_assert_held(&folio->mapping->private_lock);
 +      lockdep_assert_held(&folio->mapping->i_private_lock);
  
        subpage = folio_get_private(folio);
        atomic_inc(&subpage->eb_refs);
@@@ -211,7 -211,7 +211,7 @@@ void btrfs_folio_dec_eb_refs(const stru
                return;
  
        ASSERT(folio_test_private(folio) && folio->mapping);
 -      lockdep_assert_held(&folio->mapping->private_lock);
 +      lockdep_assert_held(&folio->mapping->i_private_lock);
  
        subpage = folio_get_private(folio);
        ASSERT(atomic_read(&subpage->eb_refs));
@@@ -475,7 -475,8 +475,8 @@@ void btrfs_subpage_set_writeback(const 
  
        spin_lock_irqsave(&subpage->lock, flags);
        bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
-       folio_start_writeback(folio);
+       if (!folio_test_writeback(folio))
+               folio_start_writeback(folio);
        spin_unlock_irqrestore(&subpage->lock, flags);
  }
  
diff --combined fs/btrfs/super.c
index 896acfda17895150ff501960dd72f084c542301e,f192f8fe0ce62edab4b6a85d2f06a3a08a67ff15..101f786963d4d7712baab28c912226fb741c0c9b
@@@ -259,12 -259,6 +259,12 @@@ static const struct fs_parameter_spec b
        {}
  };
  
 +/* No support for restricting writes to btrfs devices yet... */
 +static inline blk_mode_t btrfs_open_mode(struct fs_context *fc)
 +{
 +      return sb_open_mode(fc->sb_flags) & ~BLK_OPEN_RESTRICT_WRITES;
 +}
 +
  static int btrfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
  {
        struct btrfs_fs_context *ctx = fc->fs_private;
                break;
        case Opt_device: {
                struct btrfs_device *device;
 -              blk_mode_t mode = sb_open_mode(fc->sb_flags);
 +              blk_mode_t mode = btrfs_open_mode(fc);
  
                mutex_lock(&uuid_mutex);
                device = btrfs_scan_one_device(param->string, mode, false);
@@@ -1457,6 -1451,14 +1457,14 @@@ static int btrfs_reconfigure(struct fs_
  
        btrfs_info_to_ctx(fs_info, &old_ctx);
  
+       /*
+        * This is our "bind mount" trick, we don't want to allow the user to do
+        * anything other than mount a different ro/rw and a different subvol,
+        * all of the mount options should be maintained.
+        */
+       if (mount_reconfigure)
+               ctx->mount_opt = old_ctx.mount_opt;
        sync_filesystem(sb);
        set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
  
@@@ -1792,7 -1794,7 +1800,7 @@@ static int btrfs_get_tree_super(struct 
        struct block_device *bdev;
        struct btrfs_device *device;
        struct super_block *sb;
 -      blk_mode_t mode = sb_open_mode(fc->sb_flags);
 +      blk_mode_t mode = btrfs_open_mode(fc);
        int ret;
  
        btrfs_ctx_to_info(fs_info, ctx);
diff --combined fs/btrfs/zoned.c
index 5bd76813b23f065fdf670bf8fe3fbd59ee0c88d9,ac9bbe0c4ffe691747eb2f9247c2aaa75f65f3b5..168af9d000d168324fcc8355781517ddeedeefd1
@@@ -578,12 -578,26 +578,12 @@@ int btrfs_get_dev_zone_info(struct btrf
  
        kvfree(zones);
  
 -      switch (bdev_zoned_model(bdev)) {
 -      case BLK_ZONED_HM:
 +      if (bdev_is_zoned(bdev)) {
                model = "host-managed zoned";
                emulated = "";
 -              break;
 -      case BLK_ZONED_HA:
 -              model = "host-aware zoned";
 -              emulated = "";
 -              break;
 -      case BLK_ZONED_NONE:
 +      } else {
                model = "regular";
                emulated = "emulated ";
 -              break;
 -      default:
 -              /* Just in case */
 -              btrfs_err_in_rcu(fs_info, "zoned: unsupported model %d on %s",
 -                               bdev_zoned_model(bdev),
 -                               rcu_str_deref(device->name));
 -              ret = -EOPNOTSUPP;
 -              goto out_free_zone_info;
        }
  
        btrfs_info_in_rcu(fs_info,
  
  out:
        kvfree(zones);
 -out_free_zone_info:
        btrfs_destroy_dev_zone_info(device);
 -
        return ret;
  }
  
@@@ -672,7 -688,8 +672,7 @@@ static int btrfs_check_for_zoned_device
        struct btrfs_device *device;
  
        list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
 -              if (device->bdev &&
 -                  bdev_zoned_model(device->bdev) == BLK_ZONED_HM) {
 +              if (device->bdev && bdev_is_zoned(device->bdev)) {
                        btrfs_err(fs_info,
                                "zoned: mode not enabled but zoned device found: %pg",
                                device->bdev);
@@@ -2055,6 -2072,7 +2055,7 @@@ bool btrfs_zone_activate(struct btrfs_b
  
        map = block_group->physical_map;
  
+       spin_lock(&fs_info->zone_active_bgs_lock);
        spin_lock(&block_group->lock);
        if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) {
                ret = true;
                goto out_unlock;
        }
  
-       spin_lock(&fs_info->zone_active_bgs_lock);
        for (i = 0; i < map->num_stripes; i++) {
                struct btrfs_zoned_device_info *zinfo;
                int reserved = 0;
                 */
                if (atomic_read(&zinfo->active_zones_left) <= reserved) {
                        ret = false;
-                       spin_unlock(&fs_info->zone_active_bgs_lock);
                        goto out_unlock;
                }
  
                if (!btrfs_dev_set_active_zone(device, physical)) {
                        /* Cannot activate the zone */
                        ret = false;
-                       spin_unlock(&fs_info->zone_active_bgs_lock);
                        goto out_unlock;
                }
                if (!is_data)
                        zinfo->reserved_active_zones--;
        }
-       spin_unlock(&fs_info->zone_active_bgs_lock);
  
        /* Successfully activated all the zones */
        set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags);
  
        /* For the active block group list */
        btrfs_get_block_group(block_group);
-       spin_lock(&fs_info->zone_active_bgs_lock);
        list_add_tail(&block_group->active_bg_list, &fs_info->zone_active_bgs);
        spin_unlock(&fs_info->zone_active_bgs_lock);
  
  
  out_unlock:
        spin_unlock(&block_group->lock);
+       spin_unlock(&fs_info->zone_active_bgs_lock);
        return ret;
  }