Merge tag 'for-6.3-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
author    Linus Torvalds <torvalds@linux-foundation.org>
          Fri, 24 Mar 2023 15:32:10 +0000 (08:32 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Fri, 24 Mar 2023 15:32:10 +0000 (08:32 -0700)
Pull btrfs fixes from David Sterba:
 "A few more fixes, the zoned accounting fix is spread across a few
  patches, preparatory and the actual fixes:

   - zoned mode:
      - fix accounting of unusable zone space
      - fix zone activation condition for DUP profile
      - preparatory patches

   - improved error handling of missing chunks

   - fix compiler warning"

* tag 'for-6.3-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: zoned: drop space_info->active_total_bytes
  btrfs: zoned: count fresh BG region as zone unusable
  btrfs: use temporary variable for space_info in btrfs_update_block_group
  btrfs: rename BTRFS_FS_NO_OVERCOMMIT to BTRFS_FS_ACTIVE_ZONE_TRACKING
  btrfs: zoned: fix btrfs_can_activate_zone() to support DUP profile
  btrfs: fix compiler warning on SPARC/PA-RISC handling fscrypt_setup_filename
  btrfs: handle missing chunk mapping more gracefully

fs/btrfs/block-group.c
fs/btrfs/free-space-cache.c
fs/btrfs/fs.h
fs/btrfs/inode.c
fs/btrfs/space-info.c
fs/btrfs/space-info.h
fs/btrfs/volumes.c
fs/btrfs/zoned.c

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 0ef8b8926bfa7209a6e82145ee1c2b5f81b407c7..5fc670c27f8648d996016fe8da3f6fae658fdc78 100644
@@ -1175,14 +1175,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
                        < block_group->zone_unusable);
                WARN_ON(block_group->space_info->disk_total
                        < block_group->length * factor);
-               WARN_ON(test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
-                                &block_group->runtime_flags) &&
-                       block_group->space_info->active_total_bytes
-                       < block_group->length);
        }
        block_group->space_info->total_bytes -= block_group->length;
-       if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags))
-               block_group->space_info->active_total_bytes -= block_group->length;
        block_group->space_info->bytes_readonly -=
                (block_group->length - block_group->zone_unusable);
        block_group->space_info->bytes_zone_unusable -=
@@ -3476,6 +3470,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
        spin_unlock(&info->delalloc_root_lock);
 
        while (total) {
+               struct btrfs_space_info *space_info;
                bool reclaim = false;
 
                cache = btrfs_lookup_block_group(info, bytenr);
@@ -3483,6 +3478,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
                        ret = -ENOENT;
                        break;
                }
+               space_info = cache->space_info;
                factor = btrfs_bg_type_to_factor(cache->flags);
 
                /*
@@ -3497,7 +3493,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
                byte_in_group = bytenr - cache->start;
                WARN_ON(byte_in_group > cache->length);
 
-               spin_lock(&cache->space_info->lock);
+               spin_lock(&space_info->lock);
                spin_lock(&cache->lock);
 
                if (btrfs_test_opt(info, SPACE_CACHE) &&
@@ -3510,24 +3506,24 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
                        old_val += num_bytes;
                        cache->used = old_val;
                        cache->reserved -= num_bytes;
-                       cache->space_info->bytes_reserved -= num_bytes;
-                       cache->space_info->bytes_used += num_bytes;
-                       cache->space_info->disk_used += num_bytes * factor;
+                       space_info->bytes_reserved -= num_bytes;
+                       space_info->bytes_used += num_bytes;
+                       space_info->disk_used += num_bytes * factor;
                        spin_unlock(&cache->lock);
-                       spin_unlock(&cache->space_info->lock);
+                       spin_unlock(&space_info->lock);
                } else {
                        old_val -= num_bytes;
                        cache->used = old_val;
                        cache->pinned += num_bytes;
-                       btrfs_space_info_update_bytes_pinned(info,
-                                       cache->space_info, num_bytes);
-                       cache->space_info->bytes_used -= num_bytes;
-                       cache->space_info->disk_used -= num_bytes * factor;
+                       btrfs_space_info_update_bytes_pinned(info, space_info,
+                                                            num_bytes);
+                       space_info->bytes_used -= num_bytes;
+                       space_info->disk_used -= num_bytes * factor;
 
                        reclaim = should_reclaim_block_group(cache, num_bytes);
 
                        spin_unlock(&cache->lock);
-                       spin_unlock(&cache->space_info->lock);
+                       spin_unlock(&space_info->lock);
 
                        set_extent_dirty(&trans->transaction->pinned_extents,
                                         bytenr, bytenr + num_bytes - 1,
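
The btrfs_update_block_group() change above is purely mechanical: cache->space_info
is loaded into a local once, which shortens every locking and accounting line.
A standalone userspace sketch of the same pattern (illustrative only; types and
names are simplified stand-ins for the kernel structures):

    /* build: cc -pthread sketch.c */
    #include <pthread.h>
    #include <stdio.h>

    struct space_info {
            pthread_mutex_t lock;
            unsigned long long bytes_used;
    };

    struct block_group {
            pthread_mutex_t lock;
            struct space_info *space_info;
            unsigned long long used;
    };

    /* Loading the pointer once keeps each lock/unlock and accounting
     * line short and makes it obvious that the same object is locked,
     * updated, and unlocked. */
    static void account_alloc(struct block_group *cache, unsigned long long n)
    {
            struct space_info *space_info = cache->space_info;

            pthread_mutex_lock(&space_info->lock);
            pthread_mutex_lock(&cache->lock);
            cache->used += n;
            space_info->bytes_used += n;
            pthread_mutex_unlock(&cache->lock);
            pthread_mutex_unlock(&space_info->lock);
    }

    int main(void)
    {
            struct space_info si = { PTHREAD_MUTEX_INITIALIZER, 0 };
            struct block_group bg = { PTHREAD_MUTEX_INITIALIZER, &si, 0 };

            account_alloc(&bg, 4096);
            printf("used=%llu\n", si.bytes_used);
            return 0;
    }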
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 0d250d052487cf04085e5b059e10be1810e7d654..d84cef89cdff522ab64931da108498d00637ea53 100644
@@ -2693,8 +2693,13 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
                bg_reclaim_threshold = READ_ONCE(sinfo->bg_reclaim_threshold);
 
        spin_lock(&ctl->tree_lock);
+       /* Count initial region as zone_unusable until it gets activated. */
        if (!used)
                to_free = size;
+       else if (initial &&
+                test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &block_group->fs_info->flags) &&
+                (block_group->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)))
+               to_free = 0;
        else if (initial)
                to_free = block_group->zone_capacity;
        else if (offset >= block_group->alloc_offset)
@@ -2722,7 +2727,8 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
        reclaimable_unusable = block_group->zone_unusable -
                               (block_group->length - block_group->zone_capacity);
        /* All the region is now unusable. Mark it as unused and reclaim */
-       if (block_group->zone_unusable == block_group->length) {
+       if (block_group->zone_unusable == block_group->length &&
+           block_group->alloc_offset) {
                btrfs_mark_bg_unused(block_group);
        } else if (bg_reclaim_threshold &&
                   reclaimable_unusable >=
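
The new branch only changes how much of a fresh block group is reported as free
space: a metadata or system block group on a filesystem with active zone tracking
contributes nothing until it is activated. A small self-contained model of the
to_free ladder as shown in the hunk (a sketch, not the kernel code; the
offset-based branches of the real function are elided):

    #include <stdbool.h>
    #include <stdio.h>

    static unsigned long long initial_to_free(unsigned long long used,
                                              bool initial, bool zone_tracking,
                                              bool meta_or_sys,
                                              unsigned long long size,
                                              unsigned long long zone_capacity)
    {
            if (!used)
                    return size;
            if (initial && zone_tracking && meta_or_sys)
                    return 0;       /* stays zone_unusable until activation */
            if (initial)
                    return zone_capacity;
            return 0;               /* remaining branches elided */
    }

    int main(void)
    {
            /* A fresh 256 MiB metadata block group with 16 MiB used
             * contributes nothing while zone tracking is on. */
            printf("%llu\n", initial_to_free(16 << 20, true, true, true,
                                             256ULL << 20, 192ULL << 20));
            printf("%llu\n", initial_to_free(16 << 20, true, false, true,
                                             256ULL << 20, 192ULL << 20));
            return 0;
    }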
diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
index 4c477eae689148dd59c45514eb2069a073d08155..24cd492294086c1dfa02b59e61d5f4d311e99c30 100644
@@ -120,11 +120,8 @@ enum {
        /* Indicate that we want to commit the transaction. */
        BTRFS_FS_NEED_TRANS_COMMIT,
 
-       /*
-        * Indicate metadata over-commit is disabled. This is set when active
-        * zone tracking is needed.
-        */
-       BTRFS_FS_NO_OVERCOMMIT,
+       /* This is set when active zone tracking is needed. */
+       BTRFS_FS_ACTIVE_ZONE_TRACKING,
 
        /*
         * Indicate if we have some features changed, this is mostly for
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 6c18dc9a1831d03b7ec04c13ac6efc19a28d7a67..957e4d76a7b6578d59fd8f0b7d887e30d02c4e2b 100644
@@ -5421,8 +5421,13 @@ static int btrfs_inode_by_name(struct btrfs_inode *dir, struct dentry *dentry,
                return -ENOMEM;
 
        ret = fscrypt_setup_filename(&dir->vfs_inode, &dentry->d_name, 1, &fname);
-       if (ret)
+       if (ret < 0)
                goto out;
+       /*
+        * fscrypt_setup_filename() should never return a positive value, but
+        * gcc on sparc/parisc thinks it can, so assert that doesn't happen.
+        */
+       ASSERT(ret == 0);
 
        /* This needs to handle no-key deletions later on */
 
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 69c09508afb506ac8121caeae39fb2e7071362c0..3eecce86f63fc4e8d1cc96c6c7b09d84db076d1e 100644
@@ -308,8 +308,6 @@ void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info,
        ASSERT(found);
        spin_lock(&found->lock);
        found->total_bytes += block_group->length;
-       if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags))
-               found->active_total_bytes += block_group->length;
        found->disk_total += block_group->length * factor;
        found->bytes_used += block_group->used;
        found->disk_used += block_group->used * factor;
@@ -379,22 +377,6 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
        return avail;
 }
 
-static inline u64 writable_total_bytes(struct btrfs_fs_info *fs_info,
-                                      struct btrfs_space_info *space_info)
-{
-       /*
-        * On regular filesystem, all total_bytes are always writable. On zoned
-        * filesystem, there may be a limitation imposed by max_active_zones.
-        * For metadata allocation, we cannot finish an existing active block
-        * group to avoid a deadlock. Thus, we need to consider only the active
-        * groups to be writable for metadata space.
-        */
-       if (!btrfs_is_zoned(fs_info) || (space_info->flags & BTRFS_BLOCK_GROUP_DATA))
-               return space_info->total_bytes;
-
-       return space_info->active_total_bytes;
-}
-
 int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
                         struct btrfs_space_info *space_info, u64 bytes,
                         enum btrfs_reserve_flush_enum flush)
@@ -407,13 +389,13 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
                return 0;
 
        used = btrfs_space_info_used(space_info, true);
-       if (test_bit(BTRFS_FS_NO_OVERCOMMIT, &fs_info->flags) &&
+       if (test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags) &&
            (space_info->flags & BTRFS_BLOCK_GROUP_METADATA))
                avail = 0;
        else
                avail = calc_available_free_space(fs_info, space_info, flush);
 
-       if (used + bytes < writable_total_bytes(fs_info, space_info) + avail)
+       if (used + bytes < space_info->total_bytes + avail)
                return 1;
        return 0;
 }
@@ -449,7 +431,7 @@ again:
                ticket = list_first_entry(head, struct reserve_ticket, list);
 
                /* Check and see if our ticket can be satisfied now. */
-               if ((used + ticket->bytes <= writable_total_bytes(fs_info, space_info)) ||
+               if ((used + ticket->bytes <= space_info->total_bytes) ||
                    btrfs_can_overcommit(fs_info, space_info, ticket->bytes,
                                         flush)) {
                        btrfs_space_info_update_bytes_may_use(fs_info,
@@ -829,7 +811,6 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
 {
        u64 used;
        u64 avail;
-       u64 total;
        u64 to_reclaim = space_info->reclaim_size;
 
        lockdep_assert_held(&space_info->lock);
@@ -844,9 +825,8 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
         * space.  If that's the case add in our overage so we make sure to put
         * appropriate pressure on the flushing state machine.
         */
-       total = writable_total_bytes(fs_info, space_info);
-       if (total + avail < used)
-               to_reclaim += used - (total + avail);
+       if (space_info->total_bytes + avail < used)
+               to_reclaim += used - (space_info->total_bytes + avail);
 
        return to_reclaim;
 }
@@ -856,11 +836,10 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
 {
        u64 global_rsv_size = fs_info->global_block_rsv.reserved;
        u64 ordered, delalloc;
-       u64 total = writable_total_bytes(fs_info, space_info);
        u64 thresh;
        u64 used;
 
-       thresh = mult_perc(total, 90);
+       thresh = mult_perc(space_info->total_bytes, 90);
 
        lockdep_assert_held(&space_info->lock);
 
@@ -923,8 +902,8 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
                                           BTRFS_RESERVE_FLUSH_ALL);
        used = space_info->bytes_used + space_info->bytes_reserved +
               space_info->bytes_readonly + global_rsv_size;
-       if (used < total)
-               thresh += total - used;
+       if (used < space_info->total_bytes)
+               thresh += space_info->total_bytes - used;
        thresh >>= space_info->clamp;
 
        used = space_info->bytes_pinned;
@@ -1651,7 +1630,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
         * can_overcommit() to ensure we can overcommit to continue.
         */
        if (!pending_tickets &&
-           ((used + orig_bytes <= writable_total_bytes(fs_info, space_info)) ||
+           ((used + orig_bytes <= space_info->total_bytes) ||
             btrfs_can_overcommit(fs_info, space_info, orig_bytes, flush))) {
                btrfs_space_info_update_bytes_may_use(fs_info, space_info,
                                                      orig_bytes);
@@ -1665,8 +1644,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
         */
        if (ret && unlikely(flush == BTRFS_RESERVE_FLUSH_EMERGENCY)) {
                used = btrfs_space_info_used(space_info, false);
-               if (used + orig_bytes <=
-                   writable_total_bytes(fs_info, space_info)) {
+               if (used + orig_bytes <= space_info->total_bytes) {
                        btrfs_space_info_update_bytes_may_use(fs_info, space_info,
                                                              orig_bytes);
                        ret = 0;
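
With active_total_bytes and writable_total_bytes() gone, the hunks above leave
one rule: on a zoned filesystem with active zone tracking, metadata reservations
get no overcommit slack and must fit within total_bytes. A runnable model of
the check (simplified; the slack value is a stand-in for
calc_available_free_space()):

    #include <stdbool.h>
    #include <stdio.h>

    static bool can_overcommit(bool zone_tracking, bool is_metadata,
                               unsigned long long used, unsigned long long bytes,
                               unsigned long long total_bytes,
                               unsigned long long avail_slack)
    {
            unsigned long long avail;

            if (zone_tracking && is_metadata)
                    avail = 0;              /* no slack under zone tracking */
            else
                    avail = avail_slack;

            return used + bytes < total_bytes + avail;
    }

    int main(void)
    {
            /* 1 GiB total, 900 MiB used: a 200 MiB metadata reservation
             * fails under zone tracking but succeeds with overcommit. */
            printf("%d\n", can_overcommit(true, true, 900 << 20, 200 << 20,
                                          1024ULL << 20, 512ULL << 20));
            printf("%d\n", can_overcommit(false, true, 900 << 20, 200 << 20,
                                          1024ULL << 20, 512ULL << 20));
            return 0;
    }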
diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
index fc99ea2b0c34fc2187813dffc2a206577e65ee22..2033b71b18cece2b5ee1e35877d184e1d3ced1b2 100644
@@ -96,8 +96,6 @@ struct btrfs_space_info {
        u64 bytes_may_use;      /* number of bytes that may be used for
                                   delalloc/allocations */
        u64 bytes_readonly;     /* total bytes that are read only */
-       /* Total bytes in the space, but only accounts active block groups. */
-       u64 active_total_bytes;
        u64 bytes_zone_unusable;        /* total bytes that are unusable until
                                           resetting the device zone */
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 7823168c08a6aa22466addc1472f0c4f3732529c..6d0124b6e79e3ddeb1ed32c19f06aec306c78eb9 100644
@@ -6363,7 +6363,8 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
        ASSERT(op != BTRFS_MAP_DISCARD);
 
        em = btrfs_get_chunk_map(fs_info, logical, *length);
-       ASSERT(!IS_ERR(em));
+       if (IS_ERR(em))
+               return PTR_ERR(em);
 
        map = em->map_lookup;
        data_stripes = nr_data_stripes(map);
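
The volumes.c change replaces an assertion with graceful error propagation: a
missing chunk mapping now bubbles up as an errno instead of tripping debug
builds. A userspace model of the kernel's ERR_PTR convention that the fix
relies on (an assumption-laden simplification of include/linux/err.h):

    #include <errno.h>
    #include <stdio.h>

    #define MAX_ERRNO 4095

    static inline void *ERR_PTR(long error) { return (void *)error; }
    static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
    static inline int IS_ERR(const void *ptr)
    {
            return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
    }

    /* Stand-in for btrfs_get_chunk_map(): returns a mapping or -ENOENT
     * encoded in the pointer itself. */
    static void *get_chunk_map(int found)
    {
            static int mapping;

            return found ? (void *)&mapping : ERR_PTR(-ENOENT);
    }

    static int map_block(int found)
    {
            void *em = get_chunk_map(found);

            if (IS_ERR(em))                 /* graceful: propagate the errno */
                    return (int)PTR_ERR(em);
            return 0;                       /* ... use the mapping ... */
    }

    int main(void)
    {
            printf("ok=%d missing=%d\n", map_block(1), map_block(0));
            return 0;
    }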
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index f95b2c94d6199a62194d785ffa3d427c0a3fa79b..45d04092f2f8cdadded5570b677738c9de74197a 100644
@@ -524,8 +524,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
                }
                atomic_set(&zone_info->active_zones_left,
                           max_active_zones - nactive);
-               /* Overcommit does not work well with active zone tacking. */
-               set_bit(BTRFS_FS_NO_OVERCOMMIT, &fs_info->flags);
+               set_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags);
        }
 
        /* Validate superblock log */
@@ -1581,9 +1580,19 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache)
                return;
 
        WARN_ON(cache->bytes_super != 0);
-       unusable = (cache->alloc_offset - cache->used) +
-                  (cache->length - cache->zone_capacity);
-       free = cache->zone_capacity - cache->alloc_offset;
+
+       /* Check for block groups that were never activated */
+       if (test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &cache->fs_info->flags) &&
+           cache->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM) &&
+           !test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags) &&
+           cache->alloc_offset == 0) {
+               unusable = cache->length;
+               free = 0;
+       } else {
+               unusable = (cache->alloc_offset - cache->used) +
+                          (cache->length - cache->zone_capacity);
+               free = cache->zone_capacity - cache->alloc_offset;
+       }
 
        /* We only need ->free_space in ALLOC_SEQ block groups */
        cache->cached = BTRFS_CACHE_FINISHED;
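
The new special case above makes a never-activated metadata/system block group
read as entirely unusable at mount. The arithmetic, as a runnable model (names
shortened; a sketch, not the kernel code):

    #include <stdio.h>

    struct bg {
            unsigned long long length;
            unsigned long long zone_capacity;
            unsigned long long alloc_offset;
            unsigned long long used;
    };

    static void calc_zone_unusable(const struct bg *bg, int never_activated,
                                   unsigned long long *unusable,
                                   unsigned long long *free_space)
    {
            if (never_activated && bg->alloc_offset == 0) {
                    /* No active zone backs this group yet: nothing is
                     * allocatable, the whole length is unusable. */
                    *unusable = bg->length;
                    *free_space = 0;
            } else {
                    /* Written-but-freed bytes plus the capacity gap at the
                     * end of the zone stay unusable until a zone reset. */
                    *unusable = (bg->alloc_offset - bg->used) +
                                (bg->length - bg->zone_capacity);
                    *free_space = bg->zone_capacity - bg->alloc_offset;
            }
    }

    int main(void)
    {
            struct bg bg = { 256ULL << 20, 192ULL << 20, 0, 0 };
            unsigned long long unusable, free_space;

            calc_zone_unusable(&bg, 1, &unusable, &free_space);
            printf("unusable=%lluM free=%lluM\n",
                   unusable >> 20, free_space >> 20);
            return 0;
    }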
@@ -1902,7 +1911,11 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
 
        /* Successfully activated all the zones */
        set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags);
-       space_info->active_total_bytes += block_group->length;
+       WARN_ON(block_group->alloc_offset != 0);
+       if (block_group->zone_unusable == block_group->length) {
+               block_group->zone_unusable = block_group->length - block_group->zone_capacity;
+               space_info->bytes_zone_unusable -= block_group->zone_capacity;
+       }
        spin_unlock(&block_group->lock);
        btrfs_try_granting_tickets(fs_info, space_info);
        spin_unlock(&space_info->lock);
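
Activation is where the fresh region stops being unusable: if the whole group
was counted unusable, only the capacity gap remains and zone_capacity bytes
return to the space_info free pool. The transfer as arithmetic (a runnable
model of the hunk above, with made-up sizes):

    #include <stdio.h>

    int main(void)
    {
            unsigned long long length = 256ULL << 20;
            unsigned long long zone_capacity = 192ULL << 20;
            unsigned long long bg_zone_unusable = length;       /* fresh BG */
            unsigned long long si_bytes_zone_unusable = length;

            if (bg_zone_unusable == length) {
                    bg_zone_unusable = length - zone_capacity;
                    si_bytes_zone_unusable -= zone_capacity;    /* freed up */
            }
            printf("bg=%lluM space_info=%lluM\n",
                   bg_zone_unusable >> 20, si_bytes_zone_unusable >> 20);
            return 0;
    }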
@@ -2086,11 +2099,21 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
                if (!device->bdev)
                        continue;
 
-               if (!zinfo->max_active_zones ||
-                   atomic_read(&zinfo->active_zones_left)) {
+               if (!zinfo->max_active_zones) {
                        ret = true;
                        break;
                }
+
+               switch (flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+               case 0: /* single */
+                       ret = (atomic_read(&zinfo->active_zones_left) >= 1);
+                       break;
+               case BTRFS_BLOCK_GROUP_DUP:
+                       ret = (atomic_read(&zinfo->active_zones_left) >= 2);
+                       break;
+               }
+               if (ret)
+                       break;
        }
        mutex_unlock(&fs_info->chunk_mutex);
 
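The DUP fix encodes that both copies of a DUP chunk land on the same device, so
activating a DUP block group needs two free active-zone slots there, while the
single profile needs one. A compact model of the per-device check (profile
values are stand-ins for the kernel flags):

    #include <stdbool.h>
    #include <stdio.h>

    enum profile { PROFILE_SINGLE, PROFILE_DUP };

    /* A device with no max_active_zones limit can always activate;
     * otherwise DUP needs two slots on the same device, single needs one. */
    static bool can_activate(unsigned int max_active_zones,
                             unsigned int active_zones_left, enum profile p)
    {
            if (max_active_zones == 0)
                    return true;
            switch (p) {
            case PROFILE_SINGLE:
                    return active_zones_left >= 1;
            case PROFILE_DUP:
                    return active_zones_left >= 2;
            }
            return false;
    }

    int main(void)
    {
            printf("dup,1 left -> %d\n", can_activate(14, 1, PROFILE_DUP));
            printf("dup,2 left -> %d\n", can_activate(14, 2, PROFILE_DUP));
            return 0;
    }
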
@@ -2256,7 +2279,7 @@ int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info)
                u64 avail;
 
                spin_lock(&block_group->lock);
-               if (block_group->reserved ||
+               if (block_group->reserved || block_group->alloc_offset == 0 ||
                    (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM)) {
                        spin_unlock(&block_group->lock);
                        continue;
@@ -2293,10 +2316,6 @@ int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
        if (!btrfs_is_zoned(fs_info) || (space_info->flags & BTRFS_BLOCK_GROUP_DATA))
                return 0;
 
-       /* No more block groups to activate */
-       if (space_info->active_total_bytes == space_info->total_bytes)
-               return 0;
-
        for (;;) {
                int ret;
                bool need_finish = false;