// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2008 Oracle.  All rights reserved.
 */

#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>
#include <asm/bug.h>
#include "misc.h"
#include "ctree.h"
#include "extent_io.h"
#include "locking.h"

/*
 * Extent buffer locking
 * =====================
 *
 * We use a rw_semaphore for tree locking, and the semantics are exactly the
 * same:
 *
 * - reader/writer exclusion
 * - writer/writer exclusion
 * - reader/reader sharing
 * - try-lock semantics for readers and writers
 *
 * Additionally we need one level of nesting recursion, see below. The rwsem
 * implementation does opportunistic spinning, which reduces the number of
 * times the locking task needs to sleep.
 *
 *
 * Lock recursion
 * --------------
 *
 * A write operation on a tree might indirectly start a lookup on the same
 * tree.  This can happen when btrfs_cow_block locks the tree and needs to
 * look up free extents:
 *
 * btrfs_cow_block
 *   ..
 *   alloc_tree_block_no_bg_flush
 *     btrfs_alloc_tree_block
 *       btrfs_reserve_extent
 *         ..
 *         load_free_space_cache
 *           ..
 *           btrfs_lookup_file_extent
 */

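/*
 * Illustration only (not part of the original file): the recursion described
 * above means the same task takes the read lock while it already owns the
 * write lock, by passing recurse = true.  A rough sketch, assuming "eb" is an
 * extent buffer this task has already write locked:
 *
 *      btrfs_tree_lock(eb);
 *      ...
 *      __btrfs_tree_read_lock(eb, BTRFS_NESTING_NORMAL, true);
 *      ...                                     // nested lookup by the same task
 *      btrfs_tree_read_unlock(eb);             // only clears ->lock_recursed
 *      ...
 *      btrfs_tree_unlock(eb);
 */
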
/*
 * __btrfs_tree_read_lock - lock extent buffer for read
 * @eb:      the eb to be locked
 * @nest:    the nesting level to be used for lockdep
 * @recurse: if this lock is able to be recursed
 *
 * This takes the read lock on the extent buffer, using the specified nesting
 * level for lockdep purposes.
 *
 * If you specify recurse = true, then we will allow this to be taken if we
 * currently own the lock already.  This should only be used in specific use
 * cases, and the subsequent unlock will not change the state of the lock.
 */
void __btrfs_tree_read_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest,
                            bool recurse)
{
        u64 start_ns = 0;

        if (trace_btrfs_tree_read_lock_enabled())
                start_ns = ktime_get_ns();

        if (unlikely(recurse)) {
                /* First see if we can grab the lock outright */
                if (down_read_trylock(&eb->lock))
                        goto out;

                /*
                 * Ok still doesn't necessarily mean we are already holding the
                 * lock, check the owner.
                 */
                if (eb->lock_owner != current->pid) {
                        down_read_nested(&eb->lock, nest);
                        goto out;
                }

                /*
                 * Ok we have actually recursed, but we should only be
                 * recursing once, so blow up if we're already recursed,
                 * otherwise set ->lock_recursed and carry on.
                 */
                BUG_ON(eb->lock_recursed);
                eb->lock_recursed = true;
                return;
        }

        down_read_nested(&eb->lock, nest);
out:
        eb->lock_owner = current->pid;
        trace_btrfs_tree_read_lock(eb, start_ns);
}

void btrfs_tree_read_lock(struct extent_buffer *eb)
{
        __btrfs_tree_read_lock(eb, BTRFS_NESTING_NORMAL, false);
}

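/*
 * Illustration only (not part of the original file): the common,
 * non-recursive read side simply pairs the lock with its unlock:
 *
 *      btrfs_tree_read_lock(eb);
 *      ...                                     // read-only access to the block
 *      btrfs_tree_read_unlock(eb);
 */
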
/*
 * Try-lock for read.
 *
 * Return 1 if the rwlock has been taken, 0 otherwise
 */
int btrfs_try_tree_read_lock(struct extent_buffer *eb)
{
        if (down_read_trylock(&eb->lock)) {
                eb->lock_owner = current->pid;
                trace_btrfs_try_tree_read_lock(eb);
                return 1;
        }
        return 0;
}

/*
 * Try-lock for write.
 *
 * Return 1 if the rwlock has been taken, 0 otherwise
 */
int btrfs_try_tree_write_lock(struct extent_buffer *eb)
{
        if (down_write_trylock(&eb->lock)) {
                eb->lock_owner = current->pid;
                trace_btrfs_try_tree_write_lock(eb);
                return 1;
        }
        return 0;
}

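/*
 * Illustration only (not part of the original file): callers that must not
 * sleep probe with the try-lock variants and back off on failure.  A rough
 * sketch, with the fallback path left to the caller:
 *
 *      if (!btrfs_try_tree_write_lock(eb)) {
 *              // lock is contended, release what we hold and retry later
 *              return;
 *      }
 *      ...                                     // modify the block
 *      btrfs_tree_unlock(eb);
 */
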
/*
 * Release read lock.  If the read lock was recursed then the lock stays in the
 * original state that it was before it was recursively locked.
 */
void btrfs_tree_read_unlock(struct extent_buffer *eb)
{
        trace_btrfs_tree_read_unlock(eb);
        /*
         * If we're nested, we have the write lock.  No new locking
         * is needed as long as we are the lock owner.
         * The write unlock will do a barrier for us, and the lock_recursed
         * field only matters to the lock owner.
         */
        if (eb->lock_recursed && current->pid == eb->lock_owner) {
                eb->lock_recursed = false;
                return;
        }
        eb->lock_owner = 0;
        up_read(&eb->lock);
}

/*
 * __btrfs_tree_lock - lock eb for write
 * @eb:   the eb to lock
 * @nest: the nesting to use for the lock
 *
 * Returns with the eb->lock write locked.
 */
void __btrfs_tree_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
        __acquires(&eb->lock)
{
        u64 start_ns = 0;

        if (trace_btrfs_tree_lock_enabled())
                start_ns = ktime_get_ns();

        down_write_nested(&eb->lock, nest);
        eb->lock_owner = current->pid;
        trace_btrfs_tree_lock(eb, start_ns);
}

void btrfs_tree_lock(struct extent_buffer *eb)
{
        __btrfs_tree_lock(eb, BTRFS_NESTING_NORMAL);
}

/*
 * Release the write lock.
 */
void btrfs_tree_unlock(struct extent_buffer *eb)
{
        trace_btrfs_tree_unlock(eb);
        eb->lock_owner = 0;
        up_write(&eb->lock);
}

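/*
 * Illustration only (not part of the original file): a plain write-side
 * critical section pairs the blocking lock with the unlock:
 *
 *      btrfs_tree_lock(eb);
 *      ...                                     // exclusive access to the block
 *      btrfs_tree_unlock(eb);
 *
 * Locking related blocks (e.g. parent and child during COW) would go through
 * __btrfs_tree_lock() with a distinct btrfs_lock_nesting value so that lockdep
 * can tell the levels apart.
 */
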
/*
 * This releases any locks held in the path starting at level and going all the
 * way up to the root.
 *
 * btrfs_search_slot will keep the lock held on higher nodes in a few corner
 * cases, such as COW of the block at slot zero in the node.  This ignores
 * those rules, and it should only be called when there are no more updates to
 * be done higher up in the tree.
 */
void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
{
        int i;

        if (path->keep_locks)
                return;

        for (i = level; i < BTRFS_MAX_LEVEL; i++) {
                if (!path->nodes[i])
                        continue;
                if (!path->locks[i])
                        continue;
                btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
                path->locks[i] = 0;
        }
}

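/*
 * Illustration only (assumed usage, not from this file): once a caller knows
 * all remaining modifications are confined to levels at or below "level", the
 * locks above can be dropped early to reduce contention:
 *
 *      // nothing above this level will be touched anymore
 *      btrfs_unlock_up_safe(path, level + 1);
 */
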
/*
 * Loop around taking references on and locking the root node of the tree until
 * we end up with a lock on the root node.
 *
 * Return: root extent buffer with write lock held
 */
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
{
        struct extent_buffer *eb;

        while (1) {
                eb = btrfs_root_node(root);
                btrfs_tree_lock(eb);
                if (eb == root->node)
                        break;
                btrfs_tree_unlock(eb);
                free_extent_buffer(eb);
        }
        return eb;
}

/*
 * Loop around taking references on and locking the root node of the tree until
 * we end up with a lock on the root node.
 *
 * Return: root extent buffer with read lock held
 */
struct extent_buffer *__btrfs_read_lock_root_node(struct btrfs_root *root,
                                                  bool recurse)
{
        struct extent_buffer *eb;

        while (1) {
                eb = btrfs_root_node(root);
                __btrfs_tree_read_lock(eb, BTRFS_NESTING_NORMAL, recurse);
                if (eb == root->node)
                        break;
                btrfs_tree_read_unlock(eb);
                free_extent_buffer(eb);
        }
        return eb;
}

/*
 * DREW locks
 * ==========
 *
 * DREW stands for double-reader-writer-exclusion lock.  It's used in
 * situations where you want to provide A-B exclusion but not AA or BB.
 *
 * The current implementation gives more priority to readers.  If a reader and
 * a writer both race to acquire their respective sides of the lock, the writer
 * yields its lock as soon as it detects a concurrent reader.  Additionally, if
 * there are pending readers, no new writers are allowed to come in and acquire
 * the lock.
 */

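/*
 * Illustration only (not part of the original file): the two sides exclude
 * each other but never themselves, so any number of same-side holders may run
 * concurrently.  A rough sketch, assuming "lock" is an initialized
 * struct btrfs_drew_lock:
 *
 *      // side A ("writer" side)
 *      btrfs_drew_write_lock(&lock);           // waits for all readers to leave
 *      ...
 *      btrfs_drew_write_unlock(&lock);
 *
 *      // side B ("reader" side), possibly many tasks at once
 *      btrfs_drew_read_lock(&lock);            // waits for all writers to leave
 *      ...
 *      btrfs_drew_read_unlock(&lock);
 */
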
int btrfs_drew_lock_init(struct btrfs_drew_lock *lock)
{
        int ret;

        ret = percpu_counter_init(&lock->writers, 0, GFP_KERNEL);
        if (ret)
                return ret;

        atomic_set(&lock->readers, 0);
        init_waitqueue_head(&lock->pending_readers);
        init_waitqueue_head(&lock->pending_writers);

        return 0;
}

void btrfs_drew_lock_destroy(struct btrfs_drew_lock *lock)
{
        percpu_counter_destroy(&lock->writers);
}

/* Return true if acquisition is successful, false otherwise */
bool btrfs_drew_try_write_lock(struct btrfs_drew_lock *lock)
{
        if (atomic_read(&lock->readers))
                return false;

        percpu_counter_inc(&lock->writers);

        /* Ensure writers count is updated before we check for pending readers */
        smp_mb();
        if (atomic_read(&lock->readers)) {
                btrfs_drew_write_unlock(lock);
                return false;
        }

        return true;
}

void btrfs_drew_write_lock(struct btrfs_drew_lock *lock)
{
        while (true) {
                if (btrfs_drew_try_write_lock(lock))
                        return;
                wait_event(lock->pending_writers, !atomic_read(&lock->readers));
        }
}

void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock)
{
        percpu_counter_dec(&lock->writers);
        cond_wake_up(&lock->pending_readers);
}

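/*
 * Illustration only (not part of the original file): when blocking is not an
 * option, the try variant acquires the writer side only if no reader is
 * active or pending:
 *
 *      if (!btrfs_drew_try_write_lock(&lock))
 *              return;         // a reader holds or wants the lock, back off
 *      ...
 *      btrfs_drew_write_unlock(&lock);
 */
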
void btrfs_drew_read_lock(struct btrfs_drew_lock *lock)
{
        atomic_inc(&lock->readers);

        /*
         * Ensure the pending reader count is perceived BEFORE this reader
         * goes to sleep in case of active writers.  This guarantees new writers
         * won't be allowed and that the current reader will be woken up when
         * the last active writer finishes its jobs.
         */
        smp_mb__after_atomic();

        wait_event(lock->pending_readers,
                   percpu_counter_sum(&lock->writers) == 0);
}

void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock)
{
        /*
         * atomic_dec_and_test() implies a full barrier, so woken up writers
         * are guaranteed to see the decrement.
         */
        if (atomic_dec_and_test(&lock->readers))
                wake_up(&lock->pending_writers);
}