bcachefs: Ensure srcu lock is not held too long
authorKent Overstreet <kent.overstreet@linux.dev>
Mon, 30 Oct 2023 16:30:52 +0000 (12:30 -0400)
committerKent Overstreet <kent.overstreet@linux.dev>
Sat, 4 Nov 2023 18:17:11 +0000 (14:17 -0400)
The SRCU read lock that btree_trans takes exists to make it safe for
bch2_trans_relock() to deref pointers to btree nodes/key cache items we
don't have locked, but as a side effect it blocks reclaim from freeing
those items.

Thus, it's important to not hold it for too long: we need to
differentiate between bch2_trans_unlock() calls that will be only for a
short duration, and ones that will be for an unbounded duration.

This introduces bch2_trans_unlock_long(), to be used mainly by the data
move paths.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_iter.c
fs/bcachefs/btree_iter.h
fs/bcachefs/btree_locking.c
fs/bcachefs/btree_types.h

index 0622f729411fd809580757f5bcc91bd68e23d965..b22fd395a1fd7dca494aecb90a490f4fe424b4ac 100644 (file)
@@ -1109,6 +1109,9 @@ int bch2_btree_path_traverse_one(struct btree_trans *trans,
        if (unlikely(ret))
                goto out;
 
+       if (unlikely(!trans->srcu_held))
+               bch2_trans_srcu_lock(trans);
+
        /*
         * Ensure we obey path->should_be_locked: if it's set, we can't unlock
         * and re-traverse the path without a transaction restart:
@@ -2830,18 +2833,28 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
        return p;
 }
 
-static noinline void bch2_trans_reset_srcu_lock(struct btree_trans *trans)
+void bch2_trans_srcu_unlock(struct btree_trans *trans)
 {
-       struct bch_fs *c = trans->c;
-       struct btree_path *path;
+       if (trans->srcu_held) {
+               struct bch_fs *c = trans->c;
+               struct btree_path *path;
 
-       trans_for_each_path(trans, path)
-               if (path->cached && !btree_node_locked(path, 0))
-                       path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset);
+               trans_for_each_path(trans, path)
+                       if (path->cached && !btree_node_locked(path, 0))
+                               path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset);
 
-       srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
-       trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
-       trans->srcu_lock_time   = jiffies;
+               srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
+               trans->srcu_held = false;
+       }
+}
+
+void bch2_trans_srcu_lock(struct btree_trans *trans)
+{
+       if (!trans->srcu_held) {
+               trans->srcu_idx = srcu_read_lock(&trans->c->btree_trans_barrier);
+               trans->srcu_lock_time   = jiffies;
+               trans->srcu_held = true;
+       }
 }
 
 /**
@@ -2895,8 +2908,9 @@ u32 bch2_trans_begin(struct btree_trans *trans)
        }
        trans->last_begin_time = now;
 
-       if (unlikely(time_after(jiffies, trans->srcu_lock_time + msecs_to_jiffies(10))))
-               bch2_trans_reset_srcu_lock(trans);
+       if (unlikely(trans->srcu_held &&
+                    time_after(jiffies, trans->srcu_lock_time + msecs_to_jiffies(10))))
+               bch2_trans_srcu_unlock(trans);
 
        trans->last_begin_ip = _RET_IP_;
        if (trans->restarted) {
@@ -2981,8 +2995,9 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
                trans->wb_updates_size = s->wb_updates_size;
        }
 
-       trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
+       trans->srcu_idx         = srcu_read_lock(&c->btree_trans_barrier);
        trans->srcu_lock_time   = jiffies;
+       trans->srcu_held        = true;
 
        if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) {
                struct btree_trans *pos;
@@ -3059,7 +3074,8 @@ void bch2_trans_put(struct btree_trans *trans)
 
        check_btree_paths_leaked(trans);
 
-       srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
+       if (trans->srcu_held)
+               srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
 
        bch2_journal_preres_put(&c->journal, &trans->journal_preres);
 
index 70759ee3e5c730092ccc7b12d13f7b73c85d136e..5e103f519e62ec280863c389cb765904a6becb91 100644 (file)
@@ -274,6 +274,7 @@ void bch2_path_put(struct btree_trans *, struct btree_path *, bool);
 int bch2_trans_relock(struct btree_trans *);
 int bch2_trans_relock_notrace(struct btree_trans *);
 void bch2_trans_unlock(struct btree_trans *);
+void bch2_trans_unlock_long(struct btree_trans *);
 bool bch2_trans_locked(struct btree_trans *);
 
 static inline int trans_was_restarted(struct btree_trans *trans, u32 restart_count)
@@ -579,6 +580,9 @@ static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans,
        __bch2_bkey_get_val_typed(_trans, _btree_id, _pos, _flags,      \
                                  KEY_TYPE_##_type, sizeof(*_val), _val)
 
+void bch2_trans_srcu_unlock(struct btree_trans *);
+void bch2_trans_srcu_lock(struct btree_trans *);
+
 u32 bch2_trans_begin(struct btree_trans *);
 
 /*
index bc45cd2a34a41fa66be3b795ce05a26c6397340a..3d48834d091fbda928e9e462b6061ca03f847bb1 100644 (file)
@@ -753,6 +753,12 @@ void bch2_trans_unlock(struct btree_trans *trans)
                __bch2_btree_path_unlock(trans, path);
 }
 
+void bch2_trans_unlock_long(struct btree_trans *trans)
+{
+       bch2_trans_unlock(trans);
+       bch2_trans_srcu_unlock(trans);
+}
+
 bool bch2_trans_locked(struct btree_trans *trans)
 {
        struct btree_path *path;
index ecbb44b939a05cd3560c68e7745e4493aea66e64..7cc8d6b12161be2fe1c64c68a7d034c7323bd193 100644 (file)
@@ -426,6 +426,7 @@ struct btree_trans {
        u8                      nr_updates;
        u8                      nr_wb_updates;
        u8                      wb_updates_size;
+       bool                    srcu_held:1;
        bool                    used_mempool:1;
        bool                    in_traverse_all:1;
        bool                    paths_sorted:1;