Merge tag 'for-linus-20180210' of git://git.kernel.dk/linux-block
author     Linus Torvalds <torvalds@linux-foundation.org>
           Sat, 10 Feb 2018 22:05:11 +0000 (14:05 -0800)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Sat, 10 Feb 2018 22:05:11 +0000 (14:05 -0800)
Pull block fixes from Jens Axboe:
 "A few fixes to round off the merge window on the block side:

   - a set of bcache fixes by way of Michael Lyle, from the usual bcache
     suspects.

   - add a simple-to-hook-into function for bpf EIO error injection.

   - fix blk-wbt, which mischaracterized flushes as reads. Improve the
     logic so that flushes and writes are accounted as writes, and only
     reads as reads. From me.

   - fix requeue crash in BFQ, from Paolo"

* tag 'for-linus-20180210' of git://git.kernel.dk/linux-block:
  block, bfq: add requeue-request hook
  bcache: fix for data collapse after re-attaching an attached device
  bcache: return attach error when no cache set exist
  bcache: set writeback_rate_update_seconds in range [1, 60] seconds
  bcache: fix for allocator and register thread race
  bcache: set error_limit correctly
  bcache: properly set task state in bch_writeback_thread()
  bcache: fix high CPU occupancy during journal
  bcache: add journal statistic
  block: Add should_fail_bio() for bpf error injection
  blk-wbt: account flush requests correctly

12 files changed:
block/bfq-iosched.c
block/blk-core.c
block/blk-wbt.c
drivers/md/bcache/alloc.c
drivers/md/bcache/bcache.h
drivers/md/bcache/btree.c
drivers/md/bcache/journal.c
drivers/md/bcache/super.c
drivers/md/bcache/sysfs.c
drivers/md/bcache/util.h
drivers/md/bcache/writeback.c
drivers/md/bcache/writeback.h

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 47e6ec7427c44bfefeba20af5842f88711b1c3d9..aeca22d911010dc1a11ae9377a46f4ac99b3f629 100644
@@ -3823,24 +3823,26 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
                }
 
                /*
-                * We exploit the bfq_finish_request hook to decrement
-                * rq_in_driver, but bfq_finish_request will not be
-                * invoked on this request. So, to avoid unbalance,
-                * just start this request, without incrementing
-                * rq_in_driver. As a negative consequence,
-                * rq_in_driver is deceptively lower than it should be
-                * while this request is in service. This may cause
-                * bfq_schedule_dispatch to be invoked uselessly.
+                * We exploit the bfq_finish_requeue_request hook to
+                * decrement rq_in_driver, but
+                * bfq_finish_requeue_request will not be invoked on
+                * this request. So, to avoid unbalance, just start
+                * this request, without incrementing rq_in_driver. As
+                * a negative consequence, rq_in_driver is deceptively
+                * lower than it should be while this request is in
+                * service. This may cause bfq_schedule_dispatch to be
+                * invoked uselessly.
                 *
                 * As for implementing an exact solution, the
-                * bfq_finish_request hook, if defined, is probably
-                * invoked also on this request. So, by exploiting
-                * this hook, we could 1) increment rq_in_driver here,
-                * and 2) decrement it in bfq_finish_request. Such a
-                * solution would let the value of the counter be
-                * always accurate, but it would entail using an extra
-                * interface function. This cost seems higher than the
-                * benefit, being the frequency of non-elevator-private
+                * bfq_finish_requeue_request hook, if defined, is
+                * probably invoked also on this request. So, by
+                * exploiting this hook, we could 1) increment
+                * rq_in_driver here, and 2) decrement it in
+                * bfq_finish_requeue_request. Such a solution would
+                * let the value of the counter be always accurate,
+                * but it would entail using an extra interface
+                * function. This cost seems higher than the benefit,
+                * being the frequency of non-elevator-private
                 * requests very low.
                 */
                goto start_rq;
@@ -4515,6 +4517,8 @@ static inline void bfq_update_insert_stats(struct request_queue *q,
                                           unsigned int cmd_flags) {}
 #endif
 
+static void bfq_prepare_request(struct request *rq, struct bio *bio);
+
 static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
                               bool at_head)
 {
@@ -4541,6 +4545,18 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
                else
                        list_add_tail(&rq->queuelist, &bfqd->dispatch);
        } else {
+               if (WARN_ON_ONCE(!bfqq)) {
+                       /*
+                        * This should never happen. Most likely rq is
+                        * a requeued regular request, being
+                        * re-inserted without being first
+                        * re-prepared. Do a prepare, to avoid
+                        * failure.
+                        */
+                       bfq_prepare_request(rq, rq->bio);
+                       bfqq = RQ_BFQQ(rq);
+               }
+
                idle_timer_disabled = __bfq_insert_request(bfqd, rq);
                /*
                 * Update bfqq, because, if a queue merge has occurred
@@ -4697,22 +4713,44 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
                bfq_schedule_dispatch(bfqd);
 }
 
-static void bfq_finish_request_body(struct bfq_queue *bfqq)
+static void bfq_finish_requeue_request_body(struct bfq_queue *bfqq)
 {
        bfqq->allocated--;
 
        bfq_put_queue(bfqq);
 }
 
-static void bfq_finish_request(struct request *rq)
+/*
+ * Handle either a requeue or a finish for rq. The things to do are
+ * the same in both cases: all references to rq are to be dropped. In
+ * particular, rq is considered completed from the point of view of
+ * the scheduler.
+ */
+static void bfq_finish_requeue_request(struct request *rq)
 {
-       struct bfq_queue *bfqq;
+       struct bfq_queue *bfqq = RQ_BFQQ(rq);
        struct bfq_data *bfqd;
 
-       if (!rq->elv.icq)
+       /*
+        * Requeue and finish hooks are invoked in blk-mq without
+        * checking whether the involved request is actually still
+        * referenced in the scheduler. To handle this fact, the
+        * following two checks make this function exit in case of
+        * spurious invocations, for which there is nothing to do.
+        *
+        * First, check whether rq has nothing to do with an elevator.
+        */
+       if (unlikely(!(rq->rq_flags & RQF_ELVPRIV)))
+               return;
+
+       /*
+        * rq either is not associated with any icq, or is an already
+        * requeued request that has not (yet) been re-inserted into
+        * a bfq_queue.
+        */
+       if (!rq->elv.icq || !bfqq)
                return;
 
-       bfqq = RQ_BFQQ(rq);
        bfqd = bfqq->bfqd;
 
        if (rq->rq_flags & RQF_STARTED)
@@ -4727,13 +4765,14 @@ static void bfq_finish_request(struct request *rq)
                spin_lock_irqsave(&bfqd->lock, flags);
 
                bfq_completed_request(bfqq, bfqd);
-               bfq_finish_request_body(bfqq);
+               bfq_finish_requeue_request_body(bfqq);
 
                spin_unlock_irqrestore(&bfqd->lock, flags);
        } else {
                /*
                 * Request rq may be still/already in the scheduler,
-                * in which case we need to remove it. And we cannot
+                * in which case we need to remove it (this should
+                * never happen in case of requeue). And we cannot
                 * defer such a check and removal, to avoid
                 * inconsistencies in the time interval from the end
                 * of this function to the start of the deferred work.
@@ -4748,9 +4787,26 @@ static void bfq_finish_request(struct request *rq)
                        bfqg_stats_update_io_remove(bfqq_group(bfqq),
                                                    rq->cmd_flags);
                }
-               bfq_finish_request_body(bfqq);
+               bfq_finish_requeue_request_body(bfqq);
        }
 
+       /*
+        * Reset private fields. In case of a requeue, this allows
+        * this function to correctly do nothing if it is spuriously
+        * invoked again on this same request (see the check at the
+        * beginning of the function). Probably, a better general
+        * design would be to prevent blk-mq from invoking the requeue
+        * or finish hooks of an elevator, for a request that is not
+        * referred by that elevator.
+        *
+        * Resetting the following fields would break the
+        * request-insertion logic if rq is re-inserted into a bfq
+        * internal queue, without a re-preparation. Here we assume
+        * that re-insertions of requeued requests, without
+        * re-preparation, can happen only for pass_through or at_head
+        * requests (which are not re-inserted into bfq internal
+        * queues).
+        */
        rq->elv.priv[0] = NULL;
        rq->elv.priv[1] = NULL;
 }
@@ -5426,7 +5482,8 @@ static struct elevator_type iosched_bfq_mq = {
        .ops.mq = {
                .limit_depth            = bfq_limit_depth,
                .prepare_request        = bfq_prepare_request,
-               .finish_request         = bfq_finish_request,
+               .requeue_request        = bfq_finish_requeue_request,
+               .finish_request         = bfq_finish_requeue_request,
                .exit_icq               = bfq_exit_icq,
                .insert_requests        = bfq_insert_requests,
                .dispatch_request       = bfq_dispatch_request,
diff --git a/block/blk-core.c b/block/blk-core.c
index d0d104268f1a99bed64cd7d7871122a3dda243ec..2d1a7bbe063437bfacfca43ad479c305fccf56c7 100644
@@ -34,6 +34,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/blk-cgroup.h>
 #include <linux/debugfs.h>
+#include <linux/bpf.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/block.h>
@@ -2083,6 +2084,14 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part)
        return false;
 }
 
+static noinline int should_fail_bio(struct bio *bio)
+{
+       if (should_fail_request(&bio->bi_disk->part0, bio->bi_iter.bi_size))
+               return -EIO;
+       return 0;
+}
+ALLOW_ERROR_INJECTION(should_fail_bio, ERRNO);
+
 /*
  * Remap block n of partition p to block n+start(p) of the disk.
  */
@@ -2174,7 +2183,7 @@ generic_make_request_checks(struct bio *bio)
        if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_rq_based(q))
                goto not_supported;
 
-       if (should_fail_request(&bio->bi_disk->part0, bio->bi_iter.bi_size))
+       if (should_fail_bio(bio))
                goto end_io;
 
        if (!bio->bi_partno) {
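
The new should_fail_bio() above exists so that BPF-based error injection can override its return value (hence the noinline and the ALLOW_ERROR_INJECTION() annotation). A minimal sketch of such a BPF program follows; it is not part of this merge, and it assumes a kernel with CONFIG_BPF_KPROBE_OVERRIDE plus the bpf_helpers.h wrapper for bpf_override_return() used by samples/bpf and the BPF selftests, with a loader that attaches "kprobe/" sections.

/* Hypothetical example, not part of this merge: force every bio passing
 * through generic_make_request_checks() to fail with -EIO by overriding
 * should_fail_bio()'s return value from a BPF kprobe program.
 */
#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"	/* assumed: selftests/samples helper header */

SEC("kprobe/should_fail_bio")
int inject_bio_eio(struct pt_regs *ctx)
{
	/* -5 == -EIO; the overridden return makes the caller end the bio */
	bpf_override_return(ctx, -5);
	return 0;
}

char _license[] SEC("license") = "GPL";
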
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index ae8de9780085ae7b8e99237ed16fc9cd02b233a5..f92fc84b5e2c497b77578408c10c22bf37929384 100644
@@ -697,7 +697,15 @@ u64 wbt_default_latency_nsec(struct request_queue *q)
 
 static int wbt_data_dir(const struct request *rq)
 {
-       return rq_data_dir(rq);
+       const int op = req_op(rq);
+
+       if (op == REQ_OP_READ)
+               return READ;
+       else if (op == REQ_OP_WRITE || op == REQ_OP_FLUSH)
+               return WRITE;
+
+       /* don't account */
+       return -1;
 }
 
 int wbt_init(struct request_queue *q)
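
The old wbt_data_dir() above simply returned rq_data_dir(rq), which classifies a request by the low bit of its op code; REQ_OP_FLUSH has an even value, so flushes were charged to the read bucket. The standalone sketch below is not part of this merge; the enum values and the low-bit test mirror the 4.16-era kernel definitions and only illustrate the misclassification the new wbt_data_dir() fixes.

#include <stdio.h>
#include <stdbool.h>

enum req_opf {			/* subset, values as in the 4.16-era kernel */
	REQ_OP_READ	= 0,
	REQ_OP_WRITE	= 1,
	REQ_OP_FLUSH	= 2,
};

static bool op_is_write(unsigned int op)
{
	return op & 1;		/* the same low-bit test rq_data_dir() uses */
}

int main(void)
{
	/* Prints 0: a flush fails the "is write" test, so the old code
	 * accounted it as a read. */
	printf("op_is_write(REQ_OP_FLUSH) = %d\n", op_is_write(REQ_OP_FLUSH));
	return 0;
}
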
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 6cc6c0f9c3a95fba8ef93dc256fb4cf7ba040f4d..458e1d38577db1e879a7cdf86f7343c23561b3d4 100644
@@ -287,8 +287,10 @@ do {                                                                       \
                        break;                                          \
                                                                        \
                mutex_unlock(&(ca)->set->bucket_lock);                  \
-               if (kthread_should_stop())                              \
+               if (kthread_should_stop()) {                            \
+                       set_current_state(TASK_RUNNING);                \
                        return 0;                                       \
+               }                                                       \
                                                                        \
                schedule();                                             \
                mutex_lock(&(ca)->set->bucket_lock);                    \
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 5e2d4e80198e596e426f05a7a902b3eff54bb3ac..12e5197f186cd2efbd7d32f6374d9fc6bf0207f5 100644
@@ -658,10 +658,15 @@ struct cache_set {
        atomic_long_t           writeback_keys_done;
        atomic_long_t           writeback_keys_failed;
 
+       atomic_long_t           reclaim;
+       atomic_long_t           flush_write;
+       atomic_long_t           retry_flush_write;
+
        enum                    {
                ON_ERROR_UNREGISTER,
                ON_ERROR_PANIC,
        }                       on_error;
+#define DEFAULT_IO_ERROR_LIMIT 8
        unsigned                error_limit;
        unsigned                error_decay;
 
@@ -675,6 +680,8 @@ struct cache_set {
 
 #define BUCKET_HASH_BITS       12
        struct hlist_head       bucket_hash[1 << BUCKET_HASH_BITS];
+
+       DECLARE_HEAP(struct btree *, flush_btree);
 };
 
 struct bbio {
@@ -917,7 +924,7 @@ void bcache_write_super(struct cache_set *);
 
 int bch_flash_dev_create(struct cache_set *c, uint64_t size);
 
-int bch_cached_dev_attach(struct cached_dev *, struct cache_set *);
+int bch_cached_dev_attach(struct cached_dev *, struct cache_set *, uint8_t *);
 void bch_cached_dev_detach(struct cached_dev *);
 void bch_cached_dev_run(struct cached_dev *);
 void bcache_device_stop(struct bcache_device *);
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index bf3a48aa9a9a47bda762975ee36f765b741ea043..fad9fe8817eb1a6c3e63e082dc600a72bb52284e 100644
@@ -1869,14 +1869,17 @@ void bch_initial_gc_finish(struct cache_set *c)
         */
        for_each_cache(ca, c, i) {
                for_each_bucket(b, ca) {
-                       if (fifo_full(&ca->free[RESERVE_PRIO]))
+                       if (fifo_full(&ca->free[RESERVE_PRIO]) &&
+                           fifo_full(&ca->free[RESERVE_BTREE]))
                                break;
 
                        if (bch_can_invalidate_bucket(ca, b) &&
                            !GC_MARK(b)) {
                                __bch_invalidate_one_bucket(ca, b);
-                               fifo_push(&ca->free[RESERVE_PRIO],
-                                         b - ca->buckets);
+                               if (!fifo_push(&ca->free[RESERVE_PRIO],
+                                  b - ca->buckets))
+                                       fifo_push(&ca->free[RESERVE_BTREE],
+                                                 b - ca->buckets);
                        }
                }
        }
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index a87165c1d8e5262d01962eb706b60ff9fd02cb78..1b736b8607399921ff59ad9f4dff6549be907dbc 100644
@@ -368,6 +368,12 @@ err:
 }
 
 /* Journalling */
+#define journal_max_cmp(l, r) \
+       (fifo_idx(&c->journal.pin, btree_current_write(l)->journal) < \
+        fifo_idx(&(c)->journal.pin, btree_current_write(r)->journal))
+#define journal_min_cmp(l, r) \
+       (fifo_idx(&c->journal.pin, btree_current_write(l)->journal) > \
+        fifo_idx(&(c)->journal.pin, btree_current_write(r)->journal))
 
 static void btree_flush_write(struct cache_set *c)
 {
@@ -375,28 +381,41 @@ static void btree_flush_write(struct cache_set *c)
         * Try to find the btree node with that references the oldest journal
         * entry, best is our current candidate and is locked if non NULL:
         */
-       struct btree *b, *best;
-       unsigned i;
+       struct btree *b;
+       int i;
+
+       atomic_long_inc(&c->flush_write);
+
 retry:
-       best = NULL;
-
-       for_each_cached_btree(b, c, i)
-               if (btree_current_write(b)->journal) {
-                       if (!best)
-                               best = b;
-                       else if (journal_pin_cmp(c,
-                                       btree_current_write(best)->journal,
-                                       btree_current_write(b)->journal)) {
-                               best = b;
+       spin_lock(&c->journal.lock);
+       if (heap_empty(&c->flush_btree)) {
+               for_each_cached_btree(b, c, i)
+                       if (btree_current_write(b)->journal) {
+                               if (!heap_full(&c->flush_btree))
+                                       heap_add(&c->flush_btree, b,
+                                                journal_max_cmp);
+                               else if (journal_max_cmp(b,
+                                        heap_peek(&c->flush_btree))) {
+                                       c->flush_btree.data[0] = b;
+                                       heap_sift(&c->flush_btree, 0,
+                                                 journal_max_cmp);
+                               }
                        }
-               }
 
-       b = best;
+               for (i = c->flush_btree.used / 2 - 1; i >= 0; --i)
+                       heap_sift(&c->flush_btree, i, journal_min_cmp);
+       }
+
+       b = NULL;
+       heap_pop(&c->flush_btree, b, journal_min_cmp);
+       spin_unlock(&c->journal.lock);
+
        if (b) {
                mutex_lock(&b->write_lock);
                if (!btree_current_write(b)->journal) {
                        mutex_unlock(&b->write_lock);
                        /* We raced */
+                       atomic_long_inc(&c->retry_flush_write);
                        goto retry;
                }
 
@@ -476,6 +495,8 @@ static void journal_reclaim(struct cache_set *c)
        unsigned iter, n = 0;
        atomic_t p;
 
+       atomic_long_inc(&c->reclaim);
+
        while (!atomic_read(&fifo_front(&c->journal.pin)))
                fifo_pop(&c->journal.pin, p);
 
@@ -819,7 +840,8 @@ int bch_journal_alloc(struct cache_set *c)
        j->w[0].c = c;
        j->w[1].c = c;
 
-       if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) ||
+       if (!(init_heap(&c->flush_btree, 128, GFP_KERNEL)) ||
+           !(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) ||
            !(j->w[0].data = (void *) __get_free_pages(GFP_KERNEL, JSET_BITS)) ||
            !(j->w[1].data = (void *) __get_free_pages(GFP_KERNEL, JSET_BITS)))
                return -ENOMEM;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 133b81225ea9c6ac61934d8593e09e94749c3152..312895788036705cb0426d1af5b5662a4797faf3 100644
@@ -957,7 +957,8 @@ void bch_cached_dev_detach(struct cached_dev *dc)
        cached_dev_put(dc);
 }
 
-int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
+int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
+                         uint8_t *set_uuid)
 {
        uint32_t rtime = cpu_to_le32(get_seconds());
        struct uuid_entry *u;
@@ -965,7 +966,8 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
 
        bdevname(dc->bdev, buf);
 
-       if (memcmp(dc->sb.set_uuid, c->sb.set_uuid, 16))
+       if ((set_uuid && memcmp(set_uuid, c->sb.set_uuid, 16)) ||
+           (!set_uuid && memcmp(dc->sb.set_uuid, c->sb.set_uuid, 16)))
                return -ENOENT;
 
        if (dc->disk.c) {
@@ -1194,7 +1196,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page,
 
        list_add(&dc->list, &uncached_devices);
        list_for_each_entry(c, &bch_cache_sets, list)
-               bch_cached_dev_attach(dc, c);
+               bch_cached_dev_attach(dc, c, NULL);
 
        if (BDEV_STATE(&dc->sb) == BDEV_STATE_NONE ||
            BDEV_STATE(&dc->sb) == BDEV_STATE_STALE)
@@ -1553,7 +1555,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
 
        c->congested_read_threshold_us  = 2000;
        c->congested_write_threshold_us = 20000;
-       c->error_limit  = 8 << IO_ERROR_SHIFT;
+       c->error_limit  = DEFAULT_IO_ERROR_LIMIT;
 
        return c;
 err:
@@ -1716,7 +1718,7 @@ static void run_cache_set(struct cache_set *c)
        bcache_write_super(c);
 
        list_for_each_entry_safe(dc, t, &uncached_devices, list)
-               bch_cached_dev_attach(dc, c);
+               bch_cached_dev_attach(dc, c, NULL);
 
        flash_devs_run(c);
 
@@ -1833,6 +1835,7 @@ void bch_cache_release(struct kobject *kobj)
 static int cache_alloc(struct cache *ca)
 {
        size_t free;
+       size_t btree_buckets;
        struct bucket *b;
 
        __module_get(THIS_MODULE);
@@ -1840,9 +1843,19 @@ static int cache_alloc(struct cache *ca)
 
        bio_init(&ca->journal.bio, ca->journal.bio.bi_inline_vecs, 8);
 
+       /*
+        * When ca->sb.njournal_buckets is non-zero, a journal exists,
+        * and btree nodes may split during bch_journal_replay(), so
+        * buckets of type RESERVE_BTREE are needed.
+        * In the worst case every journal bucket holds valid journal
+        * entries and all of their keys need to be replayed, so
+        * reserve at least as many RESERVE_BTREE buckets as there
+        * are journal buckets.
+        */
+       btree_buckets = ca->sb.njournal_buckets ?: 8;
        free = roundup_pow_of_two(ca->sb.nbuckets) >> 10;
 
-       if (!init_fifo(&ca->free[RESERVE_BTREE], 8, GFP_KERNEL) ||
+       if (!init_fifo(&ca->free[RESERVE_BTREE], btree_buckets, GFP_KERNEL) ||
            !init_fifo_exact(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) ||
            !init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL) ||
            !init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL) ||
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index b4184092c7279fa2fe1246f6a3e70650c850d6a1..78cd7bd50fddd131298562b0a9d3b128115e0115 100644
@@ -65,6 +65,9 @@ read_attribute(bset_tree_stats);
 
 read_attribute(state);
 read_attribute(cache_read_races);
+read_attribute(reclaim);
+read_attribute(flush_write);
+read_attribute(retry_flush_write);
 read_attribute(writeback_keys_done);
 read_attribute(writeback_keys_failed);
 read_attribute(io_errors);
@@ -195,7 +198,7 @@ STORE(__cached_dev)
 {
        struct cached_dev *dc = container_of(kobj, struct cached_dev,
                                             disk.kobj);
-       ssize_t v = size;
+       ssize_t v;
        struct cache_set *c;
        struct kobj_uevent_env *env;
 
@@ -215,7 +218,9 @@ STORE(__cached_dev)
        sysfs_strtoul_clamp(writeback_rate,
                            dc->writeback_rate.rate, 1, INT_MAX);
 
-       d_strtoul_nonzero(writeback_rate_update_seconds);
+       sysfs_strtoul_clamp(writeback_rate_update_seconds,
+                           dc->writeback_rate_update_seconds,
+                           1, WRITEBACK_RATE_UPDATE_SECS_MAX);
        d_strtoul(writeback_rate_i_term_inverse);
        d_strtoul_nonzero(writeback_rate_p_term_inverse);
 
@@ -267,17 +272,20 @@ STORE(__cached_dev)
        }
 
        if (attr == &sysfs_attach) {
-               if (bch_parse_uuid(buf, dc->sb.set_uuid) < 16)
+               uint8_t         set_uuid[16];
+
+               if (bch_parse_uuid(buf, set_uuid) < 16)
                        return -EINVAL;
 
+               v = -ENOENT;
                list_for_each_entry(c, &bch_cache_sets, list) {
-                       v = bch_cached_dev_attach(dc, c);
+                       v = bch_cached_dev_attach(dc, c, set_uuid);
                        if (!v)
                                return size;
                }
 
                pr_err("Can't attach %s: cache set not found", buf);
-               size = v;
+               return v;
        }
 
        if (attr == &sysfs_detach && dc->disk.c)
@@ -545,6 +553,15 @@ SHOW(__bch_cache_set)
        sysfs_print(cache_read_races,
                    atomic_long_read(&c->cache_read_races));
 
+       sysfs_print(reclaim,
+                   atomic_long_read(&c->reclaim));
+
+       sysfs_print(flush_write,
+                   atomic_long_read(&c->flush_write));
+
+       sysfs_print(retry_flush_write,
+                   atomic_long_read(&c->retry_flush_write));
+
        sysfs_print(writeback_keys_done,
                    atomic_long_read(&c->writeback_keys_done));
        sysfs_print(writeback_keys_failed,
@@ -556,7 +573,7 @@ SHOW(__bch_cache_set)
 
        /* See count_io_errors for why 88 */
        sysfs_print(io_error_halflife,  c->error_decay * 88);
-       sysfs_print(io_error_limit,     c->error_limit >> IO_ERROR_SHIFT);
+       sysfs_print(io_error_limit,     c->error_limit);
 
        sysfs_hprint(congested,
                     ((uint64_t) bch_get_congested(c)) << 9);
@@ -656,7 +673,7 @@ STORE(__bch_cache_set)
        }
 
        if (attr == &sysfs_io_error_limit)
-               c->error_limit = strtoul_or_return(buf) << IO_ERROR_SHIFT;
+               c->error_limit = strtoul_or_return(buf);
 
        /* See count_io_errors() for why 88 */
        if (attr == &sysfs_io_error_halflife)
@@ -731,6 +748,9 @@ static struct attribute *bch_cache_set_internal_files[] = {
 
        &sysfs_bset_tree_stats,
        &sysfs_cache_read_races,
+       &sysfs_reclaim,
+       &sysfs_flush_write,
+       &sysfs_retry_flush_write,
        &sysfs_writeback_keys_done,
        &sysfs_writeback_keys_failed,
 
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index 4df4c5c1cab2ea4ca7f76d9622392ae56de92435..a6763db7f061b64bbfc7552859765725c5b85450 100644
@@ -112,6 +112,8 @@ do {                                                                        \
 
 #define heap_full(h)   ((h)->used == (h)->size)
 
+#define heap_empty(h)  ((h)->used == 0)
+
 #define DECLARE_FIFO(type, name)                                       \
        struct {                                                        \
                size_t front, back, size, mask;                         \
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 51306a19ab032272663dfbd4173e967f680f715c..f1d2fc15abcc05d7fb338d67efcac1c1454954f4 100644
@@ -564,18 +564,21 @@ static int bch_writeback_thread(void *arg)
 
        while (!kthread_should_stop()) {
                down_write(&dc->writeback_lock);
+               set_current_state(TASK_INTERRUPTIBLE);
                if (!atomic_read(&dc->has_dirty) ||
                    (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
                     !dc->writeback_running)) {
                        up_write(&dc->writeback_lock);
-                       set_current_state(TASK_INTERRUPTIBLE);
 
-                       if (kthread_should_stop())
+                       if (kthread_should_stop()) {
+                               set_current_state(TASK_RUNNING);
                                return 0;
+                       }
 
                        schedule();
                        continue;
                }
+               set_current_state(TASK_RUNNING);
 
                searched_full_index = refill_dirty(dc);
 
@@ -652,7 +655,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
        dc->writeback_rate.rate         = 1024;
        dc->writeback_rate_minimum      = 8;
 
-       dc->writeback_rate_update_seconds = 5;
+       dc->writeback_rate_update_seconds = WRITEBACK_RATE_UPDATE_SECS_DEFAULT;
        dc->writeback_rate_p_term_inverse = 40;
        dc->writeback_rate_i_term_inverse = 10000;
 
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index 66f1c527fa243c8c22e2ae9451af7e90727dbd26..587b255998568b51be37642d05ecb9f87c41492c 100644
@@ -8,6 +8,9 @@
 #define MAX_WRITEBACKS_IN_PASS  5
 #define MAX_WRITESIZE_IN_PASS   5000   /* *512b */
 
+#define WRITEBACK_RATE_UPDATE_SECS_MAX         60
+#define WRITEBACK_RATE_UPDATE_SECS_DEFAULT     5
+
 /*
  * 14 (16384ths) is chosen here as something that each backing device
  * should be a reasonable fraction of the share, and not to blow up