NULL
};
+/* Default is -1; we skip past it for stop_when_cache_set_failed */
+const char * const bch_stop_on_failure_modes[] = {
+ "default",
+ "auto",
+ "always",
+ NULL
+};
+
static struct kobject *bcache_kobj;
struct mutex bch_register_lock;
LIST_HEAD(bch_cache_sets);
bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags);
bch_bio_map(bio, ca->disk_buckets);
- closure_bio_submit(bio, &ca->prio);
+ closure_bio_submit(ca->set, bio, &ca->prio);
closure_sync(cl);
}
q->limits.io_min = block_size;
q->limits.logical_block_size = block_size;
q->limits.physical_block_size = block_size;
- set_bit(QUEUE_FLAG_NONROT, &d->disk->queue->queue_flags);
- clear_bit(QUEUE_FLAG_ADD_RANDOM, &d->disk->queue->queue_flags);
- set_bit(QUEUE_FLAG_DISCARD, &d->disk->queue->queue_flags);
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue);
+ blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, d->disk->queue);
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD, d->disk->queue);
blk_queue_write_cache(q, true, true);
pr_debug("error creating sysfs link");
}
+/*
+ * If BCACHE_DEV_RATE_DW_RUNNING is set, it means routine of the delayed
+ * work dc->writeback_rate_update is running. Wait until the routine
+ * quits (BCACHE_DEV_RATE_DW_RUNNING is clear), then continue to
+ * cancel it. If BCACHE_DEV_RATE_DW_RUNNING is not clear after time_out
+ * seconds, give up waiting here and continue to cancel it too.
+ */
+static void cancel_writeback_rate_update_dwork(struct cached_dev *dc)
+{
+ int time_out = WRITEBACK_RATE_UPDATE_SECS_MAX * HZ;
+
+ do {
+ if (!test_bit(BCACHE_DEV_RATE_DW_RUNNING,
+ &dc->disk.flags))
+ break;
+ time_out--;
+ schedule_timeout_interruptible(1);
+ } while (time_out > 0);
+
+ if (time_out == 0)
+ pr_warn("give up waiting for dc->writeback_write_update to quit");
+
+ cancel_delayed_work_sync(&dc->writeback_rate_update);
+}
+
static void cached_dev_detach_finish(struct work_struct *w)
{
struct cached_dev *dc = container_of(w, struct cached_dev, detach);
mutex_lock(&bch_register_lock);
- cancel_delayed_work_sync(&dc->writeback_rate_update);
+ if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
+ cancel_writeback_rate_update_dwork(dc);
+
if (!IS_ERR_OR_NULL(dc->writeback_thread)) {
kthread_stop(dc->writeback_thread);
dc->writeback_thread = NULL;
closure_get(&dc->disk.cl);
bch_writeback_queue(dc);
+
cached_dev_put(dc);
}
if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
bch_sectors_dirty_init(&dc->disk);
atomic_set(&dc->has_dirty, 1);
- refcount_inc(&dc->count);
bch_writeback_queue(dc);
}
{
struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
- cancel_delayed_work_sync(&dc->writeback_rate_update);
+ mutex_lock(&bch_register_lock);
+
+ if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
+ cancel_writeback_rate_update_dwork(dc);
+
if (!IS_ERR_OR_NULL(dc->writeback_thread))
kthread_stop(dc->writeback_thread);
if (dc->writeback_write_wq)
destroy_workqueue(dc->writeback_write_wq);
- mutex_lock(&bch_register_lock);
-
if (atomic_read(&dc->running))
bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
bcache_device_free(&dc->disk);
max(dc->disk.disk->queue->backing_dev_info->ra_pages,
q->backing_dev_info->ra_pages);
+ /* default to auto */
+ dc->stop_when_cache_set_failed = BCH_CACHED_DEV_STOP_AUTO;
+
bch_cached_dev_request_init(dc);
bch_cached_dev_writeback_init(dc);
return 0;
struct uuid_entry *u;
for (u = c->uuids;
- u < c->uuids + c->devices_max_used && !ret;
+ u < c->uuids + c->nr_uuids && !ret;
u++)
if (UUID_FLASH_ONLY(u))
ret = flash_dev_run(c, u);
test_bit(CACHE_SET_STOPPING, &c->flags))
return false;
+ if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags))
+ pr_warn("CACHE_SET_IO_DISABLE already set");
+
/* XXX: we can be called from atomic context
acquire_console_sem();
*/
closure_return(cl);
}
+/*
+ * This function is only called when CACHE_SET_IO_DISABLE is set, which means
+ * cache set is unregistering due to too many I/O errors. In this condition,
+ * the bcache device might be stopped, it depends on stop_when_cache_set_failed
+ * value and whether the broken cache has dirty data:
+ *
+ * dc->stop_when_cache_set_failed dc->has_dirty stop bcache device
+ * BCH_CACHED_STOP_AUTO 0 NO
+ * BCH_CACHED_STOP_AUTO 1 YES
+ * BCH_CACHED_DEV_STOP_ALWAYS 0 YES
+ * BCH_CACHED_DEV_STOP_ALWAYS 1 YES
+ *
+ * The expected behavior is, if stop_when_cache_set_failed is configured to
+ * "auto" via sysfs interface, the bcache device will not be stopped if the
+ * backing device is clean on the broken cache device.
+ */
+static void conditional_stop_bcache_device(struct cache_set *c,
+ struct bcache_device *d,
+ struct cached_dev *dc)
+{
+ if (dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_ALWAYS) {
+ pr_warn("stop_when_cache_set_failed of %s is \"always\", stop it for failed cache set %pU.",
+ d->disk->disk_name, c->sb.set_uuid);
+ bcache_device_stop(d);
+ } else if (atomic_read(&dc->has_dirty)) {
+ /*
+ * dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO
+ * and dc->has_dirty == 1
+ */
+ pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potential data corruption.",
+ d->disk->disk_name);
+ bcache_device_stop(d);
+ } else {
+ /*
+ * dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO
+ * and dc->has_dirty == 0
+ */
+ pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is clean, keep it alive.",
+ d->disk->disk_name);
+ }
+}
+
static void __cache_set_unregister(struct closure *cl)
{
struct cache_set *c = container_of(cl, struct cache_set, caching);
struct cached_dev *dc;
+ struct bcache_device *d;
size_t i;
mutex_lock(&bch_register_lock);
- for (i = 0; i < c->devices_max_used; i++)
- if (c->devices[i]) {
- if (!UUID_FLASH_ONLY(&c->uuids[i]) &&
- test_bit(CACHE_SET_UNREGISTERING, &c->flags)) {
- dc = container_of(c->devices[i],
- struct cached_dev, disk);
- bch_cached_dev_detach(dc);
- } else {
- bcache_device_stop(c->devices[i]);
- }
+ for (i = 0; i < c->devices_max_used; i++) {
+ d = c->devices[i];
+ if (!d)
+ continue;
+
+ if (!UUID_FLASH_ONLY(&c->uuids[i]) &&
+ test_bit(CACHE_SET_UNREGISTERING, &c->flags)) {
+ dc = container_of(d, struct cached_dev, disk);
+ bch_cached_dev_detach(dc);
+ if (test_bit(CACHE_SET_IO_DISABLE, &c->flags))
+ conditional_stop_bcache_device(c, d, dc);
+ } else {
+ bcache_device_stop(d);
}
+ }
mutex_unlock(&bch_register_lock);
c->congested_read_threshold_us = 2000;
c->congested_write_threshold_us = 20000;
c->error_limit = DEFAULT_IO_ERROR_LIMIT;
+ WARN_ON(test_and_clear_bit(CACHE_SET_IO_DISABLE, &c->flags));
return c;
err:
mutex_init(&bch_register_lock);
init_waitqueue_head(&unregister_wait);
register_reboot_notifier(&reboot);
- closure_debug_init();
bcache_major = register_blkdev(0, "bcache");
if (bcache_major < 0) {
if (!(bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0)) ||
!(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) ||
bch_request_init() ||
- bch_debug_init(bcache_kobj) ||
+ bch_debug_init(bcache_kobj) || closure_debug_init() ||
sysfs_create_files(bcache_kobj, files))
goto err;