aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorColy Li <colyli@suse.de>2018-04-30 14:20:24 +0800
committerColy Li <colyli@suse.de>2018-04-30 14:20:24 +0800
commit8cf7259febfc5df4c167ffb04b67874d807a48d6 (patch)
treef331fa08fc552b500699c0f8a6f16ebfd7335746
parent3e2132d3eb52cfeedbe4f8dd9a557a6df385542a (diff)
downloadbcache-patches-8cf7259febfc5df4c167ffb04b67874d807a48d6.tar.gz
remove bcache device failure patch set v1/2/3/4/5/6
-rw-r--r--for-next/v1/v1-0000-cover-letter.patch56
-rw-r--r--for-next/v1/v1-0001-bcache-exit-bch_writeback_thread-with-proper-task.patch58
-rw-r--r--for-next/v1/v1-0002-bcache-set-task-properly-in-allocator_wait.patch79
-rw-r--r--for-next/v1/v1-0003-bcache-reduce-cache_set-devices-iteration-by-devi.patch119
-rw-r--r--for-next/v1/v1-0004-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch171
-rw-r--r--for-next/v1/v1-0005-bcache-stop-dc-writeback_rate_update-if-cache-set.patch68
-rw-r--r--for-next/v1/v1-0006-bcache-stop-dc-writeback_rate_update-dc-writeback.patch122
-rw-r--r--for-next/v1/v1-0007-bcache-set-error_limit-correctly.patch114
-rw-r--r--for-next/v1/v1-0008-bcache-fix-misleading-error-message-in-bch_count_.patch118
-rw-r--r--for-next/v1/v1-0009-bcache-add-io_disable-to-struct-cache_set.patch433
-rw-r--r--for-next/v1/v1-0010-bcache-stop-all-attached-bcache-devices-for-a-ret.patch60
-rw-r--r--for-next/v2/v2-0000-cover-letter.patch92
-rw-r--r--for-next/v2/v2-0001-bcache-set-writeback_rate_update_seconds-in-range.patch72
-rw-r--r--for-next/v2/v2-0002-bcache-properly-set-task-state-in-bch_writeback_t.patch91
-rw-r--r--for-next/v2/v2-0003-bcache-set-task-properly-in-allocator_wait.patch65
-rw-r--r--for-next/v2/v2-0004-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch178
-rw-r--r--for-next/v2/v2-0005-bcache-stop-dc-writeback_rate_update-properly.patch266
-rw-r--r--for-next/v2/v2-0006-bcache-set-error_limit-correctly.patch121
-rw-r--r--for-next/v2/v2-0007-bcache-add-CACHE_SET_IO_DISABLE-to-struct-cache_s.patch489
-rw-r--r--for-next/v2/v2-0008-bcache-stop-all-attached-bcache-devices-for-a-ret.patch67
-rw-r--r--for-next/v2/v2-0009-bcache-fix-inaccurate-io-state-for-detached-bcach.patch118
-rw-r--r--for-next/v2/v2-0010-bcache-add-backing_request_endio-for-bi_end_io-of.patch254
-rw-r--r--for-next/v2/v2-0011-bcache-add-io_disable-to-struct-cached_dev.patch235
-rw-r--r--for-next/v2/v2-0012-bcache-stop-bcache-device-when-backing-device-is-.patch151
-rw-r--r--for-next/v3/v3-0000-cover-letter.patch92
-rw-r--r--for-next/v3/v3-0001-bcache-set-writeback_rate_update_seconds-in-range.patch72
-rw-r--r--for-next/v3/v3-0002-bcache-properly-set-task-state-in-bch_writeback_t.patch91
-rw-r--r--for-next/v3/v3-0003-bcache-set-task-properly-in-allocator_wait.patch65
-rw-r--r--for-next/v3/v3-0004-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch178
-rw-r--r--for-next/v3/v3-0005-bcache-quit-dc-writeback_thread-when-BCACHE_DEV_D.patch83
-rw-r--r--for-next/v3/v3-0006-bcache-stop-dc-writeback_rate_update-properly.patch266
-rw-r--r--for-next/v3/v3-0007-bcache-set-error_limit-correctly.patch121
-rw-r--r--for-next/v3/v3-0008-bcache-add-CACHE_SET_IO_DISABLE-to-struct-cache_s.patch489
-rw-r--r--for-next/v3/v3-0009-bcache-stop-all-attached-bcache-devices-for-a-ret.patch67
-rw-r--r--for-next/v3/v3-0010-bcache-fix-inaccurate-io-state-for-detached-bcach.patch118
-rw-r--r--for-next/v3/v3-0011-bcache-add-backing_request_endio-for-bi_end_io-of.patch251
-rw-r--r--for-next/v3/v3-0012-bcache-add-io_disable-to-struct-cached_dev.patch232
-rw-r--r--for-next/v3/v3-0013-bcache-stop-bcache-device-when-backing-device-is-.patch148
-rw-r--r--for-next/v4/v4-0000-cover-letter.patch91
-rw-r--r--for-next/v4/v4-0001-bcache-set-writeback_rate_update_seconds-in-range.patch73
-rw-r--r--for-next/v4/v4-0002-bcache-properly-set-task-state-in-bch_writeback_t.patch112
-rw-r--r--for-next/v4/v4-0003-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch178
-rw-r--r--for-next/v4/v4-0004-bcache-quit-dc-writeback_thread-when-BCACHE_DEV_D.patch83
-rw-r--r--for-next/v4/v4-0005-bcache-stop-dc-writeback_rate_update-properly.patch266
-rw-r--r--for-next/v4/v4-0006-bcache-set-error_limit-correctly.patch121
-rw-r--r--for-next/v4/v4-0007-bcache-add-CACHE_SET_IO_DISABLE-to-struct-cache_s.patch489
-rw-r--r--for-next/v4/v4-0008-bcache-stop-all-attached-bcache-devices-for-a-ret.patch67
-rw-r--r--for-next/v4/v4-0009-bcache-fix-inaccurate-io-state-for-detached-bcach.patch119
-rw-r--r--for-next/v4/v4-0010-bcache-add-backing_request_endio-for-bi_end_io-of.patch255
-rw-r--r--for-next/v4/v4-0011-bcache-add-io_disable-to-struct-cached_dev.patch235
-rw-r--r--for-next/v4/v4-0012-bcache-stop-bcache-device-when-backing-device-is-.patch148
-rw-r--r--for-next/v4/v4-0013-bcache-add-stop_attached_devs_on_fail-to-struct-c.patch180
-rw-r--r--for-next/v5/v5-0000-cover-letter.patch95
-rw-r--r--for-next/v5/v5-0001-bcache-set-writeback_rate_update_seconds-in-range.patch79
-rw-r--r--for-next/v5/v5-0002-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch178
-rw-r--r--for-next/v5/v5-0003-bcache-quit-dc-writeback_thread-when-BCACHE_DEV_D.patch130
-rw-r--r--for-next/v5/v5-0004-bcache-stop-dc-writeback_rate_update-properly.patch268
-rw-r--r--for-next/v5/v5-0005-bcache-add-CACHE_SET_IO_DISABLE-to-struct-cache_s.patch491
-rw-r--r--for-next/v5/v5-0006-bcache-add-stop_when_cache_set_failed-option-to-b.patch258
-rw-r--r--for-next/v5/v5-0007-bcache-fix-inaccurate-io-state-for-detached-bcach.patch119
-rw-r--r--for-next/v5/v5-0008-bcache-add-backing_request_endio-for-bi_end_io-of.patch255
-rw-r--r--for-next/v5/v5-0009-bcache-add-io_disable-to-struct-cached_dev.patch237
-rw-r--r--for-next/v5/v5-0010-bcache-stop-bcache-device-when-backing-device-is-.patch152
-rw-r--r--for-next/v6/v6-0000-cover-letter.patch82
-rw-r--r--for-next/v6/v6-0001-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch178
-rw-r--r--for-next/v6/v6-0002-bcache-quit-dc-writeback_thread-when-BCACHE_DEV_D.patch130
-rw-r--r--for-next/v6/v6-0003-bcache-stop-dc-writeback_rate_update-properly.patch268
-rw-r--r--for-next/v6/v6-0004-bcache-add-CACHE_SET_IO_DISABLE-to-struct-cache_s.patch491
-rw-r--r--for-next/v6/v6-0005-bcache-add-stop_when_cache_set_failed-option-to-b.patch258
-rw-r--r--for-next/v6/v6-0006-bcache-fix-inaccurate-io-state-for-detached-bcach.patch124
-rw-r--r--for-next/v6/v6-0007-bcache-add-backing_request_endio-for-bi_end_io-of.patch255
-rw-r--r--for-next/v6/v6-0008-bcache-add-io_disable-to-struct-cached_dev.patch235
-rw-r--r--for-next/v6/v6-0009-bcache-stop-bcache-device-when-backing-device-is-.patch152
73 files changed, 0 insertions, 12722 deletions
diff --git a/for-next/v1/v1-0000-cover-letter.patch b/for-next/v1/v1-0000-cover-letter.patch
deleted file mode 100644
index 0ac36de..0000000
--- a/for-next/v1/v1-0000-cover-letter.patch
+++ /dev/null
@@ -1,56 +0,0 @@
-From 595d5d28a7ed23cae061b9e0dd201611afd6db6d Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 3 Jan 2018 21:20:57 +0800
-Subject: [PATCH v1 00/10] cache device failure handling improvement
-
-Hi maintainers and folks,
-
-This patch set tries to improve cache device failure handling. A basic
-idea to handle failed cache device is,
-- Unregister cache set
-- Detach all backing devices attached to this cache set
-- Stop all bcache devices linked to this cache set
-The above process is named 'cache set retire' by me. The result of cache
-set retire is, cache set and bcache devices are all removed, following
-I/O requests will get failed immediately to notify upper layer or user
-space code that the cache device is failed or disconnected.
-
-The first 8 patches of this patch set are to fix existing bugs in bcache,
-the last 2 patches do the real improvement. Order of applying these patches
-is important, if the last 2 patches are applied firstly, kernel panic or
-process hang will be observed. Therefore I suggest to apply the first 8
-fixes, then apply the last 2 patches.
-
-The patch set is tested with writethrough, writeback, writearound mode,
-read/write/readwrite workloads, so far it works as expected. IMHO the
-cache set retire logic is complicated, I need your help to review the
-patches, any question is warmly welcome.
-
-Coly Li (10):
- bcache: exit bch_writeback_thread() with proper task state
- bcache: set task properly in allocator_wait()
- bcache: reduce cache_set devices iteration by devices_max_used
- bcache: fix cached_dev->count usage for bch_cache_set_error()
- bcache: stop dc->writeback_rate_update if cache set is stopping
- bcache: stop dc->writeback_rate_update, dc->writeback_thread earlier
- bcache: set error_limit correctly
- bcache: fix misleading error message in bch_count_io_errors()
- bcache: add io_disable to struct cache_set
- bcache: stop all attached bcache devices for a retired cache set
-
- drivers/md/bcache/alloc.c | 5 ++---
- drivers/md/bcache/bcache.h | 19 +++++++++++++++-
- drivers/md/bcache/btree.c | 8 ++++---
- drivers/md/bcache/io.c | 15 ++++++++-----
- drivers/md/bcache/journal.c | 4 ++--
- drivers/md/bcache/request.c | 26 ++++++++++++++++------
- drivers/md/bcache/super.c | 51 +++++++++++++++++++++++++++++++++++--------
- drivers/md/bcache/sysfs.c | 8 +++++--
- drivers/md/bcache/util.h | 6 -----
- drivers/md/bcache/writeback.c | 51 +++++++++++++++++++++++++++++++++----------
- drivers/md/bcache/writeback.h | 4 +---
- 11 files changed, 144 insertions(+), 53 deletions(-)
-
-Thanks in advance.
-
-Coly Li
diff --git a/for-next/v1/v1-0001-bcache-exit-bch_writeback_thread-with-proper-task.patch b/for-next/v1/v1-0001-bcache-exit-bch_writeback_thread-with-proper-task.patch
deleted file mode 100644
index 1ce1bfb..0000000
--- a/for-next/v1/v1-0001-bcache-exit-bch_writeback_thread-with-proper-task.patch
+++ /dev/null
@@ -1,58 +0,0 @@
-From 02cd6111e6e305665b9b734b41d9e66735eefba5 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 20 Dec 2017 20:32:58 +0800
-Subject: [PATCH v1 01/10] bcache: exit bch_writeback_thread() with proper task
- state
-
-Kernel thread routine bch_writeback_thread() has the following code block,
-
-452 set_current_state(TASK_INTERRUPTIBLE);
-453
-454 if (kthread_should_stop())
-455 return 0;
-456
-457 schedule();
-458 continue;
-
-At line 452, its status is set to TASK_INTERRUPTIBLE, and at line 454 if
-kthread_should_stop() is true, a "return 0" at line 455 will return to function
-kernel/kthread.c:kthread() and call do_exit().
-
-It is not good to enter do_exit() with task state TASK_INTERRUPTIBLE, in
-following code path might_sleep() is called and a warning message is
-reported by __might_sleep(): "WARNING: do not call blocking ops when
-!TASK_RUNNING; state=1 set at [xxxx]".
-
-Indeed it does not hurt when kernel thread exits with TASK_INTERRUPTIBLE
-state, but this warning message scares users, makes them feel there might
-be something risky with bcache and hurt their data.
-
-In this patch, TASK_INTERRUPTIBLE is set after kthread_should_stop(),
-so writeback kernel thread can exit and enter do_exit() with
-TASK_RUNNING state. Warning message from might_sleep() is removed.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/writeback.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index 56a37884ca8b..a57149803df6 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -449,11 +449,11 @@ static int bch_writeback_thread(void *arg)
- (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
- !dc->writeback_running)) {
- up_write(&dc->writeback_lock);
-- set_current_state(TASK_INTERRUPTIBLE);
-
- if (kthread_should_stop())
- return 0;
-
-+ set_current_state(TASK_INTERRUPTIBLE);
- schedule();
- continue;
- }
---
-2.15.1
-
diff --git a/for-next/v1/v1-0002-bcache-set-task-properly-in-allocator_wait.patch b/for-next/v1/v1-0002-bcache-set-task-properly-in-allocator_wait.patch
deleted file mode 100644
index a9b6799..0000000
--- a/for-next/v1/v1-0002-bcache-set-task-properly-in-allocator_wait.patch
+++ /dev/null
@@ -1,79 +0,0 @@
-From 9eb34cfed6f7cf086a31d0e01f79548aaa82eab9 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 20 Dec 2017 22:37:11 +0800
-Subject: [PATCH v1 02/10] bcache: set task properly in allocator_wait()
-
-Kernel thread routine bch_allocator_thread() references macro
-allocator_wait() to wait for a condition or quit to do_exit()
-when kthread_should_stop() is true.
-
-Macro allocator_wait() has 2 issues in setting task state, let's
-see its code piece,
-
-284 while (1) { \
-285 set_current_state(TASK_INTERRUPTIBLE); \
-286 if (cond) \
-287 break; \
-288 \
-289 mutex_unlock(&(ca)->set->bucket_lock); \
-290 if (kthread_should_stop()) \
-291 return 0; \
-292 \
-293 schedule(); \
-294 mutex_lock(&(ca)->set->bucket_lock); \
-295 } \
-296 __set_current_state(TASK_RUNNING); \
-
-1) At line 285, task state is set to TASK_INTERRUPTIBLE, if at line 290
-kthread_should_stop() is true, the kernel thread will terminate and return
-to kernel/kthread.c:kthread(), then calls do_exit() with TASK_INTERRUPTIBLE
-state. This is not a suggested behavior and a warning message will be
-reported by might_sleep() in do_exit() code path: "WARNING: do not call
-blocking ops when !TASK_RUNNING; state=1 set at [xxxx]".
-
-2) Because task state is set to TASK_INTERRUPTIBLE at line 285, when break
-while-loop the task state has to be set back to TASK_RUNNING at line 296.
-Indeed it is unnecessary, if task state is set to TASK_INTERRUPTIBLE before
-calling schedule() at line 293, we don't need to set the state back to
-TASK_RUNNING at line 296 anymore. The reason is, allocator kthread is only
-woken up by wake_up_process(), this routine makes sure the task state of
-allocator kthread will be TASK_RUNNING after it returns from schedule() at
-line 294 (see kernel/sched/core.c:try_to_wake_up() for more detailed
-information).
-
-This patch fixes the above 2 issues by,
-1) Setting TASK_INTERRUPTIBLE state just before calling schedule().
-2) Then setting TASK_RUNNING at line 296 is unnecessary, remove it.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/alloc.c | 3 +--
- 1 file changed, 1 insertion(+), 2 deletions(-)
-
-diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
-index a0cc1bc6d884..48c002faf08d 100644
---- a/drivers/md/bcache/alloc.c
-+++ b/drivers/md/bcache/alloc.c
-@@ -282,7 +282,6 @@ static void invalidate_buckets(struct cache *ca)
- #define allocator_wait(ca, cond) \
- do { \
- while (1) { \
-- set_current_state(TASK_INTERRUPTIBLE); \
- if (cond) \
- break; \
- \
-@@ -290,10 +289,10 @@ do { \
- if (kthread_should_stop()) \
- return 0; \
- \
-+ set_current_state(TASK_INTERRUPTIBLE); \
- schedule(); \
- mutex_lock(&(ca)->set->bucket_lock); \
- } \
-- __set_current_state(TASK_RUNNING); \
- } while (0)
-
- static int bch_allocator_push(struct cache *ca, long bucket)
---
-2.15.1
-
diff --git a/for-next/v1/v1-0003-bcache-reduce-cache_set-devices-iteration-by-devi.patch b/for-next/v1/v1-0003-bcache-reduce-cache_set-devices-iteration-by-devi.patch
deleted file mode 100644
index 8cbf66c..0000000
--- a/for-next/v1/v1-0003-bcache-reduce-cache_set-devices-iteration-by-devi.patch
+++ /dev/null
@@ -1,119 +0,0 @@
-From fd33195d255d0f152d9e2b36032b1cc816ededb3 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 20 Dec 2017 23:27:41 +0800
-Subject: [PATCH v1 03/10] bcache: reduce cache_set devices iteration by
- devices_max_used
-
-Member devices of struct cache_set is used to reference all attached
-bcache devices to this cache set. If it is treated as array of pointers,
-size of devices[] is indicated by member nr_uuids of struct cache_set.
-
-nr_uuids is calculated in drivers/md/bcache/super.c:bch_cache_set_alloc(),
- bucket_bytes(c) / sizeof(struct uuid_entry)
-Bucket size is determined by user space tool "make-bcache", by default it
-is 1024 sectors (defined in bcache-tools/make-bcache.c:main()). So default
-nr_uuids value is 4096 from the above calculation.
-
-Every time when bcache code iterates bcache devices of a cache set, all
-the 4096 pointers are checked even only 1 bcache device is attached to the
-cache set, that's a waste of time and unnecessary.
-
-This patch adds a member devices_max_used to struct cache_set. Its value
-is 1 + the maximum used index of devices[] in a cache set. When iterating
-all valid bcache devices of a cache set, use c->devices_max_used in
-for-loop may reduce a lot of useless checking.
-
-Personally, my motivation of this patch is not for performance, I use it
-in bcache debugging, which helps me to narrow down the scope to check
-valid bcache devices of a cache set.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/bcache.h | 1 +
- drivers/md/bcache/btree.c | 2 +-
- drivers/md/bcache/super.c | 9 ++++++---
- drivers/md/bcache/writeback.h | 2 +-
- 4 files changed, 9 insertions(+), 5 deletions(-)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 843877e017e1..83c569942bd0 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -488,6 +488,7 @@ struct cache_set {
- int caches_loaded;
-
- struct bcache_device **devices;
-+ unsigned devices_max_used;
- struct list_head cached_devs;
- uint64_t cached_dev_sectors;
- struct closure caching;
-diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
-index 81e8dc3dbe5e..bf0d7978bc3d 100644
---- a/drivers/md/bcache/btree.c
-+++ b/drivers/md/bcache/btree.c
-@@ -1678,7 +1678,7 @@ static void bch_btree_gc_finish(struct cache_set *c)
-
- /* don't reclaim buckets to which writeback keys point */
- rcu_read_lock();
-- for (i = 0; i < c->nr_uuids; i++) {
-+ for (i = 0; i < c->devices_max_used; i++) {
- struct bcache_device *d = c->devices[i];
- struct cached_dev *dc;
- struct keybuf_key *w, *n;
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index b4d28928dec5..064efd869017 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -721,6 +721,9 @@ static void bcache_device_attach(struct bcache_device *d, struct cache_set *c,
- d->c = c;
- c->devices[id] = d;
-
-+ if (id >= c->devices_max_used)
-+ c->devices_max_used = id + 1;
-+
- closure_get(&c->caching);
- }
-
-@@ -1261,7 +1264,7 @@ static int flash_devs_run(struct cache_set *c)
- struct uuid_entry *u;
-
- for (u = c->uuids;
-- u < c->uuids + c->nr_uuids && !ret;
-+ u < c->uuids + c->devices_max_used && !ret;
- u++)
- if (UUID_FLASH_ONLY(u))
- ret = flash_dev_run(c, u);
-@@ -1427,7 +1430,7 @@ static void __cache_set_unregister(struct closure *cl)
-
- mutex_lock(&bch_register_lock);
-
-- for (i = 0; i < c->nr_uuids; i++)
-+ for (i = 0; i < c->devices_max_used; i++)
- if (c->devices[i]) {
- if (!UUID_FLASH_ONLY(&c->uuids[i]) &&
- test_bit(CACHE_SET_UNREGISTERING, &c->flags)) {
-@@ -1490,7 +1493,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
- c->bucket_bits = ilog2(sb->bucket_size);
- c->block_bits = ilog2(sb->block_size);
- c->nr_uuids = bucket_bytes(c) / sizeof(struct uuid_entry);
--
-+ c->devices_max_used = 0;
- c->btree_pages = bucket_pages(c);
- if (c->btree_pages > BTREE_MAX_PAGES)
- c->btree_pages = max_t(int, c->btree_pages / 4,
-diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
-index a9e3ffb4b03c..1d284f3d0363 100644
---- a/drivers/md/bcache/writeback.h
-+++ b/drivers/md/bcache/writeback.h
-@@ -21,7 +21,7 @@ static inline uint64_t bcache_flash_devs_sectors_dirty(struct cache_set *c)
-
- mutex_lock(&bch_register_lock);
-
-- for (i = 0; i < c->nr_uuids; i++) {
-+ for (i = 0; i < c->devices_max_used; i++) {
- struct bcache_device *d = c->devices[i];
-
- if (!d || !UUID_FLASH_ONLY(&c->uuids[i]))
---
-2.15.1
-
diff --git a/for-next/v1/v1-0004-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch b/for-next/v1/v1-0004-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch
deleted file mode 100644
index e3975b2..0000000
--- a/for-next/v1/v1-0004-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch
+++ /dev/null
@@ -1,171 +0,0 @@
-From d697858f6f515b4bacee984c82535cf2b896ace9 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Fri, 22 Dec 2017 16:37:17 +0800
-Subject: [PATCH v1 04/10] bcache: fix cached_dev->count usage for
- bch_cache_set_error()
-
-When bcache metadata I/O fails, bcache will call bch_cache_set_error()
-to retire the whole cache set. The expected behavior to retire a cache
-set is to unregister the cache set, and unregister all backing device
-attached to this cache set, then remove sysfs entries of the cache set
-and all attached backing devices, finally release memory of structs
-cache_set, cache, cached_dev and bcache_device.
-
-In my testing when journal I/O failure triggered by disconnected cache
-device, sometimes the cache set cannot be retired, and its sysfs
-entry /sys/fs/bcache/<uuid> still exists and the backing device also
-references it. This is not expected behavior.
-
-When metadata I/O fails, the call sequence to retire whole cache set is,
- bch_cache_set_error()
- bch_cache_set_unregister()
- bch_cache_set_stop()
- __cache_set_unregister() <- called as callback by calling
- closure_queue(&c->caching)
- cache_set_flush() <- called as a callback when refcount
- of cache_set->caching is 0
- cache_set_free() <- called as a callback when refcount
- of cache_set->cl is 0
- bch_cache_set_release() <- called as a callback when refcount
- of cache_set->kobj is 0
-
-I find if kernel thread bch_writeback_thread() quits while-loop when
-kthread_should_stop() is true and searched_full_index is false, closure
-callback cache_set_flush() set by continue_at() will never be called. The
-result is, bcache fails to retire whole cache set.
-
-cache_set_flush() will be called when refcount of closure c->caching is 0,
-and in function bcache_device_detach() refcount of closure c->caching is
-released to 0 by closure_put(). In metadata error code path, function
-bcache_device_detach() is called by cached_dev_detach_finish(). This is a
-callback routine being called when cached_dev->count is 0. This refcount
-is decreased by cached_dev_put().
-
-The above dependence indicates, cache_set_flush() will be called when
-refcount of cache_set->cl is 0, and refcount of cache_set->cl to be 0
-when refcount of cache_dev->count is 0.
-
-The reason why sometimes cache_dev->count is not 0 (when metadata I/O fails
-and bch_cache_set_error() called) is, in bch_writeback_thread(), refcount
-of cache_dev is not decreased properly.
-
-In bch_writeback_thread(), cached_dev_put() is called only when
-searched_full_index is true and cached_dev->writeback_keys is empty, a.k.a
-there is no dirty data on cache. In most of run time it is correct, but
-when bch_writeback_thread() quits the while-loop while cache is still
-dirty, current code forget to call cached_dev_put() before this kernel
-thread exits. This is why sometimes cache_set_flush() is not executed and
-cache set fails to be retired.
-
-The reason to call cached_dev_put() in bch_writeback_thread() is, when the
-cache device changes from clean to dirty, cached_dev_get() is called, to
-make sure during writeback operations both backing and cache devices
-won't be released.
-
-Adding following code in bch_writeback_thread() does not work,
- static int bch_writeback_thread(void *arg)
- [code snip]
-
- + if (atomic_read(&dc->has_dirty))
- + cached_dev_put()
- +
- return 0;
- [code snip]
-
-because writeback kernel thread can be waken up and start via sysfs entry:
- echo 1 > /sys/block/bcache<N>/bcache/writeback_running
-It is difficult to check whether backing device is dirty without race and
-extra lock. So the above modification will introduce potential refcount
-underflow in some conditions.
-
-The correct fix is, to take cached dev refcount when creating the kernel
-thread, and put it before the kernel thread exits. Then bcache does not
-need to take a cached dev refcount when cache turns from clean to dirty,
-or to put a cached dev refcount when cache turns from dirty to clean. The
-writeback kernel thread is always safe to reference data structure from
-cache set, cache and cached device (because a refcount of cache device is
-taken for it already), and no matter the kernel thread is stopped by I/O
-errors or system reboot, cached_dev->count can always be used correctly.
-
-The patch is simple, but understanding how it works is quite complicated.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/super.c | 1 -
- drivers/md/bcache/writeback.c | 10 +++++++---
- drivers/md/bcache/writeback.h | 2 --
- 3 files changed, 7 insertions(+), 6 deletions(-)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 064efd869017..5401d2356aa3 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1044,7 +1044,6 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
- if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
- bch_sectors_dirty_init(&dc->disk);
- atomic_set(&dc->has_dirty, 1);
-- refcount_inc(&dc->count);
- bch_writeback_queue(dc);
- }
-
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index a57149803df6..0789a9e18337 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -451,7 +451,7 @@ static int bch_writeback_thread(void *arg)
- up_write(&dc->writeback_lock);
-
- if (kthread_should_stop())
-- return 0;
-+ break;
-
- set_current_state(TASK_INTERRUPTIBLE);
- schedule();
-@@ -463,7 +463,6 @@ static int bch_writeback_thread(void *arg)
- if (searched_full_index &&
- RB_EMPTY_ROOT(&dc->writeback_keys.keys)) {
- atomic_set(&dc->has_dirty, 0);
-- cached_dev_put(dc);
- SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN);
- bch_write_bdev_super(dc, NULL);
- }
-@@ -484,6 +483,8 @@ static int bch_writeback_thread(void *arg)
- }
- }
-
-+ cached_dev_put(dc);
-+
- return 0;
- }
-
-@@ -547,10 +548,13 @@ int bch_cached_dev_writeback_start(struct cached_dev *dc)
- if (!dc->writeback_write_wq)
- return -ENOMEM;
-
-+ cached_dev_get(dc);
- dc->writeback_thread = kthread_create(bch_writeback_thread, dc,
- "bcache_writeback");
-- if (IS_ERR(dc->writeback_thread))
-+ if (IS_ERR(dc->writeback_thread)) {
-+ cached_dev_put(dc);
- return PTR_ERR(dc->writeback_thread);
-+ }
-
- schedule_delayed_work(&dc->writeback_rate_update,
- dc->writeback_rate_update_seconds * HZ);
-diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
-index 1d284f3d0363..aab21afe49cf 100644
---- a/drivers/md/bcache/writeback.h
-+++ b/drivers/md/bcache/writeback.h
-@@ -92,8 +92,6 @@ static inline void bch_writeback_add(struct cached_dev *dc)
- {
- if (!atomic_read(&dc->has_dirty) &&
- !atomic_xchg(&dc->has_dirty, 1)) {
-- refcount_inc(&dc->count);
--
- if (BDEV_STATE(&dc->sb) != BDEV_STATE_DIRTY) {
- SET_BDEV_STATE(&dc->sb, BDEV_STATE_DIRTY);
- /* XXX: should do this synchronously */
---
-2.15.1
-
diff --git a/for-next/v1/v1-0005-bcache-stop-dc-writeback_rate_update-if-cache-set.patch b/for-next/v1/v1-0005-bcache-stop-dc-writeback_rate_update-if-cache-set.patch
deleted file mode 100644
index d3e78e8..0000000
--- a/for-next/v1/v1-0005-bcache-stop-dc-writeback_rate_update-if-cache-set.patch
+++ /dev/null
@@ -1,68 +0,0 @@
-From 1a9aae02c180b47b2ae2ef9c61915b2b694d1fc2 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sat, 23 Dec 2017 01:50:19 +0800
-Subject: [PATCH v1 05/10] bcache: stop dc->writeback_rate_update if cache set
- is stopping
-
-struct delayed_work writeback_rate_update in struct cache_dev is a delayed
-worker to call function update_writeback_rate() in period (the interval is
-defined by dc->writeback_rate_update_seconds).
-
-When a metadata I/O error happens on cache device, bcache error handling
-routine bch_cache_set_error() will call bch_cache_set_unregister() to
-retire whole cache set. On the unregister code path, cached_dev_free()
-calls cancel_delayed_work_sync(&dc->writeback_rate_update) to stop this
-delayed work.
-
-dc->writeback_rate_update is a special delayed work from others in bcache.
-In its routine update_writeback_rate(), this delayed work is re-armed
-after a piece of time. That means when cancel_delayed_work_sync() returns,
-this delayed work can still be executed after several seconds defined by
-dc->writeback_rate_update_seconds.
-
-The problem is, after cancel_delayed_work_sync() returns, the cache set
-unregister code path will eventually release memory of struct cache set.
-Then the delayed work is scheduled to run, and inside its routine
-update_writeback_rate() that already released cache set NULL pointer will
-be accessed. Now a NULL pointer dereference panic is triggered.
-
-In order to avoid the above problem, this patch checks cache set flags in
-delayed work routine update_writeback_rate(). If flag CACHE_SET_STOPPING
-is set, this routine will quit without re-arm the delayed work. Then the
-NULL pointer dereference panic won't happen after cache set is released.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/writeback.c | 9 +++++++++
- 1 file changed, 9 insertions(+)
-
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index 0789a9e18337..745d9b2a326f 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -91,6 +91,11 @@ static void update_writeback_rate(struct work_struct *work)
- struct cached_dev *dc = container_of(to_delayed_work(work),
- struct cached_dev,
- writeback_rate_update);
-+ struct cache_set *c = dc->disk.c;
-+
-+ /* quit directly if cache set is stopping */
-+ if (test_bit(CACHE_SET_STOPPING, &c->flags))
-+ return;
-
- down_read(&dc->writeback_lock);
-
-@@ -100,6 +105,10 @@ static void update_writeback_rate(struct work_struct *work)
-
- up_read(&dc->writeback_lock);
-
-+ /* do not schedule delayed work if cache set is stopping */
-+ if (test_bit(CACHE_SET_STOPPING, &c->flags))
-+ return;
-+
- schedule_delayed_work(&dc->writeback_rate_update,
- dc->writeback_rate_update_seconds * HZ);
- }
---
-2.15.1
-
diff --git a/for-next/v1/v1-0006-bcache-stop-dc-writeback_rate_update-dc-writeback.patch b/for-next/v1/v1-0006-bcache-stop-dc-writeback_rate_update-dc-writeback.patch
deleted file mode 100644
index 53ce3f2..0000000
--- a/for-next/v1/v1-0006-bcache-stop-dc-writeback_rate_update-dc-writeback.patch
+++ /dev/null
@@ -1,122 +0,0 @@
-From 2da5b83720460c83d0f20d0771a0c955e60028e8 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 3 Jan 2018 00:03:45 +0800
-Subject: [PATCH v1 06/10] bcache: stop dc->writeback_rate_update,
- dc->writeback_thread earlier
-
-Delayed worker dc->writeback_rate_update and kernel thread
-dc->writeback_thread reference cache set data structure in their routine.
-Therefore, before they are stopped, cache set should not be released.
-Otherwise, a NULL pointer dereference will be triggered.
-
-Currently delayed worker dc->writeback_rate_update and kernel thread
-dc->writeback_thread are stopped in cached_dev_free(). When cache set is
-retiring by too many I/O errors, cached_dev_free() is called when refcount
-of bcache device's closure (disk.cl) reaches 0. In most of cases, last
-refcount of disk.cl is dropped in last line of cached_dev_detach_finish().
-But in cached_dev_detach_finish() before calling closure_put(&dc->disk.cl),
-bcache_device_detach() is called, and inside bcache_device_detach()
-refcount of cache_set->caching is dropped by closure_put(&d->c->caching).
-
-It is very likely that this is the last refcount of this closure, so routine
-cache_set_flush() will be called (it is set in __cache_set_unregister()),
-and its parent closure cache_set->cl may also drop its last refcount and
-cache_set_free() is called too. In cache_set_free() the last refcount of
-cache_set->kobj is dropped and then bch_cache_set_release() is called. Now
-in bch_cache_set_release(), the memory of struct cache_set is freed.
-
-bch_cache_set_release() is called before cached_dev_free(), so there is a
-time window after cache set memory is freed and before dc->writeback_thread
-and dc->writeback_rate_update are stopped; if one of them is scheduled to
-run, a NULL pointer dereference will be triggered.
-
-This patch fixes the above problem by stopping dc->writeback_thread and
-dc->writeback_rate_update earlier in bcache_device_detach() before calling
-closure_put(&d->c->caching). Because cancel_delayed_work_sync() and
-kthread_stop() are synchronized operations, we can make sure cache set
-is available when the delayed work and kthread are stopping.
-
-Because cached_dev_free() can also be called by writing 1 to sysfs file
-/sys/block/bcache<N>/bcache/stop, this code path may not call
-bcache_device_detach() if d->c is NULL. So stopping dc->writeback_thread
-and dc->writeback_rate_update in cached_dev_free() is still necessary. In
-order to avoid stopping them twice, dc->rate_update_canceled is added to
-indicate dc->writeback_rate_update is canceled, and dc->writeback_thread
-is set to NULL to indicate it is stopped.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/bcache.h | 1 +
- drivers/md/bcache/super.c | 21 +++++++++++++++++++--
- drivers/md/bcache/writeback.c | 1 +
- 3 files changed, 21 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 83c569942bd0..395b87942a2f 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -322,6 +322,7 @@ struct cached_dev {
-
- struct bch_ratelimit writeback_rate;
- struct delayed_work writeback_rate_update;
-+ bool rate_update_canceled;
-
- /*
- * Internal to the writeback code, so read_dirty() can keep track of
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 5401d2356aa3..8912be4165c5 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -696,8 +696,20 @@ static void bcache_device_link(struct bcache_device *d, struct cache_set *c,
-
- static void bcache_device_detach(struct bcache_device *d)
- {
-+ struct cached_dev *dc;
-+
- lockdep_assert_held(&bch_register_lock);
-
-+ dc = container_of(d, struct cached_dev, disk);
-+ if (!IS_ERR_OR_NULL(dc->writeback_thread)) {
-+ kthread_stop(dc->writeback_thread);
-+ dc->writeback_thread = NULL;
-+ }
-+ if (!dc->rate_update_canceled) {
-+ cancel_delayed_work_sync(&dc->writeback_rate_update);
-+ dc->rate_update_canceled = true;
-+ }
-+
- if (test_bit(BCACHE_DEV_DETACHING, &d->flags)) {
- struct uuid_entry *u = d->c->uuids + d->id;
-
-@@ -1071,9 +1083,14 @@ static void cached_dev_free(struct closure *cl)
- {
- struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
-
-- cancel_delayed_work_sync(&dc->writeback_rate_update);
-- if (!IS_ERR_OR_NULL(dc->writeback_thread))
-+ if (!dc->rate_update_canceled) {
-+ cancel_delayed_work_sync(&dc->writeback_rate_update);
-+ dc->rate_update_canceled = true;
-+ }
-+ if (!IS_ERR_OR_NULL(dc->writeback_thread)) {
- kthread_stop(dc->writeback_thread);
-+ dc->writeback_thread = NULL;
-+ }
- if (dc->writeback_write_wq)
- destroy_workqueue(dc->writeback_write_wq);
-
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index 745d9b2a326f..ab2ac3d72393 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -548,6 +548,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
- dc->writeback_rate_i_term_inverse = 10000;
-
- INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
-+ dc->rate_update_canceled = false;
- }
-
- int bch_cached_dev_writeback_start(struct cached_dev *dc)
---
-2.15.1
-
diff --git a/for-next/v1/v1-0007-bcache-set-error_limit-correctly.patch b/for-next/v1/v1-0007-bcache-set-error_limit-correctly.patch
deleted file mode 100644
index 2d5b243..0000000
--- a/for-next/v1/v1-0007-bcache-set-error_limit-correctly.patch
+++ /dev/null
@@ -1,114 +0,0 @@
-From 93e9a82ee54b8fb1e50c4df95a74ab2953aec9ff Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 3 Jan 2018 20:37:27 +0800
-Subject: [PATCH v1 07/10] bcache: set error_limit correctly
-
-Struct cache uses io_errors for two purposes,
-- Error decay: when cache set error_decay is set, io_errors is used to
- generate a small piece of delay when I/O error happens.
-- I/O errors counter: in order to generate big enough value for error
- decay, I/O errors counter value is stored by left shifting 20 bits (a.k.a
- IO_ERROR_SHIFT).
-
-In function bch_count_io_errors(), if I/O errors counter reaches cache set
-error limit, bch_cache_set_error() will be called to retire the whole cache
-set. But current code is problematic when checking the error limit, see the
-following code piece from bch_count_io_errors(),
-
- 90 if (error) {
- 91 char buf[BDEVNAME_SIZE];
- 92 unsigned errors = atomic_add_return(1 << IO_ERROR_SHIFT,
- 93 &ca->io_errors);
- 94 errors >>= IO_ERROR_SHIFT;
- 95
- 96 if (errors < ca->set->error_limit)
- 97 pr_err("%s: IO error on %s, recovering",
- 98 bdevname(ca->bdev, buf), m);
- 99 else
-100 bch_cache_set_error(ca->set,
-101 "%s: too many IO errors %s",
-102 bdevname(ca->bdev, buf), m);
-103 }
-
-At line 94, errors is right shifting IO_ERROR_SHIFT bits, now it is real
-errors counter to compare at line 96. But ca->set->error_limit is initia-
-lized with an amplified value in bch_cache_set_alloc(),
-1545 c->error_limit = 8 << IO_ERROR_SHIFT;
-
-It means by default, in bch_count_io_errors(), before 8<<20 errors happened
-bch_cache_set_error() won't be called to retire the problematic cache
-device. If the average request size is 64KB, it means bcache won't handle
-failed device until 512GB data is requested. This is too large to be an I/O
-threshold. So I believe the correct error limit should be much less.
-
-This patch sets default cache set error limit to 8, then in
-bch_count_io_errors() when errors counter reaches 8 (if it is default
-value), function bch_cache_set_error() will be called to retire the whole
-cache set. This patch also removes bits shifting when store or show
-io_error_limit value via sysfs interface.
-
-Nowadays most of SSDs handle internal flash failure automatically by LBA
-address re-indirect mapping. If an I/O error can be observed by upper layer
-code, it will be a notable error because that SSD can not re-indirect
-map the problematic LBA address to an available flash block. This situation
-indicates the whole SSD will be failed very soon. Therefore setting 8 as
-the default io error limit value makes sense, it is enough for most of
-cache devices.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/bcache.h | 1 +
- drivers/md/bcache/super.c | 2 +-
- drivers/md/bcache/sysfs.c | 4 ++--
- 3 files changed, 4 insertions(+), 3 deletions(-)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 395b87942a2f..a31dc3737dae 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -654,6 +654,7 @@ struct cache_set {
- ON_ERROR_UNREGISTER,
- ON_ERROR_PANIC,
- } on_error;
-+#define DEFAULT_IO_ERROR_LIMIT 8
- unsigned error_limit;
- unsigned error_decay;
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 8912be4165c5..02d9d7110769 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1561,7 +1561,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
-
- c->congested_read_threshold_us = 2000;
- c->congested_write_threshold_us = 20000;
-- c->error_limit = 8 << IO_ERROR_SHIFT;
-+ c->error_limit = DEFAULT_IO_ERROR_LIMIT;
-
- return c;
- err:
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index b4184092c727..d7ce9a05b304 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -556,7 +556,7 @@ SHOW(__bch_cache_set)
-
- /* See count_io_errors for why 88 */
- sysfs_print(io_error_halflife, c->error_decay * 88);
-- sysfs_print(io_error_limit, c->error_limit >> IO_ERROR_SHIFT);
-+ sysfs_print(io_error_limit, c->error_limit);
-
- sysfs_hprint(congested,
- ((uint64_t) bch_get_congested(c)) << 9);
-@@ -656,7 +656,7 @@ STORE(__bch_cache_set)
- }
-
- if (attr == &sysfs_io_error_limit)
-- c->error_limit = strtoul_or_return(buf) << IO_ERROR_SHIFT;
-+ c->error_limit = strtoul_or_return(buf);
-
- /* See count_io_errors() for why 88 */
- if (attr == &sysfs_io_error_halflife)
---
-2.15.1
-
diff --git a/for-next/v1/v1-0008-bcache-fix-misleading-error-message-in-bch_count_.patch b/for-next/v1/v1-0008-bcache-fix-misleading-error-message-in-bch_count_.patch
deleted file mode 100644
index 18a5c32..0000000
--- a/for-next/v1/v1-0008-bcache-fix-misleading-error-message-in-bch_count_.patch
+++ /dev/null
@@ -1,118 +0,0 @@
-From 80d7abeee0b81a7ee0e3789bac9580f540437d0e Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 3 Jan 2018 15:59:33 +0800
-Subject: [PATCH v1 08/10] bcache: fix misleading error message in
- bch_count_io_errors()
-
-Bcache only does recoverable I/O for read operations by calling
-cached_dev_read_error(). For write operations there is no I/O recovery for
-failed requests.
-
-But in bch_count_io_errors() no matter read or write I/Os, before errors
-counter reaches io error limit, pr_err() always prints "IO error on %s,
-recovering". For write requests this information is misleading, because
-there is no I/O recovery at all.
-
-This patch adds a parameter 'is_read' to bch_count_io_errors(), and only
-prints "recovering" by pr_err() when the bio direction is READ.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/bcache.h | 2 +-
- drivers/md/bcache/io.c | 13 +++++++++----
- drivers/md/bcache/super.c | 4 +++-
- drivers/md/bcache/writeback.c | 4 +++-
- 4 files changed, 16 insertions(+), 7 deletions(-)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index a31dc3737dae..c53f312b2216 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -855,7 +855,7 @@ static inline void wake_up_allocators(struct cache_set *c)
-
- /* Forward declarations */
-
--void bch_count_io_errors(struct cache *, blk_status_t, const char *);
-+void bch_count_io_errors(struct cache *, blk_status_t, int, const char *);
- void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
- blk_status_t, const char *);
- void bch_bbio_endio(struct cache_set *, struct bio *, blk_status_t,
-diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
-index fac97ec2d0e2..a783c5a41ff1 100644
---- a/drivers/md/bcache/io.c
-+++ b/drivers/md/bcache/io.c
-@@ -51,7 +51,10 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c,
-
- /* IO errors */
-
--void bch_count_io_errors(struct cache *ca, blk_status_t error, const char *m)
-+void bch_count_io_errors(struct cache *ca,
-+ blk_status_t error,
-+ int is_read,
-+ const char *m)
- {
- /*
- * The halflife of an error is:
-@@ -94,8 +97,9 @@ void bch_count_io_errors(struct cache *ca, blk_status_t error, const char *m)
- errors >>= IO_ERROR_SHIFT;
-
- if (errors < ca->set->error_limit)
-- pr_err("%s: IO error on %s, recovering",
-- bdevname(ca->bdev, buf), m);
-+ pr_err("%s: IO error on %s%s",
-+ bdevname(ca->bdev, buf), m,
-+ is_read ? ", recovering." : ".");
- else
- bch_cache_set_error(ca->set,
- "%s: too many IO errors %s",
-@@ -108,6 +112,7 @@ void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
- {
- struct bbio *b = container_of(bio, struct bbio, bio);
- struct cache *ca = PTR_CACHE(c, &b->key, 0);
-+ int is_read = (bio_data_dir(bio) == READ ? 1 : 0);
-
- unsigned threshold = op_is_write(bio_op(bio))
- ? c->congested_write_threshold_us
-@@ -129,7 +134,7 @@ void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
- atomic_inc(&c->congested);
- }
-
-- bch_count_io_errors(ca, error, m);
-+ bch_count_io_errors(ca, error, is_read, m);
- }
-
- void bch_bbio_endio(struct cache_set *c, struct bio *bio,
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 02d9d7110769..bbe911847eea 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -274,7 +274,9 @@ static void write_super_endio(struct bio *bio)
- {
- struct cache *ca = bio->bi_private;
-
-- bch_count_io_errors(ca, bio->bi_status, "writing superblock");
-+ /* is_read = 0 */
-+ bch_count_io_errors(ca, bio->bi_status, 0,
-+ "writing superblock");
- closure_put(&ca->set->sb_write);
- }
-
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index ab2ac3d72393..e58f9be5ae43 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -228,8 +228,10 @@ static void read_dirty_endio(struct bio *bio)
- struct keybuf_key *w = bio->bi_private;
- struct dirty_io *io = w->private;
-
-+ /* is_read = 1 */
- bch_count_io_errors(PTR_CACHE(io->dc->disk.c, &w->key, 0),
-- bio->bi_status, "reading dirty data from cache");
-+ bio->bi_status, 1,
-+ "reading dirty data from cache");
-
- dirty_endio(bio);
- }
---
-2.15.1
-
diff --git a/for-next/v1/v1-0009-bcache-add-io_disable-to-struct-cache_set.patch b/for-next/v1/v1-0009-bcache-add-io_disable-to-struct-cache_set.patch
deleted file mode 100644
index 1c6159e..0000000
--- a/for-next/v1/v1-0009-bcache-add-io_disable-to-struct-cache_set.patch
+++ /dev/null
@@ -1,433 +0,0 @@
-From 5996e95d633ad28ebbd113004efc488162cd22b7 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Tue, 2 Jan 2018 17:31:07 +0800
-Subject: [PATCH v1 09/10] bcache: add io_disable to struct cache_set
-
-When too many I/Os failed on cache device, bch_cache_set_error() is called
-in the error handling code path to retire whole problematic cache set. If
-new I/O requests continue to come and take refcount dc->count, the cache
-set won't be retired immediately, this is a problem.
-
-Furthermore, there are several kernel threads and self-armed kernel works
-that may still be running after bch_cache_set_error() is called. It needs
-to wait quite a while for them to stop, or they won't stop at all. They
-also prevent the cache set from being retired.
-
-The solution in this patch is, to add per cache set flag to disable I/O
-request on this cache and all attached backing devices. Then new coming I/O
-requests can be rejected in *_make_request() before taking refcount, kernel
-threads and self-armed kernel worker can stop very fast when io_disable is
-true.
-
-Because bcache also does internal I/Os for writeback, garbage collection,
-bucket allocation, journaling, this kind of I/O should be disabled after
-bch_cache_set_error() is called. So closure_bio_submit() is modified to
-check whether cache_set->io_disable is true. If cache_set->io_disable is
-true, closure_bio_submit() will set bio->bi_status to BLK_STS_IOERR and
-return, generic_make_request() won't be called.
-
-A sysfs interface is also added for cache_set->io_disable, to read and set
-io_disable value for debugging. It is helpful to trigger more corner case
-issues for failed cache device.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/alloc.c | 2 +-
- drivers/md/bcache/bcache.h | 14 ++++++++++++++
- drivers/md/bcache/btree.c | 6 ++++--
- drivers/md/bcache/io.c | 2 +-
- drivers/md/bcache/journal.c | 4 ++--
- drivers/md/bcache/request.c | 26 +++++++++++++++++++-------
- drivers/md/bcache/super.c | 7 ++++++-
- drivers/md/bcache/sysfs.c | 4 ++++
- drivers/md/bcache/util.h | 6 ------
- drivers/md/bcache/writeback.c | 34 ++++++++++++++++++++++------------
- 10 files changed, 73 insertions(+), 32 deletions(-)
-
-diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
-index 48c002faf08d..3be737582f27 100644
---- a/drivers/md/bcache/alloc.c
-+++ b/drivers/md/bcache/alloc.c
-@@ -286,7 +286,7 @@ do { \
- break; \
- \
- mutex_unlock(&(ca)->set->bucket_lock); \
-- if (kthread_should_stop()) \
-+ if (kthread_should_stop() || ca->set->io_disable) \
- return 0; \
- \
- set_current_state(TASK_INTERRUPTIBLE); \
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index c53f312b2216..9c7f9b1cb791 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -481,6 +481,7 @@ struct cache_set {
- struct cache_accounting accounting;
-
- unsigned long flags;
-+ bool io_disable;
-
- struct cache_sb sb;
-
-@@ -853,6 +854,19 @@ static inline void wake_up_allocators(struct cache_set *c)
- wake_up_process(ca->alloc_thread);
- }
-
-+static inline void closure_bio_submit(struct cache_set *c,
-+ struct bio *bio,
-+ struct closure *cl)
-+{
-+ closure_get(cl);
-+ if (unlikely(c->io_disable)) {
-+ bio->bi_status = BLK_STS_IOERR;
-+ bio_endio(bio);
-+ return;
-+ }
-+ generic_make_request(bio);
-+}
-+
- /* Forward declarations */
-
- void bch_count_io_errors(struct cache *, blk_status_t, int, const char *);
-diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
-index bf0d7978bc3d..75470cce1177 100644
---- a/drivers/md/bcache/btree.c
-+++ b/drivers/md/bcache/btree.c
-@@ -1788,9 +1788,11 @@ static int bch_gc_thread(void *arg)
-
- while (1) {
- wait_event_interruptible(c->gc_wait,
-- kthread_should_stop() || gc_should_run(c));
-+ kthread_should_stop() ||
-+ c->io_disable ||
-+ gc_should_run(c));
-
-- if (kthread_should_stop())
-+ if (kthread_should_stop() || c->io_disable)
- break;
-
- set_gc_sectors(c);
-diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
-index a783c5a41ff1..8013ecbcdbda 100644
---- a/drivers/md/bcache/io.c
-+++ b/drivers/md/bcache/io.c
-@@ -38,7 +38,7 @@ void __bch_submit_bbio(struct bio *bio, struct cache_set *c)
- bio_set_dev(bio, PTR_CACHE(c, &b->key, 0)->bdev);
-
- b->submit_time_us = local_clock_us();
-- closure_bio_submit(bio, bio->bi_private);
-+ closure_bio_submit(c, bio, bio->bi_private);
- }
-
- void bch_submit_bbio(struct bio *bio, struct cache_set *c,
-diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index a87165c1d8e5..979873641030 100644
---- a/drivers/md/bcache/journal.c
-+++ b/drivers/md/bcache/journal.c
-@@ -62,7 +62,7 @@ reread: left = ca->sb.bucket_size - offset;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
- bch_bio_map(bio, data);
-
-- closure_bio_submit(bio, &cl);
-+ closure_bio_submit(ca->set, bio, &cl);
- closure_sync(&cl);
-
- /* This function could be simpler now since we no longer write
-@@ -653,7 +653,7 @@ static void journal_write_unlocked(struct closure *cl)
- spin_unlock(&c->journal.lock);
-
- while ((bio = bio_list_pop(&list)))
-- closure_bio_submit(bio, cl);
-+ closure_bio_submit(c, bio, cl);
-
- continue_at(cl, journal_write_done, NULL);
- }
-diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
-index 643c3021624f..a85d6a605a8e 100644
---- a/drivers/md/bcache/request.c
-+++ b/drivers/md/bcache/request.c
-@@ -725,7 +725,7 @@ static void cached_dev_read_error(struct closure *cl)
-
- /* XXX: invalidate cache */
-
-- closure_bio_submit(bio, cl);
-+ closure_bio_submit(s->iop.c, bio, cl);
- }
-
- continue_at(cl, cached_dev_cache_miss_done, NULL);
-@@ -850,7 +850,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
- s->cache_miss = miss;
- s->iop.bio = cache_bio;
- bio_get(cache_bio);
-- closure_bio_submit(cache_bio, &s->cl);
-+ closure_bio_submit(s->iop.c, cache_bio, &s->cl);
-
- return ret;
- out_put:
-@@ -858,7 +858,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
- out_submit:
- miss->bi_end_io = request_endio;
- miss->bi_private = &s->cl;
-- closure_bio_submit(miss, &s->cl);
-+ closure_bio_submit(s->iop.c, miss, &s->cl);
- return ret;
- }
-
-@@ -923,7 +923,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
-
- if ((bio_op(bio) != REQ_OP_DISCARD) ||
- blk_queue_discard(bdev_get_queue(dc->bdev)))
-- closure_bio_submit(bio, cl);
-+ closure_bio_submit(s->iop.c, bio, cl);
- } else if (s->iop.writeback) {
- bch_writeback_add(dc);
- s->iop.bio = bio;
-@@ -938,12 +938,12 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
- flush->bi_private = cl;
- flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
-
-- closure_bio_submit(flush, cl);
-+ closure_bio_submit(s->iop.c, flush, cl);
- }
- } else {
- s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split);
-
-- closure_bio_submit(bio, cl);
-+ closure_bio_submit(s->iop.c, bio, cl);
- }
-
- closure_call(&s->iop.cl, bch_data_insert, NULL, cl);
-@@ -959,7 +959,7 @@ static void cached_dev_nodata(struct closure *cl)
- bch_journal_meta(s->iop.c, cl);
-
- /* If it's a flush, we send the flush to the backing device too */
-- closure_bio_submit(bio, cl);
-+ closure_bio_submit(s->iop.c, bio, cl);
-
- continue_at(cl, cached_dev_bio_complete, NULL);
- }
-@@ -974,6 +974,12 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
- struct cached_dev *dc = container_of(d, struct cached_dev, disk);
- int rw = bio_data_dir(bio);
-
-+ if (unlikely(d->c && d->c->io_disable)) {
-+ bio->bi_status = BLK_STS_IOERR;
-+ bio_endio(bio);
-+ return BLK_QC_T_NONE;
-+ }
-+
- generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
-
- bio_set_dev(bio, dc->bdev);
-@@ -1089,6 +1095,12 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q,
- struct bcache_device *d = bio->bi_disk->private_data;
- int rw = bio_data_dir(bio);
-
-+ if (unlikely(d->c->io_disable)) {
-+ bio->bi_status = BLK_STS_IOERR;
-+ bio_endio(bio);
-+ return BLK_QC_T_NONE;
-+ }
-+
- generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
-
- s = search_alloc(bio, d);
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index bbe911847eea..7aa76c3e3556 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -521,7 +521,7 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op,
- bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags);
- bch_bio_map(bio, ca->disk_buckets);
-
-- closure_bio_submit(bio, &ca->prio);
-+ closure_bio_submit(ca->set, bio, &ca->prio);
- closure_sync(cl);
- }
-
-@@ -1333,6 +1333,10 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
- acquire_console_sem();
- */
-
-+ c->io_disable = true;
-+ /* make others know io_disable is true earlier */
-+ smp_mb();
-+
- printk(KERN_ERR "bcache: error on %pU: ", c->sb.set_uuid);
-
- va_start(args, fmt);
-@@ -1564,6 +1568,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
- c->congested_read_threshold_us = 2000;
- c->congested_write_threshold_us = 20000;
- c->error_limit = DEFAULT_IO_ERROR_LIMIT;
-+ c->io_disable = false;
-
- return c;
- err:
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index d7ce9a05b304..acce7c82e111 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -92,6 +92,7 @@ read_attribute(partial_stripes_expensive);
-
- rw_attribute(synchronous);
- rw_attribute(journal_delay_ms);
-+rw_attribute(io_disable);
- rw_attribute(discard);
- rw_attribute(running);
- rw_attribute(label);
-@@ -573,6 +574,7 @@ SHOW(__bch_cache_set)
- sysfs_printf(gc_always_rewrite, "%i", c->gc_always_rewrite);
- sysfs_printf(btree_shrinker_disabled, "%i", c->shrinker_disabled);
- sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
-+ sysfs_printf(io_disable, "%i", c->io_disable);
-
- if (attr == &sysfs_bset_tree_stats)
- return bch_bset_print_stats(c, buf);
-@@ -663,6 +665,7 @@ STORE(__bch_cache_set)
- c->error_decay = strtoul_or_return(buf) / 88;
-
- sysfs_strtoul(journal_delay_ms, c->journal_delay_ms);
-+ sysfs_strtoul_clamp(io_disable, c->io_disable, 0, 1);
- sysfs_strtoul(verify, c->verify);
- sysfs_strtoul(key_merging_disabled, c->key_merging_disabled);
- sysfs_strtoul(expensive_debug_checks, c->expensive_debug_checks);
-@@ -744,6 +747,7 @@ static struct attribute *bch_cache_set_internal_files[] = {
- &sysfs_gc_always_rewrite,
- &sysfs_btree_shrinker_disabled,
- &sysfs_copy_gc_enabled,
-+ &sysfs_io_disable,
- NULL
- };
- KTYPE(bch_cache_set_internal);
-diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
-index ed5e8a412eb8..03e533631798 100644
---- a/drivers/md/bcache/util.h
-+++ b/drivers/md/bcache/util.h
-@@ -564,12 +564,6 @@ static inline sector_t bdev_sectors(struct block_device *bdev)
- return bdev->bd_inode->i_size >> 9;
- }
-
--#define closure_bio_submit(bio, cl) \
--do { \
-- closure_get(cl); \
-- generic_make_request(bio); \
--} while (0)
--
- uint64_t bch_crc64_update(uint64_t, const void *, size_t);
- uint64_t bch_crc64(const void *, size_t);
-
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index e58f9be5ae43..54add41d2569 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -93,8 +93,11 @@ static void update_writeback_rate(struct work_struct *work)
- writeback_rate_update);
- struct cache_set *c = dc->disk.c;
-
-- /* quit directly if cache set is stopping */
-- if (test_bit(CACHE_SET_STOPPING, &c->flags))
-+ /*
-+ * quit directly if cache set is stopping. c->io_disable
-+ * can be set via sysfs, check it here too.
-+ */
-+ if (test_bit(CACHE_SET_STOPPING, &c->flags) || c->io_disable)
- return;
-
- down_read(&dc->writeback_lock);
-@@ -105,8 +108,11 @@ static void update_writeback_rate(struct work_struct *work)
-
- up_read(&dc->writeback_lock);
-
-- /* do not schedule delayed work if cache set is stopping */
-- if (test_bit(CACHE_SET_STOPPING, &c->flags))
-+ /*
-+ * do not schedule delayed work if cache set is stopping,
-+ * c->io_disable can be set via sysfs, check it here too.
-+ */
-+ if (test_bit(CACHE_SET_STOPPING, &c->flags) || c->io_disable)
- return;
-
- schedule_delayed_work(&dc->writeback_rate_update,
-@@ -217,7 +223,7 @@ static void write_dirty(struct closure *cl)
- bio_set_dev(&io->bio, io->dc->bdev);
- io->bio.bi_end_io = dirty_endio;
-
-- closure_bio_submit(&io->bio, cl);
-+ closure_bio_submit(io->dc->disk.c, &io->bio, cl);
- }
-
- continue_at(cl, write_dirty_finish, io->dc->writeback_write_wq);
-@@ -240,7 +246,7 @@ static void read_dirty_submit(struct closure *cl)
- {
- struct dirty_io *io = container_of(cl, struct dirty_io, cl);
-
-- closure_bio_submit(&io->bio, cl);
-+ closure_bio_submit(io->dc->disk.c, &io->bio, cl);
-
- continue_at(cl, write_dirty, io->dc->writeback_write_wq);
- }
-@@ -259,7 +265,7 @@ static void read_dirty(struct cached_dev *dc)
- * mempools.
- */
-
-- while (!kthread_should_stop()) {
-+ while (!(kthread_should_stop() || dc->disk.c->io_disable)) {
-
- w = bch_keybuf_next(&dc->writeback_keys);
- if (!w)
-@@ -269,7 +275,9 @@ static void read_dirty(struct cached_dev *dc)
-
- if (KEY_START(&w->key) != dc->last_read ||
- jiffies_to_msecs(delay) > 50)
-- while (!kthread_should_stop() && delay)
-+ while (!kthread_should_stop() &&
-+ !dc->disk.c->io_disable &&
-+ delay)
- delay = schedule_timeout_interruptible(delay);
-
- dc->last_read = KEY_OFFSET(&w->key);
-@@ -450,18 +458,19 @@ static bool refill_dirty(struct cached_dev *dc)
- static int bch_writeback_thread(void *arg)
- {
- struct cached_dev *dc = arg;
-+ struct cache_set *c = dc->disk.c;
- bool searched_full_index;
-
- bch_ratelimit_reset(&dc->writeback_rate);
-
-- while (!kthread_should_stop()) {
-+ while (!(kthread_should_stop() || c->io_disable)) {
- down_write(&dc->writeback_lock);
- if (!atomic_read(&dc->has_dirty) ||
- (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
- !dc->writeback_running)) {
- up_write(&dc->writeback_lock);
-
-- if (kthread_should_stop())
-+ if (kthread_should_stop() || c->io_disable)
- break;
-
- set_current_state(TASK_INTERRUPTIBLE);
-@@ -485,8 +494,8 @@ static int bch_writeback_thread(void *arg)
- if (searched_full_index) {
- unsigned delay = dc->writeback_delay * HZ;
-
-- while (delay &&
-- !kthread_should_stop() &&
-+ while (delay && !kthread_should_stop() &&
-+ !c->io_disable &&
- !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
- delay = schedule_timeout_interruptible(delay);
-
-@@ -494,6 +503,7 @@ static int bch_writeback_thread(void *arg)
- }
- }
-
-+ dc->writeback_thread = NULL;
- cached_dev_put(dc);
-
- return 0;
---
-2.15.1
-
diff --git a/for-next/v1/v1-0010-bcache-stop-all-attached-bcache-devices-for-a-ret.patch b/for-next/v1/v1-0010-bcache-stop-all-attached-bcache-devices-for-a-ret.patch
deleted file mode 100644
index 062caae..0000000
--- a/for-next/v1/v1-0010-bcache-stop-all-attached-bcache-devices-for-a-ret.patch
+++ /dev/null
@@ -1,60 +0,0 @@
-From 595d5d28a7ed23cae061b9e0dd201611afd6db6d Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 3 Jan 2018 18:24:55 +0800
-Subject: [PATCH v1 10/10] bcache: stop all attached bcache devices for a
- retired cache set
-
-When there are too many I/O errors on cache device, current bcache code
-will retire the whole cache set, and detach all bcache devices. But the
-detached bcache devices are not stopped, which is problematic when bcache
-is in writeback mode.
-
-If the retired cache set has dirty data of backing devices, continuing
-to write to bcache device will write to backing device directly. If the
-LBA of write request has a dirty version cached on cache device, next time
-when the cache device is re-registered and backing device re-attached to
-it again, the stale dirty data on cache device will be written to backing
-device, and overwrite latest directly written data. This situation causes
-silent data corruption.
-
-This patch checks whether cache_set->io_disable is true in
-__cache_set_unregister(). If cache_set->io_disable is true, it means cache
-set is being unregistered due to too many I/O errors, then all attached bcache
-devices will be stopped as well. If cache_set->io_disable is not true, it
-means __cache_set_unregister() is triggered by writing 1 to sysfs file
-/sys/fs/bcache/<UUID>/bcache/stop. This is an exception because users do
-it explicitly, this patch keeps existing behavior and does not stop any
-bcache device.
-
-Even if the failed cache device has no dirty data, stopping bcache device is
-still a desired behavior by many Ceph and database users. Then their
-application will report I/O errors due to disappeared bcache device, and
-operation people will know the cache device is broken or disconnected.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/super.c | 8 ++++++++
- 1 file changed, 8 insertions(+)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 49d6fedf89c3..20a7a6959506 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1458,6 +1458,14 @@ static void __cache_set_unregister(struct closure *cl)
- dc = container_of(c->devices[i],
- struct cached_dev, disk);
- bch_cached_dev_detach(dc);
-+ /*
-+ * If we come here by too many I/O errors,
-+ * bcache device should be stopped too, to
-+ * keep data consistency on cache and
-+ * backing devices.
-+ */
-+ if (c->io_disable)
-+ bcache_device_stop(c->devices[i]);
- } else {
- bcache_device_stop(c->devices[i]);
- }
---
-2.15.1
-
diff --git a/for-next/v2/v2-0000-cover-letter.patch b/for-next/v2/v2-0000-cover-letter.patch
deleted file mode 100644
index 48a8af3..0000000
--- a/for-next/v2/v2-0000-cover-letter.patch
+++ /dev/null
@@ -1,92 +0,0 @@
-From b586ad82f67e12cb4d2a55681264b5cdf6353c59 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sat, 13 Jan 2018 23:20:33 +0800
-Subject: [PATCH v2 00/12] bcache: device failure handling improvement
-
-Hi maintainers and folks,
-
-This patch set tries to improve bcache device failure handling, including
-cache device and backing device failures.
-
-The basic idea to handle failed cache device is,
-- Unregister cache set
-- Detach all backing devices attached to this cache set
-- Stop all bcache devices linked to this cache set
-The above process is named 'cache set retire' by me. The result of cache
-set retire is, cache set and bcache devices are all removed, following
-I/O requests will get failed immediately to notify upper layer or user
-space code that the cache device is failed or disconnected.
-
-For failed backing device, there are two ways to handle them,
-- If device is disconnected, when kernel thread dc->status_update_thread
- finds it is offline for BACKING_DEV_OFFLINE_TIMEOUT (5) seconds, the
- kernel thread will set dc->io_disable and call bcache_device_stop() to
- stop and remove the bcache device from system.
-- If device is connected but too many I/O errors happen, after errors
- number exceeds dc->error_limit, call bch_cached_dev_error() to set
- dc->io_disable and stop bcache device. Then the broken backing device
- and its bcache device will be removed from system.
-
-The v2 patch set fixes the problems addressed in v1 patch reviews, adds
-failure handling for backing device. This patch set also includes a patch
-from Junhui Tang. And the v2 patch set does not include 2 patches which are
-in bcache-for-next already.
-
-A basic testing covered with writethrough, writeback, writearound mode, and
-read/write/readwrite workloads, cache set or bcache device can be removed
-by too many I/O errors or delete the device. For plugging out physical
-disks, a kernel bug triggers rcu oops in __do_softirq() and locks up all
-following accesses to the disconnected disk, this blocks my testing.
-
-While posting v2 patch set, I also continue to test the code from my side.
-Any comment, question and review are warmly welcome.
-
-Open issues:
-1, Detach backing device by writing sysfs detach file does not work, it is
- because writeback thread does not drop dc->count refcount when cache
- device turns from dirty into clean. This issue will be fixed in v3
- patch set.
-2, A kernel bug in __do_softirq() when plugging out hard disk with heavy
- I/O blocks my physical disk disconnection test. If any one knows this
- bug, please give me a hint.
-
-Changelog:
-v2: fixes all problems found in v1 review.
- add patches to handle backing device failure.
- add one more patch to set writeback_rate_update_seconds range.
- include a patch from Junhui Tang.
-v1: the initial version, only handles cache device failure.
-
-Coly Li (11):
- bcache: set writeback_rate_update_seconds in range [1, 60] seconds
- bcache: properly set task state in bch_writeback_thread()
- bcache: set task properly in allocator_wait()
- bcache: fix cached_dev->count usage for bch_cache_set_error()
- bcache: stop dc->writeback_rate_update properly
- bcache: set error_limit correctly
- bcache: add CACHE_SET_IO_DISABLE to struct cache_set flags
- bcache: stop all attached bcache devices for a retired cache set
- bcache: add backing_request_endio() for bi_end_io of attached backing
- device I/O
- bcache: add io_disable to struct cached_dev
- bcache: stop bcache device when backing device is offline
-
-Tang Junhui (1):
- bcache: fix inaccurate io state for detached bcache devices
-
- drivers/md/bcache/alloc.c | 5 +-
- drivers/md/bcache/bcache.h | 37 ++++++++-
- drivers/md/bcache/btree.c | 10 ++-
- drivers/md/bcache/io.c | 16 +++-
- drivers/md/bcache/journal.c | 4 +-
- drivers/md/bcache/request.c | 188 +++++++++++++++++++++++++++++++++++-------
- drivers/md/bcache/super.c | 134 ++++++++++++++++++++++++++++--
- drivers/md/bcache/sysfs.c | 45 +++++++++-
- drivers/md/bcache/util.h | 6 --
- drivers/md/bcache/writeback.c | 79 +++++++++++++++---
- drivers/md/bcache/writeback.h | 5 +-
- 11 files changed, 458 insertions(+), 71 deletions(-)
-
---
-2.15.1
-
diff --git a/for-next/v2/v2-0001-bcache-set-writeback_rate_update_seconds-in-range.patch b/for-next/v2/v2-0001-bcache-set-writeback_rate_update_seconds-in-range.patch
deleted file mode 100644
index 3bdcb3c..0000000
--- a/for-next/v2/v2-0001-bcache-set-writeback_rate_update_seconds-in-range.patch
+++ /dev/null
@@ -1,72 +0,0 @@
-From 6641000fb839379fd006ec2e101ea788b65d01b6 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sat, 13 Jan 2018 15:11:03 +0800
-Subject: [PATCH v2 01/12] bcache: set writeback_rate_update_seconds in range
- [1, 60] seconds
-
-dc->writeback_rate_update_seconds can be set via sysfs and its value can
-be set to [1, ULONG_MAX]. It does not make sense to set such a large
-value, 60 seconds is long enough value considering the default 5 seconds
-works well for long time.
-
-Because dc->writeback_rate_update is a special delayed work, it re-arms
-itself inside the delayed work routine update_writeback_rate(). When
-stopping it by cancel_delayed_work_sync(), there should be a timeout to
-wait and make sure the re-armed delayed work is stopped too. A small max
-value of dc->writeback_rate_update_seconds is also helpful to decide a
-reasonable small timeout.
-
-This patch limits sysfs interface to set dc->writeback_rate_update_seconds
-in range of [1, 60] seconds, and replaces the hand-coded number by macros.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/sysfs.c | 3 +++
- drivers/md/bcache/writeback.c | 2 +-
- drivers/md/bcache/writeback.h | 3 +++
- 3 files changed, 7 insertions(+), 1 deletion(-)
-
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index b4184092c727..a74a752c9e0f 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -215,6 +215,9 @@ STORE(__cached_dev)
- sysfs_strtoul_clamp(writeback_rate,
- dc->writeback_rate.rate, 1, INT_MAX);
-
-+ sysfs_strtoul_clamp(writeback_rate_update_seconds,
-+ dc->writeback_rate_update_seconds,
-+ 1, WRITEBACK_RATE_UPDATE_SECS_MAX);
- d_strtoul_nonzero(writeback_rate_update_seconds);
- d_strtoul(writeback_rate_i_term_inverse);
- d_strtoul_nonzero(writeback_rate_p_term_inverse);
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index 51306a19ab03..0ade883b6316 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -652,7 +652,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
- dc->writeback_rate.rate = 1024;
- dc->writeback_rate_minimum = 8;
-
-- dc->writeback_rate_update_seconds = 5;
-+ dc->writeback_rate_update_seconds = WRITEBACK_RATE_UPDATE_SECS_DEFAULT;
- dc->writeback_rate_p_term_inverse = 40;
- dc->writeback_rate_i_term_inverse = 10000;
-
-diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
-index 66f1c527fa24..587b25599856 100644
---- a/drivers/md/bcache/writeback.h
-+++ b/drivers/md/bcache/writeback.h
-@@ -8,6 +8,9 @@
- #define MAX_WRITEBACKS_IN_PASS 5
- #define MAX_WRITESIZE_IN_PASS 5000 /* *512b */
-
-+#define WRITEBACK_RATE_UPDATE_SECS_MAX 60
-+#define WRITEBACK_RATE_UPDATE_SECS_DEFAULT 5
-+
- /*
- * 14 (16384ths) is chosen here as something that each backing device
- * should be a reasonable fraction of the share, and not to blow up
---
-2.15.1
-
diff --git a/for-next/v2/v2-0002-bcache-properly-set-task-state-in-bch_writeback_t.patch b/for-next/v2/v2-0002-bcache-properly-set-task-state-in-bch_writeback_t.patch
deleted file mode 100644
index 9cd3ab4..0000000
--- a/for-next/v2/v2-0002-bcache-properly-set-task-state-in-bch_writeback_t.patch
+++ /dev/null
@@ -1,91 +0,0 @@
-From 5dffbbb4a18a8bed0985ead53afa8d14898d1279 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Mon, 8 Jan 2018 22:11:01 +0800
-Subject: [PATCH v2 02/12] bcache: properly set task state in
- bch_writeback_thread()
-
-Kernel thread routine bch_writeback_thread() has the following code block,
-
-447 down_write(&dc->writeback_lock);
-448~450 if (check conditions) {
-451 up_write(&dc->writeback_lock);
-452 set_current_state(TASK_INTERRUPTIBLE);
-453
-454 if (kthread_should_stop())
-455 return 0;
-456
-457 schedule();
-458 continue;
-459 }
-
-If condition check is true, its task state is set to TASK_INTERRUPTIBLE
-and call schedule() to wait for others to wake up it.
-
-There are 2 issues in current code,
-1, Task state is set to TASK_INTERRUPTIBLE after the condition checks, if
- another process changes the condition and call wake_up_process(dc->
- writeback_thread), then at line 452 task state is set back to
- TASK_INTERRUPTIBLE, the writeback kernel thread will lose a chance to be
- waken up.
-2, At line 454 if kthread_should_stop() is true, writeback kernel thread
- will return to kernel/kthread.c:kthread() with TASK_INTERRUPTIBLE and
- call do_exit(). It is not good to enter do_exit() with task state
- TASK_INTERRUPTIBLE, in following code path might_sleep() is called and a
- warning message is reported by __might_sleep(): "WARNING: do not call
- blocking ops when !TASK_RUNNING; state=1 set at [xxxx]".
-
-For the first issue, task state should be set before condition checks.
-Indeed because dc->writeback_lock is required when modifying all the
-conditions, calling set_current_state() inside code block where dc->
-writeback_lock is held is safe. But this is quite implicit, so I still move
-set_current_state() before all the condition checks.
-
-For the second issue, frankly speaking it does not hurt when kernel thread
-exits with TASK_INTERRUPTIBLE state, but this warning message scares users,
-makes them feel there might be something risky with bcache and hurt their
-data. Setting task state to TASK_RUNNING before returning fixes this
-problem.
-
-Changelog:
-v2: fix the race issue in v1 patch.
-v1: initial buggy fix.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Hannes Reinecke <hare@suse.de>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
----
- drivers/md/bcache/writeback.c | 7 +++++--
- 1 file changed, 5 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index 0ade883b6316..f1d2fc15abcc 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -564,18 +564,21 @@ static int bch_writeback_thread(void *arg)
-
- while (!kthread_should_stop()) {
- down_write(&dc->writeback_lock);
-+ set_current_state(TASK_INTERRUPTIBLE);
- if (!atomic_read(&dc->has_dirty) ||
- (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
- !dc->writeback_running)) {
- up_write(&dc->writeback_lock);
-- set_current_state(TASK_INTERRUPTIBLE);
-
-- if (kthread_should_stop())
-+ if (kthread_should_stop()) {
-+ set_current_state(TASK_RUNNING);
- return 0;
-+ }
-
- schedule();
- continue;
- }
-+ set_current_state(TASK_RUNNING);
-
- searched_full_index = refill_dirty(dc);
-
---
-2.15.1
-
diff --git a/for-next/v2/v2-0003-bcache-set-task-properly-in-allocator_wait.patch b/for-next/v2/v2-0003-bcache-set-task-properly-in-allocator_wait.patch
deleted file mode 100644
index 81a6e7d..0000000
--- a/for-next/v2/v2-0003-bcache-set-task-properly-in-allocator_wait.patch
+++ /dev/null
@@ -1,65 +0,0 @@
-From 374b24628212f175ceaf09901c2fd419d55f6962 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Mon, 8 Jan 2018 22:45:51 +0800
-Subject: [PATCH v2 03/12] bcache: set task properly in allocator_wait()
-
-Kernel thread routine bch_allocator_thread() references macro
-allocator_wait() to wait for a condition or quit to do_exit()
-when kthread_should_stop() is true. Here is the code block,
-
-284 while (1) { \
-285 set_current_state(TASK_INTERRUPTIBLE); \
-286 if (cond) \
-287 break; \
-288 \
-289 mutex_unlock(&(ca)->set->bucket_lock); \
-290 if (kthread_should_stop()) \
-291 return 0; \
-292 \
-293 schedule(); \
-294 mutex_lock(&(ca)->set->bucket_lock); \
-295 } \
-296 __set_current_state(TASK_RUNNING); \
-
-At line 285, task state is set to TASK_INTERRUPTIBLE, if at line 290
-kthread_should_stop() is true, the kernel thread will terminate and return
-to kernel/kthread.c:kthread(), then calls do_exit() with TASK_INTERRUPTIBLE
-state. This is not a suggested behavior and a warning message will be
-reported by might_sleep() in do_exit() code path: "WARNING: do not call
-blocking ops when !TASK_RUNNING; state=1 set at [xxxx]".
-
-This patch fixes this problem by setting task state to TASK_RUNNING if
-kthread_should_stop() is true and before kernel thread returns back to
-kernel/kthread.c:kthread().
-
-Changelog:
-v2: fix the race issue in v1 patch.
-v1: initial buggy fix.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Hannes Reinecke <hare@suse.de>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
----
- drivers/md/bcache/alloc.c | 4 +++-
- 1 file changed, 3 insertions(+), 1 deletion(-)
-
-diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
-index 6cc6c0f9c3a9..458e1d38577d 100644
---- a/drivers/md/bcache/alloc.c
-+++ b/drivers/md/bcache/alloc.c
-@@ -287,8 +287,10 @@ do { \
- break; \
- \
- mutex_unlock(&(ca)->set->bucket_lock); \
-- if (kthread_should_stop()) \
-+ if (kthread_should_stop()) { \
-+ set_current_state(TASK_RUNNING); \
- return 0; \
-+ } \
- \
- schedule(); \
- mutex_lock(&(ca)->set->bucket_lock); \
---
-2.15.1
-
diff --git a/for-next/v2/v2-0004-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch b/for-next/v2/v2-0004-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch
deleted file mode 100644
index a452016..0000000
--- a/for-next/v2/v2-0004-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch
+++ /dev/null
@@ -1,178 +0,0 @@
-From 00455397a8de16cec8e56292f267f2850a939b15 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Mon, 8 Jan 2018 23:05:58 +0800
-Subject: [PATCH v2 04/12] bcache: fix cached_dev->count usage for
- bch_cache_set_error()
-
-When bcache metadata I/O fails, bcache will call bch_cache_set_error()
-to retire the whole cache set. The expected behavior to retire a cache
-set is to unregister the cache set, and unregister all backing device
-attached to this cache set, then remove sysfs entries of the cache set
-and all attached backing devices, finally release memory of structs
-cache_set, cache, cached_dev and bcache_device.
-
-In my testing when journal I/O failure triggered by disconnected cache
-device, sometimes the cache set cannot be retired, and its sysfs
-entry /sys/fs/bcache/<uuid> still exists and the backing device also
-references it. This is not expected behavior.
-
-When metadata I/O fails, the call sequence to retire whole cache set is,
- bch_cache_set_error()
- bch_cache_set_unregister()
- bch_cache_set_stop()
- __cache_set_unregister() <- called as callback by calling
- closure_queue(&c->caching)
- cache_set_flush() <- called as a callback when refcount
- of cache_set->caching is 0
- cache_set_free() <- called as a callback when refcount
- of cache_set->cl is 0
- bch_cache_set_release() <- called as a callback when refcount
- of cache_set->kobj is 0
-
-I find if kernel thread bch_writeback_thread() quits while-loop when
-kthread_should_stop() is true and searched_full_index is false, closure
-callback cache_set_flush() set by continue_at() will never be called. The
-result is, bcache fails to retire whole cache set.
-
-cache_set_flush() will be called when refcount of closure c->caching is 0,
-and in function bcache_device_detach() refcount of closure c->caching is
-released to 0 by closure_put(). In metadata error code path, function
-bcache_device_detach() is called by cached_dev_detach_finish(). This is a
-callback routine being called when cached_dev->count is 0. This refcount
-is decreased by cached_dev_put().
-
-The above dependence indicates, cache_set_flush() will be called when
-refcount of cache_set->caching is 0, and refcount of cache_set->caching
-drops to 0 when refcount of cached_dev->count is 0.
-
-The reason why sometimes cached_dev->count is not 0 (when metadata I/O fails
-and bch_cache_set_error() called) is, in bch_writeback_thread(), refcount
-of cached_dev is not decreased properly.
-
-In bch_writeback_thread(), cached_dev_put() is called only when
-searched_full_index is true and cached_dev->writeback_keys is empty, a.k.a
-there is no dirty data on cache. In most of run time it is correct, but
-when bch_writeback_thread() quits the while-loop while cache is still
-dirty, current code forgets to call cached_dev_put() before this kernel
-thread exits. This is why sometimes cache_set_flush() is not executed and
-cache set fails to be retired.
-
-The reason to call cached_dev_put() in bch_writeback_thread() is, when the
-cache device changes from clean to dirty, cached_dev_get() is called, to
-make sure during writeback operations both backing and cache devices
-won't be released.
-
-Adding following code in bch_writeback_thread() does not work,
- static int bch_writeback_thread(void *arg)
- }
-
-+ if (atomic_read(&dc->has_dirty))
-+ cached_dev_put()
-+
- return 0;
- }
-because writeback kernel thread can be waken up and start via sysfs entry:
- echo 1 > /sys/block/bcache<N>/bcache/writeback_running
-It is difficult to check whether backing device is dirty without race and
-extra lock. So the above modification will introduce potential refcount
-underflow in some conditions.
-
-The correct fix is, to take cached dev refcount when creating the kernel
-thread, and put it before the kernel thread exits. Then bcache does not
-need to take a cached dev refcount when cache turns from clean to dirty,
-or to put a cached dev refcount when cache turns from dirty to clean. The
-writeback kernel thread is always safe to reference data structure from
-cache set, cache and cached device (because a refcount of cache device is
-taken for it already), and no matter the kernel thread is stopped by I/O
-errors or system reboot, cached_dev->count can always be used correctly.
-
-The patch is simple, but understanding how it works is quite complicated.
-
-Changelog:
-v2: set dc->writeback_thread to NULL in this patch, as suggested by Hannes.
-v1: initial version for review.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Reviewed-by: Hannes Reinecke <hare@suse.com>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
----
- drivers/md/bcache/super.c | 1 -
- drivers/md/bcache/writeback.c | 11 ++++++++---
- drivers/md/bcache/writeback.h | 2 --
- 3 files changed, 8 insertions(+), 6 deletions(-)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 133b81225ea9..d14e09cce2f6 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1052,7 +1052,6 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
- if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
- bch_sectors_dirty_init(&dc->disk);
- atomic_set(&dc->has_dirty, 1);
-- refcount_inc(&dc->count);
- bch_writeback_queue(dc);
- }
-
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index f1d2fc15abcc..b280c134dd4d 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -572,7 +572,7 @@ static int bch_writeback_thread(void *arg)
-
- if (kthread_should_stop()) {
- set_current_state(TASK_RUNNING);
-- return 0;
-+ break;
- }
-
- schedule();
-@@ -585,7 +585,6 @@ static int bch_writeback_thread(void *arg)
- if (searched_full_index &&
- RB_EMPTY_ROOT(&dc->writeback_keys.keys)) {
- atomic_set(&dc->has_dirty, 0);
-- cached_dev_put(dc);
- SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN);
- bch_write_bdev_super(dc, NULL);
- }
-@@ -606,6 +605,9 @@ static int bch_writeback_thread(void *arg)
- }
- }
-
-+ dc->writeback_thread = NULL;
-+ cached_dev_put(dc);
-+
- return 0;
- }
-
-@@ -669,10 +671,13 @@ int bch_cached_dev_writeback_start(struct cached_dev *dc)
- if (!dc->writeback_write_wq)
- return -ENOMEM;
-
-+ cached_dev_get(dc);
- dc->writeback_thread = kthread_create(bch_writeback_thread, dc,
- "bcache_writeback");
-- if (IS_ERR(dc->writeback_thread))
-+ if (IS_ERR(dc->writeback_thread)) {
-+ cached_dev_put(dc);
- return PTR_ERR(dc->writeback_thread);
-+ }
-
- schedule_delayed_work(&dc->writeback_rate_update,
- dc->writeback_rate_update_seconds * HZ);
-diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
-index 587b25599856..0bba8f1c6cdf 100644
---- a/drivers/md/bcache/writeback.h
-+++ b/drivers/md/bcache/writeback.h
-@@ -105,8 +105,6 @@ static inline void bch_writeback_add(struct cached_dev *dc)
- {
- if (!atomic_read(&dc->has_dirty) &&
- !atomic_xchg(&dc->has_dirty, 1)) {
-- refcount_inc(&dc->count);
--
- if (BDEV_STATE(&dc->sb) != BDEV_STATE_DIRTY) {
- SET_BDEV_STATE(&dc->sb, BDEV_STATE_DIRTY);
- /* XXX: should do this synchronously */
---
-2.15.1
-
diff --git a/for-next/v2/v2-0005-bcache-stop-dc-writeback_rate_update-properly.patch b/for-next/v2/v2-0005-bcache-stop-dc-writeback_rate_update-properly.patch
deleted file mode 100644
index 6448f1a..0000000
--- a/for-next/v2/v2-0005-bcache-stop-dc-writeback_rate_update-properly.patch
+++ /dev/null
@@ -1,266 +0,0 @@
-From 36b752f82142be3641fbb60e6b8a79b53ad5419e Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sat, 13 Jan 2018 15:48:39 +0800
-Subject: [PATCH v2 05/12] bcache: stop dc->writeback_rate_update properly
-
-struct delayed_work writeback_rate_update in struct cached_dev is a delayed
-worker to call function update_writeback_rate() in period (the interval is
-defined by dc->writeback_rate_update_seconds).
-
-When a metadata I/O error happens on cache device, bcache error handling
-routine bch_cache_set_error() will call bch_cache_set_unregister() to
-retire whole cache set. On the unregister code path, this delayed work is
-stopped by calling cancel_delayed_work_sync(&dc->writeback_rate_update).
-
-dc->writeback_rate_update is a special delayed work from others in bcache.
-In its routine update_writeback_rate(), this delayed work is re-armed
-itself. That means when cancel_delayed_work_sync() returns, this delayed
-work can still be executed after several seconds defined by
-dc->writeback_rate_update_seconds.
-
-The problem is, after cancel_delayed_work_sync() returns, the cache set
-unregister code path will continue and release memory of struct cache set.
-Then the delayed work is scheduled to run, __update_writeback_rate()
-will reference the already released cache_set memory, and trigger a NULL
-pointer dereference fault.
-
-This patch introduces two more bcache device flags,
-- BCACHE_DEV_WB_RUNNING
- bit set: bcache device is in writeback mode and running, it is OK for
- dc->writeback_rate_update to re-arm itself.
- bit clear:bcache device is trying to stop dc->writeback_rate_update,
- this delayed work should not re-arm itself and quit.
-- BCACHE_DEV_RATE_DW_RUNNING
- bit set: routine update_writeback_rate() is executing.
- bit clear: routine update_writeback_rate() quits.
-
-This patch also adds a function cancel_writeback_rate_update_dwork() to
-wait for dc->writeback_rate_update to quit before cancelling it by calling
-cancel_delayed_work_sync(). In order to avoid a deadlock by unexpected
-quit dc->writeback_rate_update, after time_out seconds this function will
-give up and continue to call cancel_delayed_work_sync().
-
-And here I explain how this patch stops self re-armed delayed work properly
-with the above stuffs.
-
-update_writeback_rate() sets BCACHE_DEV_RATE_DW_RUNNING at its beginning
-and clears BCACHE_DEV_RATE_DW_RUNNING at its end. Before calling
-cancel_writeback_rate_update_dwork() clear flag BCACHE_DEV_WB_RUNNING.
-
-Before calling cancel_delayed_work_sync() wait until flag
-BCACHE_DEV_RATE_DW_RUNNING is clear. So when calling
-cancel_delayed_work_sync(), dc->writeback_rate_update must be already re-
-armed, or quit by seeing BCACHE_DEV_WB_RUNNING cleared. In both cases
-delayed work routine update_writeback_rate() won't be executed after
-cancel_delayed_work_sync() returns.
-
-Inside update_writeback_rate() before calling schedule_delayed_work(), flag
-BCACHE_DEV_WB_RUNNING is checked before. If this flag is cleared, it means
-someone is about to stop the delayed work. Because flag
-BCACHE_DEV_RATE_DW_RUNNING is set already and cancel_delayed_work_sync()
-has to wait for this flag to be cleared, we don't need to worry about race
-condition here.
-
-If update_writeback_rate() is scheduled to run after checking
-BCACHE_DEV_RATE_DW_RUNNING and before calling cancel_delayed_work_sync()
-in cancel_writeback_rate_update_dwork(), it is also safe. Because at this
-moment BCACHE_DEV_WB_RUNNING is cleared with memory barrier. As I mentioned
-previously, update_writeback_rate() will see BCACHE_DEV_WB_RUNNING is clear
-and quit immediately.
-
-Because there are more dependences inside update_writeback_rate() to struct
-cache_set memory, dc->writeback_rate_update is not a simple self re-arm
-delayed work. After trying many different methods (e.g. hold dc->count, or
-use locks), this is the only way I can find which works to properly stop
-dc->writeback_rate_update delayed work.
-
-Changelog:
-v2: Try to fix the race issue which is pointed out by Junhui.
-v1: The initial version for review
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Hannes Reinecke <hare@suse.com>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
----
- drivers/md/bcache/bcache.h | 9 +++++----
- drivers/md/bcache/super.c | 39 +++++++++++++++++++++++++++++++++++----
- drivers/md/bcache/sysfs.c | 3 ++-
- drivers/md/bcache/writeback.c | 29 ++++++++++++++++++++++++++++-
- 4 files changed, 70 insertions(+), 10 deletions(-)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 5e2d4e80198e..88d938c8d027 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -258,10 +258,11 @@ struct bcache_device {
- struct gendisk *disk;
-
- unsigned long flags;
--#define BCACHE_DEV_CLOSING 0
--#define BCACHE_DEV_DETACHING 1
--#define BCACHE_DEV_UNLINK_DONE 2
--
-+#define BCACHE_DEV_CLOSING 0
-+#define BCACHE_DEV_DETACHING 1
-+#define BCACHE_DEV_UNLINK_DONE 2
-+#define BCACHE_DEV_WB_RUNNING 4
-+#define BCACHE_DEV_RATE_DW_RUNNING 8
- unsigned nr_stripes;
- unsigned stripe_size;
- atomic_t *stripe_sectors_dirty;
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index d14e09cce2f6..6d888e8fea8c 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -899,6 +899,32 @@ void bch_cached_dev_run(struct cached_dev *dc)
- pr_debug("error creating sysfs link");
- }
-
-+/*
-+ * If BCACHE_DEV_RATE_DW_RUNNING is set, it means routine of the delayed
-+ * work dc->writeback_rate_update is running. Wait until the routine
-+ * quits (BCACHE_DEV_RATE_DW_RUNNING is clear), then continue to
-+ * cancel it. If BCACHE_DEV_RATE_DW_RUNNING is not clear after time_out
-+ * seconds, give up waiting here and continue to cancel it too.
-+ */
-+static void cancel_writeback_rate_update_dwork(struct cached_dev *dc)
-+{
-+ int time_out = WRITEBACK_RATE_UPDATE_SECS_MAX * HZ;
-+
-+ do {
-+ if (!test_bit(BCACHE_DEV_RATE_DW_RUNNING,
-+ &dc->disk.flags))
-+ break;
-+ time_out--;
-+ schedule_timeout_interruptible(1);
-+ } while (time_out > 0);
-+
-+ if (time_out == 0)
-+ pr_warn("bcache: give up waiting for "
-+ "dc->writeback_write_update to quit");
-+
-+ cancel_delayed_work_sync(&dc->writeback_rate_update);
-+}
-+
- static void cached_dev_detach_finish(struct work_struct *w)
- {
- struct cached_dev *dc = container_of(w, struct cached_dev, detach);
-@@ -911,7 +937,9 @@ static void cached_dev_detach_finish(struct work_struct *w)
-
- mutex_lock(&bch_register_lock);
-
-- cancel_delayed_work_sync(&dc->writeback_rate_update);
-+ if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
-+ cancel_writeback_rate_update_dwork(dc);
-+
- if (!IS_ERR_OR_NULL(dc->writeback_thread)) {
- kthread_stop(dc->writeback_thread);
- dc->writeback_thread = NULL;
-@@ -954,6 +982,7 @@ void bch_cached_dev_detach(struct cached_dev *dc)
- closure_get(&dc->disk.cl);
-
- bch_writeback_queue(dc);
-+
- cached_dev_put(dc);
- }
-
-@@ -1079,14 +1108,16 @@ static void cached_dev_free(struct closure *cl)
- {
- struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
-
-- cancel_delayed_work_sync(&dc->writeback_rate_update);
-+ mutex_lock(&bch_register_lock);
-+
-+ if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
-+ cancel_writeback_rate_update_dwork(dc);
-+
- if (!IS_ERR_OR_NULL(dc->writeback_thread))
- kthread_stop(dc->writeback_thread);
- if (dc->writeback_write_wq)
- destroy_workqueue(dc->writeback_write_wq);
-
-- mutex_lock(&bch_register_lock);
--
- if (atomic_read(&dc->running))
- bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
- bcache_device_free(&dc->disk);
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index a74a752c9e0f..b7166c504cdb 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -304,7 +304,8 @@ STORE(bch_cached_dev)
- bch_writeback_queue(dc);
-
- if (attr == &sysfs_writeback_percent)
-- schedule_delayed_work(&dc->writeback_rate_update,
-+ if (!test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
-+ schedule_delayed_work(&dc->writeback_rate_update,
- dc->writeback_rate_update_seconds * HZ);
-
- mutex_unlock(&bch_register_lock);
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index b280c134dd4d..69957f97bf13 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -115,6 +115,21 @@ static void update_writeback_rate(struct work_struct *work)
- struct cached_dev,
- writeback_rate_update);
-
-+ /*
-+ * should check BCACHE_DEV_RATE_DW_RUNNING before calling
-+ * cancel_delayed_work_sync().
-+ */
-+ set_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
-+ /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
-+ smp_mb();
-+
-+ if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) {
-+ clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
-+ /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
-+ smp_mb();
-+ return;
-+ }
-+
- down_read(&dc->writeback_lock);
-
- if (atomic_read(&dc->has_dirty) &&
-@@ -123,8 +138,18 @@ static void update_writeback_rate(struct work_struct *work)
-
- up_read(&dc->writeback_lock);
-
-- schedule_delayed_work(&dc->writeback_rate_update,
-+ if (test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) {
-+ schedule_delayed_work(&dc->writeback_rate_update,
- dc->writeback_rate_update_seconds * HZ);
-+ }
-+
-+ /*
-+ * should check BCACHE_DEV_RATE_DW_RUNNING before calling
-+ * cancel_delayed_work_sync().
-+ */
-+ clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
-+ /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
-+ smp_mb();
- }
-
- static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
-@@ -661,6 +686,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
- dc->writeback_rate_p_term_inverse = 40;
- dc->writeback_rate_i_term_inverse = 10000;
-
-+ WARN_ON(test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
- INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
- }
-
-@@ -679,6 +705,7 @@ int bch_cached_dev_writeback_start(struct cached_dev *dc)
- return PTR_ERR(dc->writeback_thread);
- }
-
-+ WARN_ON(test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
- schedule_delayed_work(&dc->writeback_rate_update,
- dc->writeback_rate_update_seconds * HZ);
-
---
-2.15.1
-
diff --git a/for-next/v2/v2-0006-bcache-set-error_limit-correctly.patch b/for-next/v2/v2-0006-bcache-set-error_limit-correctly.patch
deleted file mode 100644
index 46124c9..0000000
--- a/for-next/v2/v2-0006-bcache-set-error_limit-correctly.patch
+++ /dev/null
@@ -1,121 +0,0 @@
-From 868e1b9ae94b4852555b4dcf5990b309c59f798b Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Tue, 9 Jan 2018 22:46:25 +0800
-Subject: [PATCH v2 06/12] bcache: set error_limit correctly
-
-Struct cache uses io_errors for two purposes,
-- Error decay: when cache set error_decay is set, io_errors is used to
- generate a small piece of delay when I/O error happens.
-- I/O errors counter: in order to generate big enough value for error
- decay, I/O errors counter value is stored by left shifting 20 bits (a.k.a
- IO_ERROR_SHIFT).
-
-In function bch_count_io_errors(), if I/O errors counter reaches cache set
-error limit, bch_cache_set_error() will be called to retire the whole cache
-set. But current code is problematic when checking the error limit, see the
-following code piece from bch_count_io_errors(),
-
- 90 if (error) {
- 91 char buf[BDEVNAME_SIZE];
- 92 unsigned errors = atomic_add_return(1 << IO_ERROR_SHIFT,
- 93 &ca->io_errors);
- 94 errors >>= IO_ERROR_SHIFT;
- 95
- 96 if (errors < ca->set->error_limit)
- 97 pr_err("%s: IO error on %s, recovering",
- 98 bdevname(ca->bdev, buf), m);
- 99 else
-100 bch_cache_set_error(ca->set,
-101 "%s: too many IO errors %s",
-102 bdevname(ca->bdev, buf), m);
-103 }
-
-At line 94, errors is right shifted by IO_ERROR_SHIFT bits, so it is now the
-real error counter compared at line 96. But ca->set->error_limit is initia-
-lized with an amplified value in bch_cache_set_alloc(),
-1545 c->error_limit = 8 << IO_ERROR_SHIFT;
-
-It means by default, in bch_count_io_errors(), before 8<<20 errors happened
-bch_cache_set_error() won't be called to retire the problematic cache
-device. If the average request size is 64KB, it means bcache won't handle
-failed device until 512GB data is requested. This is too large to be an I/O
-threshold. So I believe the correct error limit should be much less.
-
-This patch sets default cache set error limit to 8, then in
-bch_count_io_errors() when errors counter reaches 8 (if it is default
-value), function bch_cache_set_error() will be called to retire the whole
-cache set. This patch also removes bits shifting when store or show
-io_error_limit value via sysfs interface.
-
-Nowadays most of SSDs handle internal flash failure automatically by LBA
-address re-indirect mapping. If an I/O error can be observed by upper layer
-code, it will be a notable error because that SSD can not re-indirect
-map the problematic LBA address to an available flash block. This situation
-indicates the whole SSD will fail very soon. Therefore setting 8 as
-the default io error limit value makes sense, it is enough for most of
-cache devices.
-
-Changelog:
-v2: add reviewed-by from Hannes.
-v1: initial version for review.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Reviewed-by: Hannes Reinecke <hare@suse.com>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
----
- drivers/md/bcache/bcache.h | 1 +
- drivers/md/bcache/super.c | 2 +-
- drivers/md/bcache/sysfs.c | 4 ++--
- 3 files changed, 4 insertions(+), 3 deletions(-)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 88d938c8d027..7d7512fa4f09 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -663,6 +663,7 @@ struct cache_set {
- ON_ERROR_UNREGISTER,
- ON_ERROR_PANIC,
- } on_error;
-+#define DEFAULT_IO_ERROR_LIMIT 8
- unsigned error_limit;
- unsigned error_decay;
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 6d888e8fea8c..a373648b5d4b 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1583,7 +1583,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
-
- c->congested_read_threshold_us = 2000;
- c->congested_write_threshold_us = 20000;
-- c->error_limit = 8 << IO_ERROR_SHIFT;
-+ c->error_limit = DEFAULT_IO_ERROR_LIMIT;
-
- return c;
- err:
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index b7166c504cdb..ba62e987b503 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -560,7 +560,7 @@ SHOW(__bch_cache_set)
-
- /* See count_io_errors for why 88 */
- sysfs_print(io_error_halflife, c->error_decay * 88);
-- sysfs_print(io_error_limit, c->error_limit >> IO_ERROR_SHIFT);
-+ sysfs_print(io_error_limit, c->error_limit);
-
- sysfs_hprint(congested,
- ((uint64_t) bch_get_congested(c)) << 9);
-@@ -660,7 +660,7 @@ STORE(__bch_cache_set)
- }
-
- if (attr == &sysfs_io_error_limit)
-- c->error_limit = strtoul_or_return(buf) << IO_ERROR_SHIFT;
-+ c->error_limit = strtoul_or_return(buf);
-
- /* See count_io_errors() for why 88 */
- if (attr == &sysfs_io_error_halflife)
---
-2.15.1
-
diff --git a/for-next/v2/v2-0007-bcache-add-CACHE_SET_IO_DISABLE-to-struct-cache_s.patch b/for-next/v2/v2-0007-bcache-add-CACHE_SET_IO_DISABLE-to-struct-cache_s.patch
deleted file mode 100644
index 37631b2..0000000
--- a/for-next/v2/v2-0007-bcache-add-CACHE_SET_IO_DISABLE-to-struct-cache_s.patch
+++ /dev/null
@@ -1,489 +0,0 @@
-From f67f7eb1a237ff8409574ddafe8331f6ec3d6b88 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sat, 13 Jan 2018 16:47:40 +0800
-Subject: [PATCH v2 07/12] bcache: add CACHE_SET_IO_DISABLE to struct cache_set
- flags
-
-When too many I/Os failed on cache device, bch_cache_set_error() is called
-in the error handling code path to retire whole problematic cache set. If
-new I/O requests continue to come and take refcount dc->count, the cache
-set won't be retired immediately, this is a problem.
-
-Furthermore, several kernel threads and self-armed kernel work items may
-still be running after bch_cache_set_error() is called. It needs to wait
-quite a while for them to stop, or they won't stop at all. They also
-prevent the cache set from being retired.
-
-The solution in this patch is, to add per cache set flag to disable I/O
-request on this cache and all attached backing devices. Then new coming I/O
-requests can be rejected in *_make_request() before taking refcount, kernel
-threads and self-armed kernel worker can stop very fast when flags bit
-CACHE_SET_IO_DISABLE is set.
-
-Because bcache also does internal I/Os for writeback, garbage collection,
-bucket allocation, journaling, this kind of I/O should be disabled after
-bch_cache_set_error() is called. So closure_bio_submit() is modified to
-check whether CACHE_SET_IO_DISABLE is set on cache_set->flags. If set,
-closure_bio_submit() will set bio->bi_status to BLK_STS_IOERR and
-return, generic_make_request() won't be called.
-
-A sysfs interface is also added to set or clear CACHE_SET_IO_DISABLE bit
-from cache_set->flags, to disable or enable cache set I/O for debugging. It
-is helpful to trigger more corner case issues for failed cache device.
-
-Changelog
-v2,
-- use cache_set->flags to set io disable bit, suggested by Junhui.
-- check CACHE_SET_IO_DISABLE in bch_btree_gc() to stop a while-loop, this
- is reported and inspired by the original patch of Pavel Vazharov.
-v1, initial version.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Reviewed-by: Hannes Reinecke <hare@suse.com>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Pavel Vazharov <freakpv@gmail.com>
----
- drivers/md/bcache/alloc.c | 3 ++-
- drivers/md/bcache/bcache.h | 18 ++++++++++++++++++
- drivers/md/bcache/btree.c | 10 +++++++---
- drivers/md/bcache/io.c | 2 +-
- drivers/md/bcache/journal.c | 4 ++--
- drivers/md/bcache/request.c | 26 +++++++++++++++++++-------
- drivers/md/bcache/super.c | 6 +++++-
- drivers/md/bcache/sysfs.c | 20 ++++++++++++++++++++
- drivers/md/bcache/util.h | 6 ------
- drivers/md/bcache/writeback.c | 35 +++++++++++++++++++++++++++--------
- 10 files changed, 101 insertions(+), 29 deletions(-)
-
-diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
-index 458e1d38577d..004cc3cc6123 100644
---- a/drivers/md/bcache/alloc.c
-+++ b/drivers/md/bcache/alloc.c
-@@ -287,7 +287,8 @@ do { \
- break; \
- \
- mutex_unlock(&(ca)->set->bucket_lock); \
-- if (kthread_should_stop()) { \
-+ if (kthread_should_stop() || \
-+ test_bit(CACHE_SET_IO_DISABLE, &ca->set->flags)) { \
- set_current_state(TASK_RUNNING); \
- return 0; \
- } \
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 7d7512fa4f09..c41736960045 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -475,10 +475,15 @@ struct gc_stat {
- *
- * CACHE_SET_RUNNING means all cache devices have been registered and journal
- * replay is complete.
-+ *
-+ * CACHE_SET_IO_DISABLE is set when bcache is stopping the whole cache set, all
-+ * external and internal I/O should be denied when this flag is set.
-+ *
- */
- #define CACHE_SET_UNREGISTERING 0
- #define CACHE_SET_STOPPING 1
- #define CACHE_SET_RUNNING 2
-+#define CACHE_SET_IO_DISABLE 4
-
- struct cache_set {
- struct closure cl;
-@@ -862,6 +867,19 @@ static inline void wake_up_allocators(struct cache_set *c)
- wake_up_process(ca->alloc_thread);
- }
-
-+static inline void closure_bio_submit(struct cache_set *c,
-+ struct bio *bio,
-+ struct closure *cl)
-+{
-+ closure_get(cl);
-+ if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags))) {
-+ bio->bi_status = BLK_STS_IOERR;
-+ bio_endio(bio);
-+ return;
-+ }
-+ generic_make_request(bio);
-+}
-+
- /* Forward declarations */
-
- void bch_count_io_errors(struct cache *, blk_status_t, int, const char *);
-diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
-index bf3a48aa9a9a..0a0bc63011b4 100644
---- a/drivers/md/bcache/btree.c
-+++ b/drivers/md/bcache/btree.c
-@@ -1744,6 +1744,7 @@ static void bch_btree_gc(struct cache_set *c)
-
- btree_gc_start(c);
-
-+ /* if CACHE_SET_IO_DISABLE set, gc thread should stop too */
- do {
- ret = btree_root(gc_root, c, &op, &writes, &stats);
- closure_sync(&writes);
-@@ -1751,7 +1752,7 @@ static void bch_btree_gc(struct cache_set *c)
-
- if (ret && ret != -EAGAIN)
- pr_warn("gc failed!");
-- } while (ret);
-+ } while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags));
-
- bch_btree_gc_finish(c);
- wake_up_allocators(c);
-@@ -1789,9 +1790,12 @@ static int bch_gc_thread(void *arg)
-
- while (1) {
- wait_event_interruptible(c->gc_wait,
-- kthread_should_stop() || gc_should_run(c));
-+ kthread_should_stop() ||
-+ test_bit(CACHE_SET_IO_DISABLE, &c->flags) ||
-+ gc_should_run(c));
-
-- if (kthread_should_stop())
-+ if (kthread_should_stop() ||
-+ test_bit(CACHE_SET_IO_DISABLE, &c->flags))
- break;
-
- set_gc_sectors(c);
-diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
-index a783c5a41ff1..8013ecbcdbda 100644
---- a/drivers/md/bcache/io.c
-+++ b/drivers/md/bcache/io.c
-@@ -38,7 +38,7 @@ void __bch_submit_bbio(struct bio *bio, struct cache_set *c)
- bio_set_dev(bio, PTR_CACHE(c, &b->key, 0)->bdev);
-
- b->submit_time_us = local_clock_us();
-- closure_bio_submit(bio, bio->bi_private);
-+ closure_bio_submit(c, bio, bio->bi_private);
- }
-
- void bch_submit_bbio(struct bio *bio, struct cache_set *c,
-diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index a87165c1d8e5..979873641030 100644
---- a/drivers/md/bcache/journal.c
-+++ b/drivers/md/bcache/journal.c
-@@ -62,7 +62,7 @@ reread: left = ca->sb.bucket_size - offset;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
- bch_bio_map(bio, data);
-
-- closure_bio_submit(bio, &cl);
-+ closure_bio_submit(ca->set, bio, &cl);
- closure_sync(&cl);
-
- /* This function could be simpler now since we no longer write
-@@ -653,7 +653,7 @@ static void journal_write_unlocked(struct closure *cl)
- spin_unlock(&c->journal.lock);
-
- while ((bio = bio_list_pop(&list)))
-- closure_bio_submit(bio, cl);
-+ closure_bio_submit(c, bio, cl);
-
- continue_at(cl, journal_write_done, NULL);
- }
-diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
-index 1a46b41dac70..02296bda6384 100644
---- a/drivers/md/bcache/request.c
-+++ b/drivers/md/bcache/request.c
-@@ -747,7 +747,7 @@ static void cached_dev_read_error(struct closure *cl)
-
- /* XXX: invalidate cache */
-
-- closure_bio_submit(bio, cl);
-+ closure_bio_submit(s->iop.c, bio, cl);
- }
-
- continue_at(cl, cached_dev_cache_miss_done, NULL);
-@@ -872,7 +872,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
- s->cache_miss = miss;
- s->iop.bio = cache_bio;
- bio_get(cache_bio);
-- closure_bio_submit(cache_bio, &s->cl);
-+ closure_bio_submit(s->iop.c, cache_bio, &s->cl);
-
- return ret;
- out_put:
-@@ -880,7 +880,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
- out_submit:
- miss->bi_end_io = request_endio;
- miss->bi_private = &s->cl;
-- closure_bio_submit(miss, &s->cl);
-+ closure_bio_submit(s->iop.c, miss, &s->cl);
- return ret;
- }
-
-@@ -945,7 +945,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
-
- if ((bio_op(bio) != REQ_OP_DISCARD) ||
- blk_queue_discard(bdev_get_queue(dc->bdev)))
-- closure_bio_submit(bio, cl);
-+ closure_bio_submit(s->iop.c, bio, cl);
- } else if (s->iop.writeback) {
- bch_writeback_add(dc);
- s->iop.bio = bio;
-@@ -960,12 +960,12 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
- flush->bi_private = cl;
- flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
-
-- closure_bio_submit(flush, cl);
-+ closure_bio_submit(s->iop.c, flush, cl);
- }
- } else {
- s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split);
-
-- closure_bio_submit(bio, cl);
-+ closure_bio_submit(s->iop.c, bio, cl);
- }
-
- closure_call(&s->iop.cl, bch_data_insert, NULL, cl);
-@@ -981,7 +981,7 @@ static void cached_dev_nodata(struct closure *cl)
- bch_journal_meta(s->iop.c, cl);
-
- /* If it's a flush, we send the flush to the backing device too */
-- closure_bio_submit(bio, cl);
-+ closure_bio_submit(s->iop.c, bio, cl);
-
- continue_at(cl, cached_dev_bio_complete, NULL);
- }
-@@ -996,6 +996,12 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
- struct cached_dev *dc = container_of(d, struct cached_dev, disk);
- int rw = bio_data_dir(bio);
-
-+ if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
-+ bio->bi_status = BLK_STS_IOERR;
-+ bio_endio(bio);
-+ return BLK_QC_T_NONE;
-+ }
-+
- atomic_set(&dc->backing_idle, 0);
- generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
-
-@@ -1112,6 +1118,12 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q,
- struct bcache_device *d = bio->bi_disk->private_data;
- int rw = bio_data_dir(bio);
-
-+ if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
-+ bio->bi_status = BLK_STS_IOERR;
-+ bio_endio(bio);
-+ return BLK_QC_T_NONE;
-+ }
-+
- generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
-
- s = search_alloc(bio, d);
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index a373648b5d4b..4204d75aee7b 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -521,7 +521,7 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op,
- bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags);
- bch_bio_map(bio, ca->disk_buckets);
-
-- closure_bio_submit(bio, &ca->prio);
-+ closure_bio_submit(ca->set, bio, &ca->prio);
- closure_sync(cl);
- }
-
-@@ -1349,6 +1349,9 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
- test_bit(CACHE_SET_STOPPING, &c->flags))
- return false;
-
-+ if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags))
-+ pr_warn("bcache: CACHE_SET_IO_DISABLE already set");
-+
- /* XXX: we can be called from atomic context
- acquire_console_sem();
- */
-@@ -1584,6 +1587,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
- c->congested_read_threshold_us = 2000;
- c->congested_write_threshold_us = 20000;
- c->error_limit = DEFAULT_IO_ERROR_LIMIT;
-+ WARN_ON(test_and_clear_bit(CACHE_SET_IO_DISABLE, &c->flags));
-
- return c;
- err:
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index ba62e987b503..afb051bcfca1 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -92,6 +92,7 @@ read_attribute(partial_stripes_expensive);
-
- rw_attribute(synchronous);
- rw_attribute(journal_delay_ms);
-+rw_attribute(io_disable);
- rw_attribute(discard);
- rw_attribute(running);
- rw_attribute(label);
-@@ -577,6 +578,8 @@ SHOW(__bch_cache_set)
- sysfs_printf(gc_always_rewrite, "%i", c->gc_always_rewrite);
- sysfs_printf(btree_shrinker_disabled, "%i", c->shrinker_disabled);
- sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
-+ sysfs_printf(io_disable, "%i",
-+ test_bit(CACHE_SET_IO_DISABLE, &c->flags));
-
- if (attr == &sysfs_bset_tree_stats)
- return bch_bset_print_stats(c, buf);
-@@ -666,6 +669,22 @@ STORE(__bch_cache_set)
- if (attr == &sysfs_io_error_halflife)
- c->error_decay = strtoul_or_return(buf) / 88;
-
-+ if (attr == &sysfs_io_disable) {
-+ int v = strtoul_or_return(buf);
-+
-+ if (v) {
-+ if (test_and_set_bit(CACHE_SET_IO_DISABLE,
-+ &c->flags))
-+ pr_warn("bcache: CACHE_SET_IO_DISABLE"
-+ " already set");
-+ } else {
-+ if (!test_and_clear_bit(CACHE_SET_IO_DISABLE,
-+ &c->flags))
-+ pr_warn("bcache: CACHE_SET_IO_DISABLE"
-+ " already cleared");
-+ }
-+ }
-+
- sysfs_strtoul(journal_delay_ms, c->journal_delay_ms);
- sysfs_strtoul(verify, c->verify);
- sysfs_strtoul(key_merging_disabled, c->key_merging_disabled);
-@@ -748,6 +767,7 @@ static struct attribute *bch_cache_set_internal_files[] = {
- &sysfs_gc_always_rewrite,
- &sysfs_btree_shrinker_disabled,
- &sysfs_copy_gc_enabled,
-+ &sysfs_io_disable,
- NULL
- };
- KTYPE(bch_cache_set_internal);
-diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
-index 4df4c5c1cab2..7944eea54fa9 100644
---- a/drivers/md/bcache/util.h
-+++ b/drivers/md/bcache/util.h
-@@ -565,12 +565,6 @@ static inline sector_t bdev_sectors(struct block_device *bdev)
- return bdev->bd_inode->i_size >> 9;
- }
-
--#define closure_bio_submit(bio, cl) \
--do { \
-- closure_get(cl); \
-- generic_make_request(bio); \
--} while (0)
--
- uint64_t bch_crc64_update(uint64_t, const void *, size_t);
- uint64_t bch_crc64(const void *, size_t);
-
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index 69957f97bf13..e97e2afead3e 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -114,6 +114,7 @@ static void update_writeback_rate(struct work_struct *work)
- struct cached_dev *dc = container_of(to_delayed_work(work),
- struct cached_dev,
- writeback_rate_update);
-+ struct cache_set *c = dc->disk.c;
-
- /*
- * should check BCACHE_DEV_RATE_DW_RUNNING before calling
-@@ -123,7 +124,12 @@ static void update_writeback_rate(struct work_struct *work)
- /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
- smp_mb();
-
-- if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) {
-+ /*
-+ * CACHE_SET_IO_DISABLE might be set via sysfs interface,
-+ * check it here too.
-+ */
-+ if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) ||
-+ test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
- clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
- /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
- smp_mb();
-@@ -138,7 +144,12 @@ static void update_writeback_rate(struct work_struct *work)
-
- up_read(&dc->writeback_lock);
-
-- if (test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) {
-+ /*
-+ * CACHE_SET_IO_DISABLE might be set via sysfs interface,
-+ * check it here too.
-+ */
-+ if (test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) &&
-+ !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
- schedule_delayed_work(&dc->writeback_rate_update,
- dc->writeback_rate_update_seconds * HZ);
- }
-@@ -278,7 +289,7 @@ static void write_dirty(struct closure *cl)
- bio_set_dev(&io->bio, io->dc->bdev);
- io->bio.bi_end_io = dirty_endio;
-
-- closure_bio_submit(&io->bio, cl);
-+ closure_bio_submit(io->dc->disk.c, &io->bio, cl);
- }
-
- atomic_set(&dc->writeback_sequence_next, next_sequence);
-@@ -304,7 +315,7 @@ static void read_dirty_submit(struct closure *cl)
- {
- struct dirty_io *io = container_of(cl, struct dirty_io, cl);
-
-- closure_bio_submit(&io->bio, cl);
-+ closure_bio_submit(io->dc->disk.c, &io->bio, cl);
-
- continue_at(cl, write_dirty, io->dc->writeback_write_wq);
- }
-@@ -330,7 +341,9 @@ static void read_dirty(struct cached_dev *dc)
-
- next = bch_keybuf_next(&dc->writeback_keys);
-
-- while (!kthread_should_stop() && next) {
-+ while (!kthread_should_stop() &&
-+ !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
-+ next) {
- size = 0;
- nk = 0;
-
-@@ -427,7 +440,9 @@ static void read_dirty(struct cached_dev *dc)
- }
- }
-
-- while (!kthread_should_stop() && delay) {
-+ while (!kthread_should_stop() &&
-+ !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
-+ delay) {
- schedule_timeout_interruptible(delay);
- delay = writeback_delay(dc, 0);
- }
-@@ -583,11 +598,13 @@ static bool refill_dirty(struct cached_dev *dc)
- static int bch_writeback_thread(void *arg)
- {
- struct cached_dev *dc = arg;
-+ struct cache_set *c = dc->disk.c;
- bool searched_full_index;
-
- bch_ratelimit_reset(&dc->writeback_rate);
-
-- while (!kthread_should_stop()) {
-+ while (!kthread_should_stop() &&
-+ !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
- down_write(&dc->writeback_lock);
- set_current_state(TASK_INTERRUPTIBLE);
- if (!atomic_read(&dc->has_dirty) ||
-@@ -595,7 +612,8 @@ static int bch_writeback_thread(void *arg)
- !dc->writeback_running)) {
- up_write(&dc->writeback_lock);
-
-- if (kthread_should_stop()) {
-+ if (kthread_should_stop() ||
-+ test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
- set_current_state(TASK_RUNNING);
- break;
- }
-@@ -623,6 +641,7 @@ static int bch_writeback_thread(void *arg)
-
- while (delay &&
- !kthread_should_stop() &&
-+ !test_bit(CACHE_SET_IO_DISABLE, &c->flags) &&
- !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
- delay = schedule_timeout_interruptible(delay);
-
---
-2.15.1
-
diff --git a/for-next/v2/v2-0008-bcache-stop-all-attached-bcache-devices-for-a-ret.patch b/for-next/v2/v2-0008-bcache-stop-all-attached-bcache-devices-for-a-ret.patch
deleted file mode 100644
index 391b334..0000000
--- a/for-next/v2/v2-0008-bcache-stop-all-attached-bcache-devices-for-a-ret.patch
+++ /dev/null
@@ -1,67 +0,0 @@
-From d5fe9ac0c5814dbb33ccff476bf927e55a31e216 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 10 Jan 2018 00:26:32 +0800
-Subject: [PATCH v2 08/12] bcache: stop all attached bcache devices for a
- retired cache set
-
-When there are too many I/O errors on cache device, current bcache code
-will retire the whole cache set, and detach all bcache devices. But the
-detached bcache devices are not stopped, which is problematic when bcache
-is in writeback mode.
-
-If the retired cache set has dirty data of backing devices, continue
-writing to bcache device will write to backing device directly. If the
-LBA of write request has a dirty version cached on cache device, next time
-when the cache device is re-registered and backing device re-attached to
-it again, the stale dirty data on cache device will be written to backing
-device, and overwrite latest directly written data. This situation causes
-silent data corruption.
-
-This patch checks whether CACHE_SET_IO_DISABLE is set in cache_set->flags in
-__cache_set_unregister(). If the bit is set, it means the cache set is being
-unregistered because of too many I/O errors, so all attached bcache
-devices will be stopped as well. If the bit is not set, it
-means __cache_set_unregister() is triggered by writing 1 to sysfs file
-/sys/fs/bcache/<UUID>/bcache/stop. This is an exception because users do
-it explicitly, so this patch keeps existing behavior and does not stop any
-bcache device.
-
-Even if the failed cache device has no dirty data, stopping the bcache device
-is still the behavior desired by many Ceph and database users. Then their
-applications will report I/O errors due to the disappeared bcache device, and
-operations people will know the cache device is broken or disconnected.
-
-Changelog:
-v2: add reviewed-by from Hannes.
-v1: initial version for review.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Reviewed-by: Hannes Reinecke <hare@suse.com>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
-Cc: Michael Lyle <mlyle@lyle.org>
----
- drivers/md/bcache/super.c | 8 ++++++++
- 1 file changed, 8 insertions(+)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 4204d75aee7b..97e3bb8e1aee 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1478,6 +1478,14 @@ static void __cache_set_unregister(struct closure *cl)
- dc = container_of(c->devices[i],
- struct cached_dev, disk);
- bch_cached_dev_detach(dc);
-+ /*
-+ * If we come here by too many I/O errors,
-+ * bcache device should be stopped too, to
-+ * keep data consistency on cache and
-+ * backing devices.
-+ */
-+ if (test_bit(CACHE_SET_IO_DISABLE, &c->flags))
-+ bcache_device_stop(c->devices[i]);
- } else {
- bcache_device_stop(c->devices[i]);
- }
---
-2.15.1
-
diff --git a/for-next/v2/v2-0009-bcache-fix-inaccurate-io-state-for-detached-bcach.patch b/for-next/v2/v2-0009-bcache-fix-inaccurate-io-state-for-detached-bcach.patch
deleted file mode 100644
index 6dc4aad..0000000
--- a/for-next/v2/v2-0009-bcache-fix-inaccurate-io-state-for-detached-bcach.patch
+++ /dev/null
@@ -1,118 +0,0 @@
-From 2ba5a1d14df44bfca8f0c27e13328e6766c5b387 Mon Sep 17 00:00:00 2001
-From: Tang Junhui <tang.junhui@zte.com.cn>
-Date: Tue, 9 Jan 2018 10:27:11 +0800
-Subject: [PATCH v2 09/12] bcache: fix inaccurate io state for detached bcache
- devices
-
-When we run IO on a detached device and run iostat to show the IO status,
-normally it looks like below (some fields omitted):
-Device: ... avgrq-sz avgqu-sz await r_await w_await svctm %util
-sdd ... 15.89 0.53 1.82 0.20 2.23 1.81 52.30
-bcache0 ... 15.89 115.42 0.00 0.00 0.00 2.40 69.60
-but after IO has stopped, there are still very large avgqu-sz and %util
-values, as shown below:
-Device: ... avgrq-sz avgqu-sz await r_await w_await svctm %util
-bcache0 ... 0 5326.32 0.00 0.00 0.00 0.00 100.10
-
-The reason for this issue is that only generic_start_io_acct() is called
-and generic_end_io_acct() is never called for a detached device in
-cached_dev_make_request(). See the code:
-//start generic_start_io_acct()
-generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
-if (cached_dev_get(dc)) {
- //will callback generic_end_io_acct()
-}
-else {
- //will not call generic_end_io_acct()
-}
-
-This patch calls generic_end_io_acct() in the end of IO for detached
-devices, so we can show IO state correctly.
-
-(Modified to use GFP_NOIO in kzalloc() by Coly Li)
-
-Signed-off-by: Tang Junhui <tang.junhui@zte.com.cn>
-Reviewed-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/request.c | 58 +++++++++++++++++++++++++++++++++++++++------
- 1 file changed, 51 insertions(+), 7 deletions(-)
-
-diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
-index 02296bda6384..e09c5ae745be 100644
---- a/drivers/md/bcache/request.c
-+++ b/drivers/md/bcache/request.c
-@@ -986,6 +986,55 @@ static void cached_dev_nodata(struct closure *cl)
- continue_at(cl, cached_dev_bio_complete, NULL);
- }
-
-+struct detached_dev_io_private {
-+ struct bcache_device *d;
-+ unsigned long start_time;
-+ bio_end_io_t *bi_end_io;
-+ void *bi_private;
-+};
-+
-+static void detatched_dev_end_io(struct bio *bio)
-+{
-+ struct detached_dev_io_private *ddip;
-+
-+ ddip = bio->bi_private;
-+ bio->bi_end_io = ddip->bi_end_io;
-+ bio->bi_private = ddip->bi_private;
-+
-+ generic_end_io_acct(ddip->d->disk->queue,
-+ bio_data_dir(bio),
-+ &ddip->d->disk->part0, ddip->start_time);
-+
-+ kfree(ddip);
-+
-+ bio->bi_end_io(bio);
-+}
-+
-+static void detached_dev_do_request(struct bcache_device *d, struct bio *bio)
-+{
-+ struct detached_dev_io_private *ddip;
-+ struct cached_dev *dc = container_of(d, struct cached_dev, disk);
-+
-+ /*
-+ * no need to call closure_get(&dc->disk.cl),
-+ * because upper layer had already opened bcache device,
-+ * which would call closure_get(&dc->disk.cl)
-+ */
-+ ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO);
-+ ddip->d = d;
-+ ddip->start_time = jiffies;
-+ ddip->bi_end_io = bio->bi_end_io;
-+ ddip->bi_private = bio->bi_private;
-+ bio->bi_end_io = detatched_dev_end_io;
-+ bio->bi_private = ddip;
-+
-+ if ((bio_op(bio) == REQ_OP_DISCARD) &&
-+ !blk_queue_discard(bdev_get_queue(dc->bdev)))
-+ bio->bi_end_io(bio);
-+ else
-+ generic_make_request(bio);
-+}
-+
- /* Cached devices - read & write stuff */
-
- static blk_qc_t cached_dev_make_request(struct request_queue *q,
-@@ -1028,13 +1077,8 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
- else
- cached_dev_read(dc, s);
- }
-- } else {
-- if ((bio_op(bio) == REQ_OP_DISCARD) &&
-- !blk_queue_discard(bdev_get_queue(dc->bdev)))
-- bio_endio(bio);
-- else
-- generic_make_request(bio);
-- }
-+ } else
-+ detached_dev_do_request(d, bio);
-
- return BLK_QC_T_NONE;
- }
---
-2.15.1
-
diff --git a/for-next/v2/v2-0010-bcache-add-backing_request_endio-for-bi_end_io-of.patch b/for-next/v2/v2-0010-bcache-add-backing_request_endio-for-bi_end_io-of.patch
deleted file mode 100644
index b1462a7..0000000
--- a/for-next/v2/v2-0010-bcache-add-backing_request_endio-for-bi_end_io-of.patch
+++ /dev/null
@@ -1,254 +0,0 @@
-From 2692ba986ec25127ee7ac904db109584ec53d44a Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 10 Jan 2018 21:01:48 +0800
-Subject: [PATCH v2 10/12] bcache: add backing_request_endio() for bi_end_io of
- attached backing device I/O
-
-In order to catch I/O errors of the backing device, a separate bi_end_io
-callback is required. Then a per backing device counter can record the
-number of I/O errors and retire the backing device if the counter reaches
-a per backing device I/O error limit.
-
-This patch adds backing_request_endio() to bcache backing device I/O code
-path, this is a preparation for further complicated backing device failure
-handling. So far there is no real code logic change, I make this change a
-separate patch to make sure it is stable and reliable for further work.
-
-Changelog:
-v2: newly added in this patch set.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
-Cc: Michael Lyle <mlyle@lyle.org>
----
- drivers/md/bcache/request.c | 95 +++++++++++++++++++++++++++++++++++--------
- drivers/md/bcache/super.c | 1 +
- drivers/md/bcache/writeback.c | 1 +
- 3 files changed, 81 insertions(+), 16 deletions(-)
-
-diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
-index e09c5ae745be..ad4cf71f7eab 100644
---- a/drivers/md/bcache/request.c
-+++ b/drivers/md/bcache/request.c
-@@ -139,6 +139,7 @@ static void bch_data_invalidate(struct closure *cl)
- }
-
- op->insert_data_done = true;
-+ /* get in bch_data_insert() */
- bio_put(bio);
- out:
- continue_at(cl, bch_data_insert_keys, op->wq);
-@@ -630,6 +631,38 @@ static void request_endio(struct bio *bio)
- closure_put(cl);
- }
-
-+static void backing_request_endio(struct bio *bio)
-+{
-+ struct closure *cl = bio->bi_private;
-+
-+ if (bio->bi_status) {
-+ struct search *s = container_of(cl, struct search, cl);
-+ /*
-+ * If a bio has REQ_PREFLUSH for writeback mode, it is
-+ * speically assembled in cached_dev_write() for a non-zero
-+ * write request which has REQ_PREFLUSH. we don't set
-+ * s->iop.status by this failure, the status will be decided
-+ * by result of bch_data_insert() operation.
-+ */
-+ if (unlikely(s->iop.writeback &&
-+ bio->bi_opf & REQ_PREFLUSH)) {
-+ char buf[BDEVNAME_SIZE];
-+
-+ bio_devname(bio, buf);
-+ pr_err("Can't flush %s: returned bi_status %i",
-+ buf, bio->bi_status);
-+ } else {
-+ /* set to orig_bio->bi_status in bio_complete() */
-+ s->iop.status = bio->bi_status;
-+ }
-+ s->recoverable = false;
-+ /* should count I/O error for backing device here */
-+ }
-+
-+ bio_put(bio);
-+ closure_put(cl);
-+}
-+
- static void bio_complete(struct search *s)
- {
- if (s->orig_bio) {
-@@ -644,13 +677,21 @@ static void bio_complete(struct search *s)
- }
- }
-
--static void do_bio_hook(struct search *s, struct bio *orig_bio)
-+static void do_bio_hook(struct search *s,
-+ struct bio *orig_bio,
-+ bio_end_io_t *end_io_fn)
- {
- struct bio *bio = &s->bio.bio;
-
- bio_init(bio, NULL, 0);
- __bio_clone_fast(bio, orig_bio);
-- bio->bi_end_io = request_endio;
-+ /*
-+ * bi_end_io can be set separately somewhere else, e.g. the
-+ * variants in,
-+ * - cache_bio->bi_end_io from cached_dev_cache_miss()
-+ * - n->bi_end_io from cache_lookup_fn()
-+ */
-+ bio->bi_end_io = end_io_fn;
- bio->bi_private = &s->cl;
-
- bio_cnt_set(bio, 3);
-@@ -676,7 +717,7 @@ static inline struct search *search_alloc(struct bio *bio,
- s = mempool_alloc(d->c->search, GFP_NOIO);
-
- closure_init(&s->cl, NULL);
-- do_bio_hook(s, bio);
-+ do_bio_hook(s, bio, request_endio);
-
- s->orig_bio = bio;
- s->cache_miss = NULL;
-@@ -743,10 +784,11 @@ static void cached_dev_read_error(struct closure *cl)
- trace_bcache_read_retry(s->orig_bio);
-
- s->iop.status = 0;
-- do_bio_hook(s, s->orig_bio);
-+ do_bio_hook(s, s->orig_bio, backing_request_endio);
-
- /* XXX: invalidate cache */
-
-+ /* I/O request sent to backing device */
- closure_bio_submit(s->iop.c, bio, cl);
- }
-
-@@ -859,7 +901,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
- bio_copy_dev(cache_bio, miss);
- cache_bio->bi_iter.bi_size = s->insert_bio_sectors << 9;
-
-- cache_bio->bi_end_io = request_endio;
-+ cache_bio->bi_end_io = backing_request_endio;
- cache_bio->bi_private = &s->cl;
-
- bch_bio_map(cache_bio, NULL);
-@@ -872,14 +914,16 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
- s->cache_miss = miss;
- s->iop.bio = cache_bio;
- bio_get(cache_bio);
-+ /* I/O request sent to backing device */
- closure_bio_submit(s->iop.c, cache_bio, &s->cl);
-
- return ret;
- out_put:
- bio_put(cache_bio);
- out_submit:
-- miss->bi_end_io = request_endio;
-+ miss->bi_end_io = backing_request_endio;
- miss->bi_private = &s->cl;
-+ /* I/O request sent to backing device */
- closure_bio_submit(s->iop.c, miss, &s->cl);
- return ret;
- }
-@@ -943,31 +987,48 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
- s->iop.bio = s->orig_bio;
- bio_get(s->iop.bio);
-
-- if ((bio_op(bio) != REQ_OP_DISCARD) ||
-- blk_queue_discard(bdev_get_queue(dc->bdev)))
-- closure_bio_submit(s->iop.c, bio, cl);
-+ if (bio_op(bio) == REQ_OP_DISCARD &&
-+ !blk_queue_discard(bdev_get_queue(dc->bdev)))
-+ goto insert_data;
-+
-+ /* I/O request sent to backing device */
-+ bio->bi_end_io = backing_request_endio;
-+ closure_bio_submit(s->iop.c, bio, cl);
-+
- } else if (s->iop.writeback) {
- bch_writeback_add(dc);
- s->iop.bio = bio;
-
- if (bio->bi_opf & REQ_PREFLUSH) {
-- /* Also need to send a flush to the backing device */
-- struct bio *flush = bio_alloc_bioset(GFP_NOIO, 0,
-- dc->disk.bio_split);
--
-+ /*
-+ * Also need to send a flush to the backing
-+ * device, if failed on backing device.
-+ */
-+ struct bio *flush;
-+
-+ flush = bio_alloc_bioset(GFP_NOIO, 0,
-+ dc->disk.bio_split);
-+ if (!flush) {
-+ s->iop.status = BLK_STS_RESOURCE;
-+ goto insert_data;
-+ }
- bio_copy_dev(flush, bio);
-- flush->bi_end_io = request_endio;
-+ flush->bi_end_io = backing_request_endio;
- flush->bi_private = cl;
- flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
--
-+ /* I/O request sent to backing device */
- closure_bio_submit(s->iop.c, flush, cl);
- }
-+ bch_writeback_add(dc);
-+
- } else {
- s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split);
--
-+ /* I/O request sent to backing device */
-+ bio->bi_end_io = backing_request_endio;
- closure_bio_submit(s->iop.c, bio, cl);
- }
-
-+insert_data:
- closure_call(&s->iop.cl, bch_data_insert, NULL, cl);
- continue_at(cl, cached_dev_write_complete, NULL);
- }
-@@ -981,6 +1042,7 @@ static void cached_dev_nodata(struct closure *cl)
- bch_journal_meta(s->iop.c, cl);
-
- /* If it's a flush, we send the flush to the backing device too */
-+ bio->bi_end_io = backing_request_endio;
- closure_bio_submit(s->iop.c, bio, cl);
-
- continue_at(cl, cached_dev_bio_complete, NULL);
-@@ -1078,6 +1140,7 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
- cached_dev_read(dc, s);
- }
- } else
-+ /* I/O request sent to backing device */
- detached_dev_do_request(d, bio);
-
- return BLK_QC_T_NONE;
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 97e3bb8e1aee..08a0b541a4da 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -265,6 +265,7 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent)
- bio->bi_private = dc;
-
- closure_get(cl);
-+ /* I/O request sent to backing device */
- __write_super(&dc->sb, bio);
-
- closure_return_with_destructor(cl, bch_write_bdev_super_unlock);
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index e97e2afead3e..878103b9f2ec 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -289,6 +289,7 @@ static void write_dirty(struct closure *cl)
- bio_set_dev(&io->bio, io->dc->bdev);
- io->bio.bi_end_io = dirty_endio;
-
-+ /* I/O request sent to backing device */
- closure_bio_submit(io->dc->disk.c, &io->bio, cl);
- }
-
---
-2.15.1
-
diff --git a/for-next/v2/v2-0011-bcache-add-io_disable-to-struct-cached_dev.patch b/for-next/v2/v2-0011-bcache-add-io_disable-to-struct-cached_dev.patch
deleted file mode 100644
index 5158dc4..0000000
--- a/for-next/v2/v2-0011-bcache-add-io_disable-to-struct-cached_dev.patch
+++ /dev/null
@@ -1,235 +0,0 @@
-From 8631a1c8cf5e224282680bda1d590776f9960a33 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 10 Jan 2018 21:33:45 +0800
-Subject: [PATCH v2 11/12] bcache: add io_disable to struct cached_dev
-
-If a bcache device is configured to writeback mode, current code does not
-handle write I/O errors on backing devices properly.
-
-In writeback mode, a write request is written to the cache device, and
-later flushed to the backing device. If I/O fails when writing from the
-cache device to the backing device, bcache code just ignores the error and
-upper layer code is NOT notified that the backing device is broken.
-
-This patch tries to handle backing device failure like how the cache device
-failure is handled,
-- Add an error counter 'io_errors' and error limit 'error_limit' in struct
- cached_dev. Add another io_disable to struct cached_dev to disable I/Os
- on the problematic backing device.
-- When I/O error happens on backing device, increase io_errors counter. And
- if io_errors reaches error_limit, set cached_dev->io_disable to true, and
- stop the bcache device.
-
-The result is, if backing device is broken or disconnected, and I/O errors
-reach its error limit, backing device will be disabled and the associated
-bcache device will be removed from system.
-
-Changelog:
-v2: indeed this is new added in v2 patch set.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Hannes Reinecke <hare@suse.com>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
----
- drivers/md/bcache/bcache.h | 7 +++++++
- drivers/md/bcache/io.c | 14 ++++++++++++++
- drivers/md/bcache/request.c | 14 ++++++++++++--
- drivers/md/bcache/super.c | 22 ++++++++++++++++++++++
- drivers/md/bcache/sysfs.c | 15 ++++++++++++++-
- 5 files changed, 69 insertions(+), 3 deletions(-)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index c41736960045..5a811959392d 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -360,6 +360,7 @@ struct cached_dev {
- unsigned sequential_cutoff;
- unsigned readahead;
-
-+ unsigned io_disable:1;
- unsigned verify:1;
- unsigned bypass_torture_test:1;
-
-@@ -379,6 +380,10 @@ struct cached_dev {
- unsigned writeback_rate_i_term_inverse;
- unsigned writeback_rate_p_term_inverse;
- unsigned writeback_rate_minimum;
-+
-+#define DEFAULT_CACHED_DEV_ERROR_LIMIT 64
-+ atomic_t io_errors;
-+ unsigned error_limit;
- };
-
- enum alloc_reserve {
-@@ -882,6 +887,7 @@ static inline void closure_bio_submit(struct cache_set *c,
-
- /* Forward declarations */
-
-+void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio);
- void bch_count_io_errors(struct cache *, blk_status_t, int, const char *);
- void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
- blk_status_t, const char *);
-@@ -909,6 +915,7 @@ int bch_bucket_alloc_set(struct cache_set *, unsigned,
- struct bkey *, int, bool);
- bool bch_alloc_sectors(struct cache_set *, struct bkey *, unsigned,
- unsigned, unsigned, bool);
-+bool bch_cached_dev_error(struct cached_dev *dc);
-
- __printf(2, 3)
- bool bch_cache_set_error(struct cache_set *, const char *, ...);
-diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
-index 8013ecbcdbda..7fac97ae036e 100644
---- a/drivers/md/bcache/io.c
-+++ b/drivers/md/bcache/io.c
-@@ -50,6 +50,20 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c,
- }
-
- /* IO errors */
-+void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio)
-+{
-+ char buf[BDEVNAME_SIZE];
-+ unsigned errors;
-+
-+ WARN_ONCE(!dc, "NULL pointer of struct cached_dev");
-+
-+ errors = atomic_add_return(1, &dc->io_errors);
-+ if (errors < dc->error_limit)
-+ pr_err("%s: IO error on backing device, unrecoverable",
-+ bio_devname(bio, buf));
-+ else
-+ bch_cached_dev_error(dc);
-+}
-
- void bch_count_io_errors(struct cache *ca,
- blk_status_t error,
-diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
-index ad4cf71f7eab..386b388ce296 100644
---- a/drivers/md/bcache/request.c
-+++ b/drivers/md/bcache/request.c
-@@ -637,6 +637,8 @@ static void backing_request_endio(struct bio *bio)
-
- if (bio->bi_status) {
- struct search *s = container_of(cl, struct search, cl);
-+ struct cached_dev *dc = container_of(s->d,
-+ struct cached_dev, disk);
- /*
- * If a bio has REQ_PREFLUSH for writeback mode, it is
- * speically assembled in cached_dev_write() for a non-zero
-@@ -657,6 +659,7 @@ static void backing_request_endio(struct bio *bio)
- }
- s->recoverable = false;
- /* should count I/O error for backing device here */
-+ bch_count_backing_io_errors(dc, bio);
- }
-
- bio_put(bio);
-@@ -1067,8 +1070,14 @@ static void detatched_dev_end_io(struct bio *bio)
- bio_data_dir(bio),
- &ddip->d->disk->part0, ddip->start_time);
-
-- kfree(ddip);
-+ if (bio->bi_status) {
-+ struct cached_dev *dc = container_of(ddip->d,
-+ struct cached_dev, disk);
-+ /* should count I/O error for backing device here */
-+ bch_count_backing_io_errors(dc, bio);
-+ }
-
-+ kfree(ddip);
- bio->bi_end_io(bio);
- }
-
-@@ -1107,7 +1116,8 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
- struct cached_dev *dc = container_of(d, struct cached_dev, disk);
- int rw = bio_data_dir(bio);
-
-- if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
-+ if (unlikely((d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags)) ||
-+ dc->io_disable)) {
- bio->bi_status = BLK_STS_IOERR;
- bio_endio(bio);
- return BLK_QC_T_NONE;
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 08a0b541a4da..14fce3623770 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1188,6 +1188,10 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
- max(dc->disk.disk->queue->backing_dev_info->ra_pages,
- q->backing_dev_info->ra_pages);
-
-+ atomic_set(&dc->io_errors, 0);
-+ dc->io_disable = false;
-+ dc->error_limit = DEFAULT_CACHED_DEV_ERROR_LIMIT;
-+
- bch_cached_dev_request_init(dc);
- bch_cached_dev_writeback_init(dc);
- return 0;
-@@ -1339,6 +1343,24 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size)
- return flash_dev_run(c, u);
- }
-
-+bool bch_cached_dev_error(struct cached_dev *dc)
-+{
-+ char name[BDEVNAME_SIZE];
-+
-+ if (!dc || test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags))
-+ return false;
-+
-+ dc->io_disable = true;
-+ /* make others know io_disable is true earlier */
-+ smp_mb();
-+
-+ pr_err("bcache: stop %s: too many IO errors on backing device %s\n",
-+ dc->disk.name, bdevname(dc->bdev, name));
-+
-+ bcache_device_stop(&dc->disk);
-+ return true;
-+}
-+
- /* Cache set */
-
- __printf(2, 3)
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index afb051bcfca1..7288927f2a47 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -131,7 +131,9 @@ SHOW(__bch_cached_dev)
- var_print(writeback_delay);
- var_print(writeback_percent);
- sysfs_hprint(writeback_rate, dc->writeback_rate.rate << 9);
--
-+ sysfs_hprint(io_errors, atomic_read(&dc->io_errors));
-+ sysfs_printf(io_error_limit, "%i", dc->error_limit);
-+ sysfs_printf(io_disable, "%i", dc->io_disable);
- var_print(writeback_rate_update_seconds);
- var_print(writeback_rate_i_term_inverse);
- var_print(writeback_rate_p_term_inverse);
-@@ -223,6 +225,14 @@ STORE(__cached_dev)
- d_strtoul(writeback_rate_i_term_inverse);
- d_strtoul_nonzero(writeback_rate_p_term_inverse);
-
-+ sysfs_strtoul_clamp(io_error_limit, dc->error_limit, 0, INT_MAX);
-+
-+ if (attr == &sysfs_io_disable) {
-+ int v = strtoul_or_return(buf);
-+
-+ dc->io_disable = v ? 1 : 0;
-+ }
-+
- d_strtoi_h(sequential_cutoff);
- d_strtoi_h(readahead);
-
-@@ -330,6 +340,9 @@ static struct attribute *bch_cached_dev_files[] = {
- &sysfs_writeback_rate_i_term_inverse,
- &sysfs_writeback_rate_p_term_inverse,
- &sysfs_writeback_rate_debug,
-+ &sysfs_errors,
-+ &sysfs_io_error_limit,
-+ &sysfs_io_disable,
- &sysfs_dirty_data,
- &sysfs_stripe_size,
- &sysfs_partial_stripes_expensive,
---
-2.15.1
-
diff --git a/for-next/v2/v2-0012-bcache-stop-bcache-device-when-backing-device-is-.patch b/for-next/v2/v2-0012-bcache-stop-bcache-device-when-backing-device-is-.patch
deleted file mode 100644
index 7b3f6b1..0000000
--- a/for-next/v2/v2-0012-bcache-stop-bcache-device-when-backing-device-is-.patch
+++ /dev/null
@@ -1,151 +0,0 @@
-From e32b3038feba37429496f12a997fa59ea889d2ed Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sat, 13 Jan 2018 17:31:44 +0800
-Subject: [PATCH v2 12/12] bcache: stop bcache device when backing device is
- offline
-
-Currently bcache does not handle backing device failure, if backing
-device is offline and disconnected from system, its bcache device can still
-be accessible. If the bcache device is in writeback mode, I/O requests can
-even succeed if the requests hit on cache device. That is to say, when and
-how bcache handles offline backing device is undefined.
-
-This patch tries to handle backing device offline in a rather simple way,
-- Add cached_dev->status_update_thread kernel thread to update backing
- device status in every 1 second.
-- Add cached_dev->offline_seconds to record how many seconds the backing
- device is observed to be offline. If the backing device is offline for
- BACKING_DEV_OFFLINE_TIMEOUT (5) seconds, set dc->io_disable to 1 and
- call bcache_device_stop() to stop the bcache device which is linked to the
- offline backing device.
-
-Now if a backing device is offline for BACKING_DEV_OFFLINE_TIMEOUT seconds,
-its bcache device will be removed, then user space applications writing to
-it will get an error immediately, and can handle the device failure in time.
-
-This patch is quite simple, does not handle more complicated situations.
-Once the bcache device is stopped, users need to recover the backing
-device, register and attach it manually.
-
-Changelog:
-v2: this is new added in v2 patch set.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Hannes Reinecke <hare@suse.com>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
----
- drivers/md/bcache/bcache.h | 2 ++
- drivers/md/bcache/super.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++
- 2 files changed, 57 insertions(+)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 5a811959392d..9eedb35d01bc 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -338,6 +338,7 @@ struct cached_dev {
-
- struct keybuf writeback_keys;
-
-+ struct task_struct *status_update_thread;
- /*
- * Order the write-half of writeback operations strongly in dispatch
- * order. (Maintain LBA order; don't allow reads completing out of
-@@ -384,6 +385,7 @@ struct cached_dev {
- #define DEFAULT_CACHED_DEV_ERROR_LIMIT 64
- atomic_t io_errors;
- unsigned error_limit;
-+ unsigned offline_seconds;
- };
-
- enum alloc_reserve {
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 14fce3623770..85adf1e29d11 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -646,6 +646,11 @@ static int ioctl_dev(struct block_device *b, fmode_t mode,
- unsigned int cmd, unsigned long arg)
- {
- struct bcache_device *d = b->bd_disk->private_data;
-+ struct cached_dev *dc = container_of(d, struct cached_dev, disk);
-+
-+ if (dc->io_disable)
-+ return -EIO;
-+
- return d->ioctl(d, mode, cmd, arg);
- }
-
-@@ -856,6 +861,45 @@ static void calc_cached_dev_sectors(struct cache_set *c)
- c->cached_dev_sectors = sectors;
- }
-
-+#define BACKING_DEV_OFFLINE_TIMEOUT 5
-+static int cached_dev_status_update(void *arg)
-+{
-+ struct cached_dev *dc = arg;
-+ struct request_queue *q;
-+ char buf[BDEVNAME_SIZE];
-+
-+ /*
-+ * If this delayed worker is stopping outside, directly quit here.
-+ * dc->io_disable might be set via sysfs interface, so check it
-+ * here too.
-+ */
-+ while (!kthread_should_stop() && !dc->io_disable) {
-+ q = bdev_get_queue(dc->bdev);
-+ if (blk_queue_dying(q))
-+ dc->offline_seconds++;
-+ else
-+ dc->offline_seconds = 0;
-+
-+ if (dc->offline_seconds >= BACKING_DEV_OFFLINE_TIMEOUT) {
-+ pr_err("%s: device offline for %d seconds",
-+ bdevname(dc->bdev, buf),
-+ BACKING_DEV_OFFLINE_TIMEOUT);
-+ pr_err("%s: disable I/O request due to backing "
-+ "device offline", dc->disk.name);
-+ dc->io_disable = true;
-+ /* let others know earlier that io_disable is true */
-+ smp_mb();
-+ bcache_device_stop(&dc->disk);
-+ break;
-+ }
-+
-+ schedule_timeout_interruptible(HZ);
-+ }
-+
-+ dc->status_update_thread = NULL;
-+ return 0;
-+}
-+
- void bch_cached_dev_run(struct cached_dev *dc)
- {
- struct bcache_device *d = &dc->disk;
-@@ -898,6 +942,15 @@ void bch_cached_dev_run(struct cached_dev *dc)
- if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") ||
- sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache"))
- pr_debug("error creating sysfs link");
-+
-+ dc->status_update_thread = kthread_run(cached_dev_status_update,
-+ dc,
-+ "bcache_status_update");
-+ if (IS_ERR(dc->status_update_thread)) {
-+ pr_warn("bcache: failed to create bcache_status_update "
-+ "kthread, continue to run without monitoring backing "
-+ "device status");
-+ }
- }
-
- /*
-@@ -1118,6 +1171,8 @@ static void cached_dev_free(struct closure *cl)
- kthread_stop(dc->writeback_thread);
- if (dc->writeback_write_wq)
- destroy_workqueue(dc->writeback_write_wq);
-+ if (!IS_ERR_OR_NULL(dc->status_update_thread))
-+ kthread_stop(dc->status_update_thread);
-
- if (atomic_read(&dc->running))
- bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
---
-2.15.1
-
diff --git a/for-next/v3/v3-0000-cover-letter.patch b/for-next/v3/v3-0000-cover-letter.patch
deleted file mode 100644
index 3a5de04..0000000
--- a/for-next/v3/v3-0000-cover-letter.patch
+++ /dev/null
@@ -1,92 +0,0 @@
-From 93be9a0e7f3112074702dd070c07818b2fe3d568 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sun, 14 Jan 2018 22:21:43 +0800
-Subject: [PATCH v3 00/13] bcache: device failure handling improvement
-
-Hi maintainers and folks,
-
-This patch set tries to improve bcache device failure handling, includes
-cache device and backing device failures.
-
-The basic idea to handle failed cache device is,
-- Unregister cache set
-- Detach all backing devices which are attached to this cache set
-- Stop all the detached bcache devices
-- Stop all flash only volume on the cache set
-The above process is named 'cache set retire' by me. The result of cache
-set retire is, cache set and bcache devices are all removed, following
-I/O requests will fail immediately to notify upper layer or user
-space code that the cache device is failed or disconnected.
-
-For failed backing device, there are two kinds of failures to handle,
-- If device is disconnected, and kernel thread dc->status_update_thread
- finds it is offline for BACKING_DEV_OFFLINE_TIMEOUT (5) seconds, the
- kernel thread will set dc->io_disable and call bcache_device_stop() to
- stop and remove the bcache device from system.
-- If device is alive but returns too many I/O errors, after errors number
- exceeds dc->error_limit, call bch_cached_dev_error() to set
- dc->io_disable and stop bcache device. Then the broken backing device
- and its bcache device will be removed from system.
-
-The v3 patch set adds one more patch to fix the detach issue found in
-v2 patch set.
-
-A basic testing covered with writethrough, writeback, writearound mode, and
-read/write/readwrite workloads, cache set or bcache device can be removed
-by too many I/O errors or delete the device. For plugging out physical
-disks, a kernel bug triggers rcu oops in __do_softirq() and locks up all
-following accesses to the disconnected disk, this blocks my testing.
-
-Open issues:
-1, A kernel bug in __do_softirq() when plugging out hard disk with heavy
- I/O blocks my physical disk disconnection test. This is not problem
- introduced from this patch set, if any one knows this bug, please give
- me a hint.
-
-Changelog:
-v3: fix detach issue found in v2 patch set.
-v2: fixes all problems found in v1 review.
- add patches to handle backing device failure.
- add one more patch to set writeback_rate_update_seconds range.
- include a patch from Junhui Tang.
-v1: the initial version, only handles cache device failure.
-
-Any comment, question and review are warmly welcome. Thanks in advance.
-
-Coly Li
----
-
-Coly Li (12):
- bcache: set writeback_rate_update_seconds in range [1, 60] seconds
- bcache: properly set task state in bch_writeback_thread()
- bcache: set task properly in allocator_wait()
- bcache: fix cached_dev->count usage for bch_cache_set_error()
- bcache: quit dc->writeback_thread when BCACHE_DEV_DETACHING is set
- bcache: stop dc->writeback_rate_update properly
- bcache: set error_limit correctly
- bcache: add CACHE_SET_IO_DISABLE to struct cache_set flags
- bcache: stop all attached bcache devices for a retired cache set
- bcache: add backing_request_endio() for bi_end_io of attached backing
- device I/O
- bcache: add io_disable to struct cached_dev
- bcache: stop bcache device when backing device is offline
-
-Tang Junhui (1):
- bcache: fix inaccurate io state for detached bcache devices
-
- drivers/md/bcache/alloc.c | 5 +-
- drivers/md/bcache/bcache.h | 37 ++++++++-
- drivers/md/bcache/btree.c | 10 ++-
- drivers/md/bcache/io.c | 16 +++-
- drivers/md/bcache/journal.c | 4 +-
- drivers/md/bcache/request.c | 187 +++++++++++++++++++++++++++++++++++-------
- drivers/md/bcache/super.c | 134 ++++++++++++++++++++++++++++--
- drivers/md/bcache/sysfs.c | 45 +++++++++-
- drivers/md/bcache/util.h | 6 --
- drivers/md/bcache/writeback.c | 99 ++++++++++++++++++----
- drivers/md/bcache/writeback.h | 5 +-
- 11 files changed, 474 insertions(+), 74 deletions(-)
-
---
-2.15.1
-
diff --git a/for-next/v3/v3-0001-bcache-set-writeback_rate_update_seconds-in-range.patch b/for-next/v3/v3-0001-bcache-set-writeback_rate_update_seconds-in-range.patch
deleted file mode 100644
index 23b0003..0000000
--- a/for-next/v3/v3-0001-bcache-set-writeback_rate_update_seconds-in-range.patch
+++ /dev/null
@@ -1,72 +0,0 @@
-From 8fc565e105fdfa08d42bde28056088cdb0f93fbc Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sat, 13 Jan 2018 15:11:03 +0800
-Subject: [PATCH v3 01/13] bcache: set writeback_rate_update_seconds in range
- [1, 60] seconds
-
-dc->writeback_rate_update_seconds can be set via sysfs and its value can
-be set to [1, ULONG_MAX]. It does not make sense to set such a large
-value, 60 seconds is long enough value considering the default 5 seconds
-works well for long time.
-
-Because dc->writeback_rate_update is a special delayed work, it re-arms
-itself inside the delayed work routine update_writeback_rate(). When
-stopping it by cancel_delayed_work_sync(), there should be a timeout to
-wait and make sure the re-armed delayed work is stopped too. A small max
-value of dc->writeback_rate_update_seconds is also helpful to decide a
-reasonable small timeout.
-
-This patch limits sysfs interface to set dc->writeback_rate_update_seconds
-in range of [1, 60] seconds, and replaces the hand-coded number by macros.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/sysfs.c | 3 +++
- drivers/md/bcache/writeback.c | 2 +-
- drivers/md/bcache/writeback.h | 3 +++
- 3 files changed, 7 insertions(+), 1 deletion(-)
-
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index b4184092c727..a74a752c9e0f 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -215,6 +215,9 @@ STORE(__cached_dev)
- sysfs_strtoul_clamp(writeback_rate,
- dc->writeback_rate.rate, 1, INT_MAX);
-
-+ sysfs_strtoul_clamp(writeback_rate_update_seconds,
-+ dc->writeback_rate_update_seconds,
-+ 1, WRITEBACK_RATE_UPDATE_SECS_MAX);
- d_strtoul_nonzero(writeback_rate_update_seconds);
- d_strtoul(writeback_rate_i_term_inverse);
- d_strtoul_nonzero(writeback_rate_p_term_inverse);
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index 51306a19ab03..0ade883b6316 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -652,7 +652,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
- dc->writeback_rate.rate = 1024;
- dc->writeback_rate_minimum = 8;
-
-- dc->writeback_rate_update_seconds = 5;
-+ dc->writeback_rate_update_seconds = WRITEBACK_RATE_UPDATE_SECS_DEFAULT;
- dc->writeback_rate_p_term_inverse = 40;
- dc->writeback_rate_i_term_inverse = 10000;
-
-diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
-index 66f1c527fa24..587b25599856 100644
---- a/drivers/md/bcache/writeback.h
-+++ b/drivers/md/bcache/writeback.h
-@@ -8,6 +8,9 @@
- #define MAX_WRITEBACKS_IN_PASS 5
- #define MAX_WRITESIZE_IN_PASS 5000 /* *512b */
-
-+#define WRITEBACK_RATE_UPDATE_SECS_MAX 60
-+#define WRITEBACK_RATE_UPDATE_SECS_DEFAULT 5
-+
- /*
- * 14 (16384ths) is chosen here as something that each backing device
- * should be a reasonable fraction of the share, and not to blow up
---
-2.15.1
-
diff --git a/for-next/v3/v3-0002-bcache-properly-set-task-state-in-bch_writeback_t.patch b/for-next/v3/v3-0002-bcache-properly-set-task-state-in-bch_writeback_t.patch
deleted file mode 100644
index a2844e3..0000000
--- a/for-next/v3/v3-0002-bcache-properly-set-task-state-in-bch_writeback_t.patch
+++ /dev/null
@@ -1,91 +0,0 @@
-From 48dd314ed3cad040372dec28ddc55991fb3be870 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Mon, 8 Jan 2018 22:11:01 +0800
-Subject: [PATCH v3 02/13] bcache: properly set task state in
- bch_writeback_thread()
-
-Kernel thread routine bch_writeback_thread() has the following code block,
-
-447 down_write(&dc->writeback_lock);
-448~450 if (check conditions) {
-451 up_write(&dc->writeback_lock);
-452 set_current_state(TASK_INTERRUPTIBLE);
-453
-454 if (kthread_should_stop())
-455 return 0;
-456
-457 schedule();
-458 continue;
-459 }
-
-If condition check is true, its task state is set to TASK_INTERRUPTIBLE
-and call schedule() to wait for others to wake up it.
-
-There are 2 issues in current code,
-1, Task state is set to TASK_INTERRUPTIBLE after the condition checks, if
- another process changes the condition and call wake_up_process(dc->
- writeback_thread), then at line 452 task state is set back to
- TASK_INTERRUPTIBLE, the writeback kernel thread will lose a chance to be
- woken up.
-2, At line 454 if kthread_should_stop() is true, writeback kernel thread
- will return to kernel/kthread.c:kthread() with TASK_INTERRUPTIBLE and
- call do_exit(). It is not good to enter do_exit() with task state
- TASK_INTERRUPTIBLE, in following code path might_sleep() is called and a
- warning message is reported by __might_sleep(): "WARNING: do not call
- blocking ops when !TASK_RUNNING; state=1 set at [xxxx]".
-
-For the first issue, task state should be set before condition checks.
-Indeed, because dc->writeback_lock is required when modifying all the
-conditions, calling set_current_state() inside code block where dc->
-writeback_lock is held is safe. But this is quite implicit, so I still move
-set_current_state() before all the condition checks.
-
-For the second issue, frankly speaking it does not hurt when kernel thread
-exits with TASK_INTERRUPTIBLE state, but this warning message scares users,
-makes them feel there might be something risky with bcache and hurt their
-data. Setting task state to TASK_RUNNING before returning fixes this
-problem.
-
-Changelog:
-v2: fix the race issue in v1 patch.
-v1: initial buggy fix.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Hannes Reinecke <hare@suse.de>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
----
- drivers/md/bcache/writeback.c | 7 +++++--
- 1 file changed, 5 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index 0ade883b6316..f1d2fc15abcc 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -564,18 +564,21 @@ static int bch_writeback_thread(void *arg)
-
- while (!kthread_should_stop()) {
- down_write(&dc->writeback_lock);
-+ set_current_state(TASK_INTERRUPTIBLE);
- if (!atomic_read(&dc->has_dirty) ||
- (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
- !dc->writeback_running)) {
- up_write(&dc->writeback_lock);
-- set_current_state(TASK_INTERRUPTIBLE);
-
-- if (kthread_should_stop())
-+ if (kthread_should_stop()) {
-+ set_current_state(TASK_RUNNING);
- return 0;
-+ }
-
- schedule();
- continue;
- }
-+ set_current_state(TASK_RUNNING);
-
- searched_full_index = refill_dirty(dc);
-
---
-2.15.1
-
diff --git a/for-next/v3/v3-0003-bcache-set-task-properly-in-allocator_wait.patch b/for-next/v3/v3-0003-bcache-set-task-properly-in-allocator_wait.patch
deleted file mode 100644
index a665eb5..0000000
--- a/for-next/v3/v3-0003-bcache-set-task-properly-in-allocator_wait.patch
+++ /dev/null
@@ -1,65 +0,0 @@
-From 37adf3e3e864ef985da85787b9662faed23ddf25 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Mon, 8 Jan 2018 22:45:51 +0800
-Subject: [PATCH v3 03/13] bcache: set task properly in allocator_wait()
-
-Kernel thread routine bch_allocator_thread() references macro
-allocator_wait() to wait for a condition or quit to do_exit()
-when kthread_should_stop() is true. Here is the code block,
-
-284 while (1) { \
-285 set_current_state(TASK_INTERRUPTIBLE); \
-286 if (cond) \
-287 break; \
-288 \
-289 mutex_unlock(&(ca)->set->bucket_lock); \
-290 if (kthread_should_stop()) \
-291 return 0; \
-292 \
-293 schedule(); \
-294 mutex_lock(&(ca)->set->bucket_lock); \
-295 } \
-296 __set_current_state(TASK_RUNNING); \
-
-At line 285, task state is set to TASK_INTERRUPTIBLE, if at line 290
-kthread_should_stop() is true, the kernel thread will terminate and return
-to kernel/kthread.c:kthread(), then calls do_exit() with TASK_INTERRUPTIBLE
-state. This is not a suggested behavior and a warning message will be
-reported by might_sleep() in do_exit() code path: "WARNING: do not call
-blocking ops when !TASK_RUNNING; state=1 set at [xxxx]".
-
-This patch fixes this problem by setting task state to TASK_RUNNING if
-kthread_should_stop() is true and before kernel thread returns back to
-kernel/kthread.c:kthread().
-
-Changelog:
-v2: fix the race issue in v1 patch.
-v1: initial buggy fix.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Hannes Reinecke <hare@suse.de>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
----
- drivers/md/bcache/alloc.c | 4 +++-
- 1 file changed, 3 insertions(+), 1 deletion(-)
-
-diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
-index 6cc6c0f9c3a9..458e1d38577d 100644
---- a/drivers/md/bcache/alloc.c
-+++ b/drivers/md/bcache/alloc.c
-@@ -287,8 +287,10 @@ do { \
- break; \
- \
- mutex_unlock(&(ca)->set->bucket_lock); \
-- if (kthread_should_stop()) \
-+ if (kthread_should_stop()) { \
-+ set_current_state(TASK_RUNNING); \
- return 0; \
-+ } \
- \
- schedule(); \
- mutex_lock(&(ca)->set->bucket_lock); \
---
-2.15.1
-
diff --git a/for-next/v3/v3-0004-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch b/for-next/v3/v3-0004-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch
deleted file mode 100644
index 2aa735e..0000000
--- a/for-next/v3/v3-0004-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch
+++ /dev/null
@@ -1,178 +0,0 @@
-From fffa2563deae795ee82d5e46f089d68ca13a9864 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Mon, 8 Jan 2018 23:05:58 +0800
-Subject: [PATCH v3 04/13] bcache: fix cached_dev->count usage for
- bch_cache_set_error()
-
-When bcache metadata I/O fails, bcache will call bch_cache_set_error()
-to retire the whole cache set. The expected behavior to retire a cache
-set is to unregister the cache set, and unregister all backing device
-attached to this cache set, then remove sysfs entries of the cache set
-and all attached backing devices, finally release memory of structs
-cache_set, cache, cached_dev and bcache_device.
-
-In my testing when journal I/O failure triggered by disconnected cache
-device, sometimes the cache set cannot be retired, and its sysfs
-entry /sys/fs/bcache/<uuid> still exists and the backing device also
-references it. This is not expected behavior.
-
-When metadata I/O fails, the call sequence to retire whole cache set is,
- bch_cache_set_error()
- bch_cache_set_unregister()
- bch_cache_set_stop()
- __cache_set_unregister() <- called as callback by calling
- closure_queue(&c->caching)
- cache_set_flush() <- called as a callback when refcount
- of cache_set->caching is 0
- cache_set_free() <- called as a callback when refcount
- of cache_set->cl is 0
- bch_cache_set_release() <- called as a callback when refcount
- of cache_set->kobj is 0
-
-I find if kernel thread bch_writeback_thread() quits while-loop when
-kthread_should_stop() is true and searched_full_index is false, closure
-callback cache_set_flush() set by continue_at() will never be called. The
-result is, bcache fails to retire whole cache set.
-
-cache_set_flush() will be called when refcount of closure c->caching is 0,
-and in function bcache_device_detach() refcount of closure c->caching is
-released to 0 by closure_put(). In metadata error code path, function
-bcache_device_detach() is called by cached_dev_detach_finish(). This is a
-callback routine being called when cached_dev->count is 0. This refcount
-is decreased by cached_dev_put().
-
-The above dependence indicates, cache_set_flush() will be called when
-refcount of cache_set->cl is 0, and refcount of cache_set->cl to be 0
-when refcount of cached_dev->count is 0.
-
-The reason why sometimes cached_dev->count is not 0 (when metadata I/O fails
-and bch_cache_set_error() called) is, in bch_writeback_thread(), refcount
-of cached_dev is not decreased properly.
-
-In bch_writeback_thread(), cached_dev_put() is called only when
-searched_full_index is true and cached_dev->writeback_keys is empty, a.k.a
-there is no dirty data on cache. In most of run time it is correct, but
-when bch_writeback_thread() quits the while-loop while cache is still
-dirty, current code forgets to call cached_dev_put() before this kernel
-thread exits. This is why sometimes cache_set_flush() is not executed and
-cache set fails to be retired.
-
-The reason to call cached_dev_put() in bch_writeback_thread() is, when the
-cache device changes from clean to dirty, cached_dev_get() is called, to
-make sure during writeback operations both backing and cache devices
-won't be released.
-
-Adding following code in bch_writeback_thread() does not work,
- static int bch_writeback_thread(void *arg)
- }
-
-+ if (atomic_read(&dc->has_dirty))
-+ cached_dev_put()
-+
- return 0;
- }
-because writeback kernel thread can be waken up and start via sysfs entry:
- echo 1 > /sys/block/bcache<N>/bcache/writeback_running
-It is difficult to check whether backing device is dirty without race and
-extra lock. So the above modification will introduce potential refcount
-underflow in some conditions.
-
-The correct fix is, to take cached dev refcount when creating the kernel
-thread, and put it before the kernel thread exits. Then bcache does not
-need to take a cached dev refcount when cache turns from clean to dirty,
-or to put a cached dev refcount when cache turns from dirty to clean. The
-writeback kernel thread is always safe to reference data structure from
-cache set, cache and cached device (because a refcount of cache device is
-taken for it already), and no matter the kernel thread is stopped by I/O
-errors or system reboot, cached_dev->count can always be used correctly.
-
-The patch is simple, but understanding how it works is quite complicated.
-
-Changelog:
-v2: set dc->writeback_thread to NULL in this patch, as suggested by Hannes.
-v1: initial version for review.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Reviewed-by: Hannes Reinecke <hare@suse.com>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
----
- drivers/md/bcache/super.c | 1 -
- drivers/md/bcache/writeback.c | 11 ++++++++---
- drivers/md/bcache/writeback.h | 2 --
- 3 files changed, 8 insertions(+), 6 deletions(-)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 133b81225ea9..d14e09cce2f6 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1052,7 +1052,6 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
- if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
- bch_sectors_dirty_init(&dc->disk);
- atomic_set(&dc->has_dirty, 1);
-- refcount_inc(&dc->count);
- bch_writeback_queue(dc);
- }
-
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index f1d2fc15abcc..b280c134dd4d 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -572,7 +572,7 @@ static int bch_writeback_thread(void *arg)
-
- if (kthread_should_stop()) {
- set_current_state(TASK_RUNNING);
-- return 0;
-+ break;
- }
-
- schedule();
-@@ -585,7 +585,6 @@ static int bch_writeback_thread(void *arg)
- if (searched_full_index &&
- RB_EMPTY_ROOT(&dc->writeback_keys.keys)) {
- atomic_set(&dc->has_dirty, 0);
-- cached_dev_put(dc);
- SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN);
- bch_write_bdev_super(dc, NULL);
- }
-@@ -606,6 +605,9 @@ static int bch_writeback_thread(void *arg)
- }
- }
-
-+ dc->writeback_thread = NULL;
-+ cached_dev_put(dc);
-+
- return 0;
- }
-
-@@ -669,10 +671,13 @@ int bch_cached_dev_writeback_start(struct cached_dev *dc)
- if (!dc->writeback_write_wq)
- return -ENOMEM;
-
-+ cached_dev_get(dc);
- dc->writeback_thread = kthread_create(bch_writeback_thread, dc,
- "bcache_writeback");
-- if (IS_ERR(dc->writeback_thread))
-+ if (IS_ERR(dc->writeback_thread)) {
-+ cached_dev_put(dc);
- return PTR_ERR(dc->writeback_thread);
-+ }
-
- schedule_delayed_work(&dc->writeback_rate_update,
- dc->writeback_rate_update_seconds * HZ);
-diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
-index 587b25599856..0bba8f1c6cdf 100644
---- a/drivers/md/bcache/writeback.h
-+++ b/drivers/md/bcache/writeback.h
-@@ -105,8 +105,6 @@ static inline void bch_writeback_add(struct cached_dev *dc)
- {
- if (!atomic_read(&dc->has_dirty) &&
- !atomic_xchg(&dc->has_dirty, 1)) {
-- refcount_inc(&dc->count);
--
- if (BDEV_STATE(&dc->sb) != BDEV_STATE_DIRTY) {
- SET_BDEV_STATE(&dc->sb, BDEV_STATE_DIRTY);
- /* XXX: should do this synchronously */
---
-2.15.1
-
diff --git a/for-next/v3/v3-0005-bcache-quit-dc-writeback_thread-when-BCACHE_DEV_D.patch b/for-next/v3/v3-0005-bcache-quit-dc-writeback_thread-when-BCACHE_DEV_D.patch
deleted file mode 100644
index 3d5bf0c..0000000
--- a/for-next/v3/v3-0005-bcache-quit-dc-writeback_thread-when-BCACHE_DEV_D.patch
+++ /dev/null
@@ -1,83 +0,0 @@
-From 36747b38396db80de96d650e88f79cc82d284dff Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sun, 14 Jan 2018 21:41:57 +0800
-Subject: [PATCH v3 05/13] bcache: quit dc->writeback_thread when
- BCACHE_DEV_DETACHING is set
-
-In patch "bcache: fix cached_dev->count usage for bch_cache_set_error()",
-cached_dev_get() is called when creating dc->writeback_thread, and
-cached_dev_put() is called when exiting dc->writeback_thread. This
-modification works well unless people detach the bcache device manually by
- 'echo 1 > /sys/block/bcache<N>/bcache/detach'
-Because this sysfs interface only calls bch_cached_dev_detach() which wakes
-up dc->writeback_thread but does not stop it. The reason is, before patch
-"bcache: fix cached_dev->count usage for bch_cache_set_error()", inside
-bch_writeback_thread(), if cache is not dirty after writeback,
-cached_dev_put() will be called here. And in cached_dev_make_request() when
-a new write request makes cache from clean to dirty, cached_dev_get() will
-be called there. Since we don't operate dc->count in these locations,
-refcount dc->count cannot be dropped after cache becomes clean, and
-cached_dev_detach_finish() won't be called to detach bcache device.
-
-This patch fixes the issue by checking whether BCACHE_DEV_DETACHING is
-set inside bch_writeback_thread(). If this bit is set and cache is clean
-(no existing writeback_keys), break the while-loop, call cached_dev_put()
-and quit the writeback thread.
-
-Please note if cache is still dirty, even BCACHE_DEV_DETACHING is set the
-writeback thread should continue to perform writeback, this is the original
-design of manually detach.
-
-I compose a separate patch because that patch "bcache: fix cached_dev->count
-usage for bch_cache_set_error()" already gets a "Reviewed-by:" from Hannes
-Reinecke. Also this fix is not trivial and good for a separate patch.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Hannes Reinecke <hare@suse.com>
-Cc: Huijun Tang <tang.junhui@zte.com.cn>
----
- drivers/md/bcache/writeback.c | 20 +++++++++++++++++---
- 1 file changed, 17 insertions(+), 3 deletions(-)
-
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index b280c134dd4d..4dbeaaa575bf 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -565,9 +565,15 @@ static int bch_writeback_thread(void *arg)
- while (!kthread_should_stop()) {
- down_write(&dc->writeback_lock);
- set_current_state(TASK_INTERRUPTIBLE);
-- if (!atomic_read(&dc->has_dirty) ||
-- (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
-- !dc->writeback_running)) {
-+ /*
-+ * If the bache device is detaching, skip here and continue
-+ * to perform writeback. Otherwise, if no dirty data on cache,
-+ * or there is dirty data on cache but writeback is disabled,
-+ * the writeback thread should sleep here and wait for others
-+ * to wake up it.
-+ */
-+ if (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
-+ (!atomic_read(&dc->has_dirty) || !dc->writeback_running)) {
- up_write(&dc->writeback_lock);
-
- if (kthread_should_stop()) {
-@@ -587,6 +593,14 @@ static int bch_writeback_thread(void *arg)
- atomic_set(&dc->has_dirty, 0);
- SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN);
- bch_write_bdev_super(dc, NULL);
-+ /*
-+ * If bcache device is detaching via sysfs interface,
-+ * writeback thread should stop after there is no dirty
-+ * data on cache. BCACHE_DEV_DETACHING flag is set in
-+ * bch_cached_dev_detach().
-+ */
-+ if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
-+ break;
- }
-
- up_write(&dc->writeback_lock);
---
-2.15.1
-
diff --git a/for-next/v3/v3-0006-bcache-stop-dc-writeback_rate_update-properly.patch b/for-next/v3/v3-0006-bcache-stop-dc-writeback_rate_update-properly.patch
deleted file mode 100644
index 2859d42..0000000
--- a/for-next/v3/v3-0006-bcache-stop-dc-writeback_rate_update-properly.patch
+++ /dev/null
@@ -1,266 +0,0 @@
-From c89453e05ab7c96442a17e8aa634d82719534125 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sat, 13 Jan 2018 15:48:39 +0800
-Subject: [PATCH v3 06/13] bcache: stop dc->writeback_rate_update properly
-
-struct delayed_work writeback_rate_update in struct cached_dev is a delayed
-worker to call function update_writeback_rate() in period (the interval is
-defined by dc->writeback_rate_update_seconds).
-
-When a metadata I/O error happens on cache device, bcache error handling
-routine bch_cache_set_error() will call bch_cache_set_unregister() to
-retire whole cache set. On the unregister code path, this delayed work is
-stopped by calling cancel_delayed_work_sync(&dc->writeback_rate_update).
-
-dc->writeback_rate_update is a special delayed work from others in bcache.
-In its routine update_writeback_rate(), this delayed work is re-armed
-itself. That means when cancel_delayed_work_sync() returns, this delayed
-work can still be executed after several seconds defined by
-dc->writeback_rate_update_seconds.
-
-The problem is, after cancel_delayed_work_sync() returns, the cache set
-unregister code path will continue and release memory of struct cache set.
-Then the delayed work is scheduled to run, __update_writeback_rate()
-will reference the already released cache_set memory, and trigger a NULL
-pointer dereference fault.
-
-This patch introduces two more bcache device flags,
-- BCACHE_DEV_WB_RUNNING
- bit set: bcache device is in writeback mode and running, it is OK for
- dc->writeback_rate_update to re-arm itself.
- bit clear:bcache device is trying to stop dc->writeback_rate_update,
- this delayed work should not re-arm itself and quit.
-- BCACHE_DEV_RATE_DW_RUNNING
- bit set: routine update_writeback_rate() is executing.
- bit clear: routine update_writeback_rate() quits.
-
-This patch also adds a function cancel_writeback_rate_update_dwork() to
-wait for dc->writeback_rate_update quits before cancel it by calling
-cancel_delayed_work_sync(). In order to avoid a deadlock by unexpected
-quit dc->writeback_rate_update, after time_out seconds this function will
-give up and continue to call cancel_delayed_work_sync().
-
-And here I explain how this patch stops self re-armed delayed work properly
-with the above stuffs.
-
-update_writeback_rate() sets BCACHE_DEV_RATE_DW_RUNNING at its beginning
-and clears BCACHE_DEV_RATE_DW_RUNNING at its end. Before calling
-cancel_writeback_rate_update_dwork() clear flag BCACHE_DEV_WB_RUNNING.
-
-Before calling cancel_delayed_work_sync() wait until flag
-BCACHE_DEV_RATE_DW_RUNNING is clear. So when calling
-cancel_delayed_work_sync(), dc->writeback_rate_update must be already re-
-armed, or quit by seeing BCACHE_DEV_WB_RUNNING cleared. In both cases
-delayed work routine update_writeback_rate() won't be executed after
-cancel_delayed_work_sync() returns.
-
-Inside update_writeback_rate() before calling schedule_delayed_work(), flag
-BCACHE_DEV_WB_RUNNING is checked before. If this flag is cleared, it means
-someone is about to stop the delayed work. Because flag
-BCACHE_DEV_RATE_DW_RUNNING is set already and cancel_delayed_work_sync()
-has to wait for this flag to be cleared, we don't need to worry about race
-condition here.
-
-If update_writeback_rate() is scheduled to run after checking
-BCACHE_DEV_RATE_DW_RUNNING and before calling cancel_delayed_work_sync()
-in cancel_writeback_rate_update_dwork(), it is also safe. Because at this
-moment BCACHE_DEV_WB_RUNNING is cleared with memory barrier. As I mentioned
-previously, update_writeback_rate() will see BCACHE_DEV_WB_RUNNING is clear
-and quit immediately.
-
-Because there are more dependences inside update_writeback_rate() to struct
-cache_set memory, dc->writeback_rate_update is not a simple self re-arm
-delayed work. After trying many different methods (e.g. hold dc->count, or
-use locks), this is the only way I can find which works to properly stop
-dc->writeback_rate_update delayed work.
-
-Changelog:
-v2: Try to fix the race issue which is pointed out by Junhui.
-v1: The initial version for review
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Hannes Reinecke <hare@suse.com>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
----
- drivers/md/bcache/bcache.h | 9 +++++----
- drivers/md/bcache/super.c | 39 +++++++++++++++++++++++++++++++++++----
- drivers/md/bcache/sysfs.c | 3 ++-
- drivers/md/bcache/writeback.c | 29 ++++++++++++++++++++++++++++-
- 4 files changed, 70 insertions(+), 10 deletions(-)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 5e2d4e80198e..88d938c8d027 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -258,10 +258,11 @@ struct bcache_device {
- struct gendisk *disk;
-
- unsigned long flags;
--#define BCACHE_DEV_CLOSING 0
--#define BCACHE_DEV_DETACHING 1
--#define BCACHE_DEV_UNLINK_DONE 2
--
-+#define BCACHE_DEV_CLOSING 0
-+#define BCACHE_DEV_DETACHING 1
-+#define BCACHE_DEV_UNLINK_DONE 2
-+#define BCACHE_DEV_WB_RUNNING 4
-+#define BCACHE_DEV_RATE_DW_RUNNING 8
- unsigned nr_stripes;
- unsigned stripe_size;
- atomic_t *stripe_sectors_dirty;
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index d14e09cce2f6..6d888e8fea8c 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -899,6 +899,32 @@ void bch_cached_dev_run(struct cached_dev *dc)
- pr_debug("error creating sysfs link");
- }
-
-+/*
-+ * If BCACHE_DEV_RATE_DW_RUNNING is set, it means routine of the delayed
-+ * work dc->writeback_rate_update is running. Wait until the routine
-+ * quits (BCACHE_DEV_RATE_DW_RUNNING is clear), then continue to
-+ * cancel it. If BCACHE_DEV_RATE_DW_RUNNING is not clear after time_out
-+ * seconds, give up waiting here and continue to cancel it too.
-+ */
-+static void cancel_writeback_rate_update_dwork(struct cached_dev *dc)
-+{
-+ int time_out = WRITEBACK_RATE_UPDATE_SECS_MAX * HZ;
-+
-+ do {
-+ if (!test_bit(BCACHE_DEV_RATE_DW_RUNNING,
-+ &dc->disk.flags))
-+ break;
-+ time_out--;
-+ schedule_timeout_interruptible(1);
-+ } while (time_out > 0);
-+
-+ if (time_out == 0)
-+ pr_warn("bcache: give up waiting for "
-+ "dc->writeback_write_update to quit");
-+
-+ cancel_delayed_work_sync(&dc->writeback_rate_update);
-+}
-+
- static void cached_dev_detach_finish(struct work_struct *w)
- {
- struct cached_dev *dc = container_of(w, struct cached_dev, detach);
-@@ -911,7 +937,9 @@ static void cached_dev_detach_finish(struct work_struct *w)
-
- mutex_lock(&bch_register_lock);
-
-- cancel_delayed_work_sync(&dc->writeback_rate_update);
-+ if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
-+ cancel_writeback_rate_update_dwork(dc);
-+
- if (!IS_ERR_OR_NULL(dc->writeback_thread)) {
- kthread_stop(dc->writeback_thread);
- dc->writeback_thread = NULL;
-@@ -954,6 +982,7 @@ void bch_cached_dev_detach(struct cached_dev *dc)
- closure_get(&dc->disk.cl);
-
- bch_writeback_queue(dc);
-+
- cached_dev_put(dc);
- }
-
-@@ -1079,14 +1108,16 @@ static void cached_dev_free(struct closure *cl)
- {
- struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
-
-- cancel_delayed_work_sync(&dc->writeback_rate_update);
-+ mutex_lock(&bch_register_lock);
-+
-+ if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
-+ cancel_writeback_rate_update_dwork(dc);
-+
- if (!IS_ERR_OR_NULL(dc->writeback_thread))
- kthread_stop(dc->writeback_thread);
- if (dc->writeback_write_wq)
- destroy_workqueue(dc->writeback_write_wq);
-
-- mutex_lock(&bch_register_lock);
--
- if (atomic_read(&dc->running))
- bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
- bcache_device_free(&dc->disk);
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index a74a752c9e0f..b7166c504cdb 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -304,7 +304,8 @@ STORE(bch_cached_dev)
- bch_writeback_queue(dc);
-
- if (attr == &sysfs_writeback_percent)
-- schedule_delayed_work(&dc->writeback_rate_update,
-+ if (!test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
-+ schedule_delayed_work(&dc->writeback_rate_update,
- dc->writeback_rate_update_seconds * HZ);
-
- mutex_unlock(&bch_register_lock);
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index 4dbeaaa575bf..8f98ef1038d3 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -115,6 +115,21 @@ static void update_writeback_rate(struct work_struct *work)
- struct cached_dev,
- writeback_rate_update);
-
-+ /*
-+ * should check BCACHE_DEV_RATE_DW_RUNNING before calling
-+ * cancel_delayed_work_sync().
-+ */
-+ set_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
-+ /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
-+ smp_mb();
-+
-+ if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) {
-+ clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
-+ /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
-+ smp_mb();
-+ return;
-+ }
-+
- down_read(&dc->writeback_lock);
-
- if (atomic_read(&dc->has_dirty) &&
-@@ -123,8 +138,18 @@ static void update_writeback_rate(struct work_struct *work)
-
- up_read(&dc->writeback_lock);
-
-- schedule_delayed_work(&dc->writeback_rate_update,
-+ if (test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) {
-+ schedule_delayed_work(&dc->writeback_rate_update,
- dc->writeback_rate_update_seconds * HZ);
-+ }
-+
-+ /*
-+ * should check BCACHE_DEV_RATE_DW_RUNNING before calling
-+ * cancel_delayed_work_sync().
-+ */
-+ clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
-+ /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
-+ smp_mb();
- }
-
- static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
-@@ -675,6 +700,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
- dc->writeback_rate_p_term_inverse = 40;
- dc->writeback_rate_i_term_inverse = 10000;
-
-+ WARN_ON(test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
- INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
- }
-
-@@ -693,6 +719,7 @@ int bch_cached_dev_writeback_start(struct cached_dev *dc)
- return PTR_ERR(dc->writeback_thread);
- }
-
-+ WARN_ON(test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
- schedule_delayed_work(&dc->writeback_rate_update,
- dc->writeback_rate_update_seconds * HZ);
-
---
-2.15.1
-
diff --git a/for-next/v3/v3-0007-bcache-set-error_limit-correctly.patch b/for-next/v3/v3-0007-bcache-set-error_limit-correctly.patch
deleted file mode 100644
index 84acd7c..0000000
--- a/for-next/v3/v3-0007-bcache-set-error_limit-correctly.patch
+++ /dev/null
@@ -1,121 +0,0 @@
-From be7b78080d36c040af2cef65ab08a5df77122248 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Tue, 9 Jan 2018 22:46:25 +0800
-Subject: [PATCH v3 07/13] bcache: set error_limit correctly
-
-Struct cache uses io_errors for two purposes,
-- Error decay: when cache set error_decay is set, io_errors is used to
- generate a small piece of delay when I/O error happens.
-- I/O errors counter: in order to generate big enough value for error
- decay, I/O errors counter value is stored by left shifting 20 bits (a.k.a
- IO_ERROR_SHIFT).
-
-In function bch_count_io_errors(), if I/O errors counter reaches cache set
-error limit, bch_cache_set_error() will be called to retire the whole cache
-set. But current code is problematic when checking the error limit, see the
-following code piece from bch_count_io_errors(),
-
- 90 if (error) {
- 91 char buf[BDEVNAME_SIZE];
- 92 unsigned errors = atomic_add_return(1 << IO_ERROR_SHIFT,
- 93 &ca->io_errors);
- 94 errors >>= IO_ERROR_SHIFT;
- 95
- 96 if (errors < ca->set->error_limit)
- 97 pr_err("%s: IO error on %s, recovering",
- 98 bdevname(ca->bdev, buf), m);
- 99 else
-100 bch_cache_set_error(ca->set,
-101 "%s: too many IO errors %s",
-102 bdevname(ca->bdev, buf), m);
-103 }
-
-At line 94, errors is right shifting IO_ERROR_SHIFT bits, now it is real
-errors counter to compare at line 96. But ca->set->error_limit is initia-
-lized with an amplified value in bch_cache_set_alloc(),
-1545 c->error_limit = 8 << IO_ERROR_SHIFT;
-
-It means by default, in bch_count_io_errors(), before 8<<20 errors happened
-bch_cache_set_error() won't be called to retire the problematic cache
-device. If the average request size is 64KB, it means bcache won't handle
-failed device until 512GB data is requested. This is too large to be an I/O
-threshold. So I believe the correct error limit should be much less.
-
-This patch sets default cache set error limit to 8, then in
-bch_count_io_errors() when errors counter reaches 8 (if it is default
-value), function bch_cache_set_error() will be called to retire the whole
-cache set. This patch also removes bits shifting when store or show
-io_error_limit value via sysfs interface.
-
-Nowadays most of SSDs handle internal flash failure automatically by LBA
-address re-indirect mapping. If an I/O error can be observed by upper layer
-code, it will be a notable error because that SSD can not re-indirect
-map the problematic LBA address to an available flash block. This situation
-indicates the whole SSD will be failed very soon. Therefore setting 8 as
-the default io error limit value makes sense, it is enough for most of
-cache devices.
-
-Changelog:
-v2: add reviewed-by from Hannes.
-v1: initial version for review.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Reviewed-by: Hannes Reinecke <hare@suse.com>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
----
- drivers/md/bcache/bcache.h | 1 +
- drivers/md/bcache/super.c | 2 +-
- drivers/md/bcache/sysfs.c | 4 ++--
- 3 files changed, 4 insertions(+), 3 deletions(-)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 88d938c8d027..7d7512fa4f09 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -663,6 +663,7 @@ struct cache_set {
- ON_ERROR_UNREGISTER,
- ON_ERROR_PANIC,
- } on_error;
-+#define DEFAULT_IO_ERROR_LIMIT 8
- unsigned error_limit;
- unsigned error_decay;
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 6d888e8fea8c..a373648b5d4b 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1583,7 +1583,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
-
- c->congested_read_threshold_us = 2000;
- c->congested_write_threshold_us = 20000;
-- c->error_limit = 8 << IO_ERROR_SHIFT;
-+ c->error_limit = DEFAULT_IO_ERROR_LIMIT;
-
- return c;
- err:
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index b7166c504cdb..ba62e987b503 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -560,7 +560,7 @@ SHOW(__bch_cache_set)
-
- /* See count_io_errors for why 88 */
- sysfs_print(io_error_halflife, c->error_decay * 88);
-- sysfs_print(io_error_limit, c->error_limit >> IO_ERROR_SHIFT);
-+ sysfs_print(io_error_limit, c->error_limit);
-
- sysfs_hprint(congested,
- ((uint64_t) bch_get_congested(c)) << 9);
-@@ -660,7 +660,7 @@ STORE(__bch_cache_set)
- }
-
- if (attr == &sysfs_io_error_limit)
-- c->error_limit = strtoul_or_return(buf) << IO_ERROR_SHIFT;
-+ c->error_limit = strtoul_or_return(buf);
-
- /* See count_io_errors() for why 88 */
- if (attr == &sysfs_io_error_halflife)
---
-2.15.1
-
diff --git a/for-next/v3/v3-0008-bcache-add-CACHE_SET_IO_DISABLE-to-struct-cache_s.patch b/for-next/v3/v3-0008-bcache-add-CACHE_SET_IO_DISABLE-to-struct-cache_s.patch
deleted file mode 100644
index 7d51f20..0000000
--- a/for-next/v3/v3-0008-bcache-add-CACHE_SET_IO_DISABLE-to-struct-cache_s.patch
+++ /dev/null
@@ -1,489 +0,0 @@
-From 8533ead6738e3ece36d111fb0aac7e37bc7e7a7c Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sun, 14 Jan 2018 22:15:00 +0800
-Subject: [PATCH v3 08/13] bcache: add CACHE_SET_IO_DISABLE to struct cache_set
- flags
-
-When too many I/Os failed on cache device, bch_cache_set_error() is called
-in the error handling code path to retire whole problematic cache set. If
-new I/O requests continue to come and take refcount dc->count, the cache
-set won't be retired immediately, this is a problem.
-
-Furthermore, several kernel threads and self-armed kernel workers may
-still be running after bch_cache_set_error() is called. It may take quite
-a while for them to stop, or they may not stop at all. They also
-prevent the cache set from being retired.
-
-The solution in this patch is, to add per cache set flag to disable I/O
-request on this cache and all attached backing devices. Then new coming I/O
-requests can be rejected in *_make_request() before taking refcount, kernel
-threads and self-armed kernel worker can stop very fast when flags bit
-CACHE_SET_IO_DISABLE is set.
-
-Because bcache also do internal I/Os for writeback, garbage collection,
-bucket allocation, journaling, this kind of I/O should be disabled after
-bch_cache_set_error() is called. So closure_bio_submit() is modified to
-check whether CACHE_SET_IO_DISABLE is set on cache_set->flags. If set,
-closure_bio_submit() will set bio->bi_status to BLK_STS_IOERR and
-return, generic_make_request() won't be called.
-
-A sysfs interface is also added to set or clear CACHE_SET_IO_DISABLE bit
-from cache_set->flags, to disable or enable cache set I/O for debugging. It
-is helpful to trigger more corner case issues for failed cache device.
-
-Changelog
-v2, more changes by previous review,
-- Use CACHE_SET_IO_DISABLE of cache_set->flags, suggested by Junhui.
-- Check CACHE_SET_IO_DISABLE in bch_btree_gc() to stop a while-loop, this
- is reported and inspired by the original patch of Pavel Vazharov.
-v1, initial version.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Reviewed-by: Hannes Reinecke <hare@suse.com>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Pavel Vazharov <freakpv@gmail.com>
----
- drivers/md/bcache/alloc.c | 3 ++-
- drivers/md/bcache/bcache.h | 18 ++++++++++++++++++
- drivers/md/bcache/btree.c | 10 +++++++---
- drivers/md/bcache/io.c | 2 +-
- drivers/md/bcache/journal.c | 4 ++--
- drivers/md/bcache/request.c | 26 +++++++++++++++++++-------
- drivers/md/bcache/super.c | 6 +++++-
- drivers/md/bcache/sysfs.c | 20 ++++++++++++++++++++
- drivers/md/bcache/util.h | 6 ------
- drivers/md/bcache/writeback.c | 35 +++++++++++++++++++++++++++--------
- 10 files changed, 101 insertions(+), 29 deletions(-)
-
-diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
-index 458e1d38577d..004cc3cc6123 100644
---- a/drivers/md/bcache/alloc.c
-+++ b/drivers/md/bcache/alloc.c
-@@ -287,7 +287,8 @@ do { \
- break; \
- \
- mutex_unlock(&(ca)->set->bucket_lock); \
-- if (kthread_should_stop()) { \
-+ if (kthread_should_stop() || \
-+ test_bit(CACHE_SET_IO_DISABLE, &ca->set->flags)) { \
- set_current_state(TASK_RUNNING); \
- return 0; \
- } \
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 7d7512fa4f09..c41736960045 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -475,10 +475,15 @@ struct gc_stat {
- *
- * CACHE_SET_RUNNING means all cache devices have been registered and journal
- * replay is complete.
-+ *
-+ * CACHE_SET_IO_DISABLE is set when bcache is stopping the whole cache set, all
-+ * external and internal I/O should be denied when this flag is set.
-+ *
- */
- #define CACHE_SET_UNREGISTERING 0
- #define CACHE_SET_STOPPING 1
- #define CACHE_SET_RUNNING 2
-+#define CACHE_SET_IO_DISABLE 4
-
- struct cache_set {
- struct closure cl;
-@@ -862,6 +867,19 @@ static inline void wake_up_allocators(struct cache_set *c)
- wake_up_process(ca->alloc_thread);
- }
-
-+static inline void closure_bio_submit(struct cache_set *c,
-+ struct bio *bio,
-+ struct closure *cl)
-+{
-+ closure_get(cl);
-+ if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags))) {
-+ bio->bi_status = BLK_STS_IOERR;
-+ bio_endio(bio);
-+ return;
-+ }
-+ generic_make_request(bio);
-+}
-+
- /* Forward declarations */
-
- void bch_count_io_errors(struct cache *, blk_status_t, int, const char *);
-diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
-index bf3a48aa9a9a..0a0bc63011b4 100644
---- a/drivers/md/bcache/btree.c
-+++ b/drivers/md/bcache/btree.c
-@@ -1744,6 +1744,7 @@ static void bch_btree_gc(struct cache_set *c)
-
- btree_gc_start(c);
-
-+ /* if CACHE_SET_IO_DISABLE set, gc thread should stop too */
- do {
- ret = btree_root(gc_root, c, &op, &writes, &stats);
- closure_sync(&writes);
-@@ -1751,7 +1752,7 @@ static void bch_btree_gc(struct cache_set *c)
-
- if (ret && ret != -EAGAIN)
- pr_warn("gc failed!");
-- } while (ret);
-+ } while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags));
-
- bch_btree_gc_finish(c);
- wake_up_allocators(c);
-@@ -1789,9 +1790,12 @@ static int bch_gc_thread(void *arg)
-
- while (1) {
- wait_event_interruptible(c->gc_wait,
-- kthread_should_stop() || gc_should_run(c));
-+ kthread_should_stop() ||
-+ test_bit(CACHE_SET_IO_DISABLE, &c->flags) ||
-+ gc_should_run(c));
-
-- if (kthread_should_stop())
-+ if (kthread_should_stop() ||
-+ test_bit(CACHE_SET_IO_DISABLE, &c->flags))
- break;
-
- set_gc_sectors(c);
-diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
-index a783c5a41ff1..8013ecbcdbda 100644
---- a/drivers/md/bcache/io.c
-+++ b/drivers/md/bcache/io.c
-@@ -38,7 +38,7 @@ void __bch_submit_bbio(struct bio *bio, struct cache_set *c)
- bio_set_dev(bio, PTR_CACHE(c, &b->key, 0)->bdev);
-
- b->submit_time_us = local_clock_us();
-- closure_bio_submit(bio, bio->bi_private);
-+ closure_bio_submit(c, bio, bio->bi_private);
- }
-
- void bch_submit_bbio(struct bio *bio, struct cache_set *c,
-diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index a87165c1d8e5..979873641030 100644
---- a/drivers/md/bcache/journal.c
-+++ b/drivers/md/bcache/journal.c
-@@ -62,7 +62,7 @@ reread: left = ca->sb.bucket_size - offset;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
- bch_bio_map(bio, data);
-
-- closure_bio_submit(bio, &cl);
-+ closure_bio_submit(ca->set, bio, &cl);
- closure_sync(&cl);
-
- /* This function could be simpler now since we no longer write
-@@ -653,7 +653,7 @@ static void journal_write_unlocked(struct closure *cl)
- spin_unlock(&c->journal.lock);
-
- while ((bio = bio_list_pop(&list)))
-- closure_bio_submit(bio, cl);
-+ closure_bio_submit(c, bio, cl);
-
- continue_at(cl, journal_write_done, NULL);
- }
-diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
-index 1a46b41dac70..02296bda6384 100644
---- a/drivers/md/bcache/request.c
-+++ b/drivers/md/bcache/request.c
-@@ -747,7 +747,7 @@ static void cached_dev_read_error(struct closure *cl)
-
- /* XXX: invalidate cache */
-
-- closure_bio_submit(bio, cl);
-+ closure_bio_submit(s->iop.c, bio, cl);
- }
-
- continue_at(cl, cached_dev_cache_miss_done, NULL);
-@@ -872,7 +872,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
- s->cache_miss = miss;
- s->iop.bio = cache_bio;
- bio_get(cache_bio);
-- closure_bio_submit(cache_bio, &s->cl);
-+ closure_bio_submit(s->iop.c, cache_bio, &s->cl);
-
- return ret;
- out_put:
-@@ -880,7 +880,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
- out_submit:
- miss->bi_end_io = request_endio;
- miss->bi_private = &s->cl;
-- closure_bio_submit(miss, &s->cl);
-+ closure_bio_submit(s->iop.c, miss, &s->cl);
- return ret;
- }
-
-@@ -945,7 +945,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
-
- if ((bio_op(bio) != REQ_OP_DISCARD) ||
- blk_queue_discard(bdev_get_queue(dc->bdev)))
-- closure_bio_submit(bio, cl);
-+ closure_bio_submit(s->iop.c, bio, cl);
- } else if (s->iop.writeback) {
- bch_writeback_add(dc);
- s->iop.bio = bio;
-@@ -960,12 +960,12 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
- flush->bi_private = cl;
- flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
-
-- closure_bio_submit(flush, cl);
-+ closure_bio_submit(s->iop.c, flush, cl);
- }
- } else {
- s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split);
-
-- closure_bio_submit(bio, cl);
-+ closure_bio_submit(s->iop.c, bio, cl);
- }
-
- closure_call(&s->iop.cl, bch_data_insert, NULL, cl);
-@@ -981,7 +981,7 @@ static void cached_dev_nodata(struct closure *cl)
- bch_journal_meta(s->iop.c, cl);
-
- /* If it's a flush, we send the flush to the backing device too */
-- closure_bio_submit(bio, cl);
-+ closure_bio_submit(s->iop.c, bio, cl);
-
- continue_at(cl, cached_dev_bio_complete, NULL);
- }
-@@ -996,6 +996,12 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
- struct cached_dev *dc = container_of(d, struct cached_dev, disk);
- int rw = bio_data_dir(bio);
-
-+ if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
-+ bio->bi_status = BLK_STS_IOERR;
-+ bio_endio(bio);
-+ return BLK_QC_T_NONE;
-+ }
-+
- atomic_set(&dc->backing_idle, 0);
- generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
-
-@@ -1112,6 +1118,12 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q,
- struct bcache_device *d = bio->bi_disk->private_data;
- int rw = bio_data_dir(bio);
-
-+ if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
-+ bio->bi_status = BLK_STS_IOERR;
-+ bio_endio(bio);
-+ return BLK_QC_T_NONE;
-+ }
-+
- generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
-
- s = search_alloc(bio, d);
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index a373648b5d4b..4204d75aee7b 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -521,7 +521,7 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op,
- bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags);
- bch_bio_map(bio, ca->disk_buckets);
-
-- closure_bio_submit(bio, &ca->prio);
-+ closure_bio_submit(ca->set, bio, &ca->prio);
- closure_sync(cl);
- }
-
-@@ -1349,6 +1349,9 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
- test_bit(CACHE_SET_STOPPING, &c->flags))
- return false;
-
-+ if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags))
-+ pr_warn("bcache: CACHE_SET_IO_DISABLE already set");
-+
- /* XXX: we can be called from atomic context
- acquire_console_sem();
- */
-@@ -1584,6 +1587,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
- c->congested_read_threshold_us = 2000;
- c->congested_write_threshold_us = 20000;
- c->error_limit = DEFAULT_IO_ERROR_LIMIT;
-+ WARN_ON(test_and_clear_bit(CACHE_SET_IO_DISABLE, &c->flags));
-
- return c;
- err:
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index ba62e987b503..afb051bcfca1 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -92,6 +92,7 @@ read_attribute(partial_stripes_expensive);
-
- rw_attribute(synchronous);
- rw_attribute(journal_delay_ms);
-+rw_attribute(io_disable);
- rw_attribute(discard);
- rw_attribute(running);
- rw_attribute(label);
-@@ -577,6 +578,8 @@ SHOW(__bch_cache_set)
- sysfs_printf(gc_always_rewrite, "%i", c->gc_always_rewrite);
- sysfs_printf(btree_shrinker_disabled, "%i", c->shrinker_disabled);
- sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
-+ sysfs_printf(io_disable, "%i",
-+ test_bit(CACHE_SET_IO_DISABLE, &c->flags));
-
- if (attr == &sysfs_bset_tree_stats)
- return bch_bset_print_stats(c, buf);
-@@ -666,6 +669,22 @@ STORE(__bch_cache_set)
- if (attr == &sysfs_io_error_halflife)
- c->error_decay = strtoul_or_return(buf) / 88;
-
-+ if (attr == &sysfs_io_disable) {
-+ int v = strtoul_or_return(buf);
-+
-+ if (v) {
-+ if (test_and_set_bit(CACHE_SET_IO_DISABLE,
-+ &c->flags))
-+ pr_warn("bcache: CACHE_SET_IO_DISABLE"
-+ " already set");
-+ } else {
-+ if (!test_and_clear_bit(CACHE_SET_IO_DISABLE,
-+ &c->flags))
-+ pr_warn("bcache: CACHE_SET_IO_DISABLE"
-+ " already cleared");
-+ }
-+ }
-+
- sysfs_strtoul(journal_delay_ms, c->journal_delay_ms);
- sysfs_strtoul(verify, c->verify);
- sysfs_strtoul(key_merging_disabled, c->key_merging_disabled);
-@@ -748,6 +767,7 @@ static struct attribute *bch_cache_set_internal_files[] = {
- &sysfs_gc_always_rewrite,
- &sysfs_btree_shrinker_disabled,
- &sysfs_copy_gc_enabled,
-+ &sysfs_io_disable,
- NULL
- };
- KTYPE(bch_cache_set_internal);
-diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
-index 4df4c5c1cab2..7944eea54fa9 100644
---- a/drivers/md/bcache/util.h
-+++ b/drivers/md/bcache/util.h
-@@ -565,12 +565,6 @@ static inline sector_t bdev_sectors(struct block_device *bdev)
- return bdev->bd_inode->i_size >> 9;
- }
-
--#define closure_bio_submit(bio, cl) \
--do { \
-- closure_get(cl); \
-- generic_make_request(bio); \
--} while (0)
--
- uint64_t bch_crc64_update(uint64_t, const void *, size_t);
- uint64_t bch_crc64(const void *, size_t);
-
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index 8f98ef1038d3..3d7d8452e0de 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -114,6 +114,7 @@ static void update_writeback_rate(struct work_struct *work)
- struct cached_dev *dc = container_of(to_delayed_work(work),
- struct cached_dev,
- writeback_rate_update);
-+ struct cache_set *c = dc->disk.c;
-
- /*
- * should check BCACHE_DEV_RATE_DW_RUNNING before calling
-@@ -123,7 +124,12 @@ static void update_writeback_rate(struct work_struct *work)
- /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
- smp_mb();
-
-- if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) {
-+ /*
-+ * CACHE_SET_IO_DISABLE might be set via sysfs interface,
-+ * check it here too.
-+ */
-+ if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) ||
-+ test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
- clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
- /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
- smp_mb();
-@@ -138,7 +144,12 @@ static void update_writeback_rate(struct work_struct *work)
-
- up_read(&dc->writeback_lock);
-
-- if (test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) {
-+ /*
-+ * CACHE_SET_IO_DISABLE might be set via sysfs interface,
-+ * check it here too.
-+ */
-+ if (test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) &&
-+ !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
- schedule_delayed_work(&dc->writeback_rate_update,
- dc->writeback_rate_update_seconds * HZ);
- }
-@@ -278,7 +289,7 @@ static void write_dirty(struct closure *cl)
- bio_set_dev(&io->bio, io->dc->bdev);
- io->bio.bi_end_io = dirty_endio;
-
-- closure_bio_submit(&io->bio, cl);
-+ closure_bio_submit(io->dc->disk.c, &io->bio, cl);
- }
-
- atomic_set(&dc->writeback_sequence_next, next_sequence);
-@@ -304,7 +315,7 @@ static void read_dirty_submit(struct closure *cl)
- {
- struct dirty_io *io = container_of(cl, struct dirty_io, cl);
-
-- closure_bio_submit(&io->bio, cl);
-+ closure_bio_submit(io->dc->disk.c, &io->bio, cl);
-
- continue_at(cl, write_dirty, io->dc->writeback_write_wq);
- }
-@@ -330,7 +341,9 @@ static void read_dirty(struct cached_dev *dc)
-
- next = bch_keybuf_next(&dc->writeback_keys);
-
-- while (!kthread_should_stop() && next) {
-+ while (!kthread_should_stop() &&
-+ !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
-+ next) {
- size = 0;
- nk = 0;
-
-@@ -427,7 +440,9 @@ static void read_dirty(struct cached_dev *dc)
- }
- }
-
-- while (!kthread_should_stop() && delay) {
-+ while (!kthread_should_stop() &&
-+ !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
-+ delay) {
- schedule_timeout_interruptible(delay);
- delay = writeback_delay(dc, 0);
- }
-@@ -583,11 +598,13 @@ static bool refill_dirty(struct cached_dev *dc)
- static int bch_writeback_thread(void *arg)
- {
- struct cached_dev *dc = arg;
-+ struct cache_set *c = dc->disk.c;
- bool searched_full_index;
-
- bch_ratelimit_reset(&dc->writeback_rate);
-
-- while (!kthread_should_stop()) {
-+ while (!kthread_should_stop() &&
-+ !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
- down_write(&dc->writeback_lock);
- set_current_state(TASK_INTERRUPTIBLE);
- /*
-@@ -601,7 +618,8 @@ static int bch_writeback_thread(void *arg)
- (!atomic_read(&dc->has_dirty) || !dc->writeback_running)) {
- up_write(&dc->writeback_lock);
-
-- if (kthread_should_stop()) {
-+ if (kthread_should_stop() ||
-+ test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
- set_current_state(TASK_RUNNING);
- break;
- }
-@@ -637,6 +655,7 @@ static int bch_writeback_thread(void *arg)
-
- while (delay &&
- !kthread_should_stop() &&
-+ !test_bit(CACHE_SET_IO_DISABLE, &c->flags) &&
- !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
- delay = schedule_timeout_interruptible(delay);
-
---
-2.15.1
-
diff --git a/for-next/v3/v3-0009-bcache-stop-all-attached-bcache-devices-for-a-ret.patch b/for-next/v3/v3-0009-bcache-stop-all-attached-bcache-devices-for-a-ret.patch
deleted file mode 100644
index 0246cef..0000000
--- a/for-next/v3/v3-0009-bcache-stop-all-attached-bcache-devices-for-a-ret.patch
+++ /dev/null
@@ -1,67 +0,0 @@
-From c5e03551019bb14ac40adf1b9e52bc6430c8659f Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 10 Jan 2018 00:26:32 +0800
-Subject: [PATCH v3 09/13] bcache: stop all attached bcache devices for a
- retired cache set
-
-When there are too many I/O errors on cache device, current bcache code
-will retire the whole cache set, and detach all bcache devices. But the
-detached bcache devices are not stopped, which is problematic when bcache
-is in writeback mode.
-
-If the retired cache set has dirty data of backing devices, continue
-writing to bcache device will write to backing device directly. If the
-LBA of write request has a dirty version cached on cache device, next time
-when the cache device is re-registered and backing device re-attached to
-it again, the stale dirty data on cache device will be written to backing
-device, and overwrite the latest directly written data. This situation causes
-silent data corruption.
-
-This patch checks whether cache_set->io_disable is true in
-__cache_set_unregister(). If cache_set->io_disable is true, it means cache
-set is unregistering by too many I/O errors, then all attached bcache
-devices will be stopped as well. If cache_set->io_disable is not true, it
-means __cache_set_unregister() is triggered by writing 1 to sysfs file
-/sys/fs/bcache/<UUID>/bcache/stop. This is an exception because users do
-it explicitly, this patch keeps existing behavior and does not stop any
-bcache device.
-
-Even if the failed cache device has no dirty data, stopping bcache device is
-still a desired behavior for many Ceph and database users. Then their
-application will report I/O errors due to disappeared bcache device, and
-operation people will know the cache device is broken or disconnected.
-
-Changelog:
-v2: add Reviewed-by from Hannes.
-v1: initial version for review.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Reviewed-by: Hannes Reinecke <hare@suse.com>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
-Cc: Michael Lyle <mlyle@lyle.org>
----
- drivers/md/bcache/super.c | 8 ++++++++
- 1 file changed, 8 insertions(+)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 4204d75aee7b..97e3bb8e1aee 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1478,6 +1478,14 @@ static void __cache_set_unregister(struct closure *cl)
- dc = container_of(c->devices[i],
- struct cached_dev, disk);
- bch_cached_dev_detach(dc);
-+ /*
-+ * If we come here by too many I/O errors,
-+ * bcache device should be stopped too, to
-+ * keep data consistency on cache and
-+ * backing devices.
-+ */
-+ if (test_bit(CACHE_SET_IO_DISABLE, &c->flags))
-+ bcache_device_stop(c->devices[i]);
- } else {
- bcache_device_stop(c->devices[i]);
- }
---
-2.15.1
-
diff --git a/for-next/v3/v3-0010-bcache-fix-inaccurate-io-state-for-detached-bcach.patch b/for-next/v3/v3-0010-bcache-fix-inaccurate-io-state-for-detached-bcach.patch
deleted file mode 100644
index c42c832..0000000
--- a/for-next/v3/v3-0010-bcache-fix-inaccurate-io-state-for-detached-bcach.patch
+++ /dev/null
@@ -1,118 +0,0 @@
-From 9d7ba78bd2b2b109ff1bf3eac21e962e183175b3 Mon Sep 17 00:00:00 2001
-From: Tang Junhui <tang.junhui@zte.com.cn>
-Date: Tue, 9 Jan 2018 10:27:11 +0800
-Subject: [PATCH v3 10/13] bcache: fix inaccurate io state for detached bcache
- devices
-
-When we run IO on a detached device, and run iostat to show IO status,
-normally it will show output like below (some fields omitted):
-Device: ... avgrq-sz avgqu-sz await r_await w_await svctm %util
-sdd ... 15.89 0.53 1.82 0.20 2.23 1.81 52.30
-bcache0 ... 15.89 115.42 0.00 0.00 0.00 2.40 69.60
-but after IO stopped, there are still very big avgqu-sz and %util
-values as below:
-Device: ... avgrq-sz avgqu-sz await r_await w_await svctm %util
-bcache0 ... 0 5326.32 0.00 0.00 0.00 0.00 100.10
-
-The reason for this issue is that, only generic_start_io_acct() called
-and no generic_end_io_acct() called for detached device in
-cached_dev_make_request(). See the code:
-//start generic_start_io_acct()
-generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
-if (cached_dev_get(dc)) {
- //will callback generic_end_io_acct()
-}
-else {
- //will not call generic_end_io_acct()
-}
-
-This patch calls generic_end_io_acct() in the end of IO for detached
-devices, so we can show IO state correctly.
-
-(Modified to use GFP_NOIO in kzalloc() by Coly Li)
-
-Signed-off-by: Tang Junhui <tang.junhui@zte.com.cn>
-Reviewed-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/request.c | 58 +++++++++++++++++++++++++++++++++++++++------
- 1 file changed, 51 insertions(+), 7 deletions(-)
-
-diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
-index 02296bda6384..e09c5ae745be 100644
---- a/drivers/md/bcache/request.c
-+++ b/drivers/md/bcache/request.c
-@@ -986,6 +986,55 @@ static void cached_dev_nodata(struct closure *cl)
- continue_at(cl, cached_dev_bio_complete, NULL);
- }
-
-+struct detached_dev_io_private {
-+ struct bcache_device *d;
-+ unsigned long start_time;
-+ bio_end_io_t *bi_end_io;
-+ void *bi_private;
-+};
-+
-+static void detatched_dev_end_io(struct bio *bio)
-+{
-+ struct detached_dev_io_private *ddip;
-+
-+ ddip = bio->bi_private;
-+ bio->bi_end_io = ddip->bi_end_io;
-+ bio->bi_private = ddip->bi_private;
-+
-+ generic_end_io_acct(ddip->d->disk->queue,
-+ bio_data_dir(bio),
-+ &ddip->d->disk->part0, ddip->start_time);
-+
-+ kfree(ddip);
-+
-+ bio->bi_end_io(bio);
-+}
-+
-+static void detached_dev_do_request(struct bcache_device *d, struct bio *bio)
-+{
-+ struct detached_dev_io_private *ddip;
-+ struct cached_dev *dc = container_of(d, struct cached_dev, disk);
-+
-+ /*
-+ * no need to call closure_get(&dc->disk.cl),
-+ * because upper layer had already opened bcache device,
-+ * which would call closure_get(&dc->disk.cl)
-+ */
-+ ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO);
-+ ddip->d = d;
-+ ddip->start_time = jiffies;
-+ ddip->bi_end_io = bio->bi_end_io;
-+ ddip->bi_private = bio->bi_private;
-+ bio->bi_end_io = detatched_dev_end_io;
-+ bio->bi_private = ddip;
-+
-+ if ((bio_op(bio) == REQ_OP_DISCARD) &&
-+ !blk_queue_discard(bdev_get_queue(dc->bdev)))
-+ bio->bi_end_io(bio);
-+ else
-+ generic_make_request(bio);
-+}
-+
- /* Cached devices - read & write stuff */
-
- static blk_qc_t cached_dev_make_request(struct request_queue *q,
-@@ -1028,13 +1077,8 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
- else
- cached_dev_read(dc, s);
- }
-- } else {
-- if ((bio_op(bio) == REQ_OP_DISCARD) &&
-- !blk_queue_discard(bdev_get_queue(dc->bdev)))
-- bio_endio(bio);
-- else
-- generic_make_request(bio);
-- }
-+ } else
-+ detached_dev_do_request(d, bio);
-
- return BLK_QC_T_NONE;
- }
---
-2.15.1
-
diff --git a/for-next/v3/v3-0011-bcache-add-backing_request_endio-for-bi_end_io-of.patch b/for-next/v3/v3-0011-bcache-add-backing_request_endio-for-bi_end_io-of.patch
deleted file mode 100644
index a151def..0000000
--- a/for-next/v3/v3-0011-bcache-add-backing_request_endio-for-bi_end_io-of.patch
+++ /dev/null
@@ -1,251 +0,0 @@
-From 840af1de0d2ba8f0f6fd148574d3c0a64c63943e Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 10 Jan 2018 21:01:48 +0800
-Subject: [PATCH v3 11/13] bcache: add backing_request_endio() for bi_end_io of
- attached backing device I/O
-
-In order to catch I/O error of backing device, a separate bi_end_io
-call back is required. Then a per backing device counter can record I/O
-errors number and retire the backing device if the counter reaches a
-per backing device I/O error limit.
-
-This patch adds backing_request_endio() to bcache backing device I/O code
-path, this is a preparation for further complicated backing device failure
-handling. So far there is no real code logic change, I make this change a
-separate patch to make sure it is stable and reliable for further work.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
-Cc: Michael Lyle <mlyle@lyle.org>
----
- drivers/md/bcache/request.c | 95 +++++++++++++++++++++++++++++++++++--------
- drivers/md/bcache/super.c | 1 +
- drivers/md/bcache/writeback.c | 1 +
- 3 files changed, 81 insertions(+), 16 deletions(-)
-
-diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
-index e09c5ae745be..ad4cf71f7eab 100644
---- a/drivers/md/bcache/request.c
-+++ b/drivers/md/bcache/request.c
-@@ -139,6 +139,7 @@ static void bch_data_invalidate(struct closure *cl)
- }
-
- op->insert_data_done = true;
-+ /* get in bch_data_insert() */
- bio_put(bio);
- out:
- continue_at(cl, bch_data_insert_keys, op->wq);
-@@ -630,6 +631,38 @@ static void request_endio(struct bio *bio)
- closure_put(cl);
- }
-
-+static void backing_request_endio(struct bio *bio)
-+{
-+ struct closure *cl = bio->bi_private;
-+
-+ if (bio->bi_status) {
-+ struct search *s = container_of(cl, struct search, cl);
-+ /*
-+ * If a bio has REQ_PREFLUSH for writeback mode, it is
-+ * specially assembled in cached_dev_write() for a non-zero
-+ * write request which has REQ_PREFLUSH. we don't set
-+ * s->iop.status by this failure, the status will be decided
-+ * by result of bch_data_insert() operation.
-+ */
-+ if (unlikely(s->iop.writeback &&
-+ bio->bi_opf & REQ_PREFLUSH)) {
-+ char buf[BDEVNAME_SIZE];
-+
-+ bio_devname(bio, buf);
-+ pr_err("Can't flush %s: returned bi_status %i",
-+ buf, bio->bi_status);
-+ } else {
-+ /* set to orig_bio->bi_status in bio_complete() */
-+ s->iop.status = bio->bi_status;
-+ }
-+ s->recoverable = false;
-+ /* should count I/O error for backing device here */
-+ }
-+
-+ bio_put(bio);
-+ closure_put(cl);
-+}
-+
- static void bio_complete(struct search *s)
- {
- if (s->orig_bio) {
-@@ -644,13 +677,21 @@ static void bio_complete(struct search *s)
- }
- }
-
--static void do_bio_hook(struct search *s, struct bio *orig_bio)
-+static void do_bio_hook(struct search *s,
-+ struct bio *orig_bio,
-+ bio_end_io_t *end_io_fn)
- {
- struct bio *bio = &s->bio.bio;
-
- bio_init(bio, NULL, 0);
- __bio_clone_fast(bio, orig_bio);
-- bio->bi_end_io = request_endio;
-+ /*
-+ * bi_end_io can be set separately somewhere else, e.g. the
-+ * variants in,
-+ * - cache_bio->bi_end_io from cached_dev_cache_miss()
-+ * - n->bi_end_io from cache_lookup_fn()
-+ */
-+ bio->bi_end_io = end_io_fn;
- bio->bi_private = &s->cl;
-
- bio_cnt_set(bio, 3);
-@@ -676,7 +717,7 @@ static inline struct search *search_alloc(struct bio *bio,
- s = mempool_alloc(d->c->search, GFP_NOIO);
-
- closure_init(&s->cl, NULL);
-- do_bio_hook(s, bio);
-+ do_bio_hook(s, bio, request_endio);
-
- s->orig_bio = bio;
- s->cache_miss = NULL;
-@@ -743,10 +784,11 @@ static void cached_dev_read_error(struct closure *cl)
- trace_bcache_read_retry(s->orig_bio);
-
- s->iop.status = 0;
-- do_bio_hook(s, s->orig_bio);
-+ do_bio_hook(s, s->orig_bio, backing_request_endio);
-
- /* XXX: invalidate cache */
-
-+ /* I/O request sent to backing device */
- closure_bio_submit(s->iop.c, bio, cl);
- }
-
-@@ -859,7 +901,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
- bio_copy_dev(cache_bio, miss);
- cache_bio->bi_iter.bi_size = s->insert_bio_sectors << 9;
-
-- cache_bio->bi_end_io = request_endio;
-+ cache_bio->bi_end_io = backing_request_endio;
- cache_bio->bi_private = &s->cl;
-
- bch_bio_map(cache_bio, NULL);
-@@ -872,14 +914,16 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
- s->cache_miss = miss;
- s->iop.bio = cache_bio;
- bio_get(cache_bio);
-+ /* I/O request sent to backing device */
- closure_bio_submit(s->iop.c, cache_bio, &s->cl);
-
- return ret;
- out_put:
- bio_put(cache_bio);
- out_submit:
-- miss->bi_end_io = request_endio;
-+ miss->bi_end_io = backing_request_endio;
- miss->bi_private = &s->cl;
-+ /* I/O request sent to backing device */
- closure_bio_submit(s->iop.c, miss, &s->cl);
- return ret;
- }
-@@ -943,31 +987,48 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
- s->iop.bio = s->orig_bio;
- bio_get(s->iop.bio);
-
-- if ((bio_op(bio) != REQ_OP_DISCARD) ||
-- blk_queue_discard(bdev_get_queue(dc->bdev)))
-- closure_bio_submit(s->iop.c, bio, cl);
-+ if (bio_op(bio) == REQ_OP_DISCARD &&
-+ !blk_queue_discard(bdev_get_queue(dc->bdev)))
-+ goto insert_data;
-+
-+ /* I/O request sent to backing device */
-+ bio->bi_end_io = backing_request_endio;
-+ closure_bio_submit(s->iop.c, bio, cl);
-+
- } else if (s->iop.writeback) {
- bch_writeback_add(dc);
- s->iop.bio = bio;
-
- if (bio->bi_opf & REQ_PREFLUSH) {
-- /* Also need to send a flush to the backing device */
-- struct bio *flush = bio_alloc_bioset(GFP_NOIO, 0,
-- dc->disk.bio_split);
--
-+ /*
-+ * Also need to send a flush to the backing
-+ * device, if failed on backing device.
-+ */
-+ struct bio *flush;
-+
-+ flush = bio_alloc_bioset(GFP_NOIO, 0,
-+ dc->disk.bio_split);
-+ if (!flush) {
-+ s->iop.status = BLK_STS_RESOURCE;
-+ goto insert_data;
-+ }
- bio_copy_dev(flush, bio);
-- flush->bi_end_io = request_endio;
-+ flush->bi_end_io = backing_request_endio;
- flush->bi_private = cl;
- flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
--
-+ /* I/O request sent to backing device */
- closure_bio_submit(s->iop.c, flush, cl);
- }
-+ bch_writeback_add(dc);
-+
- } else {
- s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split);
--
-+ /* I/O request sent to backing device */
-+ bio->bi_end_io = backing_request_endio;
- closure_bio_submit(s->iop.c, bio, cl);
- }
-
-+insert_data:
- closure_call(&s->iop.cl, bch_data_insert, NULL, cl);
- continue_at(cl, cached_dev_write_complete, NULL);
- }
-@@ -981,6 +1042,7 @@ static void cached_dev_nodata(struct closure *cl)
- bch_journal_meta(s->iop.c, cl);
-
- /* If it's a flush, we send the flush to the backing device too */
-+ bio->bi_end_io = backing_request_endio;
- closure_bio_submit(s->iop.c, bio, cl);
-
- continue_at(cl, cached_dev_bio_complete, NULL);
-@@ -1078,6 +1140,7 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
- cached_dev_read(dc, s);
- }
- } else
-+ /* I/O request sent to backing device */
- detached_dev_do_request(d, bio);
-
- return BLK_QC_T_NONE;
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 97e3bb8e1aee..08a0b541a4da 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -265,6 +265,7 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent)
- bio->bi_private = dc;
-
- closure_get(cl);
-+ /* I/O request sent to backing device */
- __write_super(&dc->sb, bio);
-
- closure_return_with_destructor(cl, bch_write_bdev_super_unlock);
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index 3d7d8452e0de..4ebe0119ea7e 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -289,6 +289,7 @@ static void write_dirty(struct closure *cl)
- bio_set_dev(&io->bio, io->dc->bdev);
- io->bio.bi_end_io = dirty_endio;
-
-+ /* I/O request sent to backing device */
- closure_bio_submit(io->dc->disk.c, &io->bio, cl);
- }
-
---
-2.15.1
-
diff --git a/for-next/v3/v3-0012-bcache-add-io_disable-to-struct-cached_dev.patch b/for-next/v3/v3-0012-bcache-add-io_disable-to-struct-cached_dev.patch
deleted file mode 100644
index bbb79ec..0000000
--- a/for-next/v3/v3-0012-bcache-add-io_disable-to-struct-cached_dev.patch
+++ /dev/null
@@ -1,232 +0,0 @@
-From 662e22f2afdb792c184fc82bd9f6515e4aa5eb0c Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 10 Jan 2018 21:33:45 +0800
-Subject: [PATCH v3 12/13] bcache: add io_disable to struct cached_dev
-
-If a bcache device is configured to writeback mode, current code does not
-handle write I/O errors on backing devices properly.
-
-In writeback mode, a write request is written to the cache device, and
-later flushed to the backing device. If I/O fails when writing from the
-cache device to the backing device, bcache code just ignores the error and
-upper layer code is NOT notified that the backing device is broken.
-
-This patch tries to handle backing device failure like how the cache device
-failure is handled,
-- Add a error counter 'io_errors' and error limit 'error_limit' in struct
- cached_dev. Add another io_disable to struct cached_dev to disable I/Os
- on the problematic backing device.
-- When I/O error happens on backing device, increase io_errors counter. And
- if io_errors reaches error_limit, set cached_dev->io_disable to true, and
- stop the bcache device.
-
-The result is, if backing device is broken or disconnected, and I/O errors
-reach its error limit, backing device will be disabled and the associated
-bcache device will be removed from system.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Hannes Reinecke <hare@suse.com>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
----
- drivers/md/bcache/bcache.h | 7 +++++++
- drivers/md/bcache/io.c | 14 ++++++++++++++
- drivers/md/bcache/request.c | 14 ++++++++++++--
- drivers/md/bcache/super.c | 22 ++++++++++++++++++++++
- drivers/md/bcache/sysfs.c | 15 ++++++++++++++-
- 5 files changed, 69 insertions(+), 3 deletions(-)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index c41736960045..5a811959392d 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -360,6 +360,7 @@ struct cached_dev {
- unsigned sequential_cutoff;
- unsigned readahead;
-
-+ unsigned io_disable:1;
- unsigned verify:1;
- unsigned bypass_torture_test:1;
-
-@@ -379,6 +380,10 @@ struct cached_dev {
- unsigned writeback_rate_i_term_inverse;
- unsigned writeback_rate_p_term_inverse;
- unsigned writeback_rate_minimum;
-+
-+#define DEFAULT_CACHED_DEV_ERROR_LIMIT 64
-+ atomic_t io_errors;
-+ unsigned error_limit;
- };
-
- enum alloc_reserve {
-@@ -882,6 +887,7 @@ static inline void closure_bio_submit(struct cache_set *c,
-
- /* Forward declarations */
-
-+void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio);
- void bch_count_io_errors(struct cache *, blk_status_t, int, const char *);
- void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
- blk_status_t, const char *);
-@@ -909,6 +915,7 @@ int bch_bucket_alloc_set(struct cache_set *, unsigned,
- struct bkey *, int, bool);
- bool bch_alloc_sectors(struct cache_set *, struct bkey *, unsigned,
- unsigned, unsigned, bool);
-+bool bch_cached_dev_error(struct cached_dev *dc);
-
- __printf(2, 3)
- bool bch_cache_set_error(struct cache_set *, const char *, ...);
-diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
-index 8013ecbcdbda..7fac97ae036e 100644
---- a/drivers/md/bcache/io.c
-+++ b/drivers/md/bcache/io.c
-@@ -50,6 +50,20 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c,
- }
-
- /* IO errors */
-+void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio)
-+{
-+ char buf[BDEVNAME_SIZE];
-+ unsigned errors;
-+
-+ WARN_ONCE(!dc, "NULL pointer of struct cached_dev");
-+
-+ errors = atomic_add_return(1, &dc->io_errors);
-+ if (errors < dc->error_limit)
-+ pr_err("%s: IO error on backing device, unrecoverable",
-+ bio_devname(bio, buf));
-+ else
-+ bch_cached_dev_error(dc);
-+}
-
- void bch_count_io_errors(struct cache *ca,
- blk_status_t error,
-diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
-index ad4cf71f7eab..386b388ce296 100644
---- a/drivers/md/bcache/request.c
-+++ b/drivers/md/bcache/request.c
-@@ -637,6 +637,8 @@ static void backing_request_endio(struct bio *bio)
-
- if (bio->bi_status) {
- struct search *s = container_of(cl, struct search, cl);
-+ struct cached_dev *dc = container_of(s->d,
-+ struct cached_dev, disk);
- /*
- * If a bio has REQ_PREFLUSH for writeback mode, it is
- * speically assembled in cached_dev_write() for a non-zero
-@@ -657,6 +659,7 @@ static void backing_request_endio(struct bio *bio)
- }
- s->recoverable = false;
- /* should count I/O error for backing device here */
-+ bch_count_backing_io_errors(dc, bio);
- }
-
- bio_put(bio);
-@@ -1067,8 +1070,14 @@ static void detatched_dev_end_io(struct bio *bio)
- bio_data_dir(bio),
- &ddip->d->disk->part0, ddip->start_time);
-
-- kfree(ddip);
-+ if (bio->bi_status) {
-+ struct cached_dev *dc = container_of(ddip->d,
-+ struct cached_dev, disk);
-+ /* should count I/O error for backing device here */
-+ bch_count_backing_io_errors(dc, bio);
-+ }
-
-+ kfree(ddip);
- bio->bi_end_io(bio);
- }
-
-@@ -1107,7 +1116,8 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
- struct cached_dev *dc = container_of(d, struct cached_dev, disk);
- int rw = bio_data_dir(bio);
-
-- if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
-+ if (unlikely((d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags)) ||
-+ dc->io_disable)) {
- bio->bi_status = BLK_STS_IOERR;
- bio_endio(bio);
- return BLK_QC_T_NONE;
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 08a0b541a4da..14fce3623770 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1188,6 +1188,10 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
- max(dc->disk.disk->queue->backing_dev_info->ra_pages,
- q->backing_dev_info->ra_pages);
-
-+ atomic_set(&dc->io_errors, 0);
-+ dc->io_disable = false;
-+ dc->error_limit = DEFAULT_CACHED_DEV_ERROR_LIMIT;
-+
- bch_cached_dev_request_init(dc);
- bch_cached_dev_writeback_init(dc);
- return 0;
-@@ -1339,6 +1343,24 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size)
- return flash_dev_run(c, u);
- }
-
-+bool bch_cached_dev_error(struct cached_dev *dc)
-+{
-+ char name[BDEVNAME_SIZE];
-+
-+ if (!dc || test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags))
-+ return false;
-+
-+ dc->io_disable = true;
-+ /* make others know io_disable is true earlier */
-+ smp_mb();
-+
-+ pr_err("bcache: stop %s: too many IO errors on backing device %s\n",
-+ dc->disk.name, bdevname(dc->bdev, name));
-+
-+ bcache_device_stop(&dc->disk);
-+ return true;
-+}
-+
- /* Cache set */
-
- __printf(2, 3)
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index afb051bcfca1..7288927f2a47 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -131,7 +131,9 @@ SHOW(__bch_cached_dev)
- var_print(writeback_delay);
- var_print(writeback_percent);
- sysfs_hprint(writeback_rate, dc->writeback_rate.rate << 9);
--
-+ sysfs_hprint(io_errors, atomic_read(&dc->io_errors));
-+ sysfs_printf(io_error_limit, "%i", dc->error_limit);
-+ sysfs_printf(io_disable, "%i", dc->io_disable);
- var_print(writeback_rate_update_seconds);
- var_print(writeback_rate_i_term_inverse);
- var_print(writeback_rate_p_term_inverse);
-@@ -223,6 +225,14 @@ STORE(__cached_dev)
- d_strtoul(writeback_rate_i_term_inverse);
- d_strtoul_nonzero(writeback_rate_p_term_inverse);
-
-+ sysfs_strtoul_clamp(io_error_limit, dc->error_limit, 0, INT_MAX);
-+
-+ if (attr == &sysfs_io_disable) {
-+ int v = strtoul_or_return(buf);
-+
-+ dc->io_disable = v ? 1 : 0;
-+ }
-+
- d_strtoi_h(sequential_cutoff);
- d_strtoi_h(readahead);
-
-@@ -330,6 +340,9 @@ static struct attribute *bch_cached_dev_files[] = {
- &sysfs_writeback_rate_i_term_inverse,
- &sysfs_writeback_rate_p_term_inverse,
- &sysfs_writeback_rate_debug,
-+ &sysfs_errors,
-+ &sysfs_io_error_limit,
-+ &sysfs_io_disable,
- &sysfs_dirty_data,
- &sysfs_stripe_size,
- &sysfs_partial_stripes_expensive,
---
-2.15.1
-
diff --git a/for-next/v3/v3-0013-bcache-stop-bcache-device-when-backing-device-is-.patch b/for-next/v3/v3-0013-bcache-stop-bcache-device-when-backing-device-is-.patch
deleted file mode 100644
index 6180bd1..0000000
--- a/for-next/v3/v3-0013-bcache-stop-bcache-device-when-backing-device-is-.patch
+++ /dev/null
@@ -1,148 +0,0 @@
-From 93be9a0e7f3112074702dd070c07818b2fe3d568 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sat, 13 Jan 2018 17:31:44 +0800
-Subject: [PATCH v3 13/13] bcache: stop bcache device when backing device is
- offline
-
-Currently bcache does not handle backing device failure, if backing
-device is offline and disconnected from system, its bcache device can still
-be accessible. If the bcache device is in writeback mode, I/O requests can
-even succeed if the requests hit on cache device. That is to say, when and
-how bcache handles offline backing device is undefined.
-
-This patch tries to handle backing device offline in a rather simple way,
-- Add cached_dev->status_update_thread kernel thread to update backing
- device status in every 1 second.
-- Add cached_dev->offline_seconds to record how many seconds the backing
- device is observed to be offline. If the backing device is offline for
- BACKING_DEV_OFFLINE_TIMEOUT (5) seconds, set dc->io_disable to 1 and
- call bcache_device_stop() to stop the bcache device which is linked to
- the offline backing device.
-
-Now if a backing device is offline for BACKING_DEV_OFFLINE_TIMEOUT seconds,
-its bcache device will be removed, then user space application writing on
-it will get error immediately, and handle the device failure in time.
-
-This patch is quite simple, does not handle more complicated situations.
-Once the bcache device is stopped, users need to recover the backing
-device, register and attach it manually.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Hannes Reinecke <hare@suse.com>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
----
- drivers/md/bcache/bcache.h | 2 ++
- drivers/md/bcache/super.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++
- 2 files changed, 57 insertions(+)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 5a811959392d..9eedb35d01bc 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -338,6 +338,7 @@ struct cached_dev {
-
- struct keybuf writeback_keys;
-
-+ struct task_struct *status_update_thread;
- /*
- * Order the write-half of writeback operations strongly in dispatch
- * order. (Maintain LBA order; don't allow reads completing out of
-@@ -384,6 +385,7 @@ struct cached_dev {
- #define DEFAULT_CACHED_DEV_ERROR_LIMIT 64
- atomic_t io_errors;
- unsigned error_limit;
-+ unsigned offline_seconds;
- };
-
- enum alloc_reserve {
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 14fce3623770..85adf1e29d11 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -646,6 +646,11 @@ static int ioctl_dev(struct block_device *b, fmode_t mode,
- unsigned int cmd, unsigned long arg)
- {
- struct bcache_device *d = b->bd_disk->private_data;
-+ struct cached_dev *dc = container_of(d, struct cached_dev, disk);
-+
-+ if (dc->io_disable)
-+ return -EIO;
-+
- return d->ioctl(d, mode, cmd, arg);
- }
-
-@@ -856,6 +861,45 @@ static void calc_cached_dev_sectors(struct cache_set *c)
- c->cached_dev_sectors = sectors;
- }
-
-+#define BACKING_DEV_OFFLINE_TIMEOUT 5
-+static int cached_dev_status_update(void *arg)
-+{
-+ struct cached_dev *dc = arg;
-+ struct request_queue *q;
-+ char buf[BDEVNAME_SIZE];
-+
-+ /*
-+ * If this delayed worker is stopping outside, directly quit here.
-+ * dc->io_disable might be set via sysfs interface, so check it
-+ * here too.
-+ */
-+ while (!kthread_should_stop() && !dc->io_disable) {
-+ q = bdev_get_queue(dc->bdev);
-+ if (blk_queue_dying(q))
-+ dc->offline_seconds++;
-+ else
-+ dc->offline_seconds = 0;
-+
-+ if (dc->offline_seconds >= BACKING_DEV_OFFLINE_TIMEOUT) {
-+ pr_err("%s: device offline for %d seconds",
-+ bdevname(dc->bdev, buf),
-+ BACKING_DEV_OFFLINE_TIMEOUT);
-+ pr_err("%s: disable I/O request due to backing "
-+ "device offline", dc->disk.name);
-+ dc->io_disable = true;
-+ /* let others know earlier that io_disable is true */
-+ smp_mb();
-+ bcache_device_stop(&dc->disk);
-+ break;
-+ }
-+
-+ schedule_timeout_interruptible(HZ);
-+ }
-+
-+ dc->status_update_thread = NULL;
-+ return 0;
-+}
-+
- void bch_cached_dev_run(struct cached_dev *dc)
- {
- struct bcache_device *d = &dc->disk;
-@@ -898,6 +942,15 @@ void bch_cached_dev_run(struct cached_dev *dc)
- if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") ||
- sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache"))
- pr_debug("error creating sysfs link");
-+
-+ dc->status_update_thread = kthread_run(cached_dev_status_update,
-+ dc,
-+ "bcache_status_update");
-+ if (IS_ERR(dc->status_update_thread)) {
-+ pr_warn("bcache: failed to create bcache_status_update "
-+ "kthread, continue to run without monitoring backing "
-+ "device status");
-+ }
- }
-
- /*
-@@ -1118,6 +1171,8 @@ static void cached_dev_free(struct closure *cl)
- kthread_stop(dc->writeback_thread);
- if (dc->writeback_write_wq)
- destroy_workqueue(dc->writeback_write_wq);
-+ if (!IS_ERR_OR_NULL(dc->status_update_thread))
-+ kthread_stop(dc->status_update_thread);
-
- if (atomic_read(&dc->running))
- bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
---
-2.15.1
-
diff --git a/for-next/v4/v4-0000-cover-letter.patch b/for-next/v4/v4-0000-cover-letter.patch
deleted file mode 100644
index 0327afe..0000000
--- a/for-next/v4/v4-0000-cover-letter.patch
+++ /dev/null
@@ -1,91 +0,0 @@
-From 86e6c96037b81ca6d302e1e7d4342fd1decc8814 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sat, 27 Jan 2018 20:24:53 +0800
-Subject: [PATCH v4 00/13] bcache: device failure handling improvement
-
-Hi maintainers and folks,
-
-This patch set tries to improve bcache device failure handling, includes
-cache device and backing device failures.
-
-The basic idea to handle failed cache device is,
-- Unregister cache set
-- Detach all backing devices which are attached to this cache set
-- Stop all the detached bcache devices (configurable)
-- Stop all flash only volume on the cache set
-The above process is named 'cache set retire' by me. The result of cache
-set retire is, cache set and bcache devices are all removed, following
-I/O requests will get failed immediately to notify upper layer or user
-space code that the cache device is failed or disconnected.
-
-For failed backing device, there are two kinds of failures to handle,
-- If device is disconnected, and kernel thread dc->status_update_thread
- finds it is offline for BACKING_DEV_OFFLINE_TIMEOUT (5) seconds, the
- kernel thread will set dc->io_disable and call bcache_device_stop() to
- stop and remove the bcache device from system.
-- If device is alive but returns too many I/O errors, after errors number
- exceeds dc->error_limit, call bch_cached_dev_error() to set
- dc->io_disable and stop bcache device. Then the broken backing device
- and its bcache device will be removed from system.
-
-The v4 patch set combines two v3 patches into one, and adds one more patch
-to permit users to explicitly avoid stopping attached bcache device from a
-retiring cache set. This is a configurable option suggested by
-Nix <nix@esperi.org.uk>.
-
-Some patches of this patch set is already in bcache-for-next and not
-included here anymore. Most of the patches are reviewed by Hannes Reinecke
-and Junhui Tang. There are still several patches need to be reviewed,
-- [PATCH v4 05/13] bcache: stop dc->writeback_rate_update properly
-- [PATCH v4 13/13] bcache: add stop_attached_devs_on_fail to struct
- cached_dev
-
-Any comment, question and review are warmly welcome. Thanks in advance.
-
-Changelog:
-v4: add per-cached_dev option stop_attached_devs_on_fail to avoid stopping
- attached bcache device from a retiring cache set.
-v3: fix detach issue found in v2 patch set.
-v2: fixes all problems found in v1 review.
- add patches to handle backing device failure.
- add one more patch to set writeback_rate_update_seconds range.
- include a patch from Junhui Tang.
-v1: the initial version, only handles cache device failure.
-
-Coly Li
----
-
-Coly Li (12):
- bcache: set writeback_rate_update_seconds in range [1, 60] seconds
- bcache: properly set task state in bch_writeback_thread()
- bcache: fix cached_dev->count usage for bch_cache_set_error()
- bcache: quit dc->writeback_thread when BCACHE_DEV_DETACHING is set
- bcache: stop dc->writeback_rate_update properly
- bcache: set error_limit correctly
- bcache: add CACHE_SET_IO_DISABLE to struct cache_set flags
- bcache: stop all attached bcache devices for a retired cache set
- bcache: add backing_request_endio() for bi_end_io of attached backing
- device I/O
- bcache: add io_disable to struct cached_dev
- bcache: stop bcache device when backing device is offline
- bcache: add stop_attached_devs_on_fail to struct cached_dev
-
-Tang Junhui (1):
- bcache: fix inaccurate io state for detached bcache devices
-
- drivers/md/bcache/alloc.c | 5 +-
- drivers/md/bcache/bcache.h | 38 ++++++++-
- drivers/md/bcache/btree.c | 10 ++-
- drivers/md/bcache/io.c | 16 +++-
- drivers/md/bcache/journal.c | 4 +-
- drivers/md/bcache/request.c | 187 +++++++++++++++++++++++++++++++++++-------
- drivers/md/bcache/super.c | 181 ++++++++++++++++++++++++++++++++++++----
- drivers/md/bcache/sysfs.c | 55 ++++++++++++-
- drivers/md/bcache/util.h | 6 --
- drivers/md/bcache/writeback.c | 99 ++++++++++++++++++----
- drivers/md/bcache/writeback.h | 5 +-
- 11 files changed, 522 insertions(+), 84 deletions(-)
-
---
-2.15.1
-
diff --git a/for-next/v4/v4-0001-bcache-set-writeback_rate_update_seconds-in-range.patch b/for-next/v4/v4-0001-bcache-set-writeback_rate_update_seconds-in-range.patch
deleted file mode 100644
index 51edd0b..0000000
--- a/for-next/v4/v4-0001-bcache-set-writeback_rate_update_seconds-in-range.patch
+++ /dev/null
@@ -1,73 +0,0 @@
-From 387baf9326a1abdf2005447c5c2a24f37b6681c1 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sat, 13 Jan 2018 15:11:03 +0800
-Subject: [PATCH v4 01/13] bcache: set writeback_rate_update_seconds in range
- [1, 60] seconds
-
-dc->writeback_rate_update_seconds can be set via sysfs and its value can
-be set to [1, ULONG_MAX]. It does not make sense to set such a large
-value, 60 seconds is long enough value considering the default 5 seconds
-works well for long time.
-
-Because dc->writeback_rate_update is a special delayed work, it re-arms
-itself inside the delayed work routine update_writeback_rate(). When
-stopping it by cancel_delayed_work_sync(), there should be a timeout to
-wait and make sure the re-armed delayed work is stopped too. A small max
-value of dc->writeback_rate_update_seconds is also helpful to decide a
-reasonable small timeout.
-
-This patch limits sysfs interface to set dc->writeback_rate_update_seconds
-in range of [1, 60] seconds, and replaces the hand-coded number by macros.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Reviewed-by: Hannes Reinecke <hare@suse.com>
----
- drivers/md/bcache/sysfs.c | 3 +++
- drivers/md/bcache/writeback.c | 2 +-
- drivers/md/bcache/writeback.h | 3 +++
- 3 files changed, 7 insertions(+), 1 deletion(-)
-
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index b4184092c727..a74a752c9e0f 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -215,6 +215,9 @@ STORE(__cached_dev)
- sysfs_strtoul_clamp(writeback_rate,
- dc->writeback_rate.rate, 1, INT_MAX);
-
-+ sysfs_strtoul_clamp(writeback_rate_update_seconds,
-+ dc->writeback_rate_update_seconds,
-+ 1, WRITEBACK_RATE_UPDATE_SECS_MAX);
- d_strtoul_nonzero(writeback_rate_update_seconds);
- d_strtoul(writeback_rate_i_term_inverse);
- d_strtoul_nonzero(writeback_rate_p_term_inverse);
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index 51306a19ab03..0ade883b6316 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -652,7 +652,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
- dc->writeback_rate.rate = 1024;
- dc->writeback_rate_minimum = 8;
-
-- dc->writeback_rate_update_seconds = 5;
-+ dc->writeback_rate_update_seconds = WRITEBACK_RATE_UPDATE_SECS_DEFAULT;
- dc->writeback_rate_p_term_inverse = 40;
- dc->writeback_rate_i_term_inverse = 10000;
-
-diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
-index 66f1c527fa24..587b25599856 100644
---- a/drivers/md/bcache/writeback.h
-+++ b/drivers/md/bcache/writeback.h
-@@ -8,6 +8,9 @@
- #define MAX_WRITEBACKS_IN_PASS 5
- #define MAX_WRITESIZE_IN_PASS 5000 /* *512b */
-
-+#define WRITEBACK_RATE_UPDATE_SECS_MAX 60
-+#define WRITEBACK_RATE_UPDATE_SECS_DEFAULT 5
-+
- /*
- * 14 (16384ths) is chosen here as something that each backing device
- * should be a reasonable fraction of the share, and not to blow up
---
-2.15.1
-
diff --git a/for-next/v4/v4-0002-bcache-properly-set-task-state-in-bch_writeback_t.patch b/for-next/v4/v4-0002-bcache-properly-set-task-state-in-bch_writeback_t.patch
deleted file mode 100644
index 113dd97..0000000
--- a/for-next/v4/v4-0002-bcache-properly-set-task-state-in-bch_writeback_t.patch
+++ /dev/null
@@ -1,112 +0,0 @@
-From a979b8e27c45b69c2e1e2a5ef06257ca5fda4b66 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Fri, 26 Jan 2018 13:38:41 +0800
-Subject: [PATCH v4 02/13] bcache: properly set task state in
- bch_writeback_thread()
-
-Kernel thread routine bch_writeback_thread() has the following code block,
-
-447 down_write(&dc->writeback_lock);
-448~450 if (check conditions) {
-451 up_write(&dc->writeback_lock);
-452 set_current_state(TASK_INTERRUPTIBLE);
-453
-454 if (kthread_should_stop())
-455 return 0;
-456
-457 schedule();
-458 continue;
-459 }
-
-If condition check is true, its task state is set to TASK_INTERRUPTIBLE
-and call schedule() to wait for others to wake up it.
-
-There are 2 issues in current code,
-1, Task state is set to TASK_INTERRUPTIBLE after the condition checks, if
- another process changes the condition and call wake_up_process(dc->
- writeback_thread), then at line 452 task state is set back to
- TASK_INTERRUPTIBLE, the writeback kernel thread will lose a chance to be
- waken up.
-2, At line 454 if kthread_should_stop() is true, writeback kernel thread
- will return to kernel/kthread.c:kthread() with TASK_INTERRUPTIBLE and
- call do_exit(). It is not good to enter do_exit() with task state
- TASK_INTERRUPTIBLE, in following code path might_sleep() is called and a
- warning message is reported by __might_sleep(): "WARNING: do not call
- blocking ops when !TASK_RUNNING; state=1 set at [xxxx]".
-
-For the first issue, task state should be set before condition checks.
-Indeed because dc->writeback_lock is required when modifying all the
-conditions, calling set_current_state() inside code block where dc->
-writeback_lock is hold is safe. But this is quite implicit, so I still move
-set_current_state() before all the condition checks.
-
-For the second issue, frankly speaking it does not hurt when kernel thread
-exits with TASK_INTERRUPTIBLE state, but this warning message scares users,
-makes them feel there might be something risky with bcache and hurt their
-data. Setting task state to TASK_RUNNING before returning fixes this
-problem.
-
-In alloc.c:allocator_wait(), there is also a similar issue, and is also
-fixed in this patch.
-
-Changelog:
-v3: merge two similar fixes into one patch
-v2: fix the race issue in v1 patch.
-v1: initial buggy fix.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Reviewed-by: Hannes Reinecke <hare@suse.de>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
----
- drivers/md/bcache/alloc.c | 4 +++-
- drivers/md/bcache/writeback.c | 7 +++++--
- 2 files changed, 8 insertions(+), 3 deletions(-)
-
-diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
-index 6cc6c0f9c3a9..458e1d38577d 100644
---- a/drivers/md/bcache/alloc.c
-+++ b/drivers/md/bcache/alloc.c
-@@ -287,8 +287,10 @@ do { \
- break; \
- \
- mutex_unlock(&(ca)->set->bucket_lock); \
-- if (kthread_should_stop()) \
-+ if (kthread_should_stop()) { \
-+ set_current_state(TASK_RUNNING); \
- return 0; \
-+ } \
- \
- schedule(); \
- mutex_lock(&(ca)->set->bucket_lock); \
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index 0ade883b6316..f1d2fc15abcc 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -564,18 +564,21 @@ static int bch_writeback_thread(void *arg)
-
- while (!kthread_should_stop()) {
- down_write(&dc->writeback_lock);
-+ set_current_state(TASK_INTERRUPTIBLE);
- if (!atomic_read(&dc->has_dirty) ||
- (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
- !dc->writeback_running)) {
- up_write(&dc->writeback_lock);
-- set_current_state(TASK_INTERRUPTIBLE);
-
-- if (kthread_should_stop())
-+ if (kthread_should_stop()) {
-+ set_current_state(TASK_RUNNING);
- return 0;
-+ }
-
- schedule();
- continue;
- }
-+ set_current_state(TASK_RUNNING);
-
- searched_full_index = refill_dirty(dc);
-
---
-2.15.1
-
diff --git a/for-next/v4/v4-0003-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch b/for-next/v4/v4-0003-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch
deleted file mode 100644
index f85123b..0000000
--- a/for-next/v4/v4-0003-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch
+++ /dev/null
@@ -1,178 +0,0 @@
-From 15d97588692d8ddd4b1d0c628494422f33dfd537 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Mon, 8 Jan 2018 23:05:58 +0800
-Subject: [PATCH v4 03/13] bcache: fix cached_dev->count usage for
- bch_cache_set_error()
-
-When bcache metadata I/O fails, bcache will call bch_cache_set_error()
-to retire the whole cache set. The expected behavior to retire a cache
-set is to unregister the cache set, and unregister all backing device
-attached to this cache set, then remove sysfs entries of the cache set
-and all attached backing devices, finally release memory of structs
-cache_set, cache, cached_dev and bcache_device.
-
-In my testing when journal I/O failure triggered by disconnected cache
-device, sometimes the cache set cannot be retired, and its sysfs
-entry /sys/fs/bcache/<uuid> still exists and the backing device also
-references it. This is not expected behavior.
-
-When metadata I/O fails, the call sequence to retire whole cache set is,
- bch_cache_set_error()
- bch_cache_set_unregister()
- bch_cache_set_stop()
- __cache_set_unregister() <- called as callback by calling
- closure_queue(&c->caching)
- cache_set_flush() <- called as a callback when refcount
- of cache_set->caching is 0
- cache_set_free() <- called as a callback when refcount
- of cache_set->cl is 0
- bch_cache_set_release() <- called as a callback when refcount
- of cache_set->kobj is 0
-
-I find if kernel thread bch_writeback_thread() quits while-loop when
-kthread_should_stop() is true and searched_full_index is false, closure
-callback cache_set_flush() set by continue_at() will never be called. The
-result is, bcache fails to retire whole cache set.
-
-cache_set_flush() will be called when refcount of closure c->caching is 0,
-and in function bcache_device_detach() refcount of closure c->caching is
-released to 0 by closure_put(). In metadata error code path, function
-bcache_device_detach() is called by cached_dev_detach_finish(). This is a
-callback routine being called when cached_dev->count is 0. This refcount
-is decreased by cached_dev_put().
-
-The above dependence indicates, cache_set_flush() will be called when
-refcount of cache_set->cl is 0, and refcount of cache_set->cl to be 0
-when refcount of cached_dev->count is 0.
-
-The reason why sometimes cached_dev->count is not 0 (when metadata I/O fails
-and bch_cache_set_error() called) is, in bch_writeback_thread(), refcount
-of cached_dev is not decreased properly.
-
-In bch_writeback_thread(), cached_dev_put() is called only when
-searched_full_index is true and cached_dev->writeback_keys is empty, a.k.a
-there is no dirty data on cache. In most of run time it is correct, but
-when bch_writeback_thread() quits the while-loop while cache is still
-dirty, current code forgets to call cached_dev_put() before this kernel
-thread exits. This is why sometimes cache_set_flush() is not executed and
-cache set fails to be retired.
-
-The reason to call cached_dev_put() in bch_writeback_rate() is, when the
-cache device changes from clean to dirty, cached_dev_get() is called, to
-make sure during writeback operations both backing and cache devices
-won't be released.
-
-Adding following code in bch_writeback_thread() does not work,
- static int bch_writeback_thread(void *arg)
- }
-
-+ if (atomic_read(&dc->has_dirty))
-+ cached_dev_put()
-+
- return 0;
- }
-because writeback kernel thread can be waken up and start via sysfs entry:
- echo 1 > /sys/block/bcache<N>/bcache/writeback_running
-It is difficult to check whether backing device is dirty without race and
-extra lock. So the above modification will introduce potential refcount
-underflow in some conditions.
-
-The correct fix is, to take cached dev refcount when creating the kernel
-thread, and put it before the kernel thread exits. Then bcache does not
-need to take a cached dev refcount when cache turns from clean to dirty,
-or to put a cached dev refcount when cache turns from dirty to clean. The
-writeback kernel thread is always safe to reference data structure from
-cache set, cache and cached device (because a refcount of cache device is
-taken for it already), and no matter the kernel thread is stopped by I/O
-errors or system reboot, cached_dev->count can always be used correctly.
-
-The patch is simple, but understanding how it works is quite complicated.
-
-Changelog:
-v2: set dc->writeback_thread to NULL in this patch, as suggested by Hannes.
-v1: initial version for review.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Reviewed-by: Hannes Reinecke <hare@suse.com>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
----
- drivers/md/bcache/super.c | 1 -
- drivers/md/bcache/writeback.c | 11 ++++++++---
- drivers/md/bcache/writeback.h | 2 --
- 3 files changed, 8 insertions(+), 6 deletions(-)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 133b81225ea9..d14e09cce2f6 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1052,7 +1052,6 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
- if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
- bch_sectors_dirty_init(&dc->disk);
- atomic_set(&dc->has_dirty, 1);
-- refcount_inc(&dc->count);
- bch_writeback_queue(dc);
- }
-
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index f1d2fc15abcc..b280c134dd4d 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -572,7 +572,7 @@ static int bch_writeback_thread(void *arg)
-
- if (kthread_should_stop()) {
- set_current_state(TASK_RUNNING);
-- return 0;
-+ break;
- }
-
- schedule();
-@@ -585,7 +585,6 @@ static int bch_writeback_thread(void *arg)
- if (searched_full_index &&
- RB_EMPTY_ROOT(&dc->writeback_keys.keys)) {
- atomic_set(&dc->has_dirty, 0);
-- cached_dev_put(dc);
- SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN);
- bch_write_bdev_super(dc, NULL);
- }
-@@ -606,6 +605,9 @@ static int bch_writeback_thread(void *arg)
- }
- }
-
-+ dc->writeback_thread = NULL;
-+ cached_dev_put(dc);
-+
- return 0;
- }
-
-@@ -669,10 +671,13 @@ int bch_cached_dev_writeback_start(struct cached_dev *dc)
- if (!dc->writeback_write_wq)
- return -ENOMEM;
-
-+ cached_dev_get(dc);
- dc->writeback_thread = kthread_create(bch_writeback_thread, dc,
- "bcache_writeback");
-- if (IS_ERR(dc->writeback_thread))
-+ if (IS_ERR(dc->writeback_thread)) {
-+ cached_dev_put(dc);
- return PTR_ERR(dc->writeback_thread);
-+ }
-
- schedule_delayed_work(&dc->writeback_rate_update,
- dc->writeback_rate_update_seconds * HZ);
-diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
-index 587b25599856..0bba8f1c6cdf 100644
---- a/drivers/md/bcache/writeback.h
-+++ b/drivers/md/bcache/writeback.h
-@@ -105,8 +105,6 @@ static inline void bch_writeback_add(struct cached_dev *dc)
- {
- if (!atomic_read(&dc->has_dirty) &&
- !atomic_xchg(&dc->has_dirty, 1)) {
-- refcount_inc(&dc->count);
--
- if (BDEV_STATE(&dc->sb) != BDEV_STATE_DIRTY) {
- SET_BDEV_STATE(&dc->sb, BDEV_STATE_DIRTY);
- /* XXX: should do this synchronously */
---
-2.15.1
-
diff --git a/for-next/v4/v4-0004-bcache-quit-dc-writeback_thread-when-BCACHE_DEV_D.patch b/for-next/v4/v4-0004-bcache-quit-dc-writeback_thread-when-BCACHE_DEV_D.patch
deleted file mode 100644
index 349a3d1..0000000
--- a/for-next/v4/v4-0004-bcache-quit-dc-writeback_thread-when-BCACHE_DEV_D.patch
+++ /dev/null
@@ -1,83 +0,0 @@
-From f958950022560d243ae2f77c76b5063a583a625c Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sun, 14 Jan 2018 21:41:57 +0800
-Subject: [PATCH v4 04/13] bcache: quit dc->writeback_thread when
- BCACHE_DEV_DETACHING is set
-
-In patch "bcache: fix cached_dev->count usage for bch_cache_set_error()",
-cached_dev_get() is called when creating dc->writeback_thread, and
-cached_dev_put() is called when exiting dc->writeback_thread. This
-modification works well unless people detach the bcache device manually by
- 'echo 1 > /sys/block/bcache<N>/bcache/detach'
-Because this sysfs interface only calls bch_cached_dev_detach() which wakes
-up dc->writeback_thread but does not stop it. The reason is, before patch
-"bcache: fix cached_dev->count usage for bch_cache_set_error()", inside
-bch_writeback_thread(), if cache is not dirty after writeback,
-cached_dev_put() will be called here. And in cached_dev_make_request() when
-a new write request makes cache from clean to dirty, cached_dev_get() will
-be called there. Since we no longer operate on dc->count in these locations,
-refcount dc->count cannot be dropped after cache becomes clean, and
-cached_dev_detach_finish() won't be called to detach bcache device.
-
-This patch fixes the issue by checking whether BCACHE_DEV_DETACHING is
-set inside bch_writeback_thread(). If this bit is set and cache is clean
-(no existing writeback_keys), break the while-loop, call cached_dev_put()
-and quit the writeback thread.
-
-Please note that if cache is still dirty, even when BCACHE_DEV_DETACHING is
-set the writeback thread should continue to perform writeback; this is the
-original design of manual detach.
-
-I composed a separate patch because patch "bcache: fix cached_dev->count
-usage for bch_cache_set_error()" already got a "Reviewed-by:" from Hannes
-Reinecke. Also, this fix is not trivial and deserves a separate patch.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Hannes Reinecke <hare@suse.com>
-Cc: Huijun Tang <tang.junhui@zte.com.cn>
----
- drivers/md/bcache/writeback.c | 20 +++++++++++++++++---
- 1 file changed, 17 insertions(+), 3 deletions(-)
-
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index b280c134dd4d..4dbeaaa575bf 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -565,9 +565,15 @@ static int bch_writeback_thread(void *arg)
- while (!kthread_should_stop()) {
- down_write(&dc->writeback_lock);
- set_current_state(TASK_INTERRUPTIBLE);
-- if (!atomic_read(&dc->has_dirty) ||
-- (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
-- !dc->writeback_running)) {
-+ /*
-+ * If the bcache device is detaching, skip here and continue
-+ * to perform writeback. Otherwise, if no dirty data on cache,
-+ * or there is dirty data on cache but writeback is disabled,
-+ * the writeback thread should sleep here and wait for others
-+ * to wake it up.
-+ */
-+ if (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
-+ (!atomic_read(&dc->has_dirty) || !dc->writeback_running)) {
- up_write(&dc->writeback_lock);
-
- if (kthread_should_stop()) {
-@@ -587,6 +593,14 @@ static int bch_writeback_thread(void *arg)
- atomic_set(&dc->has_dirty, 0);
- SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN);
- bch_write_bdev_super(dc, NULL);
-+ /*
-+ * If bcache device is detaching via sysfs interface,
-+ * writeback thread should stop after there is no dirty
-+ * data on cache. BCACHE_DEV_DETACHING flag is set in
-+ * bch_cached_dev_detach().
-+ */
-+ if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
-+ break;
- }
-
- up_write(&dc->writeback_lock);
---
-2.15.1
-
diff --git a/for-next/v4/v4-0005-bcache-stop-dc-writeback_rate_update-properly.patch b/for-next/v4/v4-0005-bcache-stop-dc-writeback_rate_update-properly.patch
deleted file mode 100644
index 2e6ce9b..0000000
--- a/for-next/v4/v4-0005-bcache-stop-dc-writeback_rate_update-properly.patch
+++ /dev/null
@@ -1,266 +0,0 @@
-From bd0fe247c2e49cb2e19edb4bf54e8670cb315eb3 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sat, 13 Jan 2018 15:48:39 +0800
-Subject: [PATCH v4 05/13] bcache: stop dc->writeback_rate_update properly
-
-struct delayed_work writeback_rate_update in struct cache_dev is a delayed
-worker to call function update_writeback_rate() in period (the interval is
-defined by dc->writeback_rate_update_seconds).
-
-When a metadata I/O error happens on cache device, bcache error handling
-routine bch_cache_set_error() will call bch_cache_set_unregister() to
-retire whole cache set. On the unregister code path, this delayed work is
-stopped by calling cancel_delayed_work_sync(&dc->writeback_rate_update).
-
-dc->writeback_rate_update differs from other delayed works in bcache: its
-routine update_writeback_rate() re-arms the delayed work itself. That means
-when cancel_delayed_work_sync() returns, this delayed work can still be
-executed after several seconds defined by
-dc->writeback_rate_update_seconds.
-
-The problem is, after cancel_delayed_work_sync() returns, the cache set
-unregister code path will continue and release memory of struct cache set.
-Then the delayed work is scheduled to run, __update_writeback_rate()
-will reference the already released cache_set memory, and trigger a NULL
-pointer dereference fault.
-
-This patch introduces two more bcache device flags,
-- BCACHE_DEV_WB_RUNNING
- bit set: bcache device is in writeback mode and running, it is OK for
- dc->writeback_rate_update to re-arm itself.
- bit clear:bcache device is trying to stop dc->writeback_rate_update,
- this delayed work should not re-arm itself and quit.
-- BCACHE_DEV_RATE_DW_RUNNING
- bit set: routine update_writeback_rate() is executing.
- bit clear: routine update_writeback_rate() quits.
-
-This patch also adds a function cancel_writeback_rate_update_dwork() to
-wait for dc->writeback_rate_update to quit before cancelling it by calling
-cancel_delayed_work_sync(). In order to avoid a deadlock if
-dc->writeback_rate_update quits unexpectedly, after time_out seconds this
-function will give up and continue to call cancel_delayed_work_sync().
-
-Here is how this patch properly stops the self re-armed delayed work with
-the above flags and function.
-
-update_writeback_rate() sets BCACHE_DEV_RATE_DW_RUNNING at its beginning
-and clears BCACHE_DEV_RATE_DW_RUNNING at its end. Before calling
-cancel_writeback_rate_update_dwork() clear flag BCACHE_DEV_WB_RUNNING.
-
-Before calling cancel_delayed_work_sync() wait until flag
-BCACHE_DEV_RATE_DW_RUNNING is clear. So when calling
-cancel_delayed_work_sync(), dc->writeback_rate_update must be already re-
-armed, or have quit by seeing BCACHE_DEV_WB_RUNNING cleared. In both cases
-delayed work routine update_writeback_rate() won't be executed after
-cancel_delayed_work_sync() returns.
-
-Inside update_writeback_rate() before calling schedule_delayed_work(), flag
-BCACHE_DEV_WB_RUNNING is checked before. If this flag is cleared, it means
-someone is about to stop the delayed work. Because flag
-BCACHE_DEV_RATE_DW_RUNNING is set already and cancel_delayed_work_sync()
-has to wait for this flag to be cleared, we don't need to worry about race
-condition here.
-
-If update_writeback_rate() is scheduled to run after checking
-BCACHE_DEV_RATE_DW_RUNNING and before calling cancel_delayed_work_sync()
-in cancel_writeback_rate_update_dwork(), it is also safe. Because at this
-moment BCACHE_DEV_WB_RUNNING is cleared with memory barrier. As I mentioned
-previously, update_writeback_rate() will see BCACHE_DEV_WB_RUNNING is clear
-and quit immediately.
-
-Because there are more dependencies inside update_writeback_rate() on struct
-cache_set memory, dc->writeback_rate_update is not a simple self re-arm
-delayed work. After trying many different methods (e.g. hold dc->count, or
-use locks), this is the only way I can find which works to properly stop
-dc->writeback_rate_update delayed work.
-
-Changelog:
-v2: Try to fix the race issue which is pointed out by Junhui.
-v1: The initial version for review
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Hannes Reinecke <hare@suse.com>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
----
- drivers/md/bcache/bcache.h | 9 +++++----
- drivers/md/bcache/super.c | 39 +++++++++++++++++++++++++++++++++++----
- drivers/md/bcache/sysfs.c | 3 ++-
- drivers/md/bcache/writeback.c | 29 ++++++++++++++++++++++++++++-
- 4 files changed, 70 insertions(+), 10 deletions(-)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 5e2d4e80198e..88d938c8d027 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -258,10 +258,11 @@ struct bcache_device {
- struct gendisk *disk;
-
- unsigned long flags;
--#define BCACHE_DEV_CLOSING 0
--#define BCACHE_DEV_DETACHING 1
--#define BCACHE_DEV_UNLINK_DONE 2
--
-+#define BCACHE_DEV_CLOSING 0
-+#define BCACHE_DEV_DETACHING 1
-+#define BCACHE_DEV_UNLINK_DONE 2
-+#define BCACHE_DEV_WB_RUNNING 4
-+#define BCACHE_DEV_RATE_DW_RUNNING 8
- unsigned nr_stripes;
- unsigned stripe_size;
- atomic_t *stripe_sectors_dirty;
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index d14e09cce2f6..6d888e8fea8c 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -899,6 +899,32 @@ void bch_cached_dev_run(struct cached_dev *dc)
- pr_debug("error creating sysfs link");
- }
-
-+/*
-+ * If BCACHE_DEV_RATE_DW_RUNNING is set, it means routine of the delayed
-+ * work dc->writeback_rate_update is running. Wait until the routine
-+ * quits (BCACHE_DEV_RATE_DW_RUNNING is clear), then continue to
-+ * cancel it. If BCACHE_DEV_RATE_DW_RUNNING is not clear after time_out
-+ * seconds, give up waiting here and continue to cancel it too.
-+ */
-+static void cancel_writeback_rate_update_dwork(struct cached_dev *dc)
-+{
-+ int time_out = WRITEBACK_RATE_UPDATE_SECS_MAX * HZ;
-+
-+ do {
-+ if (!test_bit(BCACHE_DEV_RATE_DW_RUNNING,
-+ &dc->disk.flags))
-+ break;
-+ time_out--;
-+ schedule_timeout_interruptible(1);
-+ } while (time_out > 0);
-+
-+ if (time_out == 0)
-+ pr_warn("bcache: give up waiting for "
-+ "dc->writeback_write_update to quit");
-+
-+ cancel_delayed_work_sync(&dc->writeback_rate_update);
-+}
-+
- static void cached_dev_detach_finish(struct work_struct *w)
- {
- struct cached_dev *dc = container_of(w, struct cached_dev, detach);
-@@ -911,7 +937,9 @@ static void cached_dev_detach_finish(struct work_struct *w)
-
- mutex_lock(&bch_register_lock);
-
-- cancel_delayed_work_sync(&dc->writeback_rate_update);
-+ if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
-+ cancel_writeback_rate_update_dwork(dc);
-+
- if (!IS_ERR_OR_NULL(dc->writeback_thread)) {
- kthread_stop(dc->writeback_thread);
- dc->writeback_thread = NULL;
-@@ -954,6 +982,7 @@ void bch_cached_dev_detach(struct cached_dev *dc)
- closure_get(&dc->disk.cl);
-
- bch_writeback_queue(dc);
-+
- cached_dev_put(dc);
- }
-
-@@ -1079,14 +1108,16 @@ static void cached_dev_free(struct closure *cl)
- {
- struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
-
-- cancel_delayed_work_sync(&dc->writeback_rate_update);
-+ mutex_lock(&bch_register_lock);
-+
-+ if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
-+ cancel_writeback_rate_update_dwork(dc);
-+
- if (!IS_ERR_OR_NULL(dc->writeback_thread))
- kthread_stop(dc->writeback_thread);
- if (dc->writeback_write_wq)
- destroy_workqueue(dc->writeback_write_wq);
-
-- mutex_lock(&bch_register_lock);
--
- if (atomic_read(&dc->running))
- bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
- bcache_device_free(&dc->disk);
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index a74a752c9e0f..b7166c504cdb 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -304,7 +304,8 @@ STORE(bch_cached_dev)
- bch_writeback_queue(dc);
-
- if (attr == &sysfs_writeback_percent)
-- schedule_delayed_work(&dc->writeback_rate_update,
-+ if (!test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
-+ schedule_delayed_work(&dc->writeback_rate_update,
- dc->writeback_rate_update_seconds * HZ);
-
- mutex_unlock(&bch_register_lock);
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index 4dbeaaa575bf..8f98ef1038d3 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -115,6 +115,21 @@ static void update_writeback_rate(struct work_struct *work)
- struct cached_dev,
- writeback_rate_update);
-
-+ /*
-+ * should check BCACHE_DEV_RATE_DW_RUNNING before calling
-+ * cancel_delayed_work_sync().
-+ */
-+ set_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
-+ /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
-+ smp_mb();
-+
-+ if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) {
-+ clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
-+ /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
-+ smp_mb();
-+ return;
-+ }
-+
- down_read(&dc->writeback_lock);
-
- if (atomic_read(&dc->has_dirty) &&
-@@ -123,8 +138,18 @@ static void update_writeback_rate(struct work_struct *work)
-
- up_read(&dc->writeback_lock);
-
-- schedule_delayed_work(&dc->writeback_rate_update,
-+ if (test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) {
-+ schedule_delayed_work(&dc->writeback_rate_update,
- dc->writeback_rate_update_seconds * HZ);
-+ }
-+
-+ /*
-+ * should check BCACHE_DEV_RATE_DW_RUNNING before calling
-+ * cancel_delayed_work_sync().
-+ */
-+ clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
-+ /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
-+ smp_mb();
- }
-
- static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
-@@ -675,6 +700,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
- dc->writeback_rate_p_term_inverse = 40;
- dc->writeback_rate_i_term_inverse = 10000;
-
-+ WARN_ON(test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
- INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
- }
-
-@@ -693,6 +719,7 @@ int bch_cached_dev_writeback_start(struct cached_dev *dc)
- return PTR_ERR(dc->writeback_thread);
- }
-
-+ WARN_ON(test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
- schedule_delayed_work(&dc->writeback_rate_update,
- dc->writeback_rate_update_seconds * HZ);
-
---
-2.15.1
-
diff --git a/for-next/v4/v4-0006-bcache-set-error_limit-correctly.patch b/for-next/v4/v4-0006-bcache-set-error_limit-correctly.patch
deleted file mode 100644
index 927468d..0000000
--- a/for-next/v4/v4-0006-bcache-set-error_limit-correctly.patch
+++ /dev/null
@@ -1,121 +0,0 @@
-From f259f50b81b23abcd79f8e20ba479c61ef67d983 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Tue, 9 Jan 2018 22:46:25 +0800
-Subject: [PATCH v4 06/13] bcache: set error_limit correctly
-
-Struct cache uses io_errors for two purposes,
-- Error decay: when cache set error_decay is set, io_errors is used to
- generate a small piece of delay when I/O error happens.
-- I/O errors counter: in order to generate big enough value for error
- decay, I/O errors counter value is stored by left shifting 20 bits (a.k.a
- IO_ERROR_SHIFT).
-
-In function bch_count_io_errors(), if I/O errors counter reaches cache set
-error limit, bch_cache_set_error() will be called to retire the whole cache
-set. But current code is problematic when checking the error limit, see the
-following code piece from bch_count_io_errors(),
-
- 90 if (error) {
- 91 char buf[BDEVNAME_SIZE];
- 92 unsigned errors = atomic_add_return(1 << IO_ERROR_SHIFT,
- 93 &ca->io_errors);
- 94 errors >>= IO_ERROR_SHIFT;
- 95
- 96 if (errors < ca->set->error_limit)
- 97 pr_err("%s: IO error on %s, recovering",
- 98 bdevname(ca->bdev, buf), m);
- 99 else
-100 bch_cache_set_error(ca->set,
-101 "%s: too many IO errors %s",
-102 bdevname(ca->bdev, buf), m);
-103 }
-
-At line 94, errors is right shifted by IO_ERROR_SHIFT bits, so it is the real
-errors counter compared at line 96. But ca->set->error_limit is initia-
-lized with an amplified value in bch_cache_set_alloc(),
-1545 c->error_limit = 8 << IO_ERROR_SHIFT;
-
-It means by default, in bch_count_io_errors(), before 8<<20 errors happened
-bch_cache_set_error() won't be called to retire the problematic cache
-device. If the average request size is 64KB, it means bcache won't handle
-failed device until 512GB data is requested. This is too large to be an I/O
-threshold. So I believe the correct error limit should be much smaller.
-
-This patch sets default cache set error limit to 8, then in
-bch_count_io_errors() when errors counter reaches 8 (if it is default
-value), function bch_cache_set_error() will be called to retire the whole
-cache set. This patch also removes bits shifting when store or show
-io_error_limit value via sysfs interface.
-
-Nowadays most of SSDs handle internal flash failure automatically by LBA
-address re-indirect mapping. If an I/O error can be observed by upper layer
-code, it will be a notable error because that SSD can not re-indirect
-map the problematic LBA address to an available flash block. This situation
-indicates the whole SSD will fail very soon. Therefore setting 8 as
-the default io error limit value makes sense, it is enough for most of
-cache devices.
-
-Changelog:
-v2: add reviewed-by from Hannes.
-v1: initial version for review.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Reviewed-by: Hannes Reinecke <hare@suse.com>
-Reviewed-by: Tang Junhui <tang.junhui@zte.com.cn>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
----
- drivers/md/bcache/bcache.h | 1 +
- drivers/md/bcache/super.c | 2 +-
- drivers/md/bcache/sysfs.c | 4 ++--
- 3 files changed, 4 insertions(+), 3 deletions(-)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 88d938c8d027..7d7512fa4f09 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -663,6 +663,7 @@ struct cache_set {
- ON_ERROR_UNREGISTER,
- ON_ERROR_PANIC,
- } on_error;
-+#define DEFAULT_IO_ERROR_LIMIT 8
- unsigned error_limit;
- unsigned error_decay;
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 6d888e8fea8c..a373648b5d4b 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1583,7 +1583,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
-
- c->congested_read_threshold_us = 2000;
- c->congested_write_threshold_us = 20000;
-- c->error_limit = 8 << IO_ERROR_SHIFT;
-+ c->error_limit = DEFAULT_IO_ERROR_LIMIT;
-
- return c;
- err:
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index b7166c504cdb..ba62e987b503 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -560,7 +560,7 @@ SHOW(__bch_cache_set)
-
- /* See count_io_errors for why 88 */
- sysfs_print(io_error_halflife, c->error_decay * 88);
-- sysfs_print(io_error_limit, c->error_limit >> IO_ERROR_SHIFT);
-+ sysfs_print(io_error_limit, c->error_limit);
-
- sysfs_hprint(congested,
- ((uint64_t) bch_get_congested(c)) << 9);
-@@ -660,7 +660,7 @@ STORE(__bch_cache_set)
- }
-
- if (attr == &sysfs_io_error_limit)
-- c->error_limit = strtoul_or_return(buf) << IO_ERROR_SHIFT;
-+ c->error_limit = strtoul_or_return(buf);
-
- /* See count_io_errors() for why 88 */
- if (attr == &sysfs_io_error_halflife)
---
-2.15.1
-
diff --git a/for-next/v4/v4-0007-bcache-add-CACHE_SET_IO_DISABLE-to-struct-cache_s.patch b/for-next/v4/v4-0007-bcache-add-CACHE_SET_IO_DISABLE-to-struct-cache_s.patch
deleted file mode 100644
index 849d522..0000000
--- a/for-next/v4/v4-0007-bcache-add-CACHE_SET_IO_DISABLE-to-struct-cache_s.patch
+++ /dev/null
@@ -1,489 +0,0 @@
-From a7c1f04212502a6e1505bfc0917809363d988660 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sun, 14 Jan 2018 22:15:00 +0800
-Subject: [PATCH v4 07/13] bcache: add CACHE_SET_IO_DISABLE to struct cache_set
- flags
-
-When too many I/Os failed on cache device, bch_cache_set_error() is called
-in the error handling code path to retire whole problematic cache set. If
-new I/O requests continue to come and take refcount dc->count, the cache
-set won't be retired immediately, this is a problem.
-
-Furthermore, several kernel threads and self-armed kernel works may still
-be running after bch_cache_set_error() is called. It needs to wait
-quite a while for them to stop, or they won't stop at all. They also
-prevent the cache set from being retired.
-
-The solution in this patch is to add a per cache set flag to disable I/O
-requests on this cache and all attached backing devices. Then newly arriving
-I/O requests can be rejected in *_make_request() before taking refcount, and
-kernel threads and self-armed kernel workers can stop very fast when flags
-bit CACHE_SET_IO_DISABLE is set.
-
-Because bcache also does internal I/Os for writeback, garbage collection,
-bucket allocation, journaling, this kind of I/O should be disabled after
-bch_cache_set_error() is called. So closure_bio_submit() is modified to
-check whether CACHE_SET_IO_DISABLE is set on cache_set->flags. If set,
-closure_bio_submit() will set bio->bi_status to BLK_STS_IOERR and
-return, generic_make_request() won't be called.
-
-A sysfs interface is also added to set or clear CACHE_SET_IO_DISABLE bit
-from cache_set->flags, to disable or enable cache set I/O for debugging. It
-is helpful to trigger more corner case issues for failed cache device.
-
-Changelog
-v2, more changes by previous review,
-- Use CACHE_SET_IO_DISABLE of cache_set->flags, suggested by Junhui.
-- Check CACHE_SET_IO_DISABLE in bch_btree_gc() to stop a while-loop, this
- is reported and inspired by the original patch of Pavel Vazharov.
-v1, initial version.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Reviewed-by: Hannes Reinecke <hare@suse.com>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Pavel Vazharov <freakpv@gmail.com>
----
- drivers/md/bcache/alloc.c | 3 ++-
- drivers/md/bcache/bcache.h | 18 ++++++++++++++++++
- drivers/md/bcache/btree.c | 10 +++++++---
- drivers/md/bcache/io.c | 2 +-
- drivers/md/bcache/journal.c | 4 ++--
- drivers/md/bcache/request.c | 26 +++++++++++++++++++-------
- drivers/md/bcache/super.c | 6 +++++-
- drivers/md/bcache/sysfs.c | 20 ++++++++++++++++++++
- drivers/md/bcache/util.h | 6 ------
- drivers/md/bcache/writeback.c | 35 +++++++++++++++++++++++++++--------
- 10 files changed, 101 insertions(+), 29 deletions(-)
-
-diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
-index 458e1d38577d..004cc3cc6123 100644
---- a/drivers/md/bcache/alloc.c
-+++ b/drivers/md/bcache/alloc.c
-@@ -287,7 +287,8 @@ do { \
- break; \
- \
- mutex_unlock(&(ca)->set->bucket_lock); \
-- if (kthread_should_stop()) { \
-+ if (kthread_should_stop() || \
-+ test_bit(CACHE_SET_IO_DISABLE, &ca->set->flags)) { \
- set_current_state(TASK_RUNNING); \
- return 0; \
- } \
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 7d7512fa4f09..c41736960045 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -475,10 +475,15 @@ struct gc_stat {
- *
- * CACHE_SET_RUNNING means all cache devices have been registered and journal
- * replay is complete.
-+ *
-+ * CACHE_SET_IO_DISABLE is set when bcache is stopping the whole cache set, all
-+ * external and internal I/O should be denied when this flag is set.
-+ *
- */
- #define CACHE_SET_UNREGISTERING 0
- #define CACHE_SET_STOPPING 1
- #define CACHE_SET_RUNNING 2
-+#define CACHE_SET_IO_DISABLE 4
-
- struct cache_set {
- struct closure cl;
-@@ -862,6 +867,19 @@ static inline void wake_up_allocators(struct cache_set *c)
- wake_up_process(ca->alloc_thread);
- }
-
-+static inline void closure_bio_submit(struct cache_set *c,
-+ struct bio *bio,
-+ struct closure *cl)
-+{
-+ closure_get(cl);
-+ if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags))) {
-+ bio->bi_status = BLK_STS_IOERR;
-+ bio_endio(bio);
-+ return;
-+ }
-+ generic_make_request(bio);
-+}
-+
- /* Forward declarations */
-
- void bch_count_io_errors(struct cache *, blk_status_t, int, const char *);
-diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
-index bf3a48aa9a9a..0a0bc63011b4 100644
---- a/drivers/md/bcache/btree.c
-+++ b/drivers/md/bcache/btree.c
-@@ -1744,6 +1744,7 @@ static void bch_btree_gc(struct cache_set *c)
-
- btree_gc_start(c);
-
-+ /* if CACHE_SET_IO_DISABLE set, gc thread should stop too */
- do {
- ret = btree_root(gc_root, c, &op, &writes, &stats);
- closure_sync(&writes);
-@@ -1751,7 +1752,7 @@ static void bch_btree_gc(struct cache_set *c)
-
- if (ret && ret != -EAGAIN)
- pr_warn("gc failed!");
-- } while (ret);
-+ } while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags));
-
- bch_btree_gc_finish(c);
- wake_up_allocators(c);
-@@ -1789,9 +1790,12 @@ static int bch_gc_thread(void *arg)
-
- while (1) {
- wait_event_interruptible(c->gc_wait,
-- kthread_should_stop() || gc_should_run(c));
-+ kthread_should_stop() ||
-+ test_bit(CACHE_SET_IO_DISABLE, &c->flags) ||
-+ gc_should_run(c));
-
-- if (kthread_should_stop())
-+ if (kthread_should_stop() ||
-+ test_bit(CACHE_SET_IO_DISABLE, &c->flags))
- break;
-
- set_gc_sectors(c);
-diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
-index a783c5a41ff1..8013ecbcdbda 100644
---- a/drivers/md/bcache/io.c
-+++ b/drivers/md/bcache/io.c
-@@ -38,7 +38,7 @@ void __bch_submit_bbio(struct bio *bio, struct cache_set *c)
- bio_set_dev(bio, PTR_CACHE(c, &b->key, 0)->bdev);
-
- b->submit_time_us = local_clock_us();
-- closure_bio_submit(bio, bio->bi_private);
-+ closure_bio_submit(c, bio, bio->bi_private);
- }
-
- void bch_submit_bbio(struct bio *bio, struct cache_set *c,
-diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index a87165c1d8e5..979873641030 100644
---- a/drivers/md/bcache/journal.c
-+++ b/drivers/md/bcache/journal.c
-@@ -62,7 +62,7 @@ reread: left = ca->sb.bucket_size - offset;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
- bch_bio_map(bio, data);
-
-- closure_bio_submit(bio, &cl);
-+ closure_bio_submit(ca->set, bio, &cl);
- closure_sync(&cl);
-
- /* This function could be simpler now since we no longer write
-@@ -653,7 +653,7 @@ static void journal_write_unlocked(struct closure *cl)
- spin_unlock(&c->journal.lock);
-
- while ((bio = bio_list_pop(&list)))
-- closure_bio_submit(bio, cl);
-+ closure_bio_submit(c, bio, cl);
-
- continue_at(cl, journal_write_done, NULL);
- }
-diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
-index 1a46b41dac70..02296bda6384 100644
---- a/drivers/md/bcache/request.c
-+++ b/drivers/md/bcache/request.c
-@@ -747,7 +747,7 @@ static void cached_dev_read_error(struct closure *cl)
-
- /* XXX: invalidate cache */
-
-- closure_bio_submit(bio, cl);
-+ closure_bio_submit(s->iop.c, bio, cl);
- }
-
- continue_at(cl, cached_dev_cache_miss_done, NULL);
-@@ -872,7 +872,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
- s->cache_miss = miss;
- s->iop.bio = cache_bio;
- bio_get(cache_bio);
-- closure_bio_submit(cache_bio, &s->cl);
-+ closure_bio_submit(s->iop.c, cache_bio, &s->cl);
-
- return ret;
- out_put:
-@@ -880,7 +880,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
- out_submit:
- miss->bi_end_io = request_endio;
- miss->bi_private = &s->cl;
-- closure_bio_submit(miss, &s->cl);
-+ closure_bio_submit(s->iop.c, miss, &s->cl);
- return ret;
- }
-
-@@ -945,7 +945,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
-
- if ((bio_op(bio) != REQ_OP_DISCARD) ||
- blk_queue_discard(bdev_get_queue(dc->bdev)))
-- closure_bio_submit(bio, cl);
-+ closure_bio_submit(s->iop.c, bio, cl);
- } else if (s->iop.writeback) {
- bch_writeback_add(dc);
- s->iop.bio = bio;
-@@ -960,12 +960,12 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
- flush->bi_private = cl;
- flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
-
-- closure_bio_submit(flush, cl);
-+ closure_bio_submit(s->iop.c, flush, cl);
- }
- } else {
- s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split);
-
-- closure_bio_submit(bio, cl);
-+ closure_bio_submit(s->iop.c, bio, cl);
- }
-
- closure_call(&s->iop.cl, bch_data_insert, NULL, cl);
-@@ -981,7 +981,7 @@ static void cached_dev_nodata(struct closure *cl)
- bch_journal_meta(s->iop.c, cl);
-
- /* If it's a flush, we send the flush to the backing device too */
-- closure_bio_submit(bio, cl);
-+ closure_bio_submit(s->iop.c, bio, cl);
-
- continue_at(cl, cached_dev_bio_complete, NULL);
- }
-@@ -996,6 +996,12 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
- struct cached_dev *dc = container_of(d, struct cached_dev, disk);
- int rw = bio_data_dir(bio);
-
-+ if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
-+ bio->bi_status = BLK_STS_IOERR;
-+ bio_endio(bio);
-+ return BLK_QC_T_NONE;
-+ }
-+
- atomic_set(&dc->backing_idle, 0);
- generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
-
-@@ -1112,6 +1118,12 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q,
- struct bcache_device *d = bio->bi_disk->private_data;
- int rw = bio_data_dir(bio);
-
-+ if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
-+ bio->bi_status = BLK_STS_IOERR;
-+ bio_endio(bio);
-+ return BLK_QC_T_NONE;
-+ }
-+
- generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
-
- s = search_alloc(bio, d);
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index a373648b5d4b..4204d75aee7b 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -521,7 +521,7 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op,
- bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags);
- bch_bio_map(bio, ca->disk_buckets);
-
-- closure_bio_submit(bio, &ca->prio);
-+ closure_bio_submit(ca->set, bio, &ca->prio);
- closure_sync(cl);
- }
-
-@@ -1349,6 +1349,9 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
- test_bit(CACHE_SET_STOPPING, &c->flags))
- return false;
-
-+ if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags))
-+ pr_warn("bcache: CACHE_SET_IO_DISABLE already set");
-+
- /* XXX: we can be called from atomic context
- acquire_console_sem();
- */
-@@ -1584,6 +1587,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
- c->congested_read_threshold_us = 2000;
- c->congested_write_threshold_us = 20000;
- c->error_limit = DEFAULT_IO_ERROR_LIMIT;
-+ WARN_ON(test_and_clear_bit(CACHE_SET_IO_DISABLE, &c->flags));
-
- return c;
- err:
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index ba62e987b503..afb051bcfca1 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -92,6 +92,7 @@ read_attribute(partial_stripes_expensive);
-
- rw_attribute(synchronous);
- rw_attribute(journal_delay_ms);
-+rw_attribute(io_disable);
- rw_attribute(discard);
- rw_attribute(running);
- rw_attribute(label);
-@@ -577,6 +578,8 @@ SHOW(__bch_cache_set)
- sysfs_printf(gc_always_rewrite, "%i", c->gc_always_rewrite);
- sysfs_printf(btree_shrinker_disabled, "%i", c->shrinker_disabled);
- sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
-+ sysfs_printf(io_disable, "%i",
-+ test_bit(CACHE_SET_IO_DISABLE, &c->flags));
-
- if (attr == &sysfs_bset_tree_stats)
- return bch_bset_print_stats(c, buf);
-@@ -666,6 +669,22 @@ STORE(__bch_cache_set)
- if (attr == &sysfs_io_error_halflife)
- c->error_decay = strtoul_or_return(buf) / 88;
-
-+ if (attr == &sysfs_io_disable) {
-+ int v = strtoul_or_return(buf);
-+
-+ if (v) {
-+ if (test_and_set_bit(CACHE_SET_IO_DISABLE,
-+ &c->flags))
-+ pr_warn("bcache: CACHE_SET_IO_DISABLE"
-+ " already set");
-+ } else {
-+ if (!test_and_clear_bit(CACHE_SET_IO_DISABLE,
-+ &c->flags))
-+ pr_warn("bcache: CACHE_SET_IO_DISABLE"
-+ " already cleared");
-+ }
-+ }
-+
- sysfs_strtoul(journal_delay_ms, c->journal_delay_ms);
- sysfs_strtoul(verify, c->verify);
- sysfs_strtoul(key_merging_disabled, c->key_merging_disabled);
-@@ -748,6 +767,7 @@ static struct attribute *bch_cache_set_internal_files[] = {
- &sysfs_gc_always_rewrite,
- &sysfs_btree_shrinker_disabled,
- &sysfs_copy_gc_enabled,
-+ &sysfs_io_disable,
- NULL
- };
- KTYPE(bch_cache_set_internal);
-diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
-index 4df4c5c1cab2..7944eea54fa9 100644
---- a/drivers/md/bcache/util.h
-+++ b/drivers/md/bcache/util.h
-@@ -565,12 +565,6 @@ static inline sector_t bdev_sectors(struct block_device *bdev)
- return bdev->bd_inode->i_size >> 9;
- }
-
--#define closure_bio_submit(bio, cl) \
--do { \
-- closure_get(cl); \
-- generic_make_request(bio); \
--} while (0)
--
- uint64_t bch_crc64_update(uint64_t, const void *, size_t);
- uint64_t bch_crc64(const void *, size_t);
-
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index 8f98ef1038d3..3d7d8452e0de 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -114,6 +114,7 @@ static void update_writeback_rate(struct work_struct *work)
- struct cached_dev *dc = container_of(to_delayed_work(work),
- struct cached_dev,
- writeback_rate_update);
-+ struct cache_set *c = dc->disk.c;
-
- /*
- * should check BCACHE_DEV_RATE_DW_RUNNING before calling
-@@ -123,7 +124,12 @@ static void update_writeback_rate(struct work_struct *work)
- /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
- smp_mb();
-
-- if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) {
-+ /*
-+ * CACHE_SET_IO_DISABLE might be set via sysfs interface,
-+ * check it here too.
-+ */
-+ if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) ||
-+ test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
- clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
- /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
- smp_mb();
-@@ -138,7 +144,12 @@ static void update_writeback_rate(struct work_struct *work)
-
- up_read(&dc->writeback_lock);
-
-- if (test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) {
-+ /*
-+ * CACHE_SET_IO_DISABLE might be set via sysfs interface,
-+ * check it here too.
-+ */
-+ if (test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) &&
-+ !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
- schedule_delayed_work(&dc->writeback_rate_update,
- dc->writeback_rate_update_seconds * HZ);
- }
-@@ -278,7 +289,7 @@ static void write_dirty(struct closure *cl)
- bio_set_dev(&io->bio, io->dc->bdev);
- io->bio.bi_end_io = dirty_endio;
-
-- closure_bio_submit(&io->bio, cl);
-+ closure_bio_submit(io->dc->disk.c, &io->bio, cl);
- }
-
- atomic_set(&dc->writeback_sequence_next, next_sequence);
-@@ -304,7 +315,7 @@ static void read_dirty_submit(struct closure *cl)
- {
- struct dirty_io *io = container_of(cl, struct dirty_io, cl);
-
-- closure_bio_submit(&io->bio, cl);
-+ closure_bio_submit(io->dc->disk.c, &io->bio, cl);
-
- continue_at(cl, write_dirty, io->dc->writeback_write_wq);
- }
-@@ -330,7 +341,9 @@ static void read_dirty(struct cached_dev *dc)
-
- next = bch_keybuf_next(&dc->writeback_keys);
-
-- while (!kthread_should_stop() && next) {
-+ while (!kthread_should_stop() &&
-+ !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
-+ next) {
- size = 0;
- nk = 0;
-
-@@ -427,7 +440,9 @@ static void read_dirty(struct cached_dev *dc)
- }
- }
-
-- while (!kthread_should_stop() && delay) {
-+ while (!kthread_should_stop() &&
-+ !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
-+ delay) {
- schedule_timeout_interruptible(delay);
- delay = writeback_delay(dc, 0);
- }
-@@ -583,11 +598,13 @@ static bool refill_dirty(struct cached_dev *dc)
- static int bch_writeback_thread(void *arg)
- {
- struct cached_dev *dc = arg;
-+ struct cache_set *c = dc->disk.c;
- bool searched_full_index;
-
- bch_ratelimit_reset(&dc->writeback_rate);
-
-- while (!kthread_should_stop()) {
-+ while (!kthread_should_stop() &&
-+ !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
- down_write(&dc->writeback_lock);
- set_current_state(TASK_INTERRUPTIBLE);
- /*
-@@ -601,7 +618,8 @@ static int bch_writeback_thread(void *arg)
- (!atomic_read(&dc->has_dirty) || !dc->writeback_running)) {
- up_write(&dc->writeback_lock);
-
-- if (kthread_should_stop()) {
-+ if (kthread_should_stop() ||
-+ test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
- set_current_state(TASK_RUNNING);
- break;
- }
-@@ -637,6 +655,7 @@ static int bch_writeback_thread(void *arg)
-
- while (delay &&
- !kthread_should_stop() &&
-+ !test_bit(CACHE_SET_IO_DISABLE, &c->flags) &&
- !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
- delay = schedule_timeout_interruptible(delay);
-
---
-2.15.1
-
diff --git a/for-next/v4/v4-0008-bcache-stop-all-attached-bcache-devices-for-a-ret.patch b/for-next/v4/v4-0008-bcache-stop-all-attached-bcache-devices-for-a-ret.patch
deleted file mode 100644
index eab5e76..0000000
--- a/for-next/v4/v4-0008-bcache-stop-all-attached-bcache-devices-for-a-ret.patch
+++ /dev/null
@@ -1,67 +0,0 @@
-From 86e6ce9e732449701c0d00048b5a07c140bd2ee5 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 10 Jan 2018 00:26:32 +0800
-Subject: [PATCH v4 08/13] bcache: stop all attached bcache devices for a
- retired cache set
-
-When there are too many I/O errors on cache device, current bcache code
-will retire the whole cache set, and detach all bcache devices. But the
-detached bcache devices are not stopped, which is problematic when bcache
-is in writeback mode.
-
-If the retired cache set has dirty data of backing devices, continuing
-to write to the bcache device will write to the backing device directly.
-If the LBA of a write request has a dirty version cached on the cache
-device, the next time the cache device is re-registered and the backing
-device is re-attached to it, the stale dirty data on the cache device will
-be written to the backing device and overwrite the latest directly written
-data. This situation causes silent data corruption.
-
-This patch checks whether cache_set->io_disable is true in
-__cache_set_unregister(). If cache_set->io_disable is true, the cache set
-is being unregistered due to too many I/O errors, so all attached bcache
-devices will be stopped as well. If cache_set->io_disable is not true, it
-means __cache_set_unregister() is triggered by writing 1 to sysfs file
-/sys/fs/bcache/<UUID>/bcache/stop. This is an exception because users do
-it explicitly, this patch keeps existing behavior and does not stop any
-bcache device.
-
-Even if the failed cache device has no dirty data, stopping the bcache device
-is still desired by many Ceph and database users. Their applications will
-then report I/O errors due to the disappeared bcache device, and operations
-people will know the cache device is broken or disconnected.
-
-Changelog:
-v2: add reviewed-by from Hannes.
-v1: initial version for review.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Reviewed-by: Hannes Reinecke <hare@suse.com>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
-Cc: Michael Lyle <mlyle@lyle.org>
----
- drivers/md/bcache/super.c | 8 ++++++++
- 1 file changed, 8 insertions(+)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 4204d75aee7b..97e3bb8e1aee 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1478,6 +1478,14 @@ static void __cache_set_unregister(struct closure *cl)
- dc = container_of(c->devices[i],
- struct cached_dev, disk);
- bch_cached_dev_detach(dc);
-+ /*
-+ * If we come here by too many I/O errors,
-+ * bcache device should be stopped too, to
-+ * keep data consistency on cache and
-+ * backing devices.
-+ */
-+ if (test_bit(CACHE_SET_IO_DISABLE, &c->flags))
-+ bcache_device_stop(c->devices[i]);
- } else {
- bcache_device_stop(c->devices[i]);
- }
---
-2.15.1
-
diff --git a/for-next/v4/v4-0009-bcache-fix-inaccurate-io-state-for-detached-bcach.patch b/for-next/v4/v4-0009-bcache-fix-inaccurate-io-state-for-detached-bcach.patch
deleted file mode 100644
index 048a30a..0000000
--- a/for-next/v4/v4-0009-bcache-fix-inaccurate-io-state-for-detached-bcach.patch
+++ /dev/null
@@ -1,119 +0,0 @@
-From 4d6a58a04771b787578862bae770e69eee1b358e Mon Sep 17 00:00:00 2001
-From: Tang Junhui <tang.junhui@zte.com.cn>
-Date: Tue, 9 Jan 2018 10:27:11 +0800
-Subject: [PATCH v4 09/13] bcache: fix inaccurate io state for detached bcache
- devices
-
-When we run IO on a detached device and run iostat to show IO status,
-normally it will show output like below (some fields omitted):
-Device: ... avgrq-sz avgqu-sz await r_await w_await svctm %util
-sdd ... 15.89 0.53 1.82 0.20 2.23 1.81 52.30
-bcache0 ... 15.89 115.42 0.00 0.00 0.00 2.40 69.60
-but after IO has stopped, there are still very large avgqu-sz and %util
-values, as below:
-Device: ... avgrq-sz avgqu-sz await r_await w_await svctm %util
-bcache0 ... 0 5326.32 0.00 0.00 0.00 0.00 100.10
-
-The reason for this issue is that only generic_start_io_acct() is called,
-and generic_end_io_acct() is never called, for a detached device in
-cached_dev_make_request(). See the code:
-//start generic_start_io_acct()
-generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
-if (cached_dev_get(dc)) {
- //will callback generic_end_io_acct()
-}
-else {
- //will not call generic_end_io_acct()
-}
-
-This patch calls generic_end_io_acct() in the end of IO for detached
-devices, so we can show IO state correctly.
-
-(Modified to use GFP_NOIO in kzalloc() by Coly Li)
-
-Signed-off-by: Tang Junhui <tang.junhui@zte.com.cn>
-Reviewed-by: Coly Li <colyli@suse.de>
-Reviewed-by: Hannes Reinecke <hare@suse.com>
----
- drivers/md/bcache/request.c | 58 +++++++++++++++++++++++++++++++++++++++------
- 1 file changed, 51 insertions(+), 7 deletions(-)
-
-diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
-index 02296bda6384..e09c5ae745be 100644
---- a/drivers/md/bcache/request.c
-+++ b/drivers/md/bcache/request.c
-@@ -986,6 +986,55 @@ static void cached_dev_nodata(struct closure *cl)
- continue_at(cl, cached_dev_bio_complete, NULL);
- }
-
-+struct detached_dev_io_private {
-+ struct bcache_device *d;
-+ unsigned long start_time;
-+ bio_end_io_t *bi_end_io;
-+ void *bi_private;
-+};
-+
-+static void detatched_dev_end_io(struct bio *bio)
-+{
-+ struct detached_dev_io_private *ddip;
-+
-+ ddip = bio->bi_private;
-+ bio->bi_end_io = ddip->bi_end_io;
-+ bio->bi_private = ddip->bi_private;
-+
-+ generic_end_io_acct(ddip->d->disk->queue,
-+ bio_data_dir(bio),
-+ &ddip->d->disk->part0, ddip->start_time);
-+
-+ kfree(ddip);
-+
-+ bio->bi_end_io(bio);
-+}
-+
-+static void detached_dev_do_request(struct bcache_device *d, struct bio *bio)
-+{
-+ struct detached_dev_io_private *ddip;
-+ struct cached_dev *dc = container_of(d, struct cached_dev, disk);
-+
-+ /*
-+ * no need to call closure_get(&dc->disk.cl),
-+ * because upper layer had already opened bcache device,
-+ * which would call closure_get(&dc->disk.cl)
-+ */
-+ ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO);
-+ ddip->d = d;
-+ ddip->start_time = jiffies;
-+ ddip->bi_end_io = bio->bi_end_io;
-+ ddip->bi_private = bio->bi_private;
-+ bio->bi_end_io = detatched_dev_end_io;
-+ bio->bi_private = ddip;
-+
-+ if ((bio_op(bio) == REQ_OP_DISCARD) &&
-+ !blk_queue_discard(bdev_get_queue(dc->bdev)))
-+ bio->bi_end_io(bio);
-+ else
-+ generic_make_request(bio);
-+}
-+
- /* Cached devices - read & write stuff */
-
- static blk_qc_t cached_dev_make_request(struct request_queue *q,
-@@ -1028,13 +1077,8 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
- else
- cached_dev_read(dc, s);
- }
-- } else {
-- if ((bio_op(bio) == REQ_OP_DISCARD) &&
-- !blk_queue_discard(bdev_get_queue(dc->bdev)))
-- bio_endio(bio);
-- else
-- generic_make_request(bio);
-- }
-+ } else
-+ detached_dev_do_request(d, bio);
-
- return BLK_QC_T_NONE;
- }
---
-2.15.1
-
diff --git a/for-next/v4/v4-0010-bcache-add-backing_request_endio-for-bi_end_io-of.patch b/for-next/v4/v4-0010-bcache-add-backing_request_endio-for-bi_end_io-of.patch
deleted file mode 100644
index 80f6dc8..0000000
--- a/for-next/v4/v4-0010-bcache-add-backing_request_endio-for-bi_end_io-of.patch
+++ /dev/null
@@ -1,255 +0,0 @@
-From 1e8e6958888300f4b50ccc6798d4ce17b0e92afe Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 10 Jan 2018 21:01:48 +0800
-Subject: [PATCH v4 10/13] bcache: add backing_request_endio() for bi_end_io of
- attached backing device I/O
-
-In order to catch I/O error of backing device, a separate bi_end_io
-call back is required. Then a per backing device counter can record I/O
-errors number and retire the backing device if the counter reaches a
-per backing device I/O error limit.
-
-This patch adds backing_request_endio() to bcache backing device I/O code
-path, this is a preparation for further complicated backing device failure
-handling. So far there is no real code logic change, I make this change a
-separate patch to make sure it is stable and reliable for further work.
-
-Changelog:
-v2: indeed this is new added in this patch set.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Reviewed-by: Hannes Reinecke <hare@suse.com>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
-Cc: Michael Lyle <mlyle@lyle.org>
----
- drivers/md/bcache/request.c | 95 +++++++++++++++++++++++++++++++++++--------
- drivers/md/bcache/super.c | 1 +
- drivers/md/bcache/writeback.c | 1 +
- 3 files changed, 81 insertions(+), 16 deletions(-)
-
-diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
-index e09c5ae745be..ad4cf71f7eab 100644
---- a/drivers/md/bcache/request.c
-+++ b/drivers/md/bcache/request.c
-@@ -139,6 +139,7 @@ static void bch_data_invalidate(struct closure *cl)
- }
-
- op->insert_data_done = true;
-+ /* get in bch_data_insert() */
- bio_put(bio);
- out:
- continue_at(cl, bch_data_insert_keys, op->wq);
-@@ -630,6 +631,38 @@ static void request_endio(struct bio *bio)
- closure_put(cl);
- }
-
-+static void backing_request_endio(struct bio *bio)
-+{
-+ struct closure *cl = bio->bi_private;
-+
-+ if (bio->bi_status) {
-+ struct search *s = container_of(cl, struct search, cl);
-+ /*
-+ * If a bio has REQ_PREFLUSH for writeback mode, it is
-+ * speically assembled in cached_dev_write() for a non-zero
-+ * write request which has REQ_PREFLUSH. we don't set
-+ * s->iop.status by this failure, the status will be decided
-+ * by result of bch_data_insert() operation.
-+ */
-+ if (unlikely(s->iop.writeback &&
-+ bio->bi_opf & REQ_PREFLUSH)) {
-+ char buf[BDEVNAME_SIZE];
-+
-+ bio_devname(bio, buf);
-+ pr_err("Can't flush %s: returned bi_status %i",
-+ buf, bio->bi_status);
-+ } else {
-+ /* set to orig_bio->bi_status in bio_complete() */
-+ s->iop.status = bio->bi_status;
-+ }
-+ s->recoverable = false;
-+ /* should count I/O error for backing device here */
-+ }
-+
-+ bio_put(bio);
-+ closure_put(cl);
-+}
-+
- static void bio_complete(struct search *s)
- {
- if (s->orig_bio) {
-@@ -644,13 +677,21 @@ static void bio_complete(struct search *s)
- }
- }
-
--static void do_bio_hook(struct search *s, struct bio *orig_bio)
-+static void do_bio_hook(struct search *s,
-+ struct bio *orig_bio,
-+ bio_end_io_t *end_io_fn)
- {
- struct bio *bio = &s->bio.bio;
-
- bio_init(bio, NULL, 0);
- __bio_clone_fast(bio, orig_bio);
-- bio->bi_end_io = request_endio;
-+ /*
-+ * bi_end_io can be set separately somewhere else, e.g. the
-+ * variants in,
-+ * - cache_bio->bi_end_io from cached_dev_cache_miss()
-+ * - n->bi_end_io from cache_lookup_fn()
-+ */
-+ bio->bi_end_io = end_io_fn;
- bio->bi_private = &s->cl;
-
- bio_cnt_set(bio, 3);
-@@ -676,7 +717,7 @@ static inline struct search *search_alloc(struct bio *bio,
- s = mempool_alloc(d->c->search, GFP_NOIO);
-
- closure_init(&s->cl, NULL);
-- do_bio_hook(s, bio);
-+ do_bio_hook(s, bio, request_endio);
-
- s->orig_bio = bio;
- s->cache_miss = NULL;
-@@ -743,10 +784,11 @@ static void cached_dev_read_error(struct closure *cl)
- trace_bcache_read_retry(s->orig_bio);
-
- s->iop.status = 0;
-- do_bio_hook(s, s->orig_bio);
-+ do_bio_hook(s, s->orig_bio, backing_request_endio);
-
- /* XXX: invalidate cache */
-
-+ /* I/O request sent to backing device */
- closure_bio_submit(s->iop.c, bio, cl);
- }
-
-@@ -859,7 +901,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
- bio_copy_dev(cache_bio, miss);
- cache_bio->bi_iter.bi_size = s->insert_bio_sectors << 9;
-
-- cache_bio->bi_end_io = request_endio;
-+ cache_bio->bi_end_io = backing_request_endio;
- cache_bio->bi_private = &s->cl;
-
- bch_bio_map(cache_bio, NULL);
-@@ -872,14 +914,16 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
- s->cache_miss = miss;
- s->iop.bio = cache_bio;
- bio_get(cache_bio);
-+ /* I/O request sent to backing device */
- closure_bio_submit(s->iop.c, cache_bio, &s->cl);
-
- return ret;
- out_put:
- bio_put(cache_bio);
- out_submit:
-- miss->bi_end_io = request_endio;
-+ miss->bi_end_io = backing_request_endio;
- miss->bi_private = &s->cl;
-+ /* I/O request sent to backing device */
- closure_bio_submit(s->iop.c, miss, &s->cl);
- return ret;
- }
-@@ -943,31 +987,48 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
- s->iop.bio = s->orig_bio;
- bio_get(s->iop.bio);
-
-- if ((bio_op(bio) != REQ_OP_DISCARD) ||
-- blk_queue_discard(bdev_get_queue(dc->bdev)))
-- closure_bio_submit(s->iop.c, bio, cl);
-+ if (bio_op(bio) == REQ_OP_DISCARD &&
-+ !blk_queue_discard(bdev_get_queue(dc->bdev)))
-+ goto insert_data;
-+
-+ /* I/O request sent to backing device */
-+ bio->bi_end_io = backing_request_endio;
-+ closure_bio_submit(s->iop.c, bio, cl);
-+
- } else if (s->iop.writeback) {
- bch_writeback_add(dc);
- s->iop.bio = bio;
-
- if (bio->bi_opf & REQ_PREFLUSH) {
-- /* Also need to send a flush to the backing device */
-- struct bio *flush = bio_alloc_bioset(GFP_NOIO, 0,
-- dc->disk.bio_split);
--
-+ /*
-+ * Also need to send a flush to the backing
-+ * device, if failed on backing device.
-+ */
-+ struct bio *flush;
-+
-+ flush = bio_alloc_bioset(GFP_NOIO, 0,
-+ dc->disk.bio_split);
-+ if (!flush) {
-+ s->iop.status = BLK_STS_RESOURCE;
-+ goto insert_data;
-+ }
- bio_copy_dev(flush, bio);
-- flush->bi_end_io = request_endio;
-+ flush->bi_end_io = backing_request_endio;
- flush->bi_private = cl;
- flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
--
-+ /* I/O request sent to backing device */
- closure_bio_submit(s->iop.c, flush, cl);
- }
-+ bch_writeback_add(dc);
-+
- } else {
- s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split);
--
-+ /* I/O request sent to backing device */
-+ bio->bi_end_io = backing_request_endio;
- closure_bio_submit(s->iop.c, bio, cl);
- }
-
-+insert_data:
- closure_call(&s->iop.cl, bch_data_insert, NULL, cl);
- continue_at(cl, cached_dev_write_complete, NULL);
- }
-@@ -981,6 +1042,7 @@ static void cached_dev_nodata(struct closure *cl)
- bch_journal_meta(s->iop.c, cl);
-
- /* If it's a flush, we send the flush to the backing device too */
-+ bio->bi_end_io = backing_request_endio;
- closure_bio_submit(s->iop.c, bio, cl);
-
- continue_at(cl, cached_dev_bio_complete, NULL);
-@@ -1078,6 +1140,7 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
- cached_dev_read(dc, s);
- }
- } else
-+ /* I/O request sent to backing device */
- detached_dev_do_request(d, bio);
-
- return BLK_QC_T_NONE;
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 97e3bb8e1aee..08a0b541a4da 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -265,6 +265,7 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent)
- bio->bi_private = dc;
-
- closure_get(cl);
-+ /* I/O request sent to backing device */
- __write_super(&dc->sb, bio);
-
- closure_return_with_destructor(cl, bch_write_bdev_super_unlock);
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index 3d7d8452e0de..4ebe0119ea7e 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -289,6 +289,7 @@ static void write_dirty(struct closure *cl)
- bio_set_dev(&io->bio, io->dc->bdev);
- io->bio.bi_end_io = dirty_endio;
-
-+ /* I/O request sent to backing device */
- closure_bio_submit(io->dc->disk.c, &io->bio, cl);
- }
-
---
-2.15.1
-
diff --git a/for-next/v4/v4-0011-bcache-add-io_disable-to-struct-cached_dev.patch b/for-next/v4/v4-0011-bcache-add-io_disable-to-struct-cached_dev.patch
deleted file mode 100644
index 6b4ae2a..0000000
--- a/for-next/v4/v4-0011-bcache-add-io_disable-to-struct-cached_dev.patch
+++ /dev/null
@@ -1,235 +0,0 @@
-From 63d3df27ffc3a82d15f3f7f428194988f410197a Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 10 Jan 2018 21:33:45 +0800
-Subject: [PATCH v4 11/13] bcache: add io_disable to struct cached_dev
-
-If a bcache device is configured to writeback mode, current code does not
-handle write I/O errors on backing devices properly.
-
-In writeback mode, a write request is written to the cache device, and
-later flushed to the backing device. If I/O fails when writing from the
-cache device to the backing device, bcache code just ignores the error and
-upper layer code is NOT notified that the backing device is broken.
-
-This patch tries to handle backing device failure like how the cache device
-failure is handled,
-- Add a error counter 'io_errors' and error limit 'error_limit' in struct
- cached_dev. Add another io_disable to struct cached_dev to disable I/Os
- on the problematic backing device.
-- When I/O error happens on backing device, increase io_errors counter. And
- if io_errors reaches error_limit, set cached_dev->io_disable to true, and
- stop the bcache device.
-
-The result is, if the backing device is broken or disconnected and I/O
-errors reach its error limit, the backing device will be disabled and the
-associated bcache device will be removed from the system.
-
-Changelog:
-v2: indeed this is new added in v2 patch set.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Reviewed-by: Hannes Reinecke <hare@suse.com>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
----
- drivers/md/bcache/bcache.h | 7 +++++++
- drivers/md/bcache/io.c | 14 ++++++++++++++
- drivers/md/bcache/request.c | 14 ++++++++++++--
- drivers/md/bcache/super.c | 22 ++++++++++++++++++++++
- drivers/md/bcache/sysfs.c | 15 ++++++++++++++-
- 5 files changed, 69 insertions(+), 3 deletions(-)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index c41736960045..5a811959392d 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -360,6 +360,7 @@ struct cached_dev {
- unsigned sequential_cutoff;
- unsigned readahead;
-
-+ unsigned io_disable:1;
- unsigned verify:1;
- unsigned bypass_torture_test:1;
-
-@@ -379,6 +380,10 @@ struct cached_dev {
- unsigned writeback_rate_i_term_inverse;
- unsigned writeback_rate_p_term_inverse;
- unsigned writeback_rate_minimum;
-+
-+#define DEFAULT_CACHED_DEV_ERROR_LIMIT 64
-+ atomic_t io_errors;
-+ unsigned error_limit;
- };
-
- enum alloc_reserve {
-@@ -882,6 +887,7 @@ static inline void closure_bio_submit(struct cache_set *c,
-
- /* Forward declarations */
-
-+void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio);
- void bch_count_io_errors(struct cache *, blk_status_t, int, const char *);
- void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
- blk_status_t, const char *);
-@@ -909,6 +915,7 @@ int bch_bucket_alloc_set(struct cache_set *, unsigned,
- struct bkey *, int, bool);
- bool bch_alloc_sectors(struct cache_set *, struct bkey *, unsigned,
- unsigned, unsigned, bool);
-+bool bch_cached_dev_error(struct cached_dev *dc);
-
- __printf(2, 3)
- bool bch_cache_set_error(struct cache_set *, const char *, ...);
-diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
-index 8013ecbcdbda..7fac97ae036e 100644
---- a/drivers/md/bcache/io.c
-+++ b/drivers/md/bcache/io.c
-@@ -50,6 +50,20 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c,
- }
-
- /* IO errors */
-+void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio)
-+{
-+ char buf[BDEVNAME_SIZE];
-+ unsigned errors;
-+
-+ WARN_ONCE(!dc, "NULL pointer of struct cached_dev");
-+
-+ errors = atomic_add_return(1, &dc->io_errors);
-+ if (errors < dc->error_limit)
-+ pr_err("%s: IO error on backing device, unrecoverable",
-+ bio_devname(bio, buf));
-+ else
-+ bch_cached_dev_error(dc);
-+}
-
- void bch_count_io_errors(struct cache *ca,
- blk_status_t error,
-diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
-index ad4cf71f7eab..386b388ce296 100644
---- a/drivers/md/bcache/request.c
-+++ b/drivers/md/bcache/request.c
-@@ -637,6 +637,8 @@ static void backing_request_endio(struct bio *bio)
-
- if (bio->bi_status) {
- struct search *s = container_of(cl, struct search, cl);
-+ struct cached_dev *dc = container_of(s->d,
-+ struct cached_dev, disk);
- /*
- * If a bio has REQ_PREFLUSH for writeback mode, it is
- * speically assembled in cached_dev_write() for a non-zero
-@@ -657,6 +659,7 @@ static void backing_request_endio(struct bio *bio)
- }
- s->recoverable = false;
- /* should count I/O error for backing device here */
-+ bch_count_backing_io_errors(dc, bio);
- }
-
- bio_put(bio);
-@@ -1067,8 +1070,14 @@ static void detatched_dev_end_io(struct bio *bio)
- bio_data_dir(bio),
- &ddip->d->disk->part0, ddip->start_time);
-
-- kfree(ddip);
-+ if (bio->bi_status) {
-+ struct cached_dev *dc = container_of(ddip->d,
-+ struct cached_dev, disk);
-+ /* should count I/O error for backing device here */
-+ bch_count_backing_io_errors(dc, bio);
-+ }
-
-+ kfree(ddip);
- bio->bi_end_io(bio);
- }
-
-@@ -1107,7 +1116,8 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
- struct cached_dev *dc = container_of(d, struct cached_dev, disk);
- int rw = bio_data_dir(bio);
-
-- if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
-+ if (unlikely((d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags)) ||
-+ dc->io_disable)) {
- bio->bi_status = BLK_STS_IOERR;
- bio_endio(bio);
- return BLK_QC_T_NONE;
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 08a0b541a4da..14fce3623770 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1188,6 +1188,10 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
- max(dc->disk.disk->queue->backing_dev_info->ra_pages,
- q->backing_dev_info->ra_pages);
-
-+ atomic_set(&dc->io_errors, 0);
-+ dc->io_disable = false;
-+ dc->error_limit = DEFAULT_CACHED_DEV_ERROR_LIMIT;
-+
- bch_cached_dev_request_init(dc);
- bch_cached_dev_writeback_init(dc);
- return 0;
-@@ -1339,6 +1343,24 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size)
- return flash_dev_run(c, u);
- }
-
-+bool bch_cached_dev_error(struct cached_dev *dc)
-+{
-+ char name[BDEVNAME_SIZE];
-+
-+ if (!dc || test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags))
-+ return false;
-+
-+ dc->io_disable = true;
-+ /* make others know io_disable is true earlier */
-+ smp_mb();
-+
-+ pr_err("bcache: stop %s: too many IO errors on backing device %s\n",
-+ dc->disk.name, bdevname(dc->bdev, name));
-+
-+ bcache_device_stop(&dc->disk);
-+ return true;
-+}
-+
- /* Cache set */
-
- __printf(2, 3)
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index afb051bcfca1..7288927f2a47 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -131,7 +131,9 @@ SHOW(__bch_cached_dev)
- var_print(writeback_delay);
- var_print(writeback_percent);
- sysfs_hprint(writeback_rate, dc->writeback_rate.rate << 9);
--
-+ sysfs_hprint(io_errors, atomic_read(&dc->io_errors));
-+ sysfs_printf(io_error_limit, "%i", dc->error_limit);
-+ sysfs_printf(io_disable, "%i", dc->io_disable);
- var_print(writeback_rate_update_seconds);
- var_print(writeback_rate_i_term_inverse);
- var_print(writeback_rate_p_term_inverse);
-@@ -223,6 +225,14 @@ STORE(__cached_dev)
- d_strtoul(writeback_rate_i_term_inverse);
- d_strtoul_nonzero(writeback_rate_p_term_inverse);
-
-+ sysfs_strtoul_clamp(io_error_limit, dc->error_limit, 0, INT_MAX);
-+
-+ if (attr == &sysfs_io_disable) {
-+ int v = strtoul_or_return(buf);
-+
-+ dc->io_disable = v ? 1 : 0;
-+ }
-+
- d_strtoi_h(sequential_cutoff);
- d_strtoi_h(readahead);
-
-@@ -330,6 +340,9 @@ static struct attribute *bch_cached_dev_files[] = {
- &sysfs_writeback_rate_i_term_inverse,
- &sysfs_writeback_rate_p_term_inverse,
- &sysfs_writeback_rate_debug,
-+ &sysfs_errors,
-+ &sysfs_io_error_limit,
-+ &sysfs_io_disable,
- &sysfs_dirty_data,
- &sysfs_stripe_size,
- &sysfs_partial_stripes_expensive,
---
-2.15.1
-
diff --git a/for-next/v4/v4-0012-bcache-stop-bcache-device-when-backing-device-is-.patch b/for-next/v4/v4-0012-bcache-stop-bcache-device-when-backing-device-is-.patch
deleted file mode 100644
index e73bf4f..0000000
--- a/for-next/v4/v4-0012-bcache-stop-bcache-device-when-backing-device-is-.patch
+++ /dev/null
@@ -1,148 +0,0 @@
-From fd9bb15c3ac093f087401ed275184e2a54eadbb6 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sat, 13 Jan 2018 17:31:44 +0800
-Subject: [PATCH v4 12/13] bcache: stop bcache device when backing device is
- offline
-
-Currently bcache does not handle backing device failure. If a backing
-device is offline and disconnected from the system, its bcache device can
-still be accessible. If the bcache device is in writeback mode, I/O
-requests can even succeed if they hit on the cache device. That is to say,
-when and how bcache handles an offline backing device is undefined.
-
-This patch tries to handle backing device offline in a rather simple way,
-- Add cached_dev->status_update_thread kernel thread to update backing
- device status in every 1 second.
-- Add cached_dev->offline_seconds to record how many seconds the backing
- device is observed to be offline. If the backing device is offline for
- BACKING_DEV_OFFLINE_TIMEOUT (5) seconds, set dc->io_disable to 1 and
- call bcache_device_stop() to stop the bcache device which is linked to
- the offline backing device.
-
-Now if a backing device is offline for BACKING_DEV_OFFLINE_TIMEOUT seconds,
-its bcache device will be removed; user space applications writing to it
-will then get errors immediately and can handle the device failure in time.
-
-This patch is quite simple and does not handle more complicated situations.
-Once the bcache device is stopped, users need to recover the backing
-device, then register and attach it manually.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Reviewed-by: Hannes Reinecke <hare@suse.com>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
----
- drivers/md/bcache/bcache.h | 2 ++
- drivers/md/bcache/super.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++
- 2 files changed, 57 insertions(+)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 5a811959392d..9eedb35d01bc 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -338,6 +338,7 @@ struct cached_dev {
-
- struct keybuf writeback_keys;
-
-+ struct task_struct *status_update_thread;
- /*
- * Order the write-half of writeback operations strongly in dispatch
- * order. (Maintain LBA order; don't allow reads completing out of
-@@ -384,6 +385,7 @@ struct cached_dev {
- #define DEFAULT_CACHED_DEV_ERROR_LIMIT 64
- atomic_t io_errors;
- unsigned error_limit;
-+ unsigned offline_seconds;
- };
-
- enum alloc_reserve {
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 14fce3623770..85adf1e29d11 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -646,6 +646,11 @@ static int ioctl_dev(struct block_device *b, fmode_t mode,
- unsigned int cmd, unsigned long arg)
- {
- struct bcache_device *d = b->bd_disk->private_data;
-+ struct cached_dev *dc = container_of(d, struct cached_dev, disk);
-+
-+ if (dc->io_disable)
-+ return -EIO;
-+
- return d->ioctl(d, mode, cmd, arg);
- }
-
-@@ -856,6 +861,45 @@ static void calc_cached_dev_sectors(struct cache_set *c)
- c->cached_dev_sectors = sectors;
- }
-
-+#define BACKING_DEV_OFFLINE_TIMEOUT 5
-+static int cached_dev_status_update(void *arg)
-+{
-+ struct cached_dev *dc = arg;
-+ struct request_queue *q;
-+ char buf[BDEVNAME_SIZE];
-+
-+ /*
-+ * If this delayed worker is stopping outside, directly quit here.
-+ * dc->io_disable might be set via sysfs interface, so check it
-+ * here too.
-+ */
-+ while (!kthread_should_stop() && !dc->io_disable) {
-+ q = bdev_get_queue(dc->bdev);
-+ if (blk_queue_dying(q))
-+ dc->offline_seconds++;
-+ else
-+ dc->offline_seconds = 0;
-+
-+ if (dc->offline_seconds >= BACKING_DEV_OFFLINE_TIMEOUT) {
-+ pr_err("%s: device offline for %d seconds",
-+ bdevname(dc->bdev, buf),
-+ BACKING_DEV_OFFLINE_TIMEOUT);
-+ pr_err("%s: disable I/O request due to backing "
-+ "device offline", dc->disk.name);
-+ dc->io_disable = true;
-+ /* let others know earlier that io_disable is true */
-+ smp_mb();
-+ bcache_device_stop(&dc->disk);
-+ break;
-+ }
-+
-+ schedule_timeout_interruptible(HZ);
-+ }
-+
-+ dc->status_update_thread = NULL;
-+ return 0;
-+}
-+
- void bch_cached_dev_run(struct cached_dev *dc)
- {
- struct bcache_device *d = &dc->disk;
-@@ -898,6 +942,15 @@ void bch_cached_dev_run(struct cached_dev *dc)
- if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") ||
- sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache"))
- pr_debug("error creating sysfs link");
-+
-+ dc->status_update_thread = kthread_run(cached_dev_status_update,
-+ dc,
-+ "bcache_status_update");
-+ if (IS_ERR(dc->status_update_thread)) {
-+ pr_warn("bcache: failed to create bcache_status_update "
-+ "kthread, continue to run without monitoring backing "
-+ "device status");
-+ }
- }
-
- /*
-@@ -1118,6 +1171,8 @@ static void cached_dev_free(struct closure *cl)
- kthread_stop(dc->writeback_thread);
- if (dc->writeback_write_wq)
- destroy_workqueue(dc->writeback_write_wq);
-+ if (!IS_ERR_OR_NULL(dc->status_update_thread))
-+ kthread_stop(dc->status_update_thread);
-
- if (atomic_read(&dc->running))
- bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
---
-2.15.1
-
diff --git a/for-next/v4/v4-0013-bcache-add-stop_attached_devs_on_fail-to-struct-c.patch b/for-next/v4/v4-0013-bcache-add-stop_attached_devs_on_fail-to-struct-c.patch
deleted file mode 100644
index d9edf10..0000000
--- a/for-next/v4/v4-0013-bcache-add-stop_attached_devs_on_fail-to-struct-c.patch
+++ /dev/null
@@ -1,180 +0,0 @@
-From 86e6c96037b81ca6d302e1e7d4342fd1decc8814 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sat, 27 Jan 2018 20:06:15 +0800
-Subject: [PATCH v4 13/13] bcache: add stop_when_cache_set_failed to struct
- cached_dev
-
-Current bcache failure handling code will stop all attached bcache devices
-when the cache set is broken or disconnected. This is desired behavior for
-most of enterprise or cloud use cases, but maybe not for low end
-configuration. Nix <nix@esperi.org.uk> points out, users may still want to
-access the bcache device after cache device failed, for example on laptops.
-
-This patch adds a per-cached_dev option stop_when_cache_set_failed, which
-is enabled (1) by default. Its value can be set via sysfs, when it is set
-to 0, the corresponding bcache device won't be stopped when a broken
-or disconnected cache set is retiring.
-
-When the cached device has dirty data on retiring cache set, if bcache
-device is not stopped, following I/O request on the bcache device may
-result in data corruption on backing device. This patch also prints out
-warning information in kernel message.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Cc: Nix <nix@esperi.org.uk>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
-Cc: Hannes Reinecke <hare@suse.com>
----
- drivers/md/bcache/bcache.h | 1 +
- drivers/md/bcache/super.c | 63 +++++++++++++++++++++++++++++++++-------------
- drivers/md/bcache/sysfs.c | 10 ++++++++
- 3 files changed, 56 insertions(+), 18 deletions(-)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 9eedb35d01bc..3756a196916f 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -362,6 +362,7 @@ struct cached_dev {
- unsigned readahead;
-
- unsigned io_disable:1;
-+ unsigned stop_when_cache_set_failed:1;
- unsigned verify:1;
- unsigned bypass_torture_test:1;
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 85adf1e29d11..93f720433b40 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1246,6 +1246,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
- atomic_set(&dc->io_errors, 0);
- dc->io_disable = false;
- dc->error_limit = DEFAULT_CACHED_DEV_ERROR_LIMIT;
-+ dc->stop_when_cache_set_failed = 1;
-
- bch_cached_dev_request_init(dc);
- bch_cached_dev_writeback_init(dc);
-@@ -1541,33 +1542,59 @@ static void cache_set_flush(struct closure *cl)
- closure_return(cl);
- }
-
-+/*
-+ * dc->stop_when_cache_set_failed is default to true. If it is explicitly
-+ * set to false by user, the bcache device won't be stopped when cache set
-+ * is broken or disconnected. If there is dirty data on failed cache set,
-+ * not stopping bcache device may result data corruption on backing device,
-+ * pr_warn() notices the potential risk in kernel message.
-+ */
-+static void try_stop_bcache_device(struct cache_set *c,
-+ struct bcache_device *d,
-+ struct cached_dev *dc)
-+{
-+ if (dc->stop_when_cache_set_failed)
-+ bcache_device_stop(d);
-+ else if (!dc->stop_when_cache_set_failed &&
-+ atomic_read(&dc->has_dirty))
-+ pr_warn("bcache: device %s won't be stopped while unregistering"
-+ " broken dirty cache set %pU, your data has potential "
-+ "risk to be corrupted. To disable this warning message,"
-+ " please set /sys/block/%s/bcache/stop_when_"
-+ "cache_set_failed to 1.",
-+ d->name, c->sb.set_uuid, d->name);
-+}
-+
- static void __cache_set_unregister(struct closure *cl)
- {
- struct cache_set *c = container_of(cl, struct cache_set, caching);
- struct cached_dev *dc;
-+ struct bcache_device *d;
- size_t i;
-
- mutex_lock(&bch_register_lock);
-
-- for (i = 0; i < c->devices_max_used; i++)
-- if (c->devices[i]) {
-- if (!UUID_FLASH_ONLY(&c->uuids[i]) &&
-- test_bit(CACHE_SET_UNREGISTERING, &c->flags)) {
-- dc = container_of(c->devices[i],
-- struct cached_dev, disk);
-- bch_cached_dev_detach(dc);
-- /*
-- * If we come here by too many I/O errors,
-- * bcache device should be stopped too, to
-- * keep data consistency on cache and
-- * backing devices.
-- */
-- if (test_bit(CACHE_SET_IO_DISABLE, &c->flags))
-- bcache_device_stop(c->devices[i]);
-- } else {
-- bcache_device_stop(c->devices[i]);
-- }
-+ for (i = 0; i < c->devices_max_used; i++) {
-+ d = c->devices[i];
-+ if (!d)
-+ continue;
-+
-+ if (!UUID_FLASH_ONLY(&c->uuids[i]) &&
-+ test_bit(CACHE_SET_UNREGISTERING, &c->flags)) {
-+ dc = container_of(d, struct cached_dev, disk);
-+ bch_cached_dev_detach(dc);
-+ /*
-+ * If we come here by too many I/O errors,
-+ * bcache device should be stopped too, to
-+ * keep data consistency on cache and
-+ * backing devices.
-+ */
-+ if (test_bit(CACHE_SET_IO_DISABLE, &c->flags))
-+ try_stop_bcache_device(c, d, dc);
-+ } else {
-+ bcache_device_stop(d);
- }
-+ }
-
- mutex_unlock(&bch_register_lock);
-
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index 7288927f2a47..b096d4c37c9b 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -93,6 +93,7 @@ read_attribute(partial_stripes_expensive);
- rw_attribute(synchronous);
- rw_attribute(journal_delay_ms);
- rw_attribute(io_disable);
-+rw_attribute(stop_when_cache_set_failed);
- rw_attribute(discard);
- rw_attribute(running);
- rw_attribute(label);
-@@ -134,6 +135,8 @@ SHOW(__bch_cached_dev)
- sysfs_hprint(io_errors, atomic_read(&dc->io_errors));
- sysfs_printf(io_error_limit, "%i", dc->error_limit);
- sysfs_printf(io_disable, "%i", dc->io_disable);
-+ sysfs_printf(stop_when_cache_set_failed, "%i",
-+ dc->stop_when_cache_set_failed);
- var_print(writeback_rate_update_seconds);
- var_print(writeback_rate_i_term_inverse);
- var_print(writeback_rate_p_term_inverse);
-@@ -233,6 +236,12 @@ STORE(__cached_dev)
- dc->io_disable = v ? 1 : 0;
- }
-
-+ if (attr == &sysfs_stop_when_cache_set_failed) {
-+ int v = strtoul_or_return(buf);
-+
-+ dc->stop_when_cache_set_failed = v ? 1 : 0;
-+ }
-+
- d_strtoi_h(sequential_cutoff);
- d_strtoi_h(readahead);
-
-@@ -343,6 +352,7 @@ static struct attribute *bch_cached_dev_files[] = {
- &sysfs_errors,
- &sysfs_io_error_limit,
- &sysfs_io_disable,
-+ &sysfs_stop_when_cache_set_failed,
- &sysfs_dirty_data,
- &sysfs_stripe_size,
- &sysfs_partial_stripes_expensive,
---
-2.15.1
-
diff --git a/for-next/v5/v5-0000-cover-letter.patch b/for-next/v5/v5-0000-cover-letter.patch
deleted file mode 100644
index f643463..0000000
--- a/for-next/v5/v5-0000-cover-letter.patch
+++ /dev/null
@@ -1,95 +0,0 @@
-From e8f72263c0f4f20b85f42a617fa4998115f797af Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Mon, 5 Feb 2018 18:26:45 +0800
-Subject: [PATCH v5 00/10] bcache: device failure handling improvement
-
-Hi maintainers and folks,
-
-This patch set tries to improve bcache device failure handling, includes
-cache device and backing device failures.
-
-The basic idea to handle failed cache device is,
-- Unregister cache set
-- Detach all backing devices which are attached to this cache set
-- Stop all the detached bcache devices (configurable)
-- Stop all flash only volume on the cache set
-The above process is named 'cache set retire' by me. The result of cache
-set retire is, cache set and bcache devices are all removed, following
-I/O requests will get failed immediately to notify upper layer or user
-space code that the cache device is failed or disconnected.
-- Stop all the detached bcache devices (configurable)
-- Stop all flash only volume on the cache set
-The above process is named 'cache set retire' by me. The result of cache
-set retire is, cache set and bcache devices are all removed
-(configurable), following I/O requests will get failed immediately to
-notify upper layer or user space code that the cache device is failed or
-disconnected.
-
-Two patches from the v4 patch set have been merged into bcache-for-next;
-they are not in the v5 patch set any more.
-
-V5 patch set adds a new patch "bcache: add stop_when_cache_set_failed
-option to backing device", which provides "auto"/"always" options to
-configure whether or not to stop bcache device for a broken cache set. The
-patch "bcache: stop all attached bcache devices for a retired cache set"
-from v4 patch set is replaced by the above new added patch.
-
-Most of the patches are reviewed by Hannes Reinecke and Junhui Tang. There
-are still several patches that need to be reviewed,
-- [PATCH v5 03/10] bcache: quit dc->writeback_thread when
- BCACHE_DEV_DETACHING is set
-- [PATCH v5 06/10] bcache: add stop_when_cache_set_failed option to
- backing device
-
-Any comment, question and review are warmly welcome. Thanks in advance.
-
-Changelog:
-v5: replace patch "bcache: stop all attached bcache devices for a retired
- cache set" from v4 patch set by "bcache: add stop_when_cache_set_failed
- option to backing device" from v5 patch set.
- fix issues from v4 patch set.
- improve kernel message format, remove redundant prefix string.
-v4: add per-cached_dev option stop_attached_devs_on_fail to avoid stopping
- attached bcache device from a retiring cache set.
-v3: fix detach issue find in v2 patch set.
-v2: fixes all problems found in v1 review.
- add patches to handle backing device failure.
- add one more patch to set writeback_rate_update_seconds range.
- include a patch from Junhui Tang.
-v1: the initial version, only handles cache device failure.
-
-Coly Li
-
-
-Coly Li (10):
- bcache: set writeback_rate_update_seconds in range [1, 60] seconds
- bcache: fix cached_dev->count usage for bch_cache_set_error()
- bcache: quit dc->writeback_thread when BCACHE_DEV_DETACHING is set
- bcache: stop dc->writeback_rate_update properly
- bcache: add CACHE_SET_IO_DISABLE to struct cache_set flags
- bcache: stop all attached bcache devices for a retired cache set
- bcache: add backing_request_endio() for bi_end_io of attached backing
- device I/O
- bcache: add io_disable to struct cached_dev
- bcache: stop bcache device when backing device is offline
- bcache: add stop_when_cache_set_failed option to backing device
-
-Tang Junhui (1):
- bcache: fix inaccurate io state for detached bcache devices
-
- drivers/md/bcache/alloc.c | 3 +-
- drivers/md/bcache/bcache.h | 44 ++++++++-
- drivers/md/bcache/btree.c | 10 +-
- drivers/md/bcache/io.c | 16 +++-
- drivers/md/bcache/journal.c | 4 +-
- drivers/md/bcache/request.c | 185 +++++++++++++++++++++++++++++++------
- drivers/md/bcache/super.c | 206 ++++++++++++++++++++++++++++++++++++++----
- drivers/md/bcache/sysfs.c | 59 +++++++++++-
- drivers/md/bcache/util.h | 6 --
- drivers/md/bcache/writeback.c | 94 ++++++++++++++++---
- drivers/md/bcache/writeback.h | 5 +-
- 11 files changed, 551 insertions(+), 81 deletions(-)
-
---
-2.16.1
-
diff --git a/for-next/v5/v5-0001-bcache-set-writeback_rate_update_seconds-in-range.patch b/for-next/v5/v5-0001-bcache-set-writeback_rate_update_seconds-in-range.patch
deleted file mode 100644
index 4a6c147..0000000
--- a/for-next/v5/v5-0001-bcache-set-writeback_rate_update_seconds-in-range.patch
+++ /dev/null
@@ -1,79 +0,0 @@
-From 71066c410c4f50bb1803a634dff17fd0ecb90860 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sat, 13 Jan 2018 15:11:03 +0800
-Subject: [PATCH v5 01/10] bcache: set writeback_rate_update_seconds in range
- [1, 60] seconds
-
-dc->writeback_rate_update_seconds can be set via sysfs and its value can
-be set to [1, ULONG_MAX]. It does not make sense to set such a large
-value, 60 seconds is long enough value considering the default 5 seconds
-works well for long time.
-
-Because dc->writeback_rate_update is a special delayed work, it re-arms
-itself inside the delayed work routine update_writeback_rate(). When
-stopping it by cancel_delayed_work_sync(), there should be a timeout to
-wait and make sure the re-armed delayed work is stopped too. A small max
-value of dc->writeback_rate_update_seconds is also helpful to decide a
-reasonable small timeout.
-
-This patch limits sysfs interface to set dc->writeback_rate_update_seconds
-in range of [1, 60] seconds, and replaces the hand-coded number by macros.
-
-Changelog:
-v2: fix a rebase typo in v4, which is pointed out by Michael Lyle.
-v1: initial version.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Reviewed-by: Hannes Reinecke <hare@suse.com>
-Cc: Michael Lyle <mlyle@lyle.org>
----
- drivers/md/bcache/sysfs.c | 4 +++-
- drivers/md/bcache/writeback.c | 2 +-
- drivers/md/bcache/writeback.h | 3 +++
- 3 files changed, 7 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index c524305cc9a7..4a6a697e1680 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -218,7 +218,9 @@ STORE(__cached_dev)
- sysfs_strtoul_clamp(writeback_rate,
- dc->writeback_rate.rate, 1, INT_MAX);
-
-- d_strtoul_nonzero(writeback_rate_update_seconds);
-+ sysfs_strtoul_clamp(writeback_rate_update_seconds,
-+ dc->writeback_rate_update_seconds,
-+ 1, WRITEBACK_RATE_UPDATE_SECS_MAX);
- d_strtoul(writeback_rate_i_term_inverse);
- d_strtoul_nonzero(writeback_rate_p_term_inverse);
-
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index 58218f7e77c3..f1d2fc15abcc 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -655,7 +655,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
- dc->writeback_rate.rate = 1024;
- dc->writeback_rate_minimum = 8;
-
-- dc->writeback_rate_update_seconds = 5;
-+ dc->writeback_rate_update_seconds = WRITEBACK_RATE_UPDATE_SECS_DEFAULT;
- dc->writeback_rate_p_term_inverse = 40;
- dc->writeback_rate_i_term_inverse = 10000;
-
-diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
-index 66f1c527fa24..587b25599856 100644
---- a/drivers/md/bcache/writeback.h
-+++ b/drivers/md/bcache/writeback.h
-@@ -8,6 +8,9 @@
- #define MAX_WRITEBACKS_IN_PASS 5
- #define MAX_WRITESIZE_IN_PASS 5000 /* *512b */
-
-+#define WRITEBACK_RATE_UPDATE_SECS_MAX 60
-+#define WRITEBACK_RATE_UPDATE_SECS_DEFAULT 5
-+
- /*
- * 14 (16384ths) is chosen here as something that each backing device
- * should be a reasonable fraction of the share, and not to blow up
---
-2.16.1
-
diff --git a/for-next/v5/v5-0002-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch b/for-next/v5/v5-0002-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch
deleted file mode 100644
index 1ff898a..0000000
--- a/for-next/v5/v5-0002-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch
+++ /dev/null
@@ -1,178 +0,0 @@
-From 8d90ae56c8b859dbd3b4360c8e011f5fee7b3540 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Mon, 8 Jan 2018 23:05:58 +0800
-Subject: [PATCH v5 02/10] bcache: fix cached_dev->count usage for
- bch_cache_set_error()
-
-When bcache metadata I/O fails, bcache will call bch_cache_set_error()
-to retire the whole cache set. The expected behavior to retire a cache
-set is to unregister the cache set, and unregister all backing device
-attached to this cache set, then remove sysfs entries of the cache set
-and all attached backing devices, finally release memory of structs
-cache_set, cache, cached_dev and bcache_device.
-
-In my testing when journal I/O failure triggered by disconnected cache
-device, sometimes the cache set cannot be retired, and its sysfs
-entry /sys/fs/bcache/<uuid> still exists and the backing device also
-references it. This is not expected behavior.
-
-When metadata I/O fails, the call sequence to retire whole cache set is,
- bch_cache_set_error()
- bch_cache_set_unregister()
- bch_cache_set_stop()
- __cache_set_unregister() <- called as callback by calling
- closure_queue(&c->caching)
- cache_set_flush() <- called as a callback when refcount
- of cache_set->caching is 0
- cache_set_free() <- called as a callback when refcount
- of cache_set->cl is 0
- bch_cache_set_release() <- called as a callback when refcount
- of cache_set->kobj is 0
-
-I find if kernel thread bch_writeback_thread() quits while-loop when
-kthread_should_stop() is true and searched_full_index is false, closure
-callback cache_set_flush() set by continue_at() will never be called. The
-result is, bcache fails to retire whole cache set.
-
-cache_set_flush() will be called when refcount of closure c->caching is 0,
-and in function bcache_device_detach() refcount of closure c->caching is
-released to 0 by closure_put(). In metadata error code path, function
-bcache_device_detach() is called by cached_dev_detach_finish(). This is a
-callback routine being called when cached_dev->count is 0. This refcount
-is decreased by cached_dev_put().
-
-The above dependence indicates, cache_set_flush() will be called when
-refcount of cache_set->cl is 0, and refcount of cache_set->cl to be 0
-when refcount of cached_dev->count is 0.
-
-The reason why sometimes cached_dev->count is not 0 (when metadata I/O fails
-and bch_cache_set_error() called) is, in bch_writeback_thread(), refcount
-of cached_dev is not decreased properly.
-
-In bch_writeback_thread(), cached_dev_put() is called only when
-searched_full_index is true and cached_dev->writeback_keys is empty, a.k.a
-there is no dirty data on cache. In most of run time it is correct, but
-when bch_writeback_thread() quits the while-loop while cache is still
-dirty, current code forget to call cached_dev_put() before this kernel
-thread exits. This is why sometimes cache_set_flush() is not executed and
-cache set fails to be retired.
-
-The reason to call cached_dev_put() in bch_writeback_rate() is, when the
-cache device changes from clean to dirty, cached_dev_get() is called, to
-make sure during writeback operations both backing and cache devices
-won't be released.
-
-Adding following code in bch_writeback_thread() does not work,
- static int bch_writeback_thread(void *arg)
- }
-
-+ if (atomic_read(&dc->has_dirty))
-+ cached_dev_put()
-+
- return 0;
- }
-because writeback kernel thread can be waken up and start via sysfs entry:
- echo 1 > /sys/block/bcache<N>/bcache/writeback_running
-It is difficult to check whether backing device is dirty without race and
-extra lock. So the above modification will introduce potential refcount
-underflow in some conditions.
-
-The correct fix is, to take cached dev refcount when creating the kernel
-thread, and put it before the kernel thread exits. Then bcache does not
-need to take a cached dev refcount when cache turns from clean to dirty,
-or to put a cached dev refcount when cache turns from dirty to clean. The
-writeback kernel thread is always safe to reference data structure from
-cache set, cache and cached device (because a refcount of cache device is
-taken for it already), and no matter the kernel thread is stopped by I/O
-errors or system reboot, cached_dev->count can always be used correctly.
-
-The patch is simple, but understanding how it works is quite complicated.
-
-Changelog:
-v2: set dc->writeback_thread to NULL in this patch, as suggested by Hannes.
-v1: initial version for review.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Reviewed-by: Hannes Reinecke <hare@suse.com>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
----
- drivers/md/bcache/super.c | 1 -
- drivers/md/bcache/writeback.c | 11 ++++++++---
- drivers/md/bcache/writeback.h | 2 --
- 3 files changed, 8 insertions(+), 6 deletions(-)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index a2ad37a8afc0..7d96dc6860fa 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1052,7 +1052,6 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
- if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
- bch_sectors_dirty_init(&dc->disk);
- atomic_set(&dc->has_dirty, 1);
-- refcount_inc(&dc->count);
- bch_writeback_queue(dc);
- }
-
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index f1d2fc15abcc..b280c134dd4d 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -572,7 +572,7 @@ static int bch_writeback_thread(void *arg)
-
- if (kthread_should_stop()) {
- set_current_state(TASK_RUNNING);
-- return 0;
-+ break;
- }
-
- schedule();
-@@ -585,7 +585,6 @@ static int bch_writeback_thread(void *arg)
- if (searched_full_index &&
- RB_EMPTY_ROOT(&dc->writeback_keys.keys)) {
- atomic_set(&dc->has_dirty, 0);
-- cached_dev_put(dc);
- SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN);
- bch_write_bdev_super(dc, NULL);
- }
-@@ -606,6 +605,9 @@ static int bch_writeback_thread(void *arg)
- }
- }
-
-+ dc->writeback_thread = NULL;
-+ cached_dev_put(dc);
-+
- return 0;
- }
-
-@@ -669,10 +671,13 @@ int bch_cached_dev_writeback_start(struct cached_dev *dc)
- if (!dc->writeback_write_wq)
- return -ENOMEM;
-
-+ cached_dev_get(dc);
- dc->writeback_thread = kthread_create(bch_writeback_thread, dc,
- "bcache_writeback");
-- if (IS_ERR(dc->writeback_thread))
-+ if (IS_ERR(dc->writeback_thread)) {
-+ cached_dev_put(dc);
- return PTR_ERR(dc->writeback_thread);
-+ }
-
- schedule_delayed_work(&dc->writeback_rate_update,
- dc->writeback_rate_update_seconds * HZ);
-diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
-index 587b25599856..0bba8f1c6cdf 100644
---- a/drivers/md/bcache/writeback.h
-+++ b/drivers/md/bcache/writeback.h
-@@ -105,8 +105,6 @@ static inline void bch_writeback_add(struct cached_dev *dc)
- {
- if (!atomic_read(&dc->has_dirty) &&
- !atomic_xchg(&dc->has_dirty, 1)) {
-- refcount_inc(&dc->count);
--
- if (BDEV_STATE(&dc->sb) != BDEV_STATE_DIRTY) {
- SET_BDEV_STATE(&dc->sb, BDEV_STATE_DIRTY);
- /* XXX: should do this synchronously */
---
-2.16.1
-
diff --git a/for-next/v5/v5-0003-bcache-quit-dc-writeback_thread-when-BCACHE_DEV_D.patch b/for-next/v5/v5-0003-bcache-quit-dc-writeback_thread-when-BCACHE_DEV_D.patch
deleted file mode 100644
index cd0b2e6..0000000
--- a/for-next/v5/v5-0003-bcache-quit-dc-writeback_thread-when-BCACHE_DEV_D.patch
+++ /dev/null
@@ -1,130 +0,0 @@
-From 26562d0421bf1fa18492e4089fead5b1f97616e2 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sun, 14 Jan 2018 21:41:57 +0800
-Subject: [PATCH v5 03/10] bcache: quit dc->writeback_thread when
- BCACHE_DEV_DETACHING is set
-
-In patch "bcache: fix cached_dev->count usage for bch_cache_set_error()",
-cached_dev_get() is called when creating dc->writeback_thread, and
-cached_dev_put() is called when exiting dc->writeback_thread. This
-modification works well unless people detach the bcache device manually by
- 'echo 1 > /sys/block/bcache<N>/bcache/detach'
-Because this sysfs interface only calls bch_cached_dev_detach() which wakes
-up dc->writeback_thread but does not stop it. The reason is, before patch
-"bcache: fix cached_dev->count usage for bch_cache_set_error()", inside
-bch_writeback_thread(), if cache is not dirty after writeback,
-cached_dev_put() will be called here. And in cached_dev_make_request() when
-a new write request makes cache from clean to dirty, cached_dev_get() will
-be called there. Since we don't operate dc->count in these locations,
-refcount d->count cannot be dropped after cache becomes clean, and
-cached_dev_detach_finish() won't be called to detach bcache device.
-
-This patch fixes the issue by checking whether BCACHE_DEV_DETACHING is
-set inside bch_writeback_thread(). If this bit is set and cache is clean
-(no existing writeback_keys), break the while-loop, call cached_dev_put()
-and quit the writeback thread.
-
-Please note if cache is still dirty, even BCACHE_DEV_DETACHING is set the
-writeback thread should continue to perform writeback, this is the original
-design of manually detach.
-
-It is safe to do the following check without locking, let me explain why,
-+ if (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
-+ (!atomic_read(&dc->has_dirty) || !dc->writeback_running)) {
-
-If the kernel thread does not sleep and continues to run because conditions
-are not updated in time on the running CPU core, it just consumes more CPU
-cycles and does no harm. This should-sleep-but-run is safe here. We just
-focus on the should-run-but-sleep condition, which means the writeback
-thread goes to sleep by mistake while it should continue to run.
-1, First of all, no matter the writeback thread is hung or not, kthread_stop() from
- cached_dev_detach_finish() will wake up it and terminate by making
- kthread_should_stop() return true. And in normal run time, bit on index
- BCACHE_DEV_DETACHING is always cleared, the condition
- !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)
- is always true and can be ignored as constant value.
-2, If one of the following conditions is true, the writeback thread should
- go to sleep,
- "!atomic_read(&dc->has_dirty)" or "!dc->writeback_running)"
- each of them independently controls the writeback thread should sleep or
- not, let's analyse them one by one.
-2.1 condition "!atomic_read(&dc->has_dirty)"
- If dc->has_dirty is set from 0 to 1 on another CPU core, bcache will
- call bch_writeback_queue() immediately or call bch_writeback_add() which
- indirectly calls bch_writeback_queue() too. In bch_writeback_queue(),
- wake_up_process(dc->writeback_thread) is called. It sets writeback
- thread's task state to TASK_RUNNING and following an implicit memory
- barrier, then tries to wake up the writeback thread.
- In writeback thread, its task state is set to TASK_INTERRUPTIBLE before
- doing the condition check. If other CPU core sets the TASK_RUNNING state
- after writeback thread setting TASK_INTERRUPTIBLE, the writeback thread
- will be scheduled to run very soon because its state is not
- TASK_INTERRUPTIBLE. If other CPU core sets the TASK_RUNNING state before
- writeback thread setting TASK_INTERRUPTIBLE, the implicit memory barrier
- of wake_up_process() will make sure modification of dc->has_dirty on
- other CPU core is updated and observed on the CPU core of writeback
- thread. Therefore the condition check will correctly be false, and
- continue writeback code without sleeping.
-2.2 condition "!dc->writeback_running)"
- dc->writeback_running can be changed via sysfs file, every time it is
- modified, a following bch_writeback_queue() is always called. So the
- change is always observed on the CPU core of writeback thread. If
- dc->writeback_running is changed from 0 to 1 on other CPU core, this
- condition check will observe the modification and allow writeback
- thread to continue to run without sleeping.
-Now we can see, even without a locking protection, multiple conditions
-check is safe here, no deadlock or process hang up will happen.
-
-I compose a separate patch because that patch "bcache: fix cached_dev->count
-usage for bch_cache_set_error()" already gets a "Reviewed-by:" from Hannes
-Reinecke. Also this fix is not trivial and good for a separate patch.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Hannes Reinecke <hare@suse.com>
-Cc: Huijun Tang <tang.junhui@zte.com.cn>
----
- drivers/md/bcache/writeback.c | 20 +++++++++++++++++---
- 1 file changed, 17 insertions(+), 3 deletions(-)
-
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index b280c134dd4d..4dbeaaa575bf 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -565,9 +565,15 @@ static int bch_writeback_thread(void *arg)
- while (!kthread_should_stop()) {
- down_write(&dc->writeback_lock);
- set_current_state(TASK_INTERRUPTIBLE);
-- if (!atomic_read(&dc->has_dirty) ||
-- (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
-- !dc->writeback_running)) {
-+ /*
-+ * If the bcache device is detaching, skip here and continue
-+ * to perform writeback. Otherwise, if no dirty data on cache,
-+ * or there is dirty data on cache but writeback is disabled,
-+ * the writeback thread should sleep here and wait for others
-+ * to wake up it.
-+ */
-+ if (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
-+ (!atomic_read(&dc->has_dirty) || !dc->writeback_running)) {
- up_write(&dc->writeback_lock);
-
- if (kthread_should_stop()) {
-@@ -587,6 +593,14 @@ static int bch_writeback_thread(void *arg)
- atomic_set(&dc->has_dirty, 0);
- SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN);
- bch_write_bdev_super(dc, NULL);
-+ /*
-+ * If bcache device is detaching via sysfs interface,
-+ * writeback thread should stop after there is no dirty
-+ * data on cache. BCACHE_DEV_DETACHING flag is set in
-+ * bch_cached_dev_detach().
-+ */
-+ if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
-+ break;
- }
-
- up_write(&dc->writeback_lock);
---
-2.16.1
-
diff --git a/for-next/v5/v5-0004-bcache-stop-dc-writeback_rate_update-properly.patch b/for-next/v5/v5-0004-bcache-stop-dc-writeback_rate_update-properly.patch
deleted file mode 100644
index 909a381..0000000
--- a/for-next/v5/v5-0004-bcache-stop-dc-writeback_rate_update-properly.patch
+++ /dev/null
@@ -1,268 +0,0 @@
-From 0661a1f418c8efe59d19f952218c2faca0044275 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sat, 13 Jan 2018 15:48:39 +0800
-Subject: [PATCH v5 04/10] bcache: stop dc->writeback_rate_update properly
-
-struct delayed_work writeback_rate_update in struct cache_dev is a delayed
-worker to call function update_writeback_rate() in period (the interval is
-defined by dc->writeback_rate_update_seconds).
-
-When a metadata I/O error happens on cache device, bcache error handling
-routine bch_cache_set_error() will call bch_cache_set_unregister() to
-retire whole cache set. On the unregister code path, this delayed work is
-stopped by calling cancel_delayed_work_sync(&dc->writeback_rate_update).
-
-dc->writeback_rate_update is different from other delayed works in bcache.
-In its routine update_writeback_rate(), this delayed work re-arms itself.
-That means when cancel_delayed_work_sync() returns, this delayed work can
-still be executed after several seconds defined by
-dc->writeback_rate_update_seconds.
-
-The problem is, after cancel_delayed_work_sync() returns, the cache set
-unregister code path will continue and release memory of struct cache set.
-Then the delayed work is scheduled to run, __update_writeback_rate()
-will reference the already released cache_set memory, and trigger a NULL
-pointer dereference fault.
-
-This patch introduces two more bcache device flags,
-- BCACHE_DEV_WB_RUNNING
- bit set: bcache device is in writeback mode and running, it is OK for
- dc->writeback_rate_update to re-arm itself.
- bit clear:bcache device is trying to stop dc->writeback_rate_update,
- this delayed work should not re-arm itself and quit.
-- BCACHE_DEV_RATE_DW_RUNNING
- bit set: routine update_writeback_rate() is executing.
- bit clear: routine update_writeback_rate() quits.
-
-This patch also adds a function cancel_writeback_rate_update_dwork() to
-wait for dc->writeback_rate_update to quit before cancelling it by calling
-cancel_delayed_work_sync(). In order to avoid a deadlock if
-dc->writeback_rate_update unexpectedly does not quit, after time_out
-seconds this function will give up waiting and continue to call
-cancel_delayed_work_sync().
-
-And here I explain how this patch stops self re-armed delayed work properly
-with the above stuffs.
-
-update_writeback_rate() sets BCACHE_DEV_RATE_DW_RUNNING at its beginning
-and clears BCACHE_DEV_RATE_DW_RUNNING at its end. Before calling
-cancel_writeback_rate_update_dwork() clear flag BCACHE_DEV_WB_RUNNING.
-
-Before calling cancel_delayed_work_sync(), wait until flag
-BCACHE_DEV_RATE_DW_RUNNING is clear. So when calling
-cancel_delayed_work_sync(), dc->writeback_rate_update must have either
-already re-armed itself, or quit after seeing BCACHE_DEV_WB_RUNNING cleared.
-In both cases the delayed work routine update_writeback_rate() won't be
-executed after cancel_delayed_work_sync() returns.
-
-Inside update_writeback_rate() before calling schedule_delayed_work(), flag
-BCACHE_DEV_WB_RUNNING is checked before. If this flag is cleared, it means
-someone is about to stop the delayed work. Because flag
-BCACHE_DEV_RATE_DW_RUNNING is set already and cancel_delayed_work_sync()
-has to wait for this flag to be cleared, we don't need to worry about race
-condition here.
-
-If update_writeback_rate() is scheduled to run after checking
-BCACHE_DEV_RATE_DW_RUNNING and before calling cancel_delayed_work_sync()
-in cancel_writeback_rate_update_dwork(), it is also safe. Because at this
-moment BCACHE_DEV_WB_RUNNING is cleared with memory barrier. As I mentioned
-previously, update_writeback_rate() will see BCACHE_DEV_WB_RUNNING is clear
-and quit immediately.
-
-Because there are more dependencies inside update_writeback_rate() to struct
-cache_set memory, dc->writeback_rate_update is not a simple self re-arm
-delayed work. After trying many different methods (e.g. hold dc->count, or
-use locks), this is the only way I can find which works to properly stop
-dc->writeback_rate_update delayed work.
-
-Changelog:
-v3: change values of BCACHE_DEV_WB_RUNNING and BCACHE_DEV_RATE_DW_RUNNING
- to bit index, for test_bit().
-v2: Try to fix the race issue which is pointed out by Junhui.
-v1: The initial version for review
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Reviewed-by: Junhui Tang <tang.junhui@zte.com.cn>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Hannes Reinecke <hare@suse.com>
----
- drivers/md/bcache/bcache.h | 9 +++++----
- drivers/md/bcache/super.c | 39 +++++++++++++++++++++++++++++++++++----
- drivers/md/bcache/sysfs.c | 3 ++-
- drivers/md/bcache/writeback.c | 29 ++++++++++++++++++++++++++++-
- 4 files changed, 70 insertions(+), 10 deletions(-)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index b8c2e1bef1f1..0380626bf525 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -258,10 +258,11 @@ struct bcache_device {
- struct gendisk *disk;
-
- unsigned long flags;
--#define BCACHE_DEV_CLOSING 0
--#define BCACHE_DEV_DETACHING 1
--#define BCACHE_DEV_UNLINK_DONE 2
--
-+#define BCACHE_DEV_CLOSING 0
-+#define BCACHE_DEV_DETACHING 1
-+#define BCACHE_DEV_UNLINK_DONE 2
-+#define BCACHE_DEV_WB_RUNNING 3
-+#define BCACHE_DEV_RATE_DW_RUNNING 4
- unsigned nr_stripes;
- unsigned stripe_size;
- atomic_t *stripe_sectors_dirty;
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 7d96dc6860fa..e15cacecf078 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -899,6 +899,32 @@ void bch_cached_dev_run(struct cached_dev *dc)
- pr_debug("error creating sysfs link");
- }
-
-+/*
-+ * If BCACHE_DEV_RATE_DW_RUNNING is set, it means routine of the delayed
-+ * work dc->writeback_rate_update is running. Wait until the routine
-+ * quits (BCACHE_DEV_RATE_DW_RUNNING is clear), then continue to
-+ * cancel it. If BCACHE_DEV_RATE_DW_RUNNING is not clear after time_out
-+ * seconds, give up waiting here and continue to cancel it too.
-+ */
-+static void cancel_writeback_rate_update_dwork(struct cached_dev *dc)
-+{
-+ int time_out = WRITEBACK_RATE_UPDATE_SECS_MAX * HZ;
-+
-+ do {
-+ if (!test_bit(BCACHE_DEV_RATE_DW_RUNNING,
-+ &dc->disk.flags))
-+ break;
-+ time_out--;
-+ schedule_timeout_interruptible(1);
-+ } while (time_out > 0);
-+
-+ if (time_out == 0)
-+ pr_warn("give up waiting for dc->writeback_write_update"
-+ " to quit");
-+
-+ cancel_delayed_work_sync(&dc->writeback_rate_update);
-+}
-+
- static void cached_dev_detach_finish(struct work_struct *w)
- {
- struct cached_dev *dc = container_of(w, struct cached_dev, detach);
-@@ -911,7 +937,9 @@ static void cached_dev_detach_finish(struct work_struct *w)
-
- mutex_lock(&bch_register_lock);
-
-- cancel_delayed_work_sync(&dc->writeback_rate_update);
-+ if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
-+ cancel_writeback_rate_update_dwork(dc);
-+
- if (!IS_ERR_OR_NULL(dc->writeback_thread)) {
- kthread_stop(dc->writeback_thread);
- dc->writeback_thread = NULL;
-@@ -954,6 +982,7 @@ void bch_cached_dev_detach(struct cached_dev *dc)
- closure_get(&dc->disk.cl);
-
- bch_writeback_queue(dc);
-+
- cached_dev_put(dc);
- }
-
-@@ -1079,14 +1108,16 @@ static void cached_dev_free(struct closure *cl)
- {
- struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
-
-- cancel_delayed_work_sync(&dc->writeback_rate_update);
-+ mutex_lock(&bch_register_lock);
-+
-+ if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
-+ cancel_writeback_rate_update_dwork(dc);
-+
- if (!IS_ERR_OR_NULL(dc->writeback_thread))
- kthread_stop(dc->writeback_thread);
- if (dc->writeback_write_wq)
- destroy_workqueue(dc->writeback_write_wq);
-
-- mutex_lock(&bch_register_lock);
--
- if (atomic_read(&dc->running))
- bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
- bcache_device_free(&dc->disk);
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index 4a6a697e1680..399e91cbf714 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -306,7 +306,8 @@ STORE(bch_cached_dev)
- bch_writeback_queue(dc);
-
- if (attr == &sysfs_writeback_percent)
-- schedule_delayed_work(&dc->writeback_rate_update,
-+ if (!test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
-+ schedule_delayed_work(&dc->writeback_rate_update,
- dc->writeback_rate_update_seconds * HZ);
-
- mutex_unlock(&bch_register_lock);
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index 4dbeaaa575bf..8f98ef1038d3 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -115,6 +115,21 @@ static void update_writeback_rate(struct work_struct *work)
- struct cached_dev,
- writeback_rate_update);
-
-+ /*
-+ * should check BCACHE_DEV_RATE_DW_RUNNING before calling
-+ * cancel_delayed_work_sync().
-+ */
-+ set_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
-+ /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
-+ smp_mb();
-+
-+ if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) {
-+ clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
-+ /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
-+ smp_mb();
-+ return;
-+ }
-+
- down_read(&dc->writeback_lock);
-
- if (atomic_read(&dc->has_dirty) &&
-@@ -123,8 +138,18 @@ static void update_writeback_rate(struct work_struct *work)
-
- up_read(&dc->writeback_lock);
-
-- schedule_delayed_work(&dc->writeback_rate_update,
-+ if (test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) {
-+ schedule_delayed_work(&dc->writeback_rate_update,
- dc->writeback_rate_update_seconds * HZ);
-+ }
-+
-+ /*
-+ * should check BCACHE_DEV_RATE_DW_RUNNING before calling
-+ * cancel_delayed_work_sync().
-+ */
-+ clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
-+ /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
-+ smp_mb();
- }
-
- static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
-@@ -675,6 +700,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
- dc->writeback_rate_p_term_inverse = 40;
- dc->writeback_rate_i_term_inverse = 10000;
-
-+ WARN_ON(test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
- INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
- }
-
-@@ -693,6 +719,7 @@ int bch_cached_dev_writeback_start(struct cached_dev *dc)
- return PTR_ERR(dc->writeback_thread);
- }
-
-+ WARN_ON(test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
- schedule_delayed_work(&dc->writeback_rate_update,
- dc->writeback_rate_update_seconds * HZ);
-
---
-2.16.1
-
diff --git a/for-next/v5/v5-0005-bcache-add-CACHE_SET_IO_DISABLE-to-struct-cache_s.patch b/for-next/v5/v5-0005-bcache-add-CACHE_SET_IO_DISABLE-to-struct-cache_s.patch
deleted file mode 100644
index 7444b9a..0000000
--- a/for-next/v5/v5-0005-bcache-add-CACHE_SET_IO_DISABLE-to-struct-cache_s.patch
+++ /dev/null
@@ -1,491 +0,0 @@
-From f9371b6b9d66ff73942770360cce17a72ca7625a Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sun, 14 Jan 2018 22:15:00 +0800
-Subject: [PATCH v5 05/10] bcache: add CACHE_SET_IO_DISABLE to struct cache_set
- flags
-
-When too many I/Os failed on cache device, bch_cache_set_error() is called
-in the error handling code path to retire whole problematic cache set. If
-new I/O requests continue to come and take refcount dc->count, the cache
-set won't be retired immediately, this is a problem.
-
-Furthermore, several kernel threads and self-armed kernel works may still
-be running after bch_cache_set_error() is called. It needs to wait quite a
-while for them to stop, or they won't stop at all. They also prevent the
-cache set from being retired.
-
-The solution in this patch is, to add per cache set flag to disable I/O
-request on this cache and all attached backing devices. Then new coming I/O
-requests can be rejected in *_make_request() before taking refcount, kernel
-threads and self-armed kernel worker can stop very fast when flags bit
-CACHE_SET_IO_DISABLE is set.
-
-Because bcache also does internal I/Os for writeback, garbage collection,
-bucket allocation, journaling, this kind of I/O should be disabled after
-bch_cache_set_error() is called. So closure_bio_submit() is modified to
-check whether CACHE_SET_IO_DISABLE is set on cache_set->flags. If set,
-closure_bio_submit() will set bio->bi_status to BLK_STS_IOERR and
-return, generic_make_request() won't be called.
-
-A sysfs interface is also added to set or clear CACHE_SET_IO_DISABLE bit
-from cache_set->flags, to disable or enable cache set I/O for debugging. It
-is helpful to trigger more corner case issues for failed cache device.
-
-Changelog
-v3, change CACHE_SET_IO_DISABLE from 4 to 3, since it is bit index.
- remove "bcache: " prefix when printing out kernel message.
-v2, more changes by previous review,
-- Use CACHE_SET_IO_DISABLE of cache_set->flags, suggested by Junhui.
-- Check CACHE_SET_IO_DISABLE in bch_btree_gc() to stop a while-loop, this
- is reported and inspired from original patch of Pavel Vazharov.
-v1, initial version.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Reviewed-by: Hannes Reinecke <hare@suse.com>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Pavel Vazharov <freakpv@gmail.com>
----
- drivers/md/bcache/alloc.c | 3 ++-
- drivers/md/bcache/bcache.h | 18 ++++++++++++++++++
- drivers/md/bcache/btree.c | 10 +++++++---
- drivers/md/bcache/io.c | 2 +-
- drivers/md/bcache/journal.c | 4 ++--
- drivers/md/bcache/request.c | 26 +++++++++++++++++++-------
- drivers/md/bcache/super.c | 6 +++++-
- drivers/md/bcache/sysfs.c | 20 ++++++++++++++++++++
- drivers/md/bcache/util.h | 6 ------
- drivers/md/bcache/writeback.c | 35 +++++++++++++++++++++++++++--------
- 10 files changed, 101 insertions(+), 29 deletions(-)
-
-diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
-index 458e1d38577d..004cc3cc6123 100644
---- a/drivers/md/bcache/alloc.c
-+++ b/drivers/md/bcache/alloc.c
-@@ -287,7 +287,8 @@ do { \
- break; \
- \
- mutex_unlock(&(ca)->set->bucket_lock); \
-- if (kthread_should_stop()) { \
-+ if (kthread_should_stop() || \
-+ test_bit(CACHE_SET_IO_DISABLE, &ca->set->flags)) { \
- set_current_state(TASK_RUNNING); \
- return 0; \
- } \
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 0380626bf525..7917b3820dd5 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -475,10 +475,15 @@ struct gc_stat {
- *
- * CACHE_SET_RUNNING means all cache devices have been registered and journal
- * replay is complete.
-+ *
-+ * CACHE_SET_IO_DISABLE is set when bcache is stopping the whole cache set, all
-+ * external and internal I/O should be denied when this flag is set.
-+ *
- */
- #define CACHE_SET_UNREGISTERING 0
- #define CACHE_SET_STOPPING 1
- #define CACHE_SET_RUNNING 2
-+#define CACHE_SET_IO_DISABLE 3
-
- struct cache_set {
- struct closure cl;
-@@ -868,6 +873,19 @@ static inline void wake_up_allocators(struct cache_set *c)
- wake_up_process(ca->alloc_thread);
- }
-
-+static inline void closure_bio_submit(struct cache_set *c,
-+ struct bio *bio,
-+ struct closure *cl)
-+{
-+ closure_get(cl);
-+ if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags))) {
-+ bio->bi_status = BLK_STS_IOERR;
-+ bio_endio(bio);
-+ return;
-+ }
-+ generic_make_request(bio);
-+}
-+
- /* Forward declarations */
-
- void bch_count_io_errors(struct cache *, blk_status_t, int, const char *);
-diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
-index fad9fe8817eb..8ca50f387a1d 100644
---- a/drivers/md/bcache/btree.c
-+++ b/drivers/md/bcache/btree.c
-@@ -1744,6 +1744,7 @@ static void bch_btree_gc(struct cache_set *c)
-
- btree_gc_start(c);
-
-+ /* if CACHE_SET_IO_DISABLE set, gc thread should stop too */
- do {
- ret = btree_root(gc_root, c, &op, &writes, &stats);
- closure_sync(&writes);
-@@ -1751,7 +1752,7 @@ static void bch_btree_gc(struct cache_set *c)
-
- if (ret && ret != -EAGAIN)
- pr_warn("gc failed!");
-- } while (ret);
-+ } while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags));
-
- bch_btree_gc_finish(c);
- wake_up_allocators(c);
-@@ -1789,9 +1790,12 @@ static int bch_gc_thread(void *arg)
-
- while (1) {
- wait_event_interruptible(c->gc_wait,
-- kthread_should_stop() || gc_should_run(c));
-+ kthread_should_stop() ||
-+ test_bit(CACHE_SET_IO_DISABLE, &c->flags) ||
-+ gc_should_run(c));
-
-- if (kthread_should_stop())
-+ if (kthread_should_stop() ||
-+ test_bit(CACHE_SET_IO_DISABLE, &c->flags))
- break;
-
- set_gc_sectors(c);
-diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
-index a783c5a41ff1..8013ecbcdbda 100644
---- a/drivers/md/bcache/io.c
-+++ b/drivers/md/bcache/io.c
-@@ -38,7 +38,7 @@ void __bch_submit_bbio(struct bio *bio, struct cache_set *c)
- bio_set_dev(bio, PTR_CACHE(c, &b->key, 0)->bdev);
-
- b->submit_time_us = local_clock_us();
-- closure_bio_submit(bio, bio->bi_private);
-+ closure_bio_submit(c, bio, bio->bi_private);
- }
-
- void bch_submit_bbio(struct bio *bio, struct cache_set *c,
-diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index 1b736b860739..c94085f400a4 100644
---- a/drivers/md/bcache/journal.c
-+++ b/drivers/md/bcache/journal.c
-@@ -62,7 +62,7 @@ reread: left = ca->sb.bucket_size - offset;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
- bch_bio_map(bio, data);
-
-- closure_bio_submit(bio, &cl);
-+ closure_bio_submit(ca->set, bio, &cl);
- closure_sync(&cl);
-
- /* This function could be simpler now since we no longer write
-@@ -674,7 +674,7 @@ static void journal_write_unlocked(struct closure *cl)
- spin_unlock(&c->journal.lock);
-
- while ((bio = bio_list_pop(&list)))
-- closure_bio_submit(bio, cl);
-+ closure_bio_submit(c, bio, cl);
-
- continue_at(cl, journal_write_done, NULL);
- }
-diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
-index 1a46b41dac70..02296bda6384 100644
---- a/drivers/md/bcache/request.c
-+++ b/drivers/md/bcache/request.c
-@@ -747,7 +747,7 @@ static void cached_dev_read_error(struct closure *cl)
-
- /* XXX: invalidate cache */
-
-- closure_bio_submit(bio, cl);
-+ closure_bio_submit(s->iop.c, bio, cl);
- }
-
- continue_at(cl, cached_dev_cache_miss_done, NULL);
-@@ -872,7 +872,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
- s->cache_miss = miss;
- s->iop.bio = cache_bio;
- bio_get(cache_bio);
-- closure_bio_submit(cache_bio, &s->cl);
-+ closure_bio_submit(s->iop.c, cache_bio, &s->cl);
-
- return ret;
- out_put:
-@@ -880,7 +880,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
- out_submit:
- miss->bi_end_io = request_endio;
- miss->bi_private = &s->cl;
-- closure_bio_submit(miss, &s->cl);
-+ closure_bio_submit(s->iop.c, miss, &s->cl);
- return ret;
- }
-
-@@ -945,7 +945,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
-
- if ((bio_op(bio) != REQ_OP_DISCARD) ||
- blk_queue_discard(bdev_get_queue(dc->bdev)))
-- closure_bio_submit(bio, cl);
-+ closure_bio_submit(s->iop.c, bio, cl);
- } else if (s->iop.writeback) {
- bch_writeback_add(dc);
- s->iop.bio = bio;
-@@ -960,12 +960,12 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
- flush->bi_private = cl;
- flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
-
-- closure_bio_submit(flush, cl);
-+ closure_bio_submit(s->iop.c, flush, cl);
- }
- } else {
- s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split);
-
-- closure_bio_submit(bio, cl);
-+ closure_bio_submit(s->iop.c, bio, cl);
- }
-
- closure_call(&s->iop.cl, bch_data_insert, NULL, cl);
-@@ -981,7 +981,7 @@ static void cached_dev_nodata(struct closure *cl)
- bch_journal_meta(s->iop.c, cl);
-
- /* If it's a flush, we send the flush to the backing device too */
-- closure_bio_submit(bio, cl);
-+ closure_bio_submit(s->iop.c, bio, cl);
-
- continue_at(cl, cached_dev_bio_complete, NULL);
- }
-@@ -996,6 +996,12 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
- struct cached_dev *dc = container_of(d, struct cached_dev, disk);
- int rw = bio_data_dir(bio);
-
-+ if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
-+ bio->bi_status = BLK_STS_IOERR;
-+ bio_endio(bio);
-+ return BLK_QC_T_NONE;
-+ }
-+
- atomic_set(&dc->backing_idle, 0);
- generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
-
-@@ -1112,6 +1118,12 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q,
- struct bcache_device *d = bio->bi_disk->private_data;
- int rw = bio_data_dir(bio);
-
-+ if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
-+ bio->bi_status = BLK_STS_IOERR;
-+ bio_endio(bio);
-+ return BLK_QC_T_NONE;
-+ }
-+
- generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
-
- s = search_alloc(bio, d);
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index e15cacecf078..f8b0d1196c12 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -521,7 +521,7 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op,
- bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags);
- bch_bio_map(bio, ca->disk_buckets);
-
-- closure_bio_submit(bio, &ca->prio);
-+ closure_bio_submit(ca->set, bio, &ca->prio);
- closure_sync(cl);
- }
-
-@@ -1349,6 +1349,9 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
- test_bit(CACHE_SET_STOPPING, &c->flags))
- return false;
-
-+ if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags))
-+ pr_warn("CACHE_SET_IO_DISABLE already set");
-+
- /* XXX: we can be called from atomic context
- acquire_console_sem();
- */
-@@ -1584,6 +1587,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
- c->congested_read_threshold_us = 2000;
- c->congested_write_threshold_us = 20000;
- c->error_limit = DEFAULT_IO_ERROR_LIMIT;
-+ WARN_ON(test_and_clear_bit(CACHE_SET_IO_DISABLE, &c->flags));
-
- return c;
- err:
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index 399e91cbf714..cf973c07c856 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -95,6 +95,7 @@ read_attribute(partial_stripes_expensive);
-
- rw_attribute(synchronous);
- rw_attribute(journal_delay_ms);
-+rw_attribute(io_disable);
- rw_attribute(discard);
- rw_attribute(running);
- rw_attribute(label);
-@@ -588,6 +589,8 @@ SHOW(__bch_cache_set)
- sysfs_printf(gc_always_rewrite, "%i", c->gc_always_rewrite);
- sysfs_printf(btree_shrinker_disabled, "%i", c->shrinker_disabled);
- sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
-+ sysfs_printf(io_disable, "%i",
-+ test_bit(CACHE_SET_IO_DISABLE, &c->flags));
-
- if (attr == &sysfs_bset_tree_stats)
- return bch_bset_print_stats(c, buf);
-@@ -677,6 +680,22 @@ STORE(__bch_cache_set)
- if (attr == &sysfs_io_error_halflife)
- c->error_decay = strtoul_or_return(buf) / 88;
-
-+ if (attr == &sysfs_io_disable) {
-+ int v = strtoul_or_return(buf);
-+
-+ if (v) {
-+ if (test_and_set_bit(CACHE_SET_IO_DISABLE,
-+ &c->flags))
-+ pr_warn("CACHE_SET_IO_DISABLE"
-+ " already set");
-+ } else {
-+ if (!test_and_clear_bit(CACHE_SET_IO_DISABLE,
-+ &c->flags))
-+ pr_warn("CACHE_SET_IO_DISABLE"
-+ " already cleared");
-+ }
-+ }
-+
- sysfs_strtoul(journal_delay_ms, c->journal_delay_ms);
- sysfs_strtoul(verify, c->verify);
- sysfs_strtoul(key_merging_disabled, c->key_merging_disabled);
-@@ -762,6 +781,7 @@ static struct attribute *bch_cache_set_internal_files[] = {
- &sysfs_gc_always_rewrite,
- &sysfs_btree_shrinker_disabled,
- &sysfs_copy_gc_enabled,
-+ &sysfs_io_disable,
- NULL
- };
- KTYPE(bch_cache_set_internal);
-diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
-index a6763db7f061..268024529edd 100644
---- a/drivers/md/bcache/util.h
-+++ b/drivers/md/bcache/util.h
-@@ -567,12 +567,6 @@ static inline sector_t bdev_sectors(struct block_device *bdev)
- return bdev->bd_inode->i_size >> 9;
- }
-
--#define closure_bio_submit(bio, cl) \
--do { \
-- closure_get(cl); \
-- generic_make_request(bio); \
--} while (0)
--
- uint64_t bch_crc64_update(uint64_t, const void *, size_t);
- uint64_t bch_crc64(const void *, size_t);
-
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index 8f98ef1038d3..3d7d8452e0de 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -114,6 +114,7 @@ static void update_writeback_rate(struct work_struct *work)
- struct cached_dev *dc = container_of(to_delayed_work(work),
- struct cached_dev,
- writeback_rate_update);
-+ struct cache_set *c = dc->disk.c;
-
- /*
- * should check BCACHE_DEV_RATE_DW_RUNNING before calling
-@@ -123,7 +124,12 @@ static void update_writeback_rate(struct work_struct *work)
- /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
- smp_mb();
-
-- if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) {
-+ /*
-+ * CACHE_SET_IO_DISABLE might be set via sysfs interface,
-+ * check it here too.
-+ */
-+ if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) ||
-+ test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
- clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
- /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
- smp_mb();
-@@ -138,7 +144,12 @@ static void update_writeback_rate(struct work_struct *work)
-
- up_read(&dc->writeback_lock);
-
-- if (test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) {
-+ /*
-+ * CACHE_SET_IO_DISABLE might be set via sysfs interface,
-+ * check it here too.
-+ */
-+ if (test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) &&
-+ !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
- schedule_delayed_work(&dc->writeback_rate_update,
- dc->writeback_rate_update_seconds * HZ);
- }
-@@ -278,7 +289,7 @@ static void write_dirty(struct closure *cl)
- bio_set_dev(&io->bio, io->dc->bdev);
- io->bio.bi_end_io = dirty_endio;
-
-- closure_bio_submit(&io->bio, cl);
-+ closure_bio_submit(io->dc->disk.c, &io->bio, cl);
- }
-
- atomic_set(&dc->writeback_sequence_next, next_sequence);
-@@ -304,7 +315,7 @@ static void read_dirty_submit(struct closure *cl)
- {
- struct dirty_io *io = container_of(cl, struct dirty_io, cl);
-
-- closure_bio_submit(&io->bio, cl);
-+ closure_bio_submit(io->dc->disk.c, &io->bio, cl);
-
- continue_at(cl, write_dirty, io->dc->writeback_write_wq);
- }
-@@ -330,7 +341,9 @@ static void read_dirty(struct cached_dev *dc)
-
- next = bch_keybuf_next(&dc->writeback_keys);
-
-- while (!kthread_should_stop() && next) {
-+ while (!kthread_should_stop() &&
-+ !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
-+ next) {
- size = 0;
- nk = 0;
-
-@@ -427,7 +440,9 @@ static void read_dirty(struct cached_dev *dc)
- }
- }
-
-- while (!kthread_should_stop() && delay) {
-+ while (!kthread_should_stop() &&
-+ !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
-+ delay) {
- schedule_timeout_interruptible(delay);
- delay = writeback_delay(dc, 0);
- }
-@@ -583,11 +598,13 @@ static bool refill_dirty(struct cached_dev *dc)
- static int bch_writeback_thread(void *arg)
- {
- struct cached_dev *dc = arg;
-+ struct cache_set *c = dc->disk.c;
- bool searched_full_index;
-
- bch_ratelimit_reset(&dc->writeback_rate);
-
-- while (!kthread_should_stop()) {
-+ while (!kthread_should_stop() &&
-+ !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
- down_write(&dc->writeback_lock);
- set_current_state(TASK_INTERRUPTIBLE);
- /*
-@@ -601,7 +618,8 @@ static int bch_writeback_thread(void *arg)
- (!atomic_read(&dc->has_dirty) || !dc->writeback_running)) {
- up_write(&dc->writeback_lock);
-
-- if (kthread_should_stop()) {
-+ if (kthread_should_stop() ||
-+ test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
- set_current_state(TASK_RUNNING);
- break;
- }
-@@ -637,6 +655,7 @@ static int bch_writeback_thread(void *arg)
-
- while (delay &&
- !kthread_should_stop() &&
-+ !test_bit(CACHE_SET_IO_DISABLE, &c->flags) &&
- !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
- delay = schedule_timeout_interruptible(delay);
-
---
-2.16.1
-
diff --git a/for-next/v5/v5-0006-bcache-add-stop_when_cache_set_failed-option-to-b.patch b/for-next/v5/v5-0006-bcache-add-stop_when_cache_set_failed-option-to-b.patch
deleted file mode 100644
index 3952ba1..0000000
--- a/for-next/v5/v5-0006-bcache-add-stop_when_cache_set_failed-option-to-b.patch
+++ /dev/null
@@ -1,258 +0,0 @@
-From fc5aa1aa4157619dc56f794419405b64a31a1312 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Mon, 5 Feb 2018 23:44:28 +0800
-Subject: [PATCH v5 06/10] bcache: add stop_when_cache_set_failed option to
- backing device
-
-When there are too many I/O errors on cache device, current bcache code
-will retire the whole cache set, and detach all bcache devices. But the
-detached bcache devices are not stopped, which is problematic when bcache
-is in writeback mode.
-
-If the retired cache set has dirty data of backing devices, continue
-writing to bcache device will write to backing device directly. If the
-LBA of write request has a dirty version cached on cache device, next time
-when the cache device is re-registered and backing device re-attached to
-it again, the stale dirty data on cache device will be written to backing
-device, and overwrite latest directly written data. This situation causes
-silent data corruption.
-
-But we cannot simply stop all attached bcache devices when the cache set is
-broken or disconnected. For example, use bcache to accelerate performance
-of an email service. In such workload, if cache device is broken but no
-dirty data lost, keep the bcache device alive and permit email service
-continue to access user data might be a better solution for the cache
-device failure.
-
-Nix <nix@esperi.org.uk> points out the issue and provides the above example
-to explain why it might be necessary to not stop bcache device for broken
-cache device. Pavel Goran <via-bcache@pvgoran.name> provides a brilliant
-suggestion to provide "always" and "auto" options to per-cached device
-sysfs file stop_when_cache_set_failed. If cache set is retiring and the
-backing device has no dirty data on cache, it should be safe to keep the
-bcache device alive. In this case, if stop_when_cache_set_failed is set to
-"auto", the device failure handling code will not stop this bcache device
-and permit application to access the backing device with an unattached
-bcache device.
-
-Changelog:
-v3: fix typos pointed out by Nix.
-v2: change option values of stop_when_cache_set_failed from 1/0 to
- "auto"/"always".
-v1: initial version, stop_when_cache_set_failed can be 0 (not stop) or 1
- (always stop).
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Cc: Nix <nix@esperi.org.uk>
-Cc: Pavel Goran <via-bcache@pvgoran.name>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
-Cc: Hannes Reinecke <hare@suse.com>
----
- drivers/md/bcache/bcache.h | 9 +++++
- drivers/md/bcache/super.c | 82 ++++++++++++++++++++++++++++++++++++++++------
- drivers/md/bcache/sysfs.c | 17 ++++++++++
- 3 files changed, 98 insertions(+), 10 deletions(-)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 7917b3820dd5..263164490833 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -287,6 +287,12 @@ struct io {
- sector_t last;
- };
-
-+enum stop_on_failure {
-+ BCH_CACHED_DEV_STOP_AUTO = 0,
-+ BCH_CACHED_DEV_STOP_ALWAYS,
-+ BCH_CACHED_DEV_STOP_MODE_MAX,
-+};
-+
- struct cached_dev {
- struct list_head list;
- struct bcache_device disk;
-@@ -379,6 +385,8 @@ struct cached_dev {
- unsigned writeback_rate_i_term_inverse;
- unsigned writeback_rate_p_term_inverse;
- unsigned writeback_rate_minimum;
-+
-+ enum stop_on_failure stop_when_cache_set_failed;
- };
-
- enum alloc_reserve {
-@@ -924,6 +932,7 @@ void bch_write_bdev_super(struct cached_dev *, struct closure *);
-
- extern struct workqueue_struct *bcache_wq;
- extern const char * const bch_cache_modes[];
-+extern const char * const bch_stop_on_failure_modes[];
- extern struct mutex bch_register_lock;
- extern struct list_head bch_cache_sets;
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index f8b0d1196c12..e335433bdfb7 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -47,6 +47,14 @@ const char * const bch_cache_modes[] = {
- NULL
- };
-
-+/* Default is -1; we skip past it for stop_when_cache_set_failed */
-+const char * const bch_stop_on_failure_modes[] = {
-+ "default",
-+ "auto",
-+ "always",
-+ NULL
-+};
-+
- static struct kobject *bcache_kobj;
- struct mutex bch_register_lock;
- LIST_HEAD(bch_cache_sets);
-@@ -1187,6 +1195,9 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
- max(dc->disk.disk->queue->backing_dev_info->ra_pages,
- q->backing_dev_info->ra_pages);
-
-+ /* default to auto */
-+ dc->stop_when_cache_set_failed = BCH_CACHED_DEV_STOP_AUTO;
-+
- bch_cached_dev_request_init(dc);
- bch_cached_dev_writeback_init(dc);
- return 0;
-@@ -1463,25 +1474,76 @@ static void cache_set_flush(struct closure *cl)
- closure_return(cl);
- }
-
-+/*
-+ * This function is only called when CACHE_SET_IO_DISABLE is set, which means
-+ * cache set is unregistering due to too many I/O errors. In this condition,
-+ * the bcache device might be stopped, it depends on stop_when_cache_set_failed
-+ * value and whether the broken cache has dirty data:
-+ *
-+ * dc->stop_when_cache_set_failed dc->has_dirty stop bcache device
-+ * BCH_CACHED_DEV_STOP_AUTO 0 NO
-+ * BCH_CACHED_DEV_STOP_AUTO 1 YES
-+ * BCH_CACHED_DEV_STOP_ALWAYS 0 YES
-+ * BCH_CACHED_DEV_STOP_ALWAYS 1 YES
-+ *
-+ * The expected behavior is, if stop_when_cache_set_failed is configured to
-+ * "auto" via sysfs interface, the bcache device will not be stopped if the
-+ * backing device is clean on the broken cache device.
-+ */
-+static void conditional_stop_bcache_device(struct cache_set *c,
-+ struct bcache_device *d,
-+ struct cached_dev *dc)
-+{
-+ if (dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_ALWAYS) {
-+ pr_warn("stop_when_cache_set_failed of %s is \"always\", stop"
-+ " it for failed cache set %pU.",
-+ d->disk->disk_name, c->sb.set_uuid);
-+ bcache_device_stop(d);
-+ } else if (atomic_read(&dc->has_dirty)) {
-+ /*
-+ * dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_AUTO
-+ * and dc->has_dirty == 1
-+ */
-+ pr_warn("stop_when_cache_set_failed of %s is \"auto\" and "
-+ "cache is dirty, stop it to avoid potential data "
-+ "corruption.",
-+ d->disk->disk_name);
-+ bcache_device_stop(d);
-+ } else {
-+ /*
-+ * dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_AUTO
-+ * and dc->has_dirty == 0
-+ */
-+ pr_warn("stop_when_cache_set_failed of %s is \"auto\" and "
-+ "cache is clean, keep it alive.",
-+ d->disk->disk_name);
-+ }
-+}
-+
- static void __cache_set_unregister(struct closure *cl)
- {
- struct cache_set *c = container_of(cl, struct cache_set, caching);
- struct cached_dev *dc;
-+ struct bcache_device *d;
- size_t i;
-
- mutex_lock(&bch_register_lock);
-
-- for (i = 0; i < c->devices_max_used; i++)
-- if (c->devices[i]) {
-- if (!UUID_FLASH_ONLY(&c->uuids[i]) &&
-- test_bit(CACHE_SET_UNREGISTERING, &c->flags)) {
-- dc = container_of(c->devices[i],
-- struct cached_dev, disk);
-- bch_cached_dev_detach(dc);
-- } else {
-- bcache_device_stop(c->devices[i]);
-- }
-+ for (i = 0; i < c->devices_max_used; i++) {
-+ d = c->devices[i];
-+ if (!d)
-+ continue;
-+
-+ if (!UUID_FLASH_ONLY(&c->uuids[i]) &&
-+ test_bit(CACHE_SET_UNREGISTERING, &c->flags)) {
-+ dc = container_of(d, struct cached_dev, disk);
-+ bch_cached_dev_detach(dc);
-+ if (test_bit(CACHE_SET_IO_DISABLE, &c->flags))
-+ conditional_stop_bcache_device(c, d, dc);
-+ } else {
-+ bcache_device_stop(d);
- }
-+ }
-
- mutex_unlock(&bch_register_lock);
-
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index cf973c07c856..91d859a54575 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -78,6 +78,7 @@ rw_attribute(congested_write_threshold_us);
- rw_attribute(sequential_cutoff);
- rw_attribute(data_csum);
- rw_attribute(cache_mode);
-+rw_attribute(stop_when_cache_set_failed);
- rw_attribute(writeback_metadata);
- rw_attribute(writeback_running);
- rw_attribute(writeback_percent);
-@@ -126,6 +127,12 @@ SHOW(__bch_cached_dev)
- bch_cache_modes + 1,
- BDEV_CACHE_MODE(&dc->sb));
-
-+ if (attr == &sysfs_stop_when_cache_set_failed)
-+ return bch_snprint_string_list(buf, PAGE_SIZE,
-+ bch_stop_on_failure_modes + 1,
-+ dc->stop_when_cache_set_failed);
-+
-+
- sysfs_printf(data_csum, "%i", dc->disk.data_csum);
- var_printf(verify, "%i");
- var_printf(bypass_torture_test, "%i");
-@@ -247,6 +254,15 @@ STORE(__cached_dev)
- }
- }
-
-+ if (attr == &sysfs_stop_when_cache_set_failed) {
-+ v = bch_read_string_list(buf, bch_stop_on_failure_modes + 1);
-+
-+ if (v < 0)
-+ return v;
-+
-+ dc->stop_when_cache_set_failed = v;
-+ }
-+
- if (attr == &sysfs_label) {
- if (size > SB_LABEL_SIZE)
- return -EINVAL;
-@@ -323,6 +339,7 @@ static struct attribute *bch_cached_dev_files[] = {
- &sysfs_data_csum,
- #endif
- &sysfs_cache_mode,
-+ &sysfs_stop_when_cache_set_failed,
- &sysfs_writeback_metadata,
- &sysfs_writeback_running,
- &sysfs_writeback_delay,
---
-2.16.1
-
diff --git a/for-next/v5/v5-0007-bcache-fix-inaccurate-io-state-for-detached-bcach.patch b/for-next/v5/v5-0007-bcache-fix-inaccurate-io-state-for-detached-bcach.patch
deleted file mode 100644
index 79707be..0000000
--- a/for-next/v5/v5-0007-bcache-fix-inaccurate-io-state-for-detached-bcach.patch
+++ /dev/null
@@ -1,119 +0,0 @@
-From 64c41825d56a1a0a7f7b468606a08bf6a86c21ba Mon Sep 17 00:00:00 2001
-From: Tang Junhui <tang.junhui@zte.com.cn>
-Date: Tue, 9 Jan 2018 10:27:11 +0800
-Subject: [PATCH v5 07/10] bcache: fix inaccurate io state for detached bcache
- devices
-
-When we run IO on a detached device and run iostat to show the IO status,
-normally it will show output like below (some fields omitted):
-Device: ... avgrq-sz avgqu-sz await r_await w_await svctm %util
-sdd ... 15.89 0.53 1.82 0.20 2.23 1.81 52.30
-bcache0 ... 15.89 115.42 0.00 0.00 0.00 2.40 69.60
-but after the IO has stopped, avgqu-sz and %util still show very large
-values, as below:
-Device: ... avgrq-sz avgqu-sz await r_await w_await svctm %util
-bcache0 ... 0 5326.32 0.00 0.00 0.00 0.00 100.10
-
-The reason for this issue is that only generic_start_io_acct() is called,
-and generic_end_io_acct() is never called, for a detached device in
-cached_dev_make_request(). See the code:
-//start generic_start_io_acct()
-generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
-if (cached_dev_get(dc)) {
- //will callback generic_end_io_acct()
-}
-else {
- //will not call generic_end_io_acct()
-}
-
-This patch calls generic_end_io_acct() in the end of IO for detached
-devices, so we can show IO state correctly.
-
-(Modified to use GFP_NOIO in kzalloc() by Coly Li)
-
-Signed-off-by: Tang Junhui <tang.junhui@zte.com.cn>
-Reviewed-by: Coly Li <colyli@suse.de>
-Reviewed-by: Hannes Reinecke <hare@suse.com>
----
- drivers/md/bcache/request.c | 58 +++++++++++++++++++++++++++++++++++++++------
- 1 file changed, 51 insertions(+), 7 deletions(-)
-
-diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
-index 02296bda6384..e09c5ae745be 100644
---- a/drivers/md/bcache/request.c
-+++ b/drivers/md/bcache/request.c
-@@ -986,6 +986,55 @@ static void cached_dev_nodata(struct closure *cl)
- continue_at(cl, cached_dev_bio_complete, NULL);
- }
-
-+struct detached_dev_io_private {
-+ struct bcache_device *d;
-+ unsigned long start_time;
-+ bio_end_io_t *bi_end_io;
-+ void *bi_private;
-+};
-+
-+static void detatched_dev_end_io(struct bio *bio)
-+{
-+ struct detached_dev_io_private *ddip;
-+
-+ ddip = bio->bi_private;
-+ bio->bi_end_io = ddip->bi_end_io;
-+ bio->bi_private = ddip->bi_private;
-+
-+ generic_end_io_acct(ddip->d->disk->queue,
-+ bio_data_dir(bio),
-+ &ddip->d->disk->part0, ddip->start_time);
-+
-+ kfree(ddip);
-+
-+ bio->bi_end_io(bio);
-+}
-+
-+static void detached_dev_do_request(struct bcache_device *d, struct bio *bio)
-+{
-+ struct detached_dev_io_private *ddip;
-+ struct cached_dev *dc = container_of(d, struct cached_dev, disk);
-+
-+ /*
-+ * no need to call closure_get(&dc->disk.cl),
-+ * because upper layer had already opened bcache device,
-+ * which would call closure_get(&dc->disk.cl)
-+ */
-+ ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO);
-+ ddip->d = d;
-+ ddip->start_time = jiffies;
-+ ddip->bi_end_io = bio->bi_end_io;
-+ ddip->bi_private = bio->bi_private;
-+ bio->bi_end_io = detatched_dev_end_io;
-+ bio->bi_private = ddip;
-+
-+ if ((bio_op(bio) == REQ_OP_DISCARD) &&
-+ !blk_queue_discard(bdev_get_queue(dc->bdev)))
-+ bio->bi_end_io(bio);
-+ else
-+ generic_make_request(bio);
-+}
-+
- /* Cached devices - read & write stuff */
-
- static blk_qc_t cached_dev_make_request(struct request_queue *q,
-@@ -1028,13 +1077,8 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
- else
- cached_dev_read(dc, s);
- }
-- } else {
-- if ((bio_op(bio) == REQ_OP_DISCARD) &&
-- !blk_queue_discard(bdev_get_queue(dc->bdev)))
-- bio_endio(bio);
-- else
-- generic_make_request(bio);
-- }
-+ } else
-+ detached_dev_do_request(d, bio);
-
- return BLK_QC_T_NONE;
- }
---
-2.16.1
-
diff --git a/for-next/v5/v5-0008-bcache-add-backing_request_endio-for-bi_end_io-of.patch b/for-next/v5/v5-0008-bcache-add-backing_request_endio-for-bi_end_io-of.patch
deleted file mode 100644
index 18f7651..0000000
--- a/for-next/v5/v5-0008-bcache-add-backing_request_endio-for-bi_end_io-of.patch
+++ /dev/null
@@ -1,255 +0,0 @@
-From 38cfbb08de26e4e16d9f87307f132f4c7572e7bf Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 10 Jan 2018 21:01:48 +0800
-Subject: [PATCH v5 08/10] bcache: add backing_request_endio() for bi_end_io of
- attached backing device I/O
-
-In order to catch I/O error of backing device, a separate bi_end_io
-call back is required. Then a per backing device counter can record I/O
-errors number and retire the backing device if the counter reaches a
-per backing device I/O error limit.
-
-This patch adds backing_request_endio() to bcache backing device I/O code
-path, this is a preparation for further complicated backing device failure
-handling. So far there is no real code logic change, I make this change a
-separate patch to make sure it is stable and reliable for further work.
-
-Changelog:
-v2: Fix code comments typo, remove a redundant bch_writeback_add() line
- added in v4 patch set.
-v1: indeed this is new added in this patch set.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Reviewed-by: Hannes Reinecke <hare@suse.com>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
-Cc: Michael Lyle <mlyle@lyle.org>
----
- drivers/md/bcache/request.c | 93 +++++++++++++++++++++++++++++++++++--------
- drivers/md/bcache/super.c | 1 +
- drivers/md/bcache/writeback.c | 1 +
- 3 files changed, 79 insertions(+), 16 deletions(-)
-
-diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
-index e09c5ae745be..9c6dda3b0068 100644
---- a/drivers/md/bcache/request.c
-+++ b/drivers/md/bcache/request.c
-@@ -139,6 +139,7 @@ static void bch_data_invalidate(struct closure *cl)
- }
-
- op->insert_data_done = true;
-+ /* get in bch_data_insert() */
- bio_put(bio);
- out:
- continue_at(cl, bch_data_insert_keys, op->wq);
-@@ -630,6 +631,38 @@ static void request_endio(struct bio *bio)
- closure_put(cl);
- }
-
-+static void backing_request_endio(struct bio *bio)
-+{
-+ struct closure *cl = bio->bi_private;
-+
-+ if (bio->bi_status) {
-+ struct search *s = container_of(cl, struct search, cl);
-+ /*
-+ * If a bio has REQ_PREFLUSH for writeback mode, it is
-+ * speically assembled in cached_dev_write() for a non-zero
-+ * write request which has REQ_PREFLUSH. we don't set
-+ * s->iop.status by this failure, the status will be decided
-+ * by result of bch_data_insert() operation.
-+ */
-+ if (unlikely(s->iop.writeback &&
-+ bio->bi_opf & REQ_PREFLUSH)) {
-+ char buf[BDEVNAME_SIZE];
-+
-+ bio_devname(bio, buf);
-+ pr_err("Can't flush %s: returned bi_status %i",
-+ buf, bio->bi_status);
-+ } else {
-+ /* set to orig_bio->bi_status in bio_complete() */
-+ s->iop.status = bio->bi_status;
-+ }
-+ s->recoverable = false;
-+ /* should count I/O error for backing device here */
-+ }
-+
-+ bio_put(bio);
-+ closure_put(cl);
-+}
-+
- static void bio_complete(struct search *s)
- {
- if (s->orig_bio) {
-@@ -644,13 +677,21 @@ static void bio_complete(struct search *s)
- }
- }
-
--static void do_bio_hook(struct search *s, struct bio *orig_bio)
-+static void do_bio_hook(struct search *s,
-+ struct bio *orig_bio,
-+ bio_end_io_t *end_io_fn)
- {
- struct bio *bio = &s->bio.bio;
-
- bio_init(bio, NULL, 0);
- __bio_clone_fast(bio, orig_bio);
-- bio->bi_end_io = request_endio;
-+ /*
-+ * bi_end_io can be set separately somewhere else, e.g. the
-+ * variants in,
-+ * - cache_bio->bi_end_io from cached_dev_cache_miss()
-+ * - n->bi_end_io from cache_lookup_fn()
-+ */
-+ bio->bi_end_io = end_io_fn;
- bio->bi_private = &s->cl;
-
- bio_cnt_set(bio, 3);
-@@ -676,7 +717,7 @@ static inline struct search *search_alloc(struct bio *bio,
- s = mempool_alloc(d->c->search, GFP_NOIO);
-
- closure_init(&s->cl, NULL);
-- do_bio_hook(s, bio);
-+ do_bio_hook(s, bio, request_endio);
-
- s->orig_bio = bio;
- s->cache_miss = NULL;
-@@ -743,10 +784,11 @@ static void cached_dev_read_error(struct closure *cl)
- trace_bcache_read_retry(s->orig_bio);
-
- s->iop.status = 0;
-- do_bio_hook(s, s->orig_bio);
-+ do_bio_hook(s, s->orig_bio, backing_request_endio);
-
- /* XXX: invalidate cache */
-
-+ /* I/O request sent to backing device */
- closure_bio_submit(s->iop.c, bio, cl);
- }
-
-@@ -859,7 +901,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
- bio_copy_dev(cache_bio, miss);
- cache_bio->bi_iter.bi_size = s->insert_bio_sectors << 9;
-
-- cache_bio->bi_end_io = request_endio;
-+ cache_bio->bi_end_io = backing_request_endio;
- cache_bio->bi_private = &s->cl;
-
- bch_bio_map(cache_bio, NULL);
-@@ -872,14 +914,16 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
- s->cache_miss = miss;
- s->iop.bio = cache_bio;
- bio_get(cache_bio);
-+ /* I/O request sent to backing device */
- closure_bio_submit(s->iop.c, cache_bio, &s->cl);
-
- return ret;
- out_put:
- bio_put(cache_bio);
- out_submit:
-- miss->bi_end_io = request_endio;
-+ miss->bi_end_io = backing_request_endio;
- miss->bi_private = &s->cl;
-+ /* I/O request sent to backing device */
- closure_bio_submit(s->iop.c, miss, &s->cl);
- return ret;
- }
-@@ -943,31 +987,46 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
- s->iop.bio = s->orig_bio;
- bio_get(s->iop.bio);
-
-- if ((bio_op(bio) != REQ_OP_DISCARD) ||
-- blk_queue_discard(bdev_get_queue(dc->bdev)))
-- closure_bio_submit(s->iop.c, bio, cl);
-+ if (bio_op(bio) == REQ_OP_DISCARD &&
-+ !blk_queue_discard(bdev_get_queue(dc->bdev)))
-+ goto insert_data;
-+
-+ /* I/O request sent to backing device */
-+ bio->bi_end_io = backing_request_endio;
-+ closure_bio_submit(s->iop.c, bio, cl);
-+
- } else if (s->iop.writeback) {
- bch_writeback_add(dc);
- s->iop.bio = bio;
-
- if (bio->bi_opf & REQ_PREFLUSH) {
-- /* Also need to send a flush to the backing device */
-- struct bio *flush = bio_alloc_bioset(GFP_NOIO, 0,
-- dc->disk.bio_split);
--
-+ /*
-+ * Also need to send a flush to the backing
-+ * device.
-+ */
-+ struct bio *flush;
-+
-+ flush = bio_alloc_bioset(GFP_NOIO, 0,
-+ dc->disk.bio_split);
-+ if (!flush) {
-+ s->iop.status = BLK_STS_RESOURCE;
-+ goto insert_data;
-+ }
- bio_copy_dev(flush, bio);
-- flush->bi_end_io = request_endio;
-+ flush->bi_end_io = backing_request_endio;
- flush->bi_private = cl;
- flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
--
-+ /* I/O request sent to backing device */
- closure_bio_submit(s->iop.c, flush, cl);
- }
- } else {
- s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split);
--
-+ /* I/O request sent to backing device */
-+ bio->bi_end_io = backing_request_endio;
- closure_bio_submit(s->iop.c, bio, cl);
- }
-
-+insert_data:
- closure_call(&s->iop.cl, bch_data_insert, NULL, cl);
- continue_at(cl, cached_dev_write_complete, NULL);
- }
-@@ -981,6 +1040,7 @@ static void cached_dev_nodata(struct closure *cl)
- bch_journal_meta(s->iop.c, cl);
-
- /* If it's a flush, we send the flush to the backing device too */
-+ bio->bi_end_io = backing_request_endio;
- closure_bio_submit(s->iop.c, bio, cl);
-
- continue_at(cl, cached_dev_bio_complete, NULL);
-@@ -1078,6 +1138,7 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
- cached_dev_read(dc, s);
- }
- } else
-+ /* I/O request sent to backing device */
- detached_dev_do_request(d, bio);
-
- return BLK_QC_T_NONE;
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index e335433bdfb7..4f1a14b99415 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -273,6 +273,7 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent)
- bio->bi_private = dc;
-
- closure_get(cl);
-+ /* I/O request sent to backing device */
- __write_super(&dc->sb, bio);
-
- closure_return_with_destructor(cl, bch_write_bdev_super_unlock);
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index 3d7d8452e0de..4ebe0119ea7e 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -289,6 +289,7 @@ static void write_dirty(struct closure *cl)
- bio_set_dev(&io->bio, io->dc->bdev);
- io->bio.bi_end_io = dirty_endio;
-
-+ /* I/O request sent to backing device */
- closure_bio_submit(io->dc->disk.c, &io->bio, cl);
- }
-
---
-2.16.1
-
diff --git a/for-next/v5/v5-0009-bcache-add-io_disable-to-struct-cached_dev.patch b/for-next/v5/v5-0009-bcache-add-io_disable-to-struct-cached_dev.patch
deleted file mode 100644
index 2ff139d..0000000
--- a/for-next/v5/v5-0009-bcache-add-io_disable-to-struct-cached_dev.patch
+++ /dev/null
@@ -1,237 +0,0 @@
-From 677f078827ce5ebde0a4aad6dfc0dc269433d622 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Mon, 5 Feb 2018 23:49:47 +0800
-Subject: [PATCH v5 09/10] bcache: add io_disable to struct cached_dev
-
-If a bcache device is configured to writeback mode, current code does not
-handle write I/O errors on backing devices properly.
-
-In writeback mode, a write request is written to the cache device, and
-later flushed to the backing device. If I/O fails when writing from the
-cache device to the backing device, bcache code just ignores the error and
-upper layer code is NOT notified that the backing device is broken.
-
-This patch tries to handle backing device failure like how the cache device
-failure is handled,
-- Add an error counter 'io_errors' and error limit 'error_limit' in struct
- cached_dev. Add another io_disable to struct cached_dev to disable I/Os
- on the problematic backing device.
-- When I/O error happens on backing device, increase io_errors counter. And
- if io_errors reaches error_limit, set cached_dev->io_disable to true, and
- stop the bcache device.
-
-The result is, if backing device is broken or disconnected, and I/O errors
-reach its error limit, backing device will be disabled and the associated
-bcache device will be removed from system.
-
-Changelog:
-v2: remove "bcache: " prefix in pr_error(), and use correct name string to
- print out bcache device gendisk name.
-v1: indeed this is new added in v2 patch set.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Reviewed-by: Hannes Reinecke <hare@suse.com>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
----
- drivers/md/bcache/bcache.h | 6 ++++++
- drivers/md/bcache/io.c | 14 ++++++++++++++
- drivers/md/bcache/request.c | 14 ++++++++++++--
- drivers/md/bcache/super.c | 23 ++++++++++++++++++++++-
- drivers/md/bcache/sysfs.c | 15 ++++++++++++++-
- 5 files changed, 68 insertions(+), 4 deletions(-)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 263164490833..c59ce168bd82 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -366,6 +366,7 @@ struct cached_dev {
- unsigned sequential_cutoff;
- unsigned readahead;
-
-+ unsigned io_disable:1;
- unsigned verify:1;
- unsigned bypass_torture_test:1;
-
-@@ -387,6 +388,9 @@ struct cached_dev {
- unsigned writeback_rate_minimum;
-
- enum stop_on_faliure stop_when_cache_set_failed;
-+#define DEFAULT_CACHED_DEV_ERROR_LIMIT 64
-+ atomic_t io_errors;
-+ unsigned error_limit;
- };
-
- enum alloc_reserve {
-@@ -896,6 +900,7 @@ static inline void closure_bio_submit(struct cache_set *c,
-
- /* Forward declarations */
-
-+void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio);
- void bch_count_io_errors(struct cache *, blk_status_t, int, const char *);
- void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
- blk_status_t, const char *);
-@@ -923,6 +928,7 @@ int bch_bucket_alloc_set(struct cache_set *, unsigned,
- struct bkey *, int, bool);
- bool bch_alloc_sectors(struct cache_set *, struct bkey *, unsigned,
- unsigned, unsigned, bool);
-+bool bch_cached_dev_error(struct cached_dev *dc);
-
- __printf(2, 3)
- bool bch_cache_set_error(struct cache_set *, const char *, ...);
-diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
-index 8013ecbcdbda..7fac97ae036e 100644
---- a/drivers/md/bcache/io.c
-+++ b/drivers/md/bcache/io.c
-@@ -50,6 +50,20 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c,
- }
-
- /* IO errors */
-+void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio)
-+{
-+ char buf[BDEVNAME_SIZE];
-+ unsigned errors;
-+
-+ WARN_ONCE(!dc, "NULL pointer of struct cached_dev");
-+
-+ errors = atomic_add_return(1, &dc->io_errors);
-+ if (errors < dc->error_limit)
-+ pr_err("%s: IO error on backing device, unrecoverable",
-+ bio_devname(bio, buf));
-+ else
-+ bch_cached_dev_error(dc);
-+}
-
- void bch_count_io_errors(struct cache *ca,
- blk_status_t error,
-diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
-index 9c6dda3b0068..03245e6980a6 100644
---- a/drivers/md/bcache/request.c
-+++ b/drivers/md/bcache/request.c
-@@ -637,6 +637,8 @@ static void backing_request_endio(struct bio *bio)
-
- if (bio->bi_status) {
- struct search *s = container_of(cl, struct search, cl);
-+ struct cached_dev *dc = container_of(s->d,
-+ struct cached_dev, disk);
- /*
- * If a bio has REQ_PREFLUSH for writeback mode, it is
- * speically assembled in cached_dev_write() for a non-zero
-@@ -657,6 +659,7 @@ static void backing_request_endio(struct bio *bio)
- }
- s->recoverable = false;
- /* should count I/O error for backing device here */
-+ bch_count_backing_io_errors(dc, bio);
- }
-
- bio_put(bio);
-@@ -1065,8 +1068,14 @@ static void detatched_dev_end_io(struct bio *bio)
- bio_data_dir(bio),
- &ddip->d->disk->part0, ddip->start_time);
-
-- kfree(ddip);
-+ if (bio->bi_status) {
-+ struct cached_dev *dc = container_of(ddip->d,
-+ struct cached_dev, disk);
-+ /* should count I/O error for backing device here */
-+ bch_count_backing_io_errors(dc, bio);
-+ }
-
-+ kfree(ddip);
- bio->bi_end_io(bio);
- }
-
-@@ -1105,7 +1114,8 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
- struct cached_dev *dc = container_of(d, struct cached_dev, disk);
- int rw = bio_data_dir(bio);
-
-- if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
-+ if (unlikely((d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags)) ||
-+ dc->io_disable)) {
- bio->bi_status = BLK_STS_IOERR;
- bio_endio(bio);
- return BLK_QC_T_NONE;
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 4f1a14b99415..40b07d980a20 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1196,7 +1196,10 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
- max(dc->disk.disk->queue->backing_dev_info->ra_pages,
- q->backing_dev_info->ra_pages);
-
-- /* default to auto */
-+ atomic_set(&dc->io_errors, 0);
-+ dc->io_disable = false;
-+ dc->error_limit = DEFAULT_CACHED_DEV_ERROR_LIMIT;
-+ /* default to "auto" */
- dc->stop_when_cache_set_failed = BCH_CACHED_DEV_STOP_ATUO;
-
- bch_cached_dev_request_init(dc);
-@@ -1350,6 +1353,24 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size)
- return flash_dev_run(c, u);
- }
-
-+bool bch_cached_dev_error(struct cached_dev *dc)
-+{
-+ char name[BDEVNAME_SIZE];
-+
-+ if (!dc || test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags))
-+ return false;
-+
-+ dc->io_disable = true;
-+ /* make others know io_disable is true earlier */
-+ smp_mb();
-+
-+ pr_err("stop %s: too many IO errors on backing device %s\n",
-+ dc->disk.disk->disk_name, bdevname(dc->bdev, name));
-+
-+ bcache_device_stop(&dc->disk);
-+ return true;
-+}
-+
- /* Cache set */
-
- __printf(2, 3)
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index 91d859a54575..e88fdcc549cd 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -141,7 +141,9 @@ SHOW(__bch_cached_dev)
- var_print(writeback_delay);
- var_print(writeback_percent);
- sysfs_hprint(writeback_rate, dc->writeback_rate.rate << 9);
--
-+ sysfs_hprint(io_errors, atomic_read(&dc->io_errors));
-+ sysfs_printf(io_error_limit, "%i", dc->error_limit);
-+ sysfs_printf(io_disable, "%i", dc->io_disable);
- var_print(writeback_rate_update_seconds);
- var_print(writeback_rate_i_term_inverse);
- var_print(writeback_rate_p_term_inverse);
-@@ -232,6 +234,14 @@ STORE(__cached_dev)
- d_strtoul(writeback_rate_i_term_inverse);
- d_strtoul_nonzero(writeback_rate_p_term_inverse);
-
-+ sysfs_strtoul_clamp(io_error_limit, dc->error_limit, 0, INT_MAX);
-+
-+ if (attr == &sysfs_io_disable) {
-+ int v = strtoul_or_return(buf);
-+
-+ dc->io_disable = v ? 1 : 0;
-+ }
-+
- d_strtoi_h(sequential_cutoff);
- d_strtoi_h(readahead);
-
-@@ -349,6 +359,9 @@ static struct attribute *bch_cached_dev_files[] = {
- &sysfs_writeback_rate_i_term_inverse,
- &sysfs_writeback_rate_p_term_inverse,
- &sysfs_writeback_rate_debug,
-+ &sysfs_errors,
-+ &sysfs_io_error_limit,
-+ &sysfs_io_disable,
- &sysfs_dirty_data,
- &sysfs_stripe_size,
- &sysfs_partial_stripes_expensive,
---
-2.16.1
-
diff --git a/for-next/v5/v5-0010-bcache-stop-bcache-device-when-backing-device-is-.patch b/for-next/v5/v5-0010-bcache-stop-bcache-device-when-backing-device-is-.patch
deleted file mode 100644
index 5ebd851..0000000
--- a/for-next/v5/v5-0010-bcache-stop-bcache-device-when-backing-device-is-.patch
+++ /dev/null
@@ -1,152 +0,0 @@
-From 88e4b7378283d942fe281f1b246be4a427a88511 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Mon, 5 Feb 2018 23:52:40 +0800
-Subject: [PATCH v5 10/10] bcache: stop bcache device when backing device is
- offline
-
-Currently bcache does not handle backing device failure, if backing
-device is offline and disconnected from system, its bcache device can still
-be accessible. If the bcache device is in writeback mode, I/O requests can
-even succeed if the requests hit on cache device. That is to say, when and
-how bcache handles offline backing device is undefined.
-
-This patch tries to handle backing device offline in a rather simple way,
-- Add cached_dev->status_update_thread kernel thread to update backing
- device status in every 1 second.
-- Add cached_dev->offline_seconds to record how many seconds the backing
- device is observed to be offline. If the backing device is offline for
- BACKING_DEV_OFFLINE_TIMEOUT (5) seconds, set dc->io_disable to 1 and
- call bcache_device_stop() to stop the bcache device which is linked to the
- offline backing device.
-
-Now if a backing device is offline for BACKING_DEV_OFFLINE_TIMEOUT seconds,
-its bcache device will be removed, then user space application writing on
-it will get error immediately, and can handle the device failure in time.
-
-This patch is quite simple, does not handle more complicated situations.
-Once the bcache device is stopped, users need to recover the backing
-device, register and attach it manually.
-
-Changelog:
-v2: remove "bcache: " prefix when calling pr_warn().
-v1: initial version.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Reviewed-by: Hannes Reinecke <hare@suse.com>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
----
- drivers/md/bcache/bcache.h | 2 ++
- drivers/md/bcache/super.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++
- 2 files changed, 57 insertions(+)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index c59ce168bd82..aa83dd0f682f 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -344,6 +344,7 @@ struct cached_dev {
-
- struct keybuf writeback_keys;
-
-+ struct task_struct *status_update_thread;
- /*
- * Order the write-half of writeback operations strongly in dispatch
- * order. (Maintain LBA order; don't allow reads completing out of
-@@ -391,6 +392,7 @@ struct cached_dev {
- #define DEFAULT_CACHED_DEV_ERROR_LIMIT 64
- atomic_t io_errors;
- unsigned error_limit;
-+ unsigned offline_seconds;
- };
-
- enum alloc_reserve {
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 40b07d980a20..6d672329efce 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -654,6 +654,11 @@ static int ioctl_dev(struct block_device *b, fmode_t mode,
- unsigned int cmd, unsigned long arg)
- {
- struct bcache_device *d = b->bd_disk->private_data;
-+ struct cached_dev *dc = container_of(d, struct cached_dev, disk);
-+
-+ if (dc->io_disable)
-+ return -EIO;
-+
- return d->ioctl(d, mode, cmd, arg);
- }
-
-@@ -864,6 +869,45 @@ static void calc_cached_dev_sectors(struct cache_set *c)
- c->cached_dev_sectors = sectors;
- }
-
-+#define BACKING_DEV_OFFLINE_TIMEOUT 5
-+static int cached_dev_status_update(void *arg)
-+{
-+ struct cached_dev *dc = arg;
-+ struct request_queue *q;
-+ char buf[BDEVNAME_SIZE];
-+
-+ /*
-+ * If this delayed worker is stopping outside, directly quit here.
-+ * dc->io_disable might be set via sysfs interface, so check it
-+ * here too.
-+ */
-+ while (!kthread_should_stop() && !dc->io_disable) {
-+ q = bdev_get_queue(dc->bdev);
-+ if (blk_queue_dying(q))
-+ dc->offline_seconds++;
-+ else
-+ dc->offline_seconds = 0;
-+
-+ if (dc->offline_seconds >= BACKING_DEV_OFFLINE_TIMEOUT) {
-+ pr_err("%s: device offline for %d seconds",
-+ bdevname(dc->bdev, buf),
-+ BACKING_DEV_OFFLINE_TIMEOUT);
-+ pr_err("%s: disable I/O request due to backing "
-+ "device offline", dc->disk.name);
-+ dc->io_disable = true;
-+ /* let others know earlier that io_disable is true */
-+ smp_mb();
-+ bcache_device_stop(&dc->disk);
-+ break;
-+ }
-+
-+ schedule_timeout_interruptible(HZ);
-+ }
-+
-+ dc->status_update_thread = NULL;
-+ return 0;
-+}
-+
- void bch_cached_dev_run(struct cached_dev *dc)
- {
- struct bcache_device *d = &dc->disk;
-@@ -906,6 +950,15 @@ void bch_cached_dev_run(struct cached_dev *dc)
- if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") ||
- sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache"))
- pr_debug("error creating sysfs link");
-+
-+ dc->status_update_thread = kthread_run(cached_dev_status_update,
-+ dc,
-+ "bcache_status_update");
-+ if (IS_ERR(dc->status_update_thread)) {
-+ pr_warn("failed to create bcache_status_update kthread, "
-+ "continue to run without monitoring backing "
-+ "device status");
-+ }
- }
-
- /*
-@@ -1126,6 +1179,8 @@ static void cached_dev_free(struct closure *cl)
- kthread_stop(dc->writeback_thread);
- if (dc->writeback_write_wq)
- destroy_workqueue(dc->writeback_write_wq);
-+ if (!IS_ERR_OR_NULL(dc->status_update_thread))
-+ kthread_stop(dc->status_update_thread);
-
- if (atomic_read(&dc->running))
- bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
---
-2.16.1
-
diff --git a/for-next/v6/v6-0000-cover-letter.patch b/for-next/v6/v6-0000-cover-letter.patch
deleted file mode 100644
index a78da69..0000000
--- a/for-next/v6/v6-0000-cover-letter.patch
+++ /dev/null
@@ -1,82 +0,0 @@
-From 15a23aafbcfd8e92fc7e1740c8e53d9c64c9fde1 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Thu, 8 Feb 2018 23:13:49 +0800
-Subject: [PATCH v6 0/9] bcache: device failure handling improvement
-
-Hi maintainers and folks,
-
-This patch set tries to improve bcache device failure handling, includes
-cache device and backing device failures.
-
-The basic idea to handle failed cache device is,
-- Unregister cache set
-- Detach all backing devices which are attached to this cache set
-- Stop all the detached bcache devices (configurable)
-- Stop all flash only volume on the cache set
-The above process is named 'cache set retire' by me. The result of cache
-set retire is, cache set and bcache devices are all removed, following
-I/O requests will get failed immediately to notify upper layer or user
-space code that the cache device is failed or disconnected.
-- Stop all the detached bcache devices (configurable)
-- Stop all flash only volume on the cache set
-The above process is named 'cache set retire' by me. The result of cache
-set retire is, cache set and bcache devices are all removed
-(configurable), following I/O requests will get failed immediately to
-notify upper layer or user space code that the cache device is failed or
-disconnected.
-
-one patch from v5 patch set is merged into bcache-for-next, which is not
-in v6 patch set any longer. The changes of v6 patch set are only for typo
-fix, which were pointed out by Nix, Michael and other developers.
-
-So far all patches have peer review, thank you all, bcache developers!
-
-Changelog:
-v6: fix typo and mistaken spelling.
-v5: replace patch "bcache: stop all attached bcache devices for a retired
- cache set" from v4 patch set by "bcache: add stop_when_cache_set_failed
- option to backing device" from v5 patch set.
- fix issues from v4 patch set.
- improve kernel message format, remove redundant prefix string.
-v4: add per-cached_dev option stop_attached_devs_on_fail to avoid stopping
- attached bcache device from a retiring cache set.
-v3: fix detach issue find in v2 patch set.
-v2: fixes all problems found in v1 review.
- add patches to handle backing device failure.
- add one more patch to set writeback_rate_update_seconds range.
- include a patch from Junhui Tang.
-v1: the initial version, only handles cache device failure.
-
-Coly Li
----
-
-Coly Li (8):
- bcache: fix cached_dev->count usage for bch_cache_set_error()
- bcache: quit dc->writeback_thread when BCACHE_DEV_DETACHING is set
- bcache: stop dc->writeback_rate_update properly
- bcache: add CACHE_SET_IO_DISABLE to struct cache_set flags
- bcache: add stop_when_cache_set_failed option to backing device
- bcache: add backing_request_endio() for bi_end_io of attached backing
- device I/O
- bcache: add io_disable to struct cached_dev
- bcache: stop bcache device when backing device is offline
-
-Tang Junhui (1):
- bcache: fix inaccurate io state for detached bcache devices
-
- drivers/md/bcache/alloc.c | 3 +-
- drivers/md/bcache/bcache.h | 44 ++++++++-
- drivers/md/bcache/btree.c | 10 ++-
- drivers/md/bcache/io.c | 16 +++-
- drivers/md/bcache/journal.c | 4 +-
- drivers/md/bcache/request.c | 185 ++++++++++++++++++++++++++++++++------
- drivers/md/bcache/super.c | 205 ++++++++++++++++++++++++++++++++++++++----
- drivers/md/bcache/sysfs.c | 55 +++++++++++-
- drivers/md/bcache/util.h | 6 --
- drivers/md/bcache/writeback.c | 92 ++++++++++++++++---
- drivers/md/bcache/writeback.h | 2 -
- 11 files changed, 543 insertions(+), 79 deletions(-)
-
---
-2.16.1
-
diff --git a/for-next/v6/v6-0001-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch b/for-next/v6/v6-0001-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch
deleted file mode 100644
index c12cf09..0000000
--- a/for-next/v6/v6-0001-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch
+++ /dev/null
@@ -1,178 +0,0 @@
-From af3ba20f8ddf139828f6b26e0dfeeea71aa5b6c9 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Mon, 8 Jan 2018 23:05:58 +0800
-Subject: [PATCH v6 1/9] bcache: fix cached_dev->count usage for
- bch_cache_set_error()
-
-When bcache metadata I/O fails, bcache will call bch_cache_set_error()
-to retire the whole cache set. The expected behavior to retire a cache
-set is to unregister the cache set, and unregister all backing device
-attached to this cache set, then remove sysfs entries of the cache set
-and all attached backing devices, finally release memory of structs
-cache_set, cache, cached_dev and bcache_device.
-
-In my testing when journal I/O failure triggered by disconnected cache
-device, sometimes the cache set cannot be retired, and its sysfs
-entry /sys/fs/bcache/<uuid> still exists and the backing device also
-references it. This is not expected behavior.
-
-When metadata I/O fails, the call sequence to retire whole cache set is,
- bch_cache_set_error()
- bch_cache_set_unregister()
- bch_cache_set_stop()
- __cache_set_unregister() <- called as callback by calling
- closure_queue(&c->caching)
- cache_set_flush() <- called as a callback when refcount
- of cache_set->caching is 0
- cache_set_free() <- called as a callback when refcount
- of cache_set->cl is 0
- bch_cache_set_release() <- called as a callback when refcount
- of cache_set->kobj is 0
-
-I find if kernel thread bch_writeback_thread() quits while-loop when
-kthread_should_stop() is true and searched_full_index is false, closure
-callback cache_set_flush() set by continue_at() will never be called. The
-result is, bcache fails to retire whole cache set.
-
-cache_set_flush() will be called when refcount of closure c->caching is 0,
-and in function bcache_device_detach() refcount of closure c->caching is
-released to 0 by clousre_put(). In metadata error code path, function
-bcache_device_detach() is called by cached_dev_detach_finish(). This is a
-callback routine being called when cached_dev->count is 0. This refcount
-is decreased by cached_dev_put().
-
-The above dependence indicates, cache_set_flush() will be called when
-refcount of cache_set->cl is 0, and refcount of cache_set->cl drops to 0
-when refcount of cached_dev->count is 0.
-
-The reason why sometimes cached_dev->count is not 0 (when metadata I/O fails
-and bch_cache_set_error() is called) is, in bch_writeback_thread(), refcount
-of cached_dev is not decreased properly.
-
-In bch_writeback_thread(), cached_dev_put() is called only when
-searched_full_index is true and cached_dev->writeback_keys is empty, a.k.a
-there is no dirty data on cache. In most of run time it is correct, but
-when bch_writeback_thread() quits the while-loop while cache is still
-dirty, current code forgets to call cached_dev_put() before this kernel
-thread exits. This is why sometimes cache_set_flush() is not executed and
-cache set fails to be retired.
-
-The reason to call cached_dev_put() in bch_writeback_thread() is, when the
-cache device changes from clean to dirty, cached_dev_get() is called, to
-make sure during writeback operations both backing and cache devices
-won't be released.
-
-Adding following code in bch_writeback_thread() does not work,
- static int bch_writeback_thread(void *arg)
- }
-
-+ if (atomic_read(&dc->has_dirty))
-+ cached_dev_put()
-+
- return 0;
- }
-because writeback kernel thread can be woken up and started via sysfs entry:
- echo 1 > /sys/block/bcache<N>/bcache/writeback_running
-It is difficult to check whether backing device is dirty without race and
-extra lock. So the above modification will introduce potential refcount
-underflow in some conditions.
-
-The correct fix is, to take cached dev refcount when creating the kernel
-thread, and put it before the kernel thread exits. Then bcache does not
-need to take a cached dev refcount when cache turns from clean to dirty,
-or to put a cached dev refcount when cache turns from dirty to clean. The
-writeback kernel thread is always safe to reference data structure from
-cache set, cache and cached device (because a refcount of cache device is
-taken for it already), and no matter the kernel thread is stopped by I/O
-errors or system reboot, cached_dev->count can always be used correctly.
-
-The patch is simple, but understanding how it works is quite complicated.
-
-Changelog:
-v2: set dc->writeback_thread to NULL in this patch, as suggested by Hannes.
-v1: initial version for review.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Reviewed-by: Hannes Reinecke <hare@suse.com>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
----
- drivers/md/bcache/super.c | 1 -
- drivers/md/bcache/writeback.c | 11 ++++++++---
- drivers/md/bcache/writeback.h | 2 --
- 3 files changed, 8 insertions(+), 6 deletions(-)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 312895788036..9b745c5c1980 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1054,7 +1054,6 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
- if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
- bch_sectors_dirty_init(&dc->disk);
- atomic_set(&dc->has_dirty, 1);
-- refcount_inc(&dc->count);
- bch_writeback_queue(dc);
- }
-
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index f1d2fc15abcc..b280c134dd4d 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -572,7 +572,7 @@ static int bch_writeback_thread(void *arg)
-
- if (kthread_should_stop()) {
- set_current_state(TASK_RUNNING);
-- return 0;
-+ break;
- }
-
- schedule();
-@@ -585,7 +585,6 @@ static int bch_writeback_thread(void *arg)
- if (searched_full_index &&
- RB_EMPTY_ROOT(&dc->writeback_keys.keys)) {
- atomic_set(&dc->has_dirty, 0);
-- cached_dev_put(dc);
- SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN);
- bch_write_bdev_super(dc, NULL);
- }
-@@ -606,6 +605,9 @@ static int bch_writeback_thread(void *arg)
- }
- }
-
-+ dc->writeback_thread = NULL;
-+ cached_dev_put(dc);
-+
- return 0;
- }
-
-@@ -669,10 +671,13 @@ int bch_cached_dev_writeback_start(struct cached_dev *dc)
- if (!dc->writeback_write_wq)
- return -ENOMEM;
-
-+ cached_dev_get(dc);
- dc->writeback_thread = kthread_create(bch_writeback_thread, dc,
- "bcache_writeback");
-- if (IS_ERR(dc->writeback_thread))
-+ if (IS_ERR(dc->writeback_thread)) {
-+ cached_dev_put(dc);
- return PTR_ERR(dc->writeback_thread);
-+ }
-
- schedule_delayed_work(&dc->writeback_rate_update,
- dc->writeback_rate_update_seconds * HZ);
-diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
-index 587b25599856..0bba8f1c6cdf 100644
---- a/drivers/md/bcache/writeback.h
-+++ b/drivers/md/bcache/writeback.h
-@@ -105,8 +105,6 @@ static inline void bch_writeback_add(struct cached_dev *dc)
- {
- if (!atomic_read(&dc->has_dirty) &&
- !atomic_xchg(&dc->has_dirty, 1)) {
-- refcount_inc(&dc->count);
--
- if (BDEV_STATE(&dc->sb) != BDEV_STATE_DIRTY) {
- SET_BDEV_STATE(&dc->sb, BDEV_STATE_DIRTY);
- /* XXX: should do this synchronously */
---
-2.16.1
-
diff --git a/for-next/v6/v6-0002-bcache-quit-dc-writeback_thread-when-BCACHE_DEV_D.patch b/for-next/v6/v6-0002-bcache-quit-dc-writeback_thread-when-BCACHE_DEV_D.patch
deleted file mode 100644
index 4dd2c66..0000000
--- a/for-next/v6/v6-0002-bcache-quit-dc-writeback_thread-when-BCACHE_DEV_D.patch
+++ /dev/null
@@ -1,130 +0,0 @@
-From dd599ee72b29e153026448f1ff0b7147027a892a Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sun, 14 Jan 2018 21:41:57 +0800
-Subject: [PATCH v6 2/9] bcache: quit dc->writeback_thread when
- BCACHE_DEV_DETACHING is set
-
-In patch "bcache: fix cached_dev->count usage for bch_cache_set_error()",
-cached_dev_get() is called when creating dc->writeback_thread, and
-cached_dev_put() is called when exiting dc->writeback_thread. This
-modification works well unless people detach the bcache device manually by
- 'echo 1 > /sys/block/bcache<N>/bcache/detach'
-Because this sysfs interface only calls bch_cached_dev_detach() which wakes
-up dc->writeback_thread but does not stop it. The reason is, before patch
-"bcache: fix cached_dev->count usage for bch_cache_set_error()", inside
-bch_writeback_thread(), if cache is not dirty after writeback,
-cached_dev_put() will be called here. And in cached_dev_make_request() when
-a new write request makes cache from clean to dirty, cached_dev_get() will
-be called there. Since we don't operate dc->count in these locations,
-refcount dc->count cannot be dropped after cache becomes clean, and
-cached_dev_detach_finish() won't be called to detach bcache device.
-
-This patch fixes the issue by checking whether BCACHE_DEV_DETACHING is
-set inside bch_writeback_thread(). If this bit is set and cache is clean
-(no existing writeback_keys), break the while-loop, call cached_dev_put()
-and quit the writeback thread.
-
-Please note if cache is still dirty, even BCACHE_DEV_DETACHING is set the
-writeback thread should continue to perform writeback, this is the original
-design of manually detach.
-
-It is safe to do the following check without locking, let me explain why,
-+ if (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
-+ (!atomic_read(&dc->has_dirty) || !dc->writeback_running)) {
-
-If the kernel thread does not sleep and continues to run because conditions
-are not updated in time on the running CPU core, it just consumes more CPU
-cycles and does no harm. This should-sleep-but-run is safe here. We just
-focus on the should-run-but-sleep condition, which means the writeback
-thread goes to sleep by mistake while it should continue to run.
-1, First of all, no matter the writeback thread is hung or not, kthread_stop() from
- cached_dev_detach_finish() will wake up it and terminate by making
- kthread_should_stop() return true. And in normal run time, bit on index
- BCACHE_DEV_DETACHING is always cleared, the condition
- !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)
- is always true and can be ignored as constant value.
-2, If one of the following conditions is true, the writeback thread should
- go to sleep,
- "!atomic_read(&dc->has_dirty)" or "!dc->writeback_running)"
- each of them independently controls the writeback thread should sleep or
- not, let's analyse them one by one.
-2.1 condition "!atomic_read(&dc->has_dirty)"
- If dc->has_dirty is set from 0 to 1 on another CPU core, bcache will
- call bch_writeback_queue() immediately or call bch_writeback_add() which
- indirectly calls bch_writeback_queue() too. In bch_writeback_queue(),
- wake_up_process(dc->writeback_thread) is called. It sets writeback
- thread's task state to TASK_RUNNING and following an implicit memory
- barrier, then tries to wake up the writeback thread.
- In writeback thread, its task state is set to TASK_INTERRUPTIBLE before
- doing the condition check. If other CPU core sets the TASK_RUNNING state
- after writeback thread setting TASK_INTERRUPTIBLE, the writeback thread
- will be scheduled to run very soon because its state is not
- TASK_INTERRUPTIBLE. If other CPU core sets the TASK_RUNNING state before
- writeback thread setting TASK_INTERRUPTIBLE, the implicit memory barrier
- of wake_up_process() will make sure modification of dc->has_dirty on
- other CPU core is updated and observed on the CPU core of writeback
- thread. Therefore the condition check will correctly be false, and
- continue writeback code without sleeping.
-2.2 condition "!dc->writeback_running)"
- dc->writeback_running can be changed via sysfs file, every time it is
- modified, a following bch_writeback_queue() is always called. So the
- change is always observed on the CPU core of writeback thread. If
- dc->writeback_running is changed from 0 to 1 on other CPU core, this
- condition check will observe the modification and allow writeback
- thread to continue to run without sleeping.
-Now we can see, even without a locking protection, multiple conditions
-check is safe here, no deadlock or process hang up will happen.
-
-I compose a separate patch because that patch "bcache: fix cached_dev->count
-usage for bch_cache_set_error()" already gets a "Reviewed-by:" from Hannes
-Reinecke. Also this fix is not trivial and good for a separate patch.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Reviewed-by: Michael Lyle <mlyle@lyle.org>
-Cc: Hannes Reinecke <hare@suse.com>
-Cc: Huijun Tang <tang.junhui@zte.com.cn>
----
- drivers/md/bcache/writeback.c | 20 +++++++++++++++++---
- 1 file changed, 17 insertions(+), 3 deletions(-)
-
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index b280c134dd4d..4dbeaaa575bf 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -565,9 +565,15 @@ static int bch_writeback_thread(void *arg)
- while (!kthread_should_stop()) {
- down_write(&dc->writeback_lock);
- set_current_state(TASK_INTERRUPTIBLE);
-- if (!atomic_read(&dc->has_dirty) ||
-- (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
-- !dc->writeback_running)) {
-+ /*
-+ * If the bache device is detaching, skip here and continue
-+ * to perform writeback. Otherwise, if no dirty data on cache,
-+ * or there is dirty data on cache but writeback is disabled,
-+ * the writeback thread should sleep here and wait for others
-+ * to wake up it.
-+ */
-+ if (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
-+ (!atomic_read(&dc->has_dirty) || !dc->writeback_running)) {
- up_write(&dc->writeback_lock);
-
- if (kthread_should_stop()) {
-@@ -587,6 +593,14 @@ static int bch_writeback_thread(void *arg)
- atomic_set(&dc->has_dirty, 0);
- SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN);
- bch_write_bdev_super(dc, NULL);
-+ /*
-+ * If bcache device is detaching via sysfs interface,
-+ * writeback thread should stop after there is no dirty
-+ * data on cache. BCACHE_DEV_DETACHING flag is set in
-+ * bch_cached_dev_detach().
-+ */
-+ if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
-+ break;
- }
-
- up_write(&dc->writeback_lock);
---
-2.16.1
-
diff --git a/for-next/v6/v6-0003-bcache-stop-dc-writeback_rate_update-properly.patch b/for-next/v6/v6-0003-bcache-stop-dc-writeback_rate_update-properly.patch
deleted file mode 100644
index c3035a0..0000000
--- a/for-next/v6/v6-0003-bcache-stop-dc-writeback_rate_update-properly.patch
+++ /dev/null
@@ -1,268 +0,0 @@
-From 437a0616a6c5fc0da56798a9e60375e3a2b22683 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sat, 13 Jan 2018 15:48:39 +0800
-Subject: [PATCH v6 3/9] bcache: stop dc->writeback_rate_update properly
-
-struct delayed_work writeback_rate_update in struct cache_dev is a delayed
-worker to call function update_writeback_rate() in period (the interval is
-defined by dc->writeback_rate_update_seconds).
-
-When a metadata I/O error happens on cache device, bcache error handling
-routine bch_cache_set_error() will call bch_cache_set_unregister() to
-retire whole cache set. On the unregister code path, this delayed work is
-stopped by calling cancel_delayed_work_sync(&dc->writeback_rate_update).
-
-dc->writeback_rate_update is a special delayed work from others in bcache.
-In its routine update_writeback_rate(), this delayed work is re-armed
-itself. That means when cancel_delayed_work_sync() returns, this delayed
-work can still be executed after several seconds defined by
-dc->writeback_rate_update_seconds.
-
-The problem is, after cancel_delayed_work_sync() returns, the cache set
-unregister code path will continue and release memory of struct cache set.
-Then the delayed work is scheduled to run, __update_writeback_rate()
-will reference the already released cache_set memory, and trigger a NULL
-pointer dereference fault.
-
-This patch introduces two more bcache device flags,
-- BCACHE_DEV_WB_RUNNING
- bit set: bcache device is in writeback mode and running, it is OK for
- dc->writeback_rate_update to re-arm itself.
- bit clear:bcache device is trying to stop dc->writeback_rate_update,
- this delayed work should not re-arm itself and quit.
-- BCACHE_DEV_RATE_DW_RUNNING
- bit set: routine update_writeback_rate() is executing.
- bit clear: routine update_writeback_rate() quits.
-
-This patch also adds a function cancel_writeback_rate_update_dwork() to
-wait for dc->writeback_rate_update quits before cancel it by calling
-cancel_delayed_work_sync(). In order to avoid a deadlock by unexpected
-quit dc->writeback_rate_update, after time_out seconds this function will
-give up and continue to call cancel_delayed_work_sync().
-
-And here I explain how this patch stops self re-armed delayed work properly
-with the above stuffs.
-
-update_writeback_rate() sets BCACHE_DEV_RATE_DW_RUNNING at its beginning
-and clears BCACHE_DEV_RATE_DW_RUNNING at its end. Before calling
-cancel_writeback_rate_update_dwork() clear flag BCACHE_DEV_WB_RUNNING.
-
-Before calling cancel_delayed_work_sync() wait until flag
-BCACHE_DEV_RATE_DW_RUNNING is clear. So when calling
-cancel_delayed_work_sync(), dc->writeback_rate_update must be already re-
-armed, or quit by seeing BCACHE_DEV_WB_RUNNING cleared. In both cases
-delayed work routine update_writeback_rate() won't be executed after
-cancel_delayed_work_sync() returns.
-
-Inside update_writeback_rate() before calling schedule_delayed_work(), flag
-BCACHE_DEV_WB_RUNNING is checked before. If this flag is cleared, it means
-someone is about to stop the delayed work. Because flag
-BCACHE_DEV_RATE_DW_RUNNING is set already and cancel_delayed_work_sync()
-has to wait for this flag to be cleared, we don't need to worry about race
-condition here.
-
-If update_writeback_rate() is scheduled to run after checking
-BCACHE_DEV_RATE_DW_RUNNING and before calling cancel_delayed_work_sync()
-in cancel_writeback_rate_update_dwork(), it is also safe. Because at this
-moment BCACHE_DEV_WB_RUNNING is cleared with memory barrier. As I mentioned
-previously, update_writeback_rate() will see BCACHE_DEV_WB_RUNNING is clear
-and quit immediately.
-
-Because there are more dependences inside update_writeback_rate() to struct
-cache_set memory, dc->writeback_rate_update is not a simple self re-arm
-delayed work. After trying many different methods (e.g. hold dc->count, or
-use locks), this is the only way I can find which works to properly stop
-dc->writeback_rate_update delayed work.
-
-Changelog:
-v3: change values of BCACHE_DEV_WB_RUNNING and BCACHE_DEV_RATE_DW_RUNNING
- to bit index, for test_bit().
-v2: Try to fix the race issue which is pointed out by Junhui.
-v1: The initial version for review
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Reviewed-by: Junhui Tang <tang.junhui@zte.com.cn>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Hannes Reinecke <hare@suse.com>
----
- drivers/md/bcache/bcache.h | 9 +++++----
- drivers/md/bcache/super.c | 39 +++++++++++++++++++++++++++++++++++----
- drivers/md/bcache/sysfs.c | 3 ++-
- drivers/md/bcache/writeback.c | 29 ++++++++++++++++++++++++++++-
- 4 files changed, 70 insertions(+), 10 deletions(-)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 12e5197f186c..b5ddb848cd31 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -258,10 +258,11 @@ struct bcache_device {
- struct gendisk *disk;
-
- unsigned long flags;
--#define BCACHE_DEV_CLOSING 0
--#define BCACHE_DEV_DETACHING 1
--#define BCACHE_DEV_UNLINK_DONE 2
--
-+#define BCACHE_DEV_CLOSING 0
-+#define BCACHE_DEV_DETACHING 1
-+#define BCACHE_DEV_UNLINK_DONE 2
-+#define BCACHE_DEV_WB_RUNNING 3
-+#define BCACHE_DEV_RATE_DW_RUNNING 4
- unsigned nr_stripes;
- unsigned stripe_size;
- atomic_t *stripe_sectors_dirty;
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 9b745c5c1980..531cd967c05f 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -899,6 +899,32 @@ void bch_cached_dev_run(struct cached_dev *dc)
- pr_debug("error creating sysfs link");
- }
-
-+/*
-+ * If BCACHE_DEV_RATE_DW_RUNNING is set, it means routine of the delayed
-+ * work dc->writeback_rate_update is running. Wait until the routine
-+ * quits (BCACHE_DEV_RATE_DW_RUNNING is clear), then continue to
-+ * cancel it. If BCACHE_DEV_RATE_DW_RUNNING is not clear after time_out
-+ * seconds, give up waiting here and continue to cancel it too.
-+ */
-+static void cancel_writeback_rate_update_dwork(struct cached_dev *dc)
-+{
-+ int time_out = WRITEBACK_RATE_UPDATE_SECS_MAX * HZ;
-+
-+ do {
-+ if (!test_bit(BCACHE_DEV_RATE_DW_RUNNING,
-+ &dc->disk.flags))
-+ break;
-+ time_out--;
-+ schedule_timeout_interruptible(1);
-+ } while (time_out > 0);
-+
-+ if (time_out == 0)
-+ pr_warn("give up waiting for dc->writeback_write_update"
-+ " to quit");
-+
-+ cancel_delayed_work_sync(&dc->writeback_rate_update);
-+}
-+
- static void cached_dev_detach_finish(struct work_struct *w)
- {
- struct cached_dev *dc = container_of(w, struct cached_dev, detach);
-@@ -911,7 +937,9 @@ static void cached_dev_detach_finish(struct work_struct *w)
-
- mutex_lock(&bch_register_lock);
-
-- cancel_delayed_work_sync(&dc->writeback_rate_update);
-+ if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
-+ cancel_writeback_rate_update_dwork(dc);
-+
- if (!IS_ERR_OR_NULL(dc->writeback_thread)) {
- kthread_stop(dc->writeback_thread);
- dc->writeback_thread = NULL;
-@@ -954,6 +982,7 @@ void bch_cached_dev_detach(struct cached_dev *dc)
- closure_get(&dc->disk.cl);
-
- bch_writeback_queue(dc);
-+
- cached_dev_put(dc);
- }
-
-@@ -1081,14 +1110,16 @@ static void cached_dev_free(struct closure *cl)
- {
- struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
-
-- cancel_delayed_work_sync(&dc->writeback_rate_update);
-+ mutex_lock(&bch_register_lock);
-+
-+ if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
-+ cancel_writeback_rate_update_dwork(dc);
-+
- if (!IS_ERR_OR_NULL(dc->writeback_thread))
- kthread_stop(dc->writeback_thread);
- if (dc->writeback_write_wq)
- destroy_workqueue(dc->writeback_write_wq);
-
-- mutex_lock(&bch_register_lock);
--
- if (atomic_read(&dc->running))
- bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
- bcache_device_free(&dc->disk);
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index 78cd7bd50fdd..55673508628f 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -309,7 +309,8 @@ STORE(bch_cached_dev)
- bch_writeback_queue(dc);
-
- if (attr == &sysfs_writeback_percent)
-- schedule_delayed_work(&dc->writeback_rate_update,
-+ if (!test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
-+ schedule_delayed_work(&dc->writeback_rate_update,
- dc->writeback_rate_update_seconds * HZ);
-
- mutex_unlock(&bch_register_lock);
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index 4dbeaaa575bf..8f98ef1038d3 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -115,6 +115,21 @@ static void update_writeback_rate(struct work_struct *work)
- struct cached_dev,
- writeback_rate_update);
-
-+ /*
-+ * should check BCACHE_DEV_RATE_DW_RUNNING before calling
-+ * cancel_delayed_work_sync().
-+ */
-+ set_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
-+ /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
-+ smp_mb();
-+
-+ if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) {
-+ clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
-+ /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
-+ smp_mb();
-+ return;
-+ }
-+
- down_read(&dc->writeback_lock);
-
- if (atomic_read(&dc->has_dirty) &&
-@@ -123,8 +138,18 @@ static void update_writeback_rate(struct work_struct *work)
-
- up_read(&dc->writeback_lock);
-
-- schedule_delayed_work(&dc->writeback_rate_update,
-+ if (test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) {
-+ schedule_delayed_work(&dc->writeback_rate_update,
- dc->writeback_rate_update_seconds * HZ);
-+ }
-+
-+ /*
-+ * should check BCACHE_DEV_RATE_DW_RUNNING before calling
-+ * cancel_delayed_work_sync().
-+ */
-+ clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
-+ /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
-+ smp_mb();
- }
-
- static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
-@@ -675,6 +700,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
- dc->writeback_rate_p_term_inverse = 40;
- dc->writeback_rate_i_term_inverse = 10000;
-
-+ WARN_ON(test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
- INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
- }
-
-@@ -693,6 +719,7 @@ int bch_cached_dev_writeback_start(struct cached_dev *dc)
- return PTR_ERR(dc->writeback_thread);
- }
-
-+ WARN_ON(test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
- schedule_delayed_work(&dc->writeback_rate_update,
- dc->writeback_rate_update_seconds * HZ);
-
---
-2.16.1
-
diff --git a/for-next/v6/v6-0004-bcache-add-CACHE_SET_IO_DISABLE-to-struct-cache_s.patch b/for-next/v6/v6-0004-bcache-add-CACHE_SET_IO_DISABLE-to-struct-cache_s.patch
deleted file mode 100644
index 9321e09..0000000
--- a/for-next/v6/v6-0004-bcache-add-CACHE_SET_IO_DISABLE-to-struct-cache_s.patch
+++ /dev/null
@@ -1,491 +0,0 @@
-From 40bdef8329171e783ee21bc04670cea7fca8aa8f Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sun, 14 Jan 2018 22:15:00 +0800
-Subject: [PATCH v6 4/9] bcache: add CACHE_SET_IO_DISABLE to struct cache_set
- flags
-
-When too many I/Os failed on cache device, bch_cache_set_error() is called
-in the error handling code path to retire whole problematic cache set. If
-new I/O requests continue to come and take refcount dc->count, the cache
-set won't be retired immediately, this is a problem.
-
-Furthermore, several kernel threads and self-armed kernel works may still
-be running after bch_cache_set_error() is called. It takes quite a while
-for them to stop, or they won't stop at all. They also prevent the cache
-set from being retired.
-
-The solution in this patch is, to add per cache set flag to disable I/O
-request on this cache and all attached backing devices. Then new coming I/O
-requests can be rejected in *_make_request() before taking refcount, kernel
-threads and self-armed kernel worker can stop very fast when flags bit
-CACHE_SET_IO_DISABLE is set.
-
-Because bcache also do internal I/Os for writeback, garbage collection,
-bucket allocation, journaling, this kind of I/O should be disabled after
-bch_cache_set_error() is called. So closure_bio_submit() is modified to
-check whether CACHE_SET_IO_DISABLE is set on cache_set->flags. If set,
-closure_bio_submit() will set bio->bi_status to BLK_STS_IOERR and
-return, generic_make_request() won't be called.
-
-A sysfs interface is also added to set or clear CACHE_SET_IO_DISABLE bit
-from cache_set->flags, to disable or enable cache set I/O for debugging. It
-is helpful to trigger more corner case issues for failed cache device.
-
-Changelog
-v3, change CACHE_SET_IO_DISABLE from 4 to 3, since it is bit index.
- remove "bcache: " prefix when printing out kernel message.
-v2, more changes by previous review,
-- Use CACHE_SET_IO_DISABLE of cache_set->flags, suggested by Junhui.
-- Check CACHE_SET_IO_DISABLE in bch_btree_gc() to stop a while-loop, this
- is reported and inspired from original patch of Pavel Vazharov.
-v1, initial version.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Reviewed-by: Hannes Reinecke <hare@suse.com>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Pavel Vazharov <freakpv@gmail.com>
----
- drivers/md/bcache/alloc.c | 3 ++-
- drivers/md/bcache/bcache.h | 18 ++++++++++++++++++
- drivers/md/bcache/btree.c | 10 +++++++---
- drivers/md/bcache/io.c | 2 +-
- drivers/md/bcache/journal.c | 4 ++--
- drivers/md/bcache/request.c | 26 +++++++++++++++++++-------
- drivers/md/bcache/super.c | 6 +++++-
- drivers/md/bcache/sysfs.c | 20 ++++++++++++++++++++
- drivers/md/bcache/util.h | 6 ------
- drivers/md/bcache/writeback.c | 35 +++++++++++++++++++++++++++--------
- 10 files changed, 101 insertions(+), 29 deletions(-)
-
-diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
-index 458e1d38577d..004cc3cc6123 100644
---- a/drivers/md/bcache/alloc.c
-+++ b/drivers/md/bcache/alloc.c
-@@ -287,7 +287,8 @@ do { \
- break; \
- \
- mutex_unlock(&(ca)->set->bucket_lock); \
-- if (kthread_should_stop()) { \
-+ if (kthread_should_stop() || \
-+ test_bit(CACHE_SET_IO_DISABLE, &ca->set->flags)) { \
- set_current_state(TASK_RUNNING); \
- return 0; \
- } \
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index b5ddb848cd31..56179fff1e59 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -475,10 +475,15 @@ struct gc_stat {
- *
- * CACHE_SET_RUNNING means all cache devices have been registered and journal
- * replay is complete.
-+ *
-+ * CACHE_SET_IO_DISABLE is set when bcache is stopping the whole cache set, all
-+ * external and internal I/O should be denied when this flag is set.
-+ *
- */
- #define CACHE_SET_UNREGISTERING 0
- #define CACHE_SET_STOPPING 1
- #define CACHE_SET_RUNNING 2
-+#define CACHE_SET_IO_DISABLE 3
-
- struct cache_set {
- struct closure cl;
-@@ -868,6 +873,19 @@ static inline void wake_up_allocators(struct cache_set *c)
- wake_up_process(ca->alloc_thread);
- }
-
-+static inline void closure_bio_submit(struct cache_set *c,
-+ struct bio *bio,
-+ struct closure *cl)
-+{
-+ closure_get(cl);
-+ if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags))) {
-+ bio->bi_status = BLK_STS_IOERR;
-+ bio_endio(bio);
-+ return;
-+ }
-+ generic_make_request(bio);
-+}
-+
- /* Forward declarations */
-
- void bch_count_io_errors(struct cache *, blk_status_t, int, const char *);
-diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
-index fad9fe8817eb..8ca50f387a1d 100644
---- a/drivers/md/bcache/btree.c
-+++ b/drivers/md/bcache/btree.c
-@@ -1744,6 +1744,7 @@ static void bch_btree_gc(struct cache_set *c)
-
- btree_gc_start(c);
-
-+ /* if CACHE_SET_IO_DISABLE set, gc thread should stop too */
- do {
- ret = btree_root(gc_root, c, &op, &writes, &stats);
- closure_sync(&writes);
-@@ -1751,7 +1752,7 @@ static void bch_btree_gc(struct cache_set *c)
-
- if (ret && ret != -EAGAIN)
- pr_warn("gc failed!");
-- } while (ret);
-+ } while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags));
-
- bch_btree_gc_finish(c);
- wake_up_allocators(c);
-@@ -1789,9 +1790,12 @@ static int bch_gc_thread(void *arg)
-
- while (1) {
- wait_event_interruptible(c->gc_wait,
-- kthread_should_stop() || gc_should_run(c));
-+ kthread_should_stop() ||
-+ test_bit(CACHE_SET_IO_DISABLE, &c->flags) ||
-+ gc_should_run(c));
-
-- if (kthread_should_stop())
-+ if (kthread_should_stop() ||
-+ test_bit(CACHE_SET_IO_DISABLE, &c->flags))
- break;
-
- set_gc_sectors(c);
-diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
-index a783c5a41ff1..8013ecbcdbda 100644
---- a/drivers/md/bcache/io.c
-+++ b/drivers/md/bcache/io.c
-@@ -38,7 +38,7 @@ void __bch_submit_bbio(struct bio *bio, struct cache_set *c)
- bio_set_dev(bio, PTR_CACHE(c, &b->key, 0)->bdev);
-
- b->submit_time_us = local_clock_us();
-- closure_bio_submit(bio, bio->bi_private);
-+ closure_bio_submit(c, bio, bio->bi_private);
- }
-
- void bch_submit_bbio(struct bio *bio, struct cache_set *c,
-diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index 1b736b860739..c94085f400a4 100644
---- a/drivers/md/bcache/journal.c
-+++ b/drivers/md/bcache/journal.c
-@@ -62,7 +62,7 @@ reread: left = ca->sb.bucket_size - offset;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
- bch_bio_map(bio, data);
-
-- closure_bio_submit(bio, &cl);
-+ closure_bio_submit(ca->set, bio, &cl);
- closure_sync(&cl);
-
- /* This function could be simpler now since we no longer write
-@@ -674,7 +674,7 @@ static void journal_write_unlocked(struct closure *cl)
- spin_unlock(&c->journal.lock);
-
- while ((bio = bio_list_pop(&list)))
-- closure_bio_submit(bio, cl);
-+ closure_bio_submit(c, bio, cl);
-
- continue_at(cl, journal_write_done, NULL);
- }
-diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
-index 1a46b41dac70..02296bda6384 100644
---- a/drivers/md/bcache/request.c
-+++ b/drivers/md/bcache/request.c
-@@ -747,7 +747,7 @@ static void cached_dev_read_error(struct closure *cl)
-
- /* XXX: invalidate cache */
-
-- closure_bio_submit(bio, cl);
-+ closure_bio_submit(s->iop.c, bio, cl);
- }
-
- continue_at(cl, cached_dev_cache_miss_done, NULL);
-@@ -872,7 +872,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
- s->cache_miss = miss;
- s->iop.bio = cache_bio;
- bio_get(cache_bio);
-- closure_bio_submit(cache_bio, &s->cl);
-+ closure_bio_submit(s->iop.c, cache_bio, &s->cl);
-
- return ret;
- out_put:
-@@ -880,7 +880,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
- out_submit:
- miss->bi_end_io = request_endio;
- miss->bi_private = &s->cl;
-- closure_bio_submit(miss, &s->cl);
-+ closure_bio_submit(s->iop.c, miss, &s->cl);
- return ret;
- }
-
-@@ -945,7 +945,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
-
- if ((bio_op(bio) != REQ_OP_DISCARD) ||
- blk_queue_discard(bdev_get_queue(dc->bdev)))
-- closure_bio_submit(bio, cl);
-+ closure_bio_submit(s->iop.c, bio, cl);
- } else if (s->iop.writeback) {
- bch_writeback_add(dc);
- s->iop.bio = bio;
-@@ -960,12 +960,12 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
- flush->bi_private = cl;
- flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
-
-- closure_bio_submit(flush, cl);
-+ closure_bio_submit(s->iop.c, flush, cl);
- }
- } else {
- s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split);
-
-- closure_bio_submit(bio, cl);
-+ closure_bio_submit(s->iop.c, bio, cl);
- }
-
- closure_call(&s->iop.cl, bch_data_insert, NULL, cl);
-@@ -981,7 +981,7 @@ static void cached_dev_nodata(struct closure *cl)
- bch_journal_meta(s->iop.c, cl);
-
- /* If it's a flush, we send the flush to the backing device too */
-- closure_bio_submit(bio, cl);
-+ closure_bio_submit(s->iop.c, bio, cl);
-
- continue_at(cl, cached_dev_bio_complete, NULL);
- }
-@@ -996,6 +996,12 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
- struct cached_dev *dc = container_of(d, struct cached_dev, disk);
- int rw = bio_data_dir(bio);
-
-+ if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
-+ bio->bi_status = BLK_STS_IOERR;
-+ bio_endio(bio);
-+ return BLK_QC_T_NONE;
-+ }
-+
- atomic_set(&dc->backing_idle, 0);
- generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
-
-@@ -1112,6 +1118,12 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q,
- struct bcache_device *d = bio->bi_disk->private_data;
- int rw = bio_data_dir(bio);
-
-+ if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
-+ bio->bi_status = BLK_STS_IOERR;
-+ bio_endio(bio);
-+ return BLK_QC_T_NONE;
-+ }
-+
- generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
-
- s = search_alloc(bio, d);
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 531cd967c05f..a1abeebc7643 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -521,7 +521,7 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op,
- bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags);
- bch_bio_map(bio, ca->disk_buckets);
-
-- closure_bio_submit(bio, &ca->prio);
-+ closure_bio_submit(ca->set, bio, &ca->prio);
- closure_sync(cl);
- }
-
-@@ -1351,6 +1351,9 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
- test_bit(CACHE_SET_STOPPING, &c->flags))
- return false;
-
-+ if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags))
-+ pr_warn("CACHE_SET_IO_DISABLE already set");
-+
- /* XXX: we can be called from atomic context
- acquire_console_sem();
- */
-@@ -1586,6 +1589,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
- c->congested_read_threshold_us = 2000;
- c->congested_write_threshold_us = 20000;
- c->error_limit = DEFAULT_IO_ERROR_LIMIT;
-+ WARN_ON(test_and_clear_bit(CACHE_SET_IO_DISABLE, &c->flags));
-
- return c;
- err:
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index 55673508628f..e75279b7d180 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -95,6 +95,7 @@ read_attribute(partial_stripes_expensive);
-
- rw_attribute(synchronous);
- rw_attribute(journal_delay_ms);
-+rw_attribute(io_disable);
- rw_attribute(discard);
- rw_attribute(running);
- rw_attribute(label);
-@@ -591,6 +592,8 @@ SHOW(__bch_cache_set)
- sysfs_printf(gc_always_rewrite, "%i", c->gc_always_rewrite);
- sysfs_printf(btree_shrinker_disabled, "%i", c->shrinker_disabled);
- sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
-+ sysfs_printf(io_disable, "%i",
-+ test_bit(CACHE_SET_IO_DISABLE, &c->flags));
-
- if (attr == &sysfs_bset_tree_stats)
- return bch_bset_print_stats(c, buf);
-@@ -680,6 +683,22 @@ STORE(__bch_cache_set)
- if (attr == &sysfs_io_error_halflife)
- c->error_decay = strtoul_or_return(buf) / 88;
-
-+ if (attr == &sysfs_io_disable) {
-+ int v = strtoul_or_return(buf);
-+
-+ if (v) {
-+ if (test_and_set_bit(CACHE_SET_IO_DISABLE,
-+ &c->flags))
-+ pr_warn("CACHE_SET_IO_DISABLE"
-+ " already set");
-+ } else {
-+ if (!test_and_clear_bit(CACHE_SET_IO_DISABLE,
-+ &c->flags))
-+ pr_warn("CACHE_SET_IO_DISABLE"
-+ " already cleared");
-+ }
-+ }
-+
- sysfs_strtoul(journal_delay_ms, c->journal_delay_ms);
- sysfs_strtoul(verify, c->verify);
- sysfs_strtoul(key_merging_disabled, c->key_merging_disabled);
-@@ -765,6 +784,7 @@ static struct attribute *bch_cache_set_internal_files[] = {
- &sysfs_gc_always_rewrite,
- &sysfs_btree_shrinker_disabled,
- &sysfs_copy_gc_enabled,
-+ &sysfs_io_disable,
- NULL
- };
- KTYPE(bch_cache_set_internal);
-diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
-index a6763db7f061..268024529edd 100644
---- a/drivers/md/bcache/util.h
-+++ b/drivers/md/bcache/util.h
-@@ -567,12 +567,6 @@ static inline sector_t bdev_sectors(struct block_device *bdev)
- return bdev->bd_inode->i_size >> 9;
- }
-
--#define closure_bio_submit(bio, cl) \
--do { \
-- closure_get(cl); \
-- generic_make_request(bio); \
--} while (0)
--
- uint64_t bch_crc64_update(uint64_t, const void *, size_t);
- uint64_t bch_crc64(const void *, size_t);
-
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index 8f98ef1038d3..3d7d8452e0de 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -114,6 +114,7 @@ static void update_writeback_rate(struct work_struct *work)
- struct cached_dev *dc = container_of(to_delayed_work(work),
- struct cached_dev,
- writeback_rate_update);
-+ struct cache_set *c = dc->disk.c;
-
- /*
- * should check BCACHE_DEV_RATE_DW_RUNNING before calling
-@@ -123,7 +124,12 @@ static void update_writeback_rate(struct work_struct *work)
- /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
- smp_mb();
-
-- if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) {
-+ /*
-+ * CACHE_SET_IO_DISABLE might be set via sysfs interface,
-+ * check it here too.
-+ */
-+ if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) ||
-+ test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
- clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
- /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
- smp_mb();
-@@ -138,7 +144,12 @@ static void update_writeback_rate(struct work_struct *work)
-
- up_read(&dc->writeback_lock);
-
-- if (test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) {
-+ /*
-+ * CACHE_SET_IO_DISABLE might be set via sysfs interface,
-+ * check it here too.
-+ */
-+ if (test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) &&
-+ !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
- schedule_delayed_work(&dc->writeback_rate_update,
- dc->writeback_rate_update_seconds * HZ);
- }
-@@ -278,7 +289,7 @@ static void write_dirty(struct closure *cl)
- bio_set_dev(&io->bio, io->dc->bdev);
- io->bio.bi_end_io = dirty_endio;
-
-- closure_bio_submit(&io->bio, cl);
-+ closure_bio_submit(io->dc->disk.c, &io->bio, cl);
- }
-
- atomic_set(&dc->writeback_sequence_next, next_sequence);
-@@ -304,7 +315,7 @@ static void read_dirty_submit(struct closure *cl)
- {
- struct dirty_io *io = container_of(cl, struct dirty_io, cl);
-
-- closure_bio_submit(&io->bio, cl);
-+ closure_bio_submit(io->dc->disk.c, &io->bio, cl);
-
- continue_at(cl, write_dirty, io->dc->writeback_write_wq);
- }
-@@ -330,7 +341,9 @@ static void read_dirty(struct cached_dev *dc)
-
- next = bch_keybuf_next(&dc->writeback_keys);
-
-- while (!kthread_should_stop() && next) {
-+ while (!kthread_should_stop() &&
-+ !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
-+ next) {
- size = 0;
- nk = 0;
-
-@@ -427,7 +440,9 @@ static void read_dirty(struct cached_dev *dc)
- }
- }
-
-- while (!kthread_should_stop() && delay) {
-+ while (!kthread_should_stop() &&
-+ !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
-+ delay) {
- schedule_timeout_interruptible(delay);
- delay = writeback_delay(dc, 0);
- }
-@@ -583,11 +598,13 @@ static bool refill_dirty(struct cached_dev *dc)
- static int bch_writeback_thread(void *arg)
- {
- struct cached_dev *dc = arg;
-+ struct cache_set *c = dc->disk.c;
- bool searched_full_index;
-
- bch_ratelimit_reset(&dc->writeback_rate);
-
-- while (!kthread_should_stop()) {
-+ while (!kthread_should_stop() &&
-+ !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
- down_write(&dc->writeback_lock);
- set_current_state(TASK_INTERRUPTIBLE);
- /*
-@@ -601,7 +618,8 @@ static int bch_writeback_thread(void *arg)
- (!atomic_read(&dc->has_dirty) || !dc->writeback_running)) {
- up_write(&dc->writeback_lock);
-
-- if (kthread_should_stop()) {
-+ if (kthread_should_stop() ||
-+ test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
- set_current_state(TASK_RUNNING);
- break;
- }
-@@ -637,6 +655,7 @@ static int bch_writeback_thread(void *arg)
-
- while (delay &&
- !kthread_should_stop() &&
-+ !test_bit(CACHE_SET_IO_DISABLE, &c->flags) &&
- !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
- delay = schedule_timeout_interruptible(delay);
-
---
-2.16.1
-
diff --git a/for-next/v6/v6-0005-bcache-add-stop_when_cache_set_failed-option-to-b.patch b/for-next/v6/v6-0005-bcache-add-stop_when_cache_set_failed-option-to-b.patch
deleted file mode 100644
index 085894e..0000000
--- a/for-next/v6/v6-0005-bcache-add-stop_when_cache_set_failed-option-to-b.patch
+++ /dev/null
@@ -1,258 +0,0 @@
-From e508bc1b82e5720315bfb28b3c42e9333a6ec8ce Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Mon, 5 Feb 2018 23:44:28 +0800
-Subject: [PATCH v6 5/9] bcache: add stop_when_cache_set_failed option to
- backing device
-
-When there are too many I/O errors on cache device, current bcache code
-will retire the whole cache set, and detach all bcache devices. But the
-detached bcache devices are not stopped, which is problematic when bcache
-is in writeback mode.
-
-If the retired cache set has dirty data of backing devices, continuing to
-write to the bcache device will write to the backing device directly. If
-the LBA of a write request has a dirty version cached on the cache device,
-then the next time the cache device is re-registered and the backing device
-is re-attached to it, the stale dirty data on the cache device will be
-written to the backing device and overwrite the latest directly written
-data. This situation causes silent data corruption.
-
-But we cannot simply stop all attached bcache devices when the cache set is
-broken or disconnected. For example, use bcache to accelerate performance
-of an email service. In such workload, if cache device is broken but no
-dirty data lost, keep the bcache device alive and permit email service
-continue to access user data might be a better solution for the cache
-device failure.
-
-Nix <nix@esperi.org.uk> points out the issue and provides the above example
-to explain why it might be necessary to not stop bcache device for broken
-cache device. Pavel Goran <via-bcache@pvgoran.name> provides a brilliant
-suggestion to provide "always" and "auto" options to per-cached device
-sysfs file stop_when_cache_set_failed. If cache set is retiring and the
-backing device has no dirty data on cache, it should be safe to keep the
-bcache device alive. In this case, if stop_when_cache_set_failed is set to
-"auto", the device failure handling code will not stop this bcache device
-and permit application to access the backing device with a unattached
-bcache device.
-
-Changelog:
-v3: fix typos pointed out by Nix.
-v2: change option values of stop_when_cache_set_failed from 1/0 to
- "auto"/"always".
-v1: initial version, stop_when_cache_set_failed can be 0 (not stop) or 1
- (always stop).
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Reviewed-by: Michael Lyle <mlyle@lyle.org>
-Cc: Nix <nix@esperi.org.uk>
-Cc: Pavel Goran <via-bcache@pvgoran.name>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
-Cc: Hannes Reinecke <hare@suse.com>
----
- drivers/md/bcache/bcache.h | 9 +++++
- drivers/md/bcache/super.c | 82 ++++++++++++++++++++++++++++++++++++++++------
- drivers/md/bcache/sysfs.c | 17 ++++++++++
- 3 files changed, 98 insertions(+), 10 deletions(-)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 56179fff1e59..7c2b836732e9 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -287,6 +287,12 @@ struct io {
- sector_t last;
- };
-
-+enum stop_on_failure {
-+ BCH_CACHED_DEV_STOP_AUTO = 0,
-+ BCH_CACHED_DEV_STOP_ALWAYS,
-+ BCH_CACHED_DEV_STOP_MODE_MAX,
-+};
-+
- struct cached_dev {
- struct list_head list;
- struct bcache_device disk;
-@@ -379,6 +385,8 @@ struct cached_dev {
- unsigned writeback_rate_i_term_inverse;
- unsigned writeback_rate_p_term_inverse;
- unsigned writeback_rate_minimum;
-+
-+ enum stop_on_failure stop_when_cache_set_failed;
- };
-
- enum alloc_reserve {
-@@ -924,6 +932,7 @@ void bch_write_bdev_super(struct cached_dev *, struct closure *);
-
- extern struct workqueue_struct *bcache_wq;
- extern const char * const bch_cache_modes[];
-+extern const char * const bch_stop_on_failure_modes[];
- extern struct mutex bch_register_lock;
- extern struct list_head bch_cache_sets;
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index a1abeebc7643..52d5012948c9 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -47,6 +47,14 @@ const char * const bch_cache_modes[] = {
- NULL
- };
-
-+/* Default is -1; we skip past it for stop_when_cache_set_failed */
-+const char * const bch_stop_on_failure_modes[] = {
-+ "default",
-+ "auto",
-+ "always",
-+ NULL
-+};
-+
- static struct kobject *bcache_kobj;
- struct mutex bch_register_lock;
- LIST_HEAD(bch_cache_sets);
-@@ -1189,6 +1197,9 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
- max(dc->disk.disk->queue->backing_dev_info->ra_pages,
- q->backing_dev_info->ra_pages);
-
-+ /* default to auto */
-+ dc->stop_when_cache_set_failed = BCH_CACHED_DEV_STOP_AUTO;
-+
- bch_cached_dev_request_init(dc);
- bch_cached_dev_writeback_init(dc);
- return 0;
-@@ -1465,25 +1476,76 @@ static void cache_set_flush(struct closure *cl)
- closure_return(cl);
- }
-
-+/*
-+ * This function is only called when CACHE_SET_IO_DISABLE is set, which means
-+ * cache set is unregistering due to too many I/O errors. In this condition,
-+ * the bcache device might be stopped, it depends on stop_when_cache_set_failed
-+ * value and whether the broken cache has dirty data:
-+ *
-+ * dc->stop_when_cache_set_failed dc->has_dirty stop bcache device
-+ * BCH_CACHED_DEV_STOP_AUTO 0 NO
-+ * BCH_CACHED_DEV_STOP_AUTO 1 YES
-+ * BCH_CACHED_DEV_STOP_ALWAYS 0 YES
-+ * BCH_CACHED_DEV_STOP_ALWAYS 1 YES
-+ *
-+ * The expected behavior is, if stop_when_cache_set_failed is configured to
-+ * "auto" via sysfs interface, the bcache device will not be stopped if the
-+ * backing device is clean on the broken cache device.
-+ */
-+static void conditional_stop_bcache_device(struct cache_set *c,
-+ struct bcache_device *d,
-+ struct cached_dev *dc)
-+{
-+ if (dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_ALWAYS) {
-+ pr_warn("stop_when_cache_set_failed of %s is \"always\", stop"
-+ " it for failed cache set %pU.",
-+ d->disk->disk_name, c->sb.set_uuid);
-+ bcache_device_stop(d);
-+ } else if (atomic_read(&dc->has_dirty)) {
-+ /*
-+ * dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_AUTO
-+ * and dc->has_dirty == 1
-+ */
-+ pr_warn("stop_when_cache_set_failed of %s is \"auto\" and "
-+ "cache is dirty, stop it to avoid potential data "
-+ "corruption.",
-+ d->disk->disk_name);
-+ bcache_device_stop(d);
-+ } else {
-+ /*
-+ * dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_AUTO
-+ * and dc->has_dirty == 0
-+ */
-+ pr_warn("stop_when_cache_set_failed of %s is \"auto\" and "
-+ "cache is clean, keep it alive.",
-+ d->disk->disk_name);
-+ }
-+}
-+
- static void __cache_set_unregister(struct closure *cl)
- {
- struct cache_set *c = container_of(cl, struct cache_set, caching);
- struct cached_dev *dc;
-+ struct bcache_device *d;
- size_t i;
-
- mutex_lock(&bch_register_lock);
-
-- for (i = 0; i < c->devices_max_used; i++)
-- if (c->devices[i]) {
-- if (!UUID_FLASH_ONLY(&c->uuids[i]) &&
-- test_bit(CACHE_SET_UNREGISTERING, &c->flags)) {
-- dc = container_of(c->devices[i],
-- struct cached_dev, disk);
-- bch_cached_dev_detach(dc);
-- } else {
-- bcache_device_stop(c->devices[i]);
-- }
-+ for (i = 0; i < c->devices_max_used; i++) {
-+ d = c->devices[i];
-+ if (!d)
-+ continue;
-+
-+ if (!UUID_FLASH_ONLY(&c->uuids[i]) &&
-+ test_bit(CACHE_SET_UNREGISTERING, &c->flags)) {
-+ dc = container_of(d, struct cached_dev, disk);
-+ bch_cached_dev_detach(dc);
-+ if (test_bit(CACHE_SET_IO_DISABLE, &c->flags))
-+ conditional_stop_bcache_device(c, d, dc);
-+ } else {
-+ bcache_device_stop(d);
- }
-+ }
-
- mutex_unlock(&bch_register_lock);
-
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index e75279b7d180..f2b3b2686627 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -78,6 +78,7 @@ rw_attribute(congested_write_threshold_us);
- rw_attribute(sequential_cutoff);
- rw_attribute(data_csum);
- rw_attribute(cache_mode);
-+rw_attribute(stop_when_cache_set_failed);
- rw_attribute(writeback_metadata);
- rw_attribute(writeback_running);
- rw_attribute(writeback_percent);
-@@ -126,6 +127,12 @@ SHOW(__bch_cached_dev)
- bch_cache_modes + 1,
- BDEV_CACHE_MODE(&dc->sb));
-
-+ if (attr == &sysfs_stop_when_cache_set_failed)
-+ return bch_snprint_string_list(buf, PAGE_SIZE,
-+ bch_stop_on_failure_modes + 1,
-+ dc->stop_when_cache_set_failed);
-+
-+
- sysfs_printf(data_csum, "%i", dc->disk.data_csum);
- var_printf(verify, "%i");
- var_printf(bypass_torture_test, "%i");
-@@ -247,6 +254,15 @@ STORE(__cached_dev)
- }
- }
-
-+ if (attr == &sysfs_stop_when_cache_set_failed) {
-+ v = bch_read_string_list(buf, bch_stop_on_failure_modes + 1);
-+
-+ if (v < 0)
-+ return v;
-+
-+ dc->stop_when_cache_set_failed = v;
-+ }
-+
- if (attr == &sysfs_label) {
- if (size > SB_LABEL_SIZE)
- return -EINVAL;
-@@ -326,6 +342,7 @@ static struct attribute *bch_cached_dev_files[] = {
- &sysfs_data_csum,
- #endif
- &sysfs_cache_mode,
-+ &sysfs_stop_when_cache_set_failed,
- &sysfs_writeback_metadata,
- &sysfs_writeback_running,
- &sysfs_writeback_delay,
---
-2.16.1
-
diff --git a/for-next/v6/v6-0006-bcache-fix-inaccurate-io-state-for-detached-bcach.patch b/for-next/v6/v6-0006-bcache-fix-inaccurate-io-state-for-detached-bcach.patch
deleted file mode 100644
index 8557cd6..0000000
--- a/for-next/v6/v6-0006-bcache-fix-inaccurate-io-state-for-detached-bcach.patch
+++ /dev/null
@@ -1,124 +0,0 @@
-From 17928813439cb184ce155145678a916e523f53ae Mon Sep 17 00:00:00 2001
-From: Tang Junhui <tang.junhui@zte.com.cn>
-Date: Tue, 9 Jan 2018 10:27:11 +0800
-Subject: [PATCH v6 6/9] bcache: fix inaccurate io state for detached bcache
- devices
-
-When we run IO on a detached device and run iostat to show IO status,
-normally it will show output like below (some fields omitted):
-Device: ... avgrq-sz avgqu-sz await r_await w_await svctm %util
-sdd ... 15.89 0.53 1.82 0.20 2.23 1.81 52.30
-bcache0 ... 15.89 115.42 0.00 0.00 0.00 2.40 69.60
-but after IO has stopped, there are still very big avgqu-sz and %util
-values as below:
-Device: ... avgrq-sz avgqu-sz await r_await w_await svctm %util
-bcache0 ... 0 5326.32 0.00 0.00 0.00 0.00 100.10
-
-The reason for this issue is that, only generic_start_io_acct() called
-and no generic_end_io_acct() called for detached device in
-cached_dev_make_request(). See the code:
-//start generic_start_io_acct()
-generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
-if (cached_dev_get(dc)) {
- //will callback generic_end_io_acct()
-}
-else {
- //will not call generic_end_io_acct()
-}
-
-This patch calls generic_end_io_acct() in the end of IO for detached
-devices, so we can show IO state correctly.
-
-(Modified to use GFP_NOIO in kzalloc() by Coly Li)
-
-Changelog:
-v2: fix typo.
-v1: the initial version.
-
-Signed-off-by: Tang Junhui <tang.junhui@zte.com.cn>
-Reviewed-by: Coly Li <colyli@suse.de>
-Reviewed-by: Hannes Reinecke <hare@suse.com>
-Reviewed-by: Michael Lyle <mlyle@lyle.org>
----
- drivers/md/bcache/request.c | 58 +++++++++++++++++++++++++++++++++++++++------
- 1 file changed, 51 insertions(+), 7 deletions(-)
-
-diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
-index 02296bda6384..e09c5ae745be 100644
---- a/drivers/md/bcache/request.c
-+++ b/drivers/md/bcache/request.c
-@@ -986,6 +986,55 @@ static void cached_dev_nodata(struct closure *cl)
- continue_at(cl, cached_dev_bio_complete, NULL);
- }
-
-+struct detached_dev_io_private {
-+ struct bcache_device *d;
-+ unsigned long start_time;
-+ bio_end_io_t *bi_end_io;
-+ void *bi_private;
-+};
-+
-+static void detached_dev_end_io(struct bio *bio)
-+{
-+ struct detached_dev_io_private *ddip;
-+
-+ ddip = bio->bi_private;
-+ bio->bi_end_io = ddip->bi_end_io;
-+ bio->bi_private = ddip->bi_private;
-+
-+ generic_end_io_acct(ddip->d->disk->queue,
-+ bio_data_dir(bio),
-+ &ddip->d->disk->part0, ddip->start_time);
-+
-+ kfree(ddip);
-+
-+ bio->bi_end_io(bio);
-+}
-+
-+static void detached_dev_do_request(struct bcache_device *d, struct bio *bio)
-+{
-+ struct detached_dev_io_private *ddip;
-+ struct cached_dev *dc = container_of(d, struct cached_dev, disk);
-+
-+ /*
-+ * no need to call closure_get(&dc->disk.cl),
-+ * because upper layer had already opened bcache device,
-+ * which would call closure_get(&dc->disk.cl)
-+ */
-+ ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO);
-+ ddip->d = d;
-+ ddip->start_time = jiffies;
-+ ddip->bi_end_io = bio->bi_end_io;
-+ ddip->bi_private = bio->bi_private;
-+ bio->bi_end_io = detached_dev_end_io;
-+ bio->bi_private = ddip;
-+
-+ if ((bio_op(bio) == REQ_OP_DISCARD) &&
-+ !blk_queue_discard(bdev_get_queue(dc->bdev)))
-+ bio->bi_end_io(bio);
-+ else
-+ generic_make_request(bio);
-+}
-+
- /* Cached devices - read & write stuff */
-
- static blk_qc_t cached_dev_make_request(struct request_queue *q,
-@@ -1028,13 +1077,8 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
- else
- cached_dev_read(dc, s);
- }
-- } else {
-- if ((bio_op(bio) == REQ_OP_DISCARD) &&
-- !blk_queue_discard(bdev_get_queue(dc->bdev)))
-- bio_endio(bio);
-- else
-- generic_make_request(bio);
-- }
-+ } else
-+ detached_dev_do_request(d, bio);
-
- return BLK_QC_T_NONE;
- }
---
-2.16.1
-
diff --git a/for-next/v6/v6-0007-bcache-add-backing_request_endio-for-bi_end_io-of.patch b/for-next/v6/v6-0007-bcache-add-backing_request_endio-for-bi_end_io-of.patch
deleted file mode 100644
index d634778..0000000
--- a/for-next/v6/v6-0007-bcache-add-backing_request_endio-for-bi_end_io-of.patch
+++ /dev/null
@@ -1,255 +0,0 @@
-From 4990744596fef00323d0d4e4376f147d8aff6db9 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 10 Jan 2018 21:01:48 +0800
-Subject: [PATCH v6 7/9] bcache: add backing_request_endio() for bi_end_io of
- attached backing device I/O
-
-In order to catch I/O errors of the backing device, a separate bi_end_io
-callback is required. Then a per backing device counter can record the
-number of I/O errors and retire the backing device if the counter reaches
-a per backing device I/O error limit.
-
-This patch adds backing_request_endio() to the bcache backing device I/O
-code path, as preparation for more complicated backing device failure
-handling. So far there is no real code logic change; I make this change a
-separate patch to make sure it is stable and reliable for further work.
-
-Changelog:
-v2: Fix code comments typo, remove a redundant bch_writeback_add() line
- added in v4 patch set.
-v1: indeed this is new added in this patch set.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Reviewed-by: Hannes Reinecke <hare@suse.com>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
-Cc: Michael Lyle <mlyle@lyle.org>
----
- drivers/md/bcache/request.c | 93 +++++++++++++++++++++++++++++++++++--------
- drivers/md/bcache/super.c | 1 +
- drivers/md/bcache/writeback.c | 1 +
- 3 files changed, 79 insertions(+), 16 deletions(-)
-
-diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
-index e09c5ae745be..9c6dda3b0068 100644
---- a/drivers/md/bcache/request.c
-+++ b/drivers/md/bcache/request.c
-@@ -139,6 +139,7 @@ static void bch_data_invalidate(struct closure *cl)
- }
-
- op->insert_data_done = true;
-+ /* get in bch_data_insert() */
- bio_put(bio);
- out:
- continue_at(cl, bch_data_insert_keys, op->wq);
-@@ -630,6 +631,38 @@ static void request_endio(struct bio *bio)
- closure_put(cl);
- }
-
-+static void backing_request_endio(struct bio *bio)
-+{
-+ struct closure *cl = bio->bi_private;
-+
-+ if (bio->bi_status) {
-+ struct search *s = container_of(cl, struct search, cl);
-+ /*
-+ * If a bio has REQ_PREFLUSH for writeback mode, it is
-+ * speically assembled in cached_dev_write() for a non-zero
-+ * write request which has REQ_PREFLUSH. we don't set
-+ * s->iop.status by this failure, the status will be decided
-+ * by result of bch_data_insert() operation.
-+ */
-+ if (unlikely(s->iop.writeback &&
-+ bio->bi_opf & REQ_PREFLUSH)) {
-+ char buf[BDEVNAME_SIZE];
-+
-+ bio_devname(bio, buf);
-+ pr_err("Can't flush %s: returned bi_status %i",
-+ buf, bio->bi_status);
-+ } else {
-+ /* set to orig_bio->bi_status in bio_complete() */
-+ s->iop.status = bio->bi_status;
-+ }
-+ s->recoverable = false;
-+ /* should count I/O error for backing device here */
-+ }
-+
-+ bio_put(bio);
-+ closure_put(cl);
-+}
-+
- static void bio_complete(struct search *s)
- {
- if (s->orig_bio) {
-@@ -644,13 +677,21 @@ static void bio_complete(struct search *s)
- }
- }
-
--static void do_bio_hook(struct search *s, struct bio *orig_bio)
-+static void do_bio_hook(struct search *s,
-+ struct bio *orig_bio,
-+ bio_end_io_t *end_io_fn)
- {
- struct bio *bio = &s->bio.bio;
-
- bio_init(bio, NULL, 0);
- __bio_clone_fast(bio, orig_bio);
-- bio->bi_end_io = request_endio;
-+ /*
-+ * bi_end_io can be set separately somewhere else, e.g. the
-+ * variants in,
-+ * - cache_bio->bi_end_io from cached_dev_cache_miss()
-+ * - n->bi_end_io from cache_lookup_fn()
-+ */
-+ bio->bi_end_io = end_io_fn;
- bio->bi_private = &s->cl;
-
- bio_cnt_set(bio, 3);
-@@ -676,7 +717,7 @@ static inline struct search *search_alloc(struct bio *bio,
- s = mempool_alloc(d->c->search, GFP_NOIO);
-
- closure_init(&s->cl, NULL);
-- do_bio_hook(s, bio);
-+ do_bio_hook(s, bio, request_endio);
-
- s->orig_bio = bio;
- s->cache_miss = NULL;
-@@ -743,10 +784,11 @@ static void cached_dev_read_error(struct closure *cl)
- trace_bcache_read_retry(s->orig_bio);
-
- s->iop.status = 0;
-- do_bio_hook(s, s->orig_bio);
-+ do_bio_hook(s, s->orig_bio, backing_request_endio);
-
- /* XXX: invalidate cache */
-
-+ /* I/O request sent to backing device */
- closure_bio_submit(s->iop.c, bio, cl);
- }
-
-@@ -859,7 +901,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
- bio_copy_dev(cache_bio, miss);
- cache_bio->bi_iter.bi_size = s->insert_bio_sectors << 9;
-
-- cache_bio->bi_end_io = request_endio;
-+ cache_bio->bi_end_io = backing_request_endio;
- cache_bio->bi_private = &s->cl;
-
- bch_bio_map(cache_bio, NULL);
-@@ -872,14 +914,16 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
- s->cache_miss = miss;
- s->iop.bio = cache_bio;
- bio_get(cache_bio);
-+ /* I/O request sent to backing device */
- closure_bio_submit(s->iop.c, cache_bio, &s->cl);
-
- return ret;
- out_put:
- bio_put(cache_bio);
- out_submit:
-- miss->bi_end_io = request_endio;
-+ miss->bi_end_io = backing_request_endio;
- miss->bi_private = &s->cl;
-+ /* I/O request sent to backing device */
- closure_bio_submit(s->iop.c, miss, &s->cl);
- return ret;
- }
-@@ -943,31 +987,46 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
- s->iop.bio = s->orig_bio;
- bio_get(s->iop.bio);
-
-- if ((bio_op(bio) != REQ_OP_DISCARD) ||
-- blk_queue_discard(bdev_get_queue(dc->bdev)))
-- closure_bio_submit(s->iop.c, bio, cl);
-+ if (bio_op(bio) == REQ_OP_DISCARD &&
-+ !blk_queue_discard(bdev_get_queue(dc->bdev)))
-+ goto insert_data;
-+
-+ /* I/O request sent to backing device */
-+ bio->bi_end_io = backing_request_endio;
-+ closure_bio_submit(s->iop.c, bio, cl);
-+
- } else if (s->iop.writeback) {
- bch_writeback_add(dc);
- s->iop.bio = bio;
-
- if (bio->bi_opf & REQ_PREFLUSH) {
-- /* Also need to send a flush to the backing device */
-- struct bio *flush = bio_alloc_bioset(GFP_NOIO, 0,
-- dc->disk.bio_split);
--
-+ /*
-+ * Also need to send a flush to the backing
-+ * device.
-+ */
-+ struct bio *flush;
-+
-+ flush = bio_alloc_bioset(GFP_NOIO, 0,
-+ dc->disk.bio_split);
-+ if (!flush) {
-+ s->iop.status = BLK_STS_RESOURCE;
-+ goto insert_data;
-+ }
- bio_copy_dev(flush, bio);
-- flush->bi_end_io = request_endio;
-+ flush->bi_end_io = backing_request_endio;
- flush->bi_private = cl;
- flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
--
-+ /* I/O request sent to backing device */
- closure_bio_submit(s->iop.c, flush, cl);
- }
- } else {
- s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split);
--
-+ /* I/O request sent to backing device */
-+ bio->bi_end_io = backing_request_endio;
- closure_bio_submit(s->iop.c, bio, cl);
- }
-
-+insert_data:
- closure_call(&s->iop.cl, bch_data_insert, NULL, cl);
- continue_at(cl, cached_dev_write_complete, NULL);
- }
-@@ -981,6 +1040,7 @@ static void cached_dev_nodata(struct closure *cl)
- bch_journal_meta(s->iop.c, cl);
-
- /* If it's a flush, we send the flush to the backing device too */
-+ bio->bi_end_io = backing_request_endio;
- closure_bio_submit(s->iop.c, bio, cl);
-
- continue_at(cl, cached_dev_bio_complete, NULL);
-@@ -1078,6 +1138,7 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
- cached_dev_read(dc, s);
- }
- } else
-+ /* I/O request sent to backing device */
- detached_dev_do_request(d, bio);
-
- return BLK_QC_T_NONE;
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 52d5012948c9..93b39e7ae63f 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -273,6 +273,7 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent)
- bio->bi_private = dc;
-
- closure_get(cl);
-+ /* I/O request sent to backing device */
- __write_super(&dc->sb, bio);
-
- closure_return_with_destructor(cl, bch_write_bdev_super_unlock);
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index 3d7d8452e0de..4ebe0119ea7e 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -289,6 +289,7 @@ static void write_dirty(struct closure *cl)
- bio_set_dev(&io->bio, io->dc->bdev);
- io->bio.bi_end_io = dirty_endio;
-
-+ /* I/O request sent to backing device */
- closure_bio_submit(io->dc->disk.c, &io->bio, cl);
- }
-
---
-2.16.1
-
diff --git a/for-next/v6/v6-0008-bcache-add-io_disable-to-struct-cached_dev.patch b/for-next/v6/v6-0008-bcache-add-io_disable-to-struct-cached_dev.patch
deleted file mode 100644
index 01d336d..0000000
--- a/for-next/v6/v6-0008-bcache-add-io_disable-to-struct-cached_dev.patch
+++ /dev/null
@@ -1,235 +0,0 @@
-From 230e827cd2ed1d6fedda9e1f19367ffc9562c4d1 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Thu, 8 Feb 2018 23:10:27 +0800
-Subject: [PATCH v6 8/9] bcache: add io_disable to struct cached_dev
-
-If a bcache device is configured to writeback mode, current code does not
-handle write I/O errors on backing devices properly.
-
-In writeback mode, a write request is written to the cache device, and
-later flushed to the backing device. If I/O fails when writing from the
-cache device to the backing device, bcache code just ignores the error and
-upper layer code is NOT notified that the backing device is broken.
-
-This patch tries to handle backing device failure in the same way that
-cache device failure is handled:
-- Add an error counter 'io_errors' and error limit 'error_limit' in struct
- cached_dev. Add another io_disable to struct cached_dev to disable I/Os
- on the problematic backing device.
-- When an I/O error happens on the backing device, increase the io_errors
- counter. And if io_errors reaches error_limit, set cached_dev->io_disable
- to true, and stop the bcache device.
-
-The result is, if the backing device is broken or disconnected, and I/O
-errors reach its error limit, the backing device will be disabled and the
-associated bcache device will be removed from the system.
-
-Changelog:
-v2: remove "bcache: " prefix in pr_err(), and use correct name string to
- print out bcache device gendisk name.
-v1: indeed this is new added in v2 patch set.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Reviewed-by: Hannes Reinecke <hare@suse.com>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
----
- drivers/md/bcache/bcache.h | 6 ++++++
- drivers/md/bcache/io.c | 14 ++++++++++++++
- drivers/md/bcache/request.c | 14 ++++++++++++--
- drivers/md/bcache/super.c | 21 +++++++++++++++++++++
- drivers/md/bcache/sysfs.c | 15 ++++++++++++++-
- 5 files changed, 67 insertions(+), 3 deletions(-)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 7c2b836732e9..dbc4fb48c754 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -366,6 +366,7 @@ struct cached_dev {
- unsigned sequential_cutoff;
- unsigned readahead;
-
-+ unsigned io_disable:1;
- unsigned verify:1;
- unsigned bypass_torture_test:1;
-
-@@ -387,6 +388,9 @@ struct cached_dev {
- unsigned writeback_rate_minimum;
-
- enum stop_on_failure stop_when_cache_set_failed;
-+#define DEFAULT_CACHED_DEV_ERROR_LIMIT 64
-+ atomic_t io_errors;
-+ unsigned error_limit;
- };
-
- enum alloc_reserve {
-@@ -896,6 +900,7 @@ static inline void closure_bio_submit(struct cache_set *c,
-
- /* Forward declarations */
-
-+void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio);
- void bch_count_io_errors(struct cache *, blk_status_t, int, const char *);
- void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
- blk_status_t, const char *);
-@@ -923,6 +928,7 @@ int bch_bucket_alloc_set(struct cache_set *, unsigned,
- struct bkey *, int, bool);
- bool bch_alloc_sectors(struct cache_set *, struct bkey *, unsigned,
- unsigned, unsigned, bool);
-+bool bch_cached_dev_error(struct cached_dev *dc);
-
- __printf(2, 3)
- bool bch_cache_set_error(struct cache_set *, const char *, ...);
-diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
-index 8013ecbcdbda..7fac97ae036e 100644
---- a/drivers/md/bcache/io.c
-+++ b/drivers/md/bcache/io.c
-@@ -50,6 +50,20 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c,
- }
-
- /* IO errors */
-+void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio)
-+{
-+ char buf[BDEVNAME_SIZE];
-+ unsigned errors;
-+
-+ WARN_ONCE(!dc, "NULL pointer of struct cached_dev");
-+
-+ errors = atomic_add_return(1, &dc->io_errors);
-+ if (errors < dc->error_limit)
-+ pr_err("%s: IO error on backing device, unrecoverable",
-+ bio_devname(bio, buf));
-+ else
-+ bch_cached_dev_error(dc);
-+}
-
- void bch_count_io_errors(struct cache *ca,
- blk_status_t error,
-diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
-index 9c6dda3b0068..03245e6980a6 100644
---- a/drivers/md/bcache/request.c
-+++ b/drivers/md/bcache/request.c
-@@ -637,6 +637,8 @@ static void backing_request_endio(struct bio *bio)
-
- if (bio->bi_status) {
- struct search *s = container_of(cl, struct search, cl);
-+ struct cached_dev *dc = container_of(s->d,
-+ struct cached_dev, disk);
- /*
- * If a bio has REQ_PREFLUSH for writeback mode, it is
- * speically assembled in cached_dev_write() for a non-zero
-@@ -657,6 +659,7 @@ static void backing_request_endio(struct bio *bio)
- }
- s->recoverable = false;
- /* should count I/O error for backing device here */
-+ bch_count_backing_io_errors(dc, bio);
- }
-
- bio_put(bio);
-@@ -1065,8 +1068,14 @@ static void detatched_dev_end_io(struct bio *bio)
- bio_data_dir(bio),
- &ddip->d->disk->part0, ddip->start_time);
-
-- kfree(ddip);
-+ if (bio->bi_status) {
-+ struct cached_dev *dc = container_of(ddip->d,
-+ struct cached_dev, disk);
-+ /* should count I/O error for backing device here */
-+ bch_count_backing_io_errors(dc, bio);
-+ }
-
-+ kfree(ddip);
- bio->bi_end_io(bio);
- }
-
-@@ -1105,7 +1114,8 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
- struct cached_dev *dc = container_of(d, struct cached_dev, disk);
- int rw = bio_data_dir(bio);
-
-- if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
-+ if (unlikely((d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags)) ||
-+ dc->io_disable)) {
- bio->bi_status = BLK_STS_IOERR;
- bio_endio(bio);
- return BLK_QC_T_NONE;
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 93b39e7ae63f..1c5b7074bd6c 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1198,6 +1198,9 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
- max(dc->disk.disk->queue->backing_dev_info->ra_pages,
- q->backing_dev_info->ra_pages);
-
-+ atomic_set(&dc->io_errors, 0);
-+ dc->io_disable = false;
-+ dc->error_limit = DEFAULT_CACHED_DEV_ERROR_LIMIT;
- /* default to auto */
- dc->stop_when_cache_set_failed = BCH_CACHED_DEV_STOP_AUTO;
-
-@@ -1352,6 +1355,24 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size)
- return flash_dev_run(c, u);
- }
-
-+bool bch_cached_dev_error(struct cached_dev *dc)
-+{
-+ char name[BDEVNAME_SIZE];
-+
-+ if (!dc || test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags))
-+ return false;
-+
-+ dc->io_disable = true;
-+ /* make others know io_disable is true earlier */
-+ smp_mb();
-+
-+ pr_err("stop %s: too many IO errors on backing device %s\n",
-+ dc->disk.disk->disk_name, bdevname(dc->bdev, name));
-+
-+ bcache_device_stop(&dc->disk);
-+ return true;
-+}
-+
- /* Cache set */
-
- __printf(2, 3)
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index f2b3b2686627..bd40d9d0a969 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -141,7 +141,9 @@ SHOW(__bch_cached_dev)
- var_print(writeback_delay);
- var_print(writeback_percent);
- sysfs_hprint(writeback_rate, dc->writeback_rate.rate << 9);
--
-+ sysfs_hprint(io_errors, atomic_read(&dc->io_errors));
-+ sysfs_printf(io_error_limit, "%i", dc->error_limit);
-+ sysfs_printf(io_disable, "%i", dc->io_disable);
- var_print(writeback_rate_update_seconds);
- var_print(writeback_rate_i_term_inverse);
- var_print(writeback_rate_p_term_inverse);
-@@ -232,6 +234,14 @@ STORE(__cached_dev)
- d_strtoul(writeback_rate_i_term_inverse);
- d_strtoul_nonzero(writeback_rate_p_term_inverse);
-
-+ sysfs_strtoul_clamp(io_error_limit, dc->error_limit, 0, INT_MAX);
-+
-+ if (attr == &sysfs_io_disable) {
-+ int v = strtoul_or_return(buf);
-+
-+ dc->io_disable = v ? 1 : 0;
-+ }
-+
- d_strtoi_h(sequential_cutoff);
- d_strtoi_h(readahead);
-
-@@ -352,6 +362,9 @@ static struct attribute *bch_cached_dev_files[] = {
- &sysfs_writeback_rate_i_term_inverse,
- &sysfs_writeback_rate_p_term_inverse,
- &sysfs_writeback_rate_debug,
-+ &sysfs_errors,
-+ &sysfs_io_error_limit,
-+ &sysfs_io_disable,
- &sysfs_dirty_data,
- &sysfs_stripe_size,
- &sysfs_partial_stripes_expensive,
---
-2.16.1
-
diff --git a/for-next/v6/v6-0009-bcache-stop-bcache-device-when-backing-device-is-.patch b/for-next/v6/v6-0009-bcache-stop-bcache-device-when-backing-device-is-.patch
deleted file mode 100644
index bf2af00..0000000
--- a/for-next/v6/v6-0009-bcache-stop-bcache-device-when-backing-device-is-.patch
+++ /dev/null
@@ -1,152 +0,0 @@
-From 15a23aafbcfd8e92fc7e1740c8e53d9c64c9fde1 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Mon, 5 Feb 2018 23:52:40 +0800
-Subject: [PATCH v6 9/9] bcache: stop bcache device when backing device is
- offline
-
-Currently bcache does not handle backing device failure. If the backing
-device is offline and disconnected from the system, its bcache device can
-still be accessible. If the bcache device is in writeback mode, I/O requests
-can even succeed if they hit the cache device. That is to say, when and
-how bcache handles an offline backing device is undefined.
-
-This patch tries to handle backing device offline in a rather simple way:
-- Add cached_dev->status_update_thread kernel thread to update backing
- device status every 1 second.
-- Add cached_dev->offline_seconds to record how many seconds the backing
- device is observed to be offline. If the backing device is offline for
- BACKING_DEV_OFFLINE_TIMEOUT (5) seconds, set dc->io_disable to 1 and
- call bcache_device_stop() to stop the bcache device which is linked to
- the offline backing device.
-
-Now if a backing device is offline for BACKING_DEV_OFFLINE_TIMEOUT seconds,
-its bcache device will be removed; user space applications writing to it
-will then get an error immediately, and can handle the device failure in time.
-
-This patch is quite simple and does not handle more complicated situations.
-Once the bcache device is stopped, users need to recover the backing
-device, then register and attach it manually.
-
-Changelog:
-v2: remove "bcache: " prefix when calling pr_warn().
-v1: initial version.
-
-Signed-off-by: Coly Li <colyli@suse.de>
-Reviewed-by: Hannes Reinecke <hare@suse.com>
-Cc: Michael Lyle <mlyle@lyle.org>
-Cc: Junhui Tang <tang.junhui@zte.com.cn>
----
- drivers/md/bcache/bcache.h | 2 ++
- drivers/md/bcache/super.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++
- 2 files changed, 57 insertions(+)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index dbc4fb48c754..e465a661f32e 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -344,6 +344,7 @@ struct cached_dev {
-
- struct keybuf writeback_keys;
-
-+ struct task_struct *status_update_thread;
- /*
- * Order the write-half of writeback operations strongly in dispatch
- * order. (Maintain LBA order; don't allow reads completing out of
-@@ -391,6 +392,7 @@ struct cached_dev {
- #define DEFAULT_CACHED_DEV_ERROR_LIMIT 64
- atomic_t io_errors;
- unsigned error_limit;
-+ unsigned offline_seconds;
- };
-
- enum alloc_reserve {
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 1c5b7074bd6c..ea25cef924ff 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -654,6 +654,11 @@ static int ioctl_dev(struct block_device *b, fmode_t mode,
- unsigned int cmd, unsigned long arg)
- {
- struct bcache_device *d = b->bd_disk->private_data;
-+ struct cached_dev *dc = container_of(d, struct cached_dev, disk);
-+
-+ if (dc->io_disable)
-+ return -EIO;
-+
- return d->ioctl(d, mode, cmd, arg);
- }
-
-@@ -864,6 +869,45 @@ static void calc_cached_dev_sectors(struct cache_set *c)
- c->cached_dev_sectors = sectors;
- }
-
-+#define BACKING_DEV_OFFLINE_TIMEOUT 5
-+static int cached_dev_status_update(void *arg)
-+{
-+ struct cached_dev *dc = arg;
-+ struct request_queue *q;
-+ char buf[BDEVNAME_SIZE];
-+
-+ /*
-+ * If this delayed worker is stopping outside, directly quit here.
-+ * dc->io_disable might be set via sysfs interface, so check it
-+ * here too.
-+ */
-+ while (!kthread_should_stop() && !dc->io_disable) {
-+ q = bdev_get_queue(dc->bdev);
-+ if (blk_queue_dying(q))
-+ dc->offline_seconds++;
-+ else
-+ dc->offline_seconds = 0;
-+
-+ if (dc->offline_seconds >= BACKING_DEV_OFFLINE_TIMEOUT) {
-+ pr_err("%s: device offline for %d seconds",
-+ bdevname(dc->bdev, buf),
-+ BACKING_DEV_OFFLINE_TIMEOUT);
-+ pr_err("%s: disable I/O request due to backing "
-+ "device offline", dc->disk.name);
-+ dc->io_disable = true;
-+ /* let others know earlier that io_disable is true */
-+ smp_mb();
-+ bcache_device_stop(&dc->disk);
-+ break;
-+ }
-+
-+ schedule_timeout_interruptible(HZ);
-+ }
-+
-+ dc->status_update_thread = NULL;
-+ return 0;
-+}
-+
- void bch_cached_dev_run(struct cached_dev *dc)
- {
- struct bcache_device *d = &dc->disk;
-@@ -906,6 +950,15 @@ void bch_cached_dev_run(struct cached_dev *dc)
- if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") ||
- sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache"))
- pr_debug("error creating sysfs link");
-+
-+ dc->status_update_thread = kthread_run(cached_dev_status_update,
-+ dc,
-+ "bcache_status_update");
-+ if (IS_ERR(dc->status_update_thread)) {
-+ pr_warn("failed to create bcache_status_update kthread, "
-+ "continue to run without monitoring backing "
-+ "device status");
-+ }
- }
-
- /*
-@@ -1128,6 +1181,8 @@ static void cached_dev_free(struct closure *cl)
- kthread_stop(dc->writeback_thread);
- if (dc->writeback_write_wq)
- destroy_workqueue(dc->writeback_write_wq);
-+ if (!IS_ERR_OR_NULL(dc->status_update_thread))
-+ kthread_stop(dc->status_update_thread);
-
- if (atomic_read(&dc->running))
- bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
---
-2.16.1
-