author     Coly Li <colyli@suse.de>  2018-01-11 00:29:07 +0800
committer  Coly Li <colyli@suse.de>  2018-01-11 00:29:07 +0800
commit     d07c2c86cd49f0dff1028d53479339531a8aba87 (patch)
tree       ed34de86cb5294ee1cfde0a3bad066c218600d97
parent     3ba7b8393a7477aa4e3d7d93f749584a844ce42c (diff)
download   bcache-patches-d07c2c86cd49f0dff1028d53479339531a8aba87.tar.gz
for-next: update v2 patch for bcache device failure.
-rw-r--r--  for-next/v2-0001-bcache-exit-bch_writeback_thread-with-proper-task.patch  58
-rw-r--r--  for-next/v2-0001-bcache-properly-set-task-state-in-bch_writeback_t.patch  87
-rw-r--r--  for-next/v2-0002-bcache-set-task-properly-in-allocator_wait.patch  58
-rw-r--r--  for-next/v2-0003-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch (renamed from for-next/v2-0004-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch)  62
-rw-r--r--  for-next/v2-0003-bcache-reduce-cache_set-devices-iteration-by-devi.patch  119
-rw-r--r--  for-next/v2-0004-bcache-stop-dc-writeback_rate_update-properly.patch  224
-rw-r--r--  for-next/v2-0005-bcache-set-error_limit-correctly.patch (renamed from for-next/v2-0007-bcache-set-error_limit-correctly.patch)  23
-rw-r--r--  for-next/v2-0005-bcache-stop-dc-writeback_rate_update-if-cache-set.patch  68
-rw-r--r--  for-next/v2-0006-bcache-add-CACHE_SET_IO_DISABLE-to-struct-cache_s.patch (renamed from for-next/v2-0009-bcache-add-io_disable-to-struct-cache_set.patch)  294
-rw-r--r--  for-next/v2-0006-bcache-stop-dc-writeback_rate_update-dc-writeback.patch  122
-rw-r--r--  for-next/v2-0007-bcache-stop-all-attached-bcache-devices-for-a-ret.patch (renamed from for-next/v2-0010-bcache-stop-all-attached-bcache-devices-for-a-ret.patch)  15
-rw-r--r--  for-next/v2-0008-bcache-fix-inaccurate-io-state-for-detached-bcach.patch  118
-rw-r--r--  for-next/v2-0008-bcache-fix-misleading-error-message-in-bch_count_.patch  118
-rw-r--r--  for-next/v2-0009-bcache-add-backing_request_endio-for-bi_end_io-of.patch (renamed from for-next/v2-0011-bcache-add-backing_request_endio-for-bi_end_io-of.patch)  48
-rw-r--r--  for-next/v2-0010-bcache-add-io_disable-to-struct-cached_dev.patch (renamed from for-next/v2-0012-bcache-add-io_disable-to-struct-cached_dev.patch)  75
-rw-r--r--  for-next/v2-0011-bcache-stop-bcache-device-when-backing-device-is-.patch  148
-rw-r--r--  for-next/v2-0013-bcache-stop-bcache-device-when-backing-device-is-.patch  99
17 files changed, 888 insertions, 848 deletions
diff --git a/for-next/v2-0001-bcache-exit-bch_writeback_thread-with-proper-task.patch b/for-next/v2-0001-bcache-exit-bch_writeback_thread-with-proper-task.patch
deleted file mode 100644
index 16c789d..0000000
--- a/for-next/v2-0001-bcache-exit-bch_writeback_thread-with-proper-task.patch
+++ /dev/null
@@ -1,58 +0,0 @@
-From 02cd6111e6e305665b9b734b41d9e66735eefba5 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 20 Dec 2017 20:32:58 +0800
-Subject: [PATCH v2 01/13] bcache: exit bch_writeback_thread() with proper task
- state
-
-Kernel thread routine bch_writeback_thread() has the following code block,
-
-452 set_current_state(TASK_INTERRUPTIBLE);
-453
-454 if (kthread_should_stop())
-455 return 0;
-456
-457 schedule();
-458 continue;
-
-At line 452, its status is set to TASK_INTERRUPTIBLE, and at line 454 if
-kthread_should_stop() is true, a "return 0" at line 455 will to function
-kernel/kthread.c:kthread() and call do_exit().
-
-It is not good to enter do_exit() with task state TASK_INTERRUPTIBLE, in
-following code path might_sleep() is called and a warning message is
-reported by __might_sleep(): "WARNING: do not call blocking ops when
-!TASK_RUNNING; state=1 set at [xxxx]".
-
-Indeed it does not hurt when kernel thread exits with TASK_INTERRUPTIBLE
-state, but this warning message scares users, makes them feel there might
-be something risky with bcache and hurt their data.
-
-In this patch, TASK_INTERRUPTIBLE is set after kthread_should_stop(),
-so writeback kernel thread can exist and enter do_exit() with
-TASK_RUNNING state. Warning message from might_sleep() is removed.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/writeback.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index 56a37884ca8b..a57149803df6 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -449,11 +449,11 @@ static int bch_writeback_thread(void *arg)
- (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
- !dc->writeback_running)) {
- up_write(&dc->writeback_lock);
-- set_current_state(TASK_INTERRUPTIBLE);
-
- if (kthread_should_stop())
- return 0;
-
-+ set_current_state(TASK_INTERRUPTIBLE);
- schedule();
- continue;
- }
---
-2.15.1
-
diff --git a/for-next/v2-0001-bcache-properly-set-task-state-in-bch_writeback_t.patch b/for-next/v2-0001-bcache-properly-set-task-state-in-bch_writeback_t.patch
new file mode 100644
index 0000000..9defd10
--- /dev/null
+++ b/for-next/v2-0001-bcache-properly-set-task-state-in-bch_writeback_t.patch
@@ -0,0 +1,87 @@
+From 603a4e1966e353f4709d1352842c0b42aa1a74aa Mon Sep 17 00:00:00 2001
+From: Coly Li <colyli@suse.de>
+Date: Mon, 8 Jan 2018 22:11:01 +0800
+Subject: [PATCH v2 01/11] bcache: properly set task state in
+ bch_writeback_thread()
+
+Kernel thread routine bch_writeback_thread() has the following code block,
+
+447 down_write(&dc->writeback_lock);
+448~450 if (check conditions) {
+451 up_write(&dc->writeback_lock);
+452 set_current_state(TASK_INTERRUPTIBLE);
+453
+454 if (kthread_should_stop())
+455 return 0;
+456
+457 schedule();
+458 continue;
+459 }
+
+If the condition check is true, its task state is set to TASK_INTERRUPTIBLE
+and schedule() is called to wait for others to wake it up.
+
+There are 2 issues in the current code,
+1, Task state is set to TASK_INTERRUPTIBLE after the condition checks. If
+   another process changes the condition and calls wake_up_process(dc->
+   writeback_thread) in between, then at line 452 the task state is set
+   back to TASK_INTERRUPTIBLE and the writeback kernel thread loses the
+   chance to be woken up.
+2, At line 454 if kthread_should_stop() is true, writeback kernel thread
+ will return to kernel/kthread.c:kthread() with TASK_INTERRUPTIBLE and
+ call do_exit(). It is not good to enter do_exit() with task state
+   TASK_INTERRUPTIBLE; in the following code path might_sleep() is called and a
+ warning message is reported by __might_sleep(): "WARNING: do not call
+ blocking ops when !TASK_RUNNING; state=1 set at [xxxx]".
+
+For the first issue, the task state should be set before the condition checks.
+Indeed, because dc->writeback_lock is required when modifying all the
+conditions, calling set_current_state() inside the code block where dc->
+writeback_lock is held is safe. But this is quite implicit, so I still move
+set_current_state() before all the condition checks.
+
+For the second issue, frankly speaking it does not hurt when a kernel thread
+exits with TASK_INTERRUPTIBLE state, but this warning message scares users,
+making them feel there might be something risky in bcache that could hurt
+their data. Setting the task state to TASK_RUNNING before returning fixes
+this problem.
+
+Signed-off-by: Coly Li <colyli@suse.de>
+Cc: Michael Lyle <mlyle@lyle.org>
+Cc: Hannes Reinecke <hare@suse.de>
+Cc: Junhui Tang <tang.junhui@zte.com.cn>
+---
+ drivers/md/bcache/writeback.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
+index 61f24c04cebd..f2924cbb8cdc 100644
+--- a/drivers/md/bcache/writeback.c
++++ b/drivers/md/bcache/writeback.c
+@@ -541,18 +541,21 @@ static int bch_writeback_thread(void *arg)
+
+ while (!kthread_should_stop()) {
+ down_write(&dc->writeback_lock);
++ set_current_state(TASK_INTERRUPTIBLE);
+ if (!atomic_read(&dc->has_dirty) ||
+ (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
+ !dc->writeback_running)) {
+ up_write(&dc->writeback_lock);
+- set_current_state(TASK_INTERRUPTIBLE);
+
+- if (kthread_should_stop())
++ if (kthread_should_stop()) {
++ set_current_state(TASK_RUNNING);
+ return 0;
++ }
+
+ schedule();
+ continue;
+ }
++ set_current_state(TASK_RUNNING);
+
+ searched_full_index = refill_dirty(dc);
+
+--
+2.15.1
+
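For reference, the loop shape this patch converges on can be shown as a minimal, self-contained kthread sketch (not bcache code; the have_work flag and the thread body are illustrative): set TASK_INTERRUPTIBLE before testing the sleep condition, and restore TASK_RUNNING before returning when kthread_should_stop() is true.

#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/types.h>

static int example_thread(void *arg)
{
	bool *have_work = arg;		/* illustrative sleep condition */

	while (!kthread_should_stop()) {
		/* Mark the task sleeping *before* testing the condition,
		 * so a wakeup racing with the test is not lost. */
		set_current_state(TASK_INTERRUPTIBLE);

		if (!*have_work) {
			if (kthread_should_stop()) {
				/* never enter do_exit() while INTERRUPTIBLE */
				__set_current_state(TASK_RUNNING);
				return 0;
			}
			schedule();
			continue;
		}
		__set_current_state(TASK_RUNNING);

		/* ... do the real work here ... */
	}
	return 0;
}

A wakeup that arrives between set_current_state() and schedule() sets the task back to TASK_RUNNING, so schedule() returns promptly instead of the thread sleeping past the event.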
diff --git a/for-next/v2-0002-bcache-set-task-properly-in-allocator_wait.patch b/for-next/v2-0002-bcache-set-task-properly-in-allocator_wait.patch
index 0ae9c81..5800840 100644
--- a/for-next/v2-0002-bcache-set-task-properly-in-allocator_wait.patch
+++ b/for-next/v2-0002-bcache-set-task-properly-in-allocator_wait.patch
@@ -1,14 +1,11 @@
-From 9eb34cfed6f7cf086a31d0e01f79548aaa82eab9 Mon Sep 17 00:00:00 2001
+From 2723ee397093520bd24c33e54e241ea754bcc5ef Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
-Date: Wed, 20 Dec 2017 22:37:11 +0800
-Subject: [PATCH v2 02/13] bcache: set task properly in allocator_wait()
+Date: Mon, 8 Jan 2018 22:45:51 +0800
+Subject: [PATCH v2 02/11] bcache: set task properly in allocator_wait()
Kernel thread routine bch_allocator_thread() references macro
allocator_wait() to wait for a condition or quit to do_exit()
-when kthread_should_stop() is true.
-
-Macro allocator_wait() has 2 issues in setting task state, let's
-see its code piece,
+when kthread_should_stop() is true. Here is the code block,
284 while (1) { \
285 set_current_state(TASK_INTERRUPTIBLE); \
@@ -24,56 +21,41 @@ see its code piece,
295 } \
296 __set_current_state(TASK_RUNNING); \
-1) At line 285, task state is set to TASK_INTERRUPTIBLE, if at line 290
+At line 285, task state is set to TASK_INTERRUPTIBLE. If at line 290
kthread_should_stop() is true, the kernel thread will terminate and return
to kernel/kthread.c:kthread(), then calls do_exit() with TASK_INTERRUPTIBLE
state. This is not a suggested behavior and a warning message will be
reported by might_sleep() in do_exit() code path: "WARNING: do not call
blocking ops when !TASK_RUNNING; state=1 set at [xxxx]".
-2) Because task state is set to TASK_INTERRUPTIBLE at line 285, when break
-while-loop the task state has to be set back to TASK_RUNNING at line 296.
-Indeed it is unncessary, if task state is set to TASK_INTERRUPTIBLE before
-calling schedule() at line 293, we don't need to set the state back to
-TASK_RUNNING at line 296 anymore. The reason is, allocator kthread is only
-woken up by wake_up_process(), this routine makes sure the task state of
-allocator kthread will be TASK_RUNNING after it returns from schedule() at
-line 294 (see kernel/sched/core.c:try_to_wake_up() for more detailed
-information).
-
-This patch fixes the above 2 issues by,
-1) Setting TASK_INTERRUPTIBLE state just before calling schedule().
-2) Then setting TASK_RUNNING at line 296 is unnecessary, remove it.
+This patch fixes this problem by setting the task state to TASK_RUNNING if
+kthread_should_stop() is true, before the kernel thread returns back to
+kernel/kthread.c:kthread().
Signed-off-by: Coly Li <colyli@suse.de>
+Cc: Michael Lyle <mlyle@lyle.org>
+Cc: Hannes Reinecke <hare@suse.de>
+Cc: Junhui Tang <tang.junhui@zte.com.cn>
---
- drivers/md/bcache/alloc.c | 3 +--
- 1 file changed, 1 insertion(+), 2 deletions(-)
+ drivers/md/bcache/alloc.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
-index a0cc1bc6d884..48c002faf08d 100644
+index 6cc6c0f9c3a9..458e1d38577d 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
-@@ -282,7 +282,6 @@ static void invalidate_buckets(struct cache *ca)
- #define allocator_wait(ca, cond) \
- do { \
- while (1) { \
-- set_current_state(TASK_INTERRUPTIBLE); \
- if (cond) \
+@@ -287,8 +287,10 @@ do { \
break; \
\
-@@ -290,10 +289,10 @@ do { \
- if (kthread_should_stop()) \
+ mutex_unlock(&(ca)->set->bucket_lock); \
+- if (kthread_should_stop()) \
++ if (kthread_should_stop()) { \
++ set_current_state(TASK_RUNNING); \
return 0; \
++ } \
\
-+ set_current_state(TASK_INTERRUPTIBLE); \
schedule(); \
mutex_lock(&(ca)->set->bucket_lock); \
- } \
-- __set_current_state(TASK_RUNNING); \
- } while (0)
-
- static int bch_allocator_push(struct cache *ca, long bucket)
--
2.15.1
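The resulting wait-macro shape can be sketched as follows (a condensed, hypothetical variant of the allocator_wait() idea with the bucket_lock handling elided); it has to be expanded inside a kthread function that returns int, just like bch_allocator_thread():

#include <linux/kthread.h>
#include <linux/sched.h>

/* Sketch only: TASK_INTERRUPTIBLE is set before the condition test and
 * TASK_RUNNING is restored on both exit paths (break and return). */
#define example_wait(cond)						\
do {									\
	while (1) {							\
		set_current_state(TASK_INTERRUPTIBLE);			\
		if (cond)						\
			break;						\
									\
		if (kthread_should_stop()) {				\
			/* leave in TASK_RUNNING before do_exit() */	\
			set_current_state(TASK_RUNNING);		\
			return 0;					\
		}							\
		schedule();						\
	}								\
	__set_current_state(TASK_RUNNING);				\
} while (0)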
diff --git a/for-next/v2-0004-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch b/for-next/v2-0003-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch
index 93f1461..b99c8e7 100644
--- a/for-next/v2-0004-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch
+++ b/for-next/v2-0003-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch
@@ -1,7 +1,7 @@
-From d697858f6f515b4bacee984c82535cf2b896ace9 Mon Sep 17 00:00:00 2001
+From df0607cb405ac954d3cc6c4de3c904904a71ba7b Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
-Date: Fri, 22 Dec 2017 16:37:17 +0800
-Subject: [PATCH v2 04/13] bcache: fix cached_dev->count usage for
+Date: Mon, 8 Jan 2018 23:05:58 +0800
+Subject: [PATCH v2 03/11] bcache: fix cached_dev->count usage for
bch_cache_set_error()
When bcache metadata I/O fails, bcache will call bch_cache_set_error()
@@ -17,17 +17,17 @@ entry /sys/fs/bcache/<uuid> still exits and the backing device also
references it. This is not expected behavior.
When metadata I/O fails, the call sequence to retire the whole cache set is,
- bch_cache_set_error()
- bch_cache_set_unregister()
- bch_cache_set_stop()
- __cache_set_unregister() <- called as callback by calling
- clousre_queue(&c->caching)
- cache_set_flush() <- called as a callback when refcount
- of cache_set->caching is 0
- cache_set_free() <- called as a callback when refcount
- of catch_set->cl is 0
- bch_cache_set_release() <- called as a callback when refcount
- of catch_set->kobj is 0
+ bch_cache_set_error()
+ bch_cache_set_unregister()
+ bch_cache_set_stop()
+ __cache_set_unregister() <- called as callback by calling
+ closure_queue(&c->caching)
+ cache_set_flush() <- called as a callback when refcount
+ of cache_set->caching is 0
+ cache_set_free() <- called as a callback when refcount
+ of cache_set->cl is 0
+ bch_cache_set_release() <- called as a callback when refcount
+ of cache_set->kobj is 0
I find that if kernel thread bch_writeback_thread() quits its while-loop when
kthread_should_stop() is true and searched_full_index is false, closure
@@ -73,7 +73,7 @@ Adding following code in bch_writeback_thread() does not work,
return 0;
}
because the writeback kernel thread can be woken up and started via the sysfs entry:
- echo 1 > /sys/block/bcache<N>/bcache/writeback_running
+ echo 1 > /sys/block/bcache<N>/bcache/writeback_running
It is difficult to check whether the backing device is dirty without races and
extra locking. So the above modification would introduce potential refcount
underflow in some conditions.
@@ -90,17 +90,20 @@ errors or system reboot, cached_dev->count can always be used correctly.
The patch is simple, but understanding how it works is quite complicated.
Signed-off-by: Coly Li <colyli@suse.de>
+Reviewed-by: Hannes Reinecke <hare@suse.com>
+Cc: Michael Lyle <mlyle@lyle.org>
+Cc: Junhui Tang <tang.junhui@zte.com.cn>
---
drivers/md/bcache/super.c | 1 -
- drivers/md/bcache/writeback.c | 10 +++++++---
+ drivers/md/bcache/writeback.c | 11 ++++++++---
drivers/md/bcache/writeback.h | 2 --
- 3 files changed, 7 insertions(+), 6 deletions(-)
+ 3 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 064efd869017..5401d2356aa3 100644
+index 78fd04bf0534..805b6c37c063 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
-@@ -1044,7 +1044,6 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
+@@ -1052,7 +1052,6 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
bch_sectors_dirty_init(&dc->disk);
atomic_set(&dc->has_dirty, 1);
@@ -109,19 +112,19 @@ index 064efd869017..5401d2356aa3 100644
}
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index a57149803df6..0789a9e18337 100644
+index f2924cbb8cdc..228271fe622b 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
-@@ -451,7 +451,7 @@ static int bch_writeback_thread(void *arg)
- up_write(&dc->writeback_lock);
+@@ -549,7 +549,7 @@ static int bch_writeback_thread(void *arg)
- if (kthread_should_stop())
+ if (kthread_should_stop()) {
+ set_current_state(TASK_RUNNING);
- return 0;
+ break;
+ }
- set_current_state(TASK_INTERRUPTIBLE);
schedule();
-@@ -463,7 +463,6 @@ static int bch_writeback_thread(void *arg)
+@@ -562,7 +562,6 @@ static int bch_writeback_thread(void *arg)
if (searched_full_index &&
RB_EMPTY_ROOT(&dc->writeback_keys.keys)) {
atomic_set(&dc->has_dirty, 0);
@@ -129,16 +132,17 @@ index a57149803df6..0789a9e18337 100644
SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN);
bch_write_bdev_super(dc, NULL);
}
-@@ -484,6 +483,8 @@ static int bch_writeback_thread(void *arg)
+@@ -583,6 +582,9 @@ static int bch_writeback_thread(void *arg)
}
}
++ dc->writeback_thread = NULL;
+ cached_dev_put(dc);
+
return 0;
}
-@@ -547,10 +548,13 @@ int bch_cached_dev_writeback_start(struct cached_dev *dc)
+@@ -646,10 +648,13 @@ int bch_cached_dev_writeback_start(struct cached_dev *dc)
if (!dc->writeback_write_wq)
return -ENOMEM;
@@ -154,10 +158,10 @@ index a57149803df6..0789a9e18337 100644
schedule_delayed_work(&dc->writeback_rate_update,
dc->writeback_rate_update_seconds * HZ);
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
-index 1d284f3d0363..aab21afe49cf 100644
+index f102b1f9bc51..665dd3184ab3 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
-@@ -92,8 +92,6 @@ static inline void bch_writeback_add(struct cached_dev *dc)
+@@ -95,8 +95,6 @@ static inline void bch_writeback_add(struct cached_dev *dc)
{
if (!atomic_read(&dc->has_dirty) &&
!atomic_xchg(&dc->has_dirty, 1)) {
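The lifetime rule behind this change (the writeback thread holds a cached_dev reference until it exits and drops it itself, as the added cached_dev_put() above shows) can be illustrated with a generic kref-based sketch; example_dev, example_worker and friends are invented names, not bcache's cached_dev_get()/cached_dev_put() API:

#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/slab.h>

struct example_dev {
	struct kref		ref;	/* kref_init() done at allocation time */
	struct task_struct	*worker;
};

static void example_release(struct kref *ref)
{
	kfree(container_of(ref, struct example_dev, ref));
}

/* The worker drops its own reference when it exits, so teardown of the
 * device (and anything it points at) cannot complete under its feet. */
static int example_worker(void *arg)
{
	struct example_dev *d = arg;

	while (!kthread_should_stop())
		schedule_timeout_interruptible(HZ);

	kref_put(&d->ref, example_release);	/* analogous to cached_dev_put() */
	return 0;
}

static int example_start(struct example_dev *d)
{
	kref_get(&d->ref);			/* analogous to cached_dev_get() */
	d->worker = kthread_run(example_worker, d, "example_worker");
	if (IS_ERR(d->worker)) {
		int err = PTR_ERR(d->worker);

		d->worker = NULL;
		kref_put(&d->ref, example_release);
		return err;
	}
	return 0;
}

Whether the thread exits because kthread_stop() was called or because the device is going away, the final reference drop (and therefore the rest of the teardown chain) only happens after the thread is done touching the device.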
diff --git a/for-next/v2-0003-bcache-reduce-cache_set-devices-iteration-by-devi.patch b/for-next/v2-0003-bcache-reduce-cache_set-devices-iteration-by-devi.patch
deleted file mode 100644
index 002212e..0000000
--- a/for-next/v2-0003-bcache-reduce-cache_set-devices-iteration-by-devi.patch
+++ /dev/null
@@ -1,119 +0,0 @@
-From fd33195d255d0f152d9e2b36032b1cc816ededb3 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 20 Dec 2017 23:27:41 +0800
-Subject: [PATCH v2 03/13] bcache: reduce cache_set devices iteration by
- devices_max_used
-
-Member devices of struct cache_set is used to reference all attached
-bcache devices to this cache set. If it is treated as array of pointers,
-size of devices[] is indicated by member nr_uuids of struct cache_set.
-
-nr_uuids is calculated in drivers/md/super.c:bch_cache_set_alloc(),
- bucket_bytes(c) / sizeof(struct uuid_entry)
-Bucket size is determined by user space tool "make-bcache", by default it
-is 1024 sectors (defined in bcache-tools/make-bcache.c:main()). So default
-nr_uuids value is 4096 from the above calculation.
-
-Every time when bcache code iterates bcache devices of a cache set, all
-the 4096 pointers are checked even only 1 bcache device is attached to the
-cache set, that's a wast of time and unncessary.
-
-This patch adds a member devices_max_used to struct cache_set. Its value
-is 1 + the maximum used index of devices[] in a cache set. When iterating
-all valid bcache devices of a cache set, use c->devices_max_used in
-for-loop may reduce a lot of useless checking.
-
-Personally, my motivation of this patch is not for performance, I use it
-in bcache debugging, which helps me to narrow down the scape to check
-valid bcached devices of a cache set.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/bcache.h | 1 +
- drivers/md/bcache/btree.c | 2 +-
- drivers/md/bcache/super.c | 9 ++++++---
- drivers/md/bcache/writeback.h | 2 +-
- 4 files changed, 9 insertions(+), 5 deletions(-)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 843877e017e1..83c569942bd0 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -488,6 +488,7 @@ struct cache_set {
- int caches_loaded;
-
- struct bcache_device **devices;
-+ unsigned devices_max_used;
- struct list_head cached_devs;
- uint64_t cached_dev_sectors;
- struct closure caching;
-diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
-index 81e8dc3dbe5e..bf0d7978bc3d 100644
---- a/drivers/md/bcache/btree.c
-+++ b/drivers/md/bcache/btree.c
-@@ -1678,7 +1678,7 @@ static void bch_btree_gc_finish(struct cache_set *c)
-
- /* don't reclaim buckets to which writeback keys point */
- rcu_read_lock();
-- for (i = 0; i < c->nr_uuids; i++) {
-+ for (i = 0; i < c->devices_max_used; i++) {
- struct bcache_device *d = c->devices[i];
- struct cached_dev *dc;
- struct keybuf_key *w, *n;
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index b4d28928dec5..064efd869017 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -721,6 +721,9 @@ static void bcache_device_attach(struct bcache_device *d, struct cache_set *c,
- d->c = c;
- c->devices[id] = d;
-
-+ if (id >= c->devices_max_used)
-+ c->devices_max_used = id + 1;
-+
- closure_get(&c->caching);
- }
-
-@@ -1261,7 +1264,7 @@ static int flash_devs_run(struct cache_set *c)
- struct uuid_entry *u;
-
- for (u = c->uuids;
-- u < c->uuids + c->nr_uuids && !ret;
-+ u < c->uuids + c->devices_max_used && !ret;
- u++)
- if (UUID_FLASH_ONLY(u))
- ret = flash_dev_run(c, u);
-@@ -1427,7 +1430,7 @@ static void __cache_set_unregister(struct closure *cl)
-
- mutex_lock(&bch_register_lock);
-
-- for (i = 0; i < c->nr_uuids; i++)
-+ for (i = 0; i < c->devices_max_used; i++)
- if (c->devices[i]) {
- if (!UUID_FLASH_ONLY(&c->uuids[i]) &&
- test_bit(CACHE_SET_UNREGISTERING, &c->flags)) {
-@@ -1490,7 +1493,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
- c->bucket_bits = ilog2(sb->bucket_size);
- c->block_bits = ilog2(sb->block_size);
- c->nr_uuids = bucket_bytes(c) / sizeof(struct uuid_entry);
--
-+ c->devices_max_used = 0;
- c->btree_pages = bucket_pages(c);
- if (c->btree_pages > BTREE_MAX_PAGES)
- c->btree_pages = max_t(int, c->btree_pages / 4,
-diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
-index a9e3ffb4b03c..1d284f3d0363 100644
---- a/drivers/md/bcache/writeback.h
-+++ b/drivers/md/bcache/writeback.h
-@@ -21,7 +21,7 @@ static inline uint64_t bcache_flash_devs_sectors_dirty(struct cache_set *c)
-
- mutex_lock(&bch_register_lock);
-
-- for (i = 0; i < c->nr_uuids; i++) {
-+ for (i = 0; i < c->devices_max_used; i++) {
- struct bcache_device *d = c->devices[i];
-
- if (!d || !UUID_FLASH_ONLY(&c->uuids[i]))
---
-2.15.1
-
diff --git a/for-next/v2-0004-bcache-stop-dc-writeback_rate_update-properly.patch b/for-next/v2-0004-bcache-stop-dc-writeback_rate_update-properly.patch
new file mode 100644
index 0000000..2b28dc3
--- /dev/null
+++ b/for-next/v2-0004-bcache-stop-dc-writeback_rate_update-properly.patch
@@ -0,0 +1,224 @@
+From 41e9ba093c2394bb2ecfac36ff1ca0cb38050f8d Mon Sep 17 00:00:00 2001
+From: Coly Li <colyli@suse.de>
+Date: Tue, 9 Jan 2018 21:46:21 +0800
+Subject: [PATCH v2 04/11] bcache: stop dc->writeback_rate_update properly
+
+struct delayed_work writeback_rate_update in struct cached_dev is a delayed
+worker to call function update_writeback_rate() periodically (the interval is
+defined by dc->writeback_rate_update_seconds).
+
+When a metadata I/O error happens on the cache device, bcache error handling
+routine bch_cache_set_error() will call bch_cache_set_unregister() to
+retire whole cache set. On the unregister code path, this delayed work is
+stopped by calling cancel_delayed_work_sync(&dc->writeback_rate_update).
+
+dc->writeback_rate_update is a delayed work different from others in bcache.
+In its routine update_writeback_rate(), this delayed work re-arms
+itself. That means when cancel_delayed_work_sync() returns, this delayed
+work can still be executed after several seconds defined by
+dc->writeback_rate_update_seconds.
+
+The problem is, after cancel_delayed_work_sync() returns, the cache set
+unregister code path will continue and release the memory of struct cache_set.
+Then, when the delayed work is scheduled to run, __update_writeback_rate()
+will reference the already released cache_set memory and trigger a NULL
+pointer dereference fault.
+
+This patch moves cancel_delayed_work_sync() from cached_dev_detach_finish()
+to bch_cached_dev_detach(). The latter is called before cache_set_flush()
+gets called, so we don't need to worry that the cache_set memory is freed
+while __update_writeback_rate() is executing.
+
+Here is how this patch solves the access-after-freed problem.
+- Hold a cached_dev refcount.
+ When dc->writeback_rate_update is scheduled the first time, cached_dev_get()
+is called to hold a refcount of cached_dev. Then if this delayed work re-arms
+itself, the held refcount will make sure cached_dev_detach_finish()
+won't be called and all objects referenced by __update_writeback_rate()
+won't be freed.
+- Move cancel_delayed_work_sync() to bch_cached_dev_detach()
+ Currently cancel_delayed_work_sync() is called in
+cached_dev_detach_finish(). Since a refcount is taken the first time
+dc->writeback_rate_update is scheduled to run, if this refcount is not
+dropped by stopping the delayed work, cached_dev_detach_finish() will not
+be executed, which is a deadlock. Therefore cancel_delayed_work_sync() is
+moved to bch_cached_dev_detach(), and everything will be OK.
+- Add BCACHE_DEV_WB_RUNNING to bcache device flags
+ Set the flag before first time scheduling dc->writeback_rate_update, and
+clear the bit before calling cancel_delayed_work_sync(). Then inside
+update_writeback_rate() if BCACHE_DEV_WB_RUNNING is clear, it means this
+delayed work should call cached_dev_put() and quit.
+ If BCACHE_DEV_WB_RUNNING is set and cancel_delayed_work() is called
+outside after test_bit() and before schedule_delayed_work() in
+update_writeback_rate(), the delayed work will be re-armed while holding
+a refcount of cached_dev. So when next time it is scheduled to run,
+all referenced memory objects are still there and safe to access. And in
+the next run update_writeback_rate() will find BCACHE_DEV_WB_RUNNING is
+cleared, so it will call cached_dev_put() and quit safely.
+
+If dc->writeback_thread quits before dc->writeback_rate_update, calling
+cached_dev_put() in update_writeback_rate() will drop the last refcount of
+cached_dev, then cached_dev_detach_finish() will be called, and then in
+bcache_device_detach() the last refcount of closure cl->caching will be dropped
+and cache_set_flush() will be called.
+
+Signed-off-by: Coly Li <colyli@suse.de>
+Cc: Michael Lyle <mlyle@lyle.org>
+Cc: Hannes Reinecke <hare@suse.de>
+Cc: Junhui Tang <tang.junhui@zte.com.cn>
+---
+ drivers/md/bcache/bcache.h | 2 +-
+ drivers/md/bcache/super.c | 22 +++++++++++++++-------
+ drivers/md/bcache/sysfs.c | 10 +++++++++-
+ drivers/md/bcache/writeback.c | 26 ++++++++++++++++++++++++++
+ 4 files changed, 51 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
+index 5e2d4e80198e..147413ae5890 100644
+--- a/drivers/md/bcache/bcache.h
++++ b/drivers/md/bcache/bcache.h
+@@ -261,7 +261,7 @@ struct bcache_device {
+ #define BCACHE_DEV_CLOSING 0
+ #define BCACHE_DEV_DETACHING 1
+ #define BCACHE_DEV_UNLINK_DONE 2
+-
++#define BCACHE_DEV_WB_RUNNING 4
+ unsigned nr_stripes;
+ unsigned stripe_size;
+ atomic_t *stripe_sectors_dirty;
+diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
+index 805b6c37c063..7aa5786fcd9e 100644
+--- a/drivers/md/bcache/super.c
++++ b/drivers/md/bcache/super.c
+@@ -911,12 +911,6 @@ static void cached_dev_detach_finish(struct work_struct *w)
+
+ mutex_lock(&bch_register_lock);
+
+- cancel_delayed_work_sync(&dc->writeback_rate_update);
+- if (!IS_ERR_OR_NULL(dc->writeback_thread)) {
+- kthread_stop(dc->writeback_thread);
+- dc->writeback_thread = NULL;
+- }
+-
+ memset(&dc->sb.set_uuid, 0, 16);
+ SET_BDEV_STATE(&dc->sb, BDEV_STATE_NONE);
+
+@@ -954,6 +948,19 @@ void bch_cached_dev_detach(struct cached_dev *dc)
+ closure_get(&dc->disk.cl);
+
+ bch_writeback_queue(dc);
++
++ /*
++ * Stop dc->writeback_rate_update before cache_set_flush gets called,
++ * to make sure cache set memory won't be freed during routine
++ * update_writeback_rate() is executing.
++ */
++ if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
++ cancel_delayed_work_sync(&dc->writeback_rate_update);
++ if (!IS_ERR_OR_NULL(dc->writeback_thread)) {
++ kthread_stop(dc->writeback_thread);
++ dc->writeback_thread = NULL;
++ }
++
+ cached_dev_put(dc);
+ }
+
+@@ -1079,7 +1086,8 @@ static void cached_dev_free(struct closure *cl)
+ {
+ struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
+
+- cancel_delayed_work_sync(&dc->writeback_rate_update);
++ if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
++ cancel_delayed_work_sync(&dc->writeback_rate_update);
+ if (!IS_ERR_OR_NULL(dc->writeback_thread))
+ kthread_stop(dc->writeback_thread);
+ if (dc->writeback_write_wq)
+diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
+index b4184092c727..dcbeda994063 100644
+--- a/drivers/md/bcache/sysfs.c
++++ b/drivers/md/bcache/sysfs.c
+@@ -300,9 +300,17 @@ STORE(bch_cached_dev)
+ if (attr == &sysfs_writeback_running)
+ bch_writeback_queue(dc);
+
+- if (attr == &sysfs_writeback_percent)
++ if (attr == &sysfs_writeback_percent) {
++ /*
++ * If this is the first time to run dc->writeback_rate_update,
++ * get a cached_dev refcount. This refcount will be dropped inside
++ * update_writeback_rate().
++ */
++ if (!test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
++ cached_dev_get(dc);
+ schedule_delayed_work(&dc->writeback_rate_update,
+ dc->writeback_rate_update_seconds * HZ);
++ }
+
+ mutex_unlock(&bch_register_lock);
+ return size;
+diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
+index 228271fe622b..0ec23987f098 100644
+--- a/drivers/md/bcache/writeback.c
++++ b/drivers/md/bcache/writeback.c
+@@ -86,11 +86,24 @@ static void __update_writeback_rate(struct cached_dev *dc)
+ dc->writeback_rate_target = target;
+ }
+
++
++/*
++ * BCACHE_DEV_WB_RUNNING is set before scheduling dc->writeback_rate_update
++ * to run, and cached_dev_get() is also called where this bit is set.
++ * If BCACHE_DEV_WB_RUNNING is clear here, it means cancel_delayed_work_sync()
++ * was called to stop this delayed work. So it won't re-arm itself; it drops
++ * the cached_dev refcount via cached_dev_put() and quits immediately.
++ */
++
+ static void update_writeback_rate(struct work_struct *work)
+ {
+ struct cached_dev *dc = container_of(to_delayed_work(work),
+ struct cached_dev,
+ writeback_rate_update);
++ if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) {
++ cached_dev_put(dc);
++ return;
++ }
+
+ down_read(&dc->writeback_lock);
+
+@@ -100,6 +113,11 @@ static void update_writeback_rate(struct work_struct *work)
+
+ up_read(&dc->writeback_lock);
+
++ if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) {
++ cached_dev_put(dc);
++ return;
++ }
++
+ schedule_delayed_work(&dc->writeback_rate_update,
+ dc->writeback_rate_update_seconds * HZ);
+ }
+@@ -638,6 +656,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
+ dc->writeback_rate_p_term_inverse = 40;
+ dc->writeback_rate_i_term_inverse = 10000;
+
++ BUG_ON(test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
+ INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
+ }
+
+@@ -656,6 +675,13 @@ int bch_cached_dev_writeback_start(struct cached_dev *dc)
+ return PTR_ERR(dc->writeback_thread);
+ }
+
++ /*
++ * If dc->writeback_rate_update is first time to run, get a
++ * cached_dev refcount. This refcount will be dropped inside
++ * update_writeback_rate().
++ */
++ if (!test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
++ cached_dev_get(dc);
+ schedule_delayed_work(&dc->writeback_rate_update,
+ dc->writeback_rate_update_seconds * HZ);
+
+--
+2.15.1
+
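The flag-plus-refcount protocol for the self-rearming delayed work described above can be sketched as follows. All names are illustrative stand-ins (EXAMPLE_WB_RUNNING for BCACHE_DEV_WB_RUNNING, kref_get()/kref_put() for cached_dev_get()/cached_dev_put()); the teardown side, which clears the bit and then calls cancel_delayed_work_sync(), is the part the patch adds to bch_cached_dev_detach() and cached_dev_free() and is omitted here.

#include <linux/bitops.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

#define EXAMPLE_WB_RUNNING	0	/* stands in for BCACHE_DEV_WB_RUNNING */

struct example_dev {
	struct kref		ref;
	unsigned long		flags;
	struct delayed_work	rate_update;	/* INIT_DELAYED_WORK() done at init time */
};

static void example_release(struct kref *ref)
{
	kfree(container_of(ref, struct example_dev, ref));
}

/* Self-rearming callback: keep re-arming only while the RUNNING bit is set;
 * once the bit is cleared, drop the reference taken at arming time and stop. */
static void example_rate_update(struct work_struct *work)
{
	struct example_dev *d = container_of(to_delayed_work(work),
					     struct example_dev, rate_update);

	if (!test_bit(EXAMPLE_WB_RUNNING, &d->flags)) {
		kref_put(&d->ref, example_release);
		return;
	}

	/* ... recompute the writeback rate here ... */

	if (!test_bit(EXAMPLE_WB_RUNNING, &d->flags)) {
		kref_put(&d->ref, example_release);
		return;
	}
	schedule_delayed_work(&d->rate_update, 5 * HZ);
}

static void example_arm_rate_update(struct example_dev *d)
{
	/* Take the device reference only the first time the work is armed;
	 * the callback releases it once the RUNNING bit has been cleared. */
	if (!test_and_set_bit(EXAMPLE_WB_RUNNING, &d->flags))
		kref_get(&d->ref);
	schedule_delayed_work(&d->rate_update, 5 * HZ);
}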
diff --git a/for-next/v2-0007-bcache-set-error_limit-correctly.patch b/for-next/v2-0005-bcache-set-error_limit-correctly.patch
index 10f0217..4153421 100644
--- a/for-next/v2-0007-bcache-set-error_limit-correctly.patch
+++ b/for-next/v2-0005-bcache-set-error_limit-correctly.patch
@@ -1,7 +1,7 @@
-From 93e9a82ee54b8fb1e50c4df95a74ab2953aec9ff Mon Sep 17 00:00:00 2001
+From 1cd65ce62d9e25bbad99c48afcca28229bca680d Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
-Date: Wed, 3 Jan 2018 20:37:27 +0800
-Subject: [PATCH v2 07/13] bcache: set error_limit correctly
+Date: Tue, 9 Jan 2018 22:46:25 +0800
+Subject: [PATCH v2 05/11] bcache: set error_limit correctly
Struct cache uses io_errors for two purposes,
- Error decay: when cache set error_decay is set, io_errors is used to
@@ -56,6 +56,9 @@ the default io error limit value makes sense, it is enough for most of
cache devices.
Signed-off-by: Coly Li <colyli@suse.de>
+Reviewed-by: Hannes Reinecke <hare@suse.com>
+Cc: Michael Lyle <mlyle@lyle.org>
+Cc: Junhui Tang <tang.junhui@zte.com.cn>
---
drivers/md/bcache/bcache.h | 1 +
drivers/md/bcache/super.c | 2 +-
@@ -63,10 +66,10 @@ Signed-off-by: Coly Li <colyli@suse.de>
3 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 395b87942a2f..a31dc3737dae 100644
+index 147413ae5890..9d18a02db723 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
-@@ -654,6 +654,7 @@ struct cache_set {
+@@ -662,6 +662,7 @@ struct cache_set {
ON_ERROR_UNREGISTER,
ON_ERROR_PANIC,
} on_error;
@@ -75,10 +78,10 @@ index 395b87942a2f..a31dc3737dae 100644
unsigned error_decay;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 8912be4165c5..02d9d7110769 100644
+index 7aa5786fcd9e..858d3dc4dbc8 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
-@@ -1561,7 +1561,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
+@@ -1560,7 +1560,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
c->congested_read_threshold_us = 2000;
c->congested_write_threshold_us = 20000;
@@ -88,10 +91,10 @@ index 8912be4165c5..02d9d7110769 100644
return c;
err:
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index b4184092c727..d7ce9a05b304 100644
+index dcbeda994063..8a89c3c4bdab 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
-@@ -556,7 +556,7 @@ SHOW(__bch_cache_set)
+@@ -564,7 +564,7 @@ SHOW(__bch_cache_set)
/* See count_io_errors for why 88 */
sysfs_print(io_error_halflife, c->error_decay * 88);
@@ -100,7 +103,7 @@ index b4184092c727..d7ce9a05b304 100644
sysfs_hprint(congested,
((uint64_t) bch_get_congested(c)) << 9);
-@@ -656,7 +656,7 @@ STORE(__bch_cache_set)
+@@ -664,7 +664,7 @@ STORE(__bch_cache_set)
}
if (attr == &sysfs_io_error_limit)
diff --git a/for-next/v2-0005-bcache-stop-dc-writeback_rate_update-if-cache-set.patch b/for-next/v2-0005-bcache-stop-dc-writeback_rate_update-if-cache-set.patch
deleted file mode 100644
index 489dee1..0000000
--- a/for-next/v2-0005-bcache-stop-dc-writeback_rate_update-if-cache-set.patch
+++ /dev/null
@@ -1,68 +0,0 @@
-From 1a9aae02c180b47b2ae2ef9c61915b2b694d1fc2 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sat, 23 Dec 2017 01:50:19 +0800
-Subject: [PATCH v2 05/13] bcache: stop dc->writeback_rate_update if cache set
- is stopping
-
-struct delayed_work writeback_rate_update in struct cache_dev is a delayed
-worker to call function update_writeback_rate() in period (the interval is
-defined by dc->writeback_rate_update_seconds).
-
-When a metadate I/O error happens on cache device, bcache error handling
-routine bch_cache_set_error() will call bch_cache_set_unregister() to
-retire whole cache set. On the unregister code path, cached_dev_free()
-calls cancel_delayed_work_sync(&dc->writeback_rate_update) to stop this
-delayed work.
-
-dc->writeback_rate_update is a special delayed work from others in bcache.
-In its routine update_writeback_rate(), this delayed work is re-armed
-after a piece of time. That means when cancel_delayed_work_sync() returns,
-this delayed work can still be executed after several seconds defined by
-dc->writeback_rate_update_seconds.
-
-The problem is, after cancel_delayed_work_sync() returns, the cache set
-unregister code path will eventually release memory of struct cache set.
-Then the delayed work is scheduled to run, and inside its rountine
-update_writeback_rate() that already released cache set NULL pointer will
-be accessed. Now a NULL pointer deference panic is triggered.
-
-In order to avoid the above problem, this patch checks cache set flags in
-delayed work routine update_writeback_rate(). If flag CACHE_SET_STOPPING
-is set, this routine will quit without re-arm the delayed work. Then the
-NULL pointer deference panic won't happen after cache set is released.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/writeback.c | 9 +++++++++
- 1 file changed, 9 insertions(+)
-
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index 0789a9e18337..745d9b2a326f 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -91,6 +91,11 @@ static void update_writeback_rate(struct work_struct *work)
- struct cached_dev *dc = container_of(to_delayed_work(work),
- struct cached_dev,
- writeback_rate_update);
-+ struct cache_set *c = dc->disk.c;
-+
-+ /* quit directly if cache set is stopping */
-+ if (test_bit(CACHE_SET_STOPPING, &c->flags))
-+ return;
-
- down_read(&dc->writeback_lock);
-
-@@ -100,6 +105,10 @@ static void update_writeback_rate(struct work_struct *work)
-
- up_read(&dc->writeback_lock);
-
-+ /* do not schedule delayed work if cache set is stopping */
-+ if (test_bit(CACHE_SET_STOPPING, &c->flags))
-+ return;
-+
- schedule_delayed_work(&dc->writeback_rate_update,
- dc->writeback_rate_update_seconds * HZ);
- }
---
-2.15.1
-
diff --git a/for-next/v2-0009-bcache-add-io_disable-to-struct-cache_set.patch b/for-next/v2-0006-bcache-add-CACHE_SET_IO_DISABLE-to-struct-cache_s.patch
index eaaeb51..00f826d 100644
--- a/for-next/v2-0009-bcache-add-io_disable-to-struct-cache_set.patch
+++ b/for-next/v2-0006-bcache-add-CACHE_SET_IO_DISABLE-to-struct-cache_s.patch
@@ -1,7 +1,8 @@
-From 5996e95d633ad28ebbd113004efc488162cd22b7 Mon Sep 17 00:00:00 2001
+From 374b72e439ddac280759ea97ba934272d2978698 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
-Date: Tue, 2 Jan 2018 17:31:07 +0800
-Subject: [PATCH v2 09/13] bcache: add io_disable to struct cache_set
+Date: Wed, 10 Jan 2018 00:18:01 +0800
+Subject: [PATCH v2 06/11] bcache: add CACHE_SET_IO_DISABLE to struct cache_set
+ flags
When too many I/Os failed on cache device, bch_cache_set_error() is called
in the error handling code path to retire whole problematic cache set. If
@@ -16,60 +17,76 @@ prevent the cache set from being retired.
The solution in this patch is, to add per cache set flag to disable I/O
request on this cache and all attached backing devices. Then new coming I/O
requests can be rejected in *_make_request() before taking refcount, kernel
-threads and self-armed kernel worker can stop very fast when io_disable is
-true.
+threads and self-armed kernel workers can stop very fast when the flag bit
+CACHE_SET_IO_DISABLE is set.
Because bcache also do internal I/Os for writeback, garbage collection,
bucket allocation, journaling, this kind of I/O should be disabled after
bch_cache_set_error() is called. So closure_bio_submit() is modified to
-check whether cache_set->io_disable is true. If cache_set->io_disable is
-true, closure_bio_submit() will set bio->bi_status to BLK_STS_IOERR and
+check whether CACHE_SET_IO_DISABLE is set on cache_set->flags. If set,
+closure_bio_submit() will set bio->bi_status to BLK_STS_IOERR and
return, generic_make_request() won't be called.
-A sysfs interface is also added for cache_set->io_disable, to read and set
-io_disable value for debugging. It is helpful to trigger more corner case
-issues for failed cache device.
+A sysfs interface is also added to set or clear the CACHE_SET_IO_DISABLE bit
+in cache_set->flags, to disable or enable cache set I/O for debugging. It
+is helpful for triggering more corner case issues on a failed cache device.
+
+Changelog
+v2, use cache_set->flags to set io disable bit, suggested by Junhui Tang.
+v1, initial version.
Signed-off-by: Coly Li <colyli@suse.de>
+Reviewed-by: Hannes Reinecke <hare@suse.com>
+Cc: Junhui Tang <tang.junhui@zte.com.cn>
+Cc: Michael Lyle <mlyle@lyle.org>
---
- drivers/md/bcache/alloc.c | 2 +-
- drivers/md/bcache/bcache.h | 14 ++++++++++++++
- drivers/md/bcache/btree.c | 6 ++++--
+ drivers/md/bcache/alloc.c | 3 ++-
+ drivers/md/bcache/bcache.h | 18 ++++++++++++++++++
+ drivers/md/bcache/btree.c | 7 +++++--
drivers/md/bcache/io.c | 2 +-
drivers/md/bcache/journal.c | 4 ++--
drivers/md/bcache/request.c | 26 +++++++++++++++++++-------
- drivers/md/bcache/super.c | 7 ++++++-
- drivers/md/bcache/sysfs.c | 4 ++++
+ drivers/md/bcache/super.c | 6 +++++-
+ drivers/md/bcache/sysfs.c | 19 +++++++++++++++++++
drivers/md/bcache/util.h | 6 ------
- drivers/md/bcache/writeback.c | 34 ++++++++++++++++++++++------------
- 10 files changed, 73 insertions(+), 32 deletions(-)
+ drivers/md/bcache/writeback.c | 36 ++++++++++++++++++++++++++++--------
+ 10 files changed, 99 insertions(+), 28 deletions(-)
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
-index 48c002faf08d..3be737582f27 100644
+index 458e1d38577d..004cc3cc6123 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
-@@ -286,7 +286,7 @@ do { \
+@@ -287,7 +287,8 @@ do { \
break; \
\
mutex_unlock(&(ca)->set->bucket_lock); \
-- if (kthread_should_stop()) \
-+ if (kthread_should_stop() || ca->set->io_disable) \
+- if (kthread_should_stop()) { \
++ if (kthread_should_stop() || \
++ test_bit(CACHE_SET_IO_DISABLE, &ca->set->flags)) { \
+ set_current_state(TASK_RUNNING); \
return 0; \
- \
- set_current_state(TASK_INTERRUPTIBLE); \
+ } \
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index c53f312b2216..9c7f9b1cb791 100644
+index 9d18a02db723..ab51984d12f6 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
-@@ -481,6 +481,7 @@ struct cache_set {
- struct cache_accounting accounting;
-
- unsigned long flags;
-+ bool io_disable;
-
- struct cache_sb sb;
-
-@@ -853,6 +854,19 @@ static inline void wake_up_allocators(struct cache_set *c)
+@@ -474,10 +474,15 @@ struct gc_stat {
+ *
+ * CACHE_SET_RUNNING means all cache devices have been registered and journal
+ * replay is complete.
++ *
++ * CACHE_SET_IO_DISABLE is set when bcache is stopping the whole cache set;
++ * all external and internal I/O should be denied when this flag is set.
++ *
+ */
+ #define CACHE_SET_UNREGISTERING 0
+ #define CACHE_SET_STOPPING 1
+ #define CACHE_SET_RUNNING 2
++#define CACHE_SET_IO_DISABLE 4
+
+ struct cache_set {
+ struct closure cl;
+@@ -861,6 +866,19 @@ static inline void wake_up_allocators(struct cache_set *c)
wake_up_process(ca->alloc_thread);
}
@@ -78,7 +95,7 @@ index c53f312b2216..9c7f9b1cb791 100644
+ struct closure *cl)
+{
+ closure_get(cl);
-+ if (unlikely(c->io_disable)) {
++ if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags))) {
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
+ return;
@@ -90,20 +107,21 @@ index c53f312b2216..9c7f9b1cb791 100644
void bch_count_io_errors(struct cache *, blk_status_t, int, const char *);
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
-index bf0d7978bc3d..75470cce1177 100644
+index 208fd3b61add..7011fb75258c 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
-@@ -1788,9 +1788,11 @@ static int bch_gc_thread(void *arg)
+@@ -1788,9 +1788,12 @@ static int bch_gc_thread(void *arg)
while (1) {
wait_event_interruptible(c->gc_wait,
- kthread_should_stop() || gc_should_run(c));
-+ kthread_should_stop() ||
-+ c->io_disable ||
-+ gc_should_run(c));
++ kthread_should_stop() ||
++ test_bit(CACHE_SET_IO_DISABLE, &c->flags) ||
++ gc_should_run(c));
- if (kthread_should_stop())
-+ if (kthread_should_stop() || c->io_disable)
++ if (kthread_should_stop() ||
++ test_bit(CACHE_SET_IO_DISABLE, &c->flags))
break;
set_gc_sectors(c);
@@ -143,10 +161,10 @@ index a87165c1d8e5..979873641030 100644
continue_at(cl, journal_write_done, NULL);
}
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
-index 643c3021624f..a85d6a605a8e 100644
+index dd8191cfdb14..12f5b95b3734 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
-@@ -725,7 +725,7 @@ static void cached_dev_read_error(struct closure *cl)
+@@ -747,7 +747,7 @@ static void cached_dev_read_error(struct closure *cl)
/* XXX: invalidate cache */
@@ -155,7 +173,7 @@ index 643c3021624f..a85d6a605a8e 100644
}
continue_at(cl, cached_dev_cache_miss_done, NULL);
-@@ -850,7 +850,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
+@@ -872,7 +872,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
s->cache_miss = miss;
s->iop.bio = cache_bio;
bio_get(cache_bio);
@@ -164,7 +182,7 @@ index 643c3021624f..a85d6a605a8e 100644
return ret;
out_put:
-@@ -858,7 +858,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
+@@ -880,7 +880,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
out_submit:
miss->bi_end_io = request_endio;
miss->bi_private = &s->cl;
@@ -173,7 +191,7 @@ index 643c3021624f..a85d6a605a8e 100644
return ret;
}
-@@ -923,7 +923,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
+@@ -945,7 +945,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
if ((bio_op(bio) != REQ_OP_DISCARD) ||
blk_queue_discard(bdev_get_queue(dc->bdev)))
@@ -182,7 +200,7 @@ index 643c3021624f..a85d6a605a8e 100644
} else if (s->iop.writeback) {
bch_writeback_add(dc);
s->iop.bio = bio;
-@@ -938,12 +938,12 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
+@@ -960,12 +960,12 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
flush->bi_private = cl;
flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
@@ -197,7 +215,7 @@ index 643c3021624f..a85d6a605a8e 100644
}
closure_call(&s->iop.cl, bch_data_insert, NULL, cl);
-@@ -959,7 +959,7 @@ static void cached_dev_nodata(struct closure *cl)
+@@ -981,7 +981,7 @@ static void cached_dev_nodata(struct closure *cl)
bch_journal_meta(s->iop.c, cl);
/* If it's a flush, we send the flush to the backing device too */
@@ -206,24 +224,24 @@ index 643c3021624f..a85d6a605a8e 100644
continue_at(cl, cached_dev_bio_complete, NULL);
}
-@@ -974,6 +974,12 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
+@@ -996,6 +996,12 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
struct cached_dev *dc = container_of(d, struct cached_dev, disk);
int rw = bio_data_dir(bio);
-+ if (unlikely(d->c && d->c->io_disable)) {
++ if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
+ return BLK_QC_T_NONE;
+ }
+
+ atomic_set(&dc->backing_idle, 0);
generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
- bio_set_dev(bio, dc->bdev);
-@@ -1089,6 +1095,12 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q,
+@@ -1112,6 +1118,12 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q,
struct bcache_device *d = bio->bi_disk->private_data;
int rw = bio_data_dir(bio);
-+ if (unlikely(d->c->io_disable)) {
++ if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
+ return BLK_QC_T_NONE;
@@ -233,7 +251,7 @@ index 643c3021624f..a85d6a605a8e 100644
s = search_alloc(bio, d);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index bbe911847eea..7aa76c3e3556 100644
+index 858d3dc4dbc8..32b64090a056 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -521,7 +521,7 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op,
@@ -245,27 +263,26 @@ index bbe911847eea..7aa76c3e3556 100644
closure_sync(cl);
}
-@@ -1333,6 +1333,10 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
- acquire_console_sem();
- */
+@@ -1326,6 +1326,9 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
+ test_bit(CACHE_SET_STOPPING, &c->flags))
+ return false;
-+ c->io_disable = true;
-+ /* make others know io_disable is true earlier */
-+ smp_mb();
++ if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags))
++ pr_warn("bcache: CACHE_SET_IO_DISABLE already set");
+
- printk(KERN_ERR "bcache: error on %pU: ", c->sb.set_uuid);
-
- va_start(args, fmt);
-@@ -1564,6 +1568,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
+ /* XXX: we can be called from atomic context
+ acquire_console_sem();
+ */
+@@ -1561,6 +1564,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
c->congested_read_threshold_us = 2000;
c->congested_write_threshold_us = 20000;
c->error_limit = DEFAULT_IO_ERROR_LIMIT;
-+ c->io_disable = false;
++ BUG_ON(test_and_clear_bit(CACHE_SET_IO_DISABLE, &c->flags));
return c;
err:
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index d7ce9a05b304..acce7c82e111 100644
+index 8a89c3c4bdab..fd04450b5f13 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -92,6 +92,7 @@ read_attribute(partial_stripes_expensive);
@@ -276,23 +293,38 @@ index d7ce9a05b304..acce7c82e111 100644
rw_attribute(discard);
rw_attribute(running);
rw_attribute(label);
-@@ -573,6 +574,7 @@ SHOW(__bch_cache_set)
+@@ -581,6 +582,8 @@ SHOW(__bch_cache_set)
sysfs_printf(gc_always_rewrite, "%i", c->gc_always_rewrite);
sysfs_printf(btree_shrinker_disabled, "%i", c->shrinker_disabled);
sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
-+ sysfs_printf(io_disable, "%i", c->io_disable);
++ sysfs_printf(io_disable, "%i",
++ test_bit(CACHE_SET_IO_DISABLE, &c->flags));
if (attr == &sysfs_bset_tree_stats)
return bch_bset_print_stats(c, buf);
-@@ -663,6 +665,7 @@ STORE(__bch_cache_set)
+@@ -670,6 +673,21 @@ STORE(__bch_cache_set)
+ if (attr == &sysfs_io_error_halflife)
c->error_decay = strtoul_or_return(buf) / 88;
++ if (attr == &sysfs_io_disable) {
++ int v = strtoul_or_return(buf);
++ if (v) {
++ if (test_and_set_bit(CACHE_SET_IO_DISABLE,
++ &c->flags))
++ pr_warn("bcache: CACHE_SET_IO_DISABLE"
++ " already set");
++ } else {
++ if (!test_and_clear_bit(CACHE_SET_IO_DISABLE,
++ &c->flags))
++ pr_warn("bcache: CACHE_SET_IO_DISABLE"
++ " already cleared");
++ }
++ }
++
sysfs_strtoul(journal_delay_ms, c->journal_delay_ms);
-+ sysfs_strtoul_clamp(io_disable, c->io_disable, 0, 1);
sysfs_strtoul(verify, c->verify);
sysfs_strtoul(key_merging_disabled, c->key_merging_disabled);
- sysfs_strtoul(expensive_debug_checks, c->expensive_debug_checks);
-@@ -744,6 +747,7 @@ static struct attribute *bch_cache_set_internal_files[] = {
+@@ -752,6 +770,7 @@ static struct attribute *bch_cache_set_internal_files[] = {
&sysfs_gc_always_rewrite,
&sysfs_btree_shrinker_disabled,
&sysfs_copy_gc_enabled,
@@ -318,38 +350,40 @@ index ed5e8a412eb8..03e533631798 100644
uint64_t bch_crc64(const void *, size_t);
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index e58f9be5ae43..54add41d2569 100644
+index 0ec23987f098..d806fc6d2a6e 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
-@@ -93,8 +93,11 @@ static void update_writeback_rate(struct work_struct *work)
+@@ -100,7 +100,14 @@ static void update_writeback_rate(struct work_struct *work)
+ struct cached_dev *dc = container_of(to_delayed_work(work),
+ struct cached_dev,
writeback_rate_update);
- struct cache_set *c = dc->disk.c;
-
-- /* quit directly if cache set is stopping */
-- if (test_bit(CACHE_SET_STOPPING, &c->flags))
+- if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) {
++ struct cache_set *c = dc->disk.c;
++
+ /*
-+ * quit directly if cache set is stopping. c->io_disable
-+ * can be set via sysfs, check it here too.
++ * CACHE_SET_IO_DISABLE might be set via sysfs interface,
++ * check it here too.
+ */
-+ if (test_bit(CACHE_SET_STOPPING, &c->flags) || c->io_disable)
++ if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) ||
++ test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
+ cached_dev_put(dc);
return;
-
- down_read(&dc->writeback_lock);
-@@ -105,8 +108,11 @@ static void update_writeback_rate(struct work_struct *work)
+ }
+@@ -113,7 +120,12 @@ static void update_writeback_rate(struct work_struct *work)
up_read(&dc->writeback_lock);
-- /* do not schedule delayed work if cache set is stopping */
-- if (test_bit(CACHE_SET_STOPPING, &c->flags))
+- if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) {
+ /*
-+ * do not schedule delayed work if cache set is stopping,
-+ * c->io_disable can be set via sysfs, check it here too.
++ * CACHE_SET_IO_DISABLE might be set via sysfs interface,
++ * check it here too.
+ */
-+ if (test_bit(CACHE_SET_STOPPING, &c->flags) || c->io_disable)
++ if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) ||
++ test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
+ cached_dev_put(dc);
return;
-
- schedule_delayed_work(&dc->writeback_rate_update,
-@@ -217,7 +223,7 @@ static void write_dirty(struct closure *cl)
+ }
+@@ -248,7 +260,7 @@ static void write_dirty(struct closure *cl)
bio_set_dev(&io->bio, io->dc->bdev);
io->bio.bi_end_io = dirty_endio;
@@ -357,8 +391,8 @@ index e58f9be5ae43..54add41d2569 100644
+ closure_bio_submit(io->dc->disk.c, &io->bio, cl);
}
- continue_at(cl, write_dirty_finish, io->dc->writeback_write_wq);
-@@ -240,7 +246,7 @@ static void read_dirty_submit(struct closure *cl)
+ atomic_set(&dc->writeback_sequence_next, next_sequence);
+@@ -274,7 +286,7 @@ static void read_dirty_submit(struct closure *cl)
{
struct dirty_io *io = container_of(cl, struct dirty_io, cl);
@@ -367,27 +401,29 @@ index e58f9be5ae43..54add41d2569 100644
continue_at(cl, write_dirty, io->dc->writeback_write_wq);
}
-@@ -259,7 +265,7 @@ static void read_dirty(struct cached_dev *dc)
- * mempools.
- */
+@@ -300,7 +312,9 @@ static void read_dirty(struct cached_dev *dc)
-- while (!kthread_should_stop()) {
-+ while (!(kthread_should_stop() || dc->disk.c->io_disable)) {
-
- w = bch_keybuf_next(&dc->writeback_keys);
- if (!w)
-@@ -269,7 +275,9 @@ static void read_dirty(struct cached_dev *dc)
-
- if (KEY_START(&w->key) != dc->last_read ||
- jiffies_to_msecs(delay) > 50)
-- while (!kthread_should_stop() && delay)
-+ while (!kthread_should_stop() &&
-+ !dc->disk.c->io_disable &&
-+ delay)
- delay = schedule_timeout_interruptible(delay);
+ next = bch_keybuf_next(&dc->writeback_keys);
- dc->last_read = KEY_OFFSET(&w->key);
-@@ -450,18 +458,19 @@ static bool refill_dirty(struct cached_dev *dc)
+- while (!kthread_should_stop() && next) {
++ while (!kthread_should_stop() &&
++ !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
++ next) {
+ size = 0;
+ nk = 0;
+
+@@ -397,7 +411,9 @@ static void read_dirty(struct cached_dev *dc)
+ }
+ }
+
+- while (!kthread_should_stop() && delay) {
++ while (!kthread_should_stop() &&
++ !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
++ delay) {
+ schedule_timeout_interruptible(delay);
+ delay = writeback_delay(dc, 0);
+ }
+@@ -553,11 +569,13 @@ static bool refill_dirty(struct cached_dev *dc)
static int bch_writeback_thread(void *arg)
{
struct cached_dev *dc = arg;
@@ -397,37 +433,29 @@ index e58f9be5ae43..54add41d2569 100644
bch_ratelimit_reset(&dc->writeback_rate);
- while (!kthread_should_stop()) {
-+ while (!(kthread_should_stop() || c->io_disable)) {
++ while (!kthread_should_stop() &&
++ !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
down_write(&dc->writeback_lock);
+ set_current_state(TASK_INTERRUPTIBLE);
if (!atomic_read(&dc->has_dirty) ||
- (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
+@@ -565,7 +583,8 @@ static int bch_writeback_thread(void *arg)
!dc->writeback_running)) {
up_write(&dc->writeback_lock);
-- if (kthread_should_stop())
-+ if (kthread_should_stop() || c->io_disable)
+- if (kthread_should_stop()) {
++ if (kthread_should_stop() ||
++ test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
+ set_current_state(TASK_RUNNING);
break;
+ }
+@@ -593,6 +612,7 @@ static int bch_writeback_thread(void *arg)
- set_current_state(TASK_INTERRUPTIBLE);
-@@ -485,8 +494,8 @@ static int bch_writeback_thread(void *arg)
- if (searched_full_index) {
- unsigned delay = dc->writeback_delay * HZ;
-
-- while (delay &&
-- !kthread_should_stop() &&
-+ while (delay && !kthread_should_stop() &&
-+ !c->io_disable &&
+ while (delay &&
+ !kthread_should_stop() &&
++ !test_bit(CACHE_SET_IO_DISABLE, &c->flags) &&
!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
delay = schedule_timeout_interruptible(delay);
-@@ -494,6 +503,7 @@ static int bch_writeback_thread(void *arg)
- }
- }
-
-+ dc->writeback_thread = NULL;
- cached_dev_put(dc);
-
- return 0;
--
2.15.1
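The gate this patch puts in front of bio submission can be reduced to a small sketch; EXAMPLE_IO_DISABLE and the helper name are invented for illustration (the real closure_bio_submit() also takes the closure and calls closure_get() first, and the bit it tests is CACHE_SET_IO_DISABLE in cache_set->flags):

#include <linux/bio.h>
#include <linux/bitops.h>
#include <linux/blk_types.h>
#include <linux/blkdev.h>

#define EXAMPLE_IO_DISABLE	0	/* stands in for CACHE_SET_IO_DISABLE */

/* Fail the bio immediately instead of submitting it once the owning
 * cache set has been marked as I/O-disabled. */
static inline void example_bio_submit(unsigned long *flags, struct bio *bio)
{
	if (unlikely(test_bit(EXAMPLE_IO_DISABLE, flags))) {
		bio->bi_status = BLK_STS_IOERR;
		bio_endio(bio);
		return;
	}
	generic_make_request(bio);
}

Because both external requests (in the *_make_request() paths) and internal ones (journal, btree, writeback) funnel through checks like this, setting the bit makes the whole cache set stop issuing I/O quickly once bch_cache_set_error() has decided to retire it.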
diff --git a/for-next/v2-0006-bcache-stop-dc-writeback_rate_update-dc-writeback.patch b/for-next/v2-0006-bcache-stop-dc-writeback_rate_update-dc-writeback.patch
deleted file mode 100644
index d7438ca..0000000
--- a/for-next/v2-0006-bcache-stop-dc-writeback_rate_update-dc-writeback.patch
+++ /dev/null
@@ -1,122 +0,0 @@
-From 2da5b83720460c83d0f20d0771a0c955e60028e8 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 3 Jan 2018 00:03:45 +0800
-Subject: [PATCH v2 06/13] bcache: stop dc->writeback_rate_update,
- dc->writeback_thread earlier
-
-Delayed worker dc->writeback_rate_update and kernel thread
-dc->writeback_thread reference the cache set data structure in their
-routines. Therefore, before they are stopped, the cache set must not be
-released; otherwise a NULL pointer dereference will be triggered.
-
-Currently delayed worker dc->writeback_rate_update and kernel thread
-dc->writeback_thread are stopped in cached_dev_free(). When a cache set is
-retired due to too many I/O errors, cached_dev_free() is called when the
-refcount of the bcache device's closure (disk.cl) reaches 0. In most cases,
-the last refcount of disk.cl is dropped in the last line of cached_dev_detach_finish().
-But in cached_dev_detach_finish(), before calling closure_put(&dc->disk.cl),
-bcache_device_detach() is called, and inside bcache_device_detach() the
-refcount of cache_set->caching is dropped by closure_put(&d->c->caching).
-
-It is very probable that this is the last refcount of this closure, so
-cache_set_flush() will be called (it is set in __cache_set_unregister()),
-and its parent closure cache_set->cl may also drop its last refcount, so
-cache_set_free() is called too. In cache_set_free() the last refcount of
-cache_set->kobj is dropped and then bch_cache_set_release() is called,
-where the memory of struct cache_set is freed.
-
-bch_cache_set_release() is called before cached_dev_free(), so there is a
-time window after the cache set memory is freed and before dc->writeback_thread
-and dc->writeback_rate_update are stopped; if one of them is scheduled to
-run in this window, a NULL pointer dereference will be triggered.
-
-This patch fixes the above problem by stopping dc->writeback_thread and
-dc->writeback_rate_update earlier in bcache_device_detach() before calling
-closure_put(&d->c->caching). Because cancel_delayed_work_sync() and
-kthread_stop() are synchronous operations, we can make sure the cache set
-is still available while the delayed work and kthread are being stopped.
-
-Because cached_dev_free() can also be called by writing 1 to the sysfs file
-/sys/block/bcache<N>/bcache/stop, this code path may not call
-bcache_device_detach() if d->c is NULL. So stopping dc->writeback_thread
-and dc->writeback_rate_update in cached_dev_free() is still necessary. In
-order to avoid stopping them twice, dc->rate_update_canceled is added to
-indicate that dc->writeback_rate_update is canceled, and dc->writeback_thread
-is set to NULL to indicate it is stopped.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/bcache.h | 1 +
- drivers/md/bcache/super.c | 21 +++++++++++++++++++--
- drivers/md/bcache/writeback.c | 1 +
- 3 files changed, 21 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 83c569942bd0..395b87942a2f 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -322,6 +322,7 @@ struct cached_dev {
-
- struct bch_ratelimit writeback_rate;
- struct delayed_work writeback_rate_update;
-+ bool rate_update_canceled;
-
- /*
- * Internal to the writeback code, so read_dirty() can keep track of
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 5401d2356aa3..8912be4165c5 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -696,8 +696,20 @@ static void bcache_device_link(struct bcache_device *d, struct cache_set *c,
-
- static void bcache_device_detach(struct bcache_device *d)
- {
-+ struct cached_dev *dc;
-+
- lockdep_assert_held(&bch_register_lock);
-
-+ dc = container_of(d, struct cached_dev, disk);
-+ if (!IS_ERR_OR_NULL(dc->writeback_thread)) {
-+ kthread_stop(dc->writeback_thread);
-+ dc->writeback_thread = NULL;
-+ }
-+ if (!dc->rate_update_canceled) {
-+ cancel_delayed_work_sync(&dc->writeback_rate_update);
-+ dc->rate_update_canceled = true;
-+ }
-+
- if (test_bit(BCACHE_DEV_DETACHING, &d->flags)) {
- struct uuid_entry *u = d->c->uuids + d->id;
-
-@@ -1071,9 +1083,14 @@ static void cached_dev_free(struct closure *cl)
- {
- struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
-
-- cancel_delayed_work_sync(&dc->writeback_rate_update);
-- if (!IS_ERR_OR_NULL(dc->writeback_thread))
-+ if (!dc->rate_update_canceled) {
-+ cancel_delayed_work_sync(&dc->writeback_rate_update);
-+ dc->rate_update_canceled = true;
-+ }
-+ if (!IS_ERR_OR_NULL(dc->writeback_thread)) {
- kthread_stop(dc->writeback_thread);
-+ dc->writeback_thread = NULL;
-+ }
- if (dc->writeback_write_wq)
- destroy_workqueue(dc->writeback_write_wq);
-
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index 745d9b2a326f..ab2ac3d72393 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -548,6 +548,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
- dc->writeback_rate_i_term_inverse = 10000;
-
- INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
-+ dc->rate_update_canceled = false;
- }
-
- int bch_cached_dev_writeback_start(struct cached_dev *dc)
---
-2.15.1
-
diff --git a/for-next/v2-0010-bcache-stop-all-attached-bcache-devices-for-a-ret.patch b/for-next/v2-0007-bcache-stop-all-attached-bcache-devices-for-a-ret.patch
index 3bd6ae7..1973080 100644
--- a/for-next/v2-0010-bcache-stop-all-attached-bcache-devices-for-a-ret.patch
+++ b/for-next/v2-0007-bcache-stop-all-attached-bcache-devices-for-a-ret.patch
@@ -1,7 +1,7 @@
-From 830c365da083a0ecc03bd9d8ae2a664b506bb1e8 Mon Sep 17 00:00:00 2001
+From c4baae1be516264b2d8c2991af67800126273080 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
-Date: Wed, 3 Jan 2018 18:24:55 +0800
-Subject: [PATCH v2 10/13] bcache: stop all attached bcache devices for a
+Date: Wed, 10 Jan 2018 00:26:32 +0800
+Subject: [PATCH v2 07/11] bcache: stop all attached bcache devices for a
retired cache set
When there are too many I/O errors on cache device, current bcache code
@@ -32,15 +32,18 @@ application will report I/O errors due to disappeared bcache device, and
operation people will know the cache device is broken or disconnected.
Signed-off-by: Coly Li <colyli@suse.de>
+Reviewed-by: Hannes Reinecke <hare@suse.com>
+Cc: Huijun Tang <tang.junhui@zte.com.cn>
+Cc: Michael Lyle <mlyle@lyle.org>
---
drivers/md/bcache/super.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 7aa76c3e3556..3ba98ca9db79 100644
+index 32b64090a056..539569419780 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
-@@ -1459,6 +1459,14 @@ static void __cache_set_unregister(struct closure *cl)
+@@ -1455,6 +1455,14 @@ static void __cache_set_unregister(struct closure *cl)
dc = container_of(c->devices[i],
struct cached_dev, disk);
bch_cached_dev_detach(dc);
@@ -50,7 +53,7 @@ index 7aa76c3e3556..3ba98ca9db79 100644
+ * keep data consistency on cache and
+ * backing devices.
+ */
-+ if (c->io_disable)
++ if (test_bit(CACHE_SET_IO_DISABLE, &c->flags))
+ bcache_device_stop(c->devices[i]);
} else {
bcache_device_stop(c->devices[i]);
diff --git a/for-next/v2-0008-bcache-fix-inaccurate-io-state-for-detached-bcach.patch b/for-next/v2-0008-bcache-fix-inaccurate-io-state-for-detached-bcach.patch
new file mode 100644
index 0000000..6494836
--- /dev/null
+++ b/for-next/v2-0008-bcache-fix-inaccurate-io-state-for-detached-bcach.patch
@@ -0,0 +1,118 @@
+From 501d81f7f2abd840b7bf03248f82ee550b6a784e Mon Sep 17 00:00:00 2001
+From: Tang Junhui <tang.junhui@zte.com.cn>
+Date: Tue, 9 Jan 2018 10:27:11 +0800
+Subject: [PATCH v2 08/11] bcache: fix inaccurate io state for detached bcache
+ devices
+
+When we run I/O on a detached device and run iostat to show the I/O status,
+it normally shows something like below (some fields omitted):
+Device: ... avgrq-sz avgqu-sz await r_await w_await svctm %util
+sdd ... 15.89 0.53 1.82 0.20 2.23 1.81 52.30
+bcache0 ... 15.89 115.42 0.00 0.00 0.00 2.40 69.60
+but after the I/O has stopped, there are still very large avgqu-sz and
+%util values, as below:
+Device: ... avgrq-sz avgqu-sz await r_await w_await svctm %util
+bcache0 ... 0 5326.32 0.00 0.00 0.00 0.00 100.10
+
+The reason for this issue is that only generic_start_io_acct() is called,
+and generic_end_io_acct() is never called, for a detached device in
+cached_dev_make_request(). See the code:
+//start generic_start_io_acct()
+generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
+if (cached_dev_get(dc)) {
+ //will callback generic_end_io_acct()
+}
+else {
+ //will not call generic_end_io_acct()
+}
+
+This patch calls generic_end_io_acct() at the end of I/O for detached
+devices, so the I/O state is shown correctly.
+
+(Modified to use GFP_NOIO in kzalloc() by Coly Li)
+
+Signed-off-by: Tang Junhui <tang.junhui@zte.com.cn>
+Reviewed-by: Coly Li <colyli@suse.de>
+---
+ drivers/md/bcache/request.c | 58 +++++++++++++++++++++++++++++++++++++++------
+ 1 file changed, 51 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
+index 12f5b95b3734..ae50b1a0fd46 100644
+--- a/drivers/md/bcache/request.c
++++ b/drivers/md/bcache/request.c
+@@ -986,6 +986,55 @@ static void cached_dev_nodata(struct closure *cl)
+ continue_at(cl, cached_dev_bio_complete, NULL);
+ }
+
++struct detached_dev_io_private {
++ struct bcache_device *d;
++ unsigned long start_time;
++ bio_end_io_t *bi_end_io;
++ void *bi_private;
++};
++
++static void detatched_dev_end_io(struct bio *bio)
++{
++ struct detached_dev_io_private *ddip;
++
++ ddip = bio->bi_private;
++ bio->bi_end_io = ddip->bi_end_io;
++ bio->bi_private = ddip->bi_private;
++
++ generic_end_io_acct(ddip->d->disk->queue,
++ bio_data_dir(bio),
++ &ddip->d->disk->part0, ddip->start_time);
++
++ kfree(ddip);
++
++ bio->bi_end_io(bio);
++}
++
++static void detached_dev_do_request(struct bcache_device *d, struct bio *bio)
++{
++ struct detached_dev_io_private *ddip;
++ struct cached_dev *dc = container_of(d, struct cached_dev, disk);
++
++ /*
++ * no need to call closure_get(&dc->disk.cl),
++ * because upper layer had already opened bcache device,
++ * which would call closure_get(&dc->disk.cl)
++ */
++ ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO);
++ ddip->d = d;
++ ddip->start_time = jiffies;
++ ddip->bi_end_io = bio->bi_end_io;
++ ddip->bi_private = bio->bi_private;
++ bio->bi_end_io = detatched_dev_end_io;
++ bio->bi_private = ddip;
++
++ if ((bio_op(bio) == REQ_OP_DISCARD) &&
++ !blk_queue_discard(bdev_get_queue(dc->bdev)))
++ bio->bi_end_io(bio);
++ else
++ generic_make_request(bio);
++}
++
+ /* Cached devices - read & write stuff */
+
+ static blk_qc_t cached_dev_make_request(struct request_queue *q,
+@@ -1028,13 +1077,8 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
+ else
+ cached_dev_read(dc, s);
+ }
+- } else {
+- if ((bio_op(bio) == REQ_OP_DISCARD) &&
+- !blk_queue_discard(bdev_get_queue(dc->bdev)))
+- bio_endio(bio);
+- else
+- generic_make_request(bio);
+- }
++ } else
++ detached_dev_do_request(d, bio);
+
+ return BLK_QC_T_NONE;
+ }
+--
+2.15.1
+
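The patch above relies on a common bi_end_io stacking pattern: save the bio's
original completion callback and private data, substitute a private endio,
and restore them before completing the bio, so that the
generic_start_io_acct()/generic_end_io_acct() pair is always closed. Below is
a minimal sketch of that pattern only; it assumes the pre-4.19
generic_*_io_acct() signatures used throughout this series, and all
example_* names are hypothetical, not the actual patch code.

	#include <linux/bio.h>
	#include <linux/blkdev.h>
	#include <linux/genhd.h>
	#include <linux/slab.h>

	struct example_io_ctx {
		struct gendisk	*disk;
		unsigned long	start_time;
		bio_end_io_t	*orig_end_io;
		void		*orig_private;
	};

	static void example_end_io(struct bio *bio)
	{
		struct example_io_ctx *ctx = bio->bi_private;

		/* restore the original completion context first */
		bio->bi_end_io = ctx->orig_end_io;
		bio->bi_private = ctx->orig_private;

		/* close the accounting window opened in example_submit() */
		generic_end_io_acct(ctx->disk->queue, bio_data_dir(bio),
				    &ctx->disk->part0, ctx->start_time);

		kfree(ctx);
		bio->bi_end_io(bio);	/* hand the bio back to its owner */
	}

	static void example_submit(struct gendisk *disk, struct bio *bio)
	{
		struct example_io_ctx *ctx = kzalloc(sizeof(*ctx), GFP_NOIO);

		if (!ctx) {
			/* no accounting context: submit the bio unmodified */
			generic_make_request(bio);
			return;
		}

		ctx->disk = disk;
		ctx->start_time = jiffies;
		ctx->orig_end_io = bio->bi_end_io;
		ctx->orig_private = bio->bi_private;
		bio->bi_end_io = example_end_io;
		bio->bi_private = ctx;

		/* open the accounting window; example_end_io() closes it */
		generic_start_io_acct(disk->queue, bio_data_dir(bio),
				      bio_sectors(bio), &disk->part0);
		generic_make_request(bio);
	}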
diff --git a/for-next/v2-0008-bcache-fix-misleading-error-message-in-bch_count_.patch b/for-next/v2-0008-bcache-fix-misleading-error-message-in-bch_count_.patch
deleted file mode 100644
index a186b5c..0000000
--- a/for-next/v2-0008-bcache-fix-misleading-error-message-in-bch_count_.patch
+++ /dev/null
@@ -1,118 +0,0 @@
-From 80d7abeee0b81a7ee0e3789bac9580f540437d0e Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 3 Jan 2018 15:59:33 +0800
-Subject: [PATCH v2 08/13] bcache: fix misleading error message in
- bch_count_io_errors()
-
-Bcache only does recoverable I/O for read operations by calling
-cached_dev_read_error(). For write operations there is no I/O recovery for
-failed requests.
-
-But in bch_count_io_errors(), no matter whether the I/O is a read or a
-write, before the error counter reaches the I/O error limit pr_err() always
-prints "IO error on %s, recovering". For write requests this information is
-misleading, because there is no I/O recovery at all.
-
-This patch adds a parameter 'is_read' to bch_count_io_errors(), and only
-prints "recovering" by pr_err() when the bio direction is READ.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/bcache.h | 2 +-
- drivers/md/bcache/io.c | 13 +++++++++----
- drivers/md/bcache/super.c | 4 +++-
- drivers/md/bcache/writeback.c | 4 +++-
- 4 files changed, 16 insertions(+), 7 deletions(-)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index a31dc3737dae..c53f312b2216 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -855,7 +855,7 @@ static inline void wake_up_allocators(struct cache_set *c)
-
- /* Forward declarations */
-
--void bch_count_io_errors(struct cache *, blk_status_t, const char *);
-+void bch_count_io_errors(struct cache *, blk_status_t, int, const char *);
- void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
- blk_status_t, const char *);
- void bch_bbio_endio(struct cache_set *, struct bio *, blk_status_t,
-diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
-index fac97ec2d0e2..a783c5a41ff1 100644
---- a/drivers/md/bcache/io.c
-+++ b/drivers/md/bcache/io.c
-@@ -51,7 +51,10 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c,
-
- /* IO errors */
-
--void bch_count_io_errors(struct cache *ca, blk_status_t error, const char *m)
-+void bch_count_io_errors(struct cache *ca,
-+ blk_status_t error,
-+ int is_read,
-+ const char *m)
- {
- /*
- * The halflife of an error is:
-@@ -94,8 +97,9 @@ void bch_count_io_errors(struct cache *ca, blk_status_t error, const char *m)
- errors >>= IO_ERROR_SHIFT;
-
- if (errors < ca->set->error_limit)
-- pr_err("%s: IO error on %s, recovering",
-- bdevname(ca->bdev, buf), m);
-+ pr_err("%s: IO error on %s%s",
-+ bdevname(ca->bdev, buf), m,
-+ is_read ? ", recovering." : ".");
- else
- bch_cache_set_error(ca->set,
- "%s: too many IO errors %s",
-@@ -108,6 +112,7 @@ void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
- {
- struct bbio *b = container_of(bio, struct bbio, bio);
- struct cache *ca = PTR_CACHE(c, &b->key, 0);
-+ int is_read = (bio_data_dir(bio) == READ ? 1 : 0);
-
- unsigned threshold = op_is_write(bio_op(bio))
- ? c->congested_write_threshold_us
-@@ -129,7 +134,7 @@ void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
- atomic_inc(&c->congested);
- }
-
-- bch_count_io_errors(ca, error, m);
-+ bch_count_io_errors(ca, error, is_read, m);
- }
-
- void bch_bbio_endio(struct cache_set *c, struct bio *bio,
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 02d9d7110769..bbe911847eea 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -274,7 +274,9 @@ static void write_super_endio(struct bio *bio)
- {
- struct cache *ca = bio->bi_private;
-
-- bch_count_io_errors(ca, bio->bi_status, "writing superblock");
-+ /* is_read = 0 */
-+ bch_count_io_errors(ca, bio->bi_status, 0,
-+ "writing superblock");
- closure_put(&ca->set->sb_write);
- }
-
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index ab2ac3d72393..e58f9be5ae43 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -228,8 +228,10 @@ static void read_dirty_endio(struct bio *bio)
- struct keybuf_key *w = bio->bi_private;
- struct dirty_io *io = w->private;
-
-+ /* is_read = 1 */
- bch_count_io_errors(PTR_CACHE(io->dc->disk.c, &w->key, 0),
-- bio->bi_status, "reading dirty data from cache");
-+ bio->bi_status, 1,
-+ "reading dirty data from cache");
-
- dirty_endio(bio);
- }
---
-2.15.1
-
diff --git a/for-next/v2-0011-bcache-add-backing_request_endio-for-bi_end_io-of.patch b/for-next/v2-0009-bcache-add-backing_request_endio-for-bi_end_io-of.patch
index 6538f6d..19042ab 100644
--- a/for-next/v2-0011-bcache-add-backing_request_endio-for-bi_end_io-of.patch
+++ b/for-next/v2-0009-bcache-add-backing_request_endio-for-bi_end_io-of.patch
@@ -1,8 +1,8 @@
-From 9de59f63ca4328e4b8aefca35a1f6a565d14579c Mon Sep 17 00:00:00 2001
+From 8f4cbbd1bd89675b94ff34790c1a13b0547784d1 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
-Date: Thu, 4 Jan 2018 14:38:54 +0800
-Subject: [PATCH v2 11/13] bcache: add backing_request_endio() for bi_end_io of
- backing device I/O
+Date: Wed, 10 Jan 2018 21:01:48 +0800
+Subject: [PATCH v2 09/11] bcache: add backing_request_endio() for bi_end_io of
+ attached backing device I/O
In order to catch I/O error of backing device, a separate bi_end_io
call back is required. Then a per backing device counter can record I/O
@@ -15,6 +15,8 @@ handling. So far there is no real code logic change, I make this change a
separate patch to make sure it is stable and reliable for further work.
Signed-off-by: Coly Li <colyli@suse.de>
+Cc: Huijun Tang <tang.junhui@zte.com.cn>
+Cc: Michael Lyle <mlyle@lyle.org>
---
drivers/md/bcache/request.c | 94 +++++++++++++++++++++++++++++++++++--------
drivers/md/bcache/super.c | 1 +
@@ -22,7 +24,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
3 files changed, 80 insertions(+), 16 deletions(-)
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
-index a85d6a605a8e..81fe5b977ffd 100644
+index ae50b1a0fd46..2d843a0a3ed8 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -139,6 +139,7 @@ static void bch_data_invalidate(struct closure *cl)
@@ -33,7 +35,7 @@ index a85d6a605a8e..81fe5b977ffd 100644
bio_put(bio);
out:
continue_at(cl, bch_data_insert_keys, op->wq);
-@@ -608,6 +609,37 @@ static void request_endio(struct bio *bio)
+@@ -630,6 +631,37 @@ static void request_endio(struct bio *bio)
closure_put(cl);
}
@@ -71,7 +73,7 @@ index a85d6a605a8e..81fe5b977ffd 100644
static void bio_complete(struct search *s)
{
if (s->orig_bio) {
-@@ -622,13 +654,21 @@ static void bio_complete(struct search *s)
+@@ -644,13 +676,21 @@ static void bio_complete(struct search *s)
}
}
@@ -95,7 +97,7 @@ index a85d6a605a8e..81fe5b977ffd 100644
bio->bi_private = &s->cl;
bio_cnt_set(bio, 3);
-@@ -654,7 +694,7 @@ static inline struct search *search_alloc(struct bio *bio,
+@@ -676,7 +716,7 @@ static inline struct search *search_alloc(struct bio *bio,
s = mempool_alloc(d->c->search, GFP_NOIO);
closure_init(&s->cl, NULL);
@@ -104,7 +106,7 @@ index a85d6a605a8e..81fe5b977ffd 100644
s->orig_bio = bio;
s->cache_miss = NULL;
-@@ -721,10 +761,11 @@ static void cached_dev_read_error(struct closure *cl)
+@@ -743,10 +783,11 @@ static void cached_dev_read_error(struct closure *cl)
trace_bcache_read_retry(s->orig_bio);
s->iop.status = 0;
@@ -117,7 +119,7 @@ index a85d6a605a8e..81fe5b977ffd 100644
closure_bio_submit(s->iop.c, bio, cl);
}
-@@ -837,7 +878,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
+@@ -859,7 +900,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
bio_copy_dev(cache_bio, miss);
cache_bio->bi_iter.bi_size = s->insert_bio_sectors << 9;
@@ -126,7 +128,7 @@ index a85d6a605a8e..81fe5b977ffd 100644
cache_bio->bi_private = &s->cl;
bch_bio_map(cache_bio, NULL);
-@@ -850,14 +891,16 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
+@@ -872,14 +913,16 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
s->cache_miss = miss;
s->iop.bio = cache_bio;
bio_get(cache_bio);
@@ -144,7 +146,7 @@ index a85d6a605a8e..81fe5b977ffd 100644
closure_bio_submit(s->iop.c, miss, &s->cl);
return ret;
}
-@@ -921,31 +964,48 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
+@@ -943,31 +986,48 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
s->iop.bio = s->orig_bio;
bio_get(s->iop.bio);
@@ -203,7 +205,7 @@ index a85d6a605a8e..81fe5b977ffd 100644
closure_call(&s->iop.cl, bch_data_insert, NULL, cl);
continue_at(cl, cached_dev_write_complete, NULL);
}
-@@ -959,6 +1019,7 @@ static void cached_dev_nodata(struct closure *cl)
+@@ -981,6 +1041,7 @@ static void cached_dev_nodata(struct closure *cl)
bch_journal_meta(s->iop.c, cl);
/* If it's a flush, we send the flush to the backing device too */
@@ -211,16 +213,16 @@ index a85d6a605a8e..81fe5b977ffd 100644
closure_bio_submit(s->iop.c, bio, cl);
continue_at(cl, cached_dev_bio_complete, NULL);
-@@ -1010,6 +1071,7 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
- !blk_queue_discard(bdev_get_queue(dc->bdev)))
- bio_endio(bio);
- else
-+ /* I/O request sent to backing device */
- generic_make_request(bio);
- }
+@@ -1078,6 +1139,7 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
+ cached_dev_read(dc, s);
+ }
+ } else
++ /* I/O request sent to backing device */
+ detached_dev_do_request(d, bio);
+ return BLK_QC_T_NONE;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 3ba98ca9db79..3c996d2d1b85 100644
+index 539569419780..247b87020f1f 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -265,6 +265,7 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent)
@@ -232,10 +234,10 @@ index 3ba98ca9db79..3c996d2d1b85 100644
closure_return_with_destructor(cl, bch_write_bdev_super_unlock);
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index 54add41d2569..b4888458cc29 100644
+index d806fc6d2a6e..f668a8efc389 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
-@@ -223,6 +223,7 @@ static void write_dirty(struct closure *cl)
+@@ -260,6 +260,7 @@ static void write_dirty(struct closure *cl)
bio_set_dev(&io->bio, io->dc->bdev);
io->bio.bi_end_io = dirty_endio;
diff --git a/for-next/v2-0012-bcache-add-io_disable-to-struct-cached_dev.patch b/for-next/v2-0010-bcache-add-io_disable-to-struct-cached_dev.patch
index 448faf2..2d50d60 100644
--- a/for-next/v2-0012-bcache-add-io_disable-to-struct-cached_dev.patch
+++ b/for-next/v2-0010-bcache-add-io_disable-to-struct-cached_dev.patch
@@ -1,7 +1,7 @@
-From 9b3fb44af80d00453efc8b2224008426f40f6351 Mon Sep 17 00:00:00 2001
+From 5b361eab149024c208c95b98b2dc2c2639b53ab5 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
-Date: Fri, 5 Jan 2018 14:12:32 +0800
-Subject: [PATCH v2 12/13] bcache: add io_disable to struct cached_dev
+Date: Wed, 10 Jan 2018 21:33:45 +0800
+Subject: [PATCH v2 10/11] bcache: add io_disable to struct cached_dev
If a bcache device is configured to writeback mode, current code does not
handle write I/O errors on backing devices properly.
@@ -25,19 +25,22 @@ reach its error limit, backing device will be disabled and the associated
bcache device will be removed from system.
Signed-off-by: Coly Li <colyli@suse.de>
+Cc: Michael Lyle <mlyle@lyle.org>
+Cc: Hannes Reinecke <hare@suse.com>
+Cc: Huijun Tang <tang.junhui@zte.com.cn>
---
drivers/md/bcache/bcache.h | 7 +++++++
drivers/md/bcache/io.c | 14 ++++++++++++++
- drivers/md/bcache/request.c | 6 +++++-
+ drivers/md/bcache/request.c | 15 +++++++++++++--
drivers/md/bcache/super.c | 23 +++++++++++++++++++++++
- drivers/md/bcache/sysfs.c | 9 ++++++++-
- 5 files changed, 57 insertions(+), 2 deletions(-)
+ drivers/md/bcache/sysfs.c | 14 +++++++++++++-
+ 5 files changed, 70 insertions(+), 3 deletions(-)
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 9c7f9b1cb791..653ea664de55 100644
+index ab51984d12f6..27e0c2f56751 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
-@@ -351,6 +351,7 @@ struct cached_dev {
+@@ -359,6 +359,7 @@ struct cached_dev {
unsigned sequential_cutoff;
unsigned readahead;
@@ -45,7 +48,7 @@ index 9c7f9b1cb791..653ea664de55 100644
unsigned verify:1;
unsigned bypass_torture_test:1;
-@@ -370,6 +371,10 @@ struct cached_dev {
+@@ -378,6 +379,10 @@ struct cached_dev {
unsigned writeback_rate_i_term_inverse;
unsigned writeback_rate_p_term_inverse;
unsigned writeback_rate_minimum;
@@ -56,7 +59,7 @@ index 9c7f9b1cb791..653ea664de55 100644
};
enum alloc_reserve {
-@@ -869,6 +874,7 @@ static inline void closure_bio_submit(struct cache_set *c,
+@@ -881,6 +886,7 @@ static inline void closure_bio_submit(struct cache_set *c,
/* Forward declarations */
@@ -64,7 +67,7 @@ index 9c7f9b1cb791..653ea664de55 100644
void bch_count_io_errors(struct cache *, blk_status_t, int, const char *);
void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
blk_status_t, const char *);
-@@ -896,6 +902,7 @@ int bch_bucket_alloc_set(struct cache_set *, unsigned,
+@@ -908,6 +914,7 @@ int bch_bucket_alloc_set(struct cache_set *, unsigned,
struct bkey *, int, bool);
bool bch_alloc_sectors(struct cache_set *, struct bkey *, unsigned,
unsigned, unsigned, bool);
@@ -98,10 +101,10 @@ index 8013ecbcdbda..7fac97ae036e 100644
void bch_count_io_errors(struct cache *ca,
blk_status_t error,
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
-index 81fe5b977ffd..36aac569275d 100644
+index 2d843a0a3ed8..ea8632515f7f 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
-@@ -615,6 +615,8 @@ static void backing_request_endio(struct bio *bio)
+@@ -637,6 +637,8 @@ static void backing_request_endio(struct bio *bio)
if (bio->bi_status) {
struct search *s = container_of(cl, struct search, cl);
@@ -110,7 +113,7 @@ index 81fe5b977ffd..36aac569275d 100644
/*
* If a bio has REQ_PREFLUSH for writeback mode, it is
* speically assembled in cached_dev_write() for a non-zero
-@@ -634,6 +636,7 @@ static void backing_request_endio(struct bio *bio)
+@@ -656,6 +658,7 @@ static void backing_request_endio(struct bio *bio)
}
s->recoverable = false;
/* should count I/O error for backing device here */
@@ -118,21 +121,38 @@ index 81fe5b977ffd..36aac569275d 100644
}
bio_put(bio);
-@@ -1035,7 +1038,8 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
+@@ -1066,8 +1069,14 @@ static void detatched_dev_end_io(struct bio *bio)
+ bio_data_dir(bio),
+ &ddip->d->disk->part0, ddip->start_time);
+
+- kfree(ddip);
++ if (bio->bi_status) {
++ struct cached_dev *dc = container_of(ddip->d,
++ struct cached_dev, disk);
++ /* should count I/O error for backing device here */
++ bch_count_backing_io_errors(dc, bio);
++ }
+
++ kfree(ddip);
+ bio->bi_end_io(bio);
+ }
+
+@@ -1106,7 +1115,9 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
struct cached_dev *dc = container_of(d, struct cached_dev, disk);
int rw = bio_data_dir(bio);
-- if (unlikely(d->c && d->c->io_disable)) {
-+ if (unlikely((d->c && d->c->io_disable) ||
-+ dc->io_disable)) {
+- if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
++	if (unlikely((d->c &&
++		      test_bit(CACHE_SET_IO_DISABLE, &d->c->flags)) ||
++		     dc->io_disable)) {
bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
return BLK_QC_T_NONE;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 3c996d2d1b85..c7a37045beaa 100644
+index 247b87020f1f..7e9acad46e65 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
-@@ -1168,6 +1168,10 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
+@@ -1165,6 +1165,10 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
max(dc->disk.disk->queue->backing_dev_info->ra_pages,
q->backing_dev_info->ra_pages);
@@ -143,7 +163,7 @@ index 3c996d2d1b85..c7a37045beaa 100644
bch_cached_dev_request_init(dc);
bch_cached_dev_writeback_init(dc);
return 0;
-@@ -1319,6 +1323,25 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size)
+@@ -1316,6 +1320,25 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size)
return flash_dev_run(c, u);
}
@@ -170,7 +190,7 @@ index 3c996d2d1b85..c7a37045beaa 100644
__printf(2, 3)
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index acce7c82e111..2308c5609c01 100644
+index fd04450b5f13..1a28a203e4fc 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -131,7 +131,9 @@ SHOW(__bch_cached_dev)
@@ -184,16 +204,21 @@ index acce7c82e111..2308c5609c01 100644
var_print(writeback_rate_update_seconds);
var_print(writeback_rate_i_term_inverse);
var_print(writeback_rate_p_term_inverse);
-@@ -220,6 +222,8 @@ STORE(__cached_dev)
+@@ -220,6 +222,13 @@ STORE(__cached_dev)
d_strtoul(writeback_rate_i_term_inverse);
d_strtoul_nonzero(writeback_rate_p_term_inverse);
+ sysfs_strtoul_clamp(io_error_limit, dc->error_limit, 0, INT_MAX);
-+ sysfs_strtoul_clamp(io_disable, dc->io_disable, 0, 1);
++
++ if (attr == &sysfs_io_disable) {
++ int v = strtoul_or_return(buf);
++ dc->io_disable = v ? 1 : 0;
++ }
++
d_strtoi_h(sequential_cutoff);
d_strtoi_h(readahead);
-@@ -326,6 +330,9 @@ static struct attribute *bch_cached_dev_files[] = {
+@@ -334,6 +343,9 @@ static struct attribute *bch_cached_dev_files[] = {
&sysfs_writeback_rate_i_term_inverse,
&sysfs_writeback_rate_p_term_inverse,
&sysfs_writeback_rate_debug,
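The error accounting added in the patch above follows a simple threshold
pattern: count I/O errors on the backing device and, once the counter
reaches dc->error_limit, disable further I/O and retire the attached bcache
device. The sketch below illustrates that pattern only; it is not the actual
bch_count_backing_io_errors() from the patch, and
example_count_backing_error() is a hypothetical name.

	#include <linux/atomic.h>

	/* sketch only: threshold-style error accounting */
	static void example_count_backing_error(struct cached_dev *dc)
	{
		unsigned int errors = atomic_add_return(1, &dc->io_errors);

		if (errors < dc->error_limit)
			return;

		/*
		 * too many backing device errors: refuse further I/O and
		 * stop the bcache device attached to this backing device
		 */
		dc->io_disable = true;
		smp_mb();	/* make io_disable visible before stopping */
		bcache_device_stop(&dc->disk);
	}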
diff --git a/for-next/v2-0011-bcache-stop-bcache-device-when-backing-device-is-.patch b/for-next/v2-0011-bcache-stop-bcache-device-when-backing-device-is-.patch
new file mode 100644
index 0000000..3006a04
--- /dev/null
+++ b/for-next/v2-0011-bcache-stop-bcache-device-when-backing-device-is-.patch
@@ -0,0 +1,148 @@
+From 6e68ee1e93a5d5e018630e80fa14009b2f676f0f Mon Sep 17 00:00:00 2001
+From: Coly Li <colyli@suse.de>
+Date: Wed, 10 Jan 2018 23:51:12 +0800
+Subject: [PATCH v2 11/11] bcache: stop bcache device when backing device is
+ offline
+
+Currently bcache does not handle backing device failure: if a backing
+device is offline and disconnected from the system, its bcache device can
+still be accessed. If the bcache device is in writeback mode, I/O requests
+can even succeed when they hit the cache device. That is to say, when and
+how bcache handles an offline backing device is undefined.
+
+This patch tries to handle backing device offline in a rather simple way:
+- Add a cached_dev->status_update_thread kernel thread to update the
+  backing device status every second.
+- Add cached_dev->offline_seconds to record how many seconds the backing
+  device has been observed to be offline. If the backing device is offline
+  for BACKING_DEV_OFFLINE_TIMEOUT (30) seconds, set dc->io_disable to 1 and
+  call bcache_device_stop() to stop the bcache device which is linked to
+  the offline backing device.
+
+Now if a backing device is offline for BACKING_DEV_OFFLINE_TIMEOUT seconds,
+its bcache device will be removed, so a user space application writing to
+it will get an error immediately and can handle the device failure in time.
+
+This patch is quite simple and does not handle more complicated situations.
+Once the bcache device is stopped, users need to recover the backing
+device, then register and attach it manually.
+
+Signed-off-by: Coly Li <colyli@suse.de>
+Cc: Michael Lyle <mlyle@lyle.org>
+Cc: Hannes Reinecke <hare@suse.com>
+Cc: Huijun Tang <tang.junhui@zte.com.cn>
+---
+ drivers/md/bcache/bcache.h | 2 ++
+ drivers/md/bcache/super.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 57 insertions(+)
+
+diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
+index 27e0c2f56751..f666d4317624 100644
+--- a/drivers/md/bcache/bcache.h
++++ b/drivers/md/bcache/bcache.h
+@@ -337,6 +337,7 @@ struct cached_dev {
+
+ struct keybuf writeback_keys;
+
++ struct task_struct *status_update_thread;
+ /*
+ * Order the write-half of writeback operations strongly in dispatch
+ * order. (Maintain LBA order; don't allow reads completing out of
+@@ -383,6 +384,7 @@ struct cached_dev {
+ #define DEFAULT_CACHED_DEV_ERROR_LIMIT 64
+ atomic_t io_errors;
+ unsigned error_limit;
++ unsigned offline_seconds;
+ };
+
+ enum alloc_reserve {
+diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
+index 7e9acad46e65..35ada04f0185 100644
+--- a/drivers/md/bcache/super.c
++++ b/drivers/md/bcache/super.c
+@@ -646,6 +646,11 @@ static int ioctl_dev(struct block_device *b, fmode_t mode,
+ unsigned int cmd, unsigned long arg)
+ {
+ struct bcache_device *d = b->bd_disk->private_data;
++ struct cached_dev *dc = container_of(d, struct cached_dev, disk);
++
++ if (dc->io_disable)
++ return -EIO;
++
+ return d->ioctl(d, mode, cmd, arg);
+ }
+
+@@ -856,6 +861,45 @@ static void calc_cached_dev_sectors(struct cache_set *c)
+ c->cached_dev_sectors = sectors;
+ }
+
++#define BACKING_DEV_OFFLINE_TIMEOUT 30
++static int cached_dev_status_update(void *arg)
++{
++ struct cached_dev *dc = arg;
++ struct request_queue *q;
++ char buf[BDEVNAME_SIZE];
++
++ /*
++	 * If this kthread is asked to stop from outside, quit here directly.
++	 * dc->io_disable might be set via the sysfs interface, so check it
++	 * here too.
++ */
++ while (!kthread_should_stop() && !dc->io_disable) {
++ q = bdev_get_queue(dc->bdev);
++ if (blk_queue_dying(q))
++			dc->offline_seconds++;
++ else
++ dc->offline_seconds = 0;
++
++ if (dc->offline_seconds >= BACKING_DEV_OFFLINE_TIMEOUT) {
++ pr_err("%s: device offline for %d seconds",
++ bdevname(dc->bdev, buf),
++ BACKING_DEV_OFFLINE_TIMEOUT);
++ pr_err("%s: disable I/O request due to backing "
++ "device offline", dc->disk.name);
++ dc->io_disable = true;
++ /* let others know earlier that io_disable is true */
++ smp_mb();
++ bcache_device_stop(&dc->disk);
++ break;
++ }
++
++ schedule_timeout_interruptible(HZ);
++ }
++
++ dc->status_update_thread = NULL;
++ return 0;
++}
++
+ void bch_cached_dev_run(struct cached_dev *dc)
+ {
+ struct bcache_device *d = &dc->disk;
+@@ -898,6 +942,15 @@ void bch_cached_dev_run(struct cached_dev *dc)
+ if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") ||
+ sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache"))
+ pr_debug("error creating sysfs link");
++
++	dc->status_update_thread = kthread_run(cached_dev_status_update,
++					       dc,
++					       "bcache_status_update");
++ if (IS_ERR(dc->status_update_thread)) {
++ pr_warn("bcache: failed to create bcache_status_update "
++ "kthread, continue to run without monitoring backing "
++ "device status");
++ }
+ }
+
+ static void cached_dev_detach_finish(struct work_struct *w)
+@@ -1093,6 +1146,8 @@ static void cached_dev_free(struct closure *cl)
+ kthread_stop(dc->writeback_thread);
+ if (dc->writeback_write_wq)
+ destroy_workqueue(dc->writeback_write_wq);
++ if (!IS_ERR_OR_NULL(dc->status_update_thread))
++ kthread_stop(dc->status_update_thread);
+
+ mutex_lock(&bch_register_lock);
+
+--
+2.15.1
+
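The status update thread added above is an instance of the standard
poll-until-stopped kthread pattern. A minimal sketch of that pattern is
shown below for reference; example_poll_fn() and example_start() are
hypothetical names, not part of the patch.

	#include <linux/kthread.h>
	#include <linux/sched.h>

	/* sketch only: a kthread that polls once a second until stopped */
	static int example_poll_fn(void *arg)
	{
		while (!kthread_should_stop()) {
			/* ... check the monitored condition here ... */
			schedule_timeout_interruptible(HZ);
		}
		return 0;
	}

	static struct task_struct *example_start(void *arg)
	{
		/*
		 * kthread_run() both creates and wakes the thread; a bare
		 * kthread_create() would leave it sleeping until someone
		 * calls wake_up_process() on it. The caller should check
		 * the return value with IS_ERR().
		 */
		return kthread_run(example_poll_fn, arg, "example_poll");
	}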
diff --git a/for-next/v2-0013-bcache-stop-bcache-device-when-backing-device-is-.patch b/for-next/v2-0013-bcache-stop-bcache-device-when-backing-device-is-.patch
deleted file mode 100644
index bdbd112..0000000
--- a/for-next/v2-0013-bcache-stop-bcache-device-when-backing-device-is-.patch
+++ /dev/null
@@ -1,99 +0,0 @@
-From 16d7b83245059ce3675b0b53363444cb0988a261 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Mon, 8 Jan 2018 09:56:33 +0800
-Subject: [PATCH v2 13/13] bcache: stop bcache device when backing device is
- offline
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/bcache.h | 2 ++
- drivers/md/bcache/super.c | 39 ++++++++++++++++++++++++++++++++++++++-
- 2 files changed, 40 insertions(+), 1 deletion(-)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 653ea664de55..22d98ba0512b 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -375,6 +375,8 @@ struct cached_dev {
- #define DEFAULT_CACHED_DEV_ERROR_LIMIT 64
- atomic_t io_errors;
- unsigned error_limit;
-+ unsigned offline_seconds;
-+ struct delayed_work device_status_update;
- };
-
- enum alloc_reserve {
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index c7a37045beaa..89cbbd83c79e 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -646,6 +646,11 @@ static int ioctl_dev(struct block_device *b, fmode_t mode,
- unsigned int cmd, unsigned long arg)
- {
- struct bcache_device *d = b->bd_disk->private_data;
-+ struct cached_dev *dc = container_of(d, struct cached_dev, disk);
-+
-+ if (dc->io_disable)
-+ return -EIO;
-+
- return d->ioctl(d, mode, cmd, arg);
- }
-
-@@ -1090,6 +1095,7 @@ static void cached_dev_free(struct closure *cl)
- cancel_delayed_work_sync(&dc->writeback_rate_update);
- dc->rate_update_canceled = true;
- }
-+ cancel_delayed_work_sync(&dc->device_status_update);
- if (!IS_ERR_OR_NULL(dc->writeback_thread)) {
- kthread_stop(dc->writeback_thread);
- dc->writeback_thread = NULL;
-@@ -1129,6 +1135,37 @@ static void cached_dev_flush(struct closure *cl)
- continue_at(cl, cached_dev_free, system_wq);
- }
-
-+#define BACKING_DEV_OFFLINE_TIMEOUT 30
-+static void cached_dev_status_update(struct work_struct *work)
-+{
-+ struct cached_dev *dc;
-+ struct request_queue *q;
-+ char buf[BDEVNAME_SIZE];
-+
-+ dc = container_of(to_delayed_work(work),
-+ struct cached_dev,
-+ device_status_update);
-+ q = bdev_get_queue(dc->bdev);
-+
-+ if (blk_queue_dying(q))
-+ dc->offline_seconds ++;
-+ else
-+ dc->offline_seconds = 0;
-+
-+ if (dc->offline_seconds > BACKING_DEV_OFFLINE_TIMEOUT) {
-+ pr_err("%s: device offline for %d seconds",
-+ bdevname(dc->bdev, buf), BACKING_DEV_OFFLINE_TIMEOUT);
-+ pr_err("%s: disable I/O request due to backing device offline",
-+ dc->disk.name);
-+ dc->io_disable = true;
-+ /* let others know earlier that io_disable is true */
-+ smp_mb();
-+ return;
-+ }
-+ /* polling backing device connection status every second */
-+ schedule_delayed_work(&dc->device_status_update, HZ);
-+}
-+
- static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
- {
- int ret;
-@@ -1171,7 +1208,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
- atomic_set(&dc->io_errors, 0);
- dc->io_disable = false;
- dc->error_limit = DEFAULT_CACHED_DEV_ERROR_LIMIT;
--
-+ INIT_DELAYED_WORK(&dc->device_status_update, cached_dev_status_update);
- bch_cached_dev_request_init(dc);
- bch_cached_dev_writeback_init(dc);
- return 0;
---
-2.15.1
-