aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorColy Li <colyli@suse.de>2018-01-03 22:15:19 +0800
committerColy Li <colyli@suse.de>2018-01-03 22:15:19 +0800
commitb39f6105fd5f02d0239af2a86cb2ff01f369f1ae (patch)
treeb638062bc5b55eaa968bd349683ee2995d099be3
parentba0f907d04681ce88c2a75899e46234c38323c41 (diff)
downloadbcache-patches-b39f6105fd5f02d0239af2a86cb2ff01f369f1ae.tar.gz
for-next: update patches to the version sent to upstream for review
-rw-r--r--for-next/v1-0000-cover-letter.patch56
-rw-r--r--for-next/v1-0001-bcache-exit-bch_writeback_thread-with-proper-task.patch (renamed from for-next/0001-bcache-exit-bch_writeback_thread-with-proper-task-st.patch)4
-rw-r--r--for-next/v1-0002-bcache-set-task-properly-in-allocator_wait.patch (renamed from for-next/0002-bcache-set-task-properly-in-allocator_wait.patch)4
-rw-r--r--for-next/v1-0003-bcache-reduce-cache_set-devices-iteration-by-devi.patch (renamed from for-next/0003-bcache-reduce-cache_set-devices-iteration-by-devices.patch)4
-rw-r--r--for-next/v1-0004-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch (renamed from for-next/0004-bcache-fix-cached_dev-count-usage-for-bch_cache_set_.patch)20
-rw-r--r--for-next/v1-0005-bcache-stop-dc-writeback_rate_update-if-cache-set.patch (renamed from for-next/0005-bcache-stop-dc-writeback_rate_update-if-cache-set-is.patch)12
-rw-r--r--for-next/v1-0006-bcache-stop-dc-writeback_rate_update-dc-writeback.patch (renamed from for-next/0007-bcache-stop-dc-writeback_rate_update-dc-writeback_th.patch)12
-rw-r--r--for-next/v1-0007-bcache-set-error_limit-correctly.patch (renamed from for-next/0008-bcache-set-error_limit-correctly.patch)52
-rw-r--r--for-next/v1-0008-bcache-fix-misleading-error-message-in-bch_count_.patch (renamed from for-next/0009-bcache-fix-misleading-error-message-in-bch_count_io_.patch)16
-rw-r--r--for-next/v1-0009-bcache-add-io_disable-to-struct-cache_set.patch (renamed from for-next/0006-bcache-add-io_disable-to-struct-cache_set.patch)63
-rw-r--r--for-next/v1-0010-bcache-stop-all-attached-bcache-devices-for-a-ret.patch (renamed from for-next/0010-bcache-stop-all-attached-bcache-devices-for-a-retire.patch)6
11 files changed, 154 insertions, 95 deletions
diff --git a/for-next/v1-0000-cover-letter.patch b/for-next/v1-0000-cover-letter.patch
new file mode 100644
index 0000000..0ac36de
--- /dev/null
+++ b/for-next/v1-0000-cover-letter.patch
@@ -0,0 +1,56 @@
+From 595d5d28a7ed23cae061b9e0dd201611afd6db6d Mon Sep 17 00:00:00 2001
+From: Coly Li <colyli@suse.de>
+Date: Wed, 3 Jan 2018 21:20:57 +0800
+Subject: [PATCH v1 00/10] cache device failure handling improvement
+
+Hi maintainers and folks,
+
+This patch set tries to improve cache device failure handling. A basic
+idea to handle failed cache device is,
+- Unregister cache set
+- Detach all backing devices attached to this cache set
+- Stop all bcache devices linked to this cache set
+The above process is named 'cache set retire' by me. The result of cache
+set retire is, cache set and bcache devices are all removed, following
+I/O requests will get failed immediately to notify upper layer or user
+space code that the cache device is failed or disconnected.
+
+The first 8 patches of this patch set are to fix existing bugs in bcache,
+the last 2 patches do the real improvement. Order of applying these patches
+is important, if the last 2 patches are applied firstly, kernel panic or
+process hang will be observed. Therefore I suggest to apply the first 8
+fixes, then apply the last 2 patches.
+
+The patch set is tested with writethrough, writeback, writearound mode,
+read/write/readwrite workloads, so far it works as expected. IMHO the
+cache set retire logic is complicated, I need your help to review the
+patches, any question is warmly welcome.
+
+Coly Li (10):
+ bcache: exit bch_writeback_thread() with proper task state
+ bcache: set task properly in allocator_wait()
+ bcache: reduce cache_set devices iteration by devices_max_used
+ bcache: fix cached_dev->count usage for bch_cache_set_error()
+ bcache: stop dc->writeback_rate_update if cache set is stopping
+ bcache: stop dc->writeback_rate_update, dc->writeback_thread earlier
+ bcache: set error_limit correctly
+ bcache: fix misleading error message in bch_count_io_errors()
+ bcache: add io_disable to struct cache_set
+ bcache: stop all attached bcache devices for a retired cache set
+
+ drivers/md/bcache/alloc.c | 5 ++---
+ drivers/md/bcache/bcache.h | 19 +++++++++++++++-
+ drivers/md/bcache/btree.c | 8 ++++---
+ drivers/md/bcache/io.c | 15 ++++++++-----
+ drivers/md/bcache/journal.c | 4 ++--
+ drivers/md/bcache/request.c | 26 ++++++++++++++++------
+ drivers/md/bcache/super.c | 51 +++++++++++++++++++++++++++++++++++--------
+ drivers/md/bcache/sysfs.c | 8 +++++--
+ drivers/md/bcache/util.h | 6 -----
+ drivers/md/bcache/writeback.c | 51 +++++++++++++++++++++++++++++++++----------
+ drivers/md/bcache/writeback.h | 4 +---
+ 11 files changed, 144 insertions(+), 53 deletions(-)
+
+Thanks in advance.
+
+Coly Li
diff --git a/for-next/0001-bcache-exit-bch_writeback_thread-with-proper-task-st.patch b/for-next/v1-0001-bcache-exit-bch_writeback_thread-with-proper-task.patch
index c204050..1ce1bfb 100644
--- a/for-next/0001-bcache-exit-bch_writeback_thread-with-proper-task-st.patch
+++ b/for-next/v1-0001-bcache-exit-bch_writeback_thread-with-proper-task.patch
@@ -1,7 +1,7 @@
-From 9ec112b1a030b9d19c18797f7a21111f8d9a7717 Mon Sep 17 00:00:00 2001
+From 02cd6111e6e305665b9b734b41d9e66735eefba5 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Wed, 20 Dec 2017 20:32:58 +0800
-Subject: [PATCH 01/10] bcache: exit bch_writeback_thread() with proper task
+Subject: [PATCH v1 01/10] bcache: exit bch_writeback_thread() with proper task
state
Kernel thread routine bch_writeback_thread() has the following code block,
diff --git a/for-next/0002-bcache-set-task-properly-in-allocator_wait.patch b/for-next/v1-0002-bcache-set-task-properly-in-allocator_wait.patch
index b520d99..a9b6799 100644
--- a/for-next/0002-bcache-set-task-properly-in-allocator_wait.patch
+++ b/for-next/v1-0002-bcache-set-task-properly-in-allocator_wait.patch
@@ -1,7 +1,7 @@
-From 68e716ba42c801ec40cfb21c8f6a671b96794068 Mon Sep 17 00:00:00 2001
+From 9eb34cfed6f7cf086a31d0e01f79548aaa82eab9 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Wed, 20 Dec 2017 22:37:11 +0800
-Subject: [PATCH 02/10] bcache: set task properly in allocator_wait()
+Subject: [PATCH v1 02/10] bcache: set task properly in allocator_wait()
Kernel thread routine bch_allocator_thread() references macro
allocator_wait() to wait for a condition or quit to do_exit()
diff --git a/for-next/0003-bcache-reduce-cache_set-devices-iteration-by-devices.patch b/for-next/v1-0003-bcache-reduce-cache_set-devices-iteration-by-devi.patch
index 49463df..8cbf66c 100644
--- a/for-next/0003-bcache-reduce-cache_set-devices-iteration-by-devices.patch
+++ b/for-next/v1-0003-bcache-reduce-cache_set-devices-iteration-by-devi.patch
@@ -1,7 +1,7 @@
-From 38397c2aae3ac1c58d6d5d2b4059b3d82e0020aa Mon Sep 17 00:00:00 2001
+From fd33195d255d0f152d9e2b36032b1cc816ededb3 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Wed, 20 Dec 2017 23:27:41 +0800
-Subject: [PATCH 03/10] bcache: reduce cache_set devices iteration by
+Subject: [PATCH v1 03/10] bcache: reduce cache_set devices iteration by
devices_max_used
Member devices of struct cache_set is used to reference all attached
diff --git a/for-next/0004-bcache-fix-cached_dev-count-usage-for-bch_cache_set_.patch b/for-next/v1-0004-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch
index 648e890..e3975b2 100644
--- a/for-next/0004-bcache-fix-cached_dev-count-usage-for-bch_cache_set_.patch
+++ b/for-next/v1-0004-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch
@@ -1,7 +1,7 @@
-From e6582cee9e6743449a848c574cf236ee2f4fa1c5 Mon Sep 17 00:00:00 2001
+From d697858f6f515b4bacee984c82535cf2b896ace9 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Fri, 22 Dec 2017 16:37:17 +0800
-Subject: [PATCH 04/10] bcache: fix cached_dev->count usage for
+Subject: [PATCH v1 04/10] bcache: fix cached_dev->count usage for
bch_cache_set_error()
When bcache metadata I/O fails, bcache will call bch_cache_set_error()
@@ -35,7 +35,7 @@ callback cache_set_flush() set by continue_at() will never be called. The
result is, bcache fails to retire whole cache set.
cache_set_flush() will be called when refcount of closure c->caching is 0,
-and in fuction bcache_device_detach() refcount of closure c->caching is
+and in function bcache_device_detach() refcount of closure c->caching is
released to 0 by closure_put(). In metadata error code path, function
bcache_device_detach() is called by cached_dev_detach_finish(). This is a
callback routine being called when cached_dev->count is 0. This refcount
@@ -63,15 +63,15 @@ make sure during writeback operatiions both backing and cache devices
won't be released.
Adding following code in bch_writeback_thread() does not work,
-@@ -484,6 +484,9 @@ static int bch_writeback_thread(void *arg)
- }
- }
+ static int bch_writeback_thread(void *arg)
+ [code snip]
-+ if (atomic_read(&dc->has_dirty))
-+ cached_dev_put()
-+
+ + if (atomic_read(&dc->has_dirty))
+ + cached_dev_put()
+ +
return 0;
- }
+ [code snip]
+
because writeback kernel thread can be waken up and start via sysfs entry:
echo 1 > /sys/block/bcache<N>/bcache/writeback_running
It is difficult to check whether backing device is dirty without race and
diff --git a/for-next/0005-bcache-stop-dc-writeback_rate_update-if-cache-set-is.patch b/for-next/v1-0005-bcache-stop-dc-writeback_rate_update-if-cache-set.patch
index b13696a..d3e78e8 100644
--- a/for-next/0005-bcache-stop-dc-writeback_rate_update-if-cache-set-is.patch
+++ b/for-next/v1-0005-bcache-stop-dc-writeback_rate_update-if-cache-set.patch
@@ -1,8 +1,8 @@
-From b0a1ed28cd164bb240a0b564472100ff7776ac3a Mon Sep 17 00:00:00 2001
+From 1a9aae02c180b47b2ae2ef9c61915b2b694d1fc2 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Sat, 23 Dec 2017 01:50:19 +0800
-Subject: [PATCH 05/10] bcache: stop dc->writeback_rate_update if cache set is
- stopping
+Subject: [PATCH v1 05/10] bcache: stop dc->writeback_rate_update if cache set
+ is stopping
struct delayed_work writeback_rate_update in struct cache_dev is a delayed
worker to call function update_writeback_rate() in period (the interval is
@@ -22,7 +22,7 @@ dc->writeback_rate_update_seconds.
The problem is, after cancel_delayed_work_sync() returns, the cache set
unregister code path will eventually release memory of struct cache set.
-Then the delayed work is scheduled to run, and inside its rountine
+Then the delayed work is scheduled to run, and inside its routine
update_writeback_rate() that already released cache set NULL pointer will
be accessed. Now a NULL pointer dereference panic is triggered.
@@ -37,7 +37,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
1 file changed, 9 insertions(+)
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index 0789a9e18337..f25fb6cb383f 100644
+index 0789a9e18337..745d9b2a326f 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -91,6 +91,11 @@ static void update_writeback_rate(struct work_struct *work)
@@ -59,7 +59,7 @@ index 0789a9e18337..f25fb6cb383f 100644
+ /* do not schedule delayed work if cache set is stopping */
+ if (test_bit(CACHE_SET_STOPPING, &c->flags))
+ return;
-+
++
schedule_delayed_work(&dc->writeback_rate_update,
dc->writeback_rate_update_seconds * HZ);
}
diff --git a/for-next/0007-bcache-stop-dc-writeback_rate_update-dc-writeback_th.patch b/for-next/v1-0006-bcache-stop-dc-writeback_rate_update-dc-writeback.patch
index 9e4e0ec..53ce3f2 100644
--- a/for-next/0007-bcache-stop-dc-writeback_rate_update-dc-writeback_th.patch
+++ b/for-next/v1-0006-bcache-stop-dc-writeback_rate_update-dc-writeback.patch
@@ -1,7 +1,7 @@
-From 8fde6171a41690507657778cb40c24de33a6ccdf Mon Sep 17 00:00:00 2001
+From 2da5b83720460c83d0f20d0771a0c955e60028e8 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Wed, 3 Jan 2018 00:03:45 +0800
-Subject: [PATCH 07/10] bcache: stop dc->writeback_rate_update,
+Subject: [PATCH v1 06/10] bcache: stop dc->writeback_rate_update,
dc->writeback_thread earlier
Delayed worker dc->writeback_rate_update and kernel thread
@@ -52,7 +52,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
3 files changed, 21 insertions(+), 2 deletions(-)
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 42f20261c7bc..c2fef678a16a 100644
+index 83c569942bd0..395b87942a2f 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -322,6 +322,7 @@ struct cached_dev {
@@ -64,7 +64,7 @@ index 42f20261c7bc..c2fef678a16a 100644
/*
* Internal to the writeback code, so read_dirty() can keep track of
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 1a32c2739acb..a8f7f23cd448 100644
+index 5401d2356aa3..8912be4165c5 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -696,8 +696,20 @@ static void bcache_device_link(struct bcache_device *d, struct cache_set *c,
@@ -106,10 +106,10 @@ index 1a32c2739acb..a8f7f23cd448 100644
destroy_workqueue(dc->writeback_write_wq);
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index c1529cb54478..fe1208f8808e 100644
+index 745d9b2a326f..ab2ac3d72393 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
-@@ -559,6 +559,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
+@@ -548,6 +548,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
dc->writeback_rate_i_term_inverse = 10000;
INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
diff --git a/for-next/0008-bcache-set-error_limit-correctly.patch b/for-next/v1-0007-bcache-set-error_limit-correctly.patch
index 99c64fd..2d5b243 100644
--- a/for-next/0008-bcache-set-error_limit-correctly.patch
+++ b/for-next/v1-0007-bcache-set-error_limit-correctly.patch
@@ -1,7 +1,7 @@
-From 9062df227769049b4169d7b819c8430b770c10f6 Mon Sep 17 00:00:00 2001
+From 93e9a82ee54b8fb1e50c4df95a74ab2953aec9ff Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
-Date: Wed, 3 Jan 2018 14:06:24 +0800
-Subject: [PATCH 08/10] bcache: set error_limit correctly
+Date: Wed, 3 Jan 2018 20:37:27 +0800
+Subject: [PATCH v1 07/10] bcache: set error_limit correctly
Struct cache uses io_errors for two purposes,
- Error decay: when cache set error_decay is set, io_errors is used to
@@ -15,25 +15,25 @@ error limit, bch_cache_set_error() will be called to retire the whold cache
set. But current code is problematic when checking the error limit, see the
following code piece from bch_count_io_errors(),
- 90 if (error) {
- 91 char buf[BDEVNAME_SIZE];
- 92 unsigned errors = atomic_add_return(1 << IO_ERROR_SHIFT,
- 93 &ca->io_errors);
- 94 errors >>= IO_ERROR_SHIFT;
+ 90 if (error) {
+ 91 char buf[BDEVNAME_SIZE];
+ 92 unsigned errors = atomic_add_return(1 << IO_ERROR_SHIFT,
+ 93 &ca->io_errors);
+ 94 errors >>= IO_ERROR_SHIFT;
95
- 96 if (errors < ca->set->error_limit)
- 97 pr_err("%s: IO error on %s, recovering",
- 98 bdevname(ca->bdev, buf), m);
- 99 else
-100 bch_cache_set_error(ca->set,
-101 "%s: too many IO errors %s",
-102 bdevname(ca->bdev, buf), m);
-103 }
+ 96 if (errors < ca->set->error_limit)
+ 97 pr_err("%s: IO error on %s, recovering",
+ 98 bdevname(ca->bdev, buf), m);
+ 99 else
+100 bch_cache_set_error(ca->set,
+101 "%s: too many IO errors %s",
+102 bdevname(ca->bdev, buf), m);
+103 }
At line 94, errors is right shifting IO_ERROR_SHIFT bits, now it is real
errors counter to compare at line 96. But ca->set->error_limit is initia-
lized with an amplified value in bch_cache_set_alloc(),
-1545 c->error_limit = 8 << IO_ERROR_SHIFT;
+1545 c->error_limit = 8 << IO_ERROR_SHIFT;
It means by default, in bch_count_io_errors(), before 8<<20 errors happened
bch_cache_set_error() won't be called to retire the problematic cache
@@ -63,35 +63,35 @@ Signed-off-by: Coly Li <colyli@suse.de>
3 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index c2fef678a16a..6b43083346a5 100644
+index 395b87942a2f..a31dc3737dae 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
-@@ -655,6 +655,7 @@ struct cache_set {
+@@ -654,6 +654,7 @@ struct cache_set {
ON_ERROR_UNREGISTER,
ON_ERROR_PANIC,
} on_error;
-+#define DEFAULT_IO_ERROR_LIMIT 8
++#define DEFAULT_IO_ERROR_LIMIT 8
unsigned error_limit;
unsigned error_decay;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index a8f7f23cd448..b49f480ab8a3 100644
+index 8912be4165c5..02d9d7110769 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
-@@ -1564,7 +1564,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
+@@ -1561,7 +1561,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
c->congested_read_threshold_us = 2000;
c->congested_write_threshold_us = 20000;
- c->error_limit = 8 << IO_ERROR_SHIFT;
+ c->error_limit = DEFAULT_IO_ERROR_LIMIT;
- c->io_disable = false;
return c;
+ err:
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index cf05c35a3665..acce7c82e111 100644
+index b4184092c727..d7ce9a05b304 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
-@@ -557,7 +557,7 @@ SHOW(__bch_cache_set)
+@@ -556,7 +556,7 @@ SHOW(__bch_cache_set)
/* See count_io_errors for why 88 */
sysfs_print(io_error_halflife, c->error_decay * 88);
@@ -100,7 +100,7 @@ index cf05c35a3665..acce7c82e111 100644
sysfs_hprint(congested,
((uint64_t) bch_get_congested(c)) << 9);
-@@ -658,7 +658,7 @@ STORE(__bch_cache_set)
+@@ -656,7 +656,7 @@ STORE(__bch_cache_set)
}
if (attr == &sysfs_io_error_limit)
diff --git a/for-next/0009-bcache-fix-misleading-error-message-in-bch_count_io_.patch b/for-next/v1-0008-bcache-fix-misleading-error-message-in-bch_count_.patch
index f201226..18a5c32 100644
--- a/for-next/0009-bcache-fix-misleading-error-message-in-bch_count_io_.patch
+++ b/for-next/v1-0008-bcache-fix-misleading-error-message-in-bch_count_.patch
@@ -1,7 +1,7 @@
-From 7b33c2472cedd608f7c683ac5cac4349c87cf9c7 Mon Sep 17 00:00:00 2001
+From 80d7abeee0b81a7ee0e3789bac9580f540437d0e Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Wed, 3 Jan 2018 15:59:33 +0800
-Subject: [PATCH 09/10] bcache: fix misleading error message in
+Subject: [PATCH v1 08/10] bcache: fix misleading error message in
bch_count_io_errors()
Bcache only does recoverable I/O for read operations by calling
@@ -25,10 +25,10 @@ Signed-off-by: Coly Li <colyli@suse.de>
4 files changed, 16 insertions(+), 7 deletions(-)
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 6b43083346a5..607a9186d6ad 100644
+index a31dc3737dae..c53f312b2216 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
-@@ -869,7 +869,7 @@ static inline void closure_bio_submit(struct cache_set *c,
+@@ -855,7 +855,7 @@ static inline void wake_up_allocators(struct cache_set *c)
/* Forward declarations */
@@ -38,7 +38,7 @@ index 6b43083346a5..607a9186d6ad 100644
blk_status_t, const char *);
void bch_bbio_endio(struct cache_set *, struct bio *, blk_status_t,
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
-index c456095d2bbe..8013ecbcdbda 100644
+index fac97ec2d0e2..a783c5a41ff1 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -51,7 +51,10 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c,
@@ -83,7 +83,7 @@ index c456095d2bbe..8013ecbcdbda 100644
void bch_bbio_endio(struct cache_set *c, struct bio *bio,
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index b49f480ab8a3..49d6fedf89c3 100644
+index 02d9d7110769..bbe911847eea 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -274,7 +274,9 @@ static void write_super_endio(struct bio *bio)
@@ -98,10 +98,10 @@ index b49f480ab8a3..49d6fedf89c3 100644
}
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index fe1208f8808e..abadf524b5dd 100644
+index ab2ac3d72393..e58f9be5ae43 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
-@@ -234,8 +234,10 @@ static void read_dirty_endio(struct bio *bio)
+@@ -228,8 +228,10 @@ static void read_dirty_endio(struct bio *bio)
struct keybuf_key *w = bio->bi_private;
struct dirty_io *io = w->private;
diff --git a/for-next/0006-bcache-add-io_disable-to-struct-cache_set.patch b/for-next/v1-0009-bcache-add-io_disable-to-struct-cache_set.patch
index 1acdbaa..1c6159e 100644
--- a/for-next/0006-bcache-add-io_disable-to-struct-cache_set.patch
+++ b/for-next/v1-0009-bcache-add-io_disable-to-struct-cache_set.patch
@@ -1,7 +1,7 @@
-From e24bde4ddce438d6b1eb81b29245a585f31e2199 Mon Sep 17 00:00:00 2001
+From 5996e95d633ad28ebbd113004efc488162cd22b7 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Tue, 2 Jan 2018 17:31:07 +0800
-Subject: [PATCH 06/10] bcache: add io_disable to struct cache_set
+Subject: [PATCH v1 09/10] bcache: add io_disable to struct cache_set
When too many I/Os failed on cache device, bch_cache_set_error() is called
in the error handling code path to retire whole problematic cache set. If
@@ -38,11 +38,11 @@ Signed-off-by: Coly Li <colyli@suse.de>
drivers/md/bcache/io.c | 2 +-
drivers/md/bcache/journal.c | 4 ++--
drivers/md/bcache/request.c | 26 +++++++++++++++++++-------
- drivers/md/bcache/super.c | 6 +++++-
+ drivers/md/bcache/super.c | 7 ++++++-
drivers/md/bcache/sysfs.c | 4 ++++
drivers/md/bcache/util.h | 6 ------
- drivers/md/bcache/writeback.c | 33 ++++++++++++++++++++++-----------
- 10 files changed, 72 insertions(+), 31 deletions(-)
+ drivers/md/bcache/writeback.c | 34 ++++++++++++++++++++++------------
+ 10 files changed, 73 insertions(+), 32 deletions(-)
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 48c002faf08d..3be737582f27 100644
@@ -58,10 +58,10 @@ index 48c002faf08d..3be737582f27 100644
\
set_current_state(TASK_INTERRUPTIBLE); \
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 83c569942bd0..42f20261c7bc 100644
+index c53f312b2216..9c7f9b1cb791 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
-@@ -480,6 +480,7 @@ struct cache_set {
+@@ -481,6 +481,7 @@ struct cache_set {
struct cache_accounting accounting;
unsigned long flags;
@@ -69,7 +69,7 @@ index 83c569942bd0..42f20261c7bc 100644
struct cache_sb sb;
-@@ -851,6 +852,19 @@ static inline void wake_up_allocators(struct cache_set *c)
+@@ -853,6 +854,19 @@ static inline void wake_up_allocators(struct cache_set *c)
wake_up_process(ca->alloc_thread);
}
@@ -88,7 +88,7 @@ index 83c569942bd0..42f20261c7bc 100644
+
/* Forward declarations */
- void bch_count_io_errors(struct cache *, blk_status_t, const char *);
+ void bch_count_io_errors(struct cache *, blk_status_t, int, const char *);
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index bf0d7978bc3d..75470cce1177 100644
--- a/drivers/md/bcache/btree.c
@@ -108,7 +108,7 @@ index bf0d7978bc3d..75470cce1177 100644
set_gc_sectors(c);
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
-index fac97ec2d0e2..c456095d2bbe 100644
+index a783c5a41ff1..8013ecbcdbda 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -38,7 +38,7 @@ void __bch_submit_bbio(struct bio *bio, struct cache_set *c)
@@ -233,10 +233,10 @@ index 643c3021624f..a85d6a605a8e 100644
s = search_alloc(bio, d);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 5401d2356aa3..1a32c2739acb 100644
+index bbe911847eea..7aa76c3e3556 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
-@@ -519,7 +519,7 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op,
+@@ -521,7 +521,7 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op,
bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags);
bch_bio_map(bio, ca->disk_buckets);
@@ -245,26 +245,27 @@ index 5401d2356aa3..1a32c2739acb 100644
closure_sync(cl);
}
-@@ -1314,6 +1314,9 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
+@@ -1333,6 +1333,10 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
acquire_console_sem();
*/
+ c->io_disable = true;
++ /* make others know io_disable is true earlier */
+ smp_mb();
+
printk(KERN_ERR "bcache: error on %pU: ", c->sb.set_uuid);
va_start(args, fmt);
-@@ -1545,6 +1548,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
+@@ -1564,6 +1568,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
c->congested_read_threshold_us = 2000;
c->congested_write_threshold_us = 20000;
- c->error_limit = 8 << IO_ERROR_SHIFT;
+ c->error_limit = DEFAULT_IO_ERROR_LIMIT;
+ c->io_disable = false;
return c;
err:
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index b4184092c727..cf05c35a3665 100644
+index d7ce9a05b304..acce7c82e111 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -92,6 +92,7 @@ read_attribute(partial_stripes_expensive);
@@ -317,7 +318,7 @@ index ed5e8a412eb8..03e533631798 100644
uint64_t bch_crc64(const void *, size_t);
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index f25fb6cb383f..c1529cb54478 100644
+index e58f9be5ae43..54add41d2569 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -93,8 +93,11 @@ static void update_writeback_rate(struct work_struct *work)
@@ -346,7 +347,7 @@ index f25fb6cb383f..c1529cb54478 100644
+ */
+ if (test_bit(CACHE_SET_STOPPING, &c->flags) || c->io_disable)
return;
-
+
schedule_delayed_work(&dc->writeback_rate_update,
@@ -217,7 +223,7 @@ static void write_dirty(struct closure *cl)
bio_set_dev(&io->bio, io->dc->bdev);
@@ -357,7 +358,7 @@ index f25fb6cb383f..c1529cb54478 100644
}
continue_at(cl, write_dirty_finish, io->dc->writeback_write_wq);
-@@ -238,7 +244,7 @@ static void read_dirty_submit(struct closure *cl)
+@@ -240,7 +246,7 @@ static void read_dirty_submit(struct closure *cl)
{
struct dirty_io *io = container_of(cl, struct dirty_io, cl);
@@ -366,7 +367,7 @@ index f25fb6cb383f..c1529cb54478 100644
continue_at(cl, write_dirty, io->dc->writeback_write_wq);
}
-@@ -257,7 +263,7 @@ static void read_dirty(struct cached_dev *dc)
+@@ -259,7 +265,7 @@ static void read_dirty(struct cached_dev *dc)
* mempools.
*/
@@ -375,7 +376,7 @@ index f25fb6cb383f..c1529cb54478 100644
w = bch_keybuf_next(&dc->writeback_keys);
if (!w)
-@@ -267,7 +273,9 @@ static void read_dirty(struct cached_dev *dc)
+@@ -269,7 +275,9 @@ static void read_dirty(struct cached_dev *dc)
if (KEY_START(&w->key) != dc->last_read ||
jiffies_to_msecs(delay) > 50)
@@ -386,7 +387,7 @@ index f25fb6cb383f..c1529cb54478 100644
delay = schedule_timeout_interruptible(delay);
dc->last_read = KEY_OFFSET(&w->key);
-@@ -448,18 +456,19 @@ static bool refill_dirty(struct cached_dev *dc)
+@@ -450,18 +458,19 @@ static bool refill_dirty(struct cached_dev *dc)
static int bch_writeback_thread(void *arg)
{
struct cached_dev *dc = arg;
@@ -408,16 +409,18 @@ index f25fb6cb383f..c1529cb54478 100644
break;
set_current_state(TASK_INTERRUPTIBLE);
-@@ -485,13 +494,15 @@ static int bch_writeback_thread(void *arg)
-
- while (delay &&
- !kthread_should_stop() &&
-- !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
-+ !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
-+ !c->io_disable)
+@@ -485,8 +494,8 @@ static int bch_writeback_thread(void *arg)
+ if (searched_full_index) {
+ unsigned delay = dc->writeback_delay * HZ;
+
+- while (delay &&
+- !kthread_should_stop() &&
++ while (delay && !kthread_should_stop() &&
++ !c->io_disable &&
+ !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
delay = schedule_timeout_interruptible(delay);
- bch_ratelimit_reset(&dc->writeback_rate);
+@@ -494,6 +503,7 @@ static int bch_writeback_thread(void *arg)
}
}
diff --git a/for-next/0010-bcache-stop-all-attached-bcache-devices-for-a-retire.patch b/for-next/v1-0010-bcache-stop-all-attached-bcache-devices-for-a-ret.patch
index a023bc2..062caae 100644
--- a/for-next/0010-bcache-stop-all-attached-bcache-devices-for-a-retire.patch
+++ b/for-next/v1-0010-bcache-stop-all-attached-bcache-devices-for-a-ret.patch
@@ -1,8 +1,8 @@
-From c21aff9a0c26503714e7cd313ca1529cc0816393 Mon Sep 17 00:00:00 2001
+From 595d5d28a7ed23cae061b9e0dd201611afd6db6d Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Wed, 3 Jan 2018 18:24:55 +0800
-Subject: [PATCH 10/10] bcache: stop all attached bcache devices for a retired
- cache set
+Subject: [PATCH v1 10/10] bcache: stop all attached bcache devices for a
+ retired cache set
When there are too many I/O errors on cache device, current bcache code
will retire the whole cache set, and detach all bcache devices. But the