diff options
author | Coly Li <colyli@suse.de> | 2018-01-03 22:15:19 +0800 |
---|---|---|
committer | Coly Li <colyli@suse.de> | 2018-01-03 22:15:19 +0800 |
commit | b39f6105fd5f02d0239af2a86cb2ff01f369f1ae (patch) | |
tree | b638062bc5b55eaa968bd349683ee2995d099be3 | |
parent | ba0f907d04681ce88c2a75899e46234c38323c41 (diff) | |
download | bcache-patches-b39f6105fd5f02d0239af2a86cb2ff01f369f1ae.tar.gz |
for-next: update patches to the version sent to upstream for review
-rw-r--r-- | for-next/v1-0000-cover-letter.patch | 56 | ||||
-rw-r--r-- | for-next/v1-0001-bcache-exit-bch_writeback_thread-with-proper-task.patch (renamed from for-next/0001-bcache-exit-bch_writeback_thread-with-proper-task-st.patch) | 4 | ||||
-rw-r--r-- | for-next/v1-0002-bcache-set-task-properly-in-allocator_wait.patch (renamed from for-next/0002-bcache-set-task-properly-in-allocator_wait.patch) | 4 | ||||
-rw-r--r-- | for-next/v1-0003-bcache-reduce-cache_set-devices-iteration-by-devi.patch (renamed from for-next/0003-bcache-reduce-cache_set-devices-iteration-by-devices.patch) | 4 | ||||
-rw-r--r-- | for-next/v1-0004-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch (renamed from for-next/0004-bcache-fix-cached_dev-count-usage-for-bch_cache_set_.patch) | 20 | ||||
-rw-r--r-- | for-next/v1-0005-bcache-stop-dc-writeback_rate_update-if-cache-set.patch (renamed from for-next/0005-bcache-stop-dc-writeback_rate_update-if-cache-set-is.patch) | 12 | ||||
-rw-r--r-- | for-next/v1-0006-bcache-stop-dc-writeback_rate_update-dc-writeback.patch (renamed from for-next/0007-bcache-stop-dc-writeback_rate_update-dc-writeback_th.patch) | 12 | ||||
-rw-r--r-- | for-next/v1-0007-bcache-set-error_limit-correctly.patch (renamed from for-next/0008-bcache-set-error_limit-correctly.patch) | 52 | ||||
-rw-r--r-- | for-next/v1-0008-bcache-fix-misleading-error-message-in-bch_count_.patch (renamed from for-next/0009-bcache-fix-misleading-error-message-in-bch_count_io_.patch) | 16 | ||||
-rw-r--r-- | for-next/v1-0009-bcache-add-io_disable-to-struct-cache_set.patch (renamed from for-next/0006-bcache-add-io_disable-to-struct-cache_set.patch) | 63 | ||||
-rw-r--r-- | for-next/v1-0010-bcache-stop-all-attached-bcache-devices-for-a-ret.patch (renamed from for-next/0010-bcache-stop-all-attached-bcache-devices-for-a-retire.patch) | 6 |
11 files changed, 154 insertions, 95 deletions
diff --git a/for-next/v1-0000-cover-letter.patch b/for-next/v1-0000-cover-letter.patch new file mode 100644 index 0000000..0ac36de --- /dev/null +++ b/for-next/v1-0000-cover-letter.patch @@ -0,0 +1,56 @@ +From 595d5d28a7ed23cae061b9e0dd201611afd6db6d Mon Sep 17 00:00:00 2001 +From: Coly Li <colyli@suse.de> +Date: Wed, 3 Jan 2018 21:20:57 +0800 +Subject: [PATCH v1 00/10] cache device failure handling improvement + +Hi maintainers and folks, + +This patch set tries to improve cache device failure handling. A basic +idea to handle failed cache device is, +- Unregister cache set +- Detach all backing devices attached to this cache set +- Stop all bcache devices linked to this cache set +The above process is named 'cache set retire' by me. The result of cache +set retire is, cache set and bcache devices are all removed, following +I/O requests will get failed immediately to notify upper layer or user +space code that the cache device is failed or disconnected. + +The first 8 patches of this patch set are to fix existing bugs in bcache, +the last 2 patches do the real improvement. Order of applying these patches +is important, if the last 2 patches are applied firstly, kernel panic or +process hang will be observed. Therefore I suggest to apply the first 8 +fixes, then apply the last 2 patches. + +The patch set is tested with writethrough, writeback, writearound mode, +read/write/readwrite workloads, so far it works as expected. IMHO the +cache set retire logic is complicated, I need your help to review the +patches, any question is warmly welcome. 
+ +Coly Li (10): + bcache: exit bch_writeback_thread() with proper task state + bcache: set task properly in allocator_wait() + bcache: reduce cache_set devices iteration by devices_max_used + bcache: fix cached_dev->count usage for bch_cache_set_error() + bcache: stop dc->writeback_rate_update if cache set is stopping + bcache: stop dc->writeback_rate_update, dc->writeback_thread earlier + bcache: set error_limit correctly + bcache: fix misleading error message in bch_count_io_errors() + bcache: add io_disable to struct cache_set + bcache: stop all attached bcache devices for a retired cache set + + drivers/md/bcache/alloc.c | 5 ++--- + drivers/md/bcache/bcache.h | 19 +++++++++++++++- + drivers/md/bcache/btree.c | 8 ++++--- + drivers/md/bcache/io.c | 15 ++++++++----- + drivers/md/bcache/journal.c | 4 ++-- + drivers/md/bcache/request.c | 26 ++++++++++++++++------ + drivers/md/bcache/super.c | 51 +++++++++++++++++++++++++++++++++++-------- + drivers/md/bcache/sysfs.c | 8 +++++-- + drivers/md/bcache/util.h | 6 ----- + drivers/md/bcache/writeback.c | 51 +++++++++++++++++++++++++++++++++---------- + drivers/md/bcache/writeback.h | 4 +--- + 11 files changed, 144 insertions(+), 53 deletions(-) + +Thanks in advance. 
+ +Coly Li diff --git a/for-next/0001-bcache-exit-bch_writeback_thread-with-proper-task-st.patch b/for-next/v1-0001-bcache-exit-bch_writeback_thread-with-proper-task.patch index c204050..1ce1bfb 100644 --- a/for-next/0001-bcache-exit-bch_writeback_thread-with-proper-task-st.patch +++ b/for-next/v1-0001-bcache-exit-bch_writeback_thread-with-proper-task.patch @@ -1,7 +1,7 @@ -From 9ec112b1a030b9d19c18797f7a21111f8d9a7717 Mon Sep 17 00:00:00 2001 +From 02cd6111e6e305665b9b734b41d9e66735eefba5 Mon Sep 17 00:00:00 2001 From: Coly Li <colyli@suse.de> Date: Wed, 20 Dec 2017 20:32:58 +0800 -Subject: [PATCH 01/10] bcache: exit bch_writeback_thread() with proper task +Subject: [PATCH v1 01/10] bcache: exit bch_writeback_thread() with proper task state Kernel thread routine bch_writeback_thread() has the following code block, diff --git a/for-next/0002-bcache-set-task-properly-in-allocator_wait.patch b/for-next/v1-0002-bcache-set-task-properly-in-allocator_wait.patch index b520d99..a9b6799 100644 --- a/for-next/0002-bcache-set-task-properly-in-allocator_wait.patch +++ b/for-next/v1-0002-bcache-set-task-properly-in-allocator_wait.patch @@ -1,7 +1,7 @@ -From 68e716ba42c801ec40cfb21c8f6a671b96794068 Mon Sep 17 00:00:00 2001 +From 9eb34cfed6f7cf086a31d0e01f79548aaa82eab9 Mon Sep 17 00:00:00 2001 From: Coly Li <colyli@suse.de> Date: Wed, 20 Dec 2017 22:37:11 +0800 -Subject: [PATCH 02/10] bcache: set task properly in allocator_wait() +Subject: [PATCH v1 02/10] bcache: set task properly in allocator_wait() Kernel thread routine bch_allocator_thread() references macro allocator_wait() to wait for a condition or quit to do_exit() diff --git a/for-next/0003-bcache-reduce-cache_set-devices-iteration-by-devices.patch b/for-next/v1-0003-bcache-reduce-cache_set-devices-iteration-by-devi.patch index 49463df..8cbf66c 100644 --- a/for-next/0003-bcache-reduce-cache_set-devices-iteration-by-devices.patch +++ b/for-next/v1-0003-bcache-reduce-cache_set-devices-iteration-by-devi.patch @@ -1,7 +1,7 
@@ -From 38397c2aae3ac1c58d6d5d2b4059b3d82e0020aa Mon Sep 17 00:00:00 2001 +From fd33195d255d0f152d9e2b36032b1cc816ededb3 Mon Sep 17 00:00:00 2001 From: Coly Li <colyli@suse.de> Date: Wed, 20 Dec 2017 23:27:41 +0800 -Subject: [PATCH 03/10] bcache: reduce cache_set devices iteration by +Subject: [PATCH v1 03/10] bcache: reduce cache_set devices iteration by devices_max_used Member devices of struct cache_set is used to reference all attached diff --git a/for-next/0004-bcache-fix-cached_dev-count-usage-for-bch_cache_set_.patch b/for-next/v1-0004-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch index 648e890..e3975b2 100644 --- a/for-next/0004-bcache-fix-cached_dev-count-usage-for-bch_cache_set_.patch +++ b/for-next/v1-0004-bcache-fix-cached_dev-count-usage-for-bch_cache_s.patch @@ -1,7 +1,7 @@ -From e6582cee9e6743449a848c574cf236ee2f4fa1c5 Mon Sep 17 00:00:00 2001 +From d697858f6f515b4bacee984c82535cf2b896ace9 Mon Sep 17 00:00:00 2001 From: Coly Li <colyli@suse.de> Date: Fri, 22 Dec 2017 16:37:17 +0800 -Subject: [PATCH 04/10] bcache: fix cached_dev->count usage for +Subject: [PATCH v1 04/10] bcache: fix cached_dev->count usage for bch_cache_set_error() When bcache metadata I/O fails, bcache will call bch_cache_set_error() @@ -35,7 +35,7 @@ callback cache_set_flush() set by continue_at() will never be called. The result is, bcache fails to retire whole cache set. cache_set_flush() will be called when refcount of closure c->caching is 0, -and in fuction bcache_device_detach() refcount of closure c->caching is +and in function bcache_device_detach() refcount of closure c->caching is released to 0 by clousre_put(). In metadata error code path, function bcache_device_detach() is called by cached_dev_detach_finish(). This is a callback routine being called when cached_dev->count is 0. This refcount @@ -63,15 +63,15 @@ make sure during writeback operatiions both backing and cache devices won't be released. 
Adding following code in bch_writeback_thread() does not work, -@@ -484,6 +484,9 @@ static int bch_writeback_thread(void *arg) - } - } + static int bch_writeback_thread(void *arg) + [code snip] -+ if (atomic_read(&dc->has_dirty)) -+ cached_dev_put() -+ + + if (atomic_read(&dc->has_dirty)) + + cached_dev_put() + + return 0; - } + [code snip] + because writeback kernel thread can be waken up and start via sysfs entry: echo 1 > /sys/block/bcache<N>/bcache/writeback_running It is difficult to check whether backing device is dirty without race and diff --git a/for-next/0005-bcache-stop-dc-writeback_rate_update-if-cache-set-is.patch b/for-next/v1-0005-bcache-stop-dc-writeback_rate_update-if-cache-set.patch index b13696a..d3e78e8 100644 --- a/for-next/0005-bcache-stop-dc-writeback_rate_update-if-cache-set-is.patch +++ b/for-next/v1-0005-bcache-stop-dc-writeback_rate_update-if-cache-set.patch @@ -1,8 +1,8 @@ -From b0a1ed28cd164bb240a0b564472100ff7776ac3a Mon Sep 17 00:00:00 2001 +From 1a9aae02c180b47b2ae2ef9c61915b2b694d1fc2 Mon Sep 17 00:00:00 2001 From: Coly Li <colyli@suse.de> Date: Sat, 23 Dec 2017 01:50:19 +0800 -Subject: [PATCH 05/10] bcache: stop dc->writeback_rate_update if cache set is - stopping +Subject: [PATCH v1 05/10] bcache: stop dc->writeback_rate_update if cache set + is stopping struct delayed_work writeback_rate_update in struct cache_dev is a delayed worker to call function update_writeback_rate() in period (the interval is @@ -22,7 +22,7 @@ dc->writeback_rate_update_seconds. The problem is, after cancel_delayed_work_sync() returns, the cache set unregister code path will eventually release memory of struct cache set. -Then the delayed work is scheduled to run, and inside its rountine +Then the delayed work is scheduled to run, and inside its routine update_writeback_rate() that already released cache set NULL pointer will be accessed. Now a NULL pointer deference panic is triggered. 
@@ -37,7 +37,7 @@ Signed-off-by: Coly Li <colyli@suse.de> 1 file changed, 9 insertions(+) diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c -index 0789a9e18337..f25fb6cb383f 100644 +index 0789a9e18337..745d9b2a326f 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -91,6 +91,11 @@ static void update_writeback_rate(struct work_struct *work) @@ -59,7 +59,7 @@ index 0789a9e18337..f25fb6cb383f 100644 + /* do not schedule delayed work if cache set is stopping */ + if (test_bit(CACHE_SET_STOPPING, &c->flags)) + return; -+ ++ schedule_delayed_work(&dc->writeback_rate_update, dc->writeback_rate_update_seconds * HZ); } diff --git a/for-next/0007-bcache-stop-dc-writeback_rate_update-dc-writeback_th.patch b/for-next/v1-0006-bcache-stop-dc-writeback_rate_update-dc-writeback.patch index 9e4e0ec..53ce3f2 100644 --- a/for-next/0007-bcache-stop-dc-writeback_rate_update-dc-writeback_th.patch +++ b/for-next/v1-0006-bcache-stop-dc-writeback_rate_update-dc-writeback.patch @@ -1,7 +1,7 @@ -From 8fde6171a41690507657778cb40c24de33a6ccdf Mon Sep 17 00:00:00 2001 +From 2da5b83720460c83d0f20d0771a0c955e60028e8 Mon Sep 17 00:00:00 2001 From: Coly Li <colyli@suse.de> Date: Wed, 3 Jan 2018 00:03:45 +0800 -Subject: [PATCH 07/10] bcache: stop dc->writeback_rate_update, +Subject: [PATCH v1 06/10] bcache: stop dc->writeback_rate_update, dc->writeback_thread earlier Delayed worker dc->writeback_rate_update and kernel thread @@ -52,7 +52,7 @@ Signed-off-by: Coly Li <colyli@suse.de> 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h -index 42f20261c7bc..c2fef678a16a 100644 +index 83c569942bd0..395b87942a2f 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -322,6 +322,7 @@ struct cached_dev { @@ -64,7 +64,7 @@ index 42f20261c7bc..c2fef678a16a 100644 /* * Internal to the writeback code, so read_dirty() can keep track of diff --git 
a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c -index 1a32c2739acb..a8f7f23cd448 100644 +index 5401d2356aa3..8912be4165c5 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -696,8 +696,20 @@ static void bcache_device_link(struct bcache_device *d, struct cache_set *c, @@ -106,10 +106,10 @@ index 1a32c2739acb..a8f7f23cd448 100644 destroy_workqueue(dc->writeback_write_wq); diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c -index c1529cb54478..fe1208f8808e 100644 +index 745d9b2a326f..ab2ac3d72393 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c -@@ -559,6 +559,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc) +@@ -548,6 +548,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc) dc->writeback_rate_i_term_inverse = 10000; INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate); diff --git a/for-next/0008-bcache-set-error_limit-correctly.patch b/for-next/v1-0007-bcache-set-error_limit-correctly.patch index 99c64fd..2d5b243 100644 --- a/for-next/0008-bcache-set-error_limit-correctly.patch +++ b/for-next/v1-0007-bcache-set-error_limit-correctly.patch @@ -1,7 +1,7 @@ -From 9062df227769049b4169d7b819c8430b770c10f6 Mon Sep 17 00:00:00 2001 +From 93e9a82ee54b8fb1e50c4df95a74ab2953aec9ff Mon Sep 17 00:00:00 2001 From: Coly Li <colyli@suse.de> -Date: Wed, 3 Jan 2018 14:06:24 +0800 -Subject: [PATCH 08/10] bcache: set error_limit correctly +Date: Wed, 3 Jan 2018 20:37:27 +0800 +Subject: [PATCH v1 07/10] bcache: set error_limit correctly Struct cache uses io_errors for two purposes, - Error decay: when cache set error_decay is set, io_errors is used to @@ -15,25 +15,25 @@ error limit, bch_cache_set_error() will be called to retire the whold cache set. 
But current code is problematic when checking the error limit, see the following code piece from bch_count_io_errors(), - 90 if (error) { - 91 char buf[BDEVNAME_SIZE]; - 92 unsigned errors = atomic_add_return(1 << IO_ERROR_SHIFT, - 93 &ca->io_errors); - 94 errors >>= IO_ERROR_SHIFT; + 90 if (error) { + 91 char buf[BDEVNAME_SIZE]; + 92 unsigned errors = atomic_add_return(1 << IO_ERROR_SHIFT, + 93 &ca->io_errors); + 94 errors >>= IO_ERROR_SHIFT; 95 - 96 if (errors < ca->set->error_limit) - 97 pr_err("%s: IO error on %s, recovering", - 98 bdevname(ca->bdev, buf), m); - 99 else -100 bch_cache_set_error(ca->set, -101 "%s: too many IO errors %s", -102 bdevname(ca->bdev, buf), m); -103 } + 96 if (errors < ca->set->error_limit) + 97 pr_err("%s: IO error on %s, recovering", + 98 bdevname(ca->bdev, buf), m); + 99 else +100 bch_cache_set_error(ca->set, +101 "%s: too many IO errors %s", +102 bdevname(ca->bdev, buf), m); +103 } At line 94, errors is right shifting IO_ERROR_SHIFT bits, now it is real errors counter to compare at line 96. 
But ca->set->error_limit is initia- lized with an amplified value in bch_cache_set_alloc(), -1545 c->error_limit = 8 << IO_ERROR_SHIFT; +1545 c->error_limit = 8 << IO_ERROR_SHIFT; It means by default, in bch_count_io_errors(), before 8<<20 errors happened bch_cache_set_error() won't be called to retire the problematic cache @@ -63,35 +63,35 @@ Signed-off-by: Coly Li <colyli@suse.de> 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h -index c2fef678a16a..6b43083346a5 100644 +index 395b87942a2f..a31dc3737dae 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h -@@ -655,6 +655,7 @@ struct cache_set { +@@ -654,6 +654,7 @@ struct cache_set { ON_ERROR_UNREGISTER, ON_ERROR_PANIC, } on_error; -+#define DEFAULT_IO_ERROR_LIMIT 8 ++#define DEFAULT_IO_ERROR_LIMIT 8 unsigned error_limit; unsigned error_decay; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c -index a8f7f23cd448..b49f480ab8a3 100644 +index 8912be4165c5..02d9d7110769 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c -@@ -1564,7 +1564,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) +@@ -1561,7 +1561,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) c->congested_read_threshold_us = 2000; c->congested_write_threshold_us = 20000; - c->error_limit = 8 << IO_ERROR_SHIFT; + c->error_limit = DEFAULT_IO_ERROR_LIMIT; - c->io_disable = false; return c; + err: diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c -index cf05c35a3665..acce7c82e111 100644 +index b4184092c727..d7ce9a05b304 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c -@@ -557,7 +557,7 @@ SHOW(__bch_cache_set) +@@ -556,7 +556,7 @@ SHOW(__bch_cache_set) /* See count_io_errors for why 88 */ sysfs_print(io_error_halflife, c->error_decay * 88); @@ -100,7 +100,7 @@ index cf05c35a3665..acce7c82e111 100644 sysfs_hprint(congested, ((uint64_t) bch_get_congested(c)) << 9); -@@ 
-658,7 +658,7 @@ STORE(__bch_cache_set) +@@ -656,7 +656,7 @@ STORE(__bch_cache_set) } if (attr == &sysfs_io_error_limit) diff --git a/for-next/0009-bcache-fix-misleading-error-message-in-bch_count_io_.patch b/for-next/v1-0008-bcache-fix-misleading-error-message-in-bch_count_.patch index f201226..18a5c32 100644 --- a/for-next/0009-bcache-fix-misleading-error-message-in-bch_count_io_.patch +++ b/for-next/v1-0008-bcache-fix-misleading-error-message-in-bch_count_.patch @@ -1,7 +1,7 @@ -From 7b33c2472cedd608f7c683ac5cac4349c87cf9c7 Mon Sep 17 00:00:00 2001 +From 80d7abeee0b81a7ee0e3789bac9580f540437d0e Mon Sep 17 00:00:00 2001 From: Coly Li <colyli@suse.de> Date: Wed, 3 Jan 2018 15:59:33 +0800 -Subject: [PATCH 09/10] bcache: fix misleading error message in +Subject: [PATCH v1 08/10] bcache: fix misleading error message in bch_count_io_errors() Bcache only does recoverable I/O for read operations by calling @@ -25,10 +25,10 @@ Signed-off-by: Coly Li <colyli@suse.de> 4 files changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h -index 6b43083346a5..607a9186d6ad 100644 +index a31dc3737dae..c53f312b2216 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h -@@ -869,7 +869,7 @@ static inline void closure_bio_submit(struct cache_set *c, +@@ -855,7 +855,7 @@ static inline void wake_up_allocators(struct cache_set *c) /* Forward declarations */ @@ -38,7 +38,7 @@ index 6b43083346a5..607a9186d6ad 100644 blk_status_t, const char *); void bch_bbio_endio(struct cache_set *, struct bio *, blk_status_t, diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c -index c456095d2bbe..8013ecbcdbda 100644 +index fac97ec2d0e2..a783c5a41ff1 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -51,7 +51,10 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c, @@ -83,7 +83,7 @@ index c456095d2bbe..8013ecbcdbda 100644 void bch_bbio_endio(struct cache_set *c, struct bio *bio, diff --git 
a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c -index b49f480ab8a3..49d6fedf89c3 100644 +index 02d9d7110769..bbe911847eea 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -274,7 +274,9 @@ static void write_super_endio(struct bio *bio) @@ -98,10 +98,10 @@ index b49f480ab8a3..49d6fedf89c3 100644 } diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c -index fe1208f8808e..abadf524b5dd 100644 +index ab2ac3d72393..e58f9be5ae43 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c -@@ -234,8 +234,10 @@ static void read_dirty_endio(struct bio *bio) +@@ -228,8 +228,10 @@ static void read_dirty_endio(struct bio *bio) struct keybuf_key *w = bio->bi_private; struct dirty_io *io = w->private; diff --git a/for-next/0006-bcache-add-io_disable-to-struct-cache_set.patch b/for-next/v1-0009-bcache-add-io_disable-to-struct-cache_set.patch index 1acdbaa..1c6159e 100644 --- a/for-next/0006-bcache-add-io_disable-to-struct-cache_set.patch +++ b/for-next/v1-0009-bcache-add-io_disable-to-struct-cache_set.patch @@ -1,7 +1,7 @@ -From e24bde4ddce438d6b1eb81b29245a585f31e2199 Mon Sep 17 00:00:00 2001 +From 5996e95d633ad28ebbd113004efc488162cd22b7 Mon Sep 17 00:00:00 2001 From: Coly Li <colyli@suse.de> Date: Tue, 2 Jan 2018 17:31:07 +0800 -Subject: [PATCH 06/10] bcache: add io_disable to struct cache_set +Subject: [PATCH v1 09/10] bcache: add io_disable to struct cache_set When too many I/Os failed on cache device, bch_cache_set_error() is called in the error handling code path to retire whole problematic cache set. 
If @@ -38,11 +38,11 @@ Signed-off-by: Coly Li <colyli@suse.de> drivers/md/bcache/io.c | 2 +- drivers/md/bcache/journal.c | 4 ++-- drivers/md/bcache/request.c | 26 +++++++++++++++++++------- - drivers/md/bcache/super.c | 6 +++++- + drivers/md/bcache/super.c | 7 ++++++- drivers/md/bcache/sysfs.c | 4 ++++ drivers/md/bcache/util.h | 6 ------ - drivers/md/bcache/writeback.c | 33 ++++++++++++++++++++++----------- - 10 files changed, 72 insertions(+), 31 deletions(-) + drivers/md/bcache/writeback.c | 34 ++++++++++++++++++++++------------ + 10 files changed, 73 insertions(+), 32 deletions(-) diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 48c002faf08d..3be737582f27 100644 @@ -58,10 +58,10 @@ index 48c002faf08d..3be737582f27 100644 \ set_current_state(TASK_INTERRUPTIBLE); \ diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h -index 83c569942bd0..42f20261c7bc 100644 +index c53f312b2216..9c7f9b1cb791 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h -@@ -480,6 +480,7 @@ struct cache_set { +@@ -481,6 +481,7 @@ struct cache_set { struct cache_accounting accounting; unsigned long flags; @@ -69,7 +69,7 @@ index 83c569942bd0..42f20261c7bc 100644 struct cache_sb sb; -@@ -851,6 +852,19 @@ static inline void wake_up_allocators(struct cache_set *c) +@@ -853,6 +854,19 @@ static inline void wake_up_allocators(struct cache_set *c) wake_up_process(ca->alloc_thread); } @@ -88,7 +88,7 @@ index 83c569942bd0..42f20261c7bc 100644 + /* Forward declarations */ - void bch_count_io_errors(struct cache *, blk_status_t, const char *); + void bch_count_io_errors(struct cache *, blk_status_t, int, const char *); diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index bf0d7978bc3d..75470cce1177 100644 --- a/drivers/md/bcache/btree.c @@ -108,7 +108,7 @@ index bf0d7978bc3d..75470cce1177 100644 set_gc_sectors(c); diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c -index fac97ec2d0e2..c456095d2bbe 100644 +index 
a783c5a41ff1..8013ecbcdbda 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -38,7 +38,7 @@ void __bch_submit_bbio(struct bio *bio, struct cache_set *c) @@ -233,10 +233,10 @@ index 643c3021624f..a85d6a605a8e 100644 s = search_alloc(bio, d); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c -index 5401d2356aa3..1a32c2739acb 100644 +index bbe911847eea..7aa76c3e3556 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c -@@ -519,7 +519,7 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op, +@@ -521,7 +521,7 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op, bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags); bch_bio_map(bio, ca->disk_buckets); @@ -245,26 +245,27 @@ index 5401d2356aa3..1a32c2739acb 100644 closure_sync(cl); } -@@ -1314,6 +1314,9 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...) +@@ -1333,6 +1333,10 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...) acquire_console_sem(); */ + c->io_disable = true; ++ /* make others know io_disable is true earlier */ + smp_mb(); + printk(KERN_ERR "bcache: error on %pU: ", c->sb.set_uuid); va_start(args, fmt); -@@ -1545,6 +1548,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) +@@ -1564,6 +1568,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) c->congested_read_threshold_us = 2000; c->congested_write_threshold_us = 20000; - c->error_limit = 8 << IO_ERROR_SHIFT; + c->error_limit = DEFAULT_IO_ERROR_LIMIT; + c->io_disable = false; return c; err: diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c -index b4184092c727..cf05c35a3665 100644 +index d7ce9a05b304..acce7c82e111 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -92,6 +92,7 @@ read_attribute(partial_stripes_expensive); @@ -317,7 +318,7 @@ index ed5e8a412eb8..03e533631798 100644 uint64_t bch_crc64(const void *, size_t); diff --git a/drivers/md/bcache/writeback.c 
b/drivers/md/bcache/writeback.c -index f25fb6cb383f..c1529cb54478 100644 +index e58f9be5ae43..54add41d2569 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -93,8 +93,11 @@ static void update_writeback_rate(struct work_struct *work) @@ -346,7 +347,7 @@ index f25fb6cb383f..c1529cb54478 100644 + */ + if (test_bit(CACHE_SET_STOPPING, &c->flags) || c->io_disable) return; - + schedule_delayed_work(&dc->writeback_rate_update, @@ -217,7 +223,7 @@ static void write_dirty(struct closure *cl) bio_set_dev(&io->bio, io->dc->bdev); @@ -357,7 +358,7 @@ index f25fb6cb383f..c1529cb54478 100644 } continue_at(cl, write_dirty_finish, io->dc->writeback_write_wq); -@@ -238,7 +244,7 @@ static void read_dirty_submit(struct closure *cl) +@@ -240,7 +246,7 @@ static void read_dirty_submit(struct closure *cl) { struct dirty_io *io = container_of(cl, struct dirty_io, cl); @@ -366,7 +367,7 @@ index f25fb6cb383f..c1529cb54478 100644 continue_at(cl, write_dirty, io->dc->writeback_write_wq); } -@@ -257,7 +263,7 @@ static void read_dirty(struct cached_dev *dc) +@@ -259,7 +265,7 @@ static void read_dirty(struct cached_dev *dc) * mempools. 
*/ @@ -375,7 +376,7 @@ index f25fb6cb383f..c1529cb54478 100644 w = bch_keybuf_next(&dc->writeback_keys); if (!w) -@@ -267,7 +273,9 @@ static void read_dirty(struct cached_dev *dc) +@@ -269,7 +275,9 @@ static void read_dirty(struct cached_dev *dc) if (KEY_START(&w->key) != dc->last_read || jiffies_to_msecs(delay) > 50) @@ -386,7 +387,7 @@ index f25fb6cb383f..c1529cb54478 100644 delay = schedule_timeout_interruptible(delay); dc->last_read = KEY_OFFSET(&w->key); -@@ -448,18 +456,19 @@ static bool refill_dirty(struct cached_dev *dc) +@@ -450,18 +458,19 @@ static bool refill_dirty(struct cached_dev *dc) static int bch_writeback_thread(void *arg) { struct cached_dev *dc = arg; @@ -408,16 +409,18 @@ index f25fb6cb383f..c1529cb54478 100644 break; set_current_state(TASK_INTERRUPTIBLE); -@@ -485,13 +494,15 @@ static int bch_writeback_thread(void *arg) - - while (delay && - !kthread_should_stop() && -- !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)) -+ !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) && -+ !c->io_disable) +@@ -485,8 +494,8 @@ static int bch_writeback_thread(void *arg) + if (searched_full_index) { + unsigned delay = dc->writeback_delay * HZ; + +- while (delay && +- !kthread_should_stop() && ++ while (delay && !kthread_should_stop() && ++ !c->io_disable && + !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)) delay = schedule_timeout_interruptible(delay); - bch_ratelimit_reset(&dc->writeback_rate); +@@ -494,6 +503,7 @@ static int bch_writeback_thread(void *arg) } } diff --git a/for-next/0010-bcache-stop-all-attached-bcache-devices-for-a-retire.patch b/for-next/v1-0010-bcache-stop-all-attached-bcache-devices-for-a-ret.patch index a023bc2..062caae 100644 --- a/for-next/0010-bcache-stop-all-attached-bcache-devices-for-a-retire.patch +++ b/for-next/v1-0010-bcache-stop-all-attached-bcache-devices-for-a-ret.patch @@ -1,8 +1,8 @@ -From c21aff9a0c26503714e7cd313ca1529cc0816393 Mon Sep 17 00:00:00 2001 +From 595d5d28a7ed23cae061b9e0dd201611afd6db6d Mon Sep 17 00:00:00 
2001 From: Coly Li <colyli@suse.de> Date: Wed, 3 Jan 2018 18:24:55 +0800 -Subject: [PATCH 10/10] bcache: stop all attached bcache devices for a retired - cache set +Subject: [PATCH v1 10/10] bcache: stop all attached bcache devices for a + retired cache set When there are too many I/O errors on cache device, current bcache code will retire the whole cache set, and detach all bcache devices. But the |