aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorColy Li <colyli@suse.de>2019-06-22 23:45:31 +0800
committerColy Li <colyli@suse.de>2019-06-22 23:45:31 +0800
commit3642dbb6a54cf3c284e9aa47d8ed4deff9d50e8f (patch)
treee423722b82971f287bc27f050b3859f2eb85ad53
parentdd4cbb51267acdc4b9d70a7ea4f2588b950e12dd (diff)
downloadbcache-patches-3642dbb6a54cf3c284e9aa47d8ed4deff9d50e8f.tar.gz
for-next: resort patches, add new patches
-rw-r--r--for-next/0000-cover-letter.patch15
-rw-r--r--for-next/0001-bcache-don-t-set-max-writeback-rate-if-gc-is-running.patch5
-rw-r--r--for-next/0002-bcache-check-c-gc_thread-by-IS_ERR_OR_NULL-in-cache_.patch125
-rw-r--r--for-next/0003-bcache-fix-return-value-error-in-bch_journal_read.patch (renamed from for-next/0002-bcache-fix-return-value-error-in-bch_journal_read.patch)5
-rw-r--r--for-next/0004-Revert-bcache-set-CACHE_SET_IO_DISABLE-in-bch_cached.patch (renamed from for-next/0003-Revert-bcache-set-CACHE_SET_IO_DISABLE-in-bch_cached.patch)6
-rw-r--r--for-next/0005-bcache-avoid-flushing-btree-node-in-cache_set_flush-.patch (renamed from for-next/0004-bcache-avoid-flushing-btree-node-in-cache_set_flush-.patch)8
-rw-r--r--for-next/0006-bcache-ignore-read-ahead-request-failure-on-backing-.patch (renamed from for-next/0005-bcache-ignore-read-ahead-request-failure-on-backing-.patch)6
-rw-r--r--for-next/0007-bcache-add-io-error-counting-in-write_bdev_super_end.patch (renamed from for-next/0006-bcache-add-io-error-counting-in-write_bdev_super_end.patch)6
-rw-r--r--for-next/0008-bcache-remove-unnecessary-prefetch-in-bset_search_tr.patch4
-rw-r--r--for-next/0009-bcache-use-sysfs_match_string-instead-of-__sysfs_mat.patch4
-rw-r--r--for-next/0010-bcache-add-return-value-check-to-bch_cached_dev_run.patch7
-rw-r--r--for-next/0011-bcache-remove-unncessary-code-in-bch_btree_keys_init.patch5
-rw-r--r--for-next/0012-bcache-check-CACHE_SET_IO_DISABLE-in-allocator-code.patch5
-rw-r--r--for-next/0013-bcache-check-CACHE_SET_IO_DISABLE-bit-in-bch_journal.patch5
-rw-r--r--for-next/0014-bcache-more-detailed-error-message-to-bcache_device_.patch6
-rw-r--r--for-next/0015-bcache-add-more-error-message-in-bch_cached_dev_atta.patch6
-rw-r--r--for-next/0016-bcache-improve-error-message-in-bch_cached_dev_run.patch7
-rw-r--r--for-next/0017-bcache-remove-XXX-comment-line-from-run_cache_set.patch (renamed from for-next/0007-bcache-remove-XXX-comment-line-from-run_cache_set.patch)9
-rw-r--r--for-next/0018-bcache-make-bset_search_tree-be-more-understandable.patch (renamed from for-next/0017-bcache-make-bset_search_tree-be-more-understandable.patch)5
-rw-r--r--for-next/0019-bcache-add-pendings_cleanup-to-stop-pending-bcache-d.patch (renamed from for-next/0018-bcache-add-pendings_cleanup-to-stop-pending-bcache-d.patch)6
-rw-r--r--for-next/0020-bcache-fix-mistaken-sysfs-entry-for-io_error-counter.patch (renamed from for-next/0019-bcache-fix-mistaken-sysfs-entry-for-io_error-counter.patch)5
-rw-r--r--for-next/0021-bcache-destroy-dc-writeback_write_wq-if-failed-to-cr.patch (renamed from for-next/0020-bcache-destroy-dc-writeback_write_wq-if-failed-to-cr.patch)4
-rw-r--r--for-next/0022-bcache-stop-writeback-kthread-and-kworker-when-bch_c.patch (renamed from for-next/0021-bcache-stop-writeback-kthread-and-kworker-when-bch_c.patch)6
-rw-r--r--for-next/0023-bcache-acquire-bch_register_lock-later-in-cached_dev.patch47
-rw-r--r--for-next/0023-bcache-avoid-a-deadlock-in-bcache_reboot.patch (renamed from for-next/0022-bcache-avoid-a-deadlock-in-bcache_reboot.patch)6
-rw-r--r--for-next/0024-bcache-acquire-bch_register_lock-later-in-cached_dev.patch171
-rw-r--r--for-next/0025-bcache-acquire-bch_register_lock-later-in-cached_dev.patch160
-rw-r--r--for-next/0026-bcache-fix-potential-deadlock-in-cached_def_free.patch (renamed from for-next/0025-bcache-fix-potential-deadlock-in-cached_def_free.patch)6
-rw-r--r--for-next/0027-bcache-add-code-comments-for-journal_read_bucket.patch (renamed from for-next/0026-bcache-add-code-comments-for-journal_read_bucket.patch)4
-rw-r--r--for-next/0028-bcache-set-largest-seq-to-ja-seq-bucket_index-in-jou.patch (renamed from for-next/0027-bcache-set-largest-seq-to-ja-seq-bucket_index-in-jou.patch)4
-rw-r--r--for-next/0029-bcache-shrink-btree-node-cache-after-bch_btree_check.patch (renamed from for-next/0028-bcache-shrink-btree-node-cache-after-bch_btree_check.patch)7
-rw-r--r--for-next/0030-bcache-Improve-bcache-tollerance-for-out-of-memory-c.patch (renamed from for-next/0029-bcache-Improve-bcache-tollerance-for-out-of-memory-c.patch)8
-rw-r--r--for-next/0031-bcache-Revert-bcache-free-heap-cache_set-flush_btree.patch (renamed from for-next/0030-bcache-Revert-bcache-free-heap-cache_set-flush_btree.patch)4
-rw-r--r--for-next/0032-bcache-Revert-bcache-fix-high-CPU-occupancy-during-j.patch (renamed from for-next/0031-bcache-Revert-bcache-fix-high-CPU-occupancy-during-j.patch)6
-rw-r--r--for-next/0033-bcache-remove-retry_flush_write-from-struct-cache_se.patch (renamed from for-next/0032-bcache-remove-retry_flush_write-from-struct-cache_se.patch)24
-rw-r--r--for-next/0034-bcache-fix-race-in-btree_flush_write.patch (renamed from for-next/0033-bcache-fix-race-in-btree_flush_write.patch)33
-rw-r--r--for-next/0035-bcache-add-reclaimed_journal_buckets-to-struct-cache.patch (renamed from for-next/0034-bcache-add-reclaimed_journal_buckets-to-struct-cache.patch)4
37 files changed, 448 insertions, 296 deletions
diff --git a/for-next/0000-cover-letter.patch b/for-next/0000-cover-letter.patch
index d7ec8e1..5901b42 100644
--- a/for-next/0000-cover-letter.patch
+++ b/for-next/0000-cover-letter.patch
@@ -1,21 +1,21 @@
-From 6c8bf37c6d1ec7a58fa1c5b910862dbf6dbf1818 Mon Sep 17 00:00:00 2001
+From 804d2586ee139cea6a16c0175184ed5ff87ec6cf Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
-Date: Sat, 22 Jun 2019 14:43:26 +0800
-Subject: [PATCH 00/34] *** SUBJECT HERE ***
+Date: Sat, 22 Jun 2019 23:42:06 +0800
+Subject: [RFC PATCH 00/35] *** SUBJECT HERE ***
*** BLURB HERE ***
Alexandru Ardelean (1):
bcache: use sysfs_match_string() instead of __sysfs_match_string()
-Coly Li (33):
+Coly Li (34):
bcache: don't set max writeback rate if gc is running
+ bcache: check c->gc_thread by IS_ERR_OR_NULL in cache_set_flush()
bcache: fix return value error in bch_journal_read()
Revert "bcache: set CACHE_SET_IO_DISABLE in bch_cached_dev_error()"
bcache: avoid flushing btree node in cache_set_flush() if io disabled
bcache: ignore read-ahead request failure on backing device
bcache: add io error counting in write_bdev_super_endio()
- bcache: remove "XXX:" comment line from run_cache_set()
bcache: remove unnecessary prefetch() in bset_search_tree()
bcache: add return value check to bch_cached_dev_run()
bcache: remove unncessary code in bch_btree_keys_init()
@@ -24,6 +24,7 @@ Coly Li (33):
bcache: more detailed error message to bcache_device_link()
bcache: add more error message in bch_cached_dev_attach()
bcache: improve error message in bch_cached_dev_run()
+ bcache: remove "XXX:" comment line from run_cache_set()
bcache: make bset_search_tree() be more understandable
bcache: add pendings_cleanup to stop pending bcache device
bcache: fix mistaken sysfs entry for io_error counter
@@ -56,11 +57,11 @@ Coly Li (33):
drivers/md/bcache/journal.c | 141 ++++++++++++++++++--------
drivers/md/bcache/journal.h | 4 +
drivers/md/bcache/request.c | 12 +++
- drivers/md/bcache/super.c | 229 ++++++++++++++++++++++++++++++++++--------
+ drivers/md/bcache/super.c | 231 ++++++++++++++++++++++++++++++++++--------
drivers/md/bcache/sysfs.c | 67 ++++++++----
drivers/md/bcache/util.h | 2 -
drivers/md/bcache/writeback.c | 8 ++
- 13 files changed, 476 insertions(+), 155 deletions(-)
+ 13 files changed, 477 insertions(+), 156 deletions(-)
--
2.16.4
diff --git a/for-next/0001-bcache-don-t-set-max-writeback-rate-if-gc-is-running.patch b/for-next/0001-bcache-don-t-set-max-writeback-rate-if-gc-is-running.patch
index e2d558d..53f4594 100644
--- a/for-next/0001-bcache-don-t-set-max-writeback-rate-if-gc-is-running.patch
+++ b/for-next/0001-bcache-don-t-set-max-writeback-rate-if-gc-is-running.patch
@@ -1,7 +1,8 @@
-From 190fb732fc6167ec7affad0b918a50bf82bc382a Mon Sep 17 00:00:00 2001
+From e82dbe2b2f193ebe86e8ba742851e609256980f9 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Fri, 21 Jun 2019 01:46:20 +0800
-Subject: [PATCH 01/34] bcache: don't set max writeback rate if gc is running
+Subject: [RFC PATCH 01/35] bcache: don't set max writeback rate if gc is
+ running
When gc is running, user space I/O processes may wait inside
bcache code, so no new I/O coming. Indeed this is not a real idle
diff --git a/for-next/0002-bcache-check-c-gc_thread-by-IS_ERR_OR_NULL-in-cache_.patch b/for-next/0002-bcache-check-c-gc_thread-by-IS_ERR_OR_NULL-in-cache_.patch
new file mode 100644
index 0000000..2a50072
--- /dev/null
+++ b/for-next/0002-bcache-check-c-gc_thread-by-IS_ERR_OR_NULL-in-cache_.patch
@@ -0,0 +1,125 @@
+From 0bd7fa1e41805d741b6ca8f61bb7e81746f2aa8f Mon Sep 17 00:00:00 2001
+From: Coly Li <colyli@suse.de>
+Date: Sat, 22 Jun 2019 23:04:36 +0800
+Subject: [RFC PATCH 02/35] bcache: check c->gc_thread by IS_ERR_OR_NULL in
+ cache_set_flush()
+
+When system memory is in heavy pressure, bch_gc_thread_start() from
+run_cache_set() may fail due to out of memory. In such condition,
+c->gc_thread is assigned to -ENOMEM, not NULL pointer. Then in following
+failure code path bch_cache_set_error(), when cache_set_flush() gets
+called, the code piece to stop c->gc_thread is broken,
+ if (c->gc_thread)
+ kthread_stop(c->gc_thread);
+
+And KASAN catches such NULL pointer dereference problem, with the warning
+information:
+
+[ 561.207881] ==================================================================
+[ 561.207900] BUG: KASAN: null-ptr-deref in kthread_stop+0x3b/0x440
+[ 561.207904] Write of size 4 at addr 000000000000001c by task kworker/15:1/313
+
+[ 561.207913] CPU: 15 PID: 313 Comm: kworker/15:1 Tainted: G W 5.0.0-vanilla+ #3
+[ 561.207916] Hardware name: Lenovo ThinkSystem SR650 -[7X05CTO1WW]-/-[7X05CTO1WW]-, BIOS -[IVE136T-2.10]- 03/22/2019
+[ 561.207935] Workqueue: events cache_set_flush [bcache]
+[ 561.207940] Call Trace:
+[ 561.207948] dump_stack+0x9a/0xeb
+[ 561.207955] ? kthread_stop+0x3b/0x440
+[ 561.207960] ? kthread_stop+0x3b/0x440
+[ 561.207965] kasan_report+0x176/0x192
+[ 561.207973] ? kthread_stop+0x3b/0x440
+[ 561.207981] kthread_stop+0x3b/0x440
+[ 561.207995] cache_set_flush+0xd4/0x6d0 [bcache]
+[ 561.208008] process_one_work+0x856/0x1620
+[ 561.208015] ? find_held_lock+0x39/0x1d0
+[ 561.208028] ? drain_workqueue+0x380/0x380
+[ 561.208048] worker_thread+0x87/0xb80
+[ 561.208058] ? __kthread_parkme+0xb6/0x180
+[ 561.208067] ? process_one_work+0x1620/0x1620
+[ 561.208072] kthread+0x326/0x3e0
+[ 561.208079] ? kthread_create_worker_on_cpu+0xc0/0xc0
+[ 561.208090] ret_from_fork+0x3a/0x50
+[ 561.208110] ==================================================================
+[ 561.208113] Disabling lock debugging due to kernel taint
+[ 561.208115] irq event stamp: 11800231
+[ 561.208126] hardirqs last enabled at (11800231): [<ffffffff83008538>] do_syscall_64+0x18/0x410
+[ 561.208127] BUG: unable to handle kernel NULL pointer dereference at 000000000000001c
+[ 561.208129] #PF error: [WRITE]
+[ 561.312253] hardirqs last disabled at (11800230): [<ffffffff830052ff>] trace_hardirqs_off_thunk+0x1a/0x1c
+[ 561.312259] softirqs last enabled at (11799832): [<ffffffff850005c7>] __do_softirq+0x5c7/0x8c3
+[ 561.405975] PGD 0 P4D 0
+[ 561.442494] softirqs last disabled at (11799821): [<ffffffff831add2c>] irq_exit+0x1ac/0x1e0
+[ 561.791359] Oops: 0002 [#1] SMP KASAN NOPTI
+[ 561.791362] CPU: 15 PID: 313 Comm: kworker/15:1 Tainted: G B W 5.0.0-vanilla+ #3
+[ 561.791363] Hardware name: Lenovo ThinkSystem SR650 -[7X05CTO1WW]-/-[7X05CTO1WW]-, BIOS -[IVE136T-2.10]- 03/22/2019
+[ 561.791371] Workqueue: events cache_set_flush [bcache]
+[ 561.791374] RIP: 0010:kthread_stop+0x3b/0x440
+[ 561.791376] Code: 00 00 65 8b 05 26 d5 e0 7c 89 c0 48 0f a3 05 ec aa df 02 0f 82 dc 02 00 00 4c 8d 63 20 be 04 00 00 00 4c 89 e7 e8 65 c5 53 00 <f0> ff 43 20 48 8d 7b 24 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48
+[ 561.791377] RSP: 0018:ffff88872fc8fd10 EFLAGS: 00010286
+[ 561.838895] bcache: bch_count_io_errors() nvme0n1: IO error on writing btree.
+[ 561.838916] bcache: bch_count_io_errors() nvme0n1: IO error on writing btree.
+[ 561.838934] bcache: bch_count_io_errors() nvme0n1: IO error on writing btree.
+[ 561.838948] bcache: bch_count_io_errors() nvme0n1: IO error on writing btree.
+[ 561.838966] bcache: bch_count_io_errors() nvme0n1: IO error on writing btree.
+[ 561.838979] bcache: bch_count_io_errors() nvme0n1: IO error on writing btree.
+[ 561.838996] bcache: bch_count_io_errors() nvme0n1: IO error on writing btree.
+[ 563.067028] RAX: 0000000000000000 RBX: fffffffffffffffc RCX: ffffffff832dd314
+[ 563.067030] RDX: 0000000000000000 RSI: 0000000000000004 RDI: 0000000000000297
+[ 563.067032] RBP: ffff88872fc8fe88 R08: fffffbfff0b8213d R09: fffffbfff0b8213d
+[ 563.067034] R10: 0000000000000001 R11: fffffbfff0b8213c R12: 000000000000001c
+[ 563.408618] R13: ffff88dc61cc0f68 R14: ffff888102b94900 R15: ffff88dc61cc0f68
+[ 563.408620] FS: 0000000000000000(0000) GS:ffff888f7dc00000(0000) knlGS:0000000000000000
+[ 563.408622] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 563.408623] CR2: 000000000000001c CR3: 0000000f48a1a004 CR4: 00000000007606e0
+[ 563.408625] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[ 563.408627] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+[ 563.904795] bcache: bch_count_io_errors() nvme0n1: IO error on writing btree.
+[ 563.915796] PKRU: 55555554
+[ 563.915797] Call Trace:
+[ 563.915807] cache_set_flush+0xd4/0x6d0 [bcache]
+[ 563.915812] process_one_work+0x856/0x1620
+[ 564.001226] bcache: bch_count_io_errors() nvme0n1: IO error on writing btree.
+[ 564.033563] ? find_held_lock+0x39/0x1d0
+[ 564.033567] ? drain_workqueue+0x380/0x380
+[ 564.033574] worker_thread+0x87/0xb80
+[ 564.062823] bcache: bch_count_io_errors() nvme0n1: IO error on writing btree.
+[ 564.118042] ? __kthread_parkme+0xb6/0x180
+[ 564.118046] ? process_one_work+0x1620/0x1620
+[ 564.118048] kthread+0x326/0x3e0
+[ 564.118050] ? kthread_create_worker_on_cpu+0xc0/0xc0
+[ 564.167066] bcache: bch_count_io_errors() nvme0n1: IO error on writing btree.
+[ 564.252441] ret_from_fork+0x3a/0x50
+[ 564.252447] Modules linked in: msr rpcrdma sunrpc rdma_ucm ib_iser ib_umad rdma_cm ib_ipoib i40iw configfs iw_cm ib_cm libiscsi scsi_transport_iscsi mlx4_ib ib_uverbs mlx4_en ib_core nls_iso8859_1 nls_cp437 vfat fat intel_rapl skx_edac x86_pkg_temp_thermal coretemp iTCO_wdt iTCO_vendor_support crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel ses raid0 aesni_intel cdc_ether enclosure usbnet ipmi_ssif joydev aes_x86_64 i40e scsi_transport_sas mii bcache md_mod crypto_simd mei_me ioatdma crc64 ptp cryptd pcspkr i2c_i801 mlx4_core glue_helper pps_core mei lpc_ich dca wmi ipmi_si ipmi_devintf nd_pmem dax_pmem nd_btt ipmi_msghandler device_dax pcc_cpufreq button hid_generic usbhid mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect xhci_pci sysimgblt fb_sys_fops xhci_hcd ttm megaraid_sas drm usbcore nfit libnvdimm sg dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua efivarfs
+[ 564.299390] bcache: bch_count_io_errors() nvme0n1: IO error on writing btree.
+[ 564.348360] CR2: 000000000000001c
+[ 564.348362] ---[ end trace b7f0e5cc7b2103b0 ]---
+
+Therefore, it is not enough to only check whether c->gc_thread is NULL,
+we should use IS_ERR_OR_NULL() to check both NULL pointer and error
+value.
+
+This patch changes the above buggy code piece in this way,
+ if (!IS_ERR_OR_NULL(c->gc_thread))
+ kthread_stop(c->gc_thread);
+
+Signed-off-by: Coly Li <colyli@suse.de>
+---
+ drivers/md/bcache/super.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
+index 1b63ac876169..64d9de89a63f 100644
+--- a/drivers/md/bcache/super.c
++++ b/drivers/md/bcache/super.c
+@@ -1564,7 +1564,7 @@ static void cache_set_flush(struct closure *cl)
+ kobject_put(&c->internal);
+ kobject_del(&c->kobj);
+
+- if (c->gc_thread)
++ if (!IS_ERR_OR_NULL(c->gc_thread))
+ kthread_stop(c->gc_thread);
+
+ if (!IS_ERR_OR_NULL(c->root))
+--
+2.16.4
+
diff --git a/for-next/0002-bcache-fix-return-value-error-in-bch_journal_read.patch b/for-next/0003-bcache-fix-return-value-error-in-bch_journal_read.patch
index 686997c..beadd28 100644
--- a/for-next/0002-bcache-fix-return-value-error-in-bch_journal_read.patch
+++ b/for-next/0003-bcache-fix-return-value-error-in-bch_journal_read.patch
@@ -1,7 +1,8 @@
-From 348ce79a9acf3109951cb4f1c4f318512bf8e6a0 Mon Sep 17 00:00:00 2001
+From 4ab78c500f3b50632c170ecf09db8d5212fcc0e1 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Tue, 4 Jun 2019 14:43:08 +0800
-Subject: [PATCH 02/34] bcache: fix return value error in bch_journal_read()
+Subject: [RFC PATCH 03/35] bcache: fix return value error in
+ bch_journal_read()
When everything is OK in bch_journal_read(), finally the return value
is returned by,
diff --git a/for-next/0003-Revert-bcache-set-CACHE_SET_IO_DISABLE-in-bch_cached.patch b/for-next/0004-Revert-bcache-set-CACHE_SET_IO_DISABLE-in-bch_cached.patch
index 562fd16..527dab6 100644
--- a/for-next/0003-Revert-bcache-set-CACHE_SET_IO_DISABLE-in-bch_cached.patch
+++ b/for-next/0004-Revert-bcache-set-CACHE_SET_IO_DISABLE-in-bch_cached.patch
@@ -1,7 +1,7 @@
-From ac9cde1715b16e9ce2ec37a131e6bc25172c0fe3 Mon Sep 17 00:00:00 2001
+From 68ecb095dd96fca5b58cda51d81b6f642a279e46 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Tue, 4 Jun 2019 15:00:46 +0800
-Subject: [PATCH 03/34] Revert "bcache: set CACHE_SET_IO_DISABLE in
+Subject: [RFC PATCH 04/35] Revert "bcache: set CACHE_SET_IO_DISABLE in
bch_cached_dev_error()"
This reverts commit 6147305c73e4511ca1a975b766b97a779d442567.
@@ -25,7 +25,7 @@ Cc: stable@vger.kernel.org
1 file changed, 17 deletions(-)
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 1b63ac876169..eaaa046fd95d 100644
+index 64d9de89a63f..ba2ad093bc80 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1437,8 +1437,6 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size)
diff --git a/for-next/0004-bcache-avoid-flushing-btree-node-in-cache_set_flush-.patch b/for-next/0005-bcache-avoid-flushing-btree-node-in-cache_set_flush-.patch
index 320f15b..d1f2ece 100644
--- a/for-next/0004-bcache-avoid-flushing-btree-node-in-cache_set_flush-.patch
+++ b/for-next/0005-bcache-avoid-flushing-btree-node-in-cache_set_flush-.patch
@@ -1,8 +1,8 @@
-From 3e725adb6d449f4bc12278e6d8adaefbcd25687c Mon Sep 17 00:00:00 2001
+From f1deb934c5c05ae7ee932efb374288f7df1f518b Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Thu, 23 May 2019 23:18:10 +0800
-Subject: [PATCH 04/34] bcache: avoid flushing btree node in cache_set_flush()
- if io disabled
+Subject: [RFC PATCH 05/35] bcache: avoid flushing btree node in
+ cache_set_flush() if io disabled
When cache_set_flush() is called for too many I/O errors detected on
cache device and the cache set is retiring, inside the function it
@@ -20,7 +20,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
1 file changed, 11 insertions(+), 7 deletions(-)
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index eaaa046fd95d..da9d6a63b81a 100644
+index ba2ad093bc80..dc6702c2c4b6 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1553,13 +1553,17 @@ static void cache_set_flush(struct closure *cl)
diff --git a/for-next/0005-bcache-ignore-read-ahead-request-failure-on-backing-.patch b/for-next/0006-bcache-ignore-read-ahead-request-failure-on-backing-.patch
index 2888222..fc7336b 100644
--- a/for-next/0005-bcache-ignore-read-ahead-request-failure-on-backing-.patch
+++ b/for-next/0006-bcache-ignore-read-ahead-request-failure-on-backing-.patch
@@ -1,8 +1,8 @@
-From fdc26d90bf11b98dd41a9e8fa6b29c24480f4da3 Mon Sep 17 00:00:00 2001
+From bd8c817c0cdb3eec6bb0d8a5d92ad8af21bae79c Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Mon, 13 May 2019 22:48:09 +0800
-Subject: [PATCH 05/34] bcache: ignore read-ahead request failure on backing
- device
+Subject: [RFC PATCH 06/35] bcache: ignore read-ahead request failure on
+ backing device
When md raid device (e.g. raid456) is used as backing device, read-ahead
requests on a degrading and recovering md raid device might be failured
diff --git a/for-next/0006-bcache-add-io-error-counting-in-write_bdev_super_end.patch b/for-next/0007-bcache-add-io-error-counting-in-write_bdev_super_end.patch
index d2b5963..c70c69f 100644
--- a/for-next/0006-bcache-add-io-error-counting-in-write_bdev_super_end.patch
+++ b/for-next/0007-bcache-add-io-error-counting-in-write_bdev_super_end.patch
@@ -1,7 +1,7 @@
-From 5c43f975b9da56b6b714dd8d695741758d69d281 Mon Sep 17 00:00:00 2001
+From 101a56fb6a4c3613772c02ea6f1b9805999c852c Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Mon, 13 May 2019 23:42:39 +0800
-Subject: [PATCH 06/34] bcache: add io error counting in
+Subject: [RFC PATCH 07/35] bcache: add io error counting in
write_bdev_super_endio()
When backing device super block is written by bch_write_bdev_super(),
@@ -19,7 +19,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index da9d6a63b81a..877113b62b0f 100644
+index dc6702c2c4b6..73466bda12a7 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -197,7 +197,9 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
diff --git a/for-next/0008-bcache-remove-unnecessary-prefetch-in-bset_search_tr.patch b/for-next/0008-bcache-remove-unnecessary-prefetch-in-bset_search_tr.patch
index 4fe9de5..677a8b3 100644
--- a/for-next/0008-bcache-remove-unnecessary-prefetch-in-bset_search_tr.patch
+++ b/for-next/0008-bcache-remove-unnecessary-prefetch-in-bset_search_tr.patch
@@ -1,7 +1,7 @@
-From af9d2e4b46bf610e5c1b20e3758ea3f218aee00b Mon Sep 17 00:00:00 2001
+From d30097a506a93f42f2d42566e42e2bc5b712335c Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Tue, 14 May 2019 22:23:35 +0800
-Subject: [PATCH 08/34] bcache: remove unnecessary prefetch() in
+Subject: [RFC PATCH 08/35] bcache: remove unnecessary prefetch() in
bset_search_tree()
In function bset_search_tree(), when p >= t->size, t->tree[0] will be
diff --git a/for-next/0009-bcache-use-sysfs_match_string-instead-of-__sysfs_mat.patch b/for-next/0009-bcache-use-sysfs_match_string-instead-of-__sysfs_mat.patch
index e59a283..94d8f1e 100644
--- a/for-next/0009-bcache-use-sysfs_match_string-instead-of-__sysfs_mat.patch
+++ b/for-next/0009-bcache-use-sysfs_match_string-instead-of-__sysfs_mat.patch
@@ -1,7 +1,7 @@
-From 785b29a56216c084588a185cf831f1fc42c01db6 Mon Sep 17 00:00:00 2001
+From b5bf1b4e61bfe18423ab427bcdcc66b8cf3c7e1c Mon Sep 17 00:00:00 2001
From: Alexandru Ardelean <alexandru.ardelean@analog.com>
Date: Tue, 7 May 2019 12:43:12 +0300
-Subject: [PATCH 09/34] bcache: use sysfs_match_string() instead of
+Subject: [RFC PATCH 09/35] bcache: use sysfs_match_string() instead of
__sysfs_match_string()
The arrays (of strings) that are passed to __sysfs_match_string() are
diff --git a/for-next/0010-bcache-add-return-value-check-to-bch_cached_dev_run.patch b/for-next/0010-bcache-add-return-value-check-to-bch_cached_dev_run.patch
index 40557ba..eac0a29 100644
--- a/for-next/0010-bcache-add-return-value-check-to-bch_cached_dev_run.patch
+++ b/for-next/0010-bcache-add-return-value-check-to-bch_cached_dev_run.patch
@@ -1,7 +1,8 @@
-From 063905980aa216374273fcf3fa11b17e85008263 Mon Sep 17 00:00:00 2001
+From bcb2d25f93e82ae1bfec6f4f2282f644c7110524 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Tue, 21 May 2019 22:16:38 +0800
-Subject: [PATCH 10/34] bcache: add return value check to bch_cached_dev_run()
+Subject: [RFC PATCH 10/35] bcache: add return value check to
+ bch_cached_dev_run()
This patch adds return value check to bch_cached_dev_run(), now if an
error happens inside bch_cached_dev_run(), it can be caught.
@@ -27,7 +28,7 @@ index fdf75352e16a..73a97586a2ef 100644
void bch_cache_set_unregister(struct cache_set *c);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 3364b20567eb..74eb18b1af40 100644
+index 73466bda12a7..0abee44092bf 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -910,7 +910,7 @@ static int cached_dev_status_update(void *arg)
diff --git a/for-next/0011-bcache-remove-unncessary-code-in-bch_btree_keys_init.patch b/for-next/0011-bcache-remove-unncessary-code-in-bch_btree_keys_init.patch
index d037766..16217d9 100644
--- a/for-next/0011-bcache-remove-unncessary-code-in-bch_btree_keys_init.patch
+++ b/for-next/0011-bcache-remove-unncessary-code-in-bch_btree_keys_init.patch
@@ -1,7 +1,8 @@
-From 0c096f5f5e1d375ec9e160575af60d789003a052 Mon Sep 17 00:00:00 2001
+From 24b1805121a04d121828e48012a76a3bb8d5bb66 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Tue, 21 May 2019 22:36:35 +0800
-Subject: [PATCH 11/34] bcache: remove unncessary code in bch_btree_keys_init()
+Subject: [RFC PATCH 11/35] bcache: remove unncessary code in
+ bch_btree_keys_init()
Function bch_btree_keys_init() initializes b->set[].size and
b->set[].data to zero. As the code comments indicates, these code indeed
diff --git a/for-next/0012-bcache-check-CACHE_SET_IO_DISABLE-in-allocator-code.patch b/for-next/0012-bcache-check-CACHE_SET_IO_DISABLE-in-allocator-code.patch
index a8a818d..68b82fe 100644
--- a/for-next/0012-bcache-check-CACHE_SET_IO_DISABLE-in-allocator-code.patch
+++ b/for-next/0012-bcache-check-CACHE_SET_IO_DISABLE-in-allocator-code.patch
@@ -1,7 +1,8 @@
-From 5beb298bceae1949925a9e04fb06debde0426f9b Mon Sep 17 00:00:00 2001
+From 9f6c0fd9d4594202dd2d60d17934605ace23d98e Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Wed, 22 May 2019 21:55:09 +0800
-Subject: [PATCH 12/34] bcache: check CACHE_SET_IO_DISABLE in allocator code
+Subject: [RFC PATCH 12/35] bcache: check CACHE_SET_IO_DISABLE in allocator
+ code
If CACHE_SET_IO_DISABLE of a cache set flag is set by too many I/O
errors, currently allocator routines can still continue allocate
diff --git a/for-next/0013-bcache-check-CACHE_SET_IO_DISABLE-bit-in-bch_journal.patch b/for-next/0013-bcache-check-CACHE_SET_IO_DISABLE-bit-in-bch_journal.patch
index 9b50bd1..84cf911 100644
--- a/for-next/0013-bcache-check-CACHE_SET_IO_DISABLE-bit-in-bch_journal.patch
+++ b/for-next/0013-bcache-check-CACHE_SET_IO_DISABLE-bit-in-bch_journal.patch
@@ -1,7 +1,8 @@
-From 32f189df4439dad0e75feb0b487645fc0e99854d Mon Sep 17 00:00:00 2001
+From 07e6857988c97f1235e92ad7a7a0a4144a1cf8bc Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Wed, 22 May 2019 22:06:21 +0800
-Subject: [PATCH 13/34] bcache: check CACHE_SET_IO_DISABLE bit in bch_journal()
+Subject: [RFC PATCH 13/35] bcache: check CACHE_SET_IO_DISABLE bit in
+ bch_journal()
When too many I/O errors happen on cache set and CACHE_SET_IO_DISABLE
bit is set, bch_journal() may continue to work because the journaling
diff --git a/for-next/0014-bcache-more-detailed-error-message-to-bcache_device_.patch b/for-next/0014-bcache-more-detailed-error-message-to-bcache_device_.patch
index c219402..ed76f08 100644
--- a/for-next/0014-bcache-more-detailed-error-message-to-bcache_device_.patch
+++ b/for-next/0014-bcache-more-detailed-error-message-to-bcache_device_.patch
@@ -1,7 +1,7 @@
-From 4b1579bf6b9be657c243bbb8cb15cc0e9c944215 Mon Sep 17 00:00:00 2001
+From 8be7adffee7dccd530ef8fb08dd5f5d01bf1960a Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Sat, 1 Jun 2019 00:57:38 +0800
-Subject: [PATCH 14/34] bcache: more detailed error message to
+Subject: [RFC PATCH 14/35] bcache: more detailed error message to
bcache_device_link()
This patch adds more accurate error message for specific
@@ -14,7 +14,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 74eb18b1af40..1fa3f4e26d02 100644
+index 0abee44092bf..d4d8d1300faf 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -693,6 +693,7 @@ static void bcache_device_link(struct bcache_device *d, struct cache_set *c,
diff --git a/for-next/0015-bcache-add-more-error-message-in-bch_cached_dev_atta.patch b/for-next/0015-bcache-add-more-error-message-in-bch_cached_dev_atta.patch
index b55bd18..97f4215 100644
--- a/for-next/0015-bcache-add-more-error-message-in-bch_cached_dev_atta.patch
+++ b/for-next/0015-bcache-add-more-error-message-in-bch_cached_dev_atta.patch
@@ -1,7 +1,7 @@
-From 08b0872136342f1e6665b834745f354382f112ba Mon Sep 17 00:00:00 2001
+From 7dfb8dfabeb9c8aeacd60afcb5d896ab2632966a Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Sat, 1 Jun 2019 01:03:00 +0800
-Subject: [PATCH 15/34] bcache: add more error message in
+Subject: [RFC PATCH 15/35] bcache: add more error message in
bch_cached_dev_attach()
This patch adds more error message for attaching cached device, this is
@@ -13,7 +13,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
1 file changed, 4 insertions(+)
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 1fa3f4e26d02..cf5673af3143 100644
+index d4d8d1300faf..a836910ef368 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1169,6 +1169,8 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
diff --git a/for-next/0016-bcache-improve-error-message-in-bch_cached_dev_run.patch b/for-next/0016-bcache-improve-error-message-in-bch_cached_dev_run.patch
index 0c48eb9..affdc51 100644
--- a/for-next/0016-bcache-improve-error-message-in-bch_cached_dev_run.patch
+++ b/for-next/0016-bcache-improve-error-message-in-bch_cached_dev_run.patch
@@ -1,7 +1,8 @@
-From e13acbcf113a47c8a6a126ceb58b4ebfb30cd6e9 Mon Sep 17 00:00:00 2001
+From bcdf41243bbff8940f6c131591b67c1f60a9f2f2 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Tue, 4 Jun 2019 23:12:10 +0800
-Subject: [PATCH 16/34] bcache: improve error message in bch_cached_dev_run()
+Subject: [RFC PATCH 16/35] bcache: improve error message in
+ bch_cached_dev_run()
This patch adds more error message in bch_cached_dev_run() to indicate
the exact reason why an error value is returned. Please notice when
@@ -16,7 +17,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index cf5673af3143..0f67f90d831d 100644
+index a836910ef368..e9e6d653bf70 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -926,13 +926,18 @@ int bch_cached_dev_run(struct cached_dev *dc)
diff --git a/for-next/0007-bcache-remove-XXX-comment-line-from-run_cache_set.patch b/for-next/0017-bcache-remove-XXX-comment-line-from-run_cache_set.patch
index a17f189..2956a1d 100644
--- a/for-next/0007-bcache-remove-XXX-comment-line-from-run_cache_set.patch
+++ b/for-next/0017-bcache-remove-XXX-comment-line-from-run_cache_set.patch
@@ -1,7 +1,8 @@
-From dd3a95d8eac1983ce17fb522671a9f598b8326e3 Mon Sep 17 00:00:00 2001
+From 332809c8e5b6f8dfd98c99c2cbd9ada4730abea1 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Mon, 13 May 2019 23:47:38 +0800
-Subject: [PATCH 07/34] bcache: remove "XXX:" comment line from run_cache_set()
+Subject: [RFC PATCH 17/35] bcache: remove "XXX:" comment line from
+ run_cache_set()
In previous bcache patches for Linux v5.2, the failure code path of
run_cache_set() is tested and fixed. So now the following comment
@@ -14,10 +15,10 @@ Signed-off-by: Coly Li <colyli@suse.de>
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 877113b62b0f..3364b20567eb 100644
+index e9e6d653bf70..c53fe0f1629f 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
-@@ -1946,7 +1946,7 @@ static int run_cache_set(struct cache_set *c)
+@@ -1979,7 +1979,7 @@ static int run_cache_set(struct cache_set *c)
}
closure_sync(&cl);
diff --git a/for-next/0017-bcache-make-bset_search_tree-be-more-understandable.patch b/for-next/0018-bcache-make-bset_search_tree-be-more-understandable.patch
index 15c740a..708168c 100644
--- a/for-next/0017-bcache-make-bset_search_tree-be-more-understandable.patch
+++ b/for-next/0018-bcache-make-bset_search_tree-be-more-understandable.patch
@@ -1,7 +1,8 @@
-From 7a1691bafbc17f04beb6a95c515aad7be5bb3951 Mon Sep 17 00:00:00 2001
+From ef9bfabb6851fa777cff059bf8a984ce9660bea5 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Tue, 14 May 2019 22:51:40 +0800
-Subject: [PATCH 17/34] bcache: make bset_search_tree() be more understandable
+Subject: [RFC PATCH 18/35] bcache: make bset_search_tree() be more
+ understandable
The purpose of following code in bset_search_tree() is to avoid a branch
instruction,
diff --git a/for-next/0018-bcache-add-pendings_cleanup-to-stop-pending-bcache-d.patch b/for-next/0019-bcache-add-pendings_cleanup-to-stop-pending-bcache-d.patch
index f102dfd..41110cb 100644
--- a/for-next/0018-bcache-add-pendings_cleanup-to-stop-pending-bcache-d.patch
+++ b/for-next/0019-bcache-add-pendings_cleanup-to-stop-pending-bcache-d.patch
@@ -1,7 +1,7 @@
-From 85f674e0fe06f063877eece64652fcbd6184b03e Mon Sep 17 00:00:00 2001
+From 30bc020909e5103de2ac639cf455c26734b923fa Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Wed, 20 Mar 2019 23:11:59 +0800
-Subject: [PATCH 18/34] bcache: add pendings_cleanup to stop pending bcache
+Subject: [RFC PATCH 19/35] bcache: add pendings_cleanup to stop pending bcache
device
If a bcache device is in dirty state and its cache set is not
@@ -20,7 +20,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
1 file changed, 55 insertions(+)
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 0f67f90d831d..2647f089cfcf 100644
+index c53fe0f1629f..c4c4b2d99dc2 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -2273,9 +2273,13 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page,
diff --git a/for-next/0019-bcache-fix-mistaken-sysfs-entry-for-io_error-counter.patch b/for-next/0020-bcache-fix-mistaken-sysfs-entry-for-io_error-counter.patch
index 38d2553..f60e75b 100644
--- a/for-next/0019-bcache-fix-mistaken-sysfs-entry-for-io_error-counter.patch
+++ b/for-next/0020-bcache-fix-mistaken-sysfs-entry-for-io_error-counter.patch
@@ -1,7 +1,8 @@
-From 02e09b479eb42831b37f000fdfffa69396968261 Mon Sep 17 00:00:00 2001
+From fc693c9beb9e119e8528344c205d5561f663d7be Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Sun, 16 Jun 2019 23:59:12 +0800
-Subject: [PATCH 19/34] bcache: fix mistaken sysfs entry for io_error counter
+Subject: [RFC PATCH 20/35] bcache: fix mistaken sysfs entry for io_error
+ counter
In bch_cached_dev_files[] from driver/md/bcache/sysfs.c, sysfs_errors is
incorrectly inserted in. The correct entry should be sysfs_io_errors.
diff --git a/for-next/0020-bcache-destroy-dc-writeback_write_wq-if-failed-to-cr.patch b/for-next/0021-bcache-destroy-dc-writeback_write_wq-if-failed-to-cr.patch
index 98679bf..a9d3605 100644
--- a/for-next/0020-bcache-destroy-dc-writeback_write_wq-if-failed-to-cr.patch
+++ b/for-next/0021-bcache-destroy-dc-writeback_write_wq-if-failed-to-cr.patch
@@ -1,7 +1,7 @@
-From 8d46e750cf4906c595e9ee2fb23759acc2b85eae Mon Sep 17 00:00:00 2001
+From 56f3ac45869b22a2e81f7e46a2934068eebc86c3 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Mon, 17 Jun 2019 00:06:58 +0800
-Subject: [PATCH 20/34] bcache: destroy dc->writeback_write_wq if failed to
+Subject: [RFC PATCH 21/35] bcache: destroy dc->writeback_write_wq if failed to
create dc->writeback_thread
Commit 9baf30972b55 ("bcache: fix for gc and write-back race") added a
diff --git a/for-next/0021-bcache-stop-writeback-kthread-and-kworker-when-bch_c.patch b/for-next/0022-bcache-stop-writeback-kthread-and-kworker-when-bch_c.patch
index ed165ac..0fca7b4 100644
--- a/for-next/0021-bcache-stop-writeback-kthread-and-kworker-when-bch_c.patch
+++ b/for-next/0022-bcache-stop-writeback-kthread-and-kworker-when-bch_c.patch
@@ -1,7 +1,7 @@
-From b328d7e8ba33af6db7fddd9731f2a618e9d0c570 Mon Sep 17 00:00:00 2001
+From 821ac293c4219df6974b6c69f5f35e734bb90ba6 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Mon, 17 Jun 2019 23:03:02 +0800
-Subject: [PATCH 21/34] bcache: stop writeback kthread and kworker when
+Subject: [RFC PATCH 22/35] bcache: stop writeback kthread and kworker when
bch_cached_dev_run() failed
In bch_cached_dev_attach() after bch_cached_dev_writeback_start()
@@ -19,7 +19,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
1 file changed, 8 insertions(+)
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 2647f089cfcf..b8122757f9f0 100644
+index c4c4b2d99dc2..791cb930b353 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1189,6 +1189,14 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
diff --git a/for-next/0023-bcache-acquire-bch_register_lock-later-in-cached_dev.patch b/for-next/0023-bcache-acquire-bch_register_lock-later-in-cached_dev.patch
deleted file mode 100644
index fa3d166..0000000
--- a/for-next/0023-bcache-acquire-bch_register_lock-later-in-cached_dev.patch
+++ /dev/null
@@ -1,47 +0,0 @@
-From c5e7817305b4a9684c8362be63fcf25516404dfa Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sun, 2 Jun 2019 01:06:12 +0800
-Subject: [PATCH 23/34] bcache: acquire bch_register_lock later in
- cached_dev_detach_finish()
-
-Now there is variable bcache_is_reboot to prevent device register or
-unregister during reboot, it is unncessary to still hold mutex lock
-bch_regsiter_lock before stopping writeback_rate_update kworker and
-writeback kthread. And if the stopping kworker or kthread holding
-bch_register_lock inside their routine (we used to have such problem
-in writeback thread, thanks to Junhui Wang fixed it), it is very easy
-to introduce deadlock during reboot/shutdown procedure.
-
-Therefore in this patch, the location to acquire bch_register_lock is
-moved to the location before calling calc_cached_dev_sectors(). Which
-is later then original location in cached_dev_detach_finish().
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/super.c | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 95e975e00905..7d1301d5e851 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1018,7 +1018,6 @@ static void cached_dev_detach_finish(struct work_struct *w)
- BUG_ON(!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags));
- BUG_ON(refcount_read(&dc->count));
-
-- mutex_lock(&bch_register_lock);
-
- if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
- cancel_writeback_rate_update_dwork(dc);
-@@ -1034,6 +1033,8 @@ static void cached_dev_detach_finish(struct work_struct *w)
- bch_write_bdev_super(dc, &cl);
- closure_sync(&cl);
-
-+ mutex_lock(&bch_register_lock);
-+
- calc_cached_dev_sectors(dc->disk.c);
- bcache_device_detach(&dc->disk);
- list_move(&dc->list, &uncached_devices);
---
-2.16.4
-
diff --git a/for-next/0022-bcache-avoid-a-deadlock-in-bcache_reboot.patch b/for-next/0023-bcache-avoid-a-deadlock-in-bcache_reboot.patch
index d8a9fc9..98b1d21 100644
--- a/for-next/0022-bcache-avoid-a-deadlock-in-bcache_reboot.patch
+++ b/for-next/0023-bcache-avoid-a-deadlock-in-bcache_reboot.patch
@@ -1,7 +1,7 @@
-From 3e9365598332f3e65f5aff2e19df8f808ed746d0 Mon Sep 17 00:00:00 2001
+From 550875e097fa17f7d30eef71b98c914f39bdd601 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Tue, 21 May 2019 23:19:55 +0800
-Subject: [PATCH 22/34] bcache: avoid a deadlock in bcache_reboot()
+Subject: [RFC PATCH 23/35] bcache: avoid a deadlock in bcache_reboot()
It is quite frequently to observe deadlock in bcache_reboot() happens
and hang the system reboot process. The reason is, in bcache_reboot()
@@ -30,7 +30,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
2 files changed, 65 insertions(+), 1 deletion(-)
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index b8122757f9f0..95e975e00905 100644
+index 791cb930b353..a88238ad5da1 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -40,6 +40,7 @@ static const char invalid_uuid[] = {
diff --git a/for-next/0024-bcache-acquire-bch_register_lock-later-in-cached_dev.patch b/for-next/0024-bcache-acquire-bch_register_lock-later-in-cached_dev.patch
index 1c12642..54abc87 100644
--- a/for-next/0024-bcache-acquire-bch_register_lock-later-in-cached_dev.patch
+++ b/for-next/0024-bcache-acquire-bch_register_lock-later-in-cached_dev.patch
@@ -1,160 +1,47 @@
-From 72f2d26b5b4f0acfbb0121d6f81cc773a21c7da2 Mon Sep 17 00:00:00 2001
+From 653c136c91042f61703fd82db364110e28bfd471 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
-Date: Wed, 12 Jun 2019 21:10:38 +0800
-Subject: [PATCH 24/34] bcache: acquire bch_register_lock later in
- cached_dev_free()
-
-When enable lockdep engine, a lockdep warning can be observed when
-reboot or shutdown system,
-
-[ 3142.764557][ T1] bcache: bcache_reboot() Stopping all devices:
-[ 3142.776265][ T2649]
-[ 3142.777159][ T2649] ======================================================
-[ 3142.780039][ T2649] WARNING: possible circular locking dependency detected
-[ 3142.782869][ T2649] 5.2.0-rc4-lp151.20-default+ #1 Tainted: G W
-[ 3142.785684][ T2649] ------------------------------------------------------
-[ 3142.788479][ T2649] kworker/3:67/2649 is trying to acquire lock:
-[ 3142.790738][ T2649] 00000000aaf02291 ((wq_completion)bcache_writeback_wq){+.+.}, at: flush_workqueue+0x87/0x4c0
-[ 3142.794678][ T2649]
-[ 3142.794678][ T2649] but task is already holding lock:
-[ 3142.797402][ T2649] 000000004fcf89c5 (&bch_register_lock){+.+.}, at: cached_dev_free+0x17/0x120 [bcache]
-[ 3142.801462][ T2649]
-[ 3142.801462][ T2649] which lock already depends on the new lock.
-[ 3142.801462][ T2649]
-[ 3142.805277][ T2649]
-[ 3142.805277][ T2649] the existing dependency chain (in reverse order) is:
-[ 3142.808902][ T2649]
-[ 3142.808902][ T2649] -> #2 (&bch_register_lock){+.+.}:
-[ 3142.812396][ T2649] __mutex_lock+0x7a/0x9d0
-[ 3142.814184][ T2649] cached_dev_free+0x17/0x120 [bcache]
-[ 3142.816415][ T2649] process_one_work+0x2a4/0x640
-[ 3142.818413][ T2649] worker_thread+0x39/0x3f0
-[ 3142.820276][ T2649] kthread+0x125/0x140
-[ 3142.822061][ T2649] ret_from_fork+0x3a/0x50
-[ 3142.823965][ T2649]
-[ 3142.823965][ T2649] -> #1 ((work_completion)(&cl->work)#2){+.+.}:
-[ 3142.827244][ T2649] process_one_work+0x277/0x640
-[ 3142.829160][ T2649] worker_thread+0x39/0x3f0
-[ 3142.830958][ T2649] kthread+0x125/0x140
-[ 3142.832674][ T2649] ret_from_fork+0x3a/0x50
-[ 3142.834915][ T2649]
-[ 3142.834915][ T2649] -> #0 ((wq_completion)bcache_writeback_wq){+.+.}:
-[ 3142.838121][ T2649] lock_acquire+0xb4/0x1c0
-[ 3142.840025][ T2649] flush_workqueue+0xae/0x4c0
-[ 3142.842035][ T2649] drain_workqueue+0xa9/0x180
-[ 3142.844042][ T2649] destroy_workqueue+0x17/0x250
-[ 3142.846142][ T2649] cached_dev_free+0x52/0x120 [bcache]
-[ 3142.848530][ T2649] process_one_work+0x2a4/0x640
-[ 3142.850663][ T2649] worker_thread+0x39/0x3f0
-[ 3142.852464][ T2649] kthread+0x125/0x140
-[ 3142.854106][ T2649] ret_from_fork+0x3a/0x50
-[ 3142.855880][ T2649]
-[ 3142.855880][ T2649] other info that might help us debug this:
-[ 3142.855880][ T2649]
-[ 3142.859663][ T2649] Chain exists of:
-[ 3142.859663][ T2649] (wq_completion)bcache_writeback_wq --> (work_completion)(&cl->work)#2 --> &bch_register_lock
-[ 3142.859663][ T2649]
-[ 3142.865424][ T2649] Possible unsafe locking scenario:
-[ 3142.865424][ T2649]
-[ 3142.868022][ T2649] CPU0 CPU1
-[ 3142.869885][ T2649] ---- ----
-[ 3142.871751][ T2649] lock(&bch_register_lock);
-[ 3142.873379][ T2649] lock((work_completion)(&cl->work)#2);
-[ 3142.876399][ T2649] lock(&bch_register_lock);
-[ 3142.879727][ T2649] lock((wq_completion)bcache_writeback_wq);
-[ 3142.882064][ T2649]
-[ 3142.882064][ T2649] *** DEADLOCK ***
-[ 3142.882064][ T2649]
-[ 3142.885060][ T2649] 3 locks held by kworker/3:67/2649:
-[ 3142.887245][ T2649] #0: 00000000e774cdd0 ((wq_completion)events){+.+.}, at: process_one_work+0x21e/0x640
-[ 3142.890815][ T2649] #1: 00000000f7df89da ((work_completion)(&cl->work)#2){+.+.}, at: process_one_work+0x21e/0x640
-[ 3142.894884][ T2649] #2: 000000004fcf89c5 (&bch_register_lock){+.+.}, at: cached_dev_free+0x17/0x120 [bcache]
-[ 3142.898797][ T2649]
-[ 3142.898797][ T2649] stack backtrace:
-[ 3142.900961][ T2649] CPU: 3 PID: 2649 Comm: kworker/3:67 Tainted: G W 5.2.0-rc4-lp151.20-default+ #1
-[ 3142.904789][ T2649] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 04/13/2018
-[ 3142.909168][ T2649] Workqueue: events cached_dev_free [bcache]
-[ 3142.911422][ T2649] Call Trace:
-[ 3142.912656][ T2649] dump_stack+0x85/0xcb
-[ 3142.914181][ T2649] print_circular_bug+0x19a/0x1f0
-[ 3142.916193][ T2649] __lock_acquire+0x16cd/0x1850
-[ 3142.917936][ T2649] ? __lock_acquire+0x6a8/0x1850
-[ 3142.919704][ T2649] ? lock_acquire+0xb4/0x1c0
-[ 3142.921335][ T2649] ? find_held_lock+0x34/0xa0
-[ 3142.923052][ T2649] lock_acquire+0xb4/0x1c0
-[ 3142.924635][ T2649] ? flush_workqueue+0x87/0x4c0
-[ 3142.926375][ T2649] flush_workqueue+0xae/0x4c0
-[ 3142.928047][ T2649] ? flush_workqueue+0x87/0x4c0
-[ 3142.929824][ T2649] ? drain_workqueue+0xa9/0x180
-[ 3142.931686][ T2649] drain_workqueue+0xa9/0x180
-[ 3142.933534][ T2649] destroy_workqueue+0x17/0x250
-[ 3142.935787][ T2649] cached_dev_free+0x52/0x120 [bcache]
-[ 3142.937795][ T2649] process_one_work+0x2a4/0x640
-[ 3142.939803][ T2649] worker_thread+0x39/0x3f0
-[ 3142.941487][ T2649] ? process_one_work+0x640/0x640
-[ 3142.943389][ T2649] kthread+0x125/0x140
-[ 3142.944894][ T2649] ? kthread_create_worker_on_cpu+0x70/0x70
-[ 3142.947744][ T2649] ret_from_fork+0x3a/0x50
-[ 3142.970358][ T2649] bcache: bcache_device_free() bcache0 stopped
-
-Here is how the deadlock happens.
-1) bcache_reboot() calls bcache_device_stop(), then inside
- bcache_device_stop() BCACHE_DEV_CLOSING bit is set on d->flags.
- Then closure_queue(&d->cl) is called to invoke cached_dev_flush().
-2) In cached_dev_flush(), cached_dev_free() is called by continu_at().
-3) In cached_dev_free(), when stopping the writeback kthread of the
- cached device by kthread_stop(), dc->writeback_thread will be waken
- up to quite the kthread while-loop, then cached_dev_put() is called
- in bch_writeback_thread().
-4) Calling cached_dev_put() in writeback kthread may drop dc->count to
- 0, then dc->detach kworker is scheduled, which is initialized as
- cached_dev_detach_finish().
-5) Inside cached_dev_detach_finish(), the last line of code is to call
- closure_put(&dc->disk.cl), which drops the last reference counter of
- closrure dc->disk.cl, then the callback cached_dev_flush() gets
- called.
-Now cached_dev_flush() is called for second time in the code path, the
-first time is in step 2). And again bch_register_lock will be acquired
-again, and a A-A lock (lockdep terminology) is happening.
-
-The root cause of the above A-A lock is in cached_dev_free(), mutex
-bch_register_lock is held before stopping writeback kthread and other
-kworkers. Fortunately now we have variable 'bcache_is_reboot', which may
-prevent device registration or unregistration during reboot/shutdown
-time, so it is unncessary to hold bch_register_lock such early now.
-
-This is how this patch fixes the reboot/shutdown time A-A lock issue:
-After moving mutex_lock(&bch_register_lock) to a later location where
-before atomic_read(&dc->running) in cached_dev_free(), such A-A lock
-problem can be solved without any reboot time registration race.
+Date: Sun, 2 Jun 2019 01:06:12 +0800
+Subject: [RFC PATCH 24/35] bcache: acquire bch_register_lock later in
+ cached_dev_detach_finish()
+
+Now that the variable bcache_is_reboot prevents device registration or
+unregistration during reboot, it is unnecessary to hold the mutex
+bch_register_lock before stopping the writeback_rate_update kworker and
+the writeback kthread. And if the kworker or kthread being stopped takes
+bch_register_lock inside its routine (we used to have such a problem
+in the writeback thread, thanks to Junhui Wang for fixing it), it is very
+easy to introduce a deadlock during the reboot/shutdown procedure.
+
+Therefore in this patch, the location to acquire bch_register_lock is
+moved to just before calling calc_cached_dev_sectors(), which is
+later than the original location in cached_dev_detach_finish().
Signed-off-by: Coly Li <colyli@suse.de>
---
- drivers/md/bcache/super.c | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
+ drivers/md/bcache/super.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 7d1301d5e851..bd67837a7e8a 100644
+index a88238ad5da1..40d857e690f9 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
-@@ -1231,8 +1231,6 @@ static void cached_dev_free(struct closure *cl)
- {
- struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
+@@ -1018,7 +1018,6 @@ static void cached_dev_detach_finish(struct work_struct *w)
+ BUG_ON(!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags));
+ BUG_ON(refcount_read(&dc->count));
- mutex_lock(&bch_register_lock);
--
+
if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
cancel_writeback_rate_update_dwork(dc);
-
-@@ -1243,6 +1241,8 @@ static void cached_dev_free(struct closure *cl)
- if (!IS_ERR_OR_NULL(dc->status_update_thread))
- kthread_stop(dc->status_update_thread);
+@@ -1034,6 +1033,8 @@ static void cached_dev_detach_finish(struct work_struct *w)
+ bch_write_bdev_super(dc, &cl);
+ closure_sync(&cl);
+ mutex_lock(&bch_register_lock);
+
- if (atomic_read(&dc->running))
- bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
- bcache_device_free(&dc->disk);
+ calc_cached_dev_sectors(dc->disk.c);
+ bcache_device_detach(&dc->disk);
+ list_move(&dc->list, &uncached_devices);
--
2.16.4
diff --git a/for-next/0025-bcache-acquire-bch_register_lock-later-in-cached_dev.patch b/for-next/0025-bcache-acquire-bch_register_lock-later-in-cached_dev.patch
new file mode 100644
index 0000000..e8aee63
--- /dev/null
+++ b/for-next/0025-bcache-acquire-bch_register_lock-later-in-cached_dev.patch
@@ -0,0 +1,160 @@
+From 18905d6a08c04433b01f6075b0d1ba65f63475c9 Mon Sep 17 00:00:00 2001
+From: Coly Li <colyli@suse.de>
+Date: Wed, 12 Jun 2019 21:10:38 +0800
+Subject: [RFC PATCH 25/35] bcache: acquire bch_register_lock later in
+ cached_dev_free()
+
+When the lockdep engine is enabled, a lockdep warning can be observed
+when rebooting or shutting down the system,
+
+[ 3142.764557][ T1] bcache: bcache_reboot() Stopping all devices:
+[ 3142.776265][ T2649]
+[ 3142.777159][ T2649] ======================================================
+[ 3142.780039][ T2649] WARNING: possible circular locking dependency detected
+[ 3142.782869][ T2649] 5.2.0-rc4-lp151.20-default+ #1 Tainted: G W
+[ 3142.785684][ T2649] ------------------------------------------------------
+[ 3142.788479][ T2649] kworker/3:67/2649 is trying to acquire lock:
+[ 3142.790738][ T2649] 00000000aaf02291 ((wq_completion)bcache_writeback_wq){+.+.}, at: flush_workqueue+0x87/0x4c0
+[ 3142.794678][ T2649]
+[ 3142.794678][ T2649] but task is already holding lock:
+[ 3142.797402][ T2649] 000000004fcf89c5 (&bch_register_lock){+.+.}, at: cached_dev_free+0x17/0x120 [bcache]
+[ 3142.801462][ T2649]
+[ 3142.801462][ T2649] which lock already depends on the new lock.
+[ 3142.801462][ T2649]
+[ 3142.805277][ T2649]
+[ 3142.805277][ T2649] the existing dependency chain (in reverse order) is:
+[ 3142.808902][ T2649]
+[ 3142.808902][ T2649] -> #2 (&bch_register_lock){+.+.}:
+[ 3142.812396][ T2649] __mutex_lock+0x7a/0x9d0
+[ 3142.814184][ T2649] cached_dev_free+0x17/0x120 [bcache]
+[ 3142.816415][ T2649] process_one_work+0x2a4/0x640
+[ 3142.818413][ T2649] worker_thread+0x39/0x3f0
+[ 3142.820276][ T2649] kthread+0x125/0x140
+[ 3142.822061][ T2649] ret_from_fork+0x3a/0x50
+[ 3142.823965][ T2649]
+[ 3142.823965][ T2649] -> #1 ((work_completion)(&cl->work)#2){+.+.}:
+[ 3142.827244][ T2649] process_one_work+0x277/0x640
+[ 3142.829160][ T2649] worker_thread+0x39/0x3f0
+[ 3142.830958][ T2649] kthread+0x125/0x140
+[ 3142.832674][ T2649] ret_from_fork+0x3a/0x50
+[ 3142.834915][ T2649]
+[ 3142.834915][ T2649] -> #0 ((wq_completion)bcache_writeback_wq){+.+.}:
+[ 3142.838121][ T2649] lock_acquire+0xb4/0x1c0
+[ 3142.840025][ T2649] flush_workqueue+0xae/0x4c0
+[ 3142.842035][ T2649] drain_workqueue+0xa9/0x180
+[ 3142.844042][ T2649] destroy_workqueue+0x17/0x250
+[ 3142.846142][ T2649] cached_dev_free+0x52/0x120 [bcache]
+[ 3142.848530][ T2649] process_one_work+0x2a4/0x640
+[ 3142.850663][ T2649] worker_thread+0x39/0x3f0
+[ 3142.852464][ T2649] kthread+0x125/0x140
+[ 3142.854106][ T2649] ret_from_fork+0x3a/0x50
+[ 3142.855880][ T2649]
+[ 3142.855880][ T2649] other info that might help us debug this:
+[ 3142.855880][ T2649]
+[ 3142.859663][ T2649] Chain exists of:
+[ 3142.859663][ T2649] (wq_completion)bcache_writeback_wq --> (work_completion)(&cl->work)#2 --> &bch_register_lock
+[ 3142.859663][ T2649]
+[ 3142.865424][ T2649] Possible unsafe locking scenario:
+[ 3142.865424][ T2649]
+[ 3142.868022][ T2649] CPU0 CPU1
+[ 3142.869885][ T2649] ---- ----
+[ 3142.871751][ T2649] lock(&bch_register_lock);
+[ 3142.873379][ T2649] lock((work_completion)(&cl->work)#2);
+[ 3142.876399][ T2649] lock(&bch_register_lock);
+[ 3142.879727][ T2649] lock((wq_completion)bcache_writeback_wq);
+[ 3142.882064][ T2649]
+[ 3142.882064][ T2649] *** DEADLOCK ***
+[ 3142.882064][ T2649]
+[ 3142.885060][ T2649] 3 locks held by kworker/3:67/2649:
+[ 3142.887245][ T2649] #0: 00000000e774cdd0 ((wq_completion)events){+.+.}, at: process_one_work+0x21e/0x640
+[ 3142.890815][ T2649] #1: 00000000f7df89da ((work_completion)(&cl->work)#2){+.+.}, at: process_one_work+0x21e/0x640
+[ 3142.894884][ T2649] #2: 000000004fcf89c5 (&bch_register_lock){+.+.}, at: cached_dev_free+0x17/0x120 [bcache]
+[ 3142.898797][ T2649]
+[ 3142.898797][ T2649] stack backtrace:
+[ 3142.900961][ T2649] CPU: 3 PID: 2649 Comm: kworker/3:67 Tainted: G W 5.2.0-rc4-lp151.20-default+ #1
+[ 3142.904789][ T2649] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 04/13/2018
+[ 3142.909168][ T2649] Workqueue: events cached_dev_free [bcache]
+[ 3142.911422][ T2649] Call Trace:
+[ 3142.912656][ T2649] dump_stack+0x85/0xcb
+[ 3142.914181][ T2649] print_circular_bug+0x19a/0x1f0
+[ 3142.916193][ T2649] __lock_acquire+0x16cd/0x1850
+[ 3142.917936][ T2649] ? __lock_acquire+0x6a8/0x1850
+[ 3142.919704][ T2649] ? lock_acquire+0xb4/0x1c0
+[ 3142.921335][ T2649] ? find_held_lock+0x34/0xa0
+[ 3142.923052][ T2649] lock_acquire+0xb4/0x1c0
+[ 3142.924635][ T2649] ? flush_workqueue+0x87/0x4c0
+[ 3142.926375][ T2649] flush_workqueue+0xae/0x4c0
+[ 3142.928047][ T2649] ? flush_workqueue+0x87/0x4c0
+[ 3142.929824][ T2649] ? drain_workqueue+0xa9/0x180
+[ 3142.931686][ T2649] drain_workqueue+0xa9/0x180
+[ 3142.933534][ T2649] destroy_workqueue+0x17/0x250
+[ 3142.935787][ T2649] cached_dev_free+0x52/0x120 [bcache]
+[ 3142.937795][ T2649] process_one_work+0x2a4/0x640
+[ 3142.939803][ T2649] worker_thread+0x39/0x3f0
+[ 3142.941487][ T2649] ? process_one_work+0x640/0x640
+[ 3142.943389][ T2649] kthread+0x125/0x140
+[ 3142.944894][ T2649] ? kthread_create_worker_on_cpu+0x70/0x70
+[ 3142.947744][ T2649] ret_from_fork+0x3a/0x50
+[ 3142.970358][ T2649] bcache: bcache_device_free() bcache0 stopped
+
+Here is how the deadlock happens.
+1) bcache_reboot() calls bcache_device_stop(), then inside
+ bcache_device_stop() BCACHE_DEV_CLOSING bit is set on d->flags.
+ Then closure_queue(&d->cl) is called to invoke cached_dev_flush().
+2) In cached_dev_flush(), cached_dev_free() is called by continue_at().
+3) In cached_dev_free(), when stopping the writeback kthread of the
+ cached device by kthread_stop(), dc->writeback_thread will be woken
+ up to quit the kthread while-loop, then cached_dev_put() is called
+ in bch_writeback_thread().
+4) Calling cached_dev_put() in writeback kthread may drop dc->count to
+ 0, then dc->detach kworker is scheduled, which is initialized as
+ cached_dev_detach_finish().
+5) Inside cached_dev_detach_finish(), the last line of code calls
+ closure_put(&dc->disk.cl), which drops the last reference count of
+ closure dc->disk.cl, then the callback cached_dev_flush() gets
+ called.
+Now cached_dev_flush() is called for the second time in the code path;
+the first time is in step 2). So bch_register_lock will be acquired
+again, and an A-A lock (lockdep terminology) happens.
+
+The root cause of the above A-A lock is that in cached_dev_free(), mutex
+bch_register_lock is held before stopping the writeback kthread and other
+kworkers. Fortunately now we have variable 'bcache_is_reboot', which can
+prevent device registration or unregistration during reboot/shutdown
+time, so it is unnecessary to hold bch_register_lock so early now.
+
+This is how this patch fixes the reboot/shutdown time A-A lock issue:
+After moving mutex_lock(&bch_register_lock) to a later location, just
+before atomic_read(&dc->running) in cached_dev_free(), the A-A lock
+problem is solved without any reboot time registration race.
+
+Signed-off-by: Coly Li <colyli@suse.de>
+---
+ drivers/md/bcache/super.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
+index 40d857e690f9..8a12a8313367 100644
+--- a/drivers/md/bcache/super.c
++++ b/drivers/md/bcache/super.c
+@@ -1231,8 +1231,6 @@ static void cached_dev_free(struct closure *cl)
+ {
+ struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
+
+- mutex_lock(&bch_register_lock);
+-
+ if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
+ cancel_writeback_rate_update_dwork(dc);
+
+@@ -1243,6 +1241,8 @@ static void cached_dev_free(struct closure *cl)
+ if (!IS_ERR_OR_NULL(dc->status_update_thread))
+ kthread_stop(dc->status_update_thread);
+
++ mutex_lock(&bch_register_lock);
++
+ if (atomic_read(&dc->running))
+ bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
+ bcache_device_free(&dc->disk);
+--
+2.16.4
+
diff --git a/for-next/0025-bcache-fix-potential-deadlock-in-cached_def_free.patch b/for-next/0026-bcache-fix-potential-deadlock-in-cached_def_free.patch
index 4fae6c3..748eec2 100644
--- a/for-next/0025-bcache-fix-potential-deadlock-in-cached_def_free.patch
+++ b/for-next/0026-bcache-fix-potential-deadlock-in-cached_def_free.patch
@@ -1,7 +1,7 @@
-From 8bd74a44e7393d7fedb29b4a4c170c8add2739e8 Mon Sep 17 00:00:00 2001
+From aaab36516e163f88bf628df4c23e9aeb9c563e82 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Tue, 4 Jun 2019 14:28:33 +0800
-Subject: [PATCH 25/34] bcache: fix potential deadlock in cached_def_free()
+Subject: [RFC PATCH 26/35] bcache: fix potential deadlock in cached_dev_free()
When enable lockdep and reboot system with a writeback mode bcache
device, the following potential deadlock warning is reported by lockdep
@@ -136,7 +136,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index bd67837a7e8a..7b9d8f2fcfc0 100644
+index 8a12a8313367..a8ea4e2086a9 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1236,8 +1236,6 @@ static void cached_dev_free(struct closure *cl)
diff --git a/for-next/0026-bcache-add-code-comments-for-journal_read_bucket.patch b/for-next/0027-bcache-add-code-comments-for-journal_read_bucket.patch
index a862288..e5430c5 100644
--- a/for-next/0026-bcache-add-code-comments-for-journal_read_bucket.patch
+++ b/for-next/0027-bcache-add-code-comments-for-journal_read_bucket.patch
@@ -1,7 +1,7 @@
-From eee65a56aa818a05f15dabe93265b8e7d5e42bcb Mon Sep 17 00:00:00 2001
+From 3b379c28388207864fe8aba6c1ec15bfe3c74143 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Thu, 30 May 2019 18:39:17 +0800
-Subject: [PATCH 26/34] bcache: add code comments for journal_read_bucket()
+Subject: [RFC PATCH 27/35] bcache: add code comments for journal_read_bucket()
This patch adds more code comments in journal_read_bucket(), this is an
effort to make the code to be more understandable.
diff --git a/for-next/0027-bcache-set-largest-seq-to-ja-seq-bucket_index-in-jou.patch b/for-next/0028-bcache-set-largest-seq-to-ja-seq-bucket_index-in-jou.patch
index 6d92b0b..e5c39a0 100644
--- a/for-next/0027-bcache-set-largest-seq-to-ja-seq-bucket_index-in-jou.patch
+++ b/for-next/0028-bcache-set-largest-seq-to-ja-seq-bucket_index-in-jou.patch
@@ -1,7 +1,7 @@
-From c68b3d29020561e846755f2d2deccd5f95928755 Mon Sep 17 00:00:00 2001
+From 0c63a67cda7304df43f0e029e0c7c1e8129c5355 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Thu, 30 May 2019 18:40:37 +0800
-Subject: [PATCH 27/34] bcache: set largest seq to ja->seq[bucket_index] in
+Subject: [RFC PATCH 28/35] bcache: set largest seq to ja->seq[bucket_index] in
journal_read_bucket()
In journal_read_bucket() when setting ja->seq[bucket_index], there might
diff --git a/for-next/0028-bcache-shrink-btree-node-cache-after-bch_btree_check.patch b/for-next/0029-bcache-shrink-btree-node-cache-after-bch_btree_check.patch
index 16dcf32..5289e7e 100644
--- a/for-next/0028-bcache-shrink-btree-node-cache-after-bch_btree_check.patch
+++ b/for-next/0029-bcache-shrink-btree-node-cache-after-bch_btree_check.patch
@@ -1,7 +1,8 @@
-From 0f5b9956e6ed34142303a7fd6e2a4a32e6df5c79 Mon Sep 17 00:00:00 2001
+From d085ab286b31725eedb66103682889a224470f66 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Fri, 31 May 2019 17:29:56 +0800
-Subject: [PATCH 28/34] bcache: shrink btree node cache after bch_btree_check()
+Subject: [RFC PATCH 29/35] bcache: shrink btree node cache after
+ bch_btree_check()
When cache set starts, bch_btree_check() will check all bkeys on cache
device by calculating the checksum. This operation will consume a huge
@@ -23,7 +24,7 @@ Signed-off-by: Coly Li <colyli@suse.de>
1 file changed, 17 insertions(+)
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 7b9d8f2fcfc0..14007ab6e61b 100644
+index a8ea4e2086a9..26e374fbf57c 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1880,6 +1880,23 @@ static int run_cache_set(struct cache_set *c)
diff --git a/for-next/0029-bcache-Improve-bcache-tollerance-for-out-of-memory-c.patch b/for-next/0030-bcache-Improve-bcache-tollerance-for-out-of-memory-c.patch
index d600c14..bf16312 100644
--- a/for-next/0029-bcache-Improve-bcache-tollerance-for-out-of-memory-c.patch
+++ b/for-next/0030-bcache-Improve-bcache-tollerance-for-out-of-memory-c.patch
@@ -1,9 +1,11 @@
-From 55f89314157a0e7da3772cfbbb9fe96650b270d8 Mon Sep 17 00:00:00 2001
+From 5dfc37594d9522b582dc6b8205e60e9c0d55b241 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Sat, 22 Jun 2019 14:29:17 +0800
-Subject: [PATCH 29/34] bcache: Improve bcache tollerance for out-of-memory
+Subject: [RFC PATCH 30/35] bcache: Improve bcache tolerance for out-of-memory
condition
+
+
Signed-off-by: Coly Li <colyli@suse.de>
Reported-and-tested-by: kbuild test robot <lkp@intel.com>
---
@@ -156,7 +158,7 @@ index 41adcd1546f1..a7a570a881cc 100644
s = search_alloc(bio, d);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 14007ab6e61b..9c48ea5b1e2a 100644
+index 26e374fbf57c..41337a61aceb 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1619,6 +1619,7 @@ static void cache_set_flush(struct closure *cl)
diff --git a/for-next/0030-bcache-Revert-bcache-free-heap-cache_set-flush_btree.patch b/for-next/0031-bcache-Revert-bcache-free-heap-cache_set-flush_btree.patch
index d5185bf..624d058 100644
--- a/for-next/0030-bcache-Revert-bcache-free-heap-cache_set-flush_btree.patch
+++ b/for-next/0031-bcache-Revert-bcache-free-heap-cache_set-flush_btree.patch
@@ -1,7 +1,7 @@
-From 658244d6ba82030c011854fd16e45f9d20e0e218 Mon Sep 17 00:00:00 2001
+From d74ea7111f70b325059ac54bbf0bc00f47c2a8a8 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Tue, 28 May 2019 21:36:56 +0800
-Subject: [PATCH 30/34] bcache: Revert "bcache: free heap
+Subject: [RFC PATCH 31/35] bcache: Revert "bcache: free heap
cache_set->flush_btree in bch_journal_free"
This reverts commit 6268dc2c4703aabfb0b35681be709acf4c2826c6.
diff --git a/for-next/0031-bcache-Revert-bcache-fix-high-CPU-occupancy-during-j.patch b/for-next/0032-bcache-Revert-bcache-fix-high-CPU-occupancy-during-j.patch
index c55f74e..94c0d30 100644
--- a/for-next/0031-bcache-Revert-bcache-fix-high-CPU-occupancy-during-j.patch
+++ b/for-next/0032-bcache-Revert-bcache-fix-high-CPU-occupancy-during-j.patch
@@ -1,8 +1,8 @@
-From 926c5dc8279dd7651109adc895681230ae372bb3 Mon Sep 17 00:00:00 2001
+From da0892eebe8ca4ce94a5a9ccb8adae605b212030 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Tue, 28 May 2019 21:19:38 +0800
-Subject: [PATCH 31/34] bcache: Revert "bcache: fix high CPU occupancy during
- journal"
+Subject: [RFC PATCH 32/35] bcache: Revert "bcache: fix high CPU occupancy
+ during journal"
This reverts commit c4dc2497d50d9c6fb16aa0d07b6a14f3b2adb1e0.
diff --git a/for-next/0032-bcache-remove-retry_flush_write-from-struct-cache_se.patch b/for-next/0033-bcache-remove-retry_flush_write-from-struct-cache_se.patch
index 8d14ae8..7ceb1b1 100644
--- a/for-next/0032-bcache-remove-retry_flush_write-from-struct-cache_se.patch
+++ b/for-next/0033-bcache-remove-retry_flush_write-from-struct-cache_se.patch
@@ -1,7 +1,8 @@
-From 4c2437cb934732739fbe5c157b13e5a218798808 Mon Sep 17 00:00:00 2001
+From a5c5ed932fcfc7e349d5dbe0ba731ab204053e31 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Sat, 1 Jun 2019 01:58:23 +0800
-Subject: [PATCH 32/34] bcache: remove retry_flush_write from struct cache_set
+Subject: [RFC PATCH 33/35] bcache: remove retry_flush_write from struct
+ cache_set
In struct cache_set, retry_flush_write is added for commit c4dc2497d50d
("bcache: fix high CPU occupancy during journal") which is reverted in
@@ -11,9 +12,10 @@ Now it is useless anymore, and this patch removes it from bcache code.
Signed-off-by: Coly Li <colyli@suse.de>
---
- drivers/md/bcache/bcache.h | 1 -
- drivers/md/bcache/sysfs.c | 5 -----
- 2 files changed, 6 deletions(-)
+ drivers/md/bcache/bcache.h | 1 -
+ drivers/md/bcache/journal.c | 1 -
+ drivers/md/bcache/sysfs.c | 5 -----
+ 3 files changed, 7 deletions(-)
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 8938b6b54b52..7c392768a96c 100644
@@ -27,6 +29,18 @@ index 8938b6b54b52..7c392768a96c 100644
enum {
ON_ERROR_UNREGISTER,
+diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
+index 14a4e2c44de9..1218e3cada3c 100644
+--- a/drivers/md/bcache/journal.c
++++ b/drivers/md/bcache/journal.c
+@@ -447,7 +447,6 @@ static void btree_flush_write(struct cache_set *c)
+ if (!btree_current_write(b)->journal) {
+ mutex_unlock(&b->write_lock);
+ /* We raced */
+- atomic_long_inc(&c->retry_flush_write);
+ goto retry;
+ }
+
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index d62e28643109..701a386a954c 100644
--- a/drivers/md/bcache/sysfs.c
diff --git a/for-next/0033-bcache-fix-race-in-btree_flush_write.patch b/for-next/0034-bcache-fix-race-in-btree_flush_write.patch
index f85cc2a..fd189cd 100644
--- a/for-next/0033-bcache-fix-race-in-btree_flush_write.patch
+++ b/for-next/0034-bcache-fix-race-in-btree_flush_write.patch
@@ -1,7 +1,7 @@
-From 61a77454764a05be40685f7a3bcf5b492254a92b Mon Sep 17 00:00:00 2001
+From c3a71234b22644aee9ec0fd978599cfb8c74062b Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
-Date: Sat, 1 Jun 2019 01:55:30 +0800
-Subject: [PATCH 33/34] bcache: fix race in btree_flush_write()
+Date: Sat, 22 Jun 2019 23:38:58 +0800
+Subject: [RFC PATCH 34/35] bcache: fix race in btree_flush_write()
There is a race between mca_reap(), btree_node_free() and journal code
btree_flush_write(), which results in very rare and strange deadlock or
@@ -43,14 +43,14 @@ without BTREE_NODE_journal_flush flag, such race is avoided.
One corner case should be noted, that is btree_node_free(). It might
be called in some error handling code path. For example the following
code piece from btree_split(),
- 2149 err_free2:
- 2150 bkey_put(b->c, &n2->key);
- 2151 btree_node_free(n2);
- 2152 rw_unlock(true, n2);
- 2153 err_free1:
- 2154 bkey_put(b->c, &n1->key);
- 2155 btree_node_free(n1);
- 2156 rw_unlock(true, n1);
+ 2149 err_free2:
+ 2150 bkey_put(b->c, &n2->key);
+ 2151 btree_node_free(n2);
+ 2152 rw_unlock(true, n2);
+ 2153 err_free1:
+ 2154 bkey_put(b->c, &n1->key);
+ 2155 btree_node_free(n1);
+ 2156 rw_unlock(true, n1);
At line 2151 and 2155, the btree node n2 and n1 are released without
mca_reap(), so BTREE_NODE_journal_flush also needs to be checked here.
If btree_node_free() is called directly in such error handling path,
@@ -101,9 +101,9 @@ Signed-off-by: Coly Li <colyli@suse.de>
---
drivers/md/bcache/btree.c | 17 ++++++++-
drivers/md/bcache/btree.h | 2 +
- drivers/md/bcache/journal.c | 93 ++++++++++++++++++++++++++++++++++-----------
+ drivers/md/bcache/journal.c | 92 ++++++++++++++++++++++++++++++++++-----------
drivers/md/bcache/journal.h | 4 ++
- 4 files changed, 91 insertions(+), 25 deletions(-)
+ 4 files changed, 91 insertions(+), 24 deletions(-)
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 42b6df61a4f7..ee7712c85e04 100644
@@ -167,10 +167,10 @@ index d1c72ef64edf..76cfd121a486 100644
static inline struct btree_write *btree_current_write(struct btree *b)
{
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index 14a4e2c44de9..a4ea61a70480 100644
+index 1218e3cada3c..a4ea61a70480 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
-@@ -419,41 +419,87 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
+@@ -419,40 +419,87 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
static void btree_flush_write(struct cache_set *c)
{
@@ -256,7 +256,6 @@ index 14a4e2c44de9..a4ea61a70480 100644
if (!btree_current_write(b)->journal) {
mutex_unlock(&b->write_lock);
- /* We raced */
-- atomic_long_inc(&c->retry_flush_write);
- goto retry;
+ pr_debug("bnode %p: written by others", b);
+ clear_bit(BTREE_NODE_journal_flush, &b->flags);
@@ -281,7 +280,7 @@ index 14a4e2c44de9..a4ea61a70480 100644
}
#define last_seq(j) ((j)->seq - fifo_used(&(j)->pin) + 1)
-@@ -875,6 +921,7 @@ int bch_journal_alloc(struct cache_set *c)
+@@ -874,6 +921,7 @@ int bch_journal_alloc(struct cache_set *c)
struct journal *j = &c->journal;
spin_lock_init(&j->lock);
diff --git a/for-next/0034-bcache-add-reclaimed_journal_buckets-to-struct-cache.patch b/for-next/0035-bcache-add-reclaimed_journal_buckets-to-struct-cache.patch
index 42f7578..d411115 100644
--- a/for-next/0034-bcache-add-reclaimed_journal_buckets-to-struct-cache.patch
+++ b/for-next/0035-bcache-add-reclaimed_journal_buckets-to-struct-cache.patch
@@ -1,7 +1,7 @@
-From 6c8bf37c6d1ec7a58fa1c5b910862dbf6dbf1818 Mon Sep 17 00:00:00 2001
+From 804d2586ee139cea6a16c0175184ed5ff87ec6cf Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Sun, 2 Jun 2019 00:47:23 +0800
-Subject: [PATCH 34/34] bcache: add reclaimed_journal_buckets to struct
+Subject: [RFC PATCH 35/35] bcache: add reclaimed_journal_buckets to struct
cache_set
Now we have counters for how many times journal is reclaimed, how many