author     Coly Li <colyli@suse.de>   2019-08-15 00:57:56 +0800
committer  Coly Li <colyli@suse.de>   2019-08-15 00:57:56 +0800
commit     d38cc59c844c41b3e53b35735298dcca7b82799e (patch)
tree       d174217b6c1339a7fbe95669ad96fa7df4d240f9
parent     f7c4bd7d9a005cbd47c475b09227695ddfe87eb0 (diff)
download   bcache-patches-d38cc59c844c41b3e53b35735298dcca7b82799e.tar.gz
update for-next and for-test
-rw-r--r--  for-current/0000-cover-letter.patch | 92
-rw-r--r--  for-current/0001-bcache-don-t-set-max-writeback-rate-if-gc-is-running.patch | 41
-rw-r--r--  for-current/0002-bcache-check-c-gc_thread-by-IS_ERR_OR_NULL-in-cache_.patch | 125
-rw-r--r--  for-current/0003-bcache-fix-return-value-error-in-bch_journal_read.patch | 42
-rw-r--r--  for-current/0004-Revert-bcache-set-CACHE_SET_IO_DISABLE-in-bch_cached.patch | 64
-rw-r--r--  for-current/0005-bcache-avoid-flushing-btree-node-in-cache_set_flush-.patch | 53
-rw-r--r--  for-current/0006-bcache-ignore-read-ahead-request-failure-on-backing-.patch | 56
-rw-r--r--  for-current/0007-bcache-add-io-error-counting-in-write_bdev_super_end.patch | 38
-rw-r--r--  for-current/0008-bcache-remove-unnecessary-prefetch-in-bset_search_tr.patch | 56
-rw-r--r--  for-current/0009-bcache-use-sysfs_match_string-instead-of-__sysfs_mat.patch | 97
-rw-r--r--  for-current/0010-bcache-add-return-value-check-to-bch_cached_dev_run.patch | 152
-rw-r--r--  for-current/0011-bcache-remove-unncessary-code-in-bch_btree_keys_init.patch | 72
-rw-r--r--  for-current/0012-bcache-check-CACHE_SET_IO_DISABLE-in-allocator-code.patch | 52
-rw-r--r--  for-current/0013-bcache-check-CACHE_SET_IO_DISABLE-bit-in-bch_journal.patch | 39
-rw-r--r--  for-current/0014-bcache-more-detailed-error-message-to-bcache_device_.patch | 47
-rw-r--r--  for-current/0015-bcache-add-more-error-message-in-bch_cached_dev_atta.patch | 39
-rw-r--r--  for-current/0016-bcache-improve-error-message-in-bch_cached_dev_run.patch | 53
-rw-r--r--  for-current/0017-bcache-remove-XXX-comment-line-from-run_cache_set.patch | 31
-rw-r--r--  for-current/0018-bcache-make-bset_search_tree-be-more-understandable.patch | 70
-rw-r--r--  for-current/0019-bcache-add-pendings_cleanup-to-stop-pending-bcache-d.patch | 107
-rw-r--r--  for-current/0020-bcache-fix-mistaken-sysfs-entry-for-io_error-counter.patch | 43
-rw-r--r--  for-current/0021-bcache-destroy-dc-writeback_write_wq-if-failed-to-cr.patch | 35
-rw-r--r--  for-current/0022-bcache-stop-writeback-kthread-and-kworker-when-bch_c.patch | 42
-rw-r--r--  for-current/0023-bcache-avoid-a-deadlock-in-bcache_reboot.patch | 211
-rw-r--r--  for-current/0024-bcache-acquire-bch_register_lock-later-in-cached_dev.patch | 47
-rw-r--r--  for-current/0025-bcache-acquire-bch_register_lock-later-in-cached_dev.patch | 160
-rw-r--r--  for-current/0026-bcache-fix-potential-deadlock-in-cached_def_free.patch | 168
-rw-r--r--  for-current/0027-bcache-add-code-comments-for-journal_read_bucket.patch | 72
-rw-r--r--  for-current/0028-bcache-set-largest-seq-to-ja-seq-bucket_index-in-jou.patch | 34
-rw-r--r--  for-current/0029-bcache-shrink-btree-node-cache-after-bch_btree_check.patch | 55
-rw-r--r--  for-current/0030-bcache-Revert-bcache-free-heap-cache_set-flush_btree.patch | 35
-rw-r--r--  for-current/0031-bcache-Revert-bcache-fix-high-CPU-occupancy-during-j.patch | 129
-rw-r--r--  for-current/0032-bcache-only-clear-BTREE_NODE_dirty-bit-when-it-is-se.patch | 57
-rw-r--r--  for-current/0033-bcache-add-comments-for-mutex_lock-b-write_lock.patch | 47
-rw-r--r--  for-current/0034-bcache-remove-retry_flush_write-from-struct-cache_se.patch | 75
-rw-r--r--  for-current/0035-bcache-fix-race-in-btree_flush_write.patch | 186
-rw-r--r--  for-current/0036-bcache-performance-improvement-for-btree_flush_write.patch | 187
-rw-r--r--  for-current/0037-bcache-add-reclaimed_journal_buckets-to-struct-cache.patch | 80
-rw-r--r--  for-next/0001-bcache-add-cond_resched-in-__bch_cache_cmp.patch | 29
-rw-r--r--  for-next/0001-closures-fix-a-race-on-wakeup-from-closure_sync.patch | 35
-rw-r--r--  for-test/0001-bcache-fix-deadlock-in-bcache_allocator.patch | 149
-rw-r--r--  for-test/0001-bcache-introduce-btree_cache_total_pages-into-struct.patch | 110
-rw-r--r--  for-test/0001-bcache-only-set-b-accessed-1-for-dirty-btree-node-ca.patch | 28
-rw-r--r--  for-test/mca_limit/0001-bcache-introduce-btree_cache_total_pages-into-struct.patch | 110
-rw-r--r--  for-test/mca_limit/0001-bcache-restrict-mca-pages-consumption.patch | 149
45 files changed, 610 insertions, 2989 deletions
diff --git a/for-current/0000-cover-letter.patch b/for-current/0000-cover-letter.patch
deleted file mode 100644
index 506d7d6..0000000
--- a/for-current/0000-cover-letter.patch
+++ /dev/null
@@ -1,92 +0,0 @@
-From ea4bf18c9eb2ef705dce00b1bc5fde2f49ef2740 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Thu, 27 Jun 2019 23:29:25 +0800
-Subject: [PATCH 00/37] bcache patches for Linux v5.3
-
-Hi Jens,
-
-Here are the bcache patches for Linux v5.3. All these patches have been
-tested for a while and survived my smoke and pressure testing.
-
-This time Alexandru Ardelean contributes a cleanup patch. The rest of
-the patches are from me; among them is an important race fix involving
-the following patches,
-- bcache: Revert "bcache: free heap cache_set->flush_btree in
- bch_journal_free"
-- bcache: Revert "bcache: fix high CPU occupancy during journal"
-- bcache: remove retry_flush_write from struct cache_set
-- bcache: fix race in btree_flush_write()
-- bcache: performance improvement for btree_flush_write()
-- bcache: add reclaimed_journal_buckets to struct cache_set
-On a Lenovo SR650 server (48 cores, 200G DRAM, 1T NVMe SSD as cache
-device and 12T NVMe SSD as backing device), without this fix bcache
-could only run around 40 minutes before a deadlock or panic happened.
-With the fix I don't observe any deadlock or panic in a 5+ hour smoke test.
-
-Please pick them for Linux v5.3, and thank you in advance.
-
-Coly Li
----
-
-Alexandru Ardelean (1):
- bcache: use sysfs_match_string() instead of __sysfs_match_string()
-
-Coly Li (36):
- bcache: don't set max writeback rate if gc is running
- bcache: check c->gc_thread by IS_ERR_OR_NULL in cache_set_flush()
- bcache: fix return value error in bch_journal_read()
- Revert "bcache: set CACHE_SET_IO_DISABLE in bch_cached_dev_error()"
- bcache: avoid flushing btree node in cache_set_flush() if io disabled
- bcache: ignore read-ahead request failure on backing device
- bcache: add io error counting in write_bdev_super_endio()
- bcache: remove unnecessary prefetch() in bset_search_tree()
- bcache: add return value check to bch_cached_dev_run()
- bcache: remove unncessary code in bch_btree_keys_init()
- bcache: check CACHE_SET_IO_DISABLE in allocator code
- bcache: check CACHE_SET_IO_DISABLE bit in bch_journal()
- bcache: more detailed error message to bcache_device_link()
- bcache: add more error message in bch_cached_dev_attach()
- bcache: improve error message in bch_cached_dev_run()
- bcache: remove "XXX:" comment line from run_cache_set()
- bcache: make bset_search_tree() be more understandable
- bcache: add pendings_cleanup to stop pending bcache device
- bcache: fix mistaken sysfs entry for io_error counter
- bcache: destroy dc->writeback_write_wq if failed to create
- dc->writeback_thread
- bcache: stop writeback kthread and kworker when bch_cached_dev_run()
- failed
- bcache: avoid a deadlock in bcache_reboot()
- bcache: acquire bch_register_lock later in cached_dev_detach_finish()
- bcache: acquire bch_register_lock later in cached_dev_free()
- bcache: fix potential deadlock in cached_def_free()
- bcache: add code comments for journal_read_bucket()
- bcache: set largest seq to ja->seq[bucket_index] in
- journal_read_bucket()
- bcache: shrink btree node cache after bch_btree_check()
- bcache: Revert "bcache: free heap cache_set->flush_btree in
- bch_journal_free"
- bcache: Revert "bcache: fix high CPU occupancy during journal"
- bcache: only clear BTREE_NODE_dirty bit when it is set
- bcache: add comments for mutex_lock(&b->write_lock)
- bcache: remove retry_flush_write from struct cache_set
- bcache: fix race in btree_flush_write()
- bcache: performance improvement for btree_flush_write()
- bcache: add reclaimed_journal_buckets to struct cache_set
-
- drivers/md/bcache/alloc.c | 9 ++
- drivers/md/bcache/bcache.h | 6 +-
- drivers/md/bcache/bset.c | 61 ++++--------
- drivers/md/bcache/btree.c | 53 ++++++++--
- drivers/md/bcache/btree.h | 2 +
- drivers/md/bcache/io.c | 12 +++
- drivers/md/bcache/journal.c | 141 ++++++++++++++++++--------
- drivers/md/bcache/journal.h | 4 +
- drivers/md/bcache/super.c | 227 ++++++++++++++++++++++++++++++++++--------
- drivers/md/bcache/sysfs.c | 67 +++++++++----
- drivers/md/bcache/util.h | 2 -
- drivers/md/bcache/writeback.c | 8 ++
- 12 files changed, 432 insertions(+), 160 deletions(-)
-
---
-2.16.4
-
diff --git a/for-current/0001-bcache-don-t-set-max-writeback-rate-if-gc-is-running.patch b/for-current/0001-bcache-don-t-set-max-writeback-rate-if-gc-is-running.patch
deleted file mode 100644
index 1cdab6a..0000000
--- a/for-current/0001-bcache-don-t-set-max-writeback-rate-if-gc-is-running.patch
+++ /dev/null
@@ -1,41 +0,0 @@
-From e58f5f253e35ac1ccbe0dd4db2b71783a913c79b Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Fri, 21 Jun 2019 01:46:20 +0800
-Subject: [PATCH 01/37] bcache: don't set max writeback rate if gc is running
-
-When gc is running, user space I/O processes may wait inside bcache
-code, so no new I/O arrives. This is not real idle time, and the
-maximum writeback rate should not be set in such a situation.
-Otherwise a faster writeback thread may compete for locks with the gc
-thread and make garbage collection slower, which results in a longer
-I/O freeze period.
-
-This patch checks c->gc_mark_valid in set_at_max_writeback_rate(). If
-c->gc_mark_valid is 0 (gc running), set_at_max_writeback_rate() returns
-false, then update_writeback_rate() will not set the writeback rate to
-the maximum value even if c->idle_counter reaches the idle threshold.
-
-Now the writeback thread won't interfere with gc thread performance.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/writeback.c | 3 +++
- 1 file changed, 3 insertions(+)
-
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index 73f0efac2b9f..262f7ef20992 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -122,6 +122,9 @@ static void __update_writeback_rate(struct cached_dev *dc)
- static bool set_at_max_writeback_rate(struct cache_set *c,
- struct cached_dev *dc)
- {
-+ /* Don't set max writeback rate if gc is running */
-+ if (!c->gc_mark_valid)
-+ return false;
- /*
- * Idle_counter is increased everytime when update_writeback_rate() is
- * called. If all backing devices attached to the same cache set have
---
-2.16.4
-
diff --git a/for-current/0002-bcache-check-c-gc_thread-by-IS_ERR_OR_NULL-in-cache_.patch b/for-current/0002-bcache-check-c-gc_thread-by-IS_ERR_OR_NULL-in-cache_.patch
deleted file mode 100644
index 1166f2c..0000000
--- a/for-current/0002-bcache-check-c-gc_thread-by-IS_ERR_OR_NULL-in-cache_.patch
+++ /dev/null
@@ -1,125 +0,0 @@
-From 4f14821794c4cacb35e305ea347095e8ae70d871 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sat, 22 Jun 2019 23:04:36 +0800
-Subject: [PATCH 02/37] bcache: check c->gc_thread by IS_ERR_OR_NULL in
- cache_set_flush()
-
-When system memory is under heavy pressure, bch_gc_thread_start() from
-run_cache_set() may fail due to out of memory. In such a condition,
-c->gc_thread is assigned -ENOMEM, not a NULL pointer. Then in the
-following failure code path bch_cache_set_error(), when cache_set_flush()
-gets called, the code piece to stop c->gc_thread is broken,
- if (c->gc_thread)
- kthread_stop(c->gc_thread);
-
-And KASAN catches such a NULL pointer dereference problem, with the
-following warning information:
-
-[ 561.207881] ==================================================================
-[ 561.207900] BUG: KASAN: null-ptr-deref in kthread_stop+0x3b/0x440
-[ 561.207904] Write of size 4 at addr 000000000000001c by task kworker/15:1/313
-
-[ 561.207913] CPU: 15 PID: 313 Comm: kworker/15:1 Tainted: G W 5.0.0-vanilla+ #3
-[ 561.207916] Hardware name: Lenovo ThinkSystem SR650 -[7X05CTO1WW]-/-[7X05CTO1WW]-, BIOS -[IVE136T-2.10]- 03/22/2019
-[ 561.207935] Workqueue: events cache_set_flush [bcache]
-[ 561.207940] Call Trace:
-[ 561.207948] dump_stack+0x9a/0xeb
-[ 561.207955] ? kthread_stop+0x3b/0x440
-[ 561.207960] ? kthread_stop+0x3b/0x440
-[ 561.207965] kasan_report+0x176/0x192
-[ 561.207973] ? kthread_stop+0x3b/0x440
-[ 561.207981] kthread_stop+0x3b/0x440
-[ 561.207995] cache_set_flush+0xd4/0x6d0 [bcache]
-[ 561.208008] process_one_work+0x856/0x1620
-[ 561.208015] ? find_held_lock+0x39/0x1d0
-[ 561.208028] ? drain_workqueue+0x380/0x380
-[ 561.208048] worker_thread+0x87/0xb80
-[ 561.208058] ? __kthread_parkme+0xb6/0x180
-[ 561.208067] ? process_one_work+0x1620/0x1620
-[ 561.208072] kthread+0x326/0x3e0
-[ 561.208079] ? kthread_create_worker_on_cpu+0xc0/0xc0
-[ 561.208090] ret_from_fork+0x3a/0x50
-[ 561.208110] ==================================================================
-[ 561.208113] Disabling lock debugging due to kernel taint
-[ 561.208115] irq event stamp: 11800231
-[ 561.208126] hardirqs last enabled at (11800231): [<ffffffff83008538>] do_syscall_64+0x18/0x410
-[ 561.208127] BUG: unable to handle kernel NULL pointer dereference at 000000000000001c
-[ 561.208129] #PF error: [WRITE]
-[ 561.312253] hardirqs last disabled at (11800230): [<ffffffff830052ff>] trace_hardirqs_off_thunk+0x1a/0x1c
-[ 561.312259] softirqs last enabled at (11799832): [<ffffffff850005c7>] __do_softirq+0x5c7/0x8c3
-[ 561.405975] PGD 0 P4D 0
-[ 561.442494] softirqs last disabled at (11799821): [<ffffffff831add2c>] irq_exit+0x1ac/0x1e0
-[ 561.791359] Oops: 0002 [#1] SMP KASAN NOPTI
-[ 561.791362] CPU: 15 PID: 313 Comm: kworker/15:1 Tainted: G B W 5.0.0-vanilla+ #3
-[ 561.791363] Hardware name: Lenovo ThinkSystem SR650 -[7X05CTO1WW]-/-[7X05CTO1WW]-, BIOS -[IVE136T-2.10]- 03/22/2019
-[ 561.791371] Workqueue: events cache_set_flush [bcache]
-[ 561.791374] RIP: 0010:kthread_stop+0x3b/0x440
-[ 561.791376] Code: 00 00 65 8b 05 26 d5 e0 7c 89 c0 48 0f a3 05 ec aa df 02 0f 82 dc 02 00 00 4c 8d 63 20 be 04 00 00 00 4c 89 e7 e8 65 c5 53 00 <f0> ff 43 20 48 8d 7b 24 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48
-[ 561.791377] RSP: 0018:ffff88872fc8fd10 EFLAGS: 00010286
-[ 561.838895] bcache: bch_count_io_errors() nvme0n1: IO error on writing btree.
-[ 561.838916] bcache: bch_count_io_errors() nvme0n1: IO error on writing btree.
-[ 561.838934] bcache: bch_count_io_errors() nvme0n1: IO error on writing btree.
-[ 561.838948] bcache: bch_count_io_errors() nvme0n1: IO error on writing btree.
-[ 561.838966] bcache: bch_count_io_errors() nvme0n1: IO error on writing btree.
-[ 561.838979] bcache: bch_count_io_errors() nvme0n1: IO error on writing btree.
-[ 561.838996] bcache: bch_count_io_errors() nvme0n1: IO error on writing btree.
-[ 563.067028] RAX: 0000000000000000 RBX: fffffffffffffffc RCX: ffffffff832dd314
-[ 563.067030] RDX: 0000000000000000 RSI: 0000000000000004 RDI: 0000000000000297
-[ 563.067032] RBP: ffff88872fc8fe88 R08: fffffbfff0b8213d R09: fffffbfff0b8213d
-[ 563.067034] R10: 0000000000000001 R11: fffffbfff0b8213c R12: 000000000000001c
-[ 563.408618] R13: ffff88dc61cc0f68 R14: ffff888102b94900 R15: ffff88dc61cc0f68
-[ 563.408620] FS: 0000000000000000(0000) GS:ffff888f7dc00000(0000) knlGS:0000000000000000
-[ 563.408622] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
-[ 563.408623] CR2: 000000000000001c CR3: 0000000f48a1a004 CR4: 00000000007606e0
-[ 563.408625] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
-[ 563.408627] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
-[ 563.904795] bcache: bch_count_io_errors() nvme0n1: IO error on writing btree.
-[ 563.915796] PKRU: 55555554
-[ 563.915797] Call Trace:
-[ 563.915807] cache_set_flush+0xd4/0x6d0 [bcache]
-[ 563.915812] process_one_work+0x856/0x1620
-[ 564.001226] bcache: bch_count_io_errors() nvme0n1: IO error on writing btree.
-[ 564.033563] ? find_held_lock+0x39/0x1d0
-[ 564.033567] ? drain_workqueue+0x380/0x380
-[ 564.033574] worker_thread+0x87/0xb80
-[ 564.062823] bcache: bch_count_io_errors() nvme0n1: IO error on writing btree.
-[ 564.118042] ? __kthread_parkme+0xb6/0x180
-[ 564.118046] ? process_one_work+0x1620/0x1620
-[ 564.118048] kthread+0x326/0x3e0
-[ 564.118050] ? kthread_create_worker_on_cpu+0xc0/0xc0
-[ 564.167066] bcache: bch_count_io_errors() nvme0n1: IO error on writing btree.
-[ 564.252441] ret_from_fork+0x3a/0x50
-[ 564.252447] Modules linked in: msr rpcrdma sunrpc rdma_ucm ib_iser ib_umad rdma_cm ib_ipoib i40iw configfs iw_cm ib_cm libiscsi scsi_transport_iscsi mlx4_ib ib_uverbs mlx4_en ib_core nls_iso8859_1 nls_cp437 vfat fat intel_rapl skx_edac x86_pkg_temp_thermal coretemp iTCO_wdt iTCO_vendor_support crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel ses raid0 aesni_intel cdc_ether enclosure usbnet ipmi_ssif joydev aes_x86_64 i40e scsi_transport_sas mii bcache md_mod crypto_simd mei_me ioatdma crc64 ptp cryptd pcspkr i2c_i801 mlx4_core glue_helper pps_core mei lpc_ich dca wmi ipmi_si ipmi_devintf nd_pmem dax_pmem nd_btt ipmi_msghandler device_dax pcc_cpufreq button hid_generic usbhid mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect xhci_pci sysimgblt fb_sys_fops xhci_hcd ttm megaraid_sas drm usbcore nfit libnvdimm sg dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua efivarfs
-[ 564.299390] bcache: bch_count_io_errors() nvme0n1: IO error on writing btree.
-[ 564.348360] CR2: 000000000000001c
-[ 564.348362] ---[ end trace b7f0e5cc7b2103b0 ]---
-
-Therefore, it is not enough to only check whether c->gc_thread is NULL;
-we should use IS_ERR_OR_NULL() to check for both a NULL pointer and an
-error value.
-
-This patch changes the above buggy code piece in this way,
- if (!IS_ERR_OR_NULL(c->gc_thread))
- kthread_stop(c->gc_thread);
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/super.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 1b63ac876169..64d9de89a63f 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1564,7 +1564,7 @@ static void cache_set_flush(struct closure *cl)
- kobject_put(&c->internal);
- kobject_del(&c->kobj);
-
-- if (c->gc_thread)
-+ if (!IS_ERR_OR_NULL(c->gc_thread))
- kthread_stop(c->gc_thread);
-
- if (!IS_ERR_OR_NULL(c->root))
---
-2.16.4
-
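A note on the fix above: kthread_run()/kthread_create() return an error pointer such as ERR_PTR(-ENOMEM) on failure rather than NULL, which is why the plain NULL check was insufficient. Below is a minimal sketch of that error-pointer convention in kernel-style C; start_worker(), stop_worker() and the "example_worker" name are made up for illustration, while ERR_PTR()/PTR_ERR()/IS_ERR_OR_NULL()/kthread_run()/kthread_stop() are real kernel APIs.

    #include <linux/err.h>      /* ERR_PTR(), PTR_ERR(), IS_ERR_OR_NULL() */
    #include <linux/kthread.h>  /* kthread_run(), kthread_stop() */

    static struct task_struct *worker;  /* hypothetical optional worker */

    static int start_worker(int (*fn)(void *), void *data)
    {
            worker = kthread_run(fn, data, "example_worker");
            if (IS_ERR(worker))
                    return PTR_ERR(worker); /* e.g. -ENOMEM under memory pressure */
            return 0;
    }

    static void stop_worker(void)
    {
            /*
             * worker may be NULL (never started) or an error pointer
             * (failed start); only a real task pointer may be stopped.
             */
            if (!IS_ERR_OR_NULL(worker))
                    kthread_stop(worker);
    }

This mirrors the bcache scenario: a failed kthread_run() leaves an error value in the pointer, so a later NULL-only check lets kthread_stop() dereference it.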
diff --git a/for-current/0003-bcache-fix-return-value-error-in-bch_journal_read.patch b/for-current/0003-bcache-fix-return-value-error-in-bch_journal_read.patch
deleted file mode 100644
index 75a6fd1..0000000
--- a/for-current/0003-bcache-fix-return-value-error-in-bch_journal_read.patch
+++ /dev/null
@@ -1,42 +0,0 @@
-From b941c10de9c1619ef8598663123ac0e637e23b72 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Tue, 4 Jun 2019 14:43:08 +0800
-Subject: [PATCH 03/37] bcache: fix return value error in bch_journal_read()
-
-When everything is OK in bch_journal_read(), finally the return value
-is returned by,
- return ret;
-which assumes ret will be 0 here. This assumption is wrong when all
-journal buckets are full and filled with valid journal entries. In
-such a case the last read_bucket() call sets 'ret' to 1, which means
-a new jset was added into the jset list. The jset list is the list
-'journal' in the caller run_cache_set().
-
-Returning 1 to run_cache_set() means something is wrong and the cache
-set won't start, but indeed everything is OK.
-
-This patch changes the line at the end of bch_journal_read() to
-directly return 0 since everything is good. Then a bogus error is
-fixed.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/journal.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index 12dae9348147..4e5fc05720fc 100644
---- a/drivers/md/bcache/journal.c
-+++ b/drivers/md/bcache/journal.c
-@@ -268,7 +268,7 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
- struct journal_replay,
- list)->j.seq;
-
-- return ret;
-+ return 0;
- #undef read_bucket
- }
-
---
-2.16.4
-
diff --git a/for-current/0004-Revert-bcache-set-CACHE_SET_IO_DISABLE-in-bch_cached.patch b/for-current/0004-Revert-bcache-set-CACHE_SET_IO_DISABLE-in-bch_cached.patch
deleted file mode 100644
index 13f5fe1..0000000
--- a/for-current/0004-Revert-bcache-set-CACHE_SET_IO_DISABLE-in-bch_cached.patch
+++ /dev/null
@@ -1,64 +0,0 @@
-From 89e69ecae72e064bab278cdee6d391f5fcb732b3 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Tue, 4 Jun 2019 15:00:46 +0800
-Subject: [PATCH 04/37] Revert "bcache: set CACHE_SET_IO_DISABLE in
- bch_cached_dev_error()"
-
-This reverts commit 6147305c73e4511ca1a975b766b97a779d442567.
-
-Although this patch helps the failed bcache device stop faster when too
-many I/O errors are detected on the corresponding cached device, setting
-the CACHE_SET_IO_DISABLE bit in cache set c->flags was not a good idea.
-This operation disables all I/O on the cache set, which means other
-attached bcache devices won't work either.
-
-Without this patch, the failed bcache device can still be stopped
-eventually once internal I/O (e.g. writeback) is accomplished. Therefore
-I revert it here.
-
-Fixes: 6147305c73e4 ("bcache: set CACHE_SET_IO_DISABLE in bch_cached_dev_error()")
-Reported-by: Yong Li <mr.liyong@qq.com>
-Signed-off-by: Coly Li <colyli@suse.de>
-Cc: stable@vger.kernel.org
----
- drivers/md/bcache/super.c | 17 -----------------
- 1 file changed, 17 deletions(-)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 64d9de89a63f..ba2ad093bc80 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1437,8 +1437,6 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size)
-
- bool bch_cached_dev_error(struct cached_dev *dc)
- {
-- struct cache_set *c;
--
- if (!dc || test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags))
- return false;
-
-@@ -1449,21 +1447,6 @@ bool bch_cached_dev_error(struct cached_dev *dc)
- pr_err("stop %s: too many IO errors on backing device %s\n",
- dc->disk.disk->disk_name, dc->backing_dev_name);
-
-- /*
-- * If the cached device is still attached to a cache set,
-- * even dc->io_disable is true and no more I/O requests
-- * accepted, cache device internal I/O (writeback scan or
-- * garbage collection) may still prevent bcache device from
-- * being stopped. So here CACHE_SET_IO_DISABLE should be
-- * set to c->flags too, to make the internal I/O to cache
-- * device rejected and stopped immediately.
-- * If c is NULL, that means the bcache device is not attached
-- * to any cache set, then no CACHE_SET_IO_DISABLE bit to set.
-- */
-- c = dc->disk.c;
-- if (c && test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags))
-- pr_info("CACHE_SET_IO_DISABLE already set");
--
- bcache_device_stop(&dc->disk);
- return true;
- }
---
-2.16.4
-
diff --git a/for-current/0005-bcache-avoid-flushing-btree-node-in-cache_set_flush-.patch b/for-current/0005-bcache-avoid-flushing-btree-node-in-cache_set_flush-.patch
deleted file mode 100644
index 2df04c2..0000000
--- a/for-current/0005-bcache-avoid-flushing-btree-node-in-cache_set_flush-.patch
+++ /dev/null
@@ -1,53 +0,0 @@
-From fc406bc07ad1e5718f3be439111f95001a2bcf9c Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Thu, 23 May 2019 23:18:10 +0800
-Subject: [PATCH 05/37] bcache: avoid flushing btree node in cache_set_flush()
- if io disabled
-
-When cache_set_flush() is called because too many I/O errors were
-detected on the cache device and the cache set is retiring, it doesn't
-make sense to flush cached btree nodes from c->btree_cache inside the
-function, because CACHE_SET_IO_DISABLE is already set in c->flags and
-all I/O to the cache device will be rejected.
-
-This patch checks in cache_set_flush() whether CACHE_SET_IO_DISABLE is
-set. If yes, it avoids flushing the cached btree nodes to save time and
-make cache set retiring faster.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/super.c | 18 +++++++++++-------
- 1 file changed, 11 insertions(+), 7 deletions(-)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index ba2ad093bc80..dc6702c2c4b6 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1553,13 +1553,17 @@ static void cache_set_flush(struct closure *cl)
- if (!IS_ERR_OR_NULL(c->root))
- list_add(&c->root->list, &c->btree_cache);
-
-- /* Should skip this if we're unregistering because of an error */
-- list_for_each_entry(b, &c->btree_cache, list) {
-- mutex_lock(&b->write_lock);
-- if (btree_node_dirty(b))
-- __bch_btree_node_write(b, NULL);
-- mutex_unlock(&b->write_lock);
-- }
-+ /*
-+ * Avoid flushing cached nodes if cache set is retiring
-+ * due to too many I/O errors detected.
-+ */
-+ if (!test_bit(CACHE_SET_IO_DISABLE, &c->flags))
-+ list_for_each_entry(b, &c->btree_cache, list) {
-+ mutex_lock(&b->write_lock);
-+ if (btree_node_dirty(b))
-+ __bch_btree_node_write(b, NULL);
-+ mutex_unlock(&b->write_lock);
-+ }
-
- for_each_cache(ca, c, i)
- if (ca->alloc_thread)
---
-2.16.4
-
diff --git a/for-current/0006-bcache-ignore-read-ahead-request-failure-on-backing-.patch b/for-current/0006-bcache-ignore-read-ahead-request-failure-on-backing-.patch
deleted file mode 100644
index b43866a..0000000
--- a/for-current/0006-bcache-ignore-read-ahead-request-failure-on-backing-.patch
+++ /dev/null
@@ -1,56 +0,0 @@
-From b864955e61393f70425c704ff2f16df72f508eb9 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Mon, 13 May 2019 22:48:09 +0800
-Subject: [PATCH 06/37] bcache: ignore read-ahead request failure on backing
- device
-
-When an md raid device (e.g. raid456) is used as the backing device,
-read-ahead requests on a degraded and recovering md raid device might
-be failed immediately by the md raid code, but the md raid array can
-still be read or written by normal I/O requests. Therefore such failed
-read-ahead requests are not real hardware failures. Furthermore, after
-the degradation and recovery are finished, read-ahead requests will be
-handled by the md raid array again.
-
-In such a condition, I/O failures of read-ahead requests don't indicate
-real health status (because normal I/O is still served), so they should
-not be counted into the I/O error counter dc->io_errors.
-
-Since there is no simple way to detect whether the backing device is an
-md raid device, this patch simply ignores I/O failures of read-ahead
-bios on the backing device, to avoid bogus backing device failures on a
-degraded md raid array.
-
-Suggested-and-tested-by: Thorsten Knabe <linux@thorsten-knabe.de>
-Signed-off-by: Coly Li <colyli@suse.de>
-Cc: stable@vger.kernel.org
----
- drivers/md/bcache/io.c | 12 ++++++++++++
- 1 file changed, 12 insertions(+)
-
-diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
-index c25097968319..4d93f07f63e5 100644
---- a/drivers/md/bcache/io.c
-+++ b/drivers/md/bcache/io.c
-@@ -58,6 +58,18 @@ void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio)
-
- WARN_ONCE(!dc, "NULL pointer of struct cached_dev");
-
-+ /*
-+ * Read-ahead requests on a degrading and recovering md raid
-+ * (e.g. raid6) device might be failured immediately by md
-+ * raid code, which is not a real hardware media failure. So
-+ * we shouldn't count failed REQ_RAHEAD bio to dc->io_errors.
-+ */
-+ if (bio->bi_opf & REQ_RAHEAD) {
-+ pr_warn_ratelimited("%s: Read-ahead I/O failed on backing device, ignore",
-+ dc->backing_dev_name);
-+ return;
-+ }
-+
- errors = atomic_add_return(1, &dc->io_errors);
- if (errors < dc->error_limit)
- pr_err("%s: IO error on backing device, unrecoverable",
---
-2.16.4
-
diff --git a/for-current/0007-bcache-add-io-error-counting-in-write_bdev_super_end.patch b/for-current/0007-bcache-add-io-error-counting-in-write_bdev_super_end.patch
deleted file mode 100644
index c4e916c..0000000
--- a/for-current/0007-bcache-add-io-error-counting-in-write_bdev_super_end.patch
+++ /dev/null
@@ -1,38 +0,0 @@
-From 9776d8bfe0f0706004cdb083e5954aec718aa931 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Mon, 13 May 2019 23:42:39 +0800
-Subject: [PATCH 07/37] bcache: add io error counting in
- write_bdev_super_endio()
-
-When the backing device super block is written by bch_write_bdev_super(),
-the bio completion callback write_bdev_super_endio() simply ignores the
-I/O status. Indeed such a write request should also contribute to the
-backing device health status if the request failed.
-
-This patch checks bio->bi_status in write_bdev_super_endio(); if there
-is an error, bch_count_backing_io_errors() will be called to count an
-I/O error into dc->io_errors.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/super.c | 4 +++-
- 1 file changed, 3 insertions(+), 1 deletion(-)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index dc6702c2c4b6..73466bda12a7 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -197,7 +197,9 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
- static void write_bdev_super_endio(struct bio *bio)
- {
- struct cached_dev *dc = bio->bi_private;
-- /* XXX: error checking */
-+
-+ if (bio->bi_status)
-+ bch_count_backing_io_errors(dc, bio);
-
- closure_put(&dc->sb_write);
- }
---
-2.16.4
-
diff --git a/for-current/0008-bcache-remove-unnecessary-prefetch-in-bset_search_tr.patch b/for-current/0008-bcache-remove-unnecessary-prefetch-in-bset_search_tr.patch
deleted file mode 100644
index ce26ff1..0000000
--- a/for-current/0008-bcache-remove-unnecessary-prefetch-in-bset_search_tr.patch
+++ /dev/null
@@ -1,56 +0,0 @@
-From 041674ba27a752cf1a14cc6564dbc436b3b11b51 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Tue, 14 May 2019 22:23:35 +0800
-Subject: [PATCH 08/37] bcache: remove unnecessary prefetch() in
- bset_search_tree()
-
-In function bset_search_tree(), when p >= t->size, t->tree[0] will be
-prefetched by the following code piece,
- 974 unsigned int p = n << 4;
- 975
- 976 p &= ((int) (p - t->size)) >> 31;
- 977
- 978 prefetch(&t->tree[p]);
-
-The purpose of the above code is to avoid a branch instruction, but
-when p >= t->size, prefetch(&t->tree[0]) has no positive performance
-contribution at all. This patch avoids the unnecessary prefetch by only
-calling prefetch() when p < t->size.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/bset.c | 16 ++--------------
- 1 file changed, 2 insertions(+), 14 deletions(-)
-
-diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
-index 268f1b685084..e36a108d3648 100644
---- a/drivers/md/bcache/bset.c
-+++ b/drivers/md/bcache/bset.c
-@@ -970,22 +970,10 @@ static struct bset_search_iter bset_search_tree(struct bset_tree *t,
- unsigned int inorder, j, n = 1;
-
- do {
-- /*
-- * A bit trick here.
-- * If p < t->size, (int)(p - t->size) is a minus value and
-- * the most significant bit is set, right shifting 31 bits
-- * gets 1. If p >= t->size, the most significant bit is
-- * not set, right shifting 31 bits gets 0.
-- * So the following 2 lines equals to
-- * if (p >= t->size)
-- * p = 0;
-- * but a branch instruction is avoided.
-- */
- unsigned int p = n << 4;
-
-- p &= ((int) (p - t->size)) >> 31;
--
-- prefetch(&t->tree[p]);
-+ if (p < t->size)
-+ prefetch(&t->tree[p]);
-
- j = n;
- f = &t->tree[j];
---
-2.16.4
-
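For readers unfamiliar with the removed trick: (int)(p - t->size) is negative exactly when p < t->size, so an arithmetic right shift by 31 yields an all-ones mask (keep p) or zero (clamp p to 0). The sketch below is a standalone demonstration in plain C, not part of the patch; it assumes 32-bit int and the arithmetic-shift behavior for negative values that the kernel code relies on.

    #include <assert.h>

    /* Branchless clamp used by the old code: returns p if p < size, else 0. */
    static unsigned int clamp_or_zero(unsigned int p, unsigned int size)
    {
            /*
             * p <  size: (int)(p - size) is negative, >> 31 gives all ones,
             *            so the mask keeps p unchanged.
             * p >= size: (int)(p - size) is non-negative, >> 31 gives 0,
             *            so the mask clears p to 0.
             */
            return p & (unsigned int)(((int)(p - size)) >> 31);
    }

    int main(void)
    {
            assert(clamp_or_zero(5, 16) == 5);   /* in range: unchanged */
            assert(clamp_or_zero(16, 16) == 0);  /* out of range: clamped */
            assert(clamp_or_zero(40, 16) == 0);  /* prefetch would hit tree[0] */
            return 0;
    }

The clamped index always lands on t->tree[0], which is why the prefetch is useless in that case and a plain p < t->size check is clearer.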
diff --git a/for-current/0009-bcache-use-sysfs_match_string-instead-of-__sysfs_mat.patch b/for-current/0009-bcache-use-sysfs_match_string-instead-of-__sysfs_mat.patch
deleted file mode 100644
index b21d257..0000000
--- a/for-current/0009-bcache-use-sysfs_match_string-instead-of-__sysfs_mat.patch
+++ /dev/null
@@ -1,97 +0,0 @@
-From 44567cf0d395784d1f95120ed170354d470b6116 Mon Sep 17 00:00:00 2001
-From: Alexandru Ardelean <alexandru.ardelean@analog.com>
-Date: Tue, 7 May 2019 12:43:12 +0300
-Subject: [PATCH 09/37] bcache: use sysfs_match_string() instead of
- __sysfs_match_string()
-
-The arrays (of strings) that are passed to __sysfs_match_string() are
-static, so use sysfs_match_string() which does an implicit ARRAY_SIZE()
-over these arrays.
-
-Functionally, this doesn't change anything.
-The change is more cosmetic.
-
-It only shrinks each static array by one element (the NULL sentinel).
-
-Signed-off-by: Alexandru Ardelean <alexandru.ardelean@analog.com>
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/sysfs.c | 20 ++++++++------------
- 1 file changed, 8 insertions(+), 12 deletions(-)
-
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index bfb437ffb13c..760cf8951338 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -21,28 +21,24 @@ static const char * const bch_cache_modes[] = {
- "writethrough",
- "writeback",
- "writearound",
-- "none",
-- NULL
-+ "none"
- };
-
- /* Default is 0 ("auto") */
- static const char * const bch_stop_on_failure_modes[] = {
- "auto",
-- "always",
-- NULL
-+ "always"
- };
-
- static const char * const cache_replacement_policies[] = {
- "lru",
- "fifo",
-- "random",
-- NULL
-+ "random"
- };
-
- static const char * const error_actions[] = {
- "unregister",
-- "panic",
-- NULL
-+ "panic"
- };
-
- write_attribute(attach);
-@@ -333,7 +329,7 @@ STORE(__cached_dev)
- bch_cached_dev_run(dc);
-
- if (attr == &sysfs_cache_mode) {
-- v = __sysfs_match_string(bch_cache_modes, -1, buf);
-+ v = sysfs_match_string(bch_cache_modes, buf);
- if (v < 0)
- return v;
-
-@@ -344,7 +340,7 @@ STORE(__cached_dev)
- }
-
- if (attr == &sysfs_stop_when_cache_set_failed) {
-- v = __sysfs_match_string(bch_stop_on_failure_modes, -1, buf);
-+ v = sysfs_match_string(bch_stop_on_failure_modes, buf);
- if (v < 0)
- return v;
-
-@@ -799,7 +795,7 @@ STORE(__bch_cache_set)
- 0, UINT_MAX);
-
- if (attr == &sysfs_errors) {
-- v = __sysfs_match_string(error_actions, -1, buf);
-+ v = sysfs_match_string(error_actions, buf);
- if (v < 0)
- return v;
-
-@@ -1063,7 +1059,7 @@ STORE(__bch_cache)
- }
-
- if (attr == &sysfs_cache_replacement_policy) {
-- v = __sysfs_match_string(cache_replacement_policies, -1, buf);
-+ v = sysfs_match_string(cache_replacement_policies, buf);
- if (v < 0)
- return v;
-
---
-2.16.4
-
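Context for the change above: sysfs_match_string() is the same matcher, but it takes the array length from ARRAY_SIZE() at the call site instead of walking until a NULL entry, which is why the NULL sentinels can be dropped. Below is a minimal sketch of a caller, with the mode array taken from the patch; parse_cache_mode() is a hypothetical helper, and the expansion shown in the comment is paraphrased from include/linux/string.h rather than quoted.

    #include <linux/string.h>   /* sysfs_match_string() */

    /*
     * sysfs_match_string(array, str) is roughly
     *     __sysfs_match_string(array, ARRAY_SIZE(array), str)
     * so the bound comes from the compiler and no NULL terminator
     * (and no "-1 means NULL-terminated") is needed.
     */
    static const char * const bch_cache_modes[] = {
            "writethrough",
            "writeback",
            "writearound",
            "none"              /* no trailing NULL sentinel required */
    };

    static int parse_cache_mode(const char *buf)
    {
            /* Returns the matching index, or a negative errno on no match. */
            return sysfs_match_string(bch_cache_modes, buf);
    }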
diff --git a/for-current/0010-bcache-add-return-value-check-to-bch_cached_dev_run.patch b/for-current/0010-bcache-add-return-value-check-to-bch_cached_dev_run.patch
deleted file mode 100644
index 81058cc..0000000
--- a/for-current/0010-bcache-add-return-value-check-to-bch_cached_dev_run.patch
+++ /dev/null
@@ -1,152 +0,0 @@
-From 3c6554692a3361189936d5dbdcc490ee7bf86eb6 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Tue, 21 May 2019 22:16:38 +0800
-Subject: [PATCH 10/37] bcache: add return value check to bch_cached_dev_run()
-
-This patch adds a return value check to bch_cached_dev_run(); now if an
-error happens inside bch_cached_dev_run(), it can be caught.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/bcache.h | 2 +-
- drivers/md/bcache/super.c | 33 ++++++++++++++++++++++++++-------
- drivers/md/bcache/sysfs.c | 7 +++++--
- 3 files changed, 32 insertions(+), 10 deletions(-)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index fdf75352e16a..73a97586a2ef 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -1006,7 +1006,7 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size);
- int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
- uint8_t *set_uuid);
- void bch_cached_dev_detach(struct cached_dev *dc);
--void bch_cached_dev_run(struct cached_dev *dc);
-+int bch_cached_dev_run(struct cached_dev *dc);
- void bcache_device_stop(struct bcache_device *d);
-
- void bch_cache_set_unregister(struct cache_set *c);
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 73466bda12a7..0abee44092bf 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -910,7 +910,7 @@ static int cached_dev_status_update(void *arg)
- }
-
-
--void bch_cached_dev_run(struct cached_dev *dc)
-+int bch_cached_dev_run(struct cached_dev *dc)
- {
- struct bcache_device *d = &dc->disk;
- char *buf = kmemdup_nul(dc->sb.label, SB_LABEL_SIZE, GFP_KERNEL);
-@@ -921,11 +921,14 @@ void bch_cached_dev_run(struct cached_dev *dc)
- NULL,
- };
-
-+ if (dc->io_disable)
-+ return -EIO;
-+
- if (atomic_xchg(&dc->running, 1)) {
- kfree(env[1]);
- kfree(env[2]);
- kfree(buf);
-- return;
-+ return -EBUSY;
- }
-
- if (!d->c &&
-@@ -951,8 +954,11 @@ void bch_cached_dev_run(struct cached_dev *dc)
- kfree(buf);
-
- if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") ||
-- sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache"))
-+ sysfs_create_link(&disk_to_dev(d->disk)->kobj,
-+ &d->kobj, "bcache")) {
- pr_debug("error creating sysfs link");
-+ return -ENOMEM;
-+ }
-
- dc->status_update_thread = kthread_run(cached_dev_status_update,
- dc, "bcache_status_update");
-@@ -961,6 +967,8 @@ void bch_cached_dev_run(struct cached_dev *dc)
- "continue to run without monitoring backing "
- "device status");
- }
-+
-+ return 0;
- }
-
- /*
-@@ -1056,6 +1064,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
- uint32_t rtime = cpu_to_le32((u32)ktime_get_real_seconds());
- struct uuid_entry *u;
- struct cached_dev *exist_dc, *t;
-+ int ret = 0;
-
- if ((set_uuid && memcmp(set_uuid, c->sb.set_uuid, 16)) ||
- (!set_uuid && memcmp(dc->sb.set_uuid, c->sb.set_uuid, 16)))
-@@ -1165,7 +1174,12 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
-
- bch_sectors_dirty_init(&dc->disk);
-
-- bch_cached_dev_run(dc);
-+ ret = bch_cached_dev_run(dc);
-+ if (ret && (ret != -EBUSY)) {
-+ up_write(&dc->writeback_lock);
-+ return ret;
-+ }
-+
- bcache_device_link(&dc->disk, c, "bdev");
- atomic_inc(&c->attached_dev_nr);
-
-@@ -1292,6 +1306,7 @@ static int register_bdev(struct cache_sb *sb, struct page *sb_page,
- {
- const char *err = "cannot allocate memory";
- struct cache_set *c;
-+ int ret = -ENOMEM;
-
- bdevname(bdev, dc->backing_dev_name);
- memcpy(&dc->sb, sb, sizeof(struct cache_sb));
-@@ -1321,14 +1336,18 @@ static int register_bdev(struct cache_sb *sb, struct page *sb_page,
- bch_cached_dev_attach(dc, c, NULL);
-
- if (BDEV_STATE(&dc->sb) == BDEV_STATE_NONE ||
-- BDEV_STATE(&dc->sb) == BDEV_STATE_STALE)
-- bch_cached_dev_run(dc);
-+ BDEV_STATE(&dc->sb) == BDEV_STATE_STALE) {
-+ err = "failed to run cached device";
-+ ret = bch_cached_dev_run(dc);
-+ if (ret)
-+ goto err;
-+ }
-
- return 0;
- err:
- pr_notice("error %s: %s", dc->backing_dev_name, err);
- bcache_device_stop(&dc->disk);
-- return -EIO;
-+ return ret;
- }
-
- /* Flash only volumes */
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index 760cf8951338..eb678e43ac00 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -325,8 +325,11 @@ STORE(__cached_dev)
- bch_cache_accounting_clear(&dc->accounting);
-
- if (attr == &sysfs_running &&
-- strtoul_or_return(buf))
-- bch_cached_dev_run(dc);
-+ strtoul_or_return(buf)) {
-+ v = bch_cached_dev_run(dc);
-+ if (v)
-+ return v;
-+ }
-
- if (attr == &sysfs_cache_mode) {
- v = sysfs_match_string(bch_cache_modes, buf);
---
-2.16.4
-
diff --git a/for-current/0011-bcache-remove-unncessary-code-in-bch_btree_keys_init.patch b/for-current/0011-bcache-remove-unncessary-code-in-bch_btree_keys_init.patch
deleted file mode 100644
index f76096e..0000000
--- a/for-current/0011-bcache-remove-unncessary-code-in-bch_btree_keys_init.patch
+++ /dev/null
@@ -1,72 +0,0 @@
-From 8cf2fe851139c27cafc5e52700b483793077611e Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Tue, 21 May 2019 22:36:35 +0800
-Subject: [PATCH 11/37] bcache: remove unncessary code in bch_btree_keys_init()
-
-Function bch_btree_keys_init() initializes b->set[].size and
-b->set[].data to zero. As the code comments indicate, this code is
-indeed unnecessary, because both struct btree_keys and struct bset_tree
-are nested/embedded into struct btree; when struct btree is filled with
-0 bits by kzalloc() in mca_bucket_alloc(), b->set[].size and
-b->set[].data are already initialized to 0 (a.k.a. NULL).
-
-This patch removes the redundant code, and adds comments in
-bch_btree_keys_init() and mca_bucket_alloc() to explain why it's safe.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/bset.c | 15 ++++++---------
- drivers/md/bcache/btree.c | 4 ++++
- 2 files changed, 10 insertions(+), 9 deletions(-)
-
-diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
-index e36a108d3648..8af9509e78bd 100644
---- a/drivers/md/bcache/bset.c
-+++ b/drivers/md/bcache/bset.c
-@@ -347,22 +347,19 @@ EXPORT_SYMBOL(bch_btree_keys_alloc);
- void bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops,
- bool *expensive_debug_checks)
- {
-- unsigned int i;
--
- b->ops = ops;
- b->expensive_debug_checks = expensive_debug_checks;
- b->nsets = 0;
- b->last_set_unwritten = 0;
-
-- /* XXX: shouldn't be needed */
-- for (i = 0; i < MAX_BSETS; i++)
-- b->set[i].size = 0;
- /*
-- * Second loop starts at 1 because b->keys[0]->data is the memory we
-- * allocated
-+	 * struct btree_keys is embedded in struct btree, and struct
-+ * bset_tree is embedded into struct btree_keys. They are all
-+ * initialized as 0 by kzalloc() in mca_bucket_alloc(), and
-+ * b->set[0].data is allocated in bch_btree_keys_alloc(), so we
-+ * don't have to initiate b->set[].size and b->set[].data here
-+ * any more.
- */
-- for (i = 1; i < MAX_BSETS; i++)
-- b->set[i].data = NULL;
- }
- EXPORT_SYMBOL(bch_btree_keys_init);
-
-diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
-index 773f5fdad25f..cf38a1b031fa 100644
---- a/drivers/md/bcache/btree.c
-+++ b/drivers/md/bcache/btree.c
-@@ -613,6 +613,10 @@ static void mca_data_alloc(struct btree *b, struct bkey *k, gfp_t gfp)
- static struct btree *mca_bucket_alloc(struct cache_set *c,
- struct bkey *k, gfp_t gfp)
- {
-+ /*
-+ * kzalloc() is necessary here for initialization,
-+ * see code comments in bch_btree_keys_init().
-+ */
- struct btree *b = kzalloc(sizeof(struct btree), gfp);
-
- if (!b)
---
-2.16.4
-
diff --git a/for-current/0012-bcache-check-CACHE_SET_IO_DISABLE-in-allocator-code.patch b/for-current/0012-bcache-check-CACHE_SET_IO_DISABLE-in-allocator-code.patch
deleted file mode 100644
index d50cf9a..0000000
--- a/for-current/0012-bcache-check-CACHE_SET_IO_DISABLE-in-allocator-code.patch
+++ /dev/null
@@ -1,52 +0,0 @@
-From 83696d19f199f99cd89b65fe5eb2ab1603b3bd2e Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 22 May 2019 21:55:09 +0800
-Subject: [PATCH 12/37] bcache: check CACHE_SET_IO_DISABLE in allocator code
-
-If the CACHE_SET_IO_DISABLE flag of a cache set is set due to too many
-I/O errors, currently the allocator routines can still continue to
-allocate space, which may introduce an inconsistent metadata state.
-
-This patch checks the CACHE_SET_IO_DISABLE bit in the following
-allocator routines,
-- bch_bucket_alloc()
-- __bch_bucket_alloc_set()
-Once CACHE_SET_IO_DISABLE is set on the cache set, the allocator
-routines may reject allocation requests earlier to avoid potentially
-inconsistent metadata.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/alloc.c | 9 +++++++++
- 1 file changed, 9 insertions(+)
-
-diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
-index f8986effcb50..6f776823b9ba 100644
---- a/drivers/md/bcache/alloc.c
-+++ b/drivers/md/bcache/alloc.c
-@@ -393,6 +393,11 @@ long bch_bucket_alloc(struct cache *ca, unsigned int reserve, bool wait)
- struct bucket *b;
- long r;
-
-+
-+ /* No allocation if CACHE_SET_IO_DISABLE bit is set */
-+ if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &ca->set->flags)))
-+ return -1;
-+
- /* fastpath */
- if (fifo_pop(&ca->free[RESERVE_NONE], r) ||
- fifo_pop(&ca->free[reserve], r))
-@@ -484,6 +489,10 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
- {
- int i;
-
-+ /* No allocation if CACHE_SET_IO_DISABLE bit is set */
-+ if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags)))
-+ return -1;
-+
- lockdep_assert_held(&c->bucket_lock);
- BUG_ON(!n || n > c->caches_loaded || n > MAX_CACHES_PER_SET);
-
---
-2.16.4
-
diff --git a/for-current/0013-bcache-check-CACHE_SET_IO_DISABLE-bit-in-bch_journal.patch b/for-current/0013-bcache-check-CACHE_SET_IO_DISABLE-bit-in-bch_journal.patch
deleted file mode 100644
index 2fb8deb..0000000
--- a/for-current/0013-bcache-check-CACHE_SET_IO_DISABLE-bit-in-bch_journal.patch
+++ /dev/null
@@ -1,39 +0,0 @@
-From 538b8c892e46fce9c4ce5f26be7f471001054e21 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 22 May 2019 22:06:21 +0800
-Subject: [PATCH 13/37] bcache: check CACHE_SET_IO_DISABLE bit in bch_journal()
-
-When too many I/O errors happen on the cache set and the
-CACHE_SET_IO_DISABLE bit is set, bch_journal() may continue to work
-because the journaling bkey might still be in the write set. The caller
-of bch_journal() may believe the journal still works, but the truth is
-that the in-memory journal write set won't be written into the cache
-device any more. This behavior may introduce a potentially inconsistent
-metadata state.
-
-This patch checks the CACHE_SET_IO_DISABLE bit at the head of
-bch_journal(); if the bit is set, bch_journal() returns NULL immediately
-to notify the caller that the journal does not work.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/journal.c | 4 ++++
- 1 file changed, 4 insertions(+)
-
-diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index 4e5fc05720fc..54f8886b6177 100644
---- a/drivers/md/bcache/journal.c
-+++ b/drivers/md/bcache/journal.c
-@@ -811,6 +811,10 @@ atomic_t *bch_journal(struct cache_set *c,
- struct journal_write *w;
- atomic_t *ret;
-
-+ /* No journaling if CACHE_SET_IO_DISABLE set already */
-+ if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags)))
-+ return NULL;
-+
- if (!CACHE_SYNC(&c->sb))
- return NULL;
-
---
-2.16.4
-
diff --git a/for-current/0014-bcache-more-detailed-error-message-to-bcache_device_.patch b/for-current/0014-bcache-more-detailed-error-message-to-bcache_device_.patch
deleted file mode 100644
index f1db3e8..0000000
--- a/for-current/0014-bcache-more-detailed-error-message-to-bcache_device_.patch
+++ /dev/null
@@ -1,47 +0,0 @@
-From 3d5eca3da96c4dcfeb71a5947d1b916098f98090 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sat, 1 Jun 2019 00:57:38 +0800
-Subject: [PATCH 14/37] bcache: more detailed error message to
- bcache_device_link()
-
-This patch adds a more accurate error message for each specific
-sysfs_create_link() call, to help debug failures during bcache
-device startup.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/super.c | 11 ++++++++---
- 1 file changed, 8 insertions(+), 3 deletions(-)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 0abee44092bf..d4d8d1300faf 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -693,6 +693,7 @@ static void bcache_device_link(struct bcache_device *d, struct cache_set *c,
- {
- unsigned int i;
- struct cache *ca;
-+ int ret;
-
- for_each_cache(ca, d->c, i)
- bd_link_disk_holder(ca->bdev, d->disk);
-@@ -700,9 +701,13 @@ static void bcache_device_link(struct bcache_device *d, struct cache_set *c,
- snprintf(d->name, BCACHEDEVNAME_SIZE,
- "%s%u", name, d->id);
-
-- WARN(sysfs_create_link(&d->kobj, &c->kobj, "cache") ||
-- sysfs_create_link(&c->kobj, &d->kobj, d->name),
-- "Couldn't create device <-> cache set symlinks");
-+ ret = sysfs_create_link(&d->kobj, &c->kobj, "cache");
-+ if (ret < 0)
-+ pr_err("Couldn't create device -> cache set symlink");
-+
-+ ret = sysfs_create_link(&c->kobj, &d->kobj, d->name);
-+ if (ret < 0)
-+ pr_err("Couldn't create cache set -> device symlink");
-
- clear_bit(BCACHE_DEV_UNLINK_DONE, &d->flags);
- }
---
-2.16.4
-
diff --git a/for-current/0015-bcache-add-more-error-message-in-bch_cached_dev_atta.patch b/for-current/0015-bcache-add-more-error-message-in-bch_cached_dev_atta.patch
deleted file mode 100644
index 5f566aa..0000000
--- a/for-current/0015-bcache-add-more-error-message-in-bch_cached_dev_atta.patch
+++ /dev/null
@@ -1,39 +0,0 @@
-From 4ece2cd8f76fd9c43d0f479f9e8c6f1d41a2c323 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sat, 1 Jun 2019 01:03:00 +0800
-Subject: [PATCH 15/37] bcache: add more error message in
- bch_cached_dev_attach()
-
-This patch adds more error messages for attaching a cached device; this
-is helpful to debug code failures during bcache device startup.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/super.c | 4 ++++
- 1 file changed, 4 insertions(+)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index d4d8d1300faf..a836910ef368 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1169,6 +1169,8 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
- down_write(&dc->writeback_lock);
- if (bch_cached_dev_writeback_start(dc)) {
- up_write(&dc->writeback_lock);
-+ pr_err("Couldn't start writeback facilities for %s",
-+ dc->disk.disk->disk_name);
- return -ENOMEM;
- }
-
-@@ -1182,6 +1184,8 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
- ret = bch_cached_dev_run(dc);
- if (ret && (ret != -EBUSY)) {
- up_write(&dc->writeback_lock);
-+ pr_err("Couldn't run cached device %s",
-+ dc->backing_dev_name);
- return ret;
- }
-
---
-2.16.4
-
diff --git a/for-current/0016-bcache-improve-error-message-in-bch_cached_dev_run.patch b/for-current/0016-bcache-improve-error-message-in-bch_cached_dev_run.patch
deleted file mode 100644
index 777cc6f..0000000
--- a/for-current/0016-bcache-improve-error-message-in-bch_cached_dev_run.patch
+++ /dev/null
@@ -1,53 +0,0 @@
-From 1c01818c558a50c66e86dce4196c8ef525dcbf58 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Tue, 4 Jun 2019 23:12:10 +0800
-Subject: [PATCH 16/37] bcache: improve error message in bch_cached_dev_run()
-
-This patch adds more error messages in bch_cached_dev_run() to indicate
-the exact reason why an error value is returned. Please notice that when
-printing out the "is running already" message, pr_info() is used,
-because although -EBUSY is returned in this case, the bcache device can
-continue to attach to the cache device and run, so it should not be an
-error-level message in the kernel log.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/super.c | 9 +++++++--
- 1 file changed, 7 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index a836910ef368..e9e6d653bf70 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -926,13 +926,18 @@ int bch_cached_dev_run(struct cached_dev *dc)
- NULL,
- };
-
-- if (dc->io_disable)
-+ if (dc->io_disable) {
-+ pr_err("I/O disabled on cached dev %s",
-+ dc->backing_dev_name);
- return -EIO;
-+ }
-
- if (atomic_xchg(&dc->running, 1)) {
- kfree(env[1]);
- kfree(env[2]);
- kfree(buf);
-+ pr_info("cached dev %s is running already",
-+ dc->backing_dev_name);
- return -EBUSY;
- }
-
-@@ -961,7 +966,7 @@ int bch_cached_dev_run(struct cached_dev *dc)
- if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") ||
- sysfs_create_link(&disk_to_dev(d->disk)->kobj,
- &d->kobj, "bcache")) {
-- pr_debug("error creating sysfs link");
-+ pr_err("Couldn't create bcache dev <-> disk sysfs symlinks");
- return -ENOMEM;
- }
-
---
-2.16.4
-
diff --git a/for-current/0017-bcache-remove-XXX-comment-line-from-run_cache_set.patch b/for-current/0017-bcache-remove-XXX-comment-line-from-run_cache_set.patch
deleted file mode 100644
index 7c374f6..0000000
--- a/for-current/0017-bcache-remove-XXX-comment-line-from-run_cache_set.patch
+++ /dev/null
@@ -1,31 +0,0 @@
-From d3227df55cfc8d09d733d48619663401bc3862d1 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Mon, 13 May 2019 23:47:38 +0800
-Subject: [PATCH 17/37] bcache: remove "XXX:" comment line from run_cache_set()
-
-In previous bcache patches for Linux v5.2, the failure code path of
-run_cache_set() was tested and fixed. So now the following comment
-line can be removed from run_cache_set(),
- /* XXX: test this, it's broken */
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/super.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index e9e6d653bf70..c53fe0f1629f 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1979,7 +1979,7 @@ static int run_cache_set(struct cache_set *c)
- }
-
- closure_sync(&cl);
-- /* XXX: test this, it's broken */
-+
- bch_cache_set_error(c, "%s", err);
-
- return -EIO;
---
-2.16.4
-
diff --git a/for-current/0018-bcache-make-bset_search_tree-be-more-understandable.patch b/for-current/0018-bcache-make-bset_search_tree-be-more-understandable.patch
deleted file mode 100644
index 8b48fca..0000000
--- a/for-current/0018-bcache-make-bset_search_tree-be-more-understandable.patch
+++ /dev/null
@@ -1,70 +0,0 @@
-From bb73929cd5ef3fe192253d7f74afb448c13d01f7 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Tue, 14 May 2019 22:51:40 +0800
-Subject: [PATCH 18/37] bcache: make bset_search_tree() be more understandable
-
-The purpose of following code in bset_search_tree() is to avoid a branch
-instruction,
- 994 if (likely(f->exponent != 127))
- 995 n = j * 2 + (((unsigned int)
- 996 (f->mantissa -
- 997 bfloat_mantissa(search, f))) >> 31);
- 998 else
- 999 n = (bkey_cmp(tree_to_bkey(t, j), search) > 0)
-1000 ? j * 2
-1001 : j * 2 + 1;
-
-This piece of code is not very clear to understand; even when I tried
-to add a code comment for it, I made a mistake. This patch removes the
-implicit bit operation and uses an explicit branch to calculate the
-next location in the binary tree search.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/bset.c | 30 +++++++++++-------------------
- 1 file changed, 11 insertions(+), 19 deletions(-)
-
-diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
-index 8af9509e78bd..08768796b543 100644
---- a/drivers/md/bcache/bset.c
-+++ b/drivers/md/bcache/bset.c
-@@ -975,25 +975,17 @@ static struct bset_search_iter bset_search_tree(struct bset_tree *t,
- j = n;
- f = &t->tree[j];
-
-- /*
-- * Similar bit trick, use subtract operation to avoid a branch
-- * instruction.
-- *
-- * n = (f->mantissa > bfloat_mantissa())
-- * ? j * 2
-- * : j * 2 + 1;
-- *
-- * We need to subtract 1 from f->mantissa for the sign bit trick
-- * to work - that's done in make_bfloat()
-- */
-- if (likely(f->exponent != 127))
-- n = j * 2 + (((unsigned int)
-- (f->mantissa -
-- bfloat_mantissa(search, f))) >> 31);
-- else
-- n = (bkey_cmp(tree_to_bkey(t, j), search) > 0)
-- ? j * 2
-- : j * 2 + 1;
-+ if (likely(f->exponent != 127)) {
-+ if (f->mantissa >= bfloat_mantissa(search, f))
-+ n = j * 2;
-+ else
-+ n = j * 2 + 1;
-+ } else {
-+ if (bkey_cmp(tree_to_bkey(t, j), search) > 0)
-+ n = j * 2;
-+ else
-+ n = j * 2 + 1;
-+ }
- } while (n < t->size);
-
- inorder = to_inorder(j, t);
---
-2.16.4
-
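To make the removed expression concrete: with unsigned arithmetic, (unsigned int)(a - b) has its top bit set exactly when a < b (for values far below 2^31, as the bfloat mantissa bitfield is), so shifting right by 31 selects the left child (j * 2) or the right child (j * 2 + 1) without a branch. The standalone check below is illustrative only, assuming 32-bit unsigned int and small mantissa values; it is not part of the patch.

    #include <assert.h>

    /* Old branchless form: the top bit of the unsigned difference picks the child. */
    static unsigned int next_branchless(unsigned int j, unsigned int mantissa,
                                        unsigned int search_mantissa)
    {
            return j * 2 + ((unsigned int)(mantissa - search_mantissa) >> 31);
    }

    /* New explicit form introduced by the patch. */
    static unsigned int next_explicit(unsigned int j, unsigned int mantissa,
                                      unsigned int search_mantissa)
    {
            return (mantissa >= search_mantissa) ? j * 2 : j * 2 + 1;
    }

    int main(void)
    {
            assert(next_branchless(3, 100, 50) == next_explicit(3, 100, 50)); /* left: 6 */
            assert(next_branchless(3, 50, 100) == next_explicit(3, 50, 100)); /* right: 7 */
            assert(next_branchless(3, 50, 50)  == next_explicit(3, 50, 50));  /* left: 6 */
            return 0;
    }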
diff --git a/for-current/0019-bcache-add-pendings_cleanup-to-stop-pending-bcache-d.patch b/for-current/0019-bcache-add-pendings_cleanup-to-stop-pending-bcache-d.patch
deleted file mode 100644
index 1e0d93d..0000000
--- a/for-current/0019-bcache-add-pendings_cleanup-to-stop-pending-bcache-d.patch
+++ /dev/null
@@ -1,107 +0,0 @@
-From 0d36cf832c884a5e8aac9dcf1739376a027026e0 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 20 Mar 2019 23:11:59 +0800
-Subject: [PATCH 19/37] bcache: add pendings_cleanup to stop pending bcache
- device
-
-If a bcache device is in dirty state and its cache set is not
-registered, this bcache device will not appear in /dev/bcache<N>,
-and there is no way to stop it or remove the bcache kernel module.
-
-This is an as-designed behavior, but sometimes people have to reboot
-the whole system to release or stop the pending backing device.
-
-This sysfs interface removes such pending bcache devices when anything
-is written into the sysfs file manually.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/super.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 55 insertions(+)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index c53fe0f1629f..c4c4b2d99dc2 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -2273,9 +2273,13 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page,
-
- static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
- const char *buffer, size_t size);
-+static ssize_t bch_pending_bdevs_cleanup(struct kobject *k,
-+ struct kobj_attribute *attr,
-+ const char *buffer, size_t size);
-
- kobj_attribute_write(register, register_bcache);
- kobj_attribute_write(register_quiet, register_bcache);
-+kobj_attribute_write(pendings_cleanup, bch_pending_bdevs_cleanup);
-
- static bool bch_is_open_backing(struct block_device *bdev)
- {
-@@ -2400,6 +2404,56 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
- goto out;
- }
-
-+
-+struct pdev {
-+ struct list_head list;
-+ struct cached_dev *dc;
-+};
-+
-+static ssize_t bch_pending_bdevs_cleanup(struct kobject *k,
-+ struct kobj_attribute *attr,
-+ const char *buffer,
-+ size_t size)
-+{
-+ LIST_HEAD(pending_devs);
-+ ssize_t ret = size;
-+ struct cached_dev *dc, *tdc;
-+ struct pdev *pdev, *tpdev;
-+ struct cache_set *c, *tc;
-+
-+ mutex_lock(&bch_register_lock);
-+ list_for_each_entry_safe(dc, tdc, &uncached_devices, list) {
-+ pdev = kmalloc(sizeof(struct pdev), GFP_KERNEL);
-+ if (!pdev)
-+ break;
-+ pdev->dc = dc;
-+ list_add(&pdev->list, &pending_devs);
-+ }
-+
-+ list_for_each_entry_safe(pdev, tpdev, &pending_devs, list) {
-+ list_for_each_entry_safe(c, tc, &bch_cache_sets, list) {
-+ char *pdev_set_uuid = pdev->dc->sb.set_uuid;
-+ char *set_uuid = c->sb.uuid;
-+
-+ if (!memcmp(pdev_set_uuid, set_uuid, 16)) {
-+ list_del(&pdev->list);
-+ kfree(pdev);
-+ break;
-+ }
-+ }
-+ }
-+ mutex_unlock(&bch_register_lock);
-+
-+ list_for_each_entry_safe(pdev, tpdev, &pending_devs, list) {
-+ pr_info("delete pdev %p", pdev);
-+ list_del(&pdev->list);
-+ bcache_device_stop(&pdev->dc->disk);
-+ kfree(pdev);
-+ }
-+
-+ return ret;
-+}
-+
- static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x)
- {
- if (code == SYS_DOWN ||
-@@ -2518,6 +2572,7 @@ static int __init bcache_init(void)
- static const struct attribute *files[] = {
- &ksysfs_register.attr,
- &ksysfs_register_quiet.attr,
-+ &ksysfs_pendings_cleanup.attr,
- NULL
- };
-
---
-2.16.4
-
diff --git a/for-current/0020-bcache-fix-mistaken-sysfs-entry-for-io_error-counter.patch b/for-current/0020-bcache-fix-mistaken-sysfs-entry-for-io_error-counter.patch
deleted file mode 100644
index a6c25ca..0000000
--- a/for-current/0020-bcache-fix-mistaken-sysfs-entry-for-io_error-counter.patch
+++ /dev/null
@@ -1,43 +0,0 @@
-From e5d866519cd41e45afd27256dfa1bd9adc056331 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sun, 16 Jun 2019 23:59:12 +0800
-Subject: [PATCH 20/37] bcache: fix mistaken sysfs entry for io_error counter
-
-In bch_cached_dev_files[] from drivers/md/bcache/sysfs.c, sysfs_errors
-is inserted incorrectly; the correct entry should be sysfs_io_errors.
-
-This patch fixes the problem, and now I/O errors of the cached device
-can be read from /sys/block/bcache<N>/bcache/io_errors.
-
-Fixes: c7b7bd07404c5 ("bcache: add io_disable to struct cached_dev")
-Signed-off-by: Coly Li <colyli@suse.de>
-Cc: stable@vger.kernel.org
----
- drivers/md/bcache/sysfs.c | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index eb678e43ac00..dddb8d4048ce 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -176,7 +176,7 @@ SHOW(__bch_cached_dev)
- var_print(writeback_percent);
- sysfs_hprint(writeback_rate,
- wb ? atomic_long_read(&dc->writeback_rate.rate) << 9 : 0);
-- sysfs_hprint(io_errors, atomic_read(&dc->io_errors));
-+ sysfs_printf(io_errors, "%i", atomic_read(&dc->io_errors));
- sysfs_printf(io_error_limit, "%i", dc->error_limit);
- sysfs_printf(io_disable, "%i", dc->io_disable);
- var_print(writeback_rate_update_seconds);
-@@ -463,7 +463,7 @@ static struct attribute *bch_cached_dev_files[] = {
- &sysfs_writeback_rate_p_term_inverse,
- &sysfs_writeback_rate_minimum,
- &sysfs_writeback_rate_debug,
-- &sysfs_errors,
-+ &sysfs_io_errors,
- &sysfs_io_error_limit,
- &sysfs_io_disable,
- &sysfs_dirty_data,
---
-2.16.4
-
diff --git a/for-current/0021-bcache-destroy-dc-writeback_write_wq-if-failed-to-cr.patch b/for-current/0021-bcache-destroy-dc-writeback_write_wq-if-failed-to-cr.patch
deleted file mode 100644
index 6e40e1f..0000000
--- a/for-current/0021-bcache-destroy-dc-writeback_write_wq-if-failed-to-cr.patch
+++ /dev/null
@@ -1,35 +0,0 @@
-From dbc25640a571d5ee3d90380fcbaff5ce4a2b77ef Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Mon, 17 Jun 2019 00:06:58 +0800
-Subject: [PATCH 21/37] bcache: destroy dc->writeback_write_wq if failed to
- create dc->writeback_thread
-
-Commit 9baf30972b55 ("bcache: fix for gc and write-back race") added a
-new work queue dc->writeback_write_wq, but forgot to destroy it in the
-error condition when creating dc->writeback_thread failed.
-
-This patch destroys dc->writeback_write_wq if kthread_create() returns
-an error pointer for dc->writeback_thread, so that a memory leak is
-avoided.
-
-Fixes: 9baf30972b55 ("bcache: fix for gc and write-back race")
-Signed-off-by: Coly Li <colyli@suse.de>
-Cc: stable@vger.kernel.org
----
- drivers/md/bcache/writeback.c | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index 262f7ef20992..21081febcb59 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -833,6 +833,7 @@ int bch_cached_dev_writeback_start(struct cached_dev *dc)
- "bcache_writeback");
- if (IS_ERR(dc->writeback_thread)) {
- cached_dev_put(dc);
-+ destroy_workqueue(dc->writeback_write_wq);
- return PTR_ERR(dc->writeback_thread);
- }
- dc->writeback_running = true;
---
-2.16.4
-
diff --git a/for-current/0022-bcache-stop-writeback-kthread-and-kworker-when-bch_c.patch b/for-current/0022-bcache-stop-writeback-kthread-and-kworker-when-bch_c.patch
deleted file mode 100644
index 93d0881..0000000
--- a/for-current/0022-bcache-stop-writeback-kthread-and-kworker-when-bch_c.patch
+++ /dev/null
@@ -1,42 +0,0 @@
-From fd2668ce597ccaf68a102d6cda906e359e2de4b6 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Mon, 17 Jun 2019 23:03:02 +0800
-Subject: [PATCH 22/37] bcache: stop writeback kthread and kworker when
- bch_cached_dev_run() failed
-
-In bch_cached_dev_attach(), after bch_cached_dev_writeback_start() is
-called, the writeback kthread and the writeback rate update kworker of
-the cached device are created. If the following bch_cached_dev_run()
-fails, bch_cached_dev_attach() returns -ENOMEM without stopping the
-writeback related kthread and kworker.
-
-This patch stops the writeback kthread and the writeback rate update
-kworker before returning -ENOMEM if bch_cached_dev_run() returns an
-error.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/super.c | 8 ++++++++
- 1 file changed, 8 insertions(+)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index c4c4b2d99dc2..791cb930b353 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1189,6 +1189,14 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
- ret = bch_cached_dev_run(dc);
- if (ret && (ret != -EBUSY)) {
- up_write(&dc->writeback_lock);
-+ /*
-+ * bch_register_lock is held, bcache_device_stop() is not
-+ * able to be directly called. The kthread and kworker
-+ * created previously in bch_cached_dev_writeback_start()
-+ * have to be stopped manually here.
-+ */
-+ kthread_stop(dc->writeback_thread);
-+ cancel_writeback_rate_update_dwork(dc);
- pr_err("Couldn't run cached device %s",
- dc->backing_dev_name);
- return ret;
---
-2.16.4
-
diff --git a/for-current/0023-bcache-avoid-a-deadlock-in-bcache_reboot.patch b/for-current/0023-bcache-avoid-a-deadlock-in-bcache_reboot.patch
deleted file mode 100644
index 128ea9f..0000000
--- a/for-current/0023-bcache-avoid-a-deadlock-in-bcache_reboot.patch
+++ /dev/null
@@ -1,211 +0,0 @@
-From dec31a9984ccded737345da74b8657dbf3c78ea4 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Tue, 21 May 2019 23:19:55 +0800
-Subject: [PATCH 23/37] bcache: avoid a deadlock in bcache_reboot()
-
-A deadlock in bcache_reboot() is observed quite frequently, and it
-hangs the system reboot process. The reason is that in bcache_reboot()
-the mutex bch_register_lock is held while calling bch_cache_set_stop()
-and bcache_device_stop(). But the process of stopping a cache set or a
-bcache device acquires bch_register_lock again. If this mutex is
-already held here, a deadlock happens inside the stopping process. The
-aftermath of the deadlock is that the whole system reboot gets hung.
-
-The fix is to avoid holding bch_register_lock for the following loops
-in bcache_reboot(),
- list_for_each_entry_safe(c, tc, &bch_cache_sets, list)
- bch_cache_set_stop(c);
-
- list_for_each_entry_safe(dc, tdc, &uncached_devices, list)
- bcache_device_stop(&dc->disk);
-
-A module level variable 'bcache_is_reboot' is added; it is set to true
-in bcache_reboot(). In register_bcache(), if bcache_is_reboot is true,
-the registration is rejected by returning -EBUSY immediately.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/super.c | 40 +++++++++++++++++++++++++++++++++++++++-
- drivers/md/bcache/sysfs.c | 26 ++++++++++++++++++++++++++
- 2 files changed, 65 insertions(+), 1 deletion(-)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 791cb930b353..a88238ad5da1 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -40,6 +40,7 @@ static const char invalid_uuid[] = {
-
- static struct kobject *bcache_kobj;
- struct mutex bch_register_lock;
-+bool bcache_is_reboot;
- LIST_HEAD(bch_cache_sets);
- static LIST_HEAD(uncached_devices);
-
-@@ -49,6 +50,7 @@ static wait_queue_head_t unregister_wait;
- struct workqueue_struct *bcache_wq;
- struct workqueue_struct *bch_journal_wq;
-
-+
- #define BTREE_MAX_PAGES (256 * 1024 / PAGE_SIZE)
- /* limitation of partitions number on single bcache device */
- #define BCACHE_MINORS 128
-@@ -2335,6 +2337,11 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
- if (!try_module_get(THIS_MODULE))
- return -EBUSY;
-
-+ /* For latest state of bcache_is_reboot */
-+ smp_mb();
-+ if (bcache_is_reboot)
-+ return -EBUSY;
-+
- path = kstrndup(buffer, size, GFP_KERNEL);
- if (!path)
- goto err;
-@@ -2464,6 +2471,9 @@ static ssize_t bch_pending_bdevs_cleanup(struct kobject *k,
-
- static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x)
- {
-+ if (bcache_is_reboot)
-+ return NOTIFY_DONE;
-+
- if (code == SYS_DOWN ||
- code == SYS_HALT ||
- code == SYS_POWER_OFF) {
-@@ -2476,19 +2486,45 @@ static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x)
-
- mutex_lock(&bch_register_lock);
-
-+ if (bcache_is_reboot)
-+ goto out;
-+
-+ /* New registration is rejected since now */
-+ bcache_is_reboot = true;
-+ /*
-+ * Make registering caller (if there is) on other CPU
-+ * core know bcache_is_reboot set to true earlier
-+ */
-+ smp_mb();
-+
- if (list_empty(&bch_cache_sets) &&
- list_empty(&uncached_devices))
- goto out;
-
-+ mutex_unlock(&bch_register_lock);
-+
- pr_info("Stopping all devices:");
-
-+ /*
-+ * The reason bch_register_lock is not held to call
-+ * bch_cache_set_stop() and bcache_device_stop() is to
-+ * avoid potential deadlock during reboot, because cache
-+	 * set or bcache device stopping process will acquire
-+ * bch_register_lock too.
-+ *
-+ * We are safe here because bcache_is_reboot sets to
-+ * true already, register_bcache() will reject new
-+ * registration now. bcache_is_reboot also makes sure
-+ * bcache_reboot() won't be re-entered on by other thread,
-+ * so there is no race in following list iteration by
-+ * list_for_each_entry_safe().
-+ */
- list_for_each_entry_safe(c, tc, &bch_cache_sets, list)
- bch_cache_set_stop(c);
-
- list_for_each_entry_safe(dc, tdc, &uncached_devices, list)
- bcache_device_stop(&dc->disk);
-
-- mutex_unlock(&bch_register_lock);
-
- /*
- * Give an early chance for other kthreads and
-@@ -2616,6 +2652,8 @@ static int __init bcache_init(void)
- bch_debug_init();
- closure_debug_init();
-
-+ bcache_is_reboot = false;
-+
- return 0;
- err:
- bcache_exit();
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index dddb8d4048ce..d62e28643109 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -16,6 +16,8 @@
- #include <linux/sort.h>
- #include <linux/sched/clock.h>
-
-+extern bool bcache_is_reboot;
-+
- /* Default is 0 ("writethrough") */
- static const char * const bch_cache_modes[] = {
- "writethrough",
-@@ -267,6 +269,10 @@ STORE(__cached_dev)
- struct cache_set *c;
- struct kobj_uevent_env *env;
-
-+ /* no user space access if system is rebooting */
-+ if (bcache_is_reboot)
-+ return -EBUSY;
-+
- #define d_strtoul(var) sysfs_strtoul(var, dc->var)
- #define d_strtoul_nonzero(var) sysfs_strtoul_clamp(var, dc->var, 1, INT_MAX)
- #define d_strtoi_h(var) sysfs_hatoi(var, dc->var)
-@@ -407,6 +413,10 @@ STORE(bch_cached_dev)
- struct cached_dev *dc = container_of(kobj, struct cached_dev,
- disk.kobj);
-
-+ /* no user space access if system is rebooting */
-+ if (bcache_is_reboot)
-+ return -EBUSY;
-+
- mutex_lock(&bch_register_lock);
- size = __cached_dev_store(kobj, attr, buf, size);
-
-@@ -510,6 +520,10 @@ STORE(__bch_flash_dev)
- kobj);
- struct uuid_entry *u = &d->c->uuids[d->id];
-
-+ /* no user space access if system is rebooting */
-+ if (bcache_is_reboot)
-+ return -EBUSY;
-+
- sysfs_strtoul(data_csum, d->data_csum);
-
- if (attr == &sysfs_size) {
-@@ -745,6 +759,10 @@ STORE(__bch_cache_set)
- struct cache_set *c = container_of(kobj, struct cache_set, kobj);
- ssize_t v;
-
-+ /* no user space access if system is rebooting */
-+ if (bcache_is_reboot)
-+ return -EBUSY;
-+
- if (attr == &sysfs_unregister)
- bch_cache_set_unregister(c);
-
-@@ -864,6 +882,10 @@ STORE(bch_cache_set_internal)
- {
- struct cache_set *c = container_of(kobj, struct cache_set, internal);
-
-+ /* no user space access if system is rebooting */
-+ if (bcache_is_reboot)
-+ return -EBUSY;
-+
- return bch_cache_set_store(&c->kobj, attr, buf, size);
- }
-
-@@ -1049,6 +1071,10 @@ STORE(__bch_cache)
- struct cache *ca = container_of(kobj, struct cache, kobj);
- ssize_t v;
-
-+ /* no user space access if system is rebooting */
-+ if (bcache_is_reboot)
-+ return -EBUSY;
-+
- if (attr == &sysfs_discard) {
- bool v = strtoul_or_return(buf);
-
---
-2.16.4
-
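As a side note, the registration-rejection logic introduced above follows a
common shutdown pattern: publish a flag, then make every later entry point
check it. The following user-space sketch (hypothetical names, C11 atomics
standing in for the kernel's smp_mb() pairing, not kernel code) shows the
shape of that pattern.

/* Compile with: cc -o reboot_flag reboot_flag.c */
#include <errno.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool shutting_down;	/* false at program start */

static int register_device(const char *name)
{
	/* Every entry point checks the flag before doing new work */
	if (atomic_load(&shutting_down))
		return -EBUSY;
	printf("registered %s\n", name);
	return 0;
}

static void begin_shutdown(void)
{
	/* Publish the flag before starting to tear devices down */
	atomic_store(&shutting_down, true);
}

int main(void)
{
	register_device("dev0");		/* accepted */
	begin_shutdown();
	if (register_device("dev1") == -EBUSY)	/* rejected */
		printf("registration rejected during shutdown\n");
	return 0;
}
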
diff --git a/for-current/0024-bcache-acquire-bch_register_lock-later-in-cached_dev.patch b/for-current/0024-bcache-acquire-bch_register_lock-later-in-cached_dev.patch
deleted file mode 100644
index ea0c810..0000000
--- a/for-current/0024-bcache-acquire-bch_register_lock-later-in-cached_dev.patch
+++ /dev/null
@@ -1,47 +0,0 @@
-From 78af82b45144e562e87ac70f0b4710c96bec04ff Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sun, 2 Jun 2019 01:06:12 +0800
-Subject: [PATCH 24/37] bcache: acquire bch_register_lock later in
- cached_dev_detach_finish()
-
-Now that there is the variable bcache_is_reboot to prevent device
-registration or unregistration during reboot, it is unnecessary to
-still hold the mutex bch_register_lock before stopping the
-writeback_rate_update kworker and the writeback kthread. And if the
-kworker or kthread being stopped holds bch_register_lock inside its
-routine (we used to have such a problem in the writeback thread,
-thanks to Junhui Wang for fixing it), it is very easy to introduce a
-deadlock during the reboot/shutdown procedure.
-
-Therefore in this patch the acquisition of bch_register_lock is moved
-to just before calling calc_cached_dev_sectors(), which is later than
-the original location in cached_dev_detach_finish().
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/super.c | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index a88238ad5da1..40d857e690f9 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1018,7 +1018,6 @@ static void cached_dev_detach_finish(struct work_struct *w)
- BUG_ON(!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags));
- BUG_ON(refcount_read(&dc->count));
-
-- mutex_lock(&bch_register_lock);
-
- if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
- cancel_writeback_rate_update_dwork(dc);
-@@ -1034,6 +1033,8 @@ static void cached_dev_detach_finish(struct work_struct *w)
- bch_write_bdev_super(dc, &cl);
- closure_sync(&cl);
-
-+ mutex_lock(&bch_register_lock);
-+
- calc_cached_dev_sectors(dc->disk.c);
- bcache_device_detach(&dc->disk);
- list_move(&dc->list, &uncached_devices);
---
-2.16.4
-
diff --git a/for-current/0025-bcache-acquire-bch_register_lock-later-in-cached_dev.patch b/for-current/0025-bcache-acquire-bch_register_lock-later-in-cached_dev.patch
deleted file mode 100644
index 0884f6a..0000000
--- a/for-current/0025-bcache-acquire-bch_register_lock-later-in-cached_dev.patch
+++ /dev/null
@@ -1,160 +0,0 @@
-From ee7b60589c0e89a38ded0885d5810c652f343e3e Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 12 Jun 2019 21:10:38 +0800
-Subject: [PATCH 25/37] bcache: acquire bch_register_lock later in
- cached_dev_free()
-
-When the lockdep engine is enabled, a lockdep warning can be observed
-when rebooting or shutting down the system,
-
-[ 3142.764557][ T1] bcache: bcache_reboot() Stopping all devices:
-[ 3142.776265][ T2649]
-[ 3142.777159][ T2649] ======================================================
-[ 3142.780039][ T2649] WARNING: possible circular locking dependency detected
-[ 3142.782869][ T2649] 5.2.0-rc4-lp151.20-default+ #1 Tainted: G W
-[ 3142.785684][ T2649] ------------------------------------------------------
-[ 3142.788479][ T2649] kworker/3:67/2649 is trying to acquire lock:
-[ 3142.790738][ T2649] 00000000aaf02291 ((wq_completion)bcache_writeback_wq){+.+.}, at: flush_workqueue+0x87/0x4c0
-[ 3142.794678][ T2649]
-[ 3142.794678][ T2649] but task is already holding lock:
-[ 3142.797402][ T2649] 000000004fcf89c5 (&bch_register_lock){+.+.}, at: cached_dev_free+0x17/0x120 [bcache]
-[ 3142.801462][ T2649]
-[ 3142.801462][ T2649] which lock already depends on the new lock.
-[ 3142.801462][ T2649]
-[ 3142.805277][ T2649]
-[ 3142.805277][ T2649] the existing dependency chain (in reverse order) is:
-[ 3142.808902][ T2649]
-[ 3142.808902][ T2649] -> #2 (&bch_register_lock){+.+.}:
-[ 3142.812396][ T2649] __mutex_lock+0x7a/0x9d0
-[ 3142.814184][ T2649] cached_dev_free+0x17/0x120 [bcache]
-[ 3142.816415][ T2649] process_one_work+0x2a4/0x640
-[ 3142.818413][ T2649] worker_thread+0x39/0x3f0
-[ 3142.820276][ T2649] kthread+0x125/0x140
-[ 3142.822061][ T2649] ret_from_fork+0x3a/0x50
-[ 3142.823965][ T2649]
-[ 3142.823965][ T2649] -> #1 ((work_completion)(&cl->work)#2){+.+.}:
-[ 3142.827244][ T2649] process_one_work+0x277/0x640
-[ 3142.829160][ T2649] worker_thread+0x39/0x3f0
-[ 3142.830958][ T2649] kthread+0x125/0x140
-[ 3142.832674][ T2649] ret_from_fork+0x3a/0x50
-[ 3142.834915][ T2649]
-[ 3142.834915][ T2649] -> #0 ((wq_completion)bcache_writeback_wq){+.+.}:
-[ 3142.838121][ T2649] lock_acquire+0xb4/0x1c0
-[ 3142.840025][ T2649] flush_workqueue+0xae/0x4c0
-[ 3142.842035][ T2649] drain_workqueue+0xa9/0x180
-[ 3142.844042][ T2649] destroy_workqueue+0x17/0x250
-[ 3142.846142][ T2649] cached_dev_free+0x52/0x120 [bcache]
-[ 3142.848530][ T2649] process_one_work+0x2a4/0x640
-[ 3142.850663][ T2649] worker_thread+0x39/0x3f0
-[ 3142.852464][ T2649] kthread+0x125/0x140
-[ 3142.854106][ T2649] ret_from_fork+0x3a/0x50
-[ 3142.855880][ T2649]
-[ 3142.855880][ T2649] other info that might help us debug this:
-[ 3142.855880][ T2649]
-[ 3142.859663][ T2649] Chain exists of:
-[ 3142.859663][ T2649] (wq_completion)bcache_writeback_wq --> (work_completion)(&cl->work)#2 --> &bch_register_lock
-[ 3142.859663][ T2649]
-[ 3142.865424][ T2649] Possible unsafe locking scenario:
-[ 3142.865424][ T2649]
-[ 3142.868022][ T2649] CPU0 CPU1
-[ 3142.869885][ T2649] ---- ----
-[ 3142.871751][ T2649] lock(&bch_register_lock);
-[ 3142.873379][ T2649] lock((work_completion)(&cl->work)#2);
-[ 3142.876399][ T2649] lock(&bch_register_lock);
-[ 3142.879727][ T2649] lock((wq_completion)bcache_writeback_wq);
-[ 3142.882064][ T2649]
-[ 3142.882064][ T2649] *** DEADLOCK ***
-[ 3142.882064][ T2649]
-[ 3142.885060][ T2649] 3 locks held by kworker/3:67/2649:
-[ 3142.887245][ T2649] #0: 00000000e774cdd0 ((wq_completion)events){+.+.}, at: process_one_work+0x21e/0x640
-[ 3142.890815][ T2649] #1: 00000000f7df89da ((work_completion)(&cl->work)#2){+.+.}, at: process_one_work+0x21e/0x640
-[ 3142.894884][ T2649] #2: 000000004fcf89c5 (&bch_register_lock){+.+.}, at: cached_dev_free+0x17/0x120 [bcache]
-[ 3142.898797][ T2649]
-[ 3142.898797][ T2649] stack backtrace:
-[ 3142.900961][ T2649] CPU: 3 PID: 2649 Comm: kworker/3:67 Tainted: G W 5.2.0-rc4-lp151.20-default+ #1
-[ 3142.904789][ T2649] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 04/13/2018
-[ 3142.909168][ T2649] Workqueue: events cached_dev_free [bcache]
-[ 3142.911422][ T2649] Call Trace:
-[ 3142.912656][ T2649] dump_stack+0x85/0xcb
-[ 3142.914181][ T2649] print_circular_bug+0x19a/0x1f0
-[ 3142.916193][ T2649] __lock_acquire+0x16cd/0x1850
-[ 3142.917936][ T2649] ? __lock_acquire+0x6a8/0x1850
-[ 3142.919704][ T2649] ? lock_acquire+0xb4/0x1c0
-[ 3142.921335][ T2649] ? find_held_lock+0x34/0xa0
-[ 3142.923052][ T2649] lock_acquire+0xb4/0x1c0
-[ 3142.924635][ T2649] ? flush_workqueue+0x87/0x4c0
-[ 3142.926375][ T2649] flush_workqueue+0xae/0x4c0
-[ 3142.928047][ T2649] ? flush_workqueue+0x87/0x4c0
-[ 3142.929824][ T2649] ? drain_workqueue+0xa9/0x180
-[ 3142.931686][ T2649] drain_workqueue+0xa9/0x180
-[ 3142.933534][ T2649] destroy_workqueue+0x17/0x250
-[ 3142.935787][ T2649] cached_dev_free+0x52/0x120 [bcache]
-[ 3142.937795][ T2649] process_one_work+0x2a4/0x640
-[ 3142.939803][ T2649] worker_thread+0x39/0x3f0
-[ 3142.941487][ T2649] ? process_one_work+0x640/0x640
-[ 3142.943389][ T2649] kthread+0x125/0x140
-[ 3142.944894][ T2649] ? kthread_create_worker_on_cpu+0x70/0x70
-[ 3142.947744][ T2649] ret_from_fork+0x3a/0x50
-[ 3142.970358][ T2649] bcache: bcache_device_free() bcache0 stopped
-
-Here is how the deadlock happens.
-1) bcache_reboot() calls bcache_device_stop(), then inside
- bcache_device_stop() BCACHE_DEV_CLOSING bit is set on d->flags.
- Then closure_queue(&d->cl) is called to invoke cached_dev_flush().
-2) In cached_dev_flush(), cached_dev_free() is called by continue_at().
-3) In cached_dev_free(), when stopping the writeback kthread of the
-   cached device by kthread_stop(), dc->writeback_thread will be woken
-   up to quit the kthread while-loop, then cached_dev_put() is called
-   in bch_writeback_thread().
-4) Calling cached_dev_put() in writeback kthread may drop dc->count to
- 0, then dc->detach kworker is scheduled, which is initialized as
- cached_dev_detach_finish().
-5) Inside cached_dev_detach_finish(), the last line of code is to call
- closure_put(&dc->disk.cl), which drops the last reference counter of
-   closure dc->disk.cl, then the callback cached_dev_flush() gets
- called.
-Now cached_dev_flush() is called for the second time in the code path;
-the first time was in step 2). bch_register_lock will be acquired
-again, and an A-A lock (lockdep terminology) happens.
-
-The root cause of the above A-A lock is that in cached_dev_free() the
-mutex bch_register_lock is held before stopping the writeback kthread
-and other kworkers. Fortunately now we have the variable
-'bcache_is_reboot', which may prevent device registration or
-unregistration during reboot/shutdown time, so it is unnecessary to
-hold bch_register_lock so early now.
-
-This is how this patch fixes the reboot/shutdown time A-A lock issue:
-after moving mutex_lock(&bch_register_lock) to a later location, just
-before atomic_read(&dc->running) in cached_dev_free(), such an A-A
-lock problem can be solved without any reboot time registration race.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/super.c | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 40d857e690f9..8a12a8313367 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1231,8 +1231,6 @@ static void cached_dev_free(struct closure *cl)
- {
- struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
-
-- mutex_lock(&bch_register_lock);
--
- if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
- cancel_writeback_rate_update_dwork(dc);
-
-@@ -1243,6 +1241,8 @@ static void cached_dev_free(struct closure *cl)
- if (!IS_ERR_OR_NULL(dc->status_update_thread))
- kthread_stop(dc->status_update_thread);
-
-+ mutex_lock(&bch_register_lock);
-+
- if (atomic_read(&dc->running))
- bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
- bcache_device_free(&dc->disk);
---
-2.16.4
-
diff --git a/for-current/0026-bcache-fix-potential-deadlock-in-cached_def_free.patch b/for-current/0026-bcache-fix-potential-deadlock-in-cached_def_free.patch
deleted file mode 100644
index 6178515..0000000
--- a/for-current/0026-bcache-fix-potential-deadlock-in-cached_def_free.patch
+++ /dev/null
@@ -1,168 +0,0 @@
-From 9076f3622e5ed9a65b67ae47bba6c3a8f5c0e5d2 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Tue, 4 Jun 2019 14:28:33 +0800
-Subject: [PATCH 26/37] bcache: fix potential deadlock in cached_def_free()
-
-When lockdep is enabled and the system is rebooted with a writeback
-mode bcache device, the following potential deadlock warning is
-reported by the lockdep engine.
-
-[ 101.536569][ T401] kworker/2:2/401 is trying to acquire lock:
-[ 101.538575][ T401] 00000000bbf6e6c7 ((wq_completion)bcache_writeback_wq){+.+.}, at: flush_workqueue+0x87/0x4c0
-[ 101.542054][ T401]
-[ 101.542054][ T401] but task is already holding lock:
-[ 101.544587][ T401] 00000000f5f305b3 ((work_completion)(&cl->work)#2){+.+.}, at: process_one_work+0x21e/0x640
-[ 101.548386][ T401]
-[ 101.548386][ T401] which lock already depends on the new lock.
-[ 101.548386][ T401]
-[ 101.551874][ T401]
-[ 101.551874][ T401] the existing dependency chain (in reverse order) is:
-[ 101.555000][ T401]
-[ 101.555000][ T401] -> #1 ((work_completion)(&cl->work)#2){+.+.}:
-[ 101.557860][ T401] process_one_work+0x277/0x640
-[ 101.559661][ T401] worker_thread+0x39/0x3f0
-[ 101.561340][ T401] kthread+0x125/0x140
-[ 101.562963][ T401] ret_from_fork+0x3a/0x50
-[ 101.564718][ T401]
-[ 101.564718][ T401] -> #0 ((wq_completion)bcache_writeback_wq){+.+.}:
-[ 101.567701][ T401] lock_acquire+0xb4/0x1c0
-[ 101.569651][ T401] flush_workqueue+0xae/0x4c0
-[ 101.571494][ T401] drain_workqueue+0xa9/0x180
-[ 101.573234][ T401] destroy_workqueue+0x17/0x250
-[ 101.575109][ T401] cached_dev_free+0x44/0x120 [bcache]
-[ 101.577304][ T401] process_one_work+0x2a4/0x640
-[ 101.579357][ T401] worker_thread+0x39/0x3f0
-[ 101.581055][ T401] kthread+0x125/0x140
-[ 101.582709][ T401] ret_from_fork+0x3a/0x50
-[ 101.584592][ T401]
-[ 101.584592][ T401] other info that might help us debug this:
-[ 101.584592][ T401]
-[ 101.588355][ T401] Possible unsafe locking scenario:
-[ 101.588355][ T401]
-[ 101.590974][ T401] CPU0 CPU1
-[ 101.592889][ T401] ---- ----
-[ 101.594743][ T401] lock((work_completion)(&cl->work)#2);
-[ 101.596785][ T401] lock((wq_completion)bcache_writeback_wq);
-[ 101.600072][ T401] lock((work_completion)(&cl->work)#2);
-[ 101.602971][ T401] lock((wq_completion)bcache_writeback_wq);
-[ 101.605255][ T401]
-[ 101.605255][ T401] *** DEADLOCK ***
-[ 101.605255][ T401]
-[ 101.608310][ T401] 2 locks held by kworker/2:2/401:
-[ 101.610208][ T401] #0: 00000000cf2c7d17 ((wq_completion)events){+.+.}, at: process_one_work+0x21e/0x640
-[ 101.613709][ T401] #1: 00000000f5f305b3 ((work_completion)(&cl->work)#2){+.+.}, at: process_one_work+0x21e/0x640
-[ 101.617480][ T401]
-[ 101.617480][ T401] stack backtrace:
-[ 101.619539][ T401] CPU: 2 PID: 401 Comm: kworker/2:2 Tainted: G W 5.2.0-rc4-lp151.20-default+ #1
-[ 101.623225][ T401] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 04/13/2018
-[ 101.627210][ T401] Workqueue: events cached_dev_free [bcache]
-[ 101.629239][ T401] Call Trace:
-[ 101.630360][ T401] dump_stack+0x85/0xcb
-[ 101.631777][ T401] print_circular_bug+0x19a/0x1f0
-[ 101.633485][ T401] __lock_acquire+0x16cd/0x1850
-[ 101.635184][ T401] ? __lock_acquire+0x6a8/0x1850
-[ 101.636863][ T401] ? lock_acquire+0xb4/0x1c0
-[ 101.638421][ T401] ? find_held_lock+0x34/0xa0
-[ 101.640015][ T401] lock_acquire+0xb4/0x1c0
-[ 101.641513][ T401] ? flush_workqueue+0x87/0x4c0
-[ 101.643248][ T401] flush_workqueue+0xae/0x4c0
-[ 101.644832][ T401] ? flush_workqueue+0x87/0x4c0
-[ 101.646476][ T401] ? drain_workqueue+0xa9/0x180
-[ 101.648303][ T401] drain_workqueue+0xa9/0x180
-[ 101.649867][ T401] destroy_workqueue+0x17/0x250
-[ 101.651503][ T401] cached_dev_free+0x44/0x120 [bcache]
-[ 101.653328][ T401] process_one_work+0x2a4/0x640
-[ 101.655029][ T401] worker_thread+0x39/0x3f0
-[ 101.656693][ T401] ? process_one_work+0x640/0x640
-[ 101.658501][ T401] kthread+0x125/0x140
-[ 101.660012][ T401] ? kthread_create_worker_on_cpu+0x70/0x70
-[ 101.661985][ T401] ret_from_fork+0x3a/0x50
-[ 101.691318][ T401] bcache: bcache_device_free() bcache0 stopped
-
-Here is how the above potential deadlock may happen in reboot/shutdown
-code path,
-1) bcache_reboot() is called firstly in the reboot/shutdown code path,
- then in bcache_reboot(), bcache_device_stop() is called.
-2) bcache_device_stop() sets BCACHE_DEV_CLOSING on d->flags, then calls
-   closure_queue(&d->cl) to invoke cached_dev_flush(). In turn
-   cached_dev_flush() calls cached_dev_free() via continue_at().
-3) In cached_dev_free(), after the writeback kthread
-   dc->writeback_thread is stopped, the workqueue
-   dc->writeback_write_wq is stopped by destroy_workqueue().
-4) Inside destroy_workqueue(), drain_workqueue() is called. Inside
- drain_workqueue(), flush_workqueue() is called. Then wq->lockdep_map
- is acquired by lock_map_acquire() in flush_workqueue(). After the
-   lock is acquired, the rest of flush_workqueue() just waits for the
- workqueue to complete.
-5) Now we look back at writeback thread routine bch_writeback_thread(),
- in the main while-loop, write_dirty() is called via continue_at() in
-   read_dirty_submit(), which is called via continue_at() in the
-   while-loop level function read_dirty(). Inside write_dirty() it may
-   be re-called on workqueue dc->writeback_write_wq via continue_at().
- It means when the writeback kthread is stopped in cached_dev_free()
- there might be still one kworker queued on dc->writeback_write_wq
- to execute write_dirty() again.
-6) Now this kworker is scheduled on dc->writeback_write_wq to run by
- process_one_work() (which is called by worker_thread()). Before
-   calling the work routine, wq->lockdep_map is acquired.
-7) But wq->lockdep_map is acquired already in step 4), so an A-A lock
- (lockdep terminology) scenario happens.
-
-Indeed, on a multi-core system the above deadlock very rarely
-happens, just as the code comment in process_one_work() says,
-2263 * AFAICT there is no possible deadlock scenario between the
-2264 * flush_work() and complete() primitives (except for
- single-threaded
-2265 * workqueues), so hiding them isn't a problem.
-
-But it is still good to fix such a lockdep warning, even if no one
-runs bcache on a single core system.
-
-The fix is simple. This patch solves the above potential deadlock by,
-- Do not destroy workqueue dc->writeback_write_wq in cached_dev_free().
-- Flush and destroy dc->writeback_write_wq in the writeback kthread
-  routine bch_writeback_thread(), after the thread quits its main
-  while-loop and before cached_dev_put() is called.
-
-By this fix, dc->writeback_write_wq will be flushed and destroyed
-before the writeback kthread stops, so the chance of an A-A lock on
-wq->lockdep_map disappears and such an A-A deadlock won't happen
-any more.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/super.c | 2 --
- drivers/md/bcache/writeback.c | 4 ++++
- 2 files changed, 4 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 8a12a8313367..a8ea4e2086a9 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1236,8 +1236,6 @@ static void cached_dev_free(struct closure *cl)
-
- if (!IS_ERR_OR_NULL(dc->writeback_thread))
- kthread_stop(dc->writeback_thread);
-- if (dc->writeback_write_wq)
-- destroy_workqueue(dc->writeback_write_wq);
- if (!IS_ERR_OR_NULL(dc->status_update_thread))
- kthread_stop(dc->status_update_thread);
-
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index 21081febcb59..d60268fe49e1 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -738,6 +738,10 @@ static int bch_writeback_thread(void *arg)
- }
- }
-
-+ if (dc->writeback_write_wq) {
-+ flush_workqueue(dc->writeback_write_wq);
-+ destroy_workqueue(dc->writeback_write_wq);
-+ }
- cached_dev_put(dc);
- wait_for_kthread_stop();
-
---
-2.16.4
-
diff --git a/for-current/0027-bcache-add-code-comments-for-journal_read_bucket.patch b/for-current/0027-bcache-add-code-comments-for-journal_read_bucket.patch
deleted file mode 100644
index e04fec8..0000000
--- a/for-current/0027-bcache-add-code-comments-for-journal_read_bucket.patch
+++ /dev/null
@@ -1,72 +0,0 @@
-From 4a355725c03084500247141749a752c23fa0790d Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Thu, 30 May 2019 18:39:17 +0800
-Subject: [PATCH 27/37] bcache: add code comments for journal_read_bucket()
-
-This patch adds more code comments in journal_read_bucket(), as an
-effort to make the code easier to understand.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/journal.c | 24 ++++++++++++++++++++++++
- 1 file changed, 24 insertions(+)
-
-diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index 54f8886b6177..98ee467ec3f7 100644
---- a/drivers/md/bcache/journal.c
-+++ b/drivers/md/bcache/journal.c
-@@ -100,6 +100,20 @@ reread: left = ca->sb.bucket_size - offset;
-
- blocks = set_blocks(j, block_bytes(ca->set));
-
-+ /*
-+ * Nodes in 'list' are in linear increasing order of
-+ * i->j.seq, the node on head has the smallest (oldest)
-+ * journal seq, the node on tail has the biggest
-+ * (latest) journal seq.
-+ */
-+
-+ /*
-+ * Check from the oldest jset for last_seq. If
-+ * i->j.seq < j->last_seq, it means the oldest jset
-+ * in list is expired and useless, remove it from
-+	 * this list. Otherwise, j is a candidate jset for
-+ * further following checks.
-+ */
- while (!list_empty(list)) {
- i = list_first_entry(list,
- struct journal_replay, list);
-@@ -109,13 +123,22 @@ reread: left = ca->sb.bucket_size - offset;
- kfree(i);
- }
-
-+ /* iterate list in reverse order (from latest jset) */
- list_for_each_entry_reverse(i, list, list) {
- if (j->seq == i->j.seq)
- goto next_set;
-
-+ /*
-+ * if j->seq is less than any i->j.last_seq
-+ * in list, j is an expired and useless jset.
-+ */
- if (j->seq < i->j.last_seq)
- goto next_set;
-
-+ /*
-+ * 'where' points to first jset in list which
-+	 * is older than j.
-+ */
- if (j->seq > i->j.seq) {
- where = &i->list;
- goto add;
-@@ -129,6 +152,7 @@ reread: left = ca->sb.bucket_size - offset;
- if (!i)
- return -ENOMEM;
- memcpy(&i->j, j, bytes);
-+ /* Add to the location after 'where' points to */
- list_add(&i->list, where);
- ret = 1;
-
---
-2.16.4
-
diff --git a/for-current/0028-bcache-set-largest-seq-to-ja-seq-bucket_index-in-jou.patch b/for-current/0028-bcache-set-largest-seq-to-ja-seq-bucket_index-in-jou.patch
deleted file mode 100644
index 419dbe5..0000000
--- a/for-current/0028-bcache-set-largest-seq-to-ja-seq-bucket_index-in-jou.patch
+++ /dev/null
@@ -1,34 +0,0 @@
-From 3221dc7b0f4d0dff70943b8a9a600ee5bfd17e53 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Thu, 30 May 2019 18:40:37 +0800
-Subject: [PATCH 28/37] bcache: set largest seq to ja->seq[bucket_index] in
- journal_read_bucket()
-
-In journal_read_bucket(), when setting ja->seq[bucket_index], there is
-a potential case where a later, non-maximum value overwrites a larger
-sequence number already stored in ja->seq[bucket_index]. This patch
-adds a check to make sure that ja->seq[bucket_index] is only set to a
-new value if that value is bigger than the current one.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/journal.c | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index 98ee467ec3f7..3d321bffddc9 100644
---- a/drivers/md/bcache/journal.c
-+++ b/drivers/md/bcache/journal.c
-@@ -156,7 +156,8 @@ reread: left = ca->sb.bucket_size - offset;
- list_add(&i->list, where);
- ret = 1;
-
-- ja->seq[bucket_index] = j->seq;
-+ if (j->seq > ja->seq[bucket_index])
-+ ja->seq[bucket_index] = j->seq;
- next_set:
- offset += blocks * ca->sb.block_size;
- len -= blocks * ca->sb.block_size;
---
-2.16.4
-
diff --git a/for-current/0029-bcache-shrink-btree-node-cache-after-bch_btree_check.patch b/for-current/0029-bcache-shrink-btree-node-cache-after-bch_btree_check.patch
deleted file mode 100644
index 4fcc994..0000000
--- a/for-current/0029-bcache-shrink-btree-node-cache-after-bch_btree_check.patch
+++ /dev/null
@@ -1,55 +0,0 @@
-From a041fd83337fb5ca0a1a55103c3f0f057d0980f3 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Fri, 31 May 2019 17:29:56 +0800
-Subject: [PATCH 29/37] bcache: shrink btree node cache after bch_btree_check()
-
-When a cache set starts, bch_btree_check() will check all bkeys on the
-cache device by calculating their checksums. This operation will
-consume a huge amount of system memory if a lot of data is cached.
-Bcache uses its own mca cache to maintain all its read-in btree nodes,
-and only releases the cache space when the system memory management
-code starts to shrink caches. Before the memory management code calls
-the mca cache shrinker callback, the bcache mca cache will compete for
-memory with user space applications, which may have a negative effect
-on the performance of user space workloads (e.g. a database, or the
-I/O service of a distributed storage node).
-
-This patch calls the bcache mca shrinker routine to proactively
-release mca cache memory, to decrease the memory pressure of the
-system and avoid a negative effect on the overall system I/O
-performance.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/super.c | 17 +++++++++++++++++
- 1 file changed, 17 insertions(+)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index a8ea4e2086a9..26e374fbf57c 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1880,6 +1880,23 @@ static int run_cache_set(struct cache_set *c)
- if (bch_btree_check(c))
- goto err;
-
-+ /*
-+ * bch_btree_check() may occupy too much system memory which
-+ * has negative effects to user space application (e.g. data
-+ * base) performance. Shrink the mca cache memory proactively
-+ * here to avoid competing memory with user space workloads..
-+ */
-+ if (!c->shrinker_disabled) {
-+ struct shrink_control sc;
-+
-+ sc.gfp_mask = GFP_KERNEL;
-+ sc.nr_to_scan = c->btree_cache_used * c->btree_pages;
-+ /* first run to clear b->accessed tag */
-+ c->shrink.scan_objects(&c->shrink, &sc);
-+ /* second run to reap non-accessed nodes */
-+ c->shrink.scan_objects(&c->shrink, &sc);
-+ }
-+
- bch_journal_mark(c, &journal);
- bch_initial_gc_finish(c);
- pr_debug("btree_check() done");
---
-2.16.4
-
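The two back-to-back scan_objects() calls above implement a second-chance
sweep: the first pass only clears the accessed tag, the second pass reaps
whatever was not touched in between. The following standalone user-space
sketch (hypothetical names, not bcache code; in bcache the b->accessed tag
and bch_mca_scan() play these roles) illustrates the idea.

/* Compile with: cc -o second_chance second_chance.c */
#include <stdbool.h>
#include <stdio.h>

#define NR_ENTRIES 4

struct entry {
	bool present;
	bool accessed;
};

/*
 * One shrinker pass: recently used entries lose their accessed tag and
 * survive; entries whose tag is already clear are reaped.
 */
static void shrink_pass(struct entry *cache, int n)
{
	for (int i = 0; i < n; i++) {
		if (!cache[i].present)
			continue;
		if (cache[i].accessed)
			cache[i].accessed = false;	/* second chance */
		else
			cache[i].present = false;	/* reap */
	}
}

int main(void)
{
	struct entry cache[NR_ENTRIES] = {
		{ .present = true, .accessed = true  },
		{ .present = true, .accessed = true  },
		{ .present = true, .accessed = false },
		{ .present = true, .accessed = true  },
	};

	/* Two back-to-back passes release everything, even entries that
	 * entered with the accessed tag set. */
	shrink_pass(cache, NR_ENTRIES);
	shrink_pass(cache, NR_ENTRIES);

	for (int i = 0; i < NR_ENTRIES; i++)
		printf("entry %d present=%d\n", i, cache[i].present);
	return 0;
}
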
diff --git a/for-current/0030-bcache-Revert-bcache-free-heap-cache_set-flush_btree.patch b/for-current/0030-bcache-Revert-bcache-free-heap-cache_set-flush_btree.patch
deleted file mode 100644
index 9f65feb..0000000
--- a/for-current/0030-bcache-Revert-bcache-free-heap-cache_set-flush_btree.patch
+++ /dev/null
@@ -1,35 +0,0 @@
-From 73177d7836a9f472451c15b4498e7e0b79c46908 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Tue, 28 May 2019 21:36:56 +0800
-Subject: [PATCH 30/37] bcache: Revert "bcache: free heap
- cache_set->flush_btree in bch_journal_free"
-
-This reverts commit 6268dc2c4703aabfb0b35681be709acf4c2826c6.
-
-The reverted commit depends on commit c4dc2497d50d ("bcache: fix high
-CPU occupancy during journal"), which is reverted in the previous
-patch, so revert this one too.
-
-Fixes: 6268dc2c4703 ("bcache: free heap cache_set->flush_btree in bch_journal_free")
-Signed-off-by: Coly Li <colyli@suse.de>
-Cc: stable@vger.kernel.org
-Cc: Shenghui Wang <shhuiw@foxmail.com>
----
- drivers/md/bcache/journal.c | 1 -
- 1 file changed, 1 deletion(-)
-
-diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index 3d321bffddc9..11d8c93b88bb 100644
---- a/drivers/md/bcache/journal.c
-+++ b/drivers/md/bcache/journal.c
-@@ -884,7 +884,6 @@ void bch_journal_free(struct cache_set *c)
- free_pages((unsigned long) c->journal.w[1].data, JSET_BITS);
- free_pages((unsigned long) c->journal.w[0].data, JSET_BITS);
- free_fifo(&c->journal.pin);
-- free_heap(&c->flush_btree);
- }
-
- int bch_journal_alloc(struct cache_set *c)
---
-2.16.4
-
diff --git a/for-current/0031-bcache-Revert-bcache-fix-high-CPU-occupancy-during-j.patch b/for-current/0031-bcache-Revert-bcache-fix-high-CPU-occupancy-during-j.patch
deleted file mode 100644
index 814d7f1..0000000
--- a/for-current/0031-bcache-Revert-bcache-fix-high-CPU-occupancy-during-j.patch
+++ /dev/null
@@ -1,129 +0,0 @@
-From c738cc581e2658874876f29c5db4abd2fbcbfd4e Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Tue, 28 May 2019 21:19:38 +0800
-Subject: [PATCH 31/37] bcache: Revert "bcache: fix high CPU occupancy during
- journal"
-
-This reverts commit c4dc2497d50d9c6fb16aa0d07b6a14f3b2adb1e0.
-
-The reverted commit enlarges a race between the normal btree flush
-code path and btree_flush_write(), which causes a deadlock when the
-journal space is exhausted. Reverting it shrinks the race window from
-128 btree nodes to only 1 btree node.
-
-Fixes: c4dc2497d50d ("bcache: fix high CPU occupancy during journal")
-Signed-off-by: Coly Li <colyli@suse.de>
-Cc: stable@vger.kernel.org
-Cc: Tang Junhui <tang.junhui.linux@gmail.com>
----
- drivers/md/bcache/bcache.h | 2 --
- drivers/md/bcache/journal.c | 47 +++++++++++++++------------------------------
- drivers/md/bcache/util.h | 2 --
- 3 files changed, 15 insertions(+), 36 deletions(-)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 73a97586a2ef..cb268d7c6cea 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -726,8 +726,6 @@ struct cache_set {
-
- #define BUCKET_HASH_BITS 12
- struct hlist_head bucket_hash[1 << BUCKET_HASH_BITS];
--
-- DECLARE_HEAP(struct btree *, flush_btree);
- };
-
- struct bbio {
-diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index 11d8c93b88bb..14a4e2c44de9 100644
---- a/drivers/md/bcache/journal.c
-+++ b/drivers/md/bcache/journal.c
-@@ -416,12 +416,6 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
- }
-
- /* Journalling */
--#define journal_max_cmp(l, r) \
-- (fifo_idx(&c->journal.pin, btree_current_write(l)->journal) < \
-- fifo_idx(&(c)->journal.pin, btree_current_write(r)->journal))
--#define journal_min_cmp(l, r) \
-- (fifo_idx(&c->journal.pin, btree_current_write(l)->journal) > \
-- fifo_idx(&(c)->journal.pin, btree_current_write(r)->journal))
-
- static void btree_flush_write(struct cache_set *c)
- {
-@@ -429,35 +423,25 @@ static void btree_flush_write(struct cache_set *c)
- * Try to find the btree node with that references the oldest journal
- * entry, best is our current candidate and is locked if non NULL:
- */
-- struct btree *b;
-- int i;
-+ struct btree *b, *best;
-+ unsigned int i;
-
- atomic_long_inc(&c->flush_write);
--
- retry:
-- spin_lock(&c->journal.lock);
-- if (heap_empty(&c->flush_btree)) {
-- for_each_cached_btree(b, c, i)
-- if (btree_current_write(b)->journal) {
-- if (!heap_full(&c->flush_btree))
-- heap_add(&c->flush_btree, b,
-- journal_max_cmp);
-- else if (journal_max_cmp(b,
-- heap_peek(&c->flush_btree))) {
-- c->flush_btree.data[0] = b;
-- heap_sift(&c->flush_btree, 0,
-- journal_max_cmp);
-- }
-+ best = NULL;
-+
-+ for_each_cached_btree(b, c, i)
-+ if (btree_current_write(b)->journal) {
-+ if (!best)
-+ best = b;
-+ else if (journal_pin_cmp(c,
-+ btree_current_write(best)->journal,
-+ btree_current_write(b)->journal)) {
-+ best = b;
- }
-+ }
-
-- for (i = c->flush_btree.used / 2 - 1; i >= 0; --i)
-- heap_sift(&c->flush_btree, i, journal_min_cmp);
-- }
--
-- b = NULL;
-- heap_pop(&c->flush_btree, b, journal_min_cmp);
-- spin_unlock(&c->journal.lock);
--
-+ b = best;
- if (b) {
- mutex_lock(&b->write_lock);
- if (!btree_current_write(b)->journal) {
-@@ -898,8 +882,7 @@ int bch_journal_alloc(struct cache_set *c)
- j->w[0].c = c;
- j->w[1].c = c;
-
-- if (!(init_heap(&c->flush_btree, 128, GFP_KERNEL)) ||
-- !(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) ||
-+ if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) ||
- !(j->w[0].data = (void *) __get_free_pages(GFP_KERNEL, JSET_BITS)) ||
- !(j->w[1].data = (void *) __get_free_pages(GFP_KERNEL, JSET_BITS)))
- return -ENOMEM;
-diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
-index 1fbced94e4cc..c029f7443190 100644
---- a/drivers/md/bcache/util.h
-+++ b/drivers/md/bcache/util.h
-@@ -113,8 +113,6 @@ do { \
-
- #define heap_full(h) ((h)->used == (h)->size)
-
--#define heap_empty(h) ((h)->used == 0)
--
- #define DECLARE_FIFO(type, name) \
- struct { \
- size_t front, back, size, mask; \
---
-2.16.4
-
diff --git a/for-current/0032-bcache-only-clear-BTREE_NODE_dirty-bit-when-it-is-se.patch b/for-current/0032-bcache-only-clear-BTREE_NODE_dirty-bit-when-it-is-se.patch
deleted file mode 100644
index 40c8ba0..0000000
--- a/for-current/0032-bcache-only-clear-BTREE_NODE_dirty-bit-when-it-is-se.patch
+++ /dev/null
@@ -1,57 +0,0 @@
-From 6426472bb2a01a472329b9399df2c30ec4c7fce8 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Mon, 24 Jun 2019 15:31:57 +0800
-Subject: [PATCH 32/37] bcache: only clear BTREE_NODE_dirty bit when it is set
-
-In bch_btree_cache_free() and btree_node_free(), BTREE_NODE_dirty is
-always cleared no matter whether the btree node is dirty or not. The
-code looks like
-this,
- if (btree_node_dirty(b))
- btree_complete_write(b, btree_current_write(b));
- clear_bit(BTREE_NODE_dirty, &b->flags);
-
-Indeed, if btree_node_dirty(b) returns false, it means the
-BTREE_NODE_dirty bit is already cleared, so it is unnecessary to clear
-the bit again.
-
-This patch only clears BTREE_NODE_dirty when btree_node_dirty(b) is
-true (the bit is set), to save a few CPU cycles.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/btree.c | 11 ++++++-----
- 1 file changed, 6 insertions(+), 5 deletions(-)
-
-diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
-index cf38a1b031fa..88e5aa3fbb07 100644
---- a/drivers/md/bcache/btree.c
-+++ b/drivers/md/bcache/btree.c
-@@ -782,10 +782,10 @@ void bch_btree_cache_free(struct cache_set *c)
- while (!list_empty(&c->btree_cache)) {
- b = list_first_entry(&c->btree_cache, struct btree, list);
-
-- if (btree_node_dirty(b))
-+ if (btree_node_dirty(b)) {
- btree_complete_write(b, btree_current_write(b));
-- clear_bit(BTREE_NODE_dirty, &b->flags);
--
-+ clear_bit(BTREE_NODE_dirty, &b->flags);
-+ }
- mca_data_free(b);
- }
-
-@@ -1073,9 +1073,10 @@ static void btree_node_free(struct btree *b)
-
- mutex_lock(&b->write_lock);
-
-- if (btree_node_dirty(b))
-+ if (btree_node_dirty(b)) {
- btree_complete_write(b, btree_current_write(b));
-- clear_bit(BTREE_NODE_dirty, &b->flags);
-+ clear_bit(BTREE_NODE_dirty, &b->flags);
-+ }
-
- mutex_unlock(&b->write_lock);
-
---
-2.16.4
-
diff --git a/for-current/0033-bcache-add-comments-for-mutex_lock-b-write_lock.patch b/for-current/0033-bcache-add-comments-for-mutex_lock-b-write_lock.patch
deleted file mode 100644
index 8a15da2..0000000
--- a/for-current/0033-bcache-add-comments-for-mutex_lock-b-write_lock.patch
+++ /dev/null
@@ -1,47 +0,0 @@
-From b3bb7eb417b3c4efb4241f1a940af3da1763dcdb Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Mon, 24 Jun 2019 16:10:55 +0800
-Subject: [PATCH 33/37] bcache: add comments for mutex_lock(&b->write_lock)
-
-When accessing or modifying the BTREE_NODE_dirty bit, it is not always
-necessary to acquire b->write_lock. This patch adds comments to
-mca_reap() and bch_btree_cache_free() to explain why acquiring
-mutex_lock(&b->write_lock) is necessary in the former and unnecessary
-in the latter when checking or clearing the BTREE_NODE_dirty bit.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/btree.c | 10 ++++++++++
- 1 file changed, 10 insertions(+)
-
-diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
-index 88e5aa3fbb07..846306c3a887 100644
---- a/drivers/md/bcache/btree.c
-+++ b/drivers/md/bcache/btree.c
-@@ -659,6 +659,11 @@ static int mca_reap(struct btree *b, unsigned int min_order, bool flush)
- up(&b->io_mutex);
- }
-
-+ /*
-+ * BTREE_NODE_dirty might be cleared in btree_flush_btree() by
-+ * __bch_btree_node_write(). To avoid an extra flush, acquire
-+ * b->write_lock before checking BTREE_NODE_dirty bit.
-+ */
- mutex_lock(&b->write_lock);
- if (btree_node_dirty(b))
- __bch_btree_node_write(b, &cl);
-@@ -782,6 +787,11 @@ void bch_btree_cache_free(struct cache_set *c)
- while (!list_empty(&c->btree_cache)) {
- b = list_first_entry(&c->btree_cache, struct btree, list);
-
-+ /*
-+ * This function is called by cache_set_free(), no I/O
-+ * request on cache now, it is unnecessary to acquire
-+ * b->write_lock before clearing BTREE_NODE_dirty anymore.
-+ */
- if (btree_node_dirty(b)) {
- btree_complete_write(b, btree_current_write(b));
- clear_bit(BTREE_NODE_dirty, &b->flags);
---
-2.16.4
-
diff --git a/for-current/0034-bcache-remove-retry_flush_write-from-struct-cache_se.patch b/for-current/0034-bcache-remove-retry_flush_write-from-struct-cache_se.patch
deleted file mode 100644
index 0f14758..0000000
--- a/for-current/0034-bcache-remove-retry_flush_write-from-struct-cache_se.patch
+++ /dev/null
@@ -1,75 +0,0 @@
-From 6c7913bbc396a830cd06017eb2ea570fad187fba Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sat, 1 Jun 2019 01:58:23 +0800
-Subject: [PATCH 34/37] bcache: remove retry_flush_write from struct cache_set
-
-In struct cache_set, retry_flush_write was added for commit
-c4dc2497d50d ("bcache: fix high CPU occupancy during journal"), which
-is reverted in a previous patch.
-
-Now it is useless, and this patch removes it from the bcache code.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/bcache.h | 1 -
- drivers/md/bcache/journal.c | 1 -
- drivers/md/bcache/sysfs.c | 5 -----
- 3 files changed, 7 deletions(-)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index cb268d7c6cea..35396248a7d5 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -706,7 +706,6 @@ struct cache_set {
-
- atomic_long_t reclaim;
- atomic_long_t flush_write;
-- atomic_long_t retry_flush_write;
-
- enum {
- ON_ERROR_UNREGISTER,
-diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index 14a4e2c44de9..1218e3cada3c 100644
---- a/drivers/md/bcache/journal.c
-+++ b/drivers/md/bcache/journal.c
-@@ -447,7 +447,6 @@ static void btree_flush_write(struct cache_set *c)
- if (!btree_current_write(b)->journal) {
- mutex_unlock(&b->write_lock);
- /* We raced */
-- atomic_long_inc(&c->retry_flush_write);
- goto retry;
- }
-
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index d62e28643109..701a386a954c 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -83,7 +83,6 @@ read_attribute(state);
- read_attribute(cache_read_races);
- read_attribute(reclaim);
- read_attribute(flush_write);
--read_attribute(retry_flush_write);
- read_attribute(writeback_keys_done);
- read_attribute(writeback_keys_failed);
- read_attribute(io_errors);
-@@ -709,9 +708,6 @@ SHOW(__bch_cache_set)
- sysfs_print(flush_write,
- atomic_long_read(&c->flush_write));
-
-- sysfs_print(retry_flush_write,
-- atomic_long_read(&c->retry_flush_write));
--
- sysfs_print(writeback_keys_done,
- atomic_long_read(&c->writeback_keys_done));
- sysfs_print(writeback_keys_failed,
-@@ -936,7 +932,6 @@ static struct attribute *bch_cache_set_internal_files[] = {
- &sysfs_cache_read_races,
- &sysfs_reclaim,
- &sysfs_flush_write,
-- &sysfs_retry_flush_write,
- &sysfs_writeback_keys_done,
- &sysfs_writeback_keys_failed,
-
---
-2.16.4
-
diff --git a/for-current/0035-bcache-fix-race-in-btree_flush_write.patch b/for-current/0035-bcache-fix-race-in-btree_flush_write.patch
deleted file mode 100644
index db50797..0000000
--- a/for-current/0035-bcache-fix-race-in-btree_flush_write.patch
+++ /dev/null
@@ -1,186 +0,0 @@
-From e79b0a3af2cad623846e90c46964761457d57741 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Thu, 27 Jun 2019 21:28:43 +0800
-Subject: [PATCH 35/37] bcache: fix race in btree_flush_write()
-
-There is a race between mca_reap(), btree_node_free() and the journal
-code btree_flush_write(), which results in a very rare and strange
-deadlock or panic that is very hard to reproduce.
-
-Let me explain how the race happens. In btree_flush_write() the btree
-node with the oldest journal pin is selected, then it is flushed to the
-cache device; the select-and-flush is a two-step operation. Between
-these two steps, the following may happen inside the race window,
-- The selected btree node was reaped by mca_reap() and allocated to
- another requester for another btree node.
-- The selected btree node was selected, flushed and released by the mca
- shrink callback bch_mca_scan().
-When btree_flush_write() tries to flush the selected btree node,
-firstly b->write_lock is held by mutex_lock(). If the race happens and
-the memory of the selected btree node is allocated to another btree
-node whose write_lock is held already, a deadlock very probably
-happens here. A worse case is that the memory of the selected btree
-node is released; then all references to this btree node (e.g.
-b->write_lock) will trigger a NULL pointer dereference panic.
-
-This race was introduced in commit cafe56359144 ("bcache: A block layer
-cache"), and enlarged by commit c4dc2497d50d ("bcache: fix high CPU
-occupancy during journal"), which selected 128 btree nodes and flushed
-them one-by-one in a quite long time period.
-
-Such a race was not easy to reproduce before. On a Lenovo SR650 server
-with 48 Xeon cores, configured with 1 NVMe SSD as the cache device and
-an MD raid0 device assembled from 3 NVMe SSDs as the backing device,
-this race can be observed around once every 10,000 calls of
-btree_flush_write(). Both deadlocks and kernel panics happened as
-aftermath of the race.
-
-The idea of the fix is to add a btree flag BTREE_NODE_journal_flush.
-It is set when selecting btree nodes, and cleared after the btree
-nodes are flushed. Then when mca_reap() meets a btree node with this
-bit set, the node is not reaped until the bit is cleared. Since
-mca_reap() only reaps btree nodes without the BTREE_NODE_journal_flush
-flag, such a race is avoided.
-
-One corner case should be noticed, that is btree_node_free(). It might
-be called in some error handling code path, for example the following
-code piece from btree_split(),
- 2149 err_free2:
- 2150 bkey_put(b->c, &n2->key);
- 2151 btree_node_free(n2);
- 2152 rw_unlock(true, n2);
- 2153 err_free1:
- 2154 bkey_put(b->c, &n1->key);
- 2155 btree_node_free(n1);
- 2156 rw_unlock(true, n1);
-At lines 2151 and 2155, the btree nodes n2 and n1 are released without
-mca_reap(), so BTREE_NODE_journal_flush also needs to be checked here.
-If btree_node_free() is called directly in such an error handling path
-and the selected btree node has the BTREE_NODE_journal_flush bit set,
-just delay for 1 us and retry. In this case the btree node won't be
-skipped; btree_node_free() retries until the BTREE_NODE_journal_flush
-bit is cleared, and then frees the btree node memory.
-
-Fixes: cafe56359144 ("bcache: A block layer cache")
-Signed-off-by: Coly Li <colyli@suse.de>
-Reported-and-tested-by: kbuild test robot <lkp@intel.com>
-Cc: stable@vger.kernel.org
----
- drivers/md/bcache/btree.c | 28 +++++++++++++++++++++++++++-
- drivers/md/bcache/btree.h | 2 ++
- drivers/md/bcache/journal.c | 7 +++++++
- 3 files changed, 36 insertions(+), 1 deletion(-)
-
-diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
-index 846306c3a887..ba434d9ac720 100644
---- a/drivers/md/bcache/btree.c
-+++ b/drivers/md/bcache/btree.c
-@@ -35,7 +35,7 @@
- #include <linux/rcupdate.h>
- #include <linux/sched/clock.h>
- #include <linux/rculist.h>
--
-+#include <linux/delay.h>
- #include <trace/events/bcache.h>
-
- /*
-@@ -659,12 +659,25 @@ static int mca_reap(struct btree *b, unsigned int min_order, bool flush)
- up(&b->io_mutex);
- }
-
-+retry:
- /*
- * BTREE_NODE_dirty might be cleared in btree_flush_btree() by
- * __bch_btree_node_write(). To avoid an extra flush, acquire
- * b->write_lock before checking BTREE_NODE_dirty bit.
- */
- mutex_lock(&b->write_lock);
-+ /*
-+ * If this btree node is selected in btree_flush_write() by journal
-+ * code, delay and retry until the node is flushed by journal code
-+ * and BTREE_NODE_journal_flush bit cleared by btree_flush_write().
-+ */
-+ if (btree_node_journal_flush(b)) {
-+ pr_debug("bnode %p is flushing by journal, retry", b);
-+ mutex_unlock(&b->write_lock);
-+ udelay(1);
-+ goto retry;
-+ }
-+
- if (btree_node_dirty(b))
- __bch_btree_node_write(b, &cl);
- mutex_unlock(&b->write_lock);
-@@ -1081,7 +1094,20 @@ static void btree_node_free(struct btree *b)
-
- BUG_ON(b == b->c->root);
-
-+retry:
- mutex_lock(&b->write_lock);
-+ /*
-+ * If the btree node is selected and flushing in btree_flush_write(),
-+ * delay and retry until the BTREE_NODE_journal_flush bit cleared,
-+ * then it is safe to free the btree node here. Otherwise this btree
-+ * node will be in race condition.
-+ */
-+ if (btree_node_journal_flush(b)) {
-+ mutex_unlock(&b->write_lock);
-+ pr_debug("bnode %p journal_flush set, retry", b);
-+ udelay(1);
-+ goto retry;
-+ }
-
- if (btree_node_dirty(b)) {
- btree_complete_write(b, btree_current_write(b));
-diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
-index d1c72ef64edf..76cfd121a486 100644
---- a/drivers/md/bcache/btree.h
-+++ b/drivers/md/bcache/btree.h
-@@ -158,11 +158,13 @@ enum btree_flags {
- BTREE_NODE_io_error,
- BTREE_NODE_dirty,
- BTREE_NODE_write_idx,
-+ BTREE_NODE_journal_flush,
- };
-
- BTREE_FLAG(io_error);
- BTREE_FLAG(dirty);
- BTREE_FLAG(write_idx);
-+BTREE_FLAG(journal_flush);
-
- static inline struct btree_write *btree_current_write(struct btree *b)
- {
-diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index 1218e3cada3c..a1e3e1fcea6e 100644
---- a/drivers/md/bcache/journal.c
-+++ b/drivers/md/bcache/journal.c
-@@ -430,6 +430,7 @@ static void btree_flush_write(struct cache_set *c)
- retry:
- best = NULL;
-
-+ mutex_lock(&c->bucket_lock);
- for_each_cached_btree(b, c, i)
- if (btree_current_write(b)->journal) {
- if (!best)
-@@ -442,15 +443,21 @@ static void btree_flush_write(struct cache_set *c)
- }
-
- b = best;
-+ if (b)
-+ set_btree_node_journal_flush(b);
-+ mutex_unlock(&c->bucket_lock);
-+
- if (b) {
- mutex_lock(&b->write_lock);
- if (!btree_current_write(b)->journal) {
-+ clear_bit(BTREE_NODE_journal_flush, &b->flags);
- mutex_unlock(&b->write_lock);
- /* We raced */
- goto retry;
- }
-
- __bch_btree_node_write(b, NULL);
-+ clear_bit(BTREE_NODE_journal_flush, &b->flags);
- mutex_unlock(&b->write_lock);
- }
- }
---
-2.16.4
-
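The retry-until-cleared pattern that the hunks above add to mca_reap() and
btree_node_free() can be sketched outside the kernel. The following is only
an illustrative userspace sketch under assumed names (struct node,
flusher(), node_free_safe()); it is not bcache code and uses pthreads in
place of the kernel locking primitives.

/*
 * Sketch: a flusher marks a node as "being flushed by journal code",
 * and the freeing path delays and retries until that flag is cleared,
 * mirroring the BTREE_NODE_journal_flush handling above.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

struct node {
	pthread_mutex_t write_lock;
	bool journal_flush;		/* analogue of BTREE_NODE_journal_flush */
	bool dirty;
};

static struct node n = {
	.write_lock = PTHREAD_MUTEX_INITIALIZER,
	.journal_flush = false,
	.dirty = true,
};

/* analogue of btree_flush_write(): mark, write out, clear */
static void *flusher(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&n.write_lock);
	n.journal_flush = true;
	pthread_mutex_unlock(&n.write_lock);

	usleep(1000);			/* pretend the node write takes a while */

	pthread_mutex_lock(&n.write_lock);
	n.dirty = false;
	n.journal_flush = false;
	pthread_mutex_unlock(&n.write_lock);
	return NULL;
}

/* analogue of btree_node_free(): never free while the flag is set */
static void node_free_safe(struct node *b)
{
retry:
	pthread_mutex_lock(&b->write_lock);
	if (b->journal_flush) {
		pthread_mutex_unlock(&b->write_lock);
		usleep(1);		/* delay and retry, as in the patch */
		goto retry;
	}
	printf("freeing node, dirty=%d\n", b->dirty);
	pthread_mutex_unlock(&b->write_lock);
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, flusher, NULL);
	usleep(100);			/* give the flusher time to set the flag */
	node_free_safe(&n);
	pthread_join(&t, NULL);
	return 0;
}

The flag covers the window where the flusher has selected the node but does
not hold its write_lock, which is exactly the window the original race
exploited.
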
diff --git a/for-current/0036-bcache-performance-improvement-for-btree_flush_write.patch b/for-current/0036-bcache-performance-improvement-for-btree_flush_write.patch
deleted file mode 100644
index e7d5207..0000000
--- a/for-current/0036-bcache-performance-improvement-for-btree_flush_write.patch
+++ /dev/null
@@ -1,187 +0,0 @@
-From f2b6d7b2245938b2f08daa7c7f498e439e7ae176 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Thu, 27 Jun 2019 23:07:22 +0800
-Subject: [PATCH 36/37] bcache: performance improvement for btree_flush_write()
-
-This patch improves performance for btree_flush_write() in following
-ways,
-- Use another spinlock journal.flush_write_lock to replace the very
- hot journal.lock. We don't have to use journal.lock here: selecting
- candidate btree nodes takes a lot of time, and holding journal.lock
- here will block other journaling threads and drop the overall I/O performance.
-- Only select btree nodes to flush from the c->btree_cache list. When
- the machine has a large system memory, the mca cache may have a huge
- number of cached btree nodes. Iterating all the cached nodes will take
- a lot of CPU time, and most of the nodes on c->btree_cache_freeable and
- c->btree_cache_freed lists are cleared and have no need to be flushed.
- So traversing only the mca list c->btree_cache to select btree nodes to
- flush should be enough for most of the cases.
-- Don't iterate the whole c->btree_cache list; only select the first
- BTREE_FLUSH_NR btree nodes, in reverse order, to flush. Iterating all
- btree nodes from c->btree_cache and selecting the oldest journal pin
- btree nodes consumes a huge number of CPU cycles if the list is huge
- (pushing and popping a node into/out of a heap is expensive). The last
- several dirty btree nodes on the tail of the c->btree_cache list are the
- earliest allocated and cached btree nodes; they correspond to the oldest
- journal pin btree nodes. Therefore flushing only BTREE_FLUSH_NR btree
- nodes from the tail of c->btree_cache probably includes the oldest journal pin btree nodes.
-
-In my testing, the above change decreases CPU consumption by 50%+ when
-journal space is full. Sometimes IOPS drops to 0 for 5-8 seconds;
-compared to I/O blocking for 120+ seconds with the previous code, this
-is much better. Maybe there is room to improve in the future, but at
-this moment the fix looks fine and performs well in my testing.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/journal.c | 85 +++++++++++++++++++++++++++++++++------------
- drivers/md/bcache/journal.h | 4 +++
- 2 files changed, 67 insertions(+), 22 deletions(-)
-
-diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index a1e3e1fcea6e..8bcd8f1bf8cb 100644
---- a/drivers/md/bcache/journal.c
-+++ b/drivers/md/bcache/journal.c
-@@ -419,47 +419,87 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
-
- static void btree_flush_write(struct cache_set *c)
- {
-- /*
-- * Try to find the btree node with that references the oldest journal
-- * entry, best is our current candidate and is locked if non NULL:
-- */
-- struct btree *b, *best;
-- unsigned int i;
-+ struct btree *b, *t, *btree_nodes[BTREE_FLUSH_NR];
-+ unsigned int i, n;
-+
-+ if (c->journal.btree_flushing)
-+ return;
-+
-+ spin_lock(&c->journal.flush_write_lock);
-+ if (c->journal.btree_flushing) {
-+ spin_unlock(&c->journal.flush_write_lock);
-+ return;
-+ }
-+ c->journal.btree_flushing = true;
-+ spin_unlock(&c->journal.flush_write_lock);
-
- atomic_long_inc(&c->flush_write);
--retry:
-- best = NULL;
-+ memset(btree_nodes, 0, sizeof(btree_nodes));
-+ n = 0;
-
- mutex_lock(&c->bucket_lock);
-- for_each_cached_btree(b, c, i)
-- if (btree_current_write(b)->journal) {
-- if (!best)
-- best = b;
-- else if (journal_pin_cmp(c,
-- btree_current_write(best)->journal,
-- btree_current_write(b)->journal)) {
-- best = b;
-- }
-+ list_for_each_entry_safe_reverse(b, t, &c->btree_cache, list) {
-+ if (btree_node_journal_flush(b))
-+ pr_err("BUG: flush_write bit should not be set here!");
-+
-+ mutex_lock(&b->write_lock);
-+
-+ if (!btree_node_dirty(b)) {
-+ mutex_unlock(&b->write_lock);
-+ continue;
-+ }
-+
-+ if (!btree_current_write(b)->journal) {
-+ mutex_unlock(&b->write_lock);
-+ continue;
- }
-
-- b = best;
-- if (b)
- set_btree_node_journal_flush(b);
-+
-+ mutex_unlock(&b->write_lock);
-+
-+ btree_nodes[n++] = b;
-+ if (n == BTREE_FLUSH_NR)
-+ break;
-+ }
- mutex_unlock(&c->bucket_lock);
-
-- if (b) {
-+ for (i = 0; i < n; i++) {
-+ b = btree_nodes[i];
-+ if (!b) {
-+ pr_err("BUG: btree_nodes[%d] is NULL", i);
-+ continue;
-+ }
-+
-+ /* safe to check without holding b->write_lock */
-+ if (!btree_node_journal_flush(b)) {
-+ pr_err("BUG: bnode %p: journal_flush bit cleaned", b);
-+ continue;
-+ }
-+
- mutex_lock(&b->write_lock);
- if (!btree_current_write(b)->journal) {
- clear_bit(BTREE_NODE_journal_flush, &b->flags);
- mutex_unlock(&b->write_lock);
-- /* We raced */
-- goto retry;
-+ pr_debug("bnode %p: written by others", b);
-+ continue;
-+ }
-+
-+ if (!btree_node_dirty(b)) {
-+ clear_bit(BTREE_NODE_journal_flush, &b->flags);
-+ mutex_unlock(&b->write_lock);
-+ pr_debug("bnode %p: dirty bit cleaned by others", b);
-+ continue;
- }
-
- __bch_btree_node_write(b, NULL);
- clear_bit(BTREE_NODE_journal_flush, &b->flags);
- mutex_unlock(&b->write_lock);
- }
-+
-+ spin_lock(&c->journal.flush_write_lock);
-+ c->journal.btree_flushing = false;
-+ spin_unlock(&c->journal.flush_write_lock);
- }
-
- #define last_seq(j) ((j)->seq - fifo_used(&(j)->pin) + 1)
-@@ -881,6 +921,7 @@ int bch_journal_alloc(struct cache_set *c)
- struct journal *j = &c->journal;
-
- spin_lock_init(&j->lock);
-+ spin_lock_init(&j->flush_write_lock);
- INIT_DELAYED_WORK(&j->work, journal_write_work);
-
- c->journal_delay_ms = 100;
-diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h
-index 66f0facff84b..f2ea34d5f431 100644
---- a/drivers/md/bcache/journal.h
-+++ b/drivers/md/bcache/journal.h
-@@ -103,6 +103,8 @@ struct journal_write {
- /* Embedded in struct cache_set */
- struct journal {
- spinlock_t lock;
-+ spinlock_t flush_write_lock;
-+ bool btree_flushing;
- /* used when waiting because the journal was full */
- struct closure_waitlist wait;
- struct closure io;
-@@ -154,6 +156,8 @@ struct journal_device {
- struct bio_vec bv[8];
- };
-
-+#define BTREE_FLUSH_NR 8
-+
- #define journal_pin_cmp(c, l, r) \
- (fifo_idx(&(c)->journal.pin, (l)) > fifo_idx(&(c)->journal.pin, (r)))
-
---
-2.16.4
-
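A minimal userspace sketch of the bounded candidate selection described
above (not bcache code): the cache list is modeled as an array whose tail
holds the earliest cached nodes, and the reverse walk stops after at most
BTREE_FLUSH_NR dirty candidates. The array layout and the dirty pattern are
assumptions for illustration only.

#include <stdbool.h>
#include <stdio.h>

#define BTREE_FLUSH_NR	8
#define CACHE_NODES	100

struct node {
	int id;
	bool dirty;
};

int main(void)
{
	struct node cache[CACHE_NODES];		/* index 0 = newest, last = oldest */
	struct node *candidates[BTREE_FLUSH_NR];
	int i, n = 0;

	for (i = 0; i < CACHE_NODES; i++) {
		cache[i].id = i;
		cache[i].dirty = (i % 3 == 0);	/* pretend some nodes are dirty */
	}

	/* reverse walk: oldest nodes first, bounded by BTREE_FLUSH_NR */
	for (i = CACHE_NODES - 1; i >= 0 && n < BTREE_FLUSH_NR; i--) {
		if (!cache[i].dirty)
			continue;
		candidates[n++] = &cache[i];
	}

	for (i = 0; i < n; i++)
		printf("flush candidate: node %d\n", candidates[i]->id);

	return 0;
}

Compared with the heap-based selection this replaces, the walk usually
stops long before visiting every cached node, which is where the CPU saving
in the commit message comes from.
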
diff --git a/for-current/0037-bcache-add-reclaimed_journal_buckets-to-struct-cache.patch b/for-current/0037-bcache-add-reclaimed_journal_buckets-to-struct-cache.patch
deleted file mode 100644
index efc95f0..0000000
--- a/for-current/0037-bcache-add-reclaimed_journal_buckets-to-struct-cache.patch
+++ /dev/null
@@ -1,80 +0,0 @@
-From ea4bf18c9eb2ef705dce00b1bc5fde2f49ef2740 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sun, 2 Jun 2019 00:47:23 +0800
-Subject: [PATCH 37/37] bcache: add reclaimed_journal_buckets to struct
- cache_set
-
-Now we have counters for how many times the journal is reclaimed and how
-many times cached dirty btree nodes are flushed, but we don't know how
-many journal buckets are really reclaimed.
-
-This patch adds reclaimed_journal_buckets into struct cache_set; it is
-an increase-only counter that tells how many journal buckets have been
-reclaimed since the cache set started running. From these three counters
-(reclaim, reclaimed_journal_buckets, flush_write), we can get an idea of
-how well the current journal space reclaim code works.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/bcache.h | 1 +
- drivers/md/bcache/journal.c | 1 +
- drivers/md/bcache/sysfs.c | 5 +++++
- 3 files changed, 7 insertions(+)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 35396248a7d5..013e35a9e317 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -705,6 +705,7 @@ struct cache_set {
- atomic_long_t writeback_keys_failed;
-
- atomic_long_t reclaim;
-+ atomic_long_t reclaimed_journal_buckets;
- atomic_long_t flush_write;
-
- enum {
-diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index 8bcd8f1bf8cb..be2a2a201603 100644
---- a/drivers/md/bcache/journal.c
-+++ b/drivers/md/bcache/journal.c
-@@ -614,6 +614,7 @@ static void journal_reclaim(struct cache_set *c)
- k->ptr[n++] = MAKE_PTR(0,
- bucket_to_sector(c, ca->sb.d[ja->cur_idx]),
- ca->sb.nr_this_dev);
-+ atomic_long_inc(&c->reclaimed_journal_buckets);
- }
-
- if (n) {
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index 701a386a954c..9f0826712845 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -82,6 +82,7 @@ read_attribute(bset_tree_stats);
- read_attribute(state);
- read_attribute(cache_read_races);
- read_attribute(reclaim);
-+read_attribute(reclaimed_journal_buckets);
- read_attribute(flush_write);
- read_attribute(writeback_keys_done);
- read_attribute(writeback_keys_failed);
-@@ -705,6 +706,9 @@ SHOW(__bch_cache_set)
- sysfs_print(reclaim,
- atomic_long_read(&c->reclaim));
-
-+ sysfs_print(reclaimed_journal_buckets,
-+ atomic_long_read(&c->reclaimed_journal_buckets));
-+
- sysfs_print(flush_write,
- atomic_long_read(&c->flush_write));
-
-@@ -931,6 +935,7 @@ static struct attribute *bch_cache_set_internal_files[] = {
- &sysfs_bset_tree_stats,
- &sysfs_cache_read_races,
- &sysfs_reclaim,
-+ &sysfs_reclaimed_journal_buckets,
- &sysfs_flush_write,
- &sysfs_writeback_keys_done,
- &sysfs_writeback_keys_failed,
---
-2.16.4
-
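As a rough illustration of the last paragraph of the commit message above,
the three counters can be combined into simple health metrics. The sysfs
names come from this series; the ratios below are only this note's
assumption about how to read them, not anything bcache reports itself, and
the values are made up.

#include <stdio.h>

int main(void)
{
	/* pretend these were read from /sys/fs/bcache/<uuid>/ */
	long reclaim = 1200;		/* journal_reclaim() invocations */
	long reclaimed_buckets = 300;	/* journal buckets actually freed */
	long flush_write = 90;		/* btree_flush_write() invocations */

	if (reclaim > 0) {
		printf("buckets reclaimed per reclaim call: %.2f\n",
		       (double)reclaimed_buckets / reclaim);
		printf("btree flushes per reclaim call:     %.2f\n",
		       (double)flush_write / reclaim);
	}
	return 0;
}
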
diff --git a/for-next/0001-bcache-add-cond_resched-in-__bch_cache_cmp.patch b/for-next/0001-bcache-add-cond_resched-in-__bch_cache_cmp.patch
new file mode 100644
index 0000000..edd8fb6
--- /dev/null
+++ b/for-next/0001-bcache-add-cond_resched-in-__bch_cache_cmp.patch
@@ -0,0 +1,29 @@
+From: Shile Zhang <shile.zhang@linux.alibaba.com>
+Date: Thu, 15 Aug 2019 00:51:51 +0800
+Subject: [PATCH] bcache: add cond_resched() in __bch_cache_cmp()
+
+Reading /sys/fs/bcache/<uuid>/cacheN/priority_stats can take a very
+long time with a huge cache after a long run.
+
+Signed-off-by: Shile Zhang <shile.zhang@linux.alibaba.com>
+Tested-by: Heitor Alves de Siqueira <halves@canonical.com>
+Signed-off-by: Coly Li <colyli@suse.de>
+---
+ drivers/md/bcache/sysfs.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
+index 9f0826712845..6b29e34acf7a 100644
+--- a/drivers/md/bcache/sysfs.c
++++ b/drivers/md/bcache/sysfs.c
+@@ -960,6 +960,7 @@ KTYPE(bch_cache_set_internal);
+
+ static int __bch_cache_cmp(const void *l, const void *r)
+ {
++ cond_resched();
+ return *((uint16_t *)r) - *((uint16_t *)l);
+ }
+
+--
+2.16.4
+
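To see why this single comparator is worth a reschedule point, here is a
userspace sketch (not kernel code): sorting a priority_stats-sized array
calls the comparator O(n log n) times, and that is the loop the patch
breaks up. The array size and the call counter are assumptions for
illustration.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static unsigned long cmp_calls;

static int cache_cmp(const void *l, const void *r)
{
	cmp_calls++;
	/* descending order, as in __bch_cache_cmp() */
	return *(const uint16_t *)r - *(const uint16_t *)l;
}

int main(void)
{
	size_t n = 1 << 20;		/* one priority value per bucket */
	uint16_t *prio = malloc(n * sizeof(*prio));
	size_t i;

	if (!prio)
		return 1;
	for (i = 0; i < n; i++)
		prio[i] = (uint16_t)rand();

	qsort(prio, n, sizeof(*prio), cache_cmp);
	printf("%zu elements sorted, %lu comparator calls\n", n, cmp_calls);

	free(prio);
	return 0;
}
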
diff --git a/for-next/0001-closures-fix-a-race-on-wakeup-from-closure_sync.patch b/for-next/0001-closures-fix-a-race-on-wakeup-from-closure_sync.patch
new file mode 100644
index 0000000..44096e4
--- /dev/null
+++ b/for-next/0001-closures-fix-a-race-on-wakeup-from-closure_sync.patch
@@ -0,0 +1,35 @@
+From 3c3c34a87be58548a302573dbe32b518f047db09 Mon Sep 17 00:00:00 2001
+From: Kent Overstreet <kent.overstreet@gmail.com>
+Date: Mon, 10 Jun 2019 15:14:20 -0400
+Subject: [PATCH] closures: fix a race on wakeup from closure_sync
+
+Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
+Acked-by: Coly Li <colyli@suse.de>
+---
+ drivers/md/bcache/closure.c | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c
+index 73f5319295bc..c12cd809ab19 100644
+--- a/drivers/md/bcache/closure.c
++++ b/drivers/md/bcache/closure.c
+@@ -105,8 +105,14 @@ struct closure_syncer {
+
+ static void closure_sync_fn(struct closure *cl)
+ {
+- cl->s->done = 1;
+- wake_up_process(cl->s->task);
++ struct closure_syncer *s = cl->s;
++ struct task_struct *p;
++
++ rcu_read_lock();
++ p = READ_ONCE(s->task);
++ s->done = 1;
++ wake_up_process(p);
++ rcu_read_unlock();
+ }
+
+ void __sched __closure_sync(struct closure *cl)
+--
+2.16.4
+
diff --git a/for-test/0001-bcache-fix-deadlock-in-bcache_allocator.patch b/for-test/0001-bcache-fix-deadlock-in-bcache_allocator.patch
new file mode 100644
index 0000000..81646a5
--- /dev/null
+++ b/for-test/0001-bcache-fix-deadlock-in-bcache_allocator.patch
@@ -0,0 +1,149 @@
+From fb31000daa352493b206064a5e8c4bcaac0e6b6c Mon Sep 17 00:00:00 2001
+From: Andrea Righi <andrea.righi@canonical.com>
+Date: Wed, 7 Aug 2019 12:38:06 +0200
+Subject: [PATCH] bcache: fix deadlock in bcache_allocator
+
+bcache_allocator can call the following:
+
+ bch_allocator_thread()
+ -> bch_prio_write()
+ -> bch_bucket_alloc()
+ -> wait on &ca->set->bucket_wait
+
+But the wake up event on bucket_wait is supposed to come from
+bch_allocator_thread() itself => deadlock:
+
+[ 1158.490744] INFO: task bcache_allocato:15861 blocked for more than 10 seconds.
+[ 1158.495929] Not tainted 5.3.0-050300rc3-generic #201908042232
+[ 1158.500653] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+[ 1158.504413] bcache_allocato D 0 15861 2 0x80004000
+[ 1158.504419] Call Trace:
+[ 1158.504429] __schedule+0x2a8/0x670
+[ 1158.504432] schedule+0x2d/0x90
+[ 1158.504448] bch_bucket_alloc+0xe5/0x370 [bcache]
+[ 1158.504453] ? wait_woken+0x80/0x80
+[ 1158.504466] bch_prio_write+0x1dc/0x390 [bcache]
+[ 1158.504476] bch_allocator_thread+0x233/0x490 [bcache]
+[ 1158.504491] kthread+0x121/0x140
+[ 1158.504503] ? invalidate_buckets+0x890/0x890 [bcache]
+[ 1158.504506] ? kthread_park+0xb0/0xb0
+[ 1158.504510] ret_from_fork+0x35/0x40
+
+Fix by making the call to bch_prio_write() non-blocking, so that
+bch_allocator_thread() never waits on itself.
+
+Moreover, make sure to wake up the garbage collector thread when
+bch_prio_write() fails to allocate buckets.
+
+BugLink: https://bugs.launchpad.net/bugs/1784665
+BugLink: https://bugs.launchpad.net/bugs/1796292
+Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
+---
+ drivers/md/bcache/alloc.c | 5 ++++-
+ drivers/md/bcache/bcache.h | 2 +-
+ drivers/md/bcache/super.c | 27 +++++++++++++++++++++------
+ 3 files changed, 26 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
+index 6f776823b9ba..a1df0d95151c 100644
+--- a/drivers/md/bcache/alloc.c
++++ b/drivers/md/bcache/alloc.c
+@@ -377,7 +377,10 @@ static int bch_allocator_thread(void *arg)
+ if (!fifo_full(&ca->free_inc))
+ goto retry_invalidate;
+
+- bch_prio_write(ca);
++ if (bch_prio_write(ca, false) < 0) {
++ ca->invalidate_needs_gc = 1;
++ wake_up_gc(ca->set);
++ }
+ }
+ }
+ out:
+diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
+index 013e35a9e317..deb924e1d790 100644
+--- a/drivers/md/bcache/bcache.h
++++ b/drivers/md/bcache/bcache.h
+@@ -977,7 +977,7 @@ bool bch_cached_dev_error(struct cached_dev *dc);
+ __printf(2, 3)
+ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...);
+
+-void bch_prio_write(struct cache *ca);
++int bch_prio_write(struct cache *ca, bool wait);
+ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent);
+
+ extern struct workqueue_struct *bcache_wq;
+diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
+index 20ed838e9413..bd153234290d 100644
+--- a/drivers/md/bcache/super.c
++++ b/drivers/md/bcache/super.c
+@@ -529,12 +529,29 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op,
+ closure_sync(cl);
+ }
+
+-void bch_prio_write(struct cache *ca)
++int bch_prio_write(struct cache *ca, bool wait)
+ {
+ int i;
+ struct bucket *b;
+ struct closure cl;
+
++ pr_debug("free_prio=%zu, free_none=%zu, free_inc=%zu",
++ fifo_used(&ca->free[RESERVE_PRIO]),
++ fifo_used(&ca->free[RESERVE_NONE]),
++ fifo_used(&ca->free_inc));
++
++ /*
++ * Pre-check if there are enough free buckets. In the non-blocking
++ * scenario it's better to fail early rather than starting to allocate
++ * buckets and do a cleanup later in case of failure.
++ */
++ if (!wait) {
++ size_t avail = fifo_used(&ca->free[RESERVE_PRIO]) +
++ fifo_used(&ca->free[RESERVE_NONE]);
++ if (prio_buckets(ca) > avail)
++ return -ENOMEM;
++ }
++
+ closure_init_stack(&cl);
+
+ lockdep_assert_held(&ca->set->bucket_lock);
+@@ -544,9 +561,6 @@ void bch_prio_write(struct cache *ca)
+ atomic_long_add(ca->sb.bucket_size * prio_buckets(ca),
+ &ca->meta_sectors_written);
+
+- //pr_debug("free %zu, free_inc %zu, unused %zu", fifo_used(&ca->free),
+- // fifo_used(&ca->free_inc), fifo_used(&ca->unused));
+-
+ for (i = prio_buckets(ca) - 1; i >= 0; --i) {
+ long bucket;
+ struct prio_set *p = ca->disk_buckets;
+@@ -564,7 +578,7 @@ void bch_prio_write(struct cache *ca)
+ p->magic = pset_magic(&ca->sb);
+ p->csum = bch_crc64(&p->magic, bucket_bytes(ca) - 8);
+
+- bucket = bch_bucket_alloc(ca, RESERVE_PRIO, true);
++ bucket = bch_bucket_alloc(ca, RESERVE_PRIO, wait);
+ BUG_ON(bucket == -1);
+
+ mutex_unlock(&ca->set->bucket_lock);
+@@ -593,6 +607,7 @@ void bch_prio_write(struct cache *ca)
+
+ ca->prio_last_buckets[i] = ca->prio_buckets[i];
+ }
++ return 0;
+ }
+
+ static void prio_read(struct cache *ca, uint64_t bucket)
+@@ -1954,7 +1969,7 @@ static int run_cache_set(struct cache_set *c)
+
+ mutex_lock(&c->bucket_lock);
+ for_each_cache(ca, c, i)
+- bch_prio_write(ca);
++ bch_prio_write(ca, true);
+ mutex_unlock(&c->bucket_lock);
+
+ err = "cannot allocate new UUID bucket";
+--
+2.16.4
+
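A minimal userspace sketch of the non-blocking pre-check added to
bch_prio_write() above. The fifo counts, the prio_buckets value and the
prio_write() wrapper are made up for illustration; only the early -ENOMEM
path versus the blocking path is shown.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

static int prio_write(size_t free_prio, size_t free_none,
		      size_t prio_buckets, bool wait)
{
	if (!wait) {
		size_t avail = free_prio + free_none;

		if (prio_buckets > avail)
			return -ENOMEM;	/* caller wakes gc and retries later */
	}
	/* ... the real function would allocate and write prio buckets here ... */
	return 0;
}

int main(void)
{
	printf("non-blocking, enough buckets:  %d\n", prio_write(4, 8, 6, false));
	printf("non-blocking, too few buckets: %d\n", prio_write(1, 2, 6, false));
	printf("blocking path always proceeds: %d\n", prio_write(1, 2, 6, true));
	return 0;
}
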
diff --git a/for-test/0001-bcache-introduce-btree_cache_total_pages-into-struct.patch b/for-test/0001-bcache-introduce-btree_cache_total_pages-into-struct.patch
new file mode 100644
index 0000000..3f8238f
--- /dev/null
+++ b/for-test/0001-bcache-introduce-btree_cache_total_pages-into-struct.patch
@@ -0,0 +1,110 @@
+From db1fb64ff29474b18e07a7a3887e326dd2b891b5 Mon Sep 17 00:00:00 2001
+From: Coly Li <colyli@suse.de>
+Date: Thu, 11 Jul 2019 22:56:40 +0800
+Subject: [PATCH] bcache: introduce btree_cache_total_pages into struct
+ cache_set
+
+A new member "atomic_long_t btree_cache_total_pages" is added into
+struct cache_set, to record the total number of pages occupied by the
+bcache internal btree node cache.
+
+When mca_data_alloc() is called to allocate pages for a btree node cache,
+the number of allocated pages is added to btree_cache_total_pages. When
+mca_data_free() is called to free pages of a btree node cache, the
+number of freed pages is subtracted from btree_cache_total_pages.
+
+Then in sysfs.c:bch_cache_size(), when calculating the total pages
+occupied by the bcache btree node cache, it is unnecessary to iterate
+list c->btree_cache; c->btree_cache_total_pages can be used directly now.
+
+Now reading /sys/fs/bcache/<cache set UUID>/btree_cache_size is faster,
+and this patch is also a preparation for limiting page consumption by
+the bcache internal btree node cache.
+
+Signed-off-by: Coly Li <colyli@suse.de>
+---
+ drivers/md/bcache/bcache.h | 1 +
+ drivers/md/bcache/btree.c | 12 ++++++++----
+ drivers/md/bcache/super.c | 1 +
+ drivers/md/bcache/sysfs.c | 9 ++-------
+ 4 files changed, 12 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
+index 013e35a9e317..fe5ff867725c 100644
+--- a/drivers/md/bcache/bcache.h
++++ b/drivers/md/bcache/bcache.h
+@@ -570,6 +570,7 @@ struct cache_set {
+ struct list_head btree_cache;
+ struct list_head btree_cache_freeable;
+ struct list_head btree_cache_freed;
++ atomic_long_t btree_cache_total_pages;
+
+ /* Number of elements in btree_cache + btree_cache_freeable lists */
+ unsigned int btree_cache_used;
+diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
+index ba434d9ac720..df800e3e2dba 100644
+--- a/drivers/md/bcache/btree.c
++++ b/drivers/md/bcache/btree.c
+@@ -578,6 +578,8 @@ static void mca_data_free(struct btree *b)
+
+ bch_btree_keys_free(&b->keys);
+
++ atomic_long_sub(1<<b->keys.page_order,
++ &b->c->btree_cache_total_pages);
+ b->c->btree_cache_used--;
+ list_move(&b->list, &b->c->btree_cache_freed);
+ }
+@@ -598,11 +600,13 @@ static unsigned int btree_order(struct bkey *k)
+
+ static void mca_data_alloc(struct btree *b, struct bkey *k, gfp_t gfp)
+ {
+- if (!bch_btree_keys_alloc(&b->keys,
+- max_t(unsigned int,
++ unsigned int page_order = max_t(unsigned int,
+ ilog2(b->c->btree_pages),
+- btree_order(k)),
+- gfp)) {
++ btree_order(k));
++
++ if (!bch_btree_keys_alloc(&b->keys, page_order, gfp)) {
++ atomic_long_add(1 << page_order,
++ &b->c->btree_cache_total_pages);
+ b->c->btree_cache_used++;
+ list_move(&b->list, &b->c->btree_cache);
+ } else {
+diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
+index 26e374fbf57c..c67013b116a3 100644
+--- a/drivers/md/bcache/super.c
++++ b/drivers/md/bcache/super.c
+@@ -1781,6 +1781,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
+ INIT_LIST_HEAD(&c->btree_cache);
+ INIT_LIST_HEAD(&c->btree_cache_freeable);
+ INIT_LIST_HEAD(&c->btree_cache_freed);
++ atomic_long_set(&c->btree_cache_total_pages, 0);
+ INIT_LIST_HEAD(&c->data_buckets);
+
+ iter_size = (sb->bucket_size / sb->block_size + 1) *
+diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
+index 9f0826712845..f5065b30c994 100644
+--- a/drivers/md/bcache/sysfs.c
++++ b/drivers/md/bcache/sysfs.c
+@@ -629,14 +629,9 @@ static unsigned int bch_root_usage(struct cache_set *c)
+
+ static size_t bch_cache_size(struct cache_set *c)
+ {
+- size_t ret = 0;
+- struct btree *b;
+-
+- mutex_lock(&c->bucket_lock);
+- list_for_each_entry(b, &c->btree_cache, list)
+- ret += 1 << (b->keys.page_order + PAGE_SHIFT);
++ size_t ret;
+
+- mutex_unlock(&c->bucket_lock);
++ ret = atomic_long_read(&c->btree_cache_total_pages) << PAGE_SHIFT;
+ return ret;
+ }
+
+--
+2.16.4
+
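The bookkeeping above turns bch_cache_size() into a single atomic read. A
userspace sketch of the same idea, using C11 atomics in place of the
kernel's atomic_long_t; PAGE_SHIFT and the helper names are assumptions for
illustration, not bcache code.

#include <stdatomic.h>
#include <stdio.h>

#define PAGE_SHIFT 12

static atomic_long btree_cache_total_pages;

static void mca_data_alloc_sketch(unsigned int page_order)
{
	atomic_fetch_add(&btree_cache_total_pages, 1L << page_order);
}

static void mca_data_free_sketch(unsigned int page_order)
{
	atomic_fetch_sub(&btree_cache_total_pages, 1L << page_order);
}

static size_t cache_size_bytes(void)
{
	return (size_t)atomic_load(&btree_cache_total_pages) << PAGE_SHIFT;
}

int main(void)
{
	mca_data_alloc_sketch(2);	/* a 4-page btree node cache */
	mca_data_alloc_sketch(0);	/* a 1-page btree node cache */
	mca_data_free_sketch(0);
	printf("btree cache size: %zu bytes\n", cache_size_bytes());
	return 0;
}
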
diff --git a/for-test/0001-bcache-only-set-b-accessed-1-for-dirty-btree-node-ca.patch b/for-test/0001-bcache-only-set-b-accessed-1-for-dirty-btree-node-ca.patch
new file mode 100644
index 0000000..7ccd838
--- /dev/null
+++ b/for-test/0001-bcache-only-set-b-accessed-1-for-dirty-btree-node-ca.patch
@@ -0,0 +1,28 @@
+From 779bada095ec02a9bd400bc0a46039c4ead6c00d Mon Sep 17 00:00:00 2001
+From: Coly Li <colyli@suse.de>
+Date: Tue, 2 Jul 2019 22:30:29 +0800
+Subject: [PATCH] bcache: only set b->accessed = 1 for dirty btree node cache
+
+---
+ drivers/md/bcache/btree.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
+index ba434d9ac720..1497f1114b10 100644
+--- a/drivers/md/bcache/btree.c
++++ b/drivers/md/bcache/btree.c
+@@ -1058,7 +1058,10 @@ struct btree *bch_btree_node_get(struct cache_set *c, struct btree_op *op,
+ BUG_ON(!b->written);
+
+ b->parent = parent;
+- b->accessed = 1;
++
++	/* make clean btree nodes easier to reclaim */
++ if (!write)
++ b->accessed = 1;
+
+ for (; i <= b->keys.nsets && b->keys.set[i].size; i++) {
+ prefetch(b->keys.set[i].tree);
+--
+2.16.4
+
diff --git a/for-test/mca_limit/0001-bcache-introduce-btree_cache_total_pages-into-struct.patch b/for-test/mca_limit/0001-bcache-introduce-btree_cache_total_pages-into-struct.patch
new file mode 100644
index 0000000..3f8238f
--- /dev/null
+++ b/for-test/mca_limit/0001-bcache-introduce-btree_cache_total_pages-into-struct.patch
@@ -0,0 +1,110 @@
+From db1fb64ff29474b18e07a7a3887e326dd2b891b5 Mon Sep 17 00:00:00 2001
+From: Coly Li <colyli@suse.de>
+Date: Thu, 11 Jul 2019 22:56:40 +0800
+Subject: [PATCH] bcache: introduce btree_cache_total_pages into struct
+ cache_set
+
+A new member "atomic_long_t btree_cache_total_pages" is added into
+struct cache_set, to record the total number of pages occupied by the
+bcache internal btree node cache.
+
+When mca_data_alloc() is called to allocate pages for a btree node cache,
+the number of allocated pages is added to btree_cache_total_pages. When
+mca_data_free() is called to free pages of a btree node cache, the
+number of freed pages is subtracted from btree_cache_total_pages.
+
+Then in sysfs.c:bch_cache_size(), when calculating the total pages
+occupied by the bcache btree node cache, it is unnecessary to iterate
+list c->btree_cache; c->btree_cache_total_pages can be used directly now.
+
+Now reading /sys/fs/bcache/<cache set UUID>/btree_cache_size is faster,
+and this patch is also a preparation for limiting page consumption by
+the bcache internal btree node cache.
+
+Signed-off-by: Coly Li <colyli@suse.de>
+---
+ drivers/md/bcache/bcache.h | 1 +
+ drivers/md/bcache/btree.c | 12 ++++++++----
+ drivers/md/bcache/super.c | 1 +
+ drivers/md/bcache/sysfs.c | 9 ++-------
+ 4 files changed, 12 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
+index 013e35a9e317..fe5ff867725c 100644
+--- a/drivers/md/bcache/bcache.h
++++ b/drivers/md/bcache/bcache.h
+@@ -570,6 +570,7 @@ struct cache_set {
+ struct list_head btree_cache;
+ struct list_head btree_cache_freeable;
+ struct list_head btree_cache_freed;
++ atomic_long_t btree_cache_total_pages;
+
+ /* Number of elements in btree_cache + btree_cache_freeable lists */
+ unsigned int btree_cache_used;
+diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
+index ba434d9ac720..df800e3e2dba 100644
+--- a/drivers/md/bcache/btree.c
++++ b/drivers/md/bcache/btree.c
+@@ -578,6 +578,8 @@ static void mca_data_free(struct btree *b)
+
+ bch_btree_keys_free(&b->keys);
+
++ atomic_long_sub(1<<b->keys.page_order,
++ &b->c->btree_cache_total_pages);
+ b->c->btree_cache_used--;
+ list_move(&b->list, &b->c->btree_cache_freed);
+ }
+@@ -598,11 +600,13 @@ static unsigned int btree_order(struct bkey *k)
+
+ static void mca_data_alloc(struct btree *b, struct bkey *k, gfp_t gfp)
+ {
+- if (!bch_btree_keys_alloc(&b->keys,
+- max_t(unsigned int,
++ unsigned int page_order = max_t(unsigned int,
+ ilog2(b->c->btree_pages),
+- btree_order(k)),
+- gfp)) {
++ btree_order(k));
++
++ if (!bch_btree_keys_alloc(&b->keys, page_order, gfp)) {
++ atomic_long_add(1 << page_order,
++ &b->c->btree_cache_total_pages);
+ b->c->btree_cache_used++;
+ list_move(&b->list, &b->c->btree_cache);
+ } else {
+diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
+index 26e374fbf57c..c67013b116a3 100644
+--- a/drivers/md/bcache/super.c
++++ b/drivers/md/bcache/super.c
+@@ -1781,6 +1781,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
+ INIT_LIST_HEAD(&c->btree_cache);
+ INIT_LIST_HEAD(&c->btree_cache_freeable);
+ INIT_LIST_HEAD(&c->btree_cache_freed);
++ atomic_long_set(&c->btree_cache_total_pages, 0);
+ INIT_LIST_HEAD(&c->data_buckets);
+
+ iter_size = (sb->bucket_size / sb->block_size + 1) *
+diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
+index 9f0826712845..f5065b30c994 100644
+--- a/drivers/md/bcache/sysfs.c
++++ b/drivers/md/bcache/sysfs.c
+@@ -629,14 +629,9 @@ static unsigned int bch_root_usage(struct cache_set *c)
+
+ static size_t bch_cache_size(struct cache_set *c)
+ {
+- size_t ret = 0;
+- struct btree *b;
+-
+- mutex_lock(&c->bucket_lock);
+- list_for_each_entry(b, &c->btree_cache, list)
+- ret += 1 << (b->keys.page_order + PAGE_SHIFT);
++ size_t ret;
+
+- mutex_unlock(&c->bucket_lock);
++ ret = atomic_long_read(&c->btree_cache_total_pages) << PAGE_SHIFT;
+ return ret;
+ }
+
+--
+2.16.4
+
diff --git a/for-test/mca_limit/0001-bcache-restrict-mca-pages-consumption.patch b/for-test/mca_limit/0001-bcache-restrict-mca-pages-consumption.patch
new file mode 100644
index 0000000..7996bd4
--- /dev/null
+++ b/for-test/mca_limit/0001-bcache-restrict-mca-pages-consumption.patch
@@ -0,0 +1,149 @@
+From 82cfcce9f62cfb5c4a00d774525a33531a6e7091 Mon Sep 17 00:00:00 2001
+From: Coly Li <colyli@suse.de>
+Date: Tue, 23 Jul 2019 01:11:28 +0800
+Subject: [PATCH] bcache: restrict mca pages consumption
+
+Signed-off-by: Coly Li <colyli@suse.de>
+---
+ drivers/md/bcache/bcache.h | 4 +--
+ drivers/md/bcache/btree.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++
+ drivers/md/bcache/btree.h | 6 +++++
+ drivers/md/bcache/super.c | 2 +-
+ 4 files changed, 72 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
+index fe5ff867725c..c245b656a057 100644
+--- a/drivers/md/bcache/bcache.h
++++ b/drivers/md/bcache/bcache.h
+@@ -498,7 +498,7 @@ struct gc_stat {
+ #define CACHE_SET_STOPPING 1
+ #define CACHE_SET_RUNNING 2
+ #define CACHE_SET_IO_DISABLE 3
+-
++#define CACHE_SET_MCA_SHRINKING 4
+ struct cache_set {
+ struct closure cl;
+
+@@ -571,9 +571,9 @@ struct cache_set {
+ struct list_head btree_cache_freeable;
+ struct list_head btree_cache_freed;
+ atomic_long_t btree_cache_total_pages;
+-
+ /* Number of elements in btree_cache + btree_cache_freeable lists */
+ unsigned int btree_cache_used;
++ struct work_struct btree_cache_shrink_work;
+
+ /*
+ * If we need to allocate memory for a new btree node and that
+diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
+index df800e3e2dba..29d33d42096b 100644
+--- a/drivers/md/bcache/btree.c
++++ b/drivers/md/bcache/btree.c
+@@ -827,6 +827,69 @@ void bch_btree_cache_free(struct cache_set *c)
+ mutex_unlock(&c->bucket_lock);
+ }
+
++static inline unsigned int get_mca_consume_percent(struct cache_set *c)
++{
++ return (unsigned int)
++ (atomic_long_read(&c->btree_cache_total_pages) * 100) /
++ totalram_pages();
++}
++
++void bch_mca_shrink_work(struct work_struct *w)
++{
++ unsigned int mca_consume_percent, shrink_target_percent;
++ unsigned int loop_nr = 0;
++ struct cache_set *c =
++ container_of(w, struct cache_set, btree_cache_shrink_work);
++
++ if (c->shrinker_disabled) {
++ pr_info_ratelimited("shrinker_disabled set, no shrink");
++ goto out;
++ }
++
++ mca_consume_percent = get_mca_consume_percent(c);
++
++ if (mca_consume_percent >= BCH_MCA_WMARK_HIGH)
++ shrink_target_percent =
++ BCH_MCA_WMARK_LOW - BCH_MCA_SHRINK_HYSTERESIS;
++ else if (mca_consume_percent >= BCH_MCA_WMARK_LOW)
++ shrink_target_percent =
++ mca_consume_percent - BCH_MCA_SHRINK_HYSTERESIS;
++ else {
++ pr_info("mca_consume_percent: %u%%, not shrink",
++ mca_consume_percent);
++ goto out;
++ }
++
++ while (mca_consume_percent > shrink_target_percent) {
++ struct shrink_control sc;
++
++ sc.gfp_mask = GFP_KERNEL;
++ sc.nr_to_scan = c->btree_cache_used * c->btree_pages / 10;
++ c->shrink.scan_objects(&c->shrink, &sc);
++ mca_consume_percent = get_mca_consume_percent(c);
++ loop_nr++;
++ }
++
++	pr_info("loop_nr: %u for consume_percent(%u):target_percent(%u)",
++		loop_nr, mca_consume_percent, shrink_target_percent);
++
++out:
++	if (!test_and_clear_bit(CACHE_SET_MCA_SHRINKING, &c->flags))
++ WARN(1, "CACHE_SET_MCA_SHRINKING cleared already");
++
++ closure_put(&c->cl);
++}
++
++/* Proactively shrink mca pages for low watermark */
++void bch_mca_cache_shrink(struct cache_set *c)
++{
++ if (test_and_set_bit(CACHE_SET_MCA_SHRINKING, &c->flags))
++ return;
++
++ closure_get(&c->cl);
++ queue_work(system_wq, &c->btree_cache_shrink_work);
++}
++
+ int bch_btree_cache_alloc(struct cache_set *c)
+ {
+ unsigned int i;
+diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
+index 76cfd121a486..665a111ad9ce 100644
+--- a/drivers/md/bcache/btree.h
++++ b/drivers/md/bcache/btree.h
+@@ -102,6 +102,10 @@
+ #include "bset.h"
+ #include "debug.h"
+
++#define BCH_MCA_WMARK_HIGH 80
++#define BCH_MCA_WMARK_LOW 60
++#define BCH_MCA_SHRINK_HYSTERESIS 10
++
+ struct btree_write {
+ atomic_t *journal;
+
+@@ -331,4 +335,6 @@ struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *c,
+ struct bkey *end,
+ keybuf_pred_fn *pred);
+ void bch_update_bucket_in_use(struct cache_set *c, struct gc_stat *stats);
++void bch_mca_cache_shrink(struct cache_set *c);
++void bch_mca_shrink_work(struct work_struct *w);
+ #endif
+diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
+index c67013b116a3..87c44e3475cd 100644
+--- a/drivers/md/bcache/super.c
++++ b/drivers/md/bcache/super.c
+@@ -1783,7 +1783,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
+ INIT_LIST_HEAD(&c->btree_cache_freed);
+ atomic_long_set(&c->btree_cache_total_pages, 0);
+ INIT_LIST_HEAD(&c->data_buckets);
+-
++ INIT_WORK(&c->btree_cache_shrink_work, bch_mca_shrink_work);
+ iter_size = (sb->bucket_size / sb->block_size + 1) *
+ sizeof(struct btree_iter_set);
+
+--
+2.16.4
+
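The watermark arithmetic in bch_mca_shrink_work() can be checked in
isolation. Below is a userspace sketch (not bcache code) that reuses the
three constants from btree.h and prints the shrink target for a few sample
consumption percentages; -1 stands for "no shrink needed".

#include <stdio.h>

#define BCH_MCA_WMARK_HIGH		80
#define BCH_MCA_WMARK_LOW		60
#define BCH_MCA_SHRINK_HYSTERESIS	10

/* returns the target consumption percentage, or -1 when no shrink is needed */
static int shrink_target(unsigned int consume_percent)
{
	if (consume_percent >= BCH_MCA_WMARK_HIGH)
		return BCH_MCA_WMARK_LOW - BCH_MCA_SHRINK_HYSTERESIS;
	if (consume_percent >= BCH_MCA_WMARK_LOW)
		return consume_percent - BCH_MCA_SHRINK_HYSTERESIS;
	return -1;
}

int main(void)
{
	unsigned int samples[] = { 95, 80, 75, 60, 59, 20 };
	unsigned int i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("consume %u%% -> target %d%%\n",
		       samples[i], shrink_target(samples[i]));
	return 0;
}

Consumption at or above the high watermark is pulled below the low
watermark, consumption between the two watermarks is reduced by the
hysteresis amount, and anything under the low watermark is left alone.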