aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorColy Li <colyli@suse.de>2019-06-11 21:40:40 +0800
committerColy Li <colyli@suse.de>2019-06-11 21:40:40 +0800
commitf520123a62a6ff495d60cc22babd77c3685f3029 (patch)
tree45867ee465e0472e36e972f627549fffdc312f3f
parentab6809b961f320d7bc80af3b81a10fe20e894c54 (diff)
downloadbcache-patches-f520123a62a6ff495d60cc22babd77c3685f3029.tar.gz
for-test: remove out of date patches
-rw-r--r--for-test/0001-bcache-add-w_data_avg.patch91
-rw-r--r--for-test/0023-bcache-use-bcache_mod_wq-to-replace-system-wide-syst.patch107
-rw-r--r--for-test/0026-bcache-move-dc-io_disable-into-dc-flags.patch170
-rw-r--r--for-test/0029-bcache-replace-system_wq-to-bcache_mod_wq.patch104
-rw-r--r--for-test/jouranl-deadlock/v1/v1-0000-cover-letter.patch59
-rw-r--r--for-test/jouranl-deadlock/v1/v1-0001-bcache-acquire-c-journal.lock-in-bch_btree_leaf_d.patch52
-rw-r--r--for-test/jouranl-deadlock/v1/v1-0002-bcache-move-definition-of-int-ret-out-of-macro-re.patch50
-rw-r--r--for-test/jouranl-deadlock/v1/v1-0003-bcache-never-set-0-to-KEY_PTRS-of-jouranl-key-in-.patch93
-rw-r--r--for-test/jouranl-deadlock/v1/v1-0004-bcache-reload-jouranl-key-information-during-jour.patch160
-rw-r--r--for-test/jouranl-deadlock/v1/v1-0005-bcache-fix-journal-deadlock-during-jouranl-replay.patch275
-rw-r--r--for-test/jouranl-deadlock/v1/v1-0006-bcache-reserve-space-for-journal_meta-in-run-time.patch241
-rw-r--r--for-test/jouranl-deadlock/v2/v2-0000-cover-letter.patch87
-rw-r--r--for-test/jouranl-deadlock/v2/v2-0001-bcache-move-definition-of-int-ret-out-of-macro-re.patch50
-rw-r--r--for-test/jouranl-deadlock/v2/v2-0002-bcache-never-set-0-to-KEY_PTRS-of-jouranl-key-in-.patch94
-rw-r--r--for-test/jouranl-deadlock/v2/v2-0003-bcache-reload-jouranl-key-information-during-jour.patch161
-rw-r--r--for-test/jouranl-deadlock/v2/v2-0004-bcache-fix-journal-deadlock-during-jouranl-replay.patch276
-rw-r--r--for-test/jouranl-deadlock/v2/v2-0005-bcache-reserve-space-for-journal_meta-in-run-time.patch241
-rw-r--r--for-test/jouranl-deadlock/v2/v2-0006-bcache-add-failure-check-to-run_cache_set-for-jou.patch88
-rw-r--r--for-test/jouranl-deadlock/v2/v2-0007-bcache-add-comments-for-kobj-release-callback-rou.patch62
-rw-r--r--for-test/jouranl-deadlock/v2/v2-0008-bcache-return-error-immediately-in-bch_journal_re.patch48
-rw-r--r--for-test/jouranl-deadlock/v2/v2-0009-bcache-add-error-check-for-calling-register_bdev.patch91
-rw-r--r--for-test/jouranl-deadlock/v2/v2-0010-bcache-Add-comments-for-blkdev_put-in-registratio.patch51
-rw-r--r--for-test/jouranl-deadlock/v2/v2-0011-bcache-add-comments-for-closure_fn-to-be-called-i.patch42
-rw-r--r--for-test/jouranl-deadlock/v2/v2-0012-bcache-add-pendings_cleanup-to-stop-pending-bcach.patch107
-rw-r--r--for-test/jouranl-deadlock/v2/v2-0013-bcache-fix-fifo-index-swapping-condition-in-btree.patch90
-rw-r--r--for-test/jouranl-deadlock/v2/v2-0014-bcache-try-to-flush-btree-nodes-as-many-as-possib.patch82
-rw-r--r--for-test/jouranl-deadlock/v2/v2-0015-bcache-improve-bcache_reboot.patch50
-rw-r--r--for-test/jouranl-deadlock/v2/v2-0016-bcache-introduce-spinlock_t-flush_write_lock-in-s.patch74
28 files changed, 0 insertions, 3096 deletions
diff --git a/for-test/0001-bcache-add-w_data_avg.patch b/for-test/0001-bcache-add-w_data_avg.patch
deleted file mode 100644
index 3b6e089..0000000
--- a/for-test/0001-bcache-add-w_data_avg.patch
+++ /dev/null
@@ -1,91 +0,0 @@
-From 47e164ffcae5dc3e03bff72a0787652fa5aaf057 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Fri, 7 Dec 2018 23:52:39 +0800
-Subject: [PATCH] bcache: add w_data_avg
-
-To record average write size for journal w[]->data.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/journal.c | 8 +++++++-
- drivers/md/bcache/journal.h | 1 +
- drivers/md/bcache/sysfs.c | 4 ++++
- 3 files changed, 12 insertions(+), 1 deletion(-)
-
-diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index 522c7426f3a0..569aa1484ac0 100644
---- a/drivers/md/bcache/journal.c
-+++ b/drivers/md/bcache/journal.c
-@@ -613,7 +613,7 @@ static void journal_write_unlocked(struct closure *cl)
- struct bkey *k = &c->journal.key;
- unsigned int i, sectors = set_blocks(w->data, block_bytes(c)) *
- c->sb.block_size;
--
-+ int w_data_avg;
- struct bio *bio;
- struct bio_list list;
-
-@@ -671,6 +671,11 @@ static void journal_write_unlocked(struct closure *cl)
- ca->journal.seq[ca->journal.cur_idx] = w->data->seq;
- }
-
-+ /* record average size of written w->data in sectors */
-+ w_data_avg = atomic_read(&c->journal.w_data_avg);
-+ w_data_avg = ewma_add(w_data_avg, sectors, 8, 4);
-+ atomic_set(&c->journal.w_data_avg, w_data_avg);
-+
- atomic_dec_bug(&fifo_back(&c->journal.pin));
- bch_journal_next(&c->journal);
- journal_reclaim(c);
-@@ -845,6 +850,7 @@ int bch_journal_alloc(struct cache_set *c)
-
- j->w[0].c = c;
- j->w[1].c = c;
-+ atomic_set(&j->w_data_avg, 0);
-
- if (!(init_heap(&c->flush_btree, 128, GFP_KERNEL)) ||
- !(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) ||
-diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h
-index 66f0facff84b..3be9d7f72d5a 100644
---- a/drivers/md/bcache/journal.h
-+++ b/drivers/md/bcache/journal.h
-@@ -117,6 +117,7 @@ struct journal {
- BKEY_PADDED(key);
-
- struct journal_write w[2], *cur;
-+ atomic_t w_data_avg;
- };
-
- /*
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index 26f035a0c5b9..d3b56cd3b794 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -67,6 +67,7 @@ read_attribute(written);
- read_attribute(btree_written);
- read_attribute(metadata_written);
- read_attribute(active_journal_entries);
-+read_attribute(w_data_avg);
-
- sysfs_time_stats_attribute(btree_gc, sec, ms);
- sysfs_time_stats_attribute(btree_split, sec, us);
-@@ -669,6 +670,8 @@ SHOW(__bch_cache_set)
- c->congested_write_threshold_us);
-
- sysfs_print(active_journal_entries, fifo_used(&c->journal.pin));
-+ sysfs_print(w_data_avg,
-+ atomic_read(&c->journal.w_data_avg));
- sysfs_printf(verify, "%i", c->verify);
- sysfs_printf(key_merging_disabled, "%i", c->key_merging_disabled);
- sysfs_printf(expensive_debug_checks,
-@@ -841,6 +844,7 @@ KTYPE(bch_cache_set);
-
- static struct attribute *bch_cache_set_internal_files[] = {
- &sysfs_active_journal_entries,
-+ &sysfs_w_data_avg,
-
- sysfs_time_stats_attribute_list(btree_gc, sec, ms)
- sysfs_time_stats_attribute_list(btree_split, sec, us)
---
-2.16.4
-
diff --git a/for-test/0023-bcache-use-bcache_mod_wq-to-replace-system-wide-syst.patch b/for-test/0023-bcache-use-bcache_mod_wq-to-replace-system-wide-syst.patch
deleted file mode 100644
index ecb5500..0000000
--- a/for-test/0023-bcache-use-bcache_mod_wq-to-replace-system-wide-syst.patch
+++ /dev/null
@@ -1,107 +0,0 @@
-From fa0d3525fd1572c44f2568513670dc7742c62ccd Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sun, 2 Jun 2019 00:36:18 +0800
-Subject: [PATCH 23/32] bcache: use bcache_mod_wq to replace system wide
- system_wq
-
-to prevent blocking in a bcache worker from stalling other kernel
-subsystems' kworkers (e.g. network).
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/super.c | 21 ++++++++++++++-------
- 1 file changed, 14 insertions(+), 7 deletions(-)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 97dbe3151a9c..915ff9365ec6 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -47,6 +47,7 @@ static LIST_HEAD(uncached_devices);
- static int bcache_major;
- static DEFINE_IDA(bcache_device_idx);
- static wait_queue_head_t unregister_wait;
-+struct workqueue_struct *bcache_mod_wq;
- struct workqueue_struct *bcache_wq;
- struct workqueue_struct *bch_journal_wq;
-
-@@ -1260,7 +1261,7 @@ static void cached_dev_flush(struct closure *cl)
- bch_cache_accounting_destroy(&dc->accounting);
- kobject_del(&d->kobj);
-
-- continue_at(cl, cached_dev_free, system_wq);
-+ continue_at(cl, cached_dev_free, bcache_mod_wq);
- }
-
- static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
-@@ -1272,7 +1273,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
- __module_get(THIS_MODULE);
- INIT_LIST_HEAD(&dc->list);
- closure_init(&dc->disk.cl, NULL);
-- set_closure_fn(&dc->disk.cl, cached_dev_flush, system_wq);
-+ set_closure_fn(&dc->disk.cl, cached_dev_flush, bcache_mod_wq);
- kobject_init(&dc->disk.kobj, &bch_cached_dev_ktype);
- INIT_WORK(&dc->detach, cached_dev_detach_finish);
- sema_init(&dc->sb_write_mutex, 1);
-@@ -1395,7 +1396,7 @@ static void flash_dev_flush(struct closure *cl)
- bcache_device_unlink(d);
- mutex_unlock(&bch_register_lock);
- kobject_del(&d->kobj);
-- continue_at(cl, flash_dev_free, system_wq);
-+ continue_at(cl, flash_dev_free, bcache_mod_wq);
- }
-
- static int flash_dev_run(struct cache_set *c, struct uuid_entry *u)
-@@ -1406,7 +1407,7 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u)
- return -ENOMEM;
-
- closure_init(&d->cl, NULL);
-- set_closure_fn(&d->cl, flash_dev_flush, system_wq);
-+ set_closure_fn(&d->cl, flash_dev_flush, bcache_mod_wq);
-
- kobject_init(&d->kobj, &bch_flash_dev_ktype);
-
-@@ -1714,7 +1715,7 @@ static void __cache_set_unregister(struct closure *cl)
-
- mutex_unlock(&bch_register_lock);
-
-- continue_at(cl, cache_set_flush, system_wq);
-+ continue_at(cl, cache_set_flush, bcache_mod_wq);
- }
-
- void bch_cache_set_stop(struct cache_set *c)
-@@ -1743,10 +1744,10 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
-
- __module_get(THIS_MODULE);
- closure_init(&c->cl, NULL);
-- set_closure_fn(&c->cl, cache_set_free, system_wq);
-+ set_closure_fn(&c->cl, cache_set_free, bcache_mod_wq);
-
- closure_init(&c->caching, &c->cl);
-- set_closure_fn(&c->caching, __cache_set_unregister, system_wq);
-+ set_closure_fn(&c->caching, __cache_set_unregister, bcache_mod_wq);
-
- /* Maybe create continue_at_noreturn() and use it here? */
- closure_set_stopped(&c->cl);
-@@ -2583,6 +2584,8 @@ static void bcache_exit(void)
- bch_request_exit();
- if (bcache_kobj)
- kobject_put(bcache_kobj);
-+ if (bcache_mod_wq)
-+ destroy_workqueue(bcache_mod_wq);
- if (bcache_wq)
- destroy_workqueue(bcache_wq);
- if (bch_journal_wq)
-@@ -2642,6 +2645,10 @@ static int __init bcache_init(void)
- return bcache_major;
- }
-
-+ bcache_mod_wq = alloc_workqueue("bcache_mod_wq", WQ_MEM_RECLAIM, 0);
-+ if (!bcache_mod_wq)
-+ goto err;
-+
- bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0);
- if (!bcache_wq)
- goto err;
---
-2.16.4
-
diff --git a/for-test/0026-bcache-move-dc-io_disable-into-dc-flags.patch b/for-test/0026-bcache-move-dc-io_disable-into-dc-flags.patch
deleted file mode 100644
index 705b89a..0000000
--- a/for-test/0026-bcache-move-dc-io_disable-into-dc-flags.patch
+++ /dev/null
@@ -1,170 +0,0 @@
-From 3153d5b784eb8a6008cbd7a6087d8eaf1e8f9fe8 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sun, 2 Jun 2019 01:41:01 +0800
-Subject: [PATCH 26/32] bcache: move dc->io_disable into dc->flags
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/bcache.h | 3 ++-
- drivers/md/bcache/request.c | 4 ++--
- drivers/md/bcache/super.c | 36 ++++++++++++++++++++++--------------
- drivers/md/bcache/sysfs.c | 9 +++++++--
- 4 files changed, 33 insertions(+), 19 deletions(-)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index 013e35a9e317..ccfc3b245462 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -362,7 +362,8 @@ struct cached_dev {
- unsigned int sequential_cutoff;
- unsigned int readahead;
-
-- unsigned int io_disable:1;
-+#define CACHED_DEV_IO_DISABLED 0
-+ unsigned long flags;
- unsigned int verify:1;
- unsigned int bypass_torture_test:1;
-
-diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
-index 41adcd1546f1..4bdf5be04c0a 100644
---- a/drivers/md/bcache/request.c
-+++ b/drivers/md/bcache/request.c
-@@ -1175,7 +1175,7 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
- int rw = bio_data_dir(bio);
-
- if (unlikely((d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags)) ||
-- dc->io_disable)) {
-+ test_bit(CACHED_DEV_IO_DISABLED, &dc->flags))) {
- bio->bi_status = BLK_STS_IOERR;
- bio_endio(bio);
- return BLK_QC_T_NONE;
-@@ -1236,7 +1236,7 @@ static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode,
- {
- struct cached_dev *dc = container_of(d, struct cached_dev, disk);
-
-- if (dc->io_disable)
-+ if (test_bit(CACHED_DEV_IO_DISABLED, &dc->flags))
- return -EIO;
-
- return __blkdev_driver_ioctl(dc->bdev, mode, cmd, arg);
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index bf28a51dbdea..c219a1aeef02 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -888,10 +888,11 @@ static int cached_dev_status_update(void *arg)
-
- /*
- * If this delayed worker is stopping outside, directly quit here.
-- * dc->io_disable might be set via sysfs interface, so check it
-- * here too.
-+ * CACHED_DEV_IO_DISABLED might be set via sysfs interface, so check
-+ * it here too.
- */
-- while (!kthread_should_stop() && !dc->io_disable) {
-+ while (!kthread_should_stop() &&
-+ !test_bit(CACHED_DEV_IO_DISABLED, &dc->flags)) {
- q = bdev_get_queue(dc->bdev);
- if (blk_queue_dying(q))
- dc->offline_seconds++;
-@@ -904,8 +905,11 @@ static int cached_dev_status_update(void *arg)
- BACKING_DEV_OFFLINE_TIMEOUT);
- pr_err("%s: disable I/O request due to backing "
- "device offline", dc->disk.name);
-- dc->io_disable = true;
-- /* let others know earlier that io_disable is true */
-+ set_bit(CACHED_DEV_IO_DISABLED, &dc->flags);
-+ /*
-+ * let others know earlier that CACHED_DEV_IO_DISABLED
-+ * is set.
-+ */
- smp_mb();
- bcache_device_stop(&dc->disk);
- break;
-@@ -929,7 +933,7 @@ int bch_cached_dev_run(struct cached_dev *dc)
- NULL,
- };
-
-- if (dc->io_disable) {
-+ if (test_bit(CACHED_DEV_IO_DISABLED, &dc->flags)) {
- pr_err("I/O disabled on cached dev %s",
- dc->backing_dev_name);
- return -EIO;
-@@ -1305,7 +1309,11 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
- q->backing_dev_info->ra_pages);
-
- atomic_set(&dc->io_errors, 0);
-- dc->io_disable = false;
-+ /*
-+ * Clear following bit position in dc->flags
-+ * - CACHED_DEV_IO_DISABLED
-+ */
-+ dc->flags = 0;
- dc->error_limit = DEFAULT_CACHED_DEV_ERROR_LIMIT;
- /* default to auto */
- dc->stop_when_cache_set_failed = BCH_CACHED_DEV_STOP_AUTO;
-@@ -1480,8 +1488,8 @@ bool bch_cached_dev_error(struct cached_dev *dc)
- if (!dc || test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags))
- return false;
-
-- dc->io_disable = true;
-- /* make others know io_disable is true earlier */
-+ set_bit(CACHED_DEV_IO_DISABLED, &dc->flags);
-+ /* make others know CACHED_DEV_IO_DISABLED is set earlier */
- smp_mb();
-
- pr_err("stop %s: too many IO errors on backing device %s\n",
-@@ -1489,7 +1497,7 @@ bool bch_cached_dev_error(struct cached_dev *dc)
-
- /*
- * If the cached device is still attached to a cache set,
-- * even dc->io_disable is true and no more I/O requests
-+ * even CACHED_DEV_IO_DISABLED is set and no more I/O requests
- * accepted, cache device internal I/O (writeback scan or
- * garbage collection) may still prevent bcache device from
- * being stopped. So here CACHE_SET_IO_DISABLE should be
-@@ -1672,11 +1680,11 @@ static void conditional_stop_bcache_device(struct cache_set *c,
- * behavior may also introduce potential inconsistence
- * data in writeback mode while cache is dirty.
- * Therefore before calling bcache_device_stop() due
-- * to a broken cache device, dc->io_disable should be
-- * explicitly set to true.
-+ * to a broken cache device, CACHED_DEV_IO_DISABLED should
-+ * be explicitly set.
- */
-- dc->io_disable = true;
-- /* make others know io_disable is true earlier */
-+ set_bit(CACHED_DEV_IO_DISABLED, &dc->flags);
-+ /* make others know CACHED_DEV_IO_DISABLED is set earlier */
- smp_mb();
- bcache_device_stop(d);
- } else {
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index 4ab15442cab5..4bb1592270b1 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -180,7 +180,8 @@ SHOW(__bch_cached_dev)
- wb ? atomic_long_read(&dc->writeback_rate.rate) << 9 : 0);
- sysfs_hprint(io_errors, atomic_read(&dc->io_errors));
- sysfs_printf(io_error_limit, "%i", dc->error_limit);
-- sysfs_printf(io_disable, "%i", dc->io_disable);
-+ sysfs_printf(io_disable, "%i",
-+ (int)test_bit(CACHED_DEV_IO_DISABLED, &dc->flags));
- var_print(writeback_rate_update_seconds);
- var_print(writeback_rate_i_term_inverse);
- var_print(writeback_rate_p_term_inverse);
-@@ -319,7 +320,11 @@ STORE(__cached_dev)
- if (attr == &sysfs_io_disable) {
- int v = strtoul_or_return(buf);
-
-- dc->io_disable = v ? 1 : 0;
-+ if (v > 0)
-+ set_bit(CACHED_DEV_IO_DISABLED, &dc->flags);
-+ else
-+ clear_bit(CACHED_DEV_IO_DISABLED, &dc->flags);
-+ return size;
- }
-
- sysfs_strtoul_clamp(sequential_cutoff,
---
-2.16.4
-
diff --git a/for-test/0029-bcache-replace-system_wq-to-bcache_mod_wq.patch b/for-test/0029-bcache-replace-system_wq-to-bcache_mod_wq.patch
deleted file mode 100644
index 4897e85..0000000
--- a/for-test/0029-bcache-replace-system_wq-to-bcache_mod_wq.patch
+++ /dev/null
@@ -1,104 +0,0 @@
-From b51fb8f54a265b7734d916016c20889a92ca0882 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sun, 2 Jun 2019 18:55:09 +0800
-Subject: [PATCH 29/32] bcache: replace system_wq to bcache_mod_wq
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/bcache.h | 3 ++-
- drivers/md/bcache/btree.c | 4 ++--
- drivers/md/bcache/journal.c | 2 +-
- drivers/md/bcache/sysfs.c | 2 +-
- drivers/md/bcache/writeback.c | 4 ++--
- 5 files changed, 8 insertions(+), 7 deletions(-)
-
-diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
-index aae69060db7a..e7f0c42ab234 100644
---- a/drivers/md/bcache/bcache.h
-+++ b/drivers/md/bcache/bcache.h
-@@ -870,10 +870,11 @@ do { \
- for (b = (ca)->buckets + (ca)->sb.first_bucket; \
- b < (ca)->buckets + (ca)->sb.nbuckets; b++)
-
-+extern struct workqueue_struct *bcache_mod_wq;
- static inline void cached_dev_put(struct cached_dev *dc)
- {
- if (refcount_dec_and_test(&dc->count))
-- schedule_work(&dc->detach);
-+ queue_work(bcache_mod_wq, &dc->detach);
- }
-
- static inline bool cached_dev_get(struct cached_dev *dc)
-diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
-index c0dd8fde37af..8325a2d11717 100644
---- a/drivers/md/bcache/btree.c
-+++ b/drivers/md/bcache/btree.c
-@@ -366,7 +366,7 @@ static void __btree_node_write_done(struct closure *cl)
- btree_complete_write(b, w);
-
- if (btree_node_dirty(b))
-- schedule_delayed_work(&b->work, 30 * HZ);
-+ queue_delayed_work(bcache_mod_wq, &b->work, 30 * HZ);
-
- closure_return_with_destructor(cl, btree_node_write_unlock);
- }
-@@ -539,7 +539,7 @@ static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref)
- BUG_ON(!i->keys);
-
- if (!btree_node_dirty(b))
-- schedule_delayed_work(&b->work, 30 * HZ);
-+ queue_delayed_work(bcache_mod_wq, &b->work, 30 * HZ);
-
- set_btree_node_dirty(b);
-
-diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index ef4142c623fe..646e0386de4a 100644
---- a/drivers/md/bcache/journal.c
-+++ b/drivers/md/bcache/journal.c
-@@ -887,7 +887,7 @@ atomic_t *bch_journal(struct cache_set *c,
- journal_try_write(c);
- } else if (!w->dirty) {
- w->dirty = true;
-- schedule_delayed_work(&c->journal.work,
-+ queue_delayed_work(bcache_mod_wq, &c->journal.work,
- msecs_to_jiffies(c->journal_delay_ms));
- spin_unlock(&c->journal.lock);
- } else {
-diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
-index 4bb1592270b1..849146d539c9 100644
---- a/drivers/md/bcache/sysfs.c
-+++ b/drivers/md/bcache/sysfs.c
-@@ -447,7 +447,7 @@ STORE(bch_cached_dev)
-
- if (attr == &sysfs_writeback_percent)
- if (!test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
-- schedule_delayed_work(&dc->writeback_rate_update,
-+ queue_delayed_work(bcache_mod_wq, &dc->writeback_rate_update,
- dc->writeback_rate_update_seconds * HZ);
-
- mutex_unlock(&bch_register_lock);
-diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
-index 73f0efac2b9f..54f68ae9d343 100644
---- a/drivers/md/bcache/writeback.c
-+++ b/drivers/md/bcache/writeback.c
-@@ -212,7 +212,7 @@ static void update_writeback_rate(struct work_struct *work)
- */
- if (test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) &&
- !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
-- schedule_delayed_work(&dc->writeback_rate_update,
-+ queue_delayed_work(bcache_mod_wq, &dc->writeback_rate_update,
- dc->writeback_rate_update_seconds * HZ);
- }
-
-@@ -835,7 +835,7 @@ int bch_cached_dev_writeback_start(struct cached_dev *dc)
- dc->writeback_running = true;
-
- WARN_ON(test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
-- schedule_delayed_work(&dc->writeback_rate_update,
-+ queue_delayed_work(bcache_mod_wq, &dc->writeback_rate_update,
- dc->writeback_rate_update_seconds * HZ);
-
- bch_writeback_queue(dc);
---
-2.16.4
-
diff --git a/for-test/jouranl-deadlock/v1/v1-0000-cover-letter.patch b/for-test/jouranl-deadlock/v1/v1-0000-cover-letter.patch
deleted file mode 100644
index d2e85ad..0000000
--- a/for-test/jouranl-deadlock/v1/v1-0000-cover-letter.patch
+++ /dev/null
@@ -1,59 +0,0 @@
-From 60b326d839c8df0528c9567db590173a8d11060b Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sat, 2 Mar 2019 21:22:28 +0800
-Subject: [RFC PATCH v1 0/6] bcache: fix journal no-space deadlock
-
-Hi folks,
-
-Sorry for taking such a long time to compose a fix patch set for bcache
-journal deadlock issue since the first time it was reported.
-
-The initial issue was known as several kthreads or kworkers being reported
-by the kernel to hang for quite a long time. The reason was a deadlock that
-happened when there was no more journal space available for newly arriving
-journal requests.
-
-Finally it turns out there are two conditions under which the journal
-no-space deadlock may happen: one is at journal replay time, the other is
-in normal journal runtime.
-
-During my testing, I found the deadlock still existed with my fixes; after
-a lot of effort, three other related bugs were found and fixed.
-
-Now the patch set has been under test on two machines for 3+ hours, and the
-journal deadlock has not appeared yet. In my previous tests, it could show up
-within 30 minutes on my machine. (In order to make the journal space
-easier to exhaust, I changed SB_JOURNAL_BUCKETS from 256U to 3U
-in both the kernel and bcache-tools code).
-
-More testing on more machines will start soon, and the patches are not yet
-stable enough for production environment usage. But I think it is time
-to post the patch set for your review and comments.
-
-I will continue to improve the fixes, e.g. remove some BUG_ON() once
-I am sure they won't happen indeed. If you may help to test the patch
-set, that will be really cool.
-
-Hope we may make this patch set stable soon.
-
-Thanks in advance for your help.
-
-Coly Li
----
-
-Coly Li (6):
- bcache: acquire c->journal.lock in bch_btree_leaf_dirty()
- bcache: move definition of 'int ret' out of macro read_bucket()
- bcache: never set 0 to KEY_PTRS of jouranl key in journal_reclaim()
- bcache: reload jouranl key information during journal replay
- bcache: fix journal deadlock during jouranl replay
- bcache: reserve space for journal_meta() in run time
-
- drivers/md/bcache/btree.c | 2 +
- drivers/md/bcache/journal.c | 244 +++++++++++++++++++++++++++++++++++++++++---
- drivers/md/bcache/journal.h | 5 +
- 3 files changed, 238 insertions(+), 13 deletions(-)
-
---
-2.16.4
-
diff --git a/for-test/jouranl-deadlock/v1/v1-0001-bcache-acquire-c-journal.lock-in-bch_btree_leaf_d.patch b/for-test/jouranl-deadlock/v1/v1-0001-bcache-acquire-c-journal.lock-in-bch_btree_leaf_d.patch
deleted file mode 100644
index 12c7da4..0000000
--- a/for-test/jouranl-deadlock/v1/v1-0001-bcache-acquire-c-journal.lock-in-bch_btree_leaf_d.patch
+++ /dev/null
@@ -1,52 +0,0 @@
-From 3c7e66546d18ead01bd821fa07f3ca2c73a9d964 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sat, 2 Mar 2019 18:19:08 +0800
-Subject: [RFC PATCH v1 1/6] bcache: acquire c->journal.lock in
- bch_btree_leaf_dirty()
-
-In bch_btree_leaf_dirty(), when increasing the bcache journal pin counter,
-the current code uses atomic_inc(w->journal) directly. This is problematic,
-because it may cause the following code in journal.c:journal_reclaim() to
-not work properly,
- 610 while (!atomic_read(&fifo_front(&c->journal.pin)))
- 611 fifo_pop(&c->journal.pin, p);
-
-The above code piece is protected by spinlock c->journal.lock, and
-the atomic counter w->journal in btree.c:bch_btree_leaf_dirty() is one
-of the nodes from c->journal.pin. If the above while() loop just happens
-to reach a fifo node which is w->journal in bch_btree_leaf_dirty(),
-it is possible that between lines 610 and 611 the counter w->journal
-is increased but popped off in journal_reclaim(). Then the journal jset
-which w->journal referenced in bch_btree_leaf_dirty() gets lost.
-
-If the system crashes or reboots before bkeys of the lost jset are flushed
-back to the bcache btree node, journal_replay() after the reboot may complain
-that some journal entries are lost and fail to register the cache set.
-
-Such a race condition happens very rarely. I observed the issue when
-I modified the number of journal buckets to 3, which makes only a limited
-number of jsets available. Then it is possible to observe journal
-replay failure due to lost journal jset(s).
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/btree.c | 2 ++
- 1 file changed, 2 insertions(+)
-
-diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
-index 23cb1dc7296b..ac1b9159402e 100644
---- a/drivers/md/bcache/btree.c
-+++ b/drivers/md/bcache/btree.c
-@@ -551,7 +551,9 @@ static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref)
-
- if (!w->journal) {
- w->journal = journal_ref;
-+ spin_lock(&b->c->journal.lock);
- atomic_inc(w->journal);
-+ spin_unlock(&b->c->journal.lock);
- }
- }
-
---
-2.16.4
-
diff --git a/for-test/jouranl-deadlock/v1/v1-0002-bcache-move-definition-of-int-ret-out-of-macro-re.patch b/for-test/jouranl-deadlock/v1/v1-0002-bcache-move-definition-of-int-ret-out-of-macro-re.patch
deleted file mode 100644
index 4ddb4fa..0000000
--- a/for-test/jouranl-deadlock/v1/v1-0002-bcache-move-definition-of-int-ret-out-of-macro-re.patch
+++ /dev/null
@@ -1,50 +0,0 @@
-From da41d81e0abd211d2990d848cd33744ff335cd43 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 27 Feb 2019 18:10:48 +0800
-Subject: [RFC PATCH v1 2/6] bcache: move definition of 'int ret' out of macro
- read_bucket()
-
-'int ret' is defined as a local variable inside macro read_bucket().
-Since this macro is called multiple times, and following patches will
-use a 'int ret' variable in bch_journal_read(), this patch moves
-definition of 'int ret' from macro read_bucket() to range of function
-bch_journal_read().
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/journal.c | 5 +++--
- 1 file changed, 3 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index b2fd412715b1..6e18057d1d82 100644
---- a/drivers/md/bcache/journal.c
-+++ b/drivers/md/bcache/journal.c
-@@ -147,7 +147,7 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
- {
- #define read_bucket(b) \
- ({ \
-- int ret = journal_read_bucket(ca, list, b); \
-+ ret = journal_read_bucket(ca, list, b); \
- __set_bit(b, bitmap); \
- if (ret < 0) \
- return ret; \
-@@ -156,6 +156,7 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
-
- struct cache *ca;
- unsigned int iter;
-+ int ret = 0;
-
- for_each_cache(ca, c, iter) {
- struct journal_device *ja = &ca->journal;
-@@ -267,7 +268,7 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
- struct journal_replay,
- list)->j.seq;
-
-- return 0;
-+ return ret;
- #undef read_bucket
- }
-
---
-2.16.4
-
diff --git a/for-test/jouranl-deadlock/v1/v1-0003-bcache-never-set-0-to-KEY_PTRS-of-jouranl-key-in-.patch b/for-test/jouranl-deadlock/v1/v1-0003-bcache-never-set-0-to-KEY_PTRS-of-jouranl-key-in-.patch
deleted file mode 100644
index 1e1e476..0000000
--- a/for-test/jouranl-deadlock/v1/v1-0003-bcache-never-set-0-to-KEY_PTRS-of-jouranl-key-in-.patch
+++ /dev/null
@@ -1,93 +0,0 @@
-From d8c81f7cdb63bc3a2b00a8a9e5e9b4783e42c702 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 27 Feb 2019 20:22:23 +0800
-Subject: [RFC PATCH v1 3/6] bcache: never set 0 to KEY_PTRS of jouranl key in
- journal_reclaim()
-
-In journal_reclaim() ja->cur_idx of each cache will be updated to
-reclaim available journal buckets. Variable 'int n' is used to count how
-many caches are successfully reclaimed, then n is set to c->journal.key
-by SET_KEY_PTRS(). Later in journal_write_unlocked(), a for_each_cache()
-loop will write the jset data onto each cache.
-
-The problem shows up when all journal buckets on each cache are full;
-consider the following code in journal_reclaim(),
-
-529 for_each_cache(ca, c, iter) {
-530 struct journal_device *ja = &ca->journal;
-531 unsigned int next = (ja->cur_idx + 1) % ca->sb.njournal_buckets;
-532
-533 /* No space available on this device */
-534 if (next == ja->discard_idx)
-535 continue;
-536
-537 ja->cur_idx = next;
-538 k->ptr[n++] = MAKE_PTR(0,
-539 bucket_to_sector(c, ca->sb.d[ja->cur_idx]),
-540 ca->sb.nr_this_dev);
-541 }
-542
-543 bkey_init(k);
-544 SET_KEY_PTRS(k, n);
-
-If there is no available bucket to reclaim, the if() condition at line
-534 will always be true, and n remains 0. Then at line 544, SET_KEY_PTRS()
-will set 0 to KEY_PTRS field of c->journal.key.
-
-Setting KEY_PTRS field of c->journal.key to 0 is wrong. Because in
-journal_write_unlocked() the journal data is written in following loop,
-
-649 for (i = 0; i < KEY_PTRS(k); i++) {
-650-671 submit journal data to cache device
-672 }
-
-If KEY_PTRS field is set to 0 in journal_reclaim(), the journal data
-won't be written to cache device here. If the system crashes or reboots
-before bkeys of the lost journal entries are written into btree nodes, data
-corruption will be reported during bcache reload after rebooting the
-system.
-
-Indeed there is only one cache in a cache set, there is no need to set
-KEY_PTRS field in journal_reclaim() at all. But in order to keep the
-for_each_cache() logic consistent for now, this patch fixes the above
-problem by not setting 0 KEY_PTRS of journal key, if there is no bucket
-available to reclaim.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/journal.c | 11 +++++++----
- 1 file changed, 7 insertions(+), 4 deletions(-)
-
-diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index 6e18057d1d82..5180bed911ef 100644
---- a/drivers/md/bcache/journal.c
-+++ b/drivers/md/bcache/journal.c
-@@ -541,11 +541,11 @@ static void journal_reclaim(struct cache_set *c)
- ca->sb.nr_this_dev);
- }
-
-- bkey_init(k);
-- SET_KEY_PTRS(k, n);
--
-- if (n)
-+ if (n) {
-+ bkey_init(k);
-+ SET_KEY_PTRS(k, n);
- c->journal.blocks_free = c->sb.bucket_size >> c->block_bits;
-+ }
- out:
- if (!journal_full(&c->journal))
- __closure_wake_up(&c->journal.wait);
-@@ -672,6 +672,9 @@ static void journal_write_unlocked(struct closure *cl)
- ca->journal.seq[ca->journal.cur_idx] = w->data->seq;
- }
-
-+ /* If KEY_PTRS(k) == 0, this jset gets lost in air */
-+ BUG_ON(i == 0);
-+
- atomic_dec_bug(&fifo_back(&c->journal.pin));
- bch_journal_next(&c->journal);
- journal_reclaim(c);
---
-2.16.4
-
diff --git a/for-test/jouranl-deadlock/v1/v1-0004-bcache-reload-jouranl-key-information-during-jour.patch b/for-test/jouranl-deadlock/v1/v1-0004-bcache-reload-jouranl-key-information-during-jour.patch
deleted file mode 100644
index 7877cfa..0000000
--- a/for-test/jouranl-deadlock/v1/v1-0004-bcache-reload-jouranl-key-information-during-jour.patch
+++ /dev/null
@@ -1,160 +0,0 @@
-From 1ff320546f894a6067c6a73bfaa937fca20308de Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 27 Feb 2019 20:32:22 +0800
-Subject: [RFC PATCH v1 4/6] bcache: reload jouranl key information during
- journal replay
-
-When bcache journal initiates during running cache set, cache set
-journal.blocks_free is initiated as 0. Then during journal replay if
-journal_meta() is called and an empty jset is written to cache device,
-journal_reclaim() is called. If there is available journal bucket to
-reclaim, c->journal.blocks_free is set to numbers of blocks of a journal
-bucket, which is c->sb.bucket_size >> c->block_bits.
-
-Most of the time the above process works correctly, except in the case
-when journal space is almost full. "Almost full" means there is no free
-journal bucket, but there are still free blocks in last available
-bucket indexed by ja->cur_idx.
-
-If system crashes or reboots when journal space is almost full, a problem
-arises. During cache set reload after the reboot, c->journal.blocks_free
-is initialized as 0. When the journal replay process writes bcache journal,
-journal_reclaim() will be called to reclaim available journal bucket and
-set c->journal.blocks_free to c->sb.bucket_size >> c->block_bits. But
-there is no fully free bucket to reclaim in journal_reclaim(), so value
-of c->journal.blocks_free will stay 0. If the first journal entry
-processed by journal_replay() causes btree split and requires writing
-journal space by journal_meta(), journal_meta() has to go into an
-infinite loop to reclaim journal bucket, and blocks the whole cache set
-from running.
-
-Such buggy situation can be solved if we do following things before
-journal replay starts,
-- Recover previous value of c->journal.blocks_free in last run time,
- and set it to current c->journal.blocks_free as initial value.
-- Recover previous value of ja->cur_idx in last run time, and set it to
- KEY_PTR of current c->journal.key as initial value.
-
-After c->journal.blocks_free and c->journal.key are recovered, in
-condition when journal space is almost full and cache set is reloaded,
-meta journal entry from journal replay can be written into free blocks of
-the last available journal bucket, then old journal entries can be
-replayed and reclaimed for further journaling request.
-
-This patch adds bch_journal_key_reload() to recover journal blocks_free
-and key ptr value for above purpose. bch_journal_key_reload() is called
-in bch_journal_read() before replaying journal by bch_journal_replay().
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/journal.c | 87 +++++++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 87 insertions(+)
-
-diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index 5180bed911ef..a6deb16c15c8 100644
---- a/drivers/md/bcache/journal.c
-+++ b/drivers/md/bcache/journal.c
-@@ -143,6 +143,89 @@ reread: left = ca->sb.bucket_size - offset;
- return ret;
- }
-
-+static int bch_journal_key_reload(struct cache_set *c)
-+{
-+ struct cache *ca;
-+ unsigned int iter, n = 0;
-+ struct bkey *k = &c->journal.key;
-+ int ret = 0;
-+
-+ for_each_cache(ca, c, iter) {
-+ struct journal_device *ja = &ca->journal;
-+ struct bio *bio = &ja->bio;
-+ struct jset *j, *data = c->journal.w[0].data;
-+ struct closure cl;
-+ unsigned int len, left;
-+ unsigned int offset = 0, used_blocks = 0;
-+ sector_t bucket = bucket_to_sector(c, ca->sb.d[ja->cur_idx]);
-+
-+ closure_init_stack(&cl);
-+
-+ while (offset < ca->sb.bucket_size) {
-+reread: left = ca->sb.bucket_size - offset;
-+ len = min_t(unsigned int,
-+ left, PAGE_SECTORS << JSET_BITS);
-+
-+ bio_reset(bio);
-+ bio->bi_iter.bi_sector = bucket + offset;
-+ bio_set_dev(bio, ca->bdev);
-+ bio->bi_iter.bi_size = len << 9;
-+
-+ bio->bi_end_io = journal_read_endio;
-+ bio->bi_private = &cl;
-+ bio_set_op_attrs(bio, REQ_OP_READ, 0);
-+ bch_bio_map(bio, data);
-+
-+ closure_bio_submit(c, bio, &cl);
-+ closure_sync(&cl);
-+
-+ j = data;
-+ while (len) {
-+ size_t blocks, bytes = set_bytes(j);
-+
-+ if (j->magic != jset_magic(&ca->sb))
-+ goto out;
-+
-+ if (bytes > left << 9 ||
-+ bytes > PAGE_SIZE << JSET_BITS) {
-+ pr_err("jset may be correpted: too big");
-+ ret = -EIO;
-+ goto err;
-+ }
-+
-+ if (bytes > len << 9)
-+ goto reread;
-+
-+ if (j->csum != csum_set(j)) {
-+ pr_err("jset may be corrupted: bad csum");
-+ ret = -EIO;
-+ goto err;
-+ }
-+
-+ blocks = set_blocks(j, block_bytes(c));
-+ used_blocks += blocks;
-+
-+ offset += blocks * ca->sb.block_size;
-+ len -= blocks * ca->sb.block_size;
-+ j = ((void *) j) + blocks * block_bytes(ca);
-+ }
-+ }
-+out:
-+ c->journal.blocks_free =
-+ (c->sb.bucket_size >> c->block_bits) -
-+ used_blocks;
-+
-+ k->ptr[n++] = MAKE_PTR(0, bucket, ca->sb.nr_this_dev);
-+ }
-+
-+ BUG_ON(n == 0);
-+ bkey_init(k);
-+ SET_KEY_PTRS(k, n);
-+
-+err:
-+ return ret;
-+}
-+
- int bch_journal_read(struct cache_set *c, struct list_head *list)
- {
- #define read_bucket(b) \
-@@ -268,6 +351,10 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
- struct journal_replay,
- list)->j.seq;
-
-+ /* Initial value of c->journal.blocks_free should be 0 */
-+ BUG_ON(c->journal.blocks_free != 0);
-+ ret = bch_journal_key_reload(c);
-+
- return ret;
- #undef read_bucket
- }
---
-2.16.4
-
diff --git a/for-test/jouranl-deadlock/v1/v1-0005-bcache-fix-journal-deadlock-during-jouranl-replay.patch b/for-test/jouranl-deadlock/v1/v1-0005-bcache-fix-journal-deadlock-during-jouranl-replay.patch
deleted file mode 100644
index ba2a763..0000000
--- a/for-test/jouranl-deadlock/v1/v1-0005-bcache-fix-journal-deadlock-during-jouranl-replay.patch
+++ /dev/null
@@ -1,275 +0,0 @@
-From ee8cbff3518dcaf67c16cff0cefe2a4424573bff Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 27 Feb 2019 20:35:02 +0800
-Subject: [RFC PATCH v1 5/6] bcache: fix journal deadlock during jouranl replay
-
-A deadlock of bcache journaling may happen during journal replay. Such
-deadlock happens when,
-- Journal space is totally full (no free blocks at all) and system crashes
- or reboots.
-- After reboot, the first journal entry handled by journal replay causes
- btree split and journal_meta() is called to write an empty jset to
- journal space.
-- There is no journal space to write and journal_reclaim() fails to get
- any available bucket because this is the first replayed journal entry
- to be blocked.
-Then the whole cache set is blocked from running.
-
-This patch is an effort to fix such journal replay deadlock in a simpler
-way,
-- Add a bool variable 'in_replay' in struct journal, set it to true when
- journal replay starts, and set it to false when journal replay
- completes. in_replay is initialized to be false.
-- Reserve 6 sectors in journal bucket, do not use them in normal bcache
- runtime. These sectors are only permitted to be used during journal
- replay (when c->journal.in_replay is true)
-
-Then in normal bcache runtime, journal space won't be totally full and
-6 sectors are always reserved for journal replay time. After
-system reboots, if bch_btree_insert() in bch_journal_replay() causes
-btree split and bch_journal_meta() gets called to require 1 sector
-from journal buckets to write an empty jset, there is enough reserved
-space to serve.
-
-The reason to reserve 6 sectors is, we should choose a number that won't
-fit into a bucket size. If the reserved space happens to be a whole
-bucket, more logic has to be added in journal_replay() to handle
-journal.blocks_free with reserved spaces in journal replay time. This is
-why 6 sectors is chosen, it is 3KB and won't be any proper block size
-or bucket size.
-
-The bcache btree node size is quite large, so btree node split won't be
-a frequent event. And when btree node split happens, newly added key will
-be inserted directly into upper level or neighbor nodes and won't go into
-journal again, only bch_journal_meta() is called to write jset metadata
-which occupies 1 block in journal space. If blocksize is set to 4K size,
-reserve 6 sectors indeed is 2 blocks, so there can be two continuous
-btree splits during journal replay, this is very very rare in
-practice. As default blocksize is set to sector size, that equals to
-6 blocks reserved. Continuously splitting the btree for 6 times in journal
-replay is almost impossible, so the reserved space seems to be enough
-in my humble opinion.
-
-If in future the reserved space turns out to be not enough, let's extend
-it then.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/journal.c | 100 ++++++++++++++++++++++++++++++++++++++++----
- drivers/md/bcache/journal.h | 4 ++
- 2 files changed, 97 insertions(+), 7 deletions(-)
-
-diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index a6deb16c15c8..c60a702f53a9 100644
---- a/drivers/md/bcache/journal.c
-+++ b/drivers/md/bcache/journal.c
-@@ -415,6 +415,8 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
- uint64_t start = i->j.last_seq, end = i->j.seq, n = start;
- struct keylist keylist;
-
-+ s->journal.in_replay = true;
-+
- list_for_each_entry(i, list, list) {
- BUG_ON(i->pin && atomic_read(i->pin) != 1);
-
-@@ -448,6 +450,7 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
- pr_info("journal replay done, %i keys in %i entries, seq %llu",
- keys, entries, end);
- err:
-+ s->journal.in_replay = false;
- while (!list_empty(list)) {
- i = list_first_entry(list, struct journal_replay, list);
- list_del(&i->list);
-@@ -577,6 +580,22 @@ static void do_journal_discard(struct cache *ca)
- }
- }
-
-+static inline bool last_available_journal_bucket(struct cache_set *c)
-+{
-+ struct cache *ca;
-+ unsigned int iter;
-+ struct journal_device *ja;
-+
-+ for_each_cache(ca, c, iter) {
-+ ja = &ca->journal;
-+ if (unlikely((ja->cur_idx + 1) % ca->sb.njournal_buckets ==
-+ ja->last_idx))
-+ return true;
-+ }
-+
-+ return false;
-+}
-+
- static void journal_reclaim(struct cache_set *c)
- {
- struct bkey *k = &c->journal.key;
-@@ -584,6 +603,7 @@ static void journal_reclaim(struct cache_set *c)
- uint64_t last_seq;
- unsigned int iter, n = 0;
- atomic_t p __maybe_unused;
-+ bool last, do_wakeup = false;
-
- atomic_long_inc(&c->reclaim);
-
-@@ -606,8 +626,13 @@ static void journal_reclaim(struct cache_set *c)
- for_each_cache(ca, c, iter)
- do_journal_discard(ca);
-
-- if (c->journal.blocks_free)
-+ last = last_available_journal_bucket(c);
-+ if ((!last && c->journal.blocks_free) ||
-+ (last && (c->journal.blocks_free * c->sb.block_size) >
-+ BCH_JOURNAL_RPLY_RESERVE)) {
-+ do_wakeup = true;
- goto out;
-+ }
-
- /*
- * Allocate:
-@@ -632,9 +657,10 @@ static void journal_reclaim(struct cache_set *c)
- bkey_init(k);
- SET_KEY_PTRS(k, n);
- c->journal.blocks_free = c->sb.bucket_size >> c->block_bits;
-+ do_wakeup = true;
- }
- out:
-- if (!journal_full(&c->journal))
-+ if (do_wakeup && !journal_full(&c->journal))
- __closure_wake_up(&c->journal.wait);
- }
-
-@@ -692,6 +718,21 @@ static void journal_write_unlock(struct closure *cl)
- spin_unlock(&c->journal.lock);
- }
-
-+static bool should_reclaim(struct cache_set *c,
-+ struct journal_write *w)
-+{
-+ if (unlikely(journal_full(&c->journal)))
-+ return true;
-+
-+ if (unlikely(last_available_journal_bucket(c) &&
-+ (!c->journal.in_replay) &&
-+ (c->journal.blocks_free * c->sb.block_size <=
-+ BCH_JOURNAL_RPLY_RESERVE)))
-+ return true;
-+
-+ return false;
-+}
-+
- static void journal_write_unlocked(struct closure *cl)
- __releases(c->journal.lock)
- {
-@@ -710,7 +751,7 @@ static void journal_write_unlocked(struct closure *cl)
- if (!w->need_write) {
- closure_return_with_destructor(cl, journal_write_unlock);
- return;
-- } else if (journal_full(&c->journal)) {
-+ } else if (should_reclaim(c, w)) {
- journal_reclaim(c);
- spin_unlock(&c->journal.lock);
-
-@@ -798,6 +839,52 @@ static void journal_try_write(struct cache_set *c)
- }
- }
-
-+static bool no_journal_wait(struct cache_set *c,
-+ size_t sectors)
-+{
-+ bool last = last_available_journal_bucket(c);
-+ size_t reserved_sectors = 0;
-+ size_t n = min_t(size_t,
-+ c->journal.blocks_free * c->sb.block_size,
-+ PAGE_SECTORS << JSET_BITS);
-+
-+ if (last && !c->journal.in_replay)
-+ reserved_sectors = BCH_JOURNAL_RPLY_RESERVE;
-+
-+ if (sectors <= (n - reserved_sectors))
-+ return true;
-+
-+ return false;
-+}
-+
-+static bool should_try_write(struct cache_set *c,
-+ struct journal_write *w)
-+{
-+ size_t reserved_sectors, n, sectors;
-+
-+ if (journal_full(&c->journal))
-+ return false;
-+
-+ if (!last_available_journal_bucket(c))
-+ return true;
-+
-+ /* the check in no_journal_wait exceeds BCH_JOURNAL_RPLY_RESERVE */
-+ if (w->data->keys == 0)
-+ return false;
-+
-+ reserved_sectors = BCH_JOURNAL_RPLY_RESERVE;
-+ n = min_t(size_t,
-+ (c->journal.blocks_free * c->sb.block_size),
-+ PAGE_SECTORS << JSET_BITS);
-+ sectors = __set_blocks(w->data, w->data->keys,
-+ block_bytes(c)) * c->sb.block_size;
-+ if (sectors <= (n - reserved_sectors))
-+ return true;
-+
-+ return false;
-+}
-+
-+
- static struct journal_write *journal_wait_for_write(struct cache_set *c,
- unsigned int nkeys)
- __acquires(&c->journal.lock)
-@@ -816,15 +903,13 @@ static struct journal_write *journal_wait_for_write(struct cache_set *c,
- sectors = __set_blocks(w->data, w->data->keys + nkeys,
- block_bytes(c)) * c->sb.block_size;
-
-- if (sectors <= min_t(size_t,
-- c->journal.blocks_free * c->sb.block_size,
-- PAGE_SECTORS << JSET_BITS))
-+ if (no_journal_wait(c, sectors))
- return w;
-
- if (wait)
- closure_wait(&c->journal.wait, &cl);
-
-- if (!journal_full(&c->journal)) {
-+ if (should_try_write(c, w)) {
- if (wait)
- trace_bcache_journal_entry_full(c);
-
-@@ -933,6 +1018,7 @@ int bch_journal_alloc(struct cache_set *c)
- INIT_DELAYED_WORK(&j->work, journal_write_work);
-
- c->journal_delay_ms = 100;
-+ j->in_replay = false;
-
- j->w[0].c = c;
- j->w[1].c = c;
-diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h
-index 66f0facff84b..54408e248a39 100644
---- a/drivers/md/bcache/journal.h
-+++ b/drivers/md/bcache/journal.h
-@@ -108,6 +108,7 @@ struct journal {
- struct closure io;
- int io_in_flight;
- struct delayed_work work;
-+ bool in_replay;
-
- /* Number of blocks free in the bucket(s) we're currently writing to */
- unsigned int blocks_free;
-@@ -159,6 +160,9 @@ struct journal_device {
-
- #define JOURNAL_PIN 20000
-
-+/* Reserved jouranl space in sectors */
-+#define BCH_JOURNAL_RPLY_RESERVE 6U
-+
- #define journal_full(j) \
- (!(j)->blocks_free || fifo_free(&(j)->pin) <= 1)
-
---
-2.16.4
-
diff --git a/for-test/jouranl-deadlock/v1/v1-0006-bcache-reserve-space-for-journal_meta-in-run-time.patch b/for-test/jouranl-deadlock/v1/v1-0006-bcache-reserve-space-for-journal_meta-in-run-time.patch
deleted file mode 100644
index 04d0a83..0000000
--- a/for-test/jouranl-deadlock/v1/v1-0006-bcache-reserve-space-for-journal_meta-in-run-time.patch
+++ /dev/null
@@ -1,241 +0,0 @@
-From 60b326d839c8df0528c9567db590173a8d11060b Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Thu, 28 Feb 2019 20:29:00 +0800
-Subject: [RFC PATCH v1 6/6] bcache: reserve space for journal_meta() in run
- time
-
-Another journal deadlock of bcache journaling can happen in normal
-bcache runtime. It is very rare to happen but there are people reporting
-bkey insert work queue blocked, which is caused by such deadlock.
-
-This is how such journaling deadlock in runtime happens,
-- Journal space is totally full and no free space to reclaim, journaling
- tasks waiting for space to write in journal_wait_for_write().
-- In order to have free journal space, btree_flush_write() is called to
- flush earliest journaled in-memory btree key into btree node. Then all
- journaled bkey in early used journal buckets are flushed to on-disk
- btree, this journal bucket can be reclaimed for new coming journal
- request.
-- But if the earliest journaled bkey causes a btree node split during
- inserting it into btree node, finally journal_meta() will be called to
- journal btree root (and other information) into the journal space.
-- Unfortunately the journal space is full, and the journal entries have
- to be flushed in linear order. So bch_journal_meta() from bkey insert
- is blocked too.
-Then journaling deadlock during bcache run time happens.
-
-A method to fix such deadlock is to reserve some journal space too. The
-reserved space can only be used when,
-- Current journal bucket is the last journal bucket which has available
- space to write into.
-- When calling bch_journal(), current jset is empty and there is no key
- in the inserting key list. This means the journal request is from
- bch_journal_meta() and no non-reserved space can be used.
-
-Then if such journaling request is from bch_journal_meta() of inserting
-the earlest journaled bkey back into btree, the deadlock condition won't
-happen any more because the reserved space can be used for such
-scenario.
-
-Since there are already 6 sectors reserved for journal replay, here we
-reserve 7 sectors for runtime meta journal from btree split caused by
-flushing journal entries back to btree node. Depending on block size from
-1 sector to 4KB, the reserved space can serve from 7 down to 2 journal
-blocks. Indeed only one journal block reserved for such journal deadlock
-scenario is enough, 2 continuous btree splits caused by two adjacent bkey
-flushes from journal are very very rare to happen. So reserving 7 sectors
-should work.
-
-Another reason for reserving 7 sectors is, there are already 6 sectors
-reserved for journal replay, so in total there are 13 sectors reserved in
-last available journal bucket. 13 sectors won't be a proper bucket size,
-so we don't need to add more code to handle journal.blocks_free
-initialization for whole reserved journal bucket. Even though such code logic
-is simple, less code is better in my humble opinion.
-
-Again, if in future the reserved space turns out to be not enough, let's
-extend it then.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/journal.c | 89 +++++++++++++++++++++++++++++++++------------
- drivers/md/bcache/journal.h | 1 +
- 2 files changed, 66 insertions(+), 24 deletions(-)
-
-diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index c60a702f53a9..6aa68ab7cd78 100644
---- a/drivers/md/bcache/journal.c
-+++ b/drivers/md/bcache/journal.c
-@@ -629,7 +629,7 @@ static void journal_reclaim(struct cache_set *c)
- last = last_available_journal_bucket(c);
- if ((!last && c->journal.blocks_free) ||
- (last && (c->journal.blocks_free * c->sb.block_size) >
-- BCH_JOURNAL_RPLY_RESERVE)) {
-+ (BCH_JOURNAL_RESERVE + BCH_JOURNAL_RPLY_RESERVE))) {
- do_wakeup = true;
- goto out;
- }
-@@ -718,18 +718,27 @@ static void journal_write_unlock(struct closure *cl)
- spin_unlock(&c->journal.lock);
- }
-
--static bool should_reclaim(struct cache_set *c,
-- struct journal_write *w)
-+static inline bool should_reclaim(struct cache_set *c,
-+ struct journal_write *w)
- {
-- if (unlikely(journal_full(&c->journal)))
-- return true;
-+ bool last = last_available_journal_bucket(c);
-
-- if (unlikely(last_available_journal_bucket(c) &&
-- (!c->journal.in_replay) &&
-- (c->journal.blocks_free * c->sb.block_size <=
-- BCH_JOURNAL_RPLY_RESERVE)))
-+ if (!last && journal_full(&c->journal))
- return true;
-
-+ if (unlikely(last)) {
-+ size_t n = c->journal.blocks_free * c->sb.block_size;
-+
-+ if (!c->journal.in_replay) {
-+ if (n <= BCH_JOURNAL_RESERVE +
-+ BCH_JOURNAL_RPLY_RESERVE)
-+ return true;
-+ } else {
-+ if (n <= BCH_JOURNAL_RPLY_RESERVE)
-+ return true;
-+ }
-+ }
-+
- return false;
- }
-
-@@ -751,7 +760,9 @@ static void journal_write_unlocked(struct closure *cl)
- if (!w->need_write) {
- closure_return_with_destructor(cl, journal_write_unlock);
- return;
-- } else if (should_reclaim(c, w)) {
-+ }
-+
-+ if (should_reclaim(c, w)) {
- journal_reclaim(c);
- spin_unlock(&c->journal.lock);
-
-@@ -840,16 +851,26 @@ static void journal_try_write(struct cache_set *c)
- }
-
- static bool no_journal_wait(struct cache_set *c,
-- size_t sectors)
-+ size_t sectors,
-+ int nkeys)
- {
-+ bool is_journal_meta = (nkeys == 0) ? true : false;
- bool last = last_available_journal_bucket(c);
- size_t reserved_sectors = 0;
-- size_t n = min_t(size_t,
-- c->journal.blocks_free * c->sb.block_size,
-- PAGE_SECTORS << JSET_BITS);
-+ size_t n;
-+
-+ if (unlikely(last)) {
-+ if (!is_journal_meta)
-+ reserved_sectors = BCH_JOURNAL_RESERVE +
-+ BCH_JOURNAL_RPLY_RESERVE;
-+ else
-+ reserved_sectors = (!c->journal.in_replay) ?
-+ BCH_JOURNAL_RPLY_RESERVE : 0;
-+ }
-
-- if (last && !c->journal.in_replay)
-- reserved_sectors = BCH_JOURNAL_RPLY_RESERVE;
-+ n = min_t(size_t,
-+ c->journal.blocks_free * c->sb.block_size,
-+ PAGE_SECTORS << JSET_BITS);
-
- if (sectors <= (n - reserved_sectors))
- return true;
-@@ -858,26 +879,46 @@ static bool no_journal_wait(struct cache_set *c,
- }
-
- static bool should_try_write(struct cache_set *c,
-- struct journal_write *w)
-+ struct journal_write *w,
-+ int nkeys)
- {
- size_t reserved_sectors, n, sectors;
-+ bool last, empty_jset;
-
- if (journal_full(&c->journal))
- return false;
-
-- if (!last_available_journal_bucket(c))
-+ last = last_available_journal_bucket(c);
-+ empty_jset = (w->data->keys == 0) ? true : false;
-+
-+ if (!last) {
-+ /*
-+ * Not last available journal bucket, no reserved journal
-+ * space restriction, an empty jset should not be here.
-+ */
-+ BUG_ON(empty_jset);
- return true;
-+ }
-
-- /* the check in no_journal_wait exceeds BCH_JOURNAL_RPLY_RESERVE */
-- if (w->data->keys == 0)
-+ if (empty_jset) {
-+ /*
-+ * If nkeys is 0 it means the journaling request is for meta
-+ * data, which should be returned in journal_wait_for_write()
-+ * by checking no_journal_wait(), and won't get here.
-+ */
-+ BUG_ON(nkeys == 0);
- return false;
-+ }
-
-- reserved_sectors = BCH_JOURNAL_RPLY_RESERVE;
-+ reserved_sectors = BCH_JOURNAL_RESERVE +
-+ BCH_JOURNAL_RPLY_RESERVE;
- n = min_t(size_t,
- (c->journal.blocks_free * c->sb.block_size),
- PAGE_SECTORS << JSET_BITS);
-- sectors = __set_blocks(w->data, w->data->keys,
-+ sectors = __set_blocks(w->data,
-+ w->data->keys,
- block_bytes(c)) * c->sb.block_size;
-+
- if (sectors <= (n - reserved_sectors))
- return true;
-
-@@ -903,13 +944,13 @@ static struct journal_write *journal_wait_for_write(struct cache_set *c,
- sectors = __set_blocks(w->data, w->data->keys + nkeys,
- block_bytes(c)) * c->sb.block_size;
-
-- if (no_journal_wait(c, sectors))
-+ if (no_journal_wait(c, sectors, nkeys))
- return w;
-
- if (wait)
- closure_wait(&c->journal.wait, &cl);
-
-- if (should_try_write(c, w)) {
-+ if (should_try_write(c, w, nkeys)) {
- if (wait)
- trace_bcache_journal_entry_full(c);
-
-diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h
-index 54408e248a39..55f81443f304 100644
---- a/drivers/md/bcache/journal.h
-+++ b/drivers/md/bcache/journal.h
-@@ -162,6 +162,7 @@ struct journal_device {
-
- /* Reserved jouranl space in sectors */
- #define BCH_JOURNAL_RPLY_RESERVE 6U
-+#define BCH_JOURNAL_RESERVE 7U
-
- #define journal_full(j) \
- (!(j)->blocks_free || fifo_free(&(j)->pin) <= 1)
---
-2.16.4
-
diff --git a/for-test/jouranl-deadlock/v2/v2-0000-cover-letter.patch b/for-test/jouranl-deadlock/v2/v2-0000-cover-letter.patch
deleted file mode 100644
index 19d3c21..0000000
--- a/for-test/jouranl-deadlock/v2/v2-0000-cover-letter.patch
+++ /dev/null
@@ -1,87 +0,0 @@
-From 24539bb78565d784ddabb81f24968c13835eb000 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Fri, 19 Apr 2019 00:37:27 +0800
-Subject: [RFC PATCH v2 00/16] bcache: fix journal no-space deadlock
-
-The initial journal no-space deadlock issue was known as several
-kthreads or kworkers were reported by kernel to hang for quite long
-time. The reason was a deadlock happened when there is no more journal
-space available for new coming journal request.
-
-In v1 RFC series, I thought the journal no-space deadlock was from two
-conditions, which was not the truth. After long time testing and
-debugging, I realize the journal deadlock was a result of a series of
-problems hidden in current code.
-
-Now I make progress in v2 series, and all known problems related to the
-journal no-space deadlock are fixed. I don't observe journal deadlock
-and related I/O hang warning any more.
-
-Unfortunately we can not apply this whole series at this moment, because
-after fixing the journal no-space deadlock issue, I find a race in dirty
-btree node flushing. Beside normal dirty btree node flushing, when there
-is no journal space, btree_flush_write() will be called to write down
-the oldest dirty btree node. Once the oldest dirty btree node is written
-from memory into cache device, its associated journal reference will be
-released, this operation is necessary to reclaim oldest busy journal
-bucket when no-space in journal buckets.
-
-The problem of this race is, when building c->flush_btree heap, all
-dirty btree node from for_each_cached_btree() are not protected or
-referenced, so there is a race that after the heap c->flush_btree is
-built and before the oldest node is selected from the heap, the oldest
-node is already written in normal code path, and the memory is
-released/reused.
-
-From my testing, a kernel panic triggered by wild pointer dereference or
-un-paired mutex_lock/unlock can be observed from btree_flush_write(),
-this is because the selected btree node was written and released
-already, btree_flush_write() just references invalid memory object.
-
-So far I don't have good idea to fix such race without hurting I/O
-performance, and IMHO the bcache I/O hang by journal is kind of better
-than kernel panic. Therefore before the race of dirty btree nodes
-writing gets fixed, I won't apply the whole series.
-
-But there are still some helpful and non-major fixes which can go into
-upstream, to reduce the whole patch set and avoid huge changes in a
-single kernel merge.
-
-The patch 'bcache: acquire c->journal.lock in bch_btree_leaf_dirty()' in
-v1 series was removed from v2 series. I still feel this is a problem to
-access journal pin fifo without any protection, but this fix is limited and
-I need to think about a more thoughtful way to fix.
-
-Any review comments or suggestions are warmly welcome.
-
-Thanks in advance for your help.
-
-Coly Li
----
-
-Coly Li (16):
- bcache: move definition of 'int ret' out of macro read_bucket()
- bcache: never set 0 to KEY_PTRS of jouranl key in journal_reclaim()
- bcache: reload jouranl key information during journal replay
- bcache: fix journal deadlock during jouranl replay
- bcache: reserve space for journal_meta() in run time
- bcache: add failure check to run_cache_set() for journal replay
- bcache: add comments for kobj release callback routine
- bcache: return error immediately in bch_journal_replay()
- bcache: add error check for calling register_bdev()
- bcache: Add comments for blkdev_put() in registration code path
- bcache: add comments for closure_fn to be called in closure_queue()
- bcache: add pendings_cleanup to stop pending bcache device
- bcache: fix fifo index swapping condition in btree_flush_write()
- bcache: try to flush btree nodes as many as possible
- bcache: improve bcache_reboot()
- bcache: introduce spinlock_t flush_write_lock in struct journal
-
- drivers/md/bcache/journal.c | 312 ++++++++++++++++++++++++++++++++++++++++----
- drivers/md/bcache/journal.h | 8 +-
- drivers/md/bcache/super.c | 112 ++++++++++++++--
- 3 files changed, 393 insertions(+), 39 deletions(-)
-
---
-2.16.4
-
diff --git a/for-test/jouranl-deadlock/v2/v2-0001-bcache-move-definition-of-int-ret-out-of-macro-re.patch b/for-test/jouranl-deadlock/v2/v2-0001-bcache-move-definition-of-int-ret-out-of-macro-re.patch
deleted file mode 100644
index 6f5e2da..0000000
--- a/for-test/jouranl-deadlock/v2/v2-0001-bcache-move-definition-of-int-ret-out-of-macro-re.patch
+++ /dev/null
@@ -1,50 +0,0 @@
-From b6bbfb503e206f65196dc44c7f3ca7f77458b8e0 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 27 Feb 2019 18:10:48 +0800
-Subject: [RFC PATCH v2 01/16] bcache: move definition of 'int ret' out of
- macro read_bucket()
-
-'int ret' is defined as a local variable inside macro read_bucket().
-Since this macro is called multiple times, and following patches will
-use a 'int ret' variable in bch_journal_read(), this patch moves
-definition of 'int ret' from macro read_bucket() to range of function
-bch_journal_read().
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/journal.c | 5 +++--
- 1 file changed, 3 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index b2fd412715b1..6e18057d1d82 100644
---- a/drivers/md/bcache/journal.c
-+++ b/drivers/md/bcache/journal.c
-@@ -147,7 +147,7 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
- {
- #define read_bucket(b) \
- ({ \
-- int ret = journal_read_bucket(ca, list, b); \
-+ ret = journal_read_bucket(ca, list, b); \
- __set_bit(b, bitmap); \
- if (ret < 0) \
- return ret; \
-@@ -156,6 +156,7 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
-
- struct cache *ca;
- unsigned int iter;
-+ int ret = 0;
-
- for_each_cache(ca, c, iter) {
- struct journal_device *ja = &ca->journal;
-@@ -267,7 +268,7 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
- struct journal_replay,
- list)->j.seq;
-
-- return 0;
-+ return ret;
- #undef read_bucket
- }
-
---
-2.16.4
-
diff --git a/for-test/jouranl-deadlock/v2/v2-0002-bcache-never-set-0-to-KEY_PTRS-of-jouranl-key-in-.patch b/for-test/jouranl-deadlock/v2/v2-0002-bcache-never-set-0-to-KEY_PTRS-of-jouranl-key-in-.patch
deleted file mode 100644
index fcb490d..0000000
--- a/for-test/jouranl-deadlock/v2/v2-0002-bcache-never-set-0-to-KEY_PTRS-of-jouranl-key-in-.patch
+++ /dev/null
@@ -1,94 +0,0 @@
-From dc171a41dbbac4a43cd9503a18c92c7a31185ac7 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 27 Feb 2019 20:22:23 +0800
-Subject: [RFC PATCH v2 02/16] bcache: never set 0 to KEY_PTRS of jouranl key
- in journal_reclaim()
-
-In journal_reclaim() ja->cur_idx of each cache will be updated to
-reclaim available journal buckets. Variable 'int n' is used to count how
-many caches are successfully reclaimed, then n is set to c->journal.key
-by SET_KEY_PTRS(). Later in journal_write_unlocked(), a for_each_cache()
-loop will write the jset data onto each cache.
-
-The problem is, if all journal buckets on each cache are full, the
-following code in journal_reclaim(),
-
-529 for_each_cache(ca, c, iter) {
-530 struct journal_device *ja = &ca->journal;
-531 unsigned int next = (ja->cur_idx + 1) % ca->sb.njournal_buckets;
-532
-533 /* No space available on this device */
-534 if (next == ja->discard_idx)
-535 continue;
-536
-537 ja->cur_idx = next;
-538 k->ptr[n++] = MAKE_PTR(0,
-539 bucket_to_sector(c, ca->sb.d[ja->cur_idx]),
-540 ca->sb.nr_this_dev);
-541 }
-542
-543 bkey_init(k);
-544 SET_KEY_PTRS(k, n);
-
-If there is no available bucket to reclaim, the if() condition at line
-534 will always be true, and n remains 0. Then at line 544, SET_KEY_PTRS()
-will set KEY_PTRS field of c->journal.key to 0.
-
-Setting KEY_PTRS field of c->journal.key to 0 is wrong. Because in
-journal_write_unlocked() the journal data is written in following loop,
-
-649 for (i = 0; i < KEY_PTRS(k); i++) {
-650-671 submit journal data to cache device
-672 }
-
-If KEY_PTRS field is set to 0 in journal_reclaim(), the journal data
-won't be written to cache device here. If system crashed or rebooted
-before bkeys of the lost journal entries are written into btree nodes,
-data corruption will be reported during bcache reload after rebooting
-the system.
-
-Indeed there is only one cache in a cache set, there is no need to set
-KEY_PTRS field in journal_reclaim() at all. But in order to keep the
-for_each_cache() logic consistent for now, this patch fixes the above
-problem by not setting 0 KEY_PTRS of journal key, if there is no bucket
-available to reclaim.
-
-Cc: stable@vger.kernel.org
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/journal.c | 11 +++++++----
- 1 file changed, 7 insertions(+), 4 deletions(-)
-
-diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index 6e18057d1d82..5180bed911ef 100644
---- a/drivers/md/bcache/journal.c
-+++ b/drivers/md/bcache/journal.c
-@@ -541,11 +541,11 @@ static void journal_reclaim(struct cache_set *c)
- ca->sb.nr_this_dev);
- }
-
-- bkey_init(k);
-- SET_KEY_PTRS(k, n);
--
-- if (n)
-+ if (n) {
-+ bkey_init(k);
-+ SET_KEY_PTRS(k, n);
- c->journal.blocks_free = c->sb.bucket_size >> c->block_bits;
-+ }
- out:
- if (!journal_full(&c->journal))
- __closure_wake_up(&c->journal.wait);
-@@ -672,6 +672,9 @@ static void journal_write_unlocked(struct closure *cl)
- ca->journal.seq[ca->journal.cur_idx] = w->data->seq;
- }
-
-+ /* If KEY_PTRS(k) == 0, this jset gets lost in air */
-+ BUG_ON(i == 0);
-+
- atomic_dec_bug(&fifo_back(&c->journal.pin));
- bch_journal_next(&c->journal);
- journal_reclaim(c);
---
-2.16.4
-
diff --git a/for-test/jouranl-deadlock/v2/v2-0003-bcache-reload-jouranl-key-information-during-jour.patch b/for-test/jouranl-deadlock/v2/v2-0003-bcache-reload-jouranl-key-information-during-jour.patch
deleted file mode 100644
index cfe5323..0000000
--- a/for-test/jouranl-deadlock/v2/v2-0003-bcache-reload-jouranl-key-information-during-jour.patch
+++ /dev/null
@@ -1,161 +0,0 @@
-From e3c194808a99446e9bf69ac0707c7d3f473be518 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 27 Feb 2019 20:32:22 +0800
-Subject: [RFC PATCH v2 03/16] bcache: reload jouranl key information during
- journal replay
-
-When bcache journal is initialized while running a cache set, cache set
-journal.blocks_free is initialized as 0. Then during journal replay if
-journal_meta() is called and an empty jset is written to cache device,
-journal_reclaim() is called. If there is an available journal bucket to
-reclaim, c->journal.blocks_free is set to the number of blocks of a
-journal bucket, which is c->sb.bucket_size >> c->block_bits.
-
-Most of the time the above process works correctly, except the condition
-when journal space is almost full. "Almost full" means there is no free
-journal bucket, but there are still free blocks in the last available
-bucket indexed by ja->cur_idx.
-
-If system crashes or reboots when journal space is almost full, a problem
-arises. During cache set reload after the reboot, c->journal.blocks_free
-is initialized as 0. When journal replay process writes bcache journal,
-journal_reclaim() will be called to reclaim available journal bucket and
-set c->journal.blocks_free to c->sb.bucket_size >> c->block_bits. But
-there is no fully free bucket to reclaim in journal_reclaim(), so value
-of c->journal.blocks_free will stay 0. If the first journal entry
-processed by journal_replay() causes btree split and requires writing
-journal space by journal_meta(), journal_meta() has to go into an
-infinite loop to reclaim journal bucket, and blocks the whole cache set
-from running.
-
-Such buggy situation can be solved if we do following things before
-journal replay starts,
-- Recover previous value of c->journal.blocks_free in last run time,
- and set it to current c->journal.blocks_free as initial value.
-- Recover previous value of ja->cur_idx in last run time, and set it to
- KEY_PTR of current c->journal.key as initial value.
-
-After c->journal.blocks_free and c->journal.key are recovered, in
-condition when journal space is almost full and cache set is reloaded,
-meta journal entry from journal replay can be written into free blocks
-of the last available journal bucket, then old journal entries can be
-replayed and reclaimed for further journaling request.
-
-This patch adds bch_journal_key_reload() to recover journal blocks_free
-and key ptr value for above purpose. bch_journal_key_reload() is called
-in bch_journal_read() before replaying journal by bch_journal_replay().
-
-Cc: stable@vger.kernel.org
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/journal.c | 87 +++++++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 87 insertions(+)
-
-diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index 5180bed911ef..a6deb16c15c8 100644
---- a/drivers/md/bcache/journal.c
-+++ b/drivers/md/bcache/journal.c
-@@ -143,6 +143,89 @@ reread: left = ca->sb.bucket_size - offset;
- return ret;
- }
-
-+static int bch_journal_key_reload(struct cache_set *c)
-+{
-+ struct cache *ca;
-+ unsigned int iter, n = 0;
-+ struct bkey *k = &c->journal.key;
-+ int ret = 0;
-+
-+ for_each_cache(ca, c, iter) {
-+ struct journal_device *ja = &ca->journal;
-+ struct bio *bio = &ja->bio;
-+ struct jset *j, *data = c->journal.w[0].data;
-+ struct closure cl;
-+ unsigned int len, left;
-+ unsigned int offset = 0, used_blocks = 0;
-+ sector_t bucket = bucket_to_sector(c, ca->sb.d[ja->cur_idx]);
-+
-+ closure_init_stack(&cl);
-+
-+ while (offset < ca->sb.bucket_size) {
-+reread: left = ca->sb.bucket_size - offset;
-+ len = min_t(unsigned int,
-+ left, PAGE_SECTORS << JSET_BITS);
-+
-+ bio_reset(bio);
-+ bio->bi_iter.bi_sector = bucket + offset;
-+ bio_set_dev(bio, ca->bdev);
-+ bio->bi_iter.bi_size = len << 9;
-+
-+ bio->bi_end_io = journal_read_endio;
-+ bio->bi_private = &cl;
-+ bio_set_op_attrs(bio, REQ_OP_READ, 0);
-+ bch_bio_map(bio, data);
-+
-+ closure_bio_submit(c, bio, &cl);
-+ closure_sync(&cl);
-+
-+ j = data;
-+ while (len) {
-+ size_t blocks, bytes = set_bytes(j);
-+
-+ if (j->magic != jset_magic(&ca->sb))
-+ goto out;
-+
-+ if (bytes > left << 9 ||
-+ bytes > PAGE_SIZE << JSET_BITS) {
-+ pr_err("jset may be correpted: too big");
-+ ret = -EIO;
-+ goto err;
-+ }
-+
-+ if (bytes > len << 9)
-+ goto reread;
-+
-+ if (j->csum != csum_set(j)) {
-+ pr_err("jset may be corrupted: bad csum");
-+ ret = -EIO;
-+ goto err;
-+ }
-+
-+ blocks = set_blocks(j, block_bytes(c));
-+ used_blocks += blocks;
-+
-+ offset += blocks * ca->sb.block_size;
-+ len -= blocks * ca->sb.block_size;
-+ j = ((void *) j) + blocks * block_bytes(ca);
-+ }
-+ }
-+out:
-+ c->journal.blocks_free =
-+ (c->sb.bucket_size >> c->block_bits) -
-+ used_blocks;
-+
-+ k->ptr[n++] = MAKE_PTR(0, bucket, ca->sb.nr_this_dev);
-+ }
-+
-+ BUG_ON(n == 0);
-+ bkey_init(k);
-+ SET_KEY_PTRS(k, n);
-+
-+err:
-+ return ret;
-+}
-+
- int bch_journal_read(struct cache_set *c, struct list_head *list)
- {
- #define read_bucket(b) \
-@@ -268,6 +351,10 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
- struct journal_replay,
- list)->j.seq;
-
-+ /* Initial value of c->journal.blocks_free should be 0 */
-+ BUG_ON(c->journal.blocks_free != 0);
-+ ret = bch_journal_key_reload(c);
-+
- return ret;
- #undef read_bucket
- }
---
-2.16.4
-
diff --git a/for-test/jouranl-deadlock/v2/v2-0004-bcache-fix-journal-deadlock-during-jouranl-replay.patch b/for-test/jouranl-deadlock/v2/v2-0004-bcache-fix-journal-deadlock-during-jouranl-replay.patch
deleted file mode 100644
index 39b9873..0000000
--- a/for-test/jouranl-deadlock/v2/v2-0004-bcache-fix-journal-deadlock-during-jouranl-replay.patch
+++ /dev/null
@@ -1,276 +0,0 @@
-From 97898c33b4126381cb08f8560623325cc23291e5 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 27 Feb 2019 20:35:02 +0800
-Subject: [RFC PATCH v2 04/16] bcache: fix journal deadlock during jouranl
- replay
-
-A deadlock of bcache journaling may happen during journal replay. Such
-deadlock happens when,
-- Journal space is totally full (no free blocks) and system crashes
- or reboots.
-- After reboot, the first journal entry handled by journal replay causes
- btree split and journal_meta() is called to write an empty jset to
- journal space.
-- There is no journal space to write and journal_reclaim() fails to get
- any available bucket because this is the first replayed journal entry
- to be blocked.
-Then the whole cache set is blocked from running.
-
-This patch is an effort to fix such journal replay deadlock in a simpler
-way,
-- Add a bool variable 'in_replay' in struct journal, set it to true when
- journal replay starts, and set it to false when journal replay
- completes. in_replay is initialized to be false.
-- Reserve 6 sectors in journal bucket, do not use them in normal bcache
- runtime. These sectors are only permitted to be used during journal
- replay (when c->journal.in_replay is true)
-
-Then in normal bcache runtime, journal space won't be totally full and
-there are always 6 sectors reserved for journal replay time. After
-system reboots, if bch_btree_insert() in bch_journal_replay() causes
-btree split and bch_journal_meta() gets called to require 1 sector
-from journal buckets to write an empty jset, there is enough reserved
-space to serve.
-
-The reason to reserve 6 sectors is, we should choose a number that won't
-fit into a bucket size. If the reserved space happens to be a whole
-bucket, more logic has to be added in journal_replay() to handle
-journal.blocks_free with reserved spaces in journal replay time. This is
-why 6 sectors is chosen, it is 3KB and won't be any proper block size
-or bucket size.
-
-The bcache btree node size is quite large, so btree node split won't be
-a frequent event. And when btree node split happens, newly added key will
-be inserted directly into upper level or neighbor nodes and won't go into
-journal again, only bch_journal_meta() is called to write jset metadata
-which occupies 1 block in journal space. If blocksize is set to 4K size,
-reserve 6 sectors indeed is 2 blocks, so there can be two continuous
-btree splits happening during journal replay, this is very very rare in
-practice. As default blocksize is set to sector size, that equals to
-6 blocks reserved. Continuously splitting the btree for 6 times in journal
-replay is almost impossible, so the reserved space seems to be enough
-in my humble opinion.
-
-If in future the reserved space turns out to be not enough, let's extend
-it then.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/journal.c | 100 ++++++++++++++++++++++++++++++++++++++++----
- drivers/md/bcache/journal.h | 4 ++
- 2 files changed, 97 insertions(+), 7 deletions(-)
-
-diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index a6deb16c15c8..c60a702f53a9 100644
---- a/drivers/md/bcache/journal.c
-+++ b/drivers/md/bcache/journal.c
-@@ -415,6 +415,8 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
- uint64_t start = i->j.last_seq, end = i->j.seq, n = start;
- struct keylist keylist;
-
-+ s->journal.in_replay = true;
-+
- list_for_each_entry(i, list, list) {
- BUG_ON(i->pin && atomic_read(i->pin) != 1);
-
-@@ -448,6 +450,7 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
- pr_info("journal replay done, %i keys in %i entries, seq %llu",
- keys, entries, end);
- err:
-+ s->journal.in_replay = false;
- while (!list_empty(list)) {
- i = list_first_entry(list, struct journal_replay, list);
- list_del(&i->list);
-@@ -577,6 +580,22 @@ static void do_journal_discard(struct cache *ca)
- }
- }
-
-+static inline bool last_available_journal_bucket(struct cache_set *c)
-+{
-+ struct cache *ca;
-+ unsigned int iter;
-+ struct journal_device *ja;
-+
-+ for_each_cache(ca, c, iter) {
-+ ja = &ca->journal;
-+ if (unlikely((ja->cur_idx + 1) % ca->sb.njournal_buckets ==
-+ ja->last_idx))
-+ return true;
-+ }
-+
-+ return false;
-+}
-+
- static void journal_reclaim(struct cache_set *c)
- {
- struct bkey *k = &c->journal.key;
-@@ -584,6 +603,7 @@ static void journal_reclaim(struct cache_set *c)
- uint64_t last_seq;
- unsigned int iter, n = 0;
- atomic_t p __maybe_unused;
-+ bool last, do_wakeup = false;
-
- atomic_long_inc(&c->reclaim);
-
-@@ -606,8 +626,13 @@ static void journal_reclaim(struct cache_set *c)
- for_each_cache(ca, c, iter)
- do_journal_discard(ca);
-
-- if (c->journal.blocks_free)
-+ last = last_available_journal_bucket(c);
-+ if ((!last && c->journal.blocks_free) ||
-+ (last && (c->journal.blocks_free * c->sb.block_size) >
-+ BCH_JOURNAL_RPLY_RESERVE)) {
-+ do_wakeup = true;
- goto out;
-+ }
-
- /*
- * Allocate:
-@@ -632,9 +657,10 @@ static void journal_reclaim(struct cache_set *c)
- bkey_init(k);
- SET_KEY_PTRS(k, n);
- c->journal.blocks_free = c->sb.bucket_size >> c->block_bits;
-+ do_wakeup = true;
- }
- out:
-- if (!journal_full(&c->journal))
-+ if (do_wakeup && !journal_full(&c->journal))
- __closure_wake_up(&c->journal.wait);
- }
-
-@@ -692,6 +718,21 @@ static void journal_write_unlock(struct closure *cl)
- spin_unlock(&c->journal.lock);
- }
-
-+static bool should_reclaim(struct cache_set *c,
-+ struct journal_write *w)
-+{
-+ if (unlikely(journal_full(&c->journal)))
-+ return true;
-+
-+ if (unlikely(last_available_journal_bucket(c) &&
-+ (!c->journal.in_replay) &&
-+ (c->journal.blocks_free * c->sb.block_size <=
-+ BCH_JOURNAL_RPLY_RESERVE)))
-+ return true;
-+
-+ return false;
-+}
-+
- static void journal_write_unlocked(struct closure *cl)
- __releases(c->journal.lock)
- {
-@@ -710,7 +751,7 @@ static void journal_write_unlocked(struct closure *cl)
- if (!w->need_write) {
- closure_return_with_destructor(cl, journal_write_unlock);
- return;
-- } else if (journal_full(&c->journal)) {
-+ } else if (should_reclaim(c, w)) {
- journal_reclaim(c);
- spin_unlock(&c->journal.lock);
-
-@@ -798,6 +839,52 @@ static void journal_try_write(struct cache_set *c)
- }
- }
-
-+static bool no_journal_wait(struct cache_set *c,
-+ size_t sectors)
-+{
-+ bool last = last_available_journal_bucket(c);
-+ size_t reserved_sectors = 0;
-+ size_t n = min_t(size_t,
-+ c->journal.blocks_free * c->sb.block_size,
-+ PAGE_SECTORS << JSET_BITS);
-+
-+ if (last && !c->journal.in_replay)
-+ reserved_sectors = BCH_JOURNAL_RPLY_RESERVE;
-+
-+ if (sectors <= (n - reserved_sectors))
-+ return true;
-+
-+ return false;
-+}
-+
-+static bool should_try_write(struct cache_set *c,
-+ struct journal_write *w)
-+{
-+ size_t reserved_sectors, n, sectors;
-+
-+ if (journal_full(&c->journal))
-+ return false;
-+
-+ if (!last_available_journal_bucket(c))
-+ return true;
-+
-+ /* the check in no_journal_wait exceeds BCH_JOURNAL_RPLY_RESERVE */
-+ if (w->data->keys == 0)
-+ return false;
-+
-+ reserved_sectors = BCH_JOURNAL_RPLY_RESERVE;
-+ n = min_t(size_t,
-+ (c->journal.blocks_free * c->sb.block_size),
-+ PAGE_SECTORS << JSET_BITS);
-+ sectors = __set_blocks(w->data, w->data->keys,
-+ block_bytes(c)) * c->sb.block_size;
-+ if (sectors <= (n - reserved_sectors))
-+ return true;
-+
-+ return false;
-+}
-+
-+
- static struct journal_write *journal_wait_for_write(struct cache_set *c,
- unsigned int nkeys)
- __acquires(&c->journal.lock)
-@@ -816,15 +903,13 @@ static struct journal_write *journal_wait_for_write(struct cache_set *c,
- sectors = __set_blocks(w->data, w->data->keys + nkeys,
- block_bytes(c)) * c->sb.block_size;
-
-- if (sectors <= min_t(size_t,
-- c->journal.blocks_free * c->sb.block_size,
-- PAGE_SECTORS << JSET_BITS))
-+ if (no_journal_wait(c, sectors))
- return w;
-
- if (wait)
- closure_wait(&c->journal.wait, &cl);
-
-- if (!journal_full(&c->journal)) {
-+ if (should_try_write(c, w)) {
- if (wait)
- trace_bcache_journal_entry_full(c);
-
-@@ -933,6 +1018,7 @@ int bch_journal_alloc(struct cache_set *c)
- INIT_DELAYED_WORK(&j->work, journal_write_work);
-
- c->journal_delay_ms = 100;
-+ j->in_replay = false;
-
- j->w[0].c = c;
- j->w[1].c = c;
-diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h
-index 66f0facff84b..54408e248a39 100644
---- a/drivers/md/bcache/journal.h
-+++ b/drivers/md/bcache/journal.h
-@@ -108,6 +108,7 @@ struct journal {
- struct closure io;
- int io_in_flight;
- struct delayed_work work;
-+ bool in_replay;
-
- /* Number of blocks free in the bucket(s) we're currently writing to */
- unsigned int blocks_free;
-@@ -159,6 +160,9 @@ struct journal_device {
-
- #define JOURNAL_PIN 20000
-
-+/* Reserved jouranl space in sectors */
-+#define BCH_JOURNAL_RPLY_RESERVE 6U
-+
- #define journal_full(j) \
- (!(j)->blocks_free || fifo_free(&(j)->pin) <= 1)
-
---
-2.16.4
-
diff --git a/for-test/jouranl-deadlock/v2/v2-0005-bcache-reserve-space-for-journal_meta-in-run-time.patch b/for-test/jouranl-deadlock/v2/v2-0005-bcache-reserve-space-for-journal_meta-in-run-time.patch
deleted file mode 100644
index 07050e9..0000000
--- a/for-test/jouranl-deadlock/v2/v2-0005-bcache-reserve-space-for-journal_meta-in-run-time.patch
+++ /dev/null
@@ -1,241 +0,0 @@
-From 4d3d26818916654397a930e8ce082b650dc809eb Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Thu, 28 Feb 2019 20:29:00 +0800
-Subject: [RFC PATCH v2 05/16] bcache: reserve space for journal_meta() in run
- time
-
-Another journal deadlock of bcache journaling can happen in normal
-bcache runtime. It is very rare to happen but there are people reporting
-bkey insert work queue blocked, which is caused by such deadlock.
-
-This is how such journaling deadlock in runtime happens,
-- Journal space is totally full and no free space to reclaim, journaling
- tasks waiting for space to write in journal_wait_for_write().
-- In order to have free journal space, btree_flush_write() is called to
- flush earliest journaled in-memory btree key into btree node. Then all
- journaled bkey in early used journal buckets are flushed to on-disk
- btree, this journal bucket can be reclaimed for new coming journal
- request.
-- But if the earliest journaled bkey causes a btree node split during
- inserting it into btree node, finally journal_meta() will be called to
- journal btree root (and other information) into the journal space.
-- Unfortunately the journal space is full, and the journal entries have
- to be flushed in linear turn. So bch_journal_meta() from bkey insert
- is blocked too.
-Then journaling deadlock during bcache run time happens.
-
-A method to fix such deadlock is to reserve some journal space too. The
-reserved space can only be used when,
-- Current journal bucket is the last journal bucket which has available
- space to write into.
-- When calling bch_journal(), current jset is empty and there is no key
- in the inserting key list. This means the journal request is from
- bch_journal_meta() and no non-reserved space can be used.
-
-Then if such journaling request is from bch_journal_meta() of inserting
-the earlest journaled bkey back into btree, the deadlock condition won't
-happen any more because the reserved space can be used for such
-scenario.
-
-Since there are already 6 sectors reserved for journal replay, here we
-reserve 7 sectors for runtime meta journal from btree split caused by
-flushing journal entries back to btree node. Depending on block size from
-1 sector to 4KB, the reserved space can serve for from 7 to 2 journal
-blocks. Indeed only one journal block reserved for such journal deadlock
-scenario is enough, 2 continuous btree splits caused by two adjacent bkey
-flushes from journal are very very rare to happen. So reserving 7 sectors
-should work.
-
-Another reason for reserving 7 sectors is, there are already 6 sectors
-reserved for journal replay, so in total there are 13 sectors reserved in
-last available journal bucket. 13 sectors won't be a proper bucket size,
-so we don't need to add more code to handle journal.blocks_free
-initialization for whole reserved journal bucket. Even if such code logic
-is simple, less code is better in my humble opinion.
-
-Again, if in future the reserved space turns out to be not enough, let's
-extend it then.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/journal.c | 89 +++++++++++++++++++++++++++++++++------------
- drivers/md/bcache/journal.h | 1 +
- 2 files changed, 66 insertions(+), 24 deletions(-)
-
-diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index c60a702f53a9..6aa68ab7cd78 100644
---- a/drivers/md/bcache/journal.c
-+++ b/drivers/md/bcache/journal.c
-@@ -629,7 +629,7 @@ static void journal_reclaim(struct cache_set *c)
- last = last_available_journal_bucket(c);
- if ((!last && c->journal.blocks_free) ||
- (last && (c->journal.blocks_free * c->sb.block_size) >
-- BCH_JOURNAL_RPLY_RESERVE)) {
-+ (BCH_JOURNAL_RESERVE + BCH_JOURNAL_RPLY_RESERVE))) {
- do_wakeup = true;
- goto out;
- }
-@@ -718,18 +718,27 @@ static void journal_write_unlock(struct closure *cl)
- spin_unlock(&c->journal.lock);
- }
-
--static bool should_reclaim(struct cache_set *c,
-- struct journal_write *w)
-+static inline bool should_reclaim(struct cache_set *c,
-+ struct journal_write *w)
- {
-- if (unlikely(journal_full(&c->journal)))
-- return true;
-+ bool last = last_available_journal_bucket(c);
-
-- if (unlikely(last_available_journal_bucket(c) &&
-- (!c->journal.in_replay) &&
-- (c->journal.blocks_free * c->sb.block_size <=
-- BCH_JOURNAL_RPLY_RESERVE)))
-+ if (!last && journal_full(&c->journal))
- return true;
-
-+ if (unlikely(last)) {
-+ size_t n = c->journal.blocks_free * c->sb.block_size;
-+
-+ if (!c->journal.in_replay) {
-+ if (n <= BCH_JOURNAL_RESERVE +
-+ BCH_JOURNAL_RPLY_RESERVE)
-+ return true;
-+ } else {
-+ if (n <= BCH_JOURNAL_RPLY_RESERVE)
-+ return true;
-+ }
-+ }
-+
- return false;
- }
-
-@@ -751,7 +760,9 @@ static void journal_write_unlocked(struct closure *cl)
- if (!w->need_write) {
- closure_return_with_destructor(cl, journal_write_unlock);
- return;
-- } else if (should_reclaim(c, w)) {
-+ }
-+
-+ if (should_reclaim(c, w)) {
- journal_reclaim(c);
- spin_unlock(&c->journal.lock);
-
-@@ -840,16 +851,26 @@ static void journal_try_write(struct cache_set *c)
- }
-
- static bool no_journal_wait(struct cache_set *c,
-- size_t sectors)
-+ size_t sectors,
-+ int nkeys)
- {
-+ bool is_journal_meta = (nkeys == 0) ? true : false;
- bool last = last_available_journal_bucket(c);
- size_t reserved_sectors = 0;
-- size_t n = min_t(size_t,
-- c->journal.blocks_free * c->sb.block_size,
-- PAGE_SECTORS << JSET_BITS);
-+ size_t n;
-+
-+ if (unlikely(last)) {
-+ if (!is_journal_meta)
-+ reserved_sectors = BCH_JOURNAL_RESERVE +
-+ BCH_JOURNAL_RPLY_RESERVE;
-+ else
-+ reserved_sectors = (!c->journal.in_replay) ?
-+ BCH_JOURNAL_RPLY_RESERVE : 0;
-+ }
-
-- if (last && !c->journal.in_replay)
-- reserved_sectors = BCH_JOURNAL_RPLY_RESERVE;
-+ n = min_t(size_t,
-+ c->journal.blocks_free * c->sb.block_size,
-+ PAGE_SECTORS << JSET_BITS);
-
- if (sectors <= (n - reserved_sectors))
- return true;
-@@ -858,26 +879,46 @@ static bool no_journal_wait(struct cache_set *c,
- }
-
- static bool should_try_write(struct cache_set *c,
-- struct journal_write *w)
-+ struct journal_write *w,
-+ int nkeys)
- {
- size_t reserved_sectors, n, sectors;
-+ bool last, empty_jset;
-
- if (journal_full(&c->journal))
- return false;
-
-- if (!last_available_journal_bucket(c))
-+ last = last_available_journal_bucket(c);
-+ empty_jset = (w->data->keys == 0) ? true : false;
-+
-+ if (!last) {
-+ /*
-+ * Not last available journal bucket, no reserved journal
-+ * space restriction, an empty jset should not be here.
-+ */
-+ BUG_ON(empty_jset);
- return true;
-+ }
-
-- /* the check in no_journal_wait exceeds BCH_JOURNAL_RPLY_RESERVE */
-- if (w->data->keys == 0)
-+ if (empty_jset) {
-+ /*
-+ * If nkeys is 0 it means the journaling request is for meta
-+ * data, which should be returned in journal_wait_for_write()
-+ * by checking no_journal_wait(), and won't get here.
-+ */
-+ BUG_ON(nkeys == 0);
- return false;
-+ }
-
-- reserved_sectors = BCH_JOURNAL_RPLY_RESERVE;
-+ reserved_sectors = BCH_JOURNAL_RESERVE +
-+ BCH_JOURNAL_RPLY_RESERVE;
- n = min_t(size_t,
- (c->journal.blocks_free * c->sb.block_size),
- PAGE_SECTORS << JSET_BITS);
-- sectors = __set_blocks(w->data, w->data->keys,
-+ sectors = __set_blocks(w->data,
-+ w->data->keys,
- block_bytes(c)) * c->sb.block_size;
-+
- if (sectors <= (n - reserved_sectors))
- return true;
-
-@@ -903,13 +944,13 @@ static struct journal_write *journal_wait_for_write(struct cache_set *c,
- sectors = __set_blocks(w->data, w->data->keys + nkeys,
- block_bytes(c)) * c->sb.block_size;
-
-- if (no_journal_wait(c, sectors))
-+ if (no_journal_wait(c, sectors, nkeys))
- return w;
-
- if (wait)
- closure_wait(&c->journal.wait, &cl);
-
-- if (should_try_write(c, w)) {
-+ if (should_try_write(c, w, nkeys)) {
- if (wait)
- trace_bcache_journal_entry_full(c);
-
-diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h
-index 54408e248a39..55f81443f304 100644
---- a/drivers/md/bcache/journal.h
-+++ b/drivers/md/bcache/journal.h
-@@ -162,6 +162,7 @@ struct journal_device {
-
- /* Reserved jouranl space in sectors */
- #define BCH_JOURNAL_RPLY_RESERVE 6U
-+#define BCH_JOURNAL_RESERVE 7U
-
- #define journal_full(j) \
- (!(j)->blocks_free || fifo_free(&(j)->pin) <= 1)
---
-2.16.4
-
diff --git a/for-test/jouranl-deadlock/v2/v2-0006-bcache-add-failure-check-to-run_cache_set-for-jou.patch b/for-test/jouranl-deadlock/v2/v2-0006-bcache-add-failure-check-to-run_cache_set-for-jou.patch
deleted file mode 100644
index 47fee81..0000000
--- a/for-test/jouranl-deadlock/v2/v2-0006-bcache-add-failure-check-to-run_cache_set-for-jou.patch
+++ /dev/null
@@ -1,88 +0,0 @@
-From 7d1f183bf68623c2bea6ec5c41d091a65e426e47 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 13 Mar 2019 21:57:18 +0800
-Subject: [RFC PATCH v2 06/16] bcache: add failure check to run_cache_set() for
- journal replay
-
-Currently run_cache_set() has no return value, if there is failure in
-bch_journal_replay(), the caller of run_cache_set() has no idea about
-such failure and just continues to execute following code after
-run_cache_set(). The internal failure is triggered inside
-bch_journal_replay() and being handled in async way. This behavior is
-inefficient, while failure handling inside bch_journal_replay(), cache
-register code is still running to start the cache set. Registering and
-unregistering code running at the same time may introduce some rare race
-condition, and make the code harder to understand.
-
-This patch adds return value to run_cache_set(), and returns -EIO if
-bch_journal_replay() fails. Then caller of run_cache_set() may detect
-such failure and stop registering code flow immediately inside
-register_cache_set().
-
-If journal replay fails, run_cache_set() can report error immediately
-to register_cache_set(). This patch makes the failure handling for
-bch_journal_replay() be in synchronized way, easier to understand and
-debug, and avoids potential race condition for register-and-unregister
-at the same time.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/super.c | 17 ++++++++++++-----
- 1 file changed, 12 insertions(+), 5 deletions(-)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index a697a3a923cd..036bffad0bfe 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1775,7 +1775,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
- return NULL;
- }
-
--static void run_cache_set(struct cache_set *c)
-+static int run_cache_set(struct cache_set *c)
- {
- const char *err = "cannot allocate memory";
- struct cached_dev *dc, *t;
-@@ -1869,7 +1869,9 @@ static void run_cache_set(struct cache_set *c)
- if (j->version < BCACHE_JSET_VERSION_UUID)
- __uuid_write(c);
-
-- bch_journal_replay(c, &journal);
-+ err = "bcache: replay journal failed";
-+ if (bch_journal_replay(c, &journal))
-+ goto err;
- } else {
- pr_notice("invalidating existing data");
-
-@@ -1937,11 +1939,13 @@ static void run_cache_set(struct cache_set *c)
- flash_devs_run(c);
-
- set_bit(CACHE_SET_RUNNING, &c->flags);
-- return;
-+ return 0;
- err:
- closure_sync(&cl);
- /* XXX: test this, it's broken */
- bch_cache_set_error(c, "%s", err);
-+
-+ return -EIO;
- }
-
- static bool can_attach_cache(struct cache *ca, struct cache_set *c)
-@@ -2005,8 +2009,11 @@ static const char *register_cache_set(struct cache *ca)
- ca->set->cache[ca->sb.nr_this_dev] = ca;
- c->cache_by_alloc[c->caches_loaded++] = ca;
-
-- if (c->caches_loaded == c->sb.nr_in_set)
-- run_cache_set(c);
-+ if (c->caches_loaded == c->sb.nr_in_set) {
-+ err = "failed to run cache set";
-+ if (run_cache_set(c) < 0)
-+ goto err;
-+ }
-
- return NULL;
- err:
---
-2.16.4
-
diff --git a/for-test/jouranl-deadlock/v2/v2-0007-bcache-add-comments-for-kobj-release-callback-rou.patch b/for-test/jouranl-deadlock/v2/v2-0007-bcache-add-comments-for-kobj-release-callback-rou.patch
deleted file mode 100644
index c675a6d..0000000
--- a/for-test/jouranl-deadlock/v2/v2-0007-bcache-add-comments-for-kobj-release-callback-rou.patch
+++ /dev/null
@@ -1,62 +0,0 @@
-From 79d3266fac98e11fab0d044f82decc1491344f74 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 13 Mar 2019 22:39:37 +0800
-Subject: [RFC PATCH v2 07/16] bcache: add comments for kobj release callback
- routine
-
-Bcache has several routines to release resources in implicit way, they
-are called when the associated kobj released. This patch adds code
-comments to notice when and which release callback will be called,
-- When dc->disk.kobj released:
- void bch_cached_dev_release(struct kobject *kobj)
-- When d->kobj released:
- void bch_flash_dev_release(struct kobject *kobj)
-- When c->kobj released:
- void bch_cache_set_release(struct kobject *kobj)
-- When ca->kobj released
- void bch_cache_release(struct kobject *kobj)
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/super.c | 4 ++++
- 1 file changed, 4 insertions(+)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 036bffad0bfe..400af446c372 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1174,6 +1174,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
- return 0;
- }
-
-+/* when dc->disk.kobj released */
- void bch_cached_dev_release(struct kobject *kobj)
- {
- struct cached_dev *dc = container_of(kobj, struct cached_dev,
-@@ -1326,6 +1327,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page,
-
- /* Flash only volumes */
-
-+/* When d->kobj released */
- void bch_flash_dev_release(struct kobject *kobj)
- {
- struct bcache_device *d = container_of(kobj, struct bcache_device,
-@@ -1496,6 +1498,7 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
- return true;
- }
-
-+/* When c->kobj released */
- void bch_cache_set_release(struct kobject *kobj)
- {
- struct cache_set *c = container_of(kobj, struct cache_set, kobj);
-@@ -2023,6 +2026,7 @@ static const char *register_cache_set(struct cache *ca)
-
- /* Cache device */
-
-+/* When ca->kobj released */
- void bch_cache_release(struct kobject *kobj)
- {
- struct cache *ca = container_of(kobj, struct cache, kobj);
---
-2.16.4
-
diff --git a/for-test/jouranl-deadlock/v2/v2-0008-bcache-return-error-immediately-in-bch_journal_re.patch b/for-test/jouranl-deadlock/v2/v2-0008-bcache-return-error-immediately-in-bch_journal_re.patch
deleted file mode 100644
index 01f188c..0000000
--- a/for-test/jouranl-deadlock/v2/v2-0008-bcache-return-error-immediately-in-bch_journal_re.patch
+++ /dev/null
@@ -1,48 +0,0 @@
-From 4bec08de9304ae05a5a934708813bdc61dc41f1e Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 13 Mar 2019 22:52:31 +0800
-Subject: [RFC PATCH v2 08/16] bcache: return error immediately in
- bch_journal_replay()
-
-When failure happens inside bch_journal_replay(), calling
-cache_set_err_on() and handling the failure in async way is not a good
-idea. Because after bch_journal_replay() returns, registering code will
-continue to execute following steps, and unregistering code triggered
-by cache_set_err_on() is running at the same time. First it is unnecessary
-to handle failure and unregister cache set in an async way, second there
-might be potential race condition to run register and unregister code
-for same cache set.
-
-So in this patch, if failure happens in bch_journal_replay(), we don't
-call cache_set_err_on(), and just print out the same error message to
-kernel message buffer, then return -EIO immediately to the caller. Then
-caller can detect such failure and handle it in a synchronized way.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/journal.c | 9 ++++++---
- 1 file changed, 6 insertions(+), 3 deletions(-)
-
-diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index 6aa68ab7cd78..bdb6f9cefe48 100644
---- a/drivers/md/bcache/journal.c
-+++ b/drivers/md/bcache/journal.c
-@@ -420,9 +420,12 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
- list_for_each_entry(i, list, list) {
- BUG_ON(i->pin && atomic_read(i->pin) != 1);
-
-- cache_set_err_on(n != i->j.seq, s,
--"bcache: journal entries %llu-%llu missing! (replaying %llu-%llu)",
-- n, i->j.seq - 1, start, end);
-+ if (n != i->j.seq) {
-+ pr_err("bcache: journal entries %llu-%llu missing! (replaying %llu-%llu)",
-+ n, i->j.seq - 1, start, end);
-+ ret = -EIO;
-+ goto err;
-+ }
-
- for (k = i->j.start;
- k < bset_bkey_last(&i->j);
---
-2.16.4
-
diff --git a/for-test/jouranl-deadlock/v2/v2-0009-bcache-add-error-check-for-calling-register_bdev.patch b/for-test/jouranl-deadlock/v2/v2-0009-bcache-add-error-check-for-calling-register_bdev.patch
deleted file mode 100644
index 4d342e2..0000000
--- a/for-test/jouranl-deadlock/v2/v2-0009-bcache-add-error-check-for-calling-register_bdev.patch
+++ /dev/null
@@ -1,91 +0,0 @@
-From bb554ecefc017bdaa6aeb717010a8fa97036da51 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Tue, 19 Mar 2019 12:27:53 +0800
-Subject: [RFC PATCH v2 09/16] bcache: add error check for calling
- register_bdev()
-
-This patch adds return value to register_bdev(). Then if failure happens
-inside register_bdev(), its caller register_bcache() may detect and
-handle the failure more properly.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/super.c | 16 ++++++++++------
- 1 file changed, 10 insertions(+), 6 deletions(-)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 400af446c372..a435c506edba 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -1281,7 +1281,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
-
- /* Cached device - bcache superblock */
-
--static void register_bdev(struct cache_sb *sb, struct page *sb_page,
-+static int register_bdev(struct cache_sb *sb, struct page *sb_page,
- struct block_device *bdev,
- struct cached_dev *dc)
- {
-@@ -1319,10 +1319,11 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page,
- BDEV_STATE(&dc->sb) == BDEV_STATE_STALE)
- bch_cached_dev_run(dc);
-
-- return;
-+ return 0;
- err:
- pr_notice("error %s: %s", dc->backing_dev_name, err);
- bcache_device_stop(&dc->disk);
-+ return -EIO;
- }
-
- /* Flash only volumes */
-@@ -2273,7 +2274,7 @@ static bool bch_is_open(struct block_device *bdev)
- static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
- const char *buffer, size_t size)
- {
-- ssize_t ret = size;
-+ ssize_t ret = -EINVAL;
- const char *err = "cannot allocate memory";
- char *path = NULL;
- struct cache_sb *sb = NULL;
-@@ -2307,7 +2308,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
- if (!IS_ERR(bdev))
- bdput(bdev);
- if (attr == &ksysfs_register_quiet)
-- goto out;
-+ goto quiet_out;
- }
- goto err;
- }
-@@ -2328,8 +2329,10 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
- goto err_close;
-
- mutex_lock(&bch_register_lock);
-- register_bdev(sb, sb_page, bdev, dc);
-+ ret = register_bdev(sb, sb_page, bdev, dc);
- mutex_unlock(&bch_register_lock);
-+ if (ret < 0)
-+ goto err;
- } else {
- struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
-
-@@ -2339,6 +2342,8 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
- if (register_cache(sb, sb_page, bdev, ca) != 0)
- goto err;
- }
-+quiet_out:
-+ ret = size;
- out:
- if (sb_page)
- put_page(sb_page);
-@@ -2351,7 +2356,6 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
- blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
- err:
- pr_info("error %s: %s", path, err);
-- ret = -EINVAL;
- goto out;
- }
-
---
-2.16.4
-
diff --git a/for-test/jouranl-deadlock/v2/v2-0010-bcache-Add-comments-for-blkdev_put-in-registratio.patch b/for-test/jouranl-deadlock/v2/v2-0010-bcache-Add-comments-for-blkdev_put-in-registratio.patch
deleted file mode 100644
index 191177d..0000000
--- a/for-test/jouranl-deadlock/v2/v2-0010-bcache-Add-comments-for-blkdev_put-in-registratio.patch
+++ /dev/null
@@ -1,51 +0,0 @@
-From f4a737b08d573035889cbf3c70cdde528117a2cd Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Tue, 19 Mar 2019 12:29:52 +0800
-Subject: [RFC PATCH v2 10/16] bcache: Add comments for blkdev_put() in
- registration code path
-
-Add comments to explain why in register_bcache() blkdev_put() won't
-be called in two locations. Add comments to explain why blkdev_put()
-must be called in register_cache() when cache_alloc() failed.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/super.c | 8 ++++++++
- 1 file changed, 8 insertions(+)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index a435c506edba..83a7cb0e0e45 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -2191,6 +2191,12 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page,
-
- ret = cache_alloc(ca);
- if (ret != 0) {
-+ /*
-+ * If we failed here, it means ca->kobj is not initialized yet,
-+ * kobject_put() won't be called and there is no chance to
-+ * call blkdev_put() on bdev in bch_cache_release(). So we
-+ * explicitly call blkdev_put() here.
-+ */
- blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
- if (ret == -ENOMEM)
- err = "cache_alloc(): -ENOMEM";
-@@ -2331,6 +2337,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
- mutex_lock(&bch_register_lock);
- ret = register_bdev(sb, sb_page, bdev, dc);
- mutex_unlock(&bch_register_lock);
-+ /* blkdev_put() will be called in cached_dev_free() */
- if (ret < 0)
- goto err;
- } else {
-@@ -2339,6 +2346,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
- if (!ca)
- goto err_close;
-
-+ /* blkdev_put() will be called in bch_cache_release() */
- if (register_cache(sb, sb_page, bdev, ca) != 0)
- goto err;
- }
---
-2.16.4
-
diff --git a/for-test/jouranl-deadlock/v2/v2-0011-bcache-add-comments-for-closure_fn-to-be-called-i.patch b/for-test/jouranl-deadlock/v2/v2-0011-bcache-add-comments-for-closure_fn-to-be-called-i.patch
deleted file mode 100644
index 3b0c2e3..0000000
--- a/for-test/jouranl-deadlock/v2/v2-0011-bcache-add-comments-for-closure_fn-to-be-called-i.patch
+++ /dev/null
@@ -1,42 +0,0 @@
-From ca49b08f0e1e634bb5082413ee34b4d8080e0d38 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Tue, 19 Mar 2019 18:58:47 +0800
-Subject: [RFC PATCH v2 11/16] bcache: add comments for closure_fn to be called
- in closure_queue()
-
-Add code comments to explain which callback function might be called
-for the closure_queue(). This is an effort to make the code more
-understandable for readers.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/super.c | 6 ++++++
- 1 file changed, 6 insertions(+)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 83a7cb0e0e45..9b41e0b62cc0 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -662,6 +662,11 @@ static const struct block_device_operations bcache_ops = {
- void bcache_device_stop(struct bcache_device *d)
- {
- if (!test_and_set_bit(BCACHE_DEV_CLOSING, &d->flags))
-+ /*
-+ * closure_fn set to
-+ * - cached device: cached_dev_flush()
-+ * - flash dev: flash_dev_flush()
-+ */
- closure_queue(&d->cl);
- }
-
-@@ -1677,6 +1682,7 @@ static void __cache_set_unregister(struct closure *cl)
- void bch_cache_set_stop(struct cache_set *c)
- {
- if (!test_and_set_bit(CACHE_SET_STOPPING, &c->flags))
-+ /* closure_fn set to __cache_set_unregister() */
- closure_queue(&c->caching);
- }
-
---
-2.16.4
-
diff --git a/for-test/jouranl-deadlock/v2/v2-0012-bcache-add-pendings_cleanup-to-stop-pending-bcach.patch b/for-test/jouranl-deadlock/v2/v2-0012-bcache-add-pendings_cleanup-to-stop-pending-bcach.patch
deleted file mode 100644
index d81c648..0000000
--- a/for-test/jouranl-deadlock/v2/v2-0012-bcache-add-pendings_cleanup-to-stop-pending-bcach.patch
+++ /dev/null
@@ -1,107 +0,0 @@
-From 6da8faaaf5e2ecd2fb3d11ae6bd8ab8ee19b39bc Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Wed, 20 Mar 2019 23:11:59 +0800
-Subject: [RFC PATCH v2 12/16] bcache: add pendings_cleanup to stop pending
- bcache device
-
-If a bcache device is in dirty state and its cache set is not
-registered, this bcache device will not appear in /dev/bcache<N>,
-and there is no way to stop it or remove the bcache kernel module.
-
-This is an as-designed behavior, but sometimes people have to reboot
-the whole system to release or stop the pending backing device.
-
-This sysfs interface may remove such pending bcache devices when
-anything is written into the sysfs file manually.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/super.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 55 insertions(+)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index 9b41e0b62cc0..e988e46a6479 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -2246,9 +2246,13 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page,
-
- static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
- const char *buffer, size_t size);
-+static ssize_t bch_pending_bdevs_cleanup(struct kobject *k,
-+ struct kobj_attribute *attr,
-+ const char *buffer, size_t size);
-
- kobj_attribute_write(register, register_bcache);
- kobj_attribute_write(register_quiet, register_bcache);
-+kobj_attribute_write(pendings_cleanup, bch_pending_bdevs_cleanup);
-
- static bool bch_is_open_backing(struct block_device *bdev)
- {
-@@ -2373,6 +2377,56 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
- goto out;
- }
-
-+
-+struct pdev {
-+ struct list_head list;
-+ struct cached_dev *dc;
-+};
-+
-+static ssize_t bch_pending_bdevs_cleanup(struct kobject *k,
-+ struct kobj_attribute *attr,
-+ const char *buffer,
-+ size_t size)
-+{
-+ LIST_HEAD(pending_devs);
-+ ssize_t ret = size;
-+ struct cached_dev *dc, *tdc;
-+ struct pdev *pdev, *tpdev;
-+ struct cache_set *c, *tc;
-+
-+ mutex_lock(&bch_register_lock);
-+ list_for_each_entry_safe(dc, tdc, &uncached_devices, list) {
-+ pdev = kmalloc(sizeof(struct pdev), GFP_KERNEL);
-+ if (!pdev)
-+ break;
-+ pdev->dc = dc;
-+ list_add(&pdev->list, &pending_devs);
-+ }
-+
-+ list_for_each_entry_safe(pdev, tpdev, &pending_devs, list) {
-+ list_for_each_entry_safe(c, tc, &bch_cache_sets, list) {
-+ char *pdev_set_uuid = pdev->dc->sb.set_uuid;
-+ char *set_uuid = c->sb.uuid;
-+
-+ if (!memcmp(pdev_set_uuid, set_uuid, 16)) {
-+ list_del(&pdev->list);
-+ kfree(pdev);
-+ break;
-+ }
-+ }
-+ }
-+ mutex_unlock(&bch_register_lock);
-+
-+ list_for_each_entry_safe(pdev, tpdev, &pending_devs, list) {
-+ pr_info("delete pdev %p", pdev);
-+ list_del(&pdev->list);
-+ bcache_device_stop(&pdev->dc->disk);
-+ kfree(pdev);
-+ }
-+
-+ return ret;
-+}
-+
- static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x)
- {
- if (code == SYS_DOWN ||
-@@ -2483,6 +2537,7 @@ static int __init bcache_init(void)
- static const struct attribute *files[] = {
- &ksysfs_register.attr,
- &ksysfs_register_quiet.attr,
-+ &ksysfs_pendings_cleanup.attr,
- NULL
- };
-
---
-2.16.4
-
diff --git a/for-test/jouranl-deadlock/v2/v2-0013-bcache-fix-fifo-index-swapping-condition-in-btree.patch b/for-test/jouranl-deadlock/v2/v2-0013-bcache-fix-fifo-index-swapping-condition-in-btree.patch
deleted file mode 100644
index d76c955..0000000
--- a/for-test/jouranl-deadlock/v2/v2-0013-bcache-fix-fifo-index-swapping-condition-in-btree.patch
+++ /dev/null
@@ -1,90 +0,0 @@
-From e6ac565cfb5676a9e833e62570fb8a9d786eda47 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sat, 23 Mar 2019 22:54:35 +0800
-Subject: [RFC PATCH v2 13/16] bcache: fix fifo index swapping condition in
- btree_flush_write()
-
-Current journal_max_cmp() and journal_min_cmp() assume that a smaller fifo
-index indicates an older journal entry, but this is only true when the fifo
-index is not swapped.
-
-Fifo structure journal.pin is implemented by a circular buffer, if the head
-index reaches the highest location of the circular buffer, it will be swapped
-to 0. Once the swapping happens, it means a smaller fifo index might be
-associated to a newer journal entry. So the btree node with the oldest
-journal entry won't be selected by btree_flush_write() to flush out to
-cache device. The result is, the oldest journal entries may never have
-a chance to be written into cache device, and after a reboot
-bch_journal_replay() may complain some journal entries are missing.
-
-This patch handles the fifo index swapping conditions properly, then in
-btree_flush_write() the btree node with the oldest journal entry can be
-selected from c->flush_btree correctly.
-
-Cc: stable@vger.kernel.org
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/journal.c | 47 +++++++++++++++++++++++++++++++++++++++------
- 1 file changed, 41 insertions(+), 6 deletions(-)
-
-diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index bdb6f9cefe48..bc0e01151155 100644
---- a/drivers/md/bcache/journal.c
-+++ b/drivers/md/bcache/journal.c
-@@ -464,12 +464,47 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
- }
-
- /* Journalling */
--#define journal_max_cmp(l, r) \
-- (fifo_idx(&c->journal.pin, btree_current_write(l)->journal) < \
-- fifo_idx(&(c)->journal.pin, btree_current_write(r)->journal))
--#define journal_min_cmp(l, r) \
-- (fifo_idx(&c->journal.pin, btree_current_write(l)->journal) > \
-- fifo_idx(&(c)->journal.pin, btree_current_write(r)->journal))
-+#define journal_max_cmp(l, r) \
-+({ \
-+ int l_idx, r_idx, f_idx, b_idx; \
-+ bool _ret = true; \
-+ \
-+ l_idx = fifo_idx(&c->journal.pin, btree_current_write(l)->journal); \
-+ r_idx = fifo_idx(&c->journal.pin, btree_current_write(r)->journal); \
-+ f_idx = c->journal.pin.front; \
-+ b_idx = c->journal.pin.back; \
-+ \
-+ _ret = (l_idx < r_idx); \
-+ /* in case fifo back pointer is swapped */ \
-+ if (b_idx < f_idx) { \
-+ if (l_idx <= b_idx && r_idx >= f_idx) \
-+ _ret = false; \
-+ else if (l_idx >= f_idx && r_idx <= b_idx) \
-+ _ret = true; \
-+ } \
-+ _ret; \
-+})
-+
-+#define journal_min_cmp(l, r) \
-+({ \
-+ int l_idx, r_idx, f_idx, b_idx; \
-+ bool _ret = true; \
-+ \
-+ l_idx = fifo_idx(&c->journal.pin, btree_current_write(l)->journal); \
-+ r_idx = fifo_idx(&c->journal.pin, btree_current_write(r)->journal); \
-+ f_idx = c->journal.pin.front; \
-+ b_idx = c->journal.pin.back; \
-+ \
-+ _ret = (l_idx > r_idx); \
-+ /* in case fifo back pointer is swapped */ \
-+ if (b_idx < f_idx) { \
-+ if (l_idx <= b_idx && r_idx >= f_idx) \
-+ _ret = true; \
-+ else if (l_idx >= f_idx && r_idx <= b_idx) \
-+ _ret = false; \
-+ } \
-+ _ret; \
-+})
-
- static void btree_flush_write(struct cache_set *c)
- {
---
-2.16.4
-
diff --git a/for-test/jouranl-deadlock/v2/v2-0014-bcache-try-to-flush-btree-nodes-as-many-as-possib.patch b/for-test/jouranl-deadlock/v2/v2-0014-bcache-try-to-flush-btree-nodes-as-many-as-possib.patch
deleted file mode 100644
index 4955ef8..0000000
--- a/for-test/jouranl-deadlock/v2/v2-0014-bcache-try-to-flush-btree-nodes-as-many-as-possib.patch
+++ /dev/null
@@ -1,82 +0,0 @@
-From d5786e57fca69b65b4b334e34d9ec8033ed6721f Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sun, 24 Mar 2019 00:06:05 +0800
-Subject: [RFC PATCH v2 14/16] bcache: try to flush btree nodes as many as
- possible
-
-When btree_flush_write() is called, it means the journal space is
-exhausted already. Current code only selects a single btree node to
-write out, which may introduce huge cache bounce from the spinlock on
-multiple cpu cores, when a lot of kworkers on journaling code path
-call btree_flush_write() for journal space reclaiming.
-
-This patch tries to flush as many btree nodes as possible inside
-a single call to btree_flush_write(), then the frequency of calling
-btree_flush_write() can be reduced, which in turn reduces the cache
-bounce from spinlock on multiple cpu cores. Please notice that this
-patch does not reduce the total times of acquiring spinlock, a spin
-lock is still acquired when selecting every single btree node to write
-out, but this patch will try best to hold the spinlock on same cpu
-core, which avoids the cache bounce where the spinlock is acquired by
-multiple different cpu cores.
-
-After the patch applied, in my pressure testing, 'top' shows more than
-50% sys cpu time reduced from the kworkers which compete for the spinlock
-inside btree_flush_write().
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/journal.c | 7 ++++++-
- drivers/md/bcache/journal.h | 4 ++--
- 2 files changed, 8 insertions(+), 3 deletions(-)
-
-diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index bc0e01151155..8536e76fcac9 100644
---- a/drivers/md/bcache/journal.c
-+++ b/drivers/md/bcache/journal.c
-@@ -514,6 +514,7 @@ static void btree_flush_write(struct cache_set *c)
- */
- struct btree *b;
- int i;
-+ int n = FLUSH_BTREE_HEAP;
-
- atomic_long_inc(&c->flush_write);
-
-@@ -552,6 +553,10 @@ static void btree_flush_write(struct cache_set *c)
-
- __bch_btree_node_write(b, NULL);
- mutex_unlock(&b->write_lock);
-+
-+ /* try to flush btree nodes as many as possible */
-+ if (--n > 0)
-+ goto retry;
- }
- }
-
-@@ -1102,7 +1107,7 @@ int bch_journal_alloc(struct cache_set *c)
- j->w[0].c = c;
- j->w[1].c = c;
-
-- if (!(init_heap(&c->flush_btree, 128, GFP_KERNEL)) ||
-+ if (!(init_heap(&c->flush_btree, FLUSH_BTREE_HEAP, GFP_KERNEL)) ||
- !(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) ||
- !(j->w[0].data = (void *) __get_free_pages(GFP_KERNEL, JSET_BITS)) ||
- !(j->w[1].data = (void *) __get_free_pages(GFP_KERNEL, JSET_BITS)))
-diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h
-index 55f81443f304..a8be14c6f6d9 100644
---- a/drivers/md/bcache/journal.h
-+++ b/drivers/md/bcache/journal.h
-@@ -158,8 +158,8 @@ struct journal_device {
- #define journal_pin_cmp(c, l, r) \
- (fifo_idx(&(c)->journal.pin, (l)) > fifo_idx(&(c)->journal.pin, (r)))
-
--#define JOURNAL_PIN 20000
--
-+#define FLUSH_BTREE_HEAP 128
-+#define JOURNAL_PIN 20000
- /* Reserved jouranl space in sectors */
- #define BCH_JOURNAL_RPLY_RESERVE 6U
- #define BCH_JOURNAL_RESERVE 7U
---
-2.16.4
-
diff --git a/for-test/jouranl-deadlock/v2/v2-0015-bcache-improve-bcache_reboot.patch b/for-test/jouranl-deadlock/v2/v2-0015-bcache-improve-bcache_reboot.patch
deleted file mode 100644
index 3c92f1d..0000000
--- a/for-test/jouranl-deadlock/v2/v2-0015-bcache-improve-bcache_reboot.patch
+++ /dev/null
@@ -1,50 +0,0 @@
-From a2b3bb8c5d68a17ee630a75dc4cf81df8eb7ef97 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sun, 24 Mar 2019 12:50:50 +0800
-Subject: [RFC PATCH v2 15/16] bcache: improve bcache_reboot()
-
-This patch tries to release mutex bch_register_lock early, to give
-chance to stop cache set and bcache device early.
-
-This patch also extends the timeout of stopping all bcache devices from
-2 seconds to 10 seconds, because stopping writeback rate update worker
-may delay for 5 seconds, 2 seconds is not enough.
-
-After this patch applied, stopping bcache devices during system reboot
-or shutdown is very hard to be observed any more.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/super.c | 6 ++++--
- 1 file changed, 4 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
-index e988e46a6479..2d377a4a182f 100644
---- a/drivers/md/bcache/super.c
-+++ b/drivers/md/bcache/super.c
-@@ -2453,10 +2453,13 @@ static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x)
- list_for_each_entry_safe(dc, tdc, &uncached_devices, list)
- bcache_device_stop(&dc->disk);
-
-+ mutex_unlock(&bch_register_lock);
-+
- /* What's a condition variable? */
- while (1) {
-- long timeout = start + 2 * HZ - jiffies;
-+ long timeout = start + 10 * HZ - jiffies;
-
-+ mutex_lock(&bch_register_lock);
- stopped = list_empty(&bch_cache_sets) &&
- list_empty(&uncached_devices);
-
-@@ -2468,7 +2471,6 @@ static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x)
-
- mutex_unlock(&bch_register_lock);
- schedule_timeout(timeout);
-- mutex_lock(&bch_register_lock);
- }
-
- finish_wait(&unregister_wait, &wait);
---
-2.16.4
-
diff --git a/for-test/jouranl-deadlock/v2/v2-0016-bcache-introduce-spinlock_t-flush_write_lock-in-s.patch b/for-test/jouranl-deadlock/v2/v2-0016-bcache-introduce-spinlock_t-flush_write_lock-in-s.patch
deleted file mode 100644
index a3d6691..0000000
--- a/for-test/jouranl-deadlock/v2/v2-0016-bcache-introduce-spinlock_t-flush_write_lock-in-s.patch
+++ /dev/null
@@ -1,74 +0,0 @@
-From 24539bb78565d784ddabb81f24968c13835eb000 Mon Sep 17 00:00:00 2001
-From: Coly Li <colyli@suse.de>
-Date: Sun, 24 Mar 2019 23:55:27 +0800
-Subject: [RFC PATCH v2 16/16] bcache: introduce spinlock_t flush_write_lock in
- struct journal
-
-In btree_flush_write(), iterating all cached btree nodes and adding them
-into ordered heap c->flush_btree takes quite long time. In order to
-protect ordered heap c->flush_btree, spin lock c->journal.lock is held
-for all the iteration and heap ordering. When journal space is fully
-occupied, btree_flush_write() might be called frequently, if the cached
-btree node iteration takes too much time, kernel will complain that
-normal journal kworkers are blocked too long. Of course write performance
-drops at this moment.
-
-This patch introduces a new spin lock member in struct journal, named
-flush_write_lock. This lock is only used in btree_flush_write() and
-protect the ordered heap c->flush_btree during all the cached btree node
-iteration. Then there won't be lock contention on c->journal.lock.
-
-After this fix, when journal space is fully occupied, it is very rare to
-observe the journal kworker blocking timeout warning.
-
-Signed-off-by: Coly Li <colyli@suse.de>
----
- drivers/md/bcache/journal.c | 5 +++--
- drivers/md/bcache/journal.h | 1 +
- 2 files changed, 4 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index 8536e76fcac9..6e38470f6924 100644
---- a/drivers/md/bcache/journal.c
-+++ b/drivers/md/bcache/journal.c
-@@ -519,7 +519,7 @@ static void btree_flush_write(struct cache_set *c)
- atomic_long_inc(&c->flush_write);
-
- retry:
-- spin_lock(&c->journal.lock);
-+ spin_lock(&c->journal.flush_write_lock);
- if (heap_empty(&c->flush_btree)) {
- for_each_cached_btree(b, c, i)
- if (btree_current_write(b)->journal) {
-@@ -540,7 +540,7 @@ static void btree_flush_write(struct cache_set *c)
-
- b = NULL;
- heap_pop(&c->flush_btree, b, journal_min_cmp);
-- spin_unlock(&c->journal.lock);
-+ spin_unlock(&c->journal.flush_write_lock);
-
- if (b) {
- mutex_lock(&b->write_lock);
-@@ -1099,6 +1099,7 @@ int bch_journal_alloc(struct cache_set *c)
- struct journal *j = &c->journal;
-
- spin_lock_init(&j->lock);
-+ spin_lock_init(&j->flush_write_lock);
- INIT_DELAYED_WORK(&j->work, journal_write_work);
-
- c->journal_delay_ms = 100;
-diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h
-index a8be14c6f6d9..d8ad99f6191b 100644
---- a/drivers/md/bcache/journal.h
-+++ b/drivers/md/bcache/journal.h
-@@ -103,6 +103,7 @@ struct journal_write {
- /* Embedded in struct cache_set */
- struct journal {
- spinlock_t lock;
-+ spinlock_t flush_write_lock;
- /* used when waiting because the journal was full */
- struct closure_waitlist wait;
- struct closure io;
---
-2.16.4
-