diff options
author | Coly Li <colyli@suse.de> | 2020-08-21 21:56:53 +0800 |
---|---|---|
committer | Coly Li <colyli@suse.de> | 2020-08-21 21:56:53 +0800 |
commit | 5a0407b3689ea45ffbfd352ec5d49644c59931d9 (patch) | |
tree | e6e1c913240661fae2959c8cf30824617aaf03e7 | |
parent | 78ba4e4981265a8f43497a5c9898abb3d85dfd2a (diff) | |
download | bcache-patches-5a0407b3689ea45ffbfd352ec5d49644c59931d9.tar.gz |
for-test: remove patcehs which are merged upstream or proven to be wrong
for-next: update patches for next merge window
54 files changed, 3383 insertions, 1146 deletions
diff --git a/for-next/0001-bcache-share-register-sysfs-with-async-register.patch b/for-next/0001-bcache-share-register-sysfs-with-async-register.patch new file mode 100644 index 0000000..3b823e1 --- /dev/null +++ b/for-next/0001-bcache-share-register-sysfs-with-async-register.patch @@ -0,0 +1,66 @@ +From 3015499a88e4a06b9923c94789d4bf44a05db0ca Mon Sep 17 00:00:00 2001 +From: Coly Li <colyli@suse.de> +Date: Sat, 15 Aug 2020 16:56:19 +0800 +Subject: [PATCH] bcache: share register sysfs with async register + +Previously the experimental async registration uses a separate sysfs +file register_async. Now the async registration code seems working well +for a while, we can do furtuher testing with it now. + +This patch changes the async bcache registration shares the same sysfs +file /sys/fs/bcache/register (and register_quiet). Async registration +will be default behavior if BCACHE_ASYNC_REGISTRATION is set in kernel +configure. By default, BCACHE_ASYNC_REGISTRATION is not configured yet. + +Signed-off-by: Coly Li <colyli@suse.de> +--- + drivers/md/bcache/super.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c +index 1bbdc410ee3c..61abd6499a11 100644 +--- a/drivers/md/bcache/super.c ++++ b/drivers/md/bcache/super.c +@@ -2449,7 +2449,6 @@ static ssize_t bch_pending_bdevs_cleanup(struct kobject *k, + + kobj_attribute_write(register, register_bcache); + kobj_attribute_write(register_quiet, register_bcache); +-kobj_attribute_write(register_async, register_bcache); + kobj_attribute_write(pendings_cleanup, bch_pending_bdevs_cleanup); + + static bool bch_is_open_backing(struct block_device *bdev) +@@ -2572,6 +2571,11 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, + struct cache_sb_disk *sb_disk; + struct block_device *bdev; + ssize_t ret; ++ bool async_registration = false; ++ ++#ifdef CONFIG_BCACHE_ASYNC_REGISTRATION ++ async_registration = true; 
++#endif + + ret = -EBUSY; + err = "failed to reference bcache module"; +@@ -2625,7 +2629,8 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, + goto out_blkdev_put; + + err = "failed to register device"; +- if (attr == &ksysfs_register_async) { ++ ++ if (async_registration) { + /* register in asynchronous way */ + struct async_reg_args *args = + kzalloc(sizeof(struct async_reg_args), GFP_KERNEL); +@@ -2888,9 +2893,6 @@ static int __init bcache_init(void) + static const struct attribute *files[] = { + &ksysfs_register.attr, + &ksysfs_register_quiet.attr, +-#ifdef CONFIG_BCACHE_ASYNC_REGISTRATION +- &ksysfs_register_async.attr, +-#endif + &ksysfs_pendings_cleanup.attr, + NULL + }; +-- +2.26.2 + diff --git a/for-next/0001-nvme-tcp-use-sendpage_ok-to-check-page-for-kernel_se.patch b/for-next/0001-nvme-tcp-use-sendpage_ok-to-check-page-for-kernel_se.patch deleted file mode 100644 index 857db45..0000000 --- a/for-next/0001-nvme-tcp-use-sendpage_ok-to-check-page-for-kernel_se.patch +++ /dev/null @@ -1,93 +0,0 @@ -From 1dddd4ae86389db695518d03075f92d3b5040984 Mon Sep 17 00:00:00 2001 -From: Coly Li <colyli@suse.de> -Date: Sat, 25 Jul 2020 23:34:36 +0800 -Subject: [PATCH 1/2] nvme-tcp: use sendpage_ok() to check page for - kernel_sendpage() - -Currently nvme_tcp_try_send_data() doesn't use kernel_sendpage() to -send slab pages. But for pages allocated by __get_free_pages() without -__GFP_COMP, which also have refcount as 0, they are still sent by -kernel_sendpage() to remote end, this is problematic. - -When bcache uses a remote NVMe SSD via nvme-over-tcp as its cache -device, writing meta data e.g. cache_set->disk_buckets to remote SSD may -trigger a kernel panic due to the above problem. Bcause the meta data -pages for cache_set->disk_buckets are allocated by __get_free_pages() -without __GFP_COMP. - -This problem should be fixed both in upper layer driver (bcache) and -nvme-over-tcp code. 
This patch fixes the nvme-over-tcp code by checking -whether the page refcount is 0, if yes then don't use kernel_sendpage() -and call sock_no_sendpage() to send the page into network stack. - -Such check is done by macro sendpage_ok() in this patch, which is defined -in include/linux/net.h as, - (!PageSlab(page) && page_count(page) >= 1) -If sendpage_ok() returns false, sock_no_sendpage() will handle the page -other than kernel_sendpage(). - -The code comments in this patch is copied and modified from drbd where -the similar problem already gets solved by Philipp Reisner. This is the -best code comment including my own version. - -Signed-off-by: Coly Li <colyli@suse.de> -Cc: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com> -Cc: Christoph Hellwig <hch@lst.de> -Cc: Hannes Reinecke <hare@suse.de> -Cc: Jan Kara <jack@suse.com> -Cc: Jens Axboe <axboe@kernel.dk> -Cc: Mikhail Skorzhinskii <mskorzhinskiy@solarflare.com> -Cc: Philipp Reisner <philipp.reisner@linbit.com> -Cc: Sagi Grimberg <sagi@grimberg.me> -Cc: Vlastimil Babka <vbabka@suse.com> -Cc: stable@vger.kernel.org ---- -Changelog: -v3: introduce a more common name sendpage_ok() for the open coded check -v2: fix typo in patch subject. -v1: the initial version. - - drivers/nvme/host/tcp.c | 13 +++++++++++-- - include/linux/net.h | 2 ++ - 2 files changed, 13 insertions(+), 2 deletions(-) - -diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c -index 79ef2b8e2b3c..f9952f6d94b9 100644 ---- a/drivers/nvme/host/tcp.c -+++ b/drivers/nvme/host/tcp.c -@@ -887,8 +887,17 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) - else - flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; - -- /* can't zcopy slab pages */ -- if (unlikely(PageSlab(page))) { -+ /* -+ * e.g. XFS meta- & log-data is in slab pages, or bcache meta -+ * data pages, or other high order pages allocated by -+ * __get_free_pages() without __GFP_COMP, which have a page_count -+ * of 0 and/or have PageSlab() set. 
We cannot use send_page for -+ * those, as that does get_page(); put_page(); and would cause -+ * either a VM_BUG directly, or __page_cache_release a page that -+ * would actually still be referenced by someone, leading to some -+ * obscure delayed Oops somewhere else. -+ */ -+ if (unlikely(!sendpage_ok(page))) { - ret = sock_no_sendpage(queue->sock, page, offset, len, - flags); - } else { -diff --git a/include/linux/net.h b/include/linux/net.h -index 016a9c5faa34..41e5d2898e97 100644 ---- a/include/linux/net.h -+++ b/include/linux/net.h -@@ -290,6 +290,8 @@ do { \ - #define net_get_random_once_wait(buf, nbytes) \ - get_random_once_wait((buf), (nbytes)) - -+#define sendpage_ok(page) (!PageSlab(page) && page_count(page) >= 1) -+ - int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, - size_t num, size_t len); - int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg, --- -2.26.2 - diff --git a/for-next/bcache-tools/0000-cover-letter.patch b/for-next/bcache-tools/0000-cover-letter.patch deleted file mode 100644 index a00921d..0000000 --- a/for-next/bcache-tools/0000-cover-letter.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 2215081144dcc8fd5ee571f69a406753b46d9f47 Mon Sep 17 00:00:00 2001 -From: Coly Li <colyli@suse.de> -Date: Sun, 5 Jul 2020 23:59:41 +0800 -Subject: [RFC PATCH 0/4] bcache-tools: changes for large bucket size - -These are user space tools changes necessary for bcache large bucket -size. When setting bucket size with '-u' larger than 16MB for cache -device, BCACHE_SB_VERSION_CDEV_WITH_FEATURES will be set automatically. -Otherwise, the new added members in super block won't be touched. 
- -Coly Li ---- -Coly Li (4): - bcache-tools: comments offset for members of struct cache_sb - struct_offset: print offset of each member of the on-disk data - structure - bcache-tools: The new super block version - BCACHE_SB_VERSION_BDEV_WITH_FEATURES - bcache-tools: add large_bucket incompat feature - - Makefile | 6 +- - bcache.h | 153 ++++++++++++++++++++++++++++++++++++++---------- - features.c | 24 ++++++++ - lib.c | 24 ++++++++ - lib.h | 2 + - make.c | 36 ++++++++---- - struct_offset.c | 63 ++++++++++++++++++++ - 7 files changed, 265 insertions(+), 43 deletions(-) - create mode 100644 features.c - create mode 100644 struct_offset.c - --- -2.26.2 - diff --git a/for-next/bcache-tools/0001-bcache-tools-comments-offset-for-members-of-struct-c.patch b/for-next/bcache-tools/0001-bcache-tools-comments-offset-for-members-of-struct-c.patch deleted file mode 100644 index b0b2a9a..0000000 --- a/for-next/bcache-tools/0001-bcache-tools-comments-offset-for-members-of-struct-c.patch +++ /dev/null @@ -1,110 +0,0 @@ -From 1afc7438f631e940b72360b1fbdbe5790010c93e Mon Sep 17 00:00:00 2001 -From: Coly Li <colyli@suse.de> -Date: Mon, 29 Jun 2020 21:50:57 +0800 -Subject: [RFC PATCH 1/4] bcache-tools: comments offset for members of struct - cache_sb - -This patch adds code comments to mark the offset of each member from -struct cache_sb. It is helpful for understand the super block on disk. 
- -Signed-off-by: Coly Li <colyli@suse.de> ---- - bcache.h | 64 ++++++++++++++++++++++++++++++-------------------------- - 1 file changed, 34 insertions(+), 30 deletions(-) - -diff --git a/bcache.h b/bcache.h -index c83f838..3fcf187 100644 ---- a/bcache.h -+++ b/bcache.h -@@ -41,54 +41,58 @@ static const char bcache_magic[] = { - #define SB_START (SB_SECTOR * 512) - - struct cache_sb { -- uint64_t csum; -- uint64_t offset; /* sector where this sb was written */ -- uint64_t version; -+/*000*/ uint64_t csum; -+ /* sector where this sb was written */ -+/*008*/ uint64_t offset; -+/*010*/ uint64_t version; - -- uint8_t magic[16]; -+/*018*/ uint8_t magic[16]; - -- uint8_t uuid[16]; -+/*028*/ uint8_t uuid[16]; - union { -- uint8_t set_uuid[16]; -- uint64_t set_magic; -+/*038*/ uint8_t set_uuid[16]; -+/*038*/ uint64_t set_magic; - }; -- uint8_t label[SB_LABEL_SIZE]; -+/*048*/ uint8_t label[SB_LABEL_SIZE]; - -- uint64_t flags; -- uint64_t seq; -- uint64_t pad[8]; -+/*068*/ uint64_t flags; -+/*070*/ uint64_t seq; -+/*078*/ uint64_t pad[8]; - - union { - struct { -- /* Cache devices */ -- uint64_t nbuckets; /* device size */ -+ /* Cache devices */ -+/*0b8*/ uint64_t nbuckets; /* device size */ - -- uint16_t block_size; /* sectors */ -- uint16_t bucket_size; /* sectors */ -+/*0c0*/ uint16_t block_size; /* sectors */ -+/*0c2*/ uint16_t bucket_size; /* sectors */ - -- uint16_t nr_in_set; -- uint16_t nr_this_dev; -+/*0c4*/ uint16_t nr_in_set; -+/*0c6*/ uint16_t nr_this_dev; - }; - struct { -- /* Backing devices */ -- uint64_t data_offset; -- -- /* -- * block_size from the cache device section is still used by -- * backing devices, so don't add anything here until we fix -- * things to not need it for backing devices anymore -- */ -+ /* Backing devices */ -+/*0b8*/ uint64_t data_offset; -+ -+ /* -+ * block_size from the cache device section is still -+ * used by backing devices, so don't add anything here -+ * until we fix things to not need it for backing -+ * devices anymore -+ 
*/ - }; - }; - -- uint32_t last_mount; /* time_t */ -+/*0c8*/ uint32_t last_mount; /* time_t */ - -- uint16_t first_bucket; -+/*0cc*/ uint16_t first_bucket; - union { -- uint16_t njournal_buckets; -- uint16_t keys; -+/*0ce*/ uint16_t njournal_buckets; -+/*0ce*/ uint16_t keys; - }; -- uint64_t d[SB_JOURNAL_BUCKETS]; /* journal buckets */ -+ /* journal buckets */ -+/*0d0*/ uint64_t d[SB_JOURNAL_BUCKETS]; -+/*8d0*/ - }; - - static inline bool SB_IS_BDEV(const struct cache_sb *sb) --- -2.26.2 - diff --git a/for-next/bcache-tools/0002-struct_offset-print-offset-of-each-member-of-the-on-.patch b/for-next/bcache-tools/0002-struct_offset-print-offset-of-each-member-of-the-on-.patch deleted file mode 100644 index 6a39977..0000000 --- a/for-next/bcache-tools/0002-struct_offset-print-offset-of-each-member-of-the-on-.patch +++ /dev/null @@ -1,111 +0,0 @@ -From d94c7afefe13d2cc30c77b8c21004913d06187bb Mon Sep 17 00:00:00 2001 -From: Coly Li <colyli@suse.de> -Date: Sat, 27 Jun 2020 16:32:27 +0800 -Subject: [RFC PATCH 2/4] struct_offset: print offset of each member of the - on-disk data structure - -This is a helper small program to print out the offset in bytes of each -member of the on-disk data structure. Currently the member print lines -are coded manually, hope latter it can be more intelligent to avoid the -hard code. 
- -Signed-off-by: Coly Li <colyli@suse.de> ---- - Makefile | 4 +++- - struct_offset.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 65 insertions(+), 1 deletion(-) - create mode 100644 struct_offset.c - -diff --git a/Makefile b/Makefile -index 2c326cf..b352d21 100644 ---- a/Makefile -+++ b/Makefile -@@ -5,7 +5,7 @@ DRACUTLIBDIR=/lib/dracut - INSTALL=install - CFLAGS+=-O2 -Wall -g - --all: make-bcache probe-bcache bcache-super-show bcache-register bcache -+all: make-bcache probe-bcache bcache-super-show bcache-register bcache struct_offset - - install: make-bcache probe-bcache bcache-super-show - $(INSTALL) -m0755 make-bcache bcache-super-show bcache $(DESTDIR)${PREFIX}/sbin/ -@@ -22,6 +22,8 @@ clean: - - bcache-test: LDLIBS += `pkg-config --libs openssl` -lm - -+struct_offset: struct_offset.o -+ - make-bcache: LDLIBS += `pkg-config --libs uuid blkid smartcols` - make-bcache: CFLAGS += `pkg-config --cflags uuid blkid smartcols` - make-bcache: make.o crc64.o lib.o zoned.o -diff --git a/struct_offset.c b/struct_offset.c -new file mode 100644 -index 0000000..6061259 ---- /dev/null -+++ b/struct_offset.c -@@ -0,0 +1,62 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+/* -+ * Author: Coly Li <colyli@suse.de> -+ * -+ * Print out offset of each member of on-disk structure -+ */ -+ -+#include <stdio.h> -+#include <stddef.h> -+#include <inttypes.h> -+#include <stdbool.h> -+ -+#include "bcache.h" -+ -+ -+#define OFF_SB(m) offsetof(struct cache_sb, m) -+ -+void print_cache_sb() -+{ -+ printf(" struct cache_sb {:\n"); -+ printf("/* %3.3lx */ uint64_t csum;\n", OFF_SB(csum)); -+ printf("/* %3.3lx */ uint64_t offset;\n", OFF_SB(offset)); -+ printf("/* %3.3lx */ uint64_t version;\n", OFF_SB(version)); -+ printf("/* %3.3lx */ uint8_t magic[6];\n", OFF_SB(magic)); -+ printf("/* %3.3lx */ uint8_t uuid[16];\n", OFF_SB(uuid)); -+ printf(" union {;\n"); -+ printf("/* %3.3lx */ uint8_t set_uuid;\n", OFF_SB(set_uuid)); -+ printf("/* %3.3lx */ uint64_t 
set_magic;\n", OFF_SB(set_magic)); -+ printf(" };\n"); -+ printf("/* %3.3lx */ uint8_t label[%u];\n", OFF_SB(label), -+ SB_LABEL_SIZE); -+ printf("/* %3.3lx */ uint64_t flags;\n", OFF_SB(flags)); -+ printf("/* %3.3lx */ uint64_t seq;\n", OFF_SB(seq)); -+ printf("/* %3.3lx */ uint64_t pad[8];\n", OFF_SB(pad)); -+ printf(" union {\n"); -+ printf(" struct {\n"); -+ printf("/* %3.3lx */ uint64_t nbuckets;\n", OFF_SB(nbuckets)); -+ printf("/* %3.3lx */ uint16_t block_size;\n", OFF_SB(block_size)); -+ printf("/* %3.3lx */ uint16_t bucket_size;\n", OFF_SB(bucket_size)); -+ printf("/* %3.3lx */ uint16_t nr_in_set;\n", OFF_SB(nr_in_set)); -+ printf("/* %3.3lx */ uint16_t nr_this_dev;\n", OFF_SB(nr_this_dev)); -+ printf(" };\n"); -+ printf(" struct {\n"); -+ printf("/* %3.3lx */ uint64_t data_offset;\n", OFF_SB(data_offset)); -+ printf(" };\n"); -+ printf(" };\n"); -+ printf("/* %3.3lx */ uint32_t last_mount;\n", OFF_SB(last_mount)); -+ printf("/* %3.3lx */ uint16_t first_bucket;\n", OFF_SB(first_bucket)); -+ printf(" union {\n"); -+ printf("/* %3.3lx */ uint16_t njournal_buckets;\n", OFF_SB(njournal_buckets)); -+ printf("/* %3.3lx */ uint16_t keys;\n", OFF_SB(keys)); -+ printf(" };\n"); -+ printf("/* %3.3lx */ uint64_t d[%u];\n", OFF_SB(d), SB_JOURNAL_BUCKETS); -+ printf("/* %3.3lx */ }\n", OFF_SB(d) + sizeof(uint64_t) * SB_JOURNAL_BUCKETS); -+} -+ -+int main(int argc, char *argv[]) -+{ -+ print_cache_sb(); -+ return 0; -+} --- -2.26.2 - diff --git a/for-next/bcache-tools/0003-bcache-tools-The-new-super-block-version-BCACHE_SB_V.patch b/for-next/bcache-tools/0003-bcache-tools-The-new-super-block-version-BCACHE_SB_V.patch deleted file mode 100644 index 64245ac..0000000 --- a/for-next/bcache-tools/0003-bcache-tools-The-new-super-block-version-BCACHE_SB_V.patch +++ /dev/null @@ -1,213 +0,0 @@ -From 86ceea3fbab291223d9b42aeb1ad64e7da033fea Mon Sep 17 00:00:00 2001 -From: Coly Li <colyli@suse.de> -Date: Mon, 29 Jun 2020 21:55:36 +0800 -Subject: [RFC PATCH 3/4] bcache-tools: The 
new super block version - BCACHE_SB_VERSION_BDEV_WITH_FEATURES - -The new super block version BCACHE_SB_VERSION_BDEV_WITH_FEATURES value -is 5, both cache device and backing device share this version number. - -Devices have super block version equal to the new version will have -three new members, -/*078*/ uint64_t feature_compat; -/*080*/ uint64_t feature_incompat; -/*088*/ uint64_t feature_ro_compat; - -They are used for further new features which may introduce on-disk -format change, the very basic features handling code skeleton is also -initialized in this patch. - -Signed-off-by: Coly Li <colyli@suse.de> ---- - Makefile | 2 +- - bcache.h | 83 +++++++++++++++++++++++++++++++++++++++++++++++-- - features.c | 22 +++++++++++++ - make.c | 8 +++++ - struct_offset.c | 2 +- - 5 files changed, 113 insertions(+), 4 deletions(-) - create mode 100644 features.c - -diff --git a/Makefile b/Makefile -index b352d21..b5b41e4 100644 ---- a/Makefile -+++ b/Makefile -@@ -40,4 +40,4 @@ bcache-register: bcache-register.o - bcache: CFLAGS += `pkg-config --cflags blkid uuid smartcols` - bcache: LDLIBS += `pkg-config --libs blkid uuid smartcols` - bcache: CFLAGS += -std=gnu99 --bcache: crc64.o lib.o make.o zoned.o -+bcache: crc64.o lib.o make.o zoned.o features.o -diff --git a/bcache.h b/bcache.h -index 3fcf187..3695712 100644 ---- a/bcache.h -+++ b/bcache.h -@@ -27,12 +27,16 @@ static const char bcache_magic[] = { - * Version 2: Seed pointer into btree node checksum - * Version 3: Cache device with new UUID format - * Version 4: Backing device with data offset -+ * Version 5: Cache adn backing devices with compat/incompat/ro_compat -+ * feature sets - */ - #define BCACHE_SB_VERSION_CDEV 0 - #define BCACHE_SB_VERSION_BDEV 1 - #define BCACHE_SB_VERSION_CDEV_WITH_UUID 3 - #define BCACHE_SB_VERSION_BDEV_WITH_OFFSET 4 --#define BCACHE_SB_MAX_VERSION 4 -+#define BCACHE_SB_VERSION_CDEV_WITH_FEATURES 5 -+#define BCACHE_SB_VERSION_BDEV_WITH_FEATURES 6 -+#define BCACHE_SB_MAX_VERSION 6 - - 
#define SB_SECTOR 8 - #define SB_LABEL_SIZE 32 -@@ -57,7 +61,12 @@ struct cache_sb { - - /*068*/ uint64_t flags; - /*070*/ uint64_t seq; --/*078*/ uint64_t pad[8]; -+ -+/*078*/ uint64_t feature_compat; -+/*080*/ uint64_t feature_incompat; -+/*088*/ uint64_t feature_ro_compat; -+ -+/*090*/ uint64_t pad[5]; - - union { - struct { -@@ -127,4 +136,74 @@ uint64_t crc64(const void *data, size_t len); - #define csum_set(i) \ - crc64(((void *) (i)) + 8, ((void *) end(i)) - (((void *) (i)) + 8)) - -+#define BCH_FEATURE_COMPAT 0 -+#define BCH_FEATURE_INCOMPAT 1 -+#define BCH_FEATURE_RO_INCOMPAT 2 -+#define BCH_FEATURE_TYPE_MASK 0x03 -+ -+#define BCH_FEATURE_COMPAT_SUUP 0 -+#define BCH_FEATURE_INCOMPAT_SUUP 0 -+#define BCH_FEATURE_RO_COMPAT_SUUP 0 -+ -+#define BCH_HAS_COMPAT_FEATURE(sb, mask) \ -+ ((sb)->feature_compat & (mask)) -+#define BCH_HAS_RO_COMPAT_FEATURE(sb, mask) \ -+ ((sb)->feature_ro_compat & (mask)) -+#define BCH_HAS_INCOMPAT_FEATURE(sb, mask) \ -+ ((sb)->feature_incompat & (mask)) -+ -+/* Feature set definition */ -+ -+ -+#define BCH_FEATURE_COMPAT_FUNCS(name, flagname) \ -+static inline int bch_has_feature_##name(struct cache_sb *sb) \ -+{ \ -+ return (((sb)->feature_compat & \ -+ BCH##_FEATURE_COMPAT_##flagname) != 0); \ -+} \ -+static inline void bch_set_feature_##name(struct cache_sb *sb) \ -+{ \ -+ (sb)->feature_compat |= \ -+ BCH##_FEATURE_COMPAT_##flagname; \ -+} \ -+static inline void bch_clear_feature_##name(struct cache_sb *sb) \ -+{ \ -+ (sb)->feature_compat &= \ -+ ~BCH##_FEATURE_COMPAT_##flagname; \ -+} -+ -+#define BCH_FEATURE_RO_COMPAT_FUNCS(name, flagname) \ -+static inline int bch_has_feature_##name(struct cache_sb *sb) \ -+{ \ -+ return (((sb)->feature_ro_compat & \ -+ BCH##_FEATURE_RO_COMPAT_##flagname) != 0); \ -+} \ -+static inline void bch_set_feature_##name(struct cache_sb *sb) \ -+{ \ -+ (sb)->feature_ro_compat |= \ -+ BCH##_FEATURE_RO_COMPAT_##flagname; \ -+} \ -+static inline void bch_clear_feature_##name(struct cache_sb *sb) \ -+{ \ 
-+ (sb)->feature_ro_compat &= \ -+ ~BCH##_FEATURE_RO_COMPAT_##flagname; \ -+} -+ -+#define BCH_FEATURE_INCOMPAT_FUNCS(name, flagname) \ -+static inline int bch_has_feature_##name(struct cache_sb *sb) \ -+{ \ -+ return (((sb)->feature_incompat & \ -+ BCH##_FEATURE_INCOMPAT_##flagname) != 0); \ -+} \ -+static inline void bch_set_feature_##name(struct cache_sb *sb) \ -+{ \ -+ (sb)->feature_incompat |= \ -+ BCH##_FEATURE_INCOMPAT_##flagname; \ -+} \ -+static inline void bch_clear_feature_##name(struct cache_sb *sb) \ -+{ \ -+ (sb)->feature_incompat &= \ -+ ~BCH##_FEATURE_INCOMPAT_##flagname; \ -+} -+ - #endif -diff --git a/features.c b/features.c -new file mode 100644 -index 0000000..013a5ca ---- /dev/null -+++ b/features.c -@@ -0,0 +1,22 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Author: Coly Li <colyli@suse.de> -+ * -+ * Inspired by e2fsprogs features compat/incompat/ro_compat -+ * related code. -+ */ -+#include <stdbool.h> -+#include <stdint.h> -+#include <sys/types.h> -+ -+#include "bcache.h" -+ -+struct feature { -+ int compat; -+ unsigned int mask; -+ const char *string; -+}; -+ -+static struct feature feature_list[] = { -+ {0, 0, 0 }, -+}; -diff --git a/make.c b/make.c -index cc76863..6d37532 100644 ---- a/make.c -+++ b/make.c -@@ -250,6 +250,14 @@ static void swap_sb(struct cache_sb *sb, int write_cdev_super) - /* Backing devices */ - sb->data_offset = cpu_to_le64(sb->data_offset); - } -+ -+ /* Convert feature set and version at last */ -+ if (sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES) { -+ sb->feature_compat = cpu_to_le64(sb->feature_compat); -+ sb->feature_incompat = cpu_to_le64(sb->feature_incompat); -+ sb->feature_ro_compat = cpu_to_le64(sb->feature_ro_compat); -+ } -+ sb->version = cpu_to_le64(sb->version); - } - - static void write_sb(char *dev, unsigned int block_size, -diff --git a/struct_offset.c b/struct_offset.c -index 6061259..54d4a34 100644 ---- a/struct_offset.c -+++ b/struct_offset.c -@@ -52,7 +52,7 @@ void print_cache_sb() - 
printf("/* %3.3lx */ uint16_t keys;\n", OFF_SB(keys)); - printf(" };\n"); - printf("/* %3.3lx */ uint64_t d[%u];\n", OFF_SB(d), SB_JOURNAL_BUCKETS); -- printf("/* %3.3lx */ }\n", OFF_SB(d) + sizeof(uint64_t) * SB_JOURNAL_BUCKETS); -+ printf("/* %3.3lx */ }\n", sizeof(struct cache_sb)); - } - - int main(int argc, char *argv[]) --- -2.26.2 - diff --git a/for-next/bcache-tools/0004-bcache-tools-add-large_bucket-incompat-feature.patch b/for-next/bcache-tools/0004-bcache-tools-add-large_bucket-incompat-feature.patch deleted file mode 100644 index d94de65..0000000 --- a/for-next/bcache-tools/0004-bcache-tools-add-large_bucket-incompat-feature.patch +++ /dev/null @@ -1,234 +0,0 @@ -From 2215081144dcc8fd5ee571f69a406753b46d9f47 Mon Sep 17 00:00:00 2001 -From: Coly Li <colyli@suse.de> -Date: Mon, 29 Jun 2020 21:59:19 +0800 -Subject: [RFC PATCH 4/4] bcache-tools: add large_bucket incompat feature - -This feature adds uint32_t bucket_size_hi into struct cache_sb, permit -bucket size to be 32bit width. Current maximum bucket size is 32MB, -extend it to 32bits will permit much large bucket size which is -desired by zoned SSD devices (a typical zone size is 256MB). - -When setting a bucket size > 32MB, large_bucket feature will be set -automatically and the super block version will also be set to -BCACHE_SB_VERSION_CDEV_WITH_FEATURES. 
- -Signed-off-by: Coly Li <colyli@suse.de> ---- - bcache.h | 10 +++++++++- - features.c | 2 ++ - lib.c | 24 ++++++++++++++++++++++++ - lib.h | 2 ++ - make.c | 28 ++++++++++++++++++---------- - struct_offset.c | 1 + - 6 files changed, 56 insertions(+), 11 deletions(-) - -diff --git a/bcache.h b/bcache.h -index 3695712..6e1563b 100644 ---- a/bcache.h -+++ b/bcache.h -@@ -101,7 +101,8 @@ struct cache_sb { - }; - /* journal buckets */ - /*0d0*/ uint64_t d[SB_JOURNAL_BUCKETS]; --/*8d0*/ -+/*8d0*/ uint32_t bucket_size_hi; -+/*8d4*/ - }; - - static inline bool SB_IS_BDEV(const struct cache_sb *sb) -@@ -155,6 +156,11 @@ uint64_t crc64(const void *data, size_t len); - /* Feature set definition */ - - -+/* Feature set definition */ -+ -+/* Incompat feature set */ -+#define BCH_FEATURE_INCOMPAT_LARGE_BUCKET 0x0001 /* 32bit bucket size */ -+ - #define BCH_FEATURE_COMPAT_FUNCS(name, flagname) \ - static inline int bch_has_feature_##name(struct cache_sb *sb) \ - { \ -@@ -206,4 +212,6 @@ static inline void bch_clear_feature_##name(struct cache_sb *sb) \ - ~BCH##_FEATURE_INCOMPAT_##flagname; \ - } - -+BCH_FEATURE_INCOMPAT_FUNCS(large_bucket, LARGE_BUCKET); -+ - #endif -diff --git a/features.c b/features.c -index 013a5ca..9b6e93d 100644 ---- a/features.c -+++ b/features.c -@@ -18,5 +18,7 @@ struct feature { - }; - - static struct feature feature_list[] = { -+ {BCH_FEATURE_COMPAT, BCH_FEATURE_INCOMPAT_LARGE_BUCKET, -+ "large_bucket"}, - {0, 0, 0 }, - }; -diff --git a/lib.c b/lib.c -index 9e69419..76e8b0d 100644 ---- a/lib.c -+++ b/lib.c -@@ -4,6 +4,7 @@ - #include <stdbool.h> - #include <blkid.h> - #include <dirent.h> -+#include <limits.h> - #include <sys/types.h> - #include <unistd.h> - #include <stdio.h> -@@ -681,3 +682,26 @@ int set_label(char *devname, char *label) - close(fd); - return 0; - } -+ -+void set_bucket_size(struct cache_sb *sb, unsigned int bucket_size) -+{ -+ if (bucket_size > USHRT_MAX) { -+ sb->version = BCACHE_SB_VERSION_CDEV_WITH_FEATURES; -+ 
bch_set_feature_large_bucket(sb); -+ sb->bucket_size = (uint16_t)bucket_size; -+ sb->bucket_size_hi = (uint32_t)(bucket_size >> 16); -+ } else { -+ sb->bucket_size = bucket_size; -+ } -+} -+ -+unsigned int get_bucket_size(struct cache_sb *sb) -+{ -+ unsigned int bucket_size = sb->bucket_size; -+ -+ if (sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES && -+ bch_has_feature_large_bucket(sb)) -+ bucket_size |= sb->bucket_size_hi << 16; -+ -+ return bucket_size; -+} -diff --git a/lib.h b/lib.h -index d4537b0..a69e1b8 100644 ---- a/lib.h -+++ b/lib.h -@@ -50,6 +50,8 @@ int detach_backdev(char *devname); - int set_backdev_cachemode(char *devname, char *cachemode); - int set_label(char *devname, char *label); - int cset_to_devname(struct list_head *head, char *cset, char *devname); -+void set_bucket_size(struct cache_sb *sb, unsigned int bucket_size); -+unsigned int get_bucket_size(struct cache_sb *sb); - - - #define DEVLEN sizeof(struct dev) -diff --git a/make.c b/make.c -index 6d37532..b788de1 100644 ---- a/make.c -+++ b/make.c -@@ -83,7 +83,9 @@ uint64_t hatoi(const char *s) - return i; - } - --unsigned int hatoi_validate(const char *s, const char *msg) -+unsigned int hatoi_validate(const char *s, -+ const char *msg, -+ unsigned long max) - { - uint64_t v = hatoi(s); - -@@ -94,7 +96,7 @@ unsigned int hatoi_validate(const char *s, const char *msg) - - v /= 512; - -- if (v > USHRT_MAX) { -+ if (v > max) { - fprintf(stderr, "%s too large\n", msg); - exit(EXIT_FAILURE); - } -@@ -229,7 +231,6 @@ static void swap_sb(struct cache_sb *sb, int write_cdev_super) - - /* swap to little endian byte order to write */ - sb->offset = cpu_to_le64(sb->offset); -- sb->version = cpu_to_le64(sb->version); - sb->flags = cpu_to_le64(sb->flags); - sb->seq = cpu_to_le64(sb->seq); - sb->last_mount = cpu_to_le32(sb->last_mount); -@@ -244,6 +245,9 @@ static void swap_sb(struct cache_sb *sb, int write_cdev_super) - /* Cache devices */ - sb->nbuckets = cpu_to_le64(sb->nbuckets); - 
sb->bucket_size = cpu_to_le16(sb->bucket_size); -+ if (sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES && -+ bch_has_feature_large_bucket(sb)) -+ sb->bucket_size_hi = cpu_to_le32(sb->bucket_size_hi); - sb->nr_in_set = cpu_to_le16(sb->nr_in_set); - sb->nr_this_dev = cpu_to_le16(sb->nr_this_dev); - } else { -@@ -374,7 +378,7 @@ static void write_sb(char *dev, unsigned int block_size, - uuid_generate(sb.uuid); - memcpy(sb.set_uuid, set_uuid, sizeof(sb.set_uuid)); - -- sb.bucket_size = bucket_size; -+ set_bucket_size(&sb, bucket_size); - sb.block_size = block_size; - - uuid_unparse(sb.uuid, uuid_str); -@@ -400,7 +404,8 @@ static void write_sb(char *dev, unsigned int block_size, - } - - if (data_offset != BDEV_DATA_START_DEFAULT) { -- sb.version = BCACHE_SB_VERSION_BDEV_WITH_OFFSET; -+ if (sb.version < BCACHE_SB_VERSION_BDEV_WITH_OFFSET) -+ sb.version = BCACHE_SB_VERSION_BDEV_WITH_OFFSET; - sb.data_offset = data_offset; - } - -@@ -418,9 +423,10 @@ static void write_sb(char *dev, unsigned int block_size, - data_offset); - putchar('\n'); - } else { -- sb.nbuckets = getblocks(fd) / sb.bucket_size; -+ sb.nbuckets = getblocks(fd) / get_bucket_size(&sb); - sb.nr_in_set = 1; -- sb.first_bucket = (23 / sb.bucket_size) + 1; -+ /* 23 is (SB_SECTOR + SB_SIZE) - 1 sectors */ -+ sb.first_bucket = (23 / get_bucket_size(&sb)) + 1; - - if (sb.nbuckets < 1 << 7) { - fprintf(stderr, "Not enough buckets: %ju, need %u\n", -@@ -447,7 +453,7 @@ static void write_sb(char *dev, unsigned int block_size, - (unsigned int) sb.version, - sb.nbuckets, - sb.block_size, -- sb.bucket_size, -+ get_bucket_size(&sb), - sb.nr_in_set, - sb.nr_this_dev, - sb.first_bucket); -@@ -576,10 +582,12 @@ int make_bcache(int argc, char **argv) - bdev = 1; - break; - case 'b': -- bucket_size = hatoi_validate(optarg, "bucket size"); -+ bucket_size = -+ hatoi_validate(optarg, "bucket size", UINT_MAX); - break; - case 'w': -- block_size = hatoi_validate(optarg, "block size"); -+ block_size = -+ hatoi_validate(optarg, 
"block size", USHRT_MAX); - break; - #if 0 - case 'U': -diff --git a/struct_offset.c b/struct_offset.c -index 54d4a34..4ffacf7 100644 ---- a/struct_offset.c -+++ b/struct_offset.c -@@ -52,6 +52,7 @@ void print_cache_sb() - printf("/* %3.3lx */ uint16_t keys;\n", OFF_SB(keys)); - printf(" };\n"); - printf("/* %3.3lx */ uint64_t d[%u];\n", OFF_SB(d), SB_JOURNAL_BUCKETS); -+ printf("/* %3.3lx */ uint32_t bucket_size_hi;\n", OFF_SB(bucket_size_hi)); - printf("/* %3.3lx */ }\n", sizeof(struct cache_sb)); - } - --- -2.26.2 - diff --git a/for-next/nvme-tcp/v7-0000-cover-letter.patch b/for-next/nvme-tcp/v7-0000-cover-letter.patch new file mode 100644 index 0000000..c873351 --- /dev/null +++ b/for-next/nvme-tcp/v7-0000-cover-letter.patch @@ -0,0 +1,81 @@ +From 91b0da8cb890ef1a07b104dc8c2a621fe267cf1d Mon Sep 17 00:00:00 2001 +From: Coly Li <colyli@suse.de> +Date: Tue, 18 Aug 2020 21:09:29 +0800 +Subject: [PATCH v7 0/6] Introduce sendpage_ok() to detect misused sendpage in network related drivers + +This series was original by a bug fix in nvme-over-tcp driver which only +checked whether a page was allocated from slab allcoator, but forgot to +check its page_count: The page handled by sendpage should be neither a +Slab page nor 0 page_count page. + +As Sagi Grimberg suggested, the original fix is refind to a more common +inline routine: + static inline bool sendpage_ok(struct page *page) + { + return (!PageSlab(page) && page_count(page) >= 1); + } +If sendpage_ok() returns true, the checking page can be handled by the +zero copy sendpage method in network layer. + +The first patch in this series introduces sendpage_ok() in header file +include/linux/net.h, the second patch fixes the page checking issue in +nvme-over-tcp driver, the third patch adds page_count check by using +sendpage_ok() in do_tcp_sendpages() as Eric Dumazet suggested, and all +rested patches just replace existing open coded checks with the inline +sendpage_ok() routine. 
+ +Coly Li + +Cc: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com> +Cc: Chris Leech <cleech@redhat.com> +Cc: Christoph Hellwig <hch@lst.de> +Cc: Cong Wang <amwang@redhat.com> +Cc: David S. Miller <davem@davemloft.net> +Cc: Eric Dumazet <eric.dumazet@gmail.com> +Cc: Hannes Reinecke <hare@suse.de> +Cc: Ilya Dryomov <idryomov@gmail.com> +Cc: Jan Kara <jack@suse.com> +Cc: Jeff Layton <jlayton@kernel.org> +Cc: Jens Axboe <axboe@kernel.dk> +Cc: Lee Duncan <lduncan@suse.com> +Cc: Mike Christie <michaelc@cs.wisc.edu> +Cc: Mikhail Skorzhinskii <mskorzhinskiy@solarflare.com> +Cc: Philipp Reisner <philipp.reisner@linbit.com> +Cc: Sagi Grimberg <sagi@grimberg.me> +Cc: Vasily Averin <vvs@virtuozzo.com> +Cc: Vlastimil Babka <vbabka@suse.com> +--- +Changelog: +v7: remove outer brackets from the return line of sendpage_ok() as + Eric Dumazet suggested. +v6: fix page check in do_tcp_sendpages(), as Eric Dumazet suggested. + replace other open coded checks with sendpage_ok() in libceph, + iscsi drivers. +v5, include linux/mm.h in include/linux/net.h +v4, change sendpage_ok() as an inline helper, and post it as + separate patch, as Christoph Hellwig suggested. +v3, introduce a more common sendpage_ok() as Sagi Grimberg suggested. +v2, fix typo in patch subject +v1, the initial version. 
+ + +Coly Li (6): + net: introduce helper sendpage_ok() in include/linux/net.h + nvme-tcp: check page by sendpage_ok() before calling kernel_sendpage() + tcp: use sendpage_ok() to detect misused .sendpage + drbd: code cleanup by using sendpage_ok() to check page for + kernel_sendpage() + scsi: libiscsi: use sendpage_ok() in iscsi_tcp_segment_map() + libceph: use sendpage_ok() in ceph_tcp_sendpage() + + drivers/block/drbd/drbd_main.c | 2 +- + drivers/nvme/host/tcp.c | 7 +++---- + drivers/scsi/libiscsi_tcp.c | 2 +- + include/linux/net.h | 16 ++++++++++++++++ + net/ceph/messenger.c | 2 +- + net/ipv4/tcp.c | 3 ++- + 6 files changed, 24 insertions(+), 8 deletions(-) + +-- +2.26.2 + diff --git a/for-next/nvme-tcp/v7-0001-net-introduce-helper-sendpage_ok-in-include-linux.patch b/for-next/nvme-tcp/v7-0001-net-introduce-helper-sendpage_ok-in-include-linux.patch new file mode 100644 index 0000000..1fa95ee --- /dev/null +++ b/for-next/nvme-tcp/v7-0001-net-introduce-helper-sendpage_ok-in-include-linux.patch @@ -0,0 +1,75 @@ +From 797f86656814805cca92af43652ed0732963f565 Mon Sep 17 00:00:00 2001 +From: Coly Li <colyli@suse.de> +Date: Sat, 15 Aug 2020 13:40:48 +0800 +Subject: [PATCH v7 1/6] net: introduce helper sendpage_ok() in + include/linux/net.h + +The original problem was from nvme-over-tcp code, who mistakenly uses +kernel_sendpage() to send pages allocated by __get_free_pages() without +__GFP_COMP flag. Such pages don't have refcount (page_count is 0) on +tail pages, sending them by kernel_sendpage() may trigger a kernel panic +from a corrupted kernel heap, because these pages are incorrectly freed +in network stack as page_count 0 pages. + +This patch introduces a helper sendpage_ok(), it returns true if the +checking page, +- is not slab page: PageSlab(page) is false. +- has page refcount: page_count(page) is not zero + +All drivers who want to send page to remote end by kernel_sendpage() +may use this helper to check whether the page is OK. 
If the helper does +not return true, the driver should try other non sendpage method (e.g. +sock_no_sendpage()) to handle the page. + +Signed-off-by: Coly Li <colyli@suse.de> +Cc: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com> +Cc: Christoph Hellwig <hch@lst.de> +Cc: Hannes Reinecke <hare@suse.de> +Cc: Jan Kara <jack@suse.com> +Cc: Jens Axboe <axboe@kernel.dk> +Cc: Mikhail Skorzhinskii <mskorzhinskiy@solarflare.com> +Cc: Philipp Reisner <philipp.reisner@linbit.com> +Cc: Sagi Grimberg <sagi@grimberg.me> +Cc: Vlastimil Babka <vbabka@suse.com> +Cc: stable@vger.kernel.org +--- + include/linux/net.h | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/include/linux/net.h b/include/linux/net.h +index d48ff1180879..05db8690f67e 100644 +--- a/include/linux/net.h ++++ b/include/linux/net.h +@@ -21,6 +21,7 @@ + #include <linux/rcupdate.h> + #include <linux/once.h> + #include <linux/fs.h> ++#include <linux/mm.h> + #include <linux/sockptr.h> + + #include <uapi/linux/net.h> +@@ -286,6 +287,21 @@ do { \ + #define net_get_random_once_wait(buf, nbytes) \ + get_random_once_wait((buf), (nbytes)) + ++/* ++ * E.g. XFS meta- & log-data is in slab pages, or bcache meta ++ * data pages, or other high order pages allocated by ++ * __get_free_pages() without __GFP_COMP, which have a page_count ++ * of 0 and/or have PageSlab() set. We cannot use send_page for ++ * those, as that does get_page(); put_page(); and would cause ++ * either a VM_BUG directly, or __page_cache_release a page that ++ * would actually still be referenced by someone, leading to some ++ * obscure delayed Oops somewhere else. 
++ */ ++static inline bool sendpage_ok(struct page *page) ++{ ++ return !PageSlab(page) && page_count(page) >= 1; ++} ++ + int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, + size_t num, size_t len); + int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg, +-- +2.26.2 + diff --git a/for-next/nvme-tcp/v7-0002-nvme-tcp-check-page-by-sendpage_ok-before-calling.patch b/for-next/nvme-tcp/v7-0002-nvme-tcp-check-page-by-sendpage_ok-before-calling.patch new file mode 100644 index 0000000..2956afb --- /dev/null +++ b/for-next/nvme-tcp/v7-0002-nvme-tcp-check-page-by-sendpage_ok-before-calling.patch @@ -0,0 +1,57 @@ +From fd1e16f6dce2cdb40f590d918e1eb7c4b7684a81 Mon Sep 17 00:00:00 2001 +From: Coly Li <colyli@suse.de> +Date: Sat, 15 Aug 2020 15:32:59 +0800 +Subject: [PATCH v7 2/6] nvme-tcp: check page by sendpage_ok() before calling + kernel_sendpage() + +Currently nvme_tcp_try_send_data() doesn't use kernel_sendpage() to +send slab pages. But for pages allocated by __get_free_pages() without +__GFP_COMP, which also have refcount as 0, they are still sent by +kernel_sendpage() to remote end, this is problematic. + +The new introduced helper sendpage_ok() checks both PageSlab tag and +page_count counter, and returns true if the checking page is OK to be +sent by kernel_sendpage(). + +This patch fixes the page checking issue of nvme_tcp_try_send_data() +with sendpage_ok(). If sendpage_ok() returns true, send this page by +kernel_sendpage(), otherwise use sock_no_sendpage to handle this page. 
+ +Signed-off-by: Coly Li <colyli@suse.de> +Cc: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com> +Cc: Christoph Hellwig <hch@lst.de> +Cc: Hannes Reinecke <hare@suse.de> +Cc: Jan Kara <jack@suse.com> +Cc: Jens Axboe <axboe@kernel.dk> +Cc: Mikhail Skorzhinskii <mskorzhinskiy@solarflare.com> +Cc: Philipp Reisner <philipp.reisner@linbit.com> +Cc: Sagi Grimberg <sagi@grimberg.me> +Cc: Vlastimil Babka <vbabka@suse.com> +Cc: stable@vger.kernel.org +--- + drivers/nvme/host/tcp.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c +index 62fbaecdc960..902fe742762b 100644 +--- a/drivers/nvme/host/tcp.c ++++ b/drivers/nvme/host/tcp.c +@@ -912,12 +912,11 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) + else + flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; + +- /* can't zcopy slab pages */ +- if (unlikely(PageSlab(page))) { +- ret = sock_no_sendpage(queue->sock, page, offset, len, ++ if (sendpage_ok(page)) { ++ ret = kernel_sendpage(queue->sock, page, offset, len, + flags); + } else { +- ret = kernel_sendpage(queue->sock, page, offset, len, ++ ret = sock_no_sendpage(queue->sock, page, offset, len, + flags); + } + if (ret <= 0) +-- +2.26.2 + diff --git a/for-next/nvme-tcp/v7-0003-tcp-use-sendpage_ok-to-detect-misused-.sendpage.patch b/for-next/nvme-tcp/v7-0003-tcp-use-sendpage_ok-to-detect-misused-.sendpage.patch new file mode 100644 index 0000000..aefa3c5 --- /dev/null +++ b/for-next/nvme-tcp/v7-0003-tcp-use-sendpage_ok-to-detect-misused-.sendpage.patch @@ -0,0 +1,44 @@ +From de6c6f9a3f1284083bbafe22a70f18a98fdeee4e Mon Sep 17 00:00:00 2001 +From: Coly Li <colyli@suse.de> +Date: Tue, 18 Aug 2020 20:05:35 +0800 +Subject: [PATCH v7 3/6] tcp: use sendpage_ok() to detect misused .sendpage + +commit a10674bf2406 ("tcp: detecting the misuse of .sendpage for Slab +objects") adds the checks for Slab pages, but the pages don't have +page_count are still missing from the check. 
+ +Network layer's sendpage method is not designed to send page_count 0 +pages neither, therefore both PageSlab() and page_count() should be +both checked for the sending page. This is exactly what sendpage_ok() +does. + +This patch uses sendpage_ok() in do_tcp_sendpages() to detect misused +.sendpage, to make the code more robust. + +Fixes: a10674bf2406 ("tcp: detecting the misuse of .sendpage for Slab objects") +Suggested-by: Eric Dumazet <eric.dumazet@gmail.com> +Signed-off-by: Coly Li <colyli@suse.de> +Cc: Vasily Averin <vvs@virtuozzo.com> +Cc: David S. Miller <davem@davemloft.net> +Cc: stable@vger.kernel.org +--- + net/ipv4/tcp.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index 31f3b858db81..2135ee7c806d 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -970,7 +970,8 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, + long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + + if (IS_ENABLED(CONFIG_DEBUG_VM) && +- WARN_ONCE(PageSlab(page), "page must not be a Slab one")) ++ WARN_ONCE(!sendpage_ok(page), ++ "page must not be a Slab one and have page_count > 0")) + return -EINVAL; + + /* Wait for a connection to finish. 
One exception is TCP Fast Open +-- +2.26.2 + diff --git a/for-next/0002-drbd-code-cleanup-by-using-sendpage_ok-to-check-page.patch b/for-next/nvme-tcp/v7-0004-drbd-code-cleanup-by-using-sendpage_ok-to-check-p.patch index db18988..72fdf41 100644 --- a/for-next/0002-drbd-code-cleanup-by-using-sendpage_ok-to-check-page.patch +++ b/for-next/nvme-tcp/v7-0004-drbd-code-cleanup-by-using-sendpage_ok-to-check-p.patch @@ -1,12 +1,12 @@ -From 68a5f72ec67632caa03ef44d31dfc07cfb2935d9 Mon Sep 17 00:00:00 2001 +From ddf5d134516c6a151c13556d9d2536f0658eff92 Mon Sep 17 00:00:00 2001 From: Coly Li <colyli@suse.de> -Date: Sat, 25 Jul 2020 23:45:52 +0800 -Subject: [PATCH 2/2] drbd: code cleanup by using sendpage_ok() to check page - for kernel_sendpage() +Date: Sat, 15 Aug 2020 15:37:00 +0800 +Subject: [PATCH v7 4/6] drbd: code cleanup by using sendpage_ok() to check + page for kernel_sendpage() In _drbd_send_page() a page is checked by following code before sending it by kernel_sendpage(), - (page_count(page) < 1) || PageSlab(page) + (page_count(page) < 1) || PageSlab(page) If the check is true, this page won't be send by kernel_sendpage() and handled by sock_no_sendpage(). @@ -25,10 +25,10 @@ Cc: Sagi Grimberg <sagi@grimberg.me> 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c -index 45fbd526c453..567d7e1d9f76 100644 +index cb687ccdbd96..55dc0c91781e 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c -@@ -1552,7 +1552,7 @@ static int _drbd_send_page(struct drbd_peer_device *peer_device, struct page *pa +@@ -1553,7 +1553,7 @@ static int _drbd_send_page(struct drbd_peer_device *peer_device, struct page *pa * put_page(); and would cause either a VM_BUG directly, or * __page_cache_release a page that would actually still be referenced * by someone, leading to some obscure delayed Oops somewhere else. 
*/ diff --git a/for-next/nvme-tcp/v7-0005-scsi-libiscsi-use-sendpage_ok-in-iscsi_tcp_segmen.patch b/for-next/nvme-tcp/v7-0005-scsi-libiscsi-use-sendpage_ok-in-iscsi_tcp_segmen.patch new file mode 100644 index 0000000..513d1b7 --- /dev/null +++ b/for-next/nvme-tcp/v7-0005-scsi-libiscsi-use-sendpage_ok-in-iscsi_tcp_segmen.patch @@ -0,0 +1,45 @@ +From c781516d440b75ac08b8fbf5e55f14a3c556dd44 Mon Sep 17 00:00:00 2001 +From: Coly Li <colyli@suse.de> +Date: Tue, 18 Aug 2020 19:30:04 +0800 +Subject: [PATCH v7 5/6] scsi: libiscsi: use sendpage_ok() in + iscsi_tcp_segment_map() + +In iscsci driver, iscsi_tcp_segment_map() uses the following code to +check whether the page should or not be handled by sendpage: + if (!recv && page_count(sg_page(sg)) >= 1 && !PageSlab(sg_page(sg))) + +The "page_count(sg_page(sg)) >= 1 && !PageSlab(sg_page(sg)" part is to +make sure the page can be sent to network layer's zero copy path. This +part is exactly what sendpage_ok() does. + +This patch uses use sendpage_ok() in iscsi_tcp_segment_map() to replace +the original open coded checks. + +Signed-off-by: Coly Li <colyli@suse.de> +Cc: Vasily Averin <vvs@virtuozzo.com> +Cc: Cong Wang <amwang@redhat.com> +Cc: Mike Christie <michaelc@cs.wisc.edu> +Cc: Lee Duncan <lduncan@suse.com> +Cc: Chris Leech <cleech@redhat.com> +Cc: Christoph Hellwig <hch@lst.de> +Cc: Hannes Reinecke <hare@suse.de> +--- + drivers/scsi/libiscsi_tcp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c +index 6ef93c7af954..31cd8487c16e 100644 +--- a/drivers/scsi/libiscsi_tcp.c ++++ b/drivers/scsi/libiscsi_tcp.c +@@ -128,7 +128,7 @@ static void iscsi_tcp_segment_map(struct iscsi_segment *segment, int recv) + * coalescing neighboring slab objects into a single frag which + * triggers one of hardened usercopy checks. 
+ */ +- if (!recv && page_count(sg_page(sg)) >= 1 && !PageSlab(sg_page(sg))) ++ if (!recv && sendpage_ok(sg_page(sg))) + return; + + if (recv) { +-- +2.26.2 + diff --git a/for-next/nvme-tcp/v7-0006-libceph-use-sendpage_ok-in-ceph_tcp_sendpage.patch b/for-next/nvme-tcp/v7-0006-libceph-use-sendpage_ok-in-ceph_tcp_sendpage.patch new file mode 100644 index 0000000..d9d2d8f --- /dev/null +++ b/for-next/nvme-tcp/v7-0006-libceph-use-sendpage_ok-in-ceph_tcp_sendpage.patch @@ -0,0 +1,35 @@ +From 91b0da8cb890ef1a07b104dc8c2a621fe267cf1d Mon Sep 17 00:00:00 2001 +From: Coly Li <colyli@suse.de> +Date: Tue, 18 Aug 2020 19:54:37 +0800 +Subject: [PATCH v7 6/6] libceph: use sendpage_ok() in ceph_tcp_sendpage() + +In libceph, ceph_tcp_sendpage() does the following checks before handle +the page by network layer's zero copy sendpage method, + if (page_count(page) >= 1 && !PageSlab(page)) + +This check is exactly what sendpage_ok() does. This patch replace the +open coded checks by sendpage_ok() as a code cleanup. + +Signed-off-by: Coly Li <colyli@suse.de> +Cc: Ilya Dryomov <idryomov@gmail.com> +Cc: Jeff Layton <jlayton@kernel.org> +--- + net/ceph/messenger.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c +index 27d6ab11f9ee..6a349da7f013 100644 +--- a/net/ceph/messenger.c ++++ b/net/ceph/messenger.c +@@ -575,7 +575,7 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page, + * coalescing neighboring slab objects into a single frag which + * triggers one of hardened usercopy checks. 
+ */ +- if (page_count(page) >= 1 && !PageSlab(page)) ++ if (sendpage_ok(page)) + sendpage = sock->ops->sendpage; + else + sendpage = sock_no_sendpage; +-- +2.26.2 + diff --git a/for-next/v4-0001-docs-update-trusted-encrypted.rst.patch b/for-next/v4-0001-docs-update-trusted-encrypted.rst.patch new file mode 100644 index 0000000..3c33311 --- /dev/null +++ b/for-next/v4-0001-docs-update-trusted-encrypted.rst.patch @@ -0,0 +1,54 @@ +From 79c9e5d5bc7814f3597bafef7298a26adf1cf894 Mon Sep 17 00:00:00 2001 +From: Coly Li <colyli@suse.de> +Date: Fri, 7 Aug 2020 16:41:14 +0800 +Subject: [PATCH v4] docs: trusted-encrypted.rst: update parameters for command examples + +The parameters in command examples for tpm2_createprimary and +tpm2_evictcontrol are outdated, people (like me) are not able to create +trusted key by these command examples. + +This patch updates the parameters of command example tpm2_createprimary +and tpm2_evictcontrol in trusted-encrypted.rst. With Linux kernel v5.8 +and tpm2-tools-4.1, people can create a trusted key by following the +examples in this document. + +Signed-off-by: Coly Li <colyli@suse.de> +Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> +Reviewed-by: Stefan Berger <stefanb@linux.ibm.com> +Cc: Dan Williams <dan.j.williams@intel.com> +Cc: James Bottomley <jejb@linux.ibm.com> +Cc: Jason Gunthorpe <jgg@ziepe.ca> +Cc: Jonathan Corbet <corbet@lwn.net> +Cc: Mimi Zohar <zohar@linux.ibm.com> +Cc: Peter Huewe <peterhuewe@gmx.de> +--- +Changelog: +v4: update Reviewed-by list, and Cc linux-doc and linux-integrity + maintainers. +v3: update commit log with review comments from Jarkko Sakkinen. +v2: remove the change of trusted key related operation. +v1: initial version. 
+ + Documentation/security/keys/trusted-encrypted.rst | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/Documentation/security/keys/trusted-encrypted.rst b/Documentation/security/keys/trusted-encrypted.rst +index 9483a7425ad5..1da879a68640 100644 +--- a/Documentation/security/keys/trusted-encrypted.rst ++++ b/Documentation/security/keys/trusted-encrypted.rst +@@ -39,10 +39,9 @@ With the IBM TSS 2 stack:: + + Or with the Intel TSS 2 stack:: + +- #> tpm2_createprimary --hierarchy o -G rsa2048 -o key.ctxt ++ #> tpm2_createprimary --hierarchy o -G rsa2048 -c key.ctxt + [...] +- handle: 0x800000FF +- #> tpm2_evictcontrol -c key.ctxt -p 0x81000001 ++ #> tpm2_evictcontrol -c key.ctxt 0x81000001 + persistentHandle: 0x81000001 + + Usage:: +-- +2.26.2 + diff --git a/for-test/0001-bcache-fix-potential-deadlock-problem-in-btree_gc_co.patch b/for-test/0001-bcache-fix-potential-deadlock-problem-in-btree_gc_co.patch deleted file mode 100644 index 611ac7a..0000000 --- a/for-test/0001-bcache-fix-potential-deadlock-problem-in-btree_gc_co.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 208600e9513191045bb10a540be617e9ab01e0ba Mon Sep 17 00:00:00 2001 -From: Zhiqiang Liu <liuzhiqiang26@huawei.com> -Date: Sun, 26 Apr 2020 16:06:27 +0800 -Subject: [PATCH] bcache: fix potential deadlock problem in btree_gc_coalesce -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -coccicheck reports: - drivers/md//bcache/btree.c:1538:1-7: preceding lock on line 1417 - -btree_gc_coalesce func is designed to coalesce two adjacent nodes in -new_nodes[GC_MERGE_NODES] and finally release one node. All nodes`write_lock, -new_nodes[i]->write_lock, are holded before coalescing adjacent nodes, -and them will be released after coalescing successfully. 
- -However, if the coalescing process fails, such as no enough space of new_nodes[1] -to fit all of the remaining keys in new_nodes[0] and realloc keylist failed, we -will goto to out_nocoalesce tag directly without releasing new_nodes[i]->write_lock. -Then, a deadlock will occur after calling btree_node_free to free new_nodes[i], -which also try to acquire new_nodes[i]->write_lock. - -Here, we add a new tag 'out_unlock_nocoalesce' before out_nocoalesce tag to release -new_nodes[i]->write_lock when coalescing process fails. - --- -V1->V2: rewrite commit log (suggested by Coly Li) and rename the patch - -Fixes: 2a285686c1 ("bcache: btree locking rework") -Signed-off-by: Zhiqiang Liu <liuzhiqiang26@huawei.com> ---- - drivers/md/bcache/btree.c | 8 ++++++-- - 1 file changed, 6 insertions(+), 2 deletions(-) - -diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c -index 39de94edd73a..6548a601edf0 100644 ---- a/drivers/md/bcache/btree.c -+++ b/drivers/md/bcache/btree.c -@@ -1389,7 +1389,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, - if (__set_blocks(n1, n1->keys + n2->keys, - block_bytes(b->c)) > - btree_blocks(new_nodes[i])) -- goto out_nocoalesce; -+ goto out_unlock_nocoalesce; - - keys = n2->keys; - /* Take the key of the node we're getting rid of */ -@@ -1418,7 +1418,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, - - if (__bch_keylist_realloc(&keylist, - bkey_u64s(&new_nodes[i]->key))) -- goto out_nocoalesce; -+ goto out_unlock_nocoalesce; - - bch_btree_node_write(new_nodes[i], &cl); - bch_keylist_add(&keylist, &new_nodes[i]->key); -@@ -1464,6 +1464,10 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, - /* Invalidated our iterator */ - return -EINTR; - -+out_unlock_nocoalesce: -+ for (i = 0; i < nodes; i++) -+ mutex_unlock(&new_nodes[i]->write_lock); -+ - out_nocoalesce: - closure_sync(&cl); - --- -2.25.0 - diff --git a/for-test/0014-bcache-check-return-value-of-prio_read.patch 
b/for-test/0014-bcache-check-return-value-of-prio_read.patch deleted file mode 100644 index e5a35c0..0000000 --- a/for-test/0014-bcache-check-return-value-of-prio_read.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 2125854359b2fc478cdc9e3ba4444b95b1b7bc2e Mon Sep 17 00:00:00 2001 -From: Coly Li <colyli@suse.de> -Date: Sun, 22 Dec 2019 13:53:06 +0800 -Subject: [PATCH 14/24] bcache: check return value of prio_read() - -Now if prio_read() failed during starting a cache set, we can print -out error message in run_cache_set() and handle the failure properly. - -Signed-off-by: Coly Li <colyli@suse.de> ---- - drivers/md/bcache/super.c | 21 ++++++++++++++++----- - 1 file changed, 16 insertions(+), 5 deletions(-) - -diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c -index 3dea1d5acd5c..2749daf09724 100644 ---- a/drivers/md/bcache/super.c -+++ b/drivers/md/bcache/super.c -@@ -609,12 +609,13 @@ int bch_prio_write(struct cache *ca, bool wait) - return 0; - } - --static void prio_read(struct cache *ca, uint64_t bucket) -+static int prio_read(struct cache *ca, uint64_t bucket) - { - struct prio_set *p = ca->disk_buckets; - struct bucket_disk *d = p->data + prios_per_bucket(ca), *end = d; - struct bucket *b; - unsigned int bucket_nr = 0; -+ int ret = -EIO; - - for (b = ca->buckets; - b < ca->buckets + ca->sb.nbuckets; -@@ -627,11 +628,15 @@ static void prio_read(struct cache *ca, uint64_t bucket) - prio_io(ca, bucket, REQ_OP_READ, 0); - - if (p->csum != -- bch_crc64(&p->magic, bucket_bytes(ca) - 8)) -+ bch_crc64(&p->magic, bucket_bytes(ca) - 8)) { - pr_warn("bad csum reading priorities"); -+ goto out; -+ } - -- if (p->magic != pset_magic(&ca->sb)) -+ if (p->magic != pset_magic(&ca->sb)) { - pr_warn("bad magic reading priorities"); -+ goto out; -+ } - - bucket = p->next_bucket; - d = p->data; -@@ -640,6 +645,10 @@ static void prio_read(struct cache *ca, uint64_t bucket) - b->prio = le16_to_cpu(d->prio); - b->gen = b->last_gc = d->gen; - } -+ -+ ret = 0; -+out: -+ 
return ret; - } - - /* Bcache device */ -@@ -1873,8 +1882,10 @@ static int run_cache_set(struct cache_set *c) - j = &list_entry(journal.prev, struct journal_replay, list)->j; - - err = "IO error reading priorities"; -- for_each_cache(ca, c, i) -- prio_read(ca, j->prio_bucket[ca->sb.nr_this_dev]); -+ for_each_cache(ca, c, i) { -+ if (prio_read(ca, j->prio_bucket[ca->sb.nr_this_dev])) -+ goto err; -+ } - - /* - * If prio_read() fails it'll call cache_set_error and we'll --- -2.16.4 - diff --git a/for-test/0021-bcache-remove-unnecessary-mca_cannibalize.patch b/for-test/0021-bcache-remove-unnecessary-mca_cannibalize.patch deleted file mode 100644 index b5492fc..0000000 --- a/for-test/0021-bcache-remove-unnecessary-mca_cannibalize.patch +++ /dev/null @@ -1,76 +0,0 @@ -From cef2411bc2d1f0b31eed75252df2b0e7f3db7534 Mon Sep 17 00:00:00 2001 -From: Coly Li <colyli@suse.de> -Date: Mon, 6 Jan 2020 23:05:36 +0800 -Subject: [PATCH 21/24] bcache: remove unnecessary mca_cannibalize() - -mca_cannibalize() is used to cannibalize a btree node cache in -mca_alloc() when, -- There is no available node from c->btree_cache_freeable list. -- There is no available node from c->btree_cache_freed list. -- mca_bucket_alloc() fails to allocate new in-memory node neither. -Then mca_cannibalize() will try to shrink one node from c->btree_cache -list and allocate it to new btree node in such cannibalized way. - -Now with patch "bcache: limit bcache btree node cache memory consumption -by I/O throttle", the in-memory btree nodes can be shrunk from list -c->btree_cache proactively already, in most of time there will be enough -memory to allocate. So kzalloc() in mca_bucket_alloc() will always -success, and such cannibalized allocation is almost useless. Considering -the extra complication in mca_cannibalize_lock(), it is time to remove -the unnecessary mca_cannibalize() from bcache code. 
- -NOTE: mca_cannibalize_lock() and mca_cannibalize_unlock() are still kept -in bcache code, they are referenced by other btree related code yet. - -Signed-off-by: Coly Li <colyli@suse.de> ---- - drivers/md/bcache/btree.c | 26 -------------------------- - 1 file changed, 26 deletions(-) - -diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c -index ada17113482f..48a097037da8 100644 ---- a/drivers/md/bcache/btree.c -+++ b/drivers/md/bcache/btree.c -@@ -962,28 +962,6 @@ static int mca_cannibalize_lock(struct cache_set *c, struct btree_op *op) - return 0; - } - --static struct btree *mca_cannibalize(struct cache_set *c, struct btree_op *op, -- struct bkey *k) --{ -- struct btree *b; -- -- trace_bcache_btree_cache_cannibalize(c); -- -- if (mca_cannibalize_lock(c, op)) -- return ERR_PTR(-EINTR); -- -- list_for_each_entry_reverse(b, &c->btree_cache, list) -- if (!mca_reap(b, btree_order(k), false)) -- return b; -- -- list_for_each_entry_reverse(b, &c->btree_cache, list) -- if (!mca_reap(b, btree_order(k), true)) -- return b; -- -- WARN(1, "btree cache cannibalize failed\n"); -- return ERR_PTR(-ENOMEM); --} -- - /* - * We can only have one thread cannibalizing other cached btree nodes at a time, - * or we'll deadlock. 
We use an open coded mutex to ensure that, which a -@@ -1072,10 +1050,6 @@ static struct btree *mca_alloc(struct cache_set *c, struct btree_op *op, - if (b) - rw_unlock(true, b); - -- b = mca_cannibalize(c, op, k); -- if (!IS_ERR(b)) -- goto out; -- - return b; - } - --- -2.16.4 - diff --git a/for-test/0023-bcache-don-t-explicitly-shrink-btree-node-cache-in-r.patch b/for-test/0023-bcache-don-t-explicitly-shrink-btree-node-cache-in-r.patch deleted file mode 100644 index 031b91d..0000000 --- a/for-test/0023-bcache-don-t-explicitly-shrink-btree-node-cache-in-r.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 8d1f76ef79fd6ee5e70b9380e35d558e34d4fd8b Mon Sep 17 00:00:00 2001 -From: Coly Li <colyli@suse.de> -Date: Tue, 7 Jan 2020 12:36:08 +0800 -Subject: [PATCH 23/24] bcache: don't explicitly shrink btree node cache in - run_cache_set() - -Now we have a dedicated kernel thread to shrink the in-memory btree node -cache in parallel, and this method resitricts memory consuption in -bch_btree_check() quite good, we don't need to worry potential memory -allocation failures after bch_btree_check() for creating and running -kernel threads. - -Therefore we don't need to explicitly shrink the btree node cache right -after bch_btree_check() returns in run_cache_set(). This patch removes -such code piece in run_cache_set() to make the code more simple and -clean. - -Signed-off-by: Coly Li <colyli@suse.de> ---- - drivers/md/bcache/super.c | 17 ----------------- - 1 file changed, 17 deletions(-) - -diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c -index 2a47ff3da3f2..4f2742485f13 100644 ---- a/drivers/md/bcache/super.c -+++ b/drivers/md/bcache/super.c -@@ -1920,23 +1920,6 @@ static int run_cache_set(struct cache_set *c) - if (bch_btree_check(c)) - goto err; - -- /* -- * bch_btree_check() may occupy too much system memory which -- * has negative effects to user space application (e.g. data -- * base) performance. 
Shrink the mca cache memory proactively -- * here to avoid competing memory with user space workloads.. -- */ -- if (!c->shrinker_disabled) { -- struct shrink_control sc; -- -- sc.gfp_mask = GFP_KERNEL; -- sc.nr_to_scan = c->btree_cache_used * c->btree_pages; -- /* first run to clear b->accessed tag */ -- c->shrink.scan_objects(&c->shrink, &sc); -- /* second run to reap non-accessed nodes */ -- c->shrink.scan_objects(&c->shrink, &sc); -- } -- - bch_journal_mark(c, &journal); - bch_initial_gc_finish(c); - pr_debug("btree_check() done"); --- -2.16.4 - diff --git a/for-test/0024-bcache-ignore-pending-signals-in-run_cache_set.patch b/for-test/0024-bcache-ignore-pending-signals-in-run_cache_set.patch deleted file mode 100644 index 584a971..0000000 --- a/for-test/0024-bcache-ignore-pending-signals-in-run_cache_set.patch +++ /dev/null @@ -1,65 +0,0 @@ -From bb0506d117dacbf1a9d24221a7c01f365a6c8381 Mon Sep 17 00:00:00 2001 -From: Coly Li <colyli@suse.de> -Date: Tue, 7 Jan 2020 13:27:08 +0800 -Subject: [PATCH 24/24] bcache: ignore pending signals in run_cache_set() - -Now we have c->btree_cache_shrink_thread to restrict memory consumption -of in-memory btree node cache from bch_btree_check() when running a -cache set, but kernel thread creating may still fail for allocator or -gc thread. Finally it is because current process has pending signal to -make kthread_create() fail and return -EINTR. The pending signal is from -OOM killer because c->btree_cache_shrink_thread shrinks btree node cache -in parallel and the peak memory consumption by bch_btree_check() may -still triger OOM killer temporarily. - -Such failure only happens when starting a cache set, especially when -the cache set registration is triggered by udev rules during system boot -up time. Therefore ignore OOM killer's signal only once for such peak -memory consumption when starting a cache set is safe. 
- -This patch adds flush_signals(current) right after bch_btree_check() -returns, to flush and ignore all pending signals which are received -during bch_btree_check(). Then following kthread_create() or -kthread_run() for other bcache kthreads will not failure. - -Signed-off-by: Coly Li <colyli@suse.de> ---- - drivers/md/bcache/super.c | 15 +++++++++++++++ - 1 file changed, 15 insertions(+) - -diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c -index 4f2742485f13..c364fb85826a 100644 ---- a/drivers/md/bcache/super.c -+++ b/drivers/md/bcache/super.c -@@ -23,6 +23,7 @@ - #include <linux/random.h> - #include <linux/reboot.h> - #include <linux/sysfs.h> -+#include <linux/sched/signal.h> - - unsigned int bch_cutoff_writeback; - unsigned int bch_cutoff_writeback_sync; -@@ -1920,6 +1921,20 @@ static int run_cache_set(struct cache_set *c) - if (bch_btree_check(c)) - goto err; - -+ /* -+ * If bch_btree_check() consumes too much system memory, -+ * although c->btree_cache_shrink_thread may shrink and -+ * restrict the above memory consuption, the memory shrinking -+ * is in parallel. Due to the delay of parallel shrinking, It -+ * is still possible that the registering process (which is -+ * current process) is selected by OOM killer for its peak -+ * memory consumption. Then following kernel thread creating -+ * will fail because of the pending signal sent by OOM killer. -+ * In this location, such signal can be safely ignored, to make -+ * following kthread creation function working. 
-+ */ -+ flush_signals(current); -+ - bch_journal_mark(c, &journal); - bch_initial_gc_finish(c); - pr_debug("btree_check() done"); --- -2.16.4 - diff --git a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/0000-cover-letter.patch b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1/0000-cover-letter.patch index 8b05636..8b05636 100644 --- a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/0000-cover-letter.patch +++ b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1/0000-cover-letter.patch diff --git a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1-0001-bcache-remove-int-n-from-parameter-list-of-bch_bu.patch b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1/v1-0001-bcache-remove-int-n-from-parameter-list-of-bch_bu.patch index d55c183..d55c183 100644 --- a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1-0001-bcache-remove-int-n-from-parameter-list-of-bch_bu.patch +++ b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1/v1-0001-bcache-remove-int-n-from-parameter-list-of-bch_bu.patch diff --git a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1-0002-bcache-explicitly-make-cache_set-only-have-single.patch b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1/v1-0002-bcache-explicitly-make-cache_set-only-have-single.patch index 89c492c..89c492c 100644 --- a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1-0002-bcache-explicitly-make-cache_set-only-have-single.patch +++ b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1/v1-0002-bcache-explicitly-make-cache_set-only-have-single.patch diff --git a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1-0003-bcache-remove-for_each_cache.patch b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1/v1-0003-bcache-remove-for_each_cache.patch index 2968637..2968637 100644 --- 
a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1-0003-bcache-remove-for_each_cache.patch +++ b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1/v1-0003-bcache-remove-for_each_cache.patch diff --git a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1-0004-bcache-add-set_uuid-in-struct-cache_set.patch b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1/v1-0004-bcache-add-set_uuid-in-struct-cache_set.patch index a735c3d..a735c3d 100644 --- a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1-0004-bcache-add-set_uuid-in-struct-cache_set.patch +++ b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1/v1-0004-bcache-add-set_uuid-in-struct-cache_set.patch diff --git a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1-0005-bcache-only-use-block_bytes-on-struct-cache.patch b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1/v1-0005-bcache-only-use-block_bytes-on-struct-cache.patch index b814ca7..b814ca7 100644 --- a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1-0005-bcache-only-use-block_bytes-on-struct-cache.patch +++ b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1/v1-0005-bcache-only-use-block_bytes-on-struct-cache.patch diff --git a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1-0006-bcache-remove-useless-alloc_bucket_pages.patch b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1/v1-0006-bcache-remove-useless-alloc_bucket_pages.patch index 2057ff6..2057ff6 100644 --- a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1-0006-bcache-remove-useless-alloc_bucket_pages.patch +++ b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1/v1-0006-bcache-remove-useless-alloc_bucket_pages.patch diff --git a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1-0007-bcache-remove-useless-bucket_pages.patch 
b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1/v1-0007-bcache-remove-useless-bucket_pages.patch index b7b40a9..b7b40a9 100644 --- a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1-0007-bcache-remove-useless-bucket_pages.patch +++ b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1/v1-0007-bcache-remove-useless-bucket_pages.patch diff --git a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1-0008-bcache-only-use-bucket_bytes-on-struct-cache.patch b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1/v1-0008-bcache-only-use-bucket_bytes-on-struct-cache.patch index 225cd5e..225cd5e 100644 --- a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1-0008-bcache-only-use-bucket_bytes-on-struct-cache.patch +++ b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1/v1-0008-bcache-only-use-bucket_bytes-on-struct-cache.patch diff --git a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1-0009-bcache-avoid-data-copy-between-cache_set-sb-and-c.patch b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1/v1-0009-bcache-avoid-data-copy-between-cache_set-sb-and-c.patch index f0f0dcc..f0f0dcc 100644 --- a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1-0009-bcache-avoid-data-copy-between-cache_set-sb-and-c.patch +++ b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1/v1-0009-bcache-avoid-data-copy-between-cache_set-sb-and-c.patch diff --git a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1-0010-bcache-don-t-check-seq-numbers-in-register_cache_.patch b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1/v1-0010-bcache-don-t-check-seq-numbers-in-register_cache_.patch index a00c7ad..a00c7ad 100644 --- a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1-0010-bcache-don-t-check-seq-numbers-in-register_cache_.patch +++ 
b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1/v1-0010-bcache-don-t-check-seq-numbers-in-register_cache_.patch diff --git a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1-0011-bcache-remove-can_attach_cache.patch b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1/v1-0011-bcache-remove-can_attach_cache.patch index fac8321..fac8321 100644 --- a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1-0011-bcache-remove-can_attach_cache.patch +++ b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1/v1-0011-bcache-remove-can_attach_cache.patch diff --git a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1-0012-bcache-check-and-set-sync-status-on-cache-s-in-me.patch b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1/v1-0012-bcache-check-and-set-sync-status-on-cache-s-in-me.patch index 96bc7c8..96bc7c8 100644 --- a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1-0012-bcache-check-and-set-sync-status-on-cache-s-in-me.patch +++ b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1/v1-0012-bcache-check-and-set-sync-status-on-cache-s-in-me.patch diff --git a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1-0013-bcache-remove-embedded-struct-cache_sb-from-struc.patch b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1/v1-0013-bcache-remove-embedded-struct-cache_sb-from-struc.patch index 693f0d0..693f0d0 100644 --- a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1-0013-bcache-remove-embedded-struct-cache_sb-from-struc.patch +++ b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1/v1-0013-bcache-remove-embedded-struct-cache_sb-from-struc.patch diff --git a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1-0014-bcache-move-struct-cache_sb-out-of-uapi-bcache.h.patch 
b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1/v1-0014-bcache-move-struct-cache_sb-out-of-uapi-bcache.h.patch index f2597c4..f2597c4 100644 --- a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1-0014-bcache-move-struct-cache_sb-out-of-uapi-bcache.h.patch +++ b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v1/v1-0014-bcache-move-struct-cache_sb-out-of-uapi-bcache.h.patch diff --git a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0001-bcache-remove-int-n-from-parameter-list-of-bch_bu.patch b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0001-bcache-remove-int-n-from-parameter-list-of-bch_bu.patch new file mode 100644 index 0000000..8105f85 --- /dev/null +++ b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0001-bcache-remove-int-n-from-parameter-list-of-bch_bu.patch @@ -0,0 +1,151 @@ +From 9260c7e003b7652c9a8208fa479ff4c5d72a6737 Mon Sep 17 00:00:00 2001 +From: Coly Li <colyli@suse.de> +Date: Fri, 14 Aug 2020 00:07:05 +0800 +Subject: [PATCH v2 01/19] bcache: remove 'int n' from parameter list of + bch_bucket_alloc_set() + +The parameter 'int n' from bch_bucket_alloc_set() is not cleared +defined. From the code comments n is the number of buckets to alloc, but +from the code itself 'n' is the maximum cache to iterate. Indeed all the +locations where bch_bucket_alloc_set() is called, 'n' is alwasy 1. + +This patch removes the confused and unnecessary 'int n' from parameter +list of bch_bucket_alloc_set(), and explicitly allocates only 1 bucket +for its caller. 
+ +Signed-off-by: Coly Li <colyli@suse.de> +--- + drivers/md/bcache/alloc.c | 35 +++++++++++++++-------------------- + drivers/md/bcache/bcache.h | 4 ++-- + drivers/md/bcache/btree.c | 2 +- + drivers/md/bcache/super.c | 2 +- + 4 files changed, 19 insertions(+), 24 deletions(-) + +diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c +index 52035a78d836..4493ff57476d 100644 +--- a/drivers/md/bcache/alloc.c ++++ b/drivers/md/bcache/alloc.c +@@ -49,7 +49,7 @@ + * + * bch_bucket_alloc() allocates a single bucket from a specific cache. + * +- * bch_bucket_alloc_set() allocates one or more buckets from different caches ++ * bch_bucket_alloc_set() allocates one bucket from different caches + * out of a cache set. + * + * free_some_buckets() drives all the processes described above. It's called +@@ -488,34 +488,29 @@ void bch_bucket_free(struct cache_set *c, struct bkey *k) + } + + int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve, +- struct bkey *k, int n, bool wait) ++ struct bkey *k, bool wait) + { +- int i; ++ struct cache *ca; ++ long b; + + /* No allocation if CACHE_SET_IO_DISABLE bit is set */ + if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags))) + return -1; + + lockdep_assert_held(&c->bucket_lock); +- BUG_ON(!n || n > c->caches_loaded || n > MAX_CACHES_PER_SET); + + bkey_init(k); + +- /* sort by free space/prio of oldest data in caches */ +- +- for (i = 0; i < n; i++) { +- struct cache *ca = c->cache_by_alloc[i]; +- long b = bch_bucket_alloc(ca, reserve, wait); ++ ca = c->cache_by_alloc[0]; ++ b = bch_bucket_alloc(ca, reserve, wait); ++ if (b == -1) ++ goto err; + +- if (b == -1) +- goto err; ++ k->ptr[0] = MAKE_PTR(ca->buckets[b].gen, ++ bucket_to_sector(c, b), ++ ca->sb.nr_this_dev); + +- k->ptr[i] = MAKE_PTR(ca->buckets[b].gen, +- bucket_to_sector(c, b), +- ca->sb.nr_this_dev); +- +- SET_KEY_PTRS(k, i + 1); +- } ++ SET_KEY_PTRS(k, 1); + + return 0; + err: +@@ -525,12 +520,12 @@ int __bch_bucket_alloc_set(struct cache_set *c, 
unsigned int reserve, + } + + int bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve, +- struct bkey *k, int n, bool wait) ++ struct bkey *k, bool wait) + { + int ret; + + mutex_lock(&c->bucket_lock); +- ret = __bch_bucket_alloc_set(c, reserve, k, n, wait); ++ ret = __bch_bucket_alloc_set(c, reserve, k, wait); + mutex_unlock(&c->bucket_lock); + return ret; + } +@@ -638,7 +633,7 @@ bool bch_alloc_sectors(struct cache_set *c, + + spin_unlock(&c->data_bucket_lock); + +- if (bch_bucket_alloc_set(c, watermark, &alloc.key, 1, wait)) ++ if (bch_bucket_alloc_set(c, watermark, &alloc.key, wait)) + return false; + + spin_lock(&c->data_bucket_lock); +diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h +index 4fd03d2496d8..5ff6e9573935 100644 +--- a/drivers/md/bcache/bcache.h ++++ b/drivers/md/bcache/bcache.h +@@ -994,9 +994,9 @@ void bch_bucket_free(struct cache_set *c, struct bkey *k); + + long bch_bucket_alloc(struct cache *ca, unsigned int reserve, bool wait); + int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve, +- struct bkey *k, int n, bool wait); ++ struct bkey *k, bool wait); + int bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve, +- struct bkey *k, int n, bool wait); ++ struct bkey *k, bool wait); + bool bch_alloc_sectors(struct cache_set *c, struct bkey *k, + unsigned int sectors, unsigned int write_point, + unsigned int write_prio, bool wait); +diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c +index 3d8bd0692af3..e2a719fed53b 100644 +--- a/drivers/md/bcache/btree.c ++++ b/drivers/md/bcache/btree.c +@@ -1091,7 +1091,7 @@ struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op, + + mutex_lock(&c->bucket_lock); + retry: +- if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, wait)) ++ if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, wait)) + goto err; + + bkey_put(c, &k.key); +diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c +index 
1bbdc410ee3c..7057ec48f3d1 100644 +--- a/drivers/md/bcache/super.c ++++ b/drivers/md/bcache/super.c +@@ -486,7 +486,7 @@ static int __uuid_write(struct cache_set *c) + closure_init_stack(&cl); + lockdep_assert_held(&bch_register_lock); + +- if (bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, true)) ++ if (bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, true)) + return 1; + + size = meta_bucket_pages(&c->sb) * PAGE_SECTORS; +-- +2.26.2 + diff --git a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0002-bcache-explicitly-make-cache_set-only-have-single.patch b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0002-bcache-explicitly-make-cache_set-only-have-single.patch new file mode 100644 index 0000000..6400487 --- /dev/null +++ b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0002-bcache-explicitly-make-cache_set-only-have-single.patch @@ -0,0 +1,128 @@ +From da9ff41f507337ce4797935e8ba9b70da361d59d Mon Sep 17 00:00:00 2001 +From: Coly Li <colyli@suse.de> +Date: Fri, 14 Aug 2020 00:30:59 +0800 +Subject: [PATCH v2 02/19] bcache: explicitly make cache_set only have single + cache + +Currently although the bcache code has a framework for multiple caches +in a cache set, but indeed the multiple caches never completed and users +use md raid1 for multiple copies of the cached data. + +This patch does the following change in struct cache_set, to explicitly +make a cache_set only have single cache, +- Change pointer array "*cache[MAX_CACHES_PER_SET]" to a single pointer + "*cache". +- Remove pointer array "*cache_by_alloc[MAX_CACHES_PER_SET]". +- Remove "caches_loaded". + +Now the code looks as exactly what it does in practic: only one cache is +used in the cache set. 
+ +Signed-off-by: Coly Li <colyli@suse.de> +--- + drivers/md/bcache/alloc.c | 2 +- + drivers/md/bcache/bcache.h | 8 +++----- + drivers/md/bcache/super.c | 19 ++++++++----------- + 3 files changed, 12 insertions(+), 17 deletions(-) + +diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c +index 4493ff57476d..3385f6add6df 100644 +--- a/drivers/md/bcache/alloc.c ++++ b/drivers/md/bcache/alloc.c +@@ -501,7 +501,7 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve, + + bkey_init(k); + +- ca = c->cache_by_alloc[0]; ++ ca = c->cache; + b = bch_bucket_alloc(ca, reserve, wait); + if (b == -1) + goto err; +diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h +index 5ff6e9573935..aa112c1adba1 100644 +--- a/drivers/md/bcache/bcache.h ++++ b/drivers/md/bcache/bcache.h +@@ -519,9 +519,7 @@ struct cache_set { + + struct cache_sb sb; + +- struct cache *cache[MAX_CACHES_PER_SET]; +- struct cache *cache_by_alloc[MAX_CACHES_PER_SET]; +- int caches_loaded; ++ struct cache *cache; + + struct bcache_device **devices; + unsigned int devices_max_used; +@@ -808,7 +806,7 @@ static inline struct cache *PTR_CACHE(struct cache_set *c, + const struct bkey *k, + unsigned int ptr) + { +- return c->cache[PTR_DEV(k, ptr)]; ++ return c->cache; + } + + static inline size_t PTR_BUCKET_NR(struct cache_set *c, +@@ -890,7 +888,7 @@ do { \ + /* Looping macros */ + + #define for_each_cache(ca, cs, iter) \ +- for (iter = 0; ca = cs->cache[iter], iter < (cs)->sb.nr_in_set; iter++) ++ for (iter = 0; ca = cs->cache, iter < 1; iter++) + + #define for_each_bucket(b, ca) \ + for (b = (ca)->buckets + (ca)->sb.first_bucket; \ +diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c +index 7057ec48f3d1..e9ccfa17beb8 100644 +--- a/drivers/md/bcache/super.c ++++ b/drivers/md/bcache/super.c +@@ -1675,7 +1675,7 @@ static void cache_set_free(struct closure *cl) + for_each_cache(ca, c, i) + if (ca) { + ca->set = NULL; +- c->cache[ca->sb.nr_this_dev] = NULL; ++ 
c->cache = NULL; + kobject_put(&ca->kobj); + } + +@@ -2166,7 +2166,7 @@ static const char *register_cache_set(struct cache *ca) + + list_for_each_entry(c, &bch_cache_sets, list) + if (!memcmp(c->sb.set_uuid, ca->sb.set_uuid, 16)) { +- if (c->cache[ca->sb.nr_this_dev]) ++ if (c->cache) + return "duplicate cache set member"; + + if (!can_attach_cache(ca, c)) +@@ -2216,14 +2216,11 @@ static const char *register_cache_set(struct cache *ca) + + kobject_get(&ca->kobj); + ca->set = c; +- ca->set->cache[ca->sb.nr_this_dev] = ca; +- c->cache_by_alloc[c->caches_loaded++] = ca; ++ ca->set->cache = ca; + +- if (c->caches_loaded == c->sb.nr_in_set) { +- err = "failed to run cache set"; +- if (run_cache_set(c) < 0) +- goto err; +- } ++ err = "failed to run cache set"; ++ if (run_cache_set(c) < 0) ++ goto err; + + return NULL; + err: +@@ -2240,8 +2237,8 @@ void bch_cache_release(struct kobject *kobj) + unsigned int i; + + if (ca->set) { +- BUG_ON(ca->set->cache[ca->sb.nr_this_dev] != ca); +- ca->set->cache[ca->sb.nr_this_dev] = NULL; ++ BUG_ON(ca->set->cache != ca); ++ ca->set->cache = NULL; + } + + free_pages((unsigned long) ca->disk_buckets, ilog2(meta_bucket_pages(&ca->sb))); +-- +2.26.2 + diff --git a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0003-bcache-remove-for_each_cache.patch b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0003-bcache-remove-for_each_cache.patch new file mode 100644 index 0000000..605fa1b --- /dev/null +++ b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0003-bcache-remove-for_each_cache.patch @@ -0,0 +1,895 @@ +From 50516df3a606a49a170bb14e26ed595aff4c84d0 Mon Sep 17 00:00:00 2001 +From: Coly Li <colyli@suse.de> +Date: Fri, 14 Aug 2020 01:26:09 +0800 +Subject: [PATCH v2 03/19] bcache: remove for_each_cache() + +Since now each cache_set explicitly has single cache, for_each_cache() +is unnecessary. 
This patch removes this macro, and update all locations +where it is used, and makes sure all code logic still being consistent. + +Signed-off-by: Coly Li <colyli@suse.de> +--- + drivers/md/bcache/alloc.c | 17 ++- + drivers/md/bcache/bcache.h | 9 +- + drivers/md/bcache/btree.c | 103 +++++++--------- + drivers/md/bcache/journal.c | 229 ++++++++++++++++------------------- + drivers/md/bcache/movinggc.c | 58 +++++---- + drivers/md/bcache/super.c | 115 ++++++++---------- + 6 files changed, 237 insertions(+), 294 deletions(-) + +diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c +index 3385f6add6df..1b8310992dd0 100644 +--- a/drivers/md/bcache/alloc.c ++++ b/drivers/md/bcache/alloc.c +@@ -88,7 +88,6 @@ void bch_rescale_priorities(struct cache_set *c, int sectors) + struct cache *ca; + struct bucket *b; + unsigned long next = c->nbuckets * c->sb.bucket_size / 1024; +- unsigned int i; + int r; + + atomic_sub(sectors, &c->rescale); +@@ -104,14 +103,14 @@ void bch_rescale_priorities(struct cache_set *c, int sectors) + + c->min_prio = USHRT_MAX; + +- for_each_cache(ca, c, i) +- for_each_bucket(b, ca) +- if (b->prio && +- b->prio != BTREE_PRIO && +- !atomic_read(&b->pin)) { +- b->prio--; +- c->min_prio = min(c->min_prio, b->prio); +- } ++ ca = c->cache; ++ for_each_bucket(b, ca) ++ if (b->prio && ++ b->prio != BTREE_PRIO && ++ !atomic_read(&b->pin)) { ++ b->prio--; ++ c->min_prio = min(c->min_prio, b->prio); ++ } + + mutex_unlock(&c->bucket_lock); + } +diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h +index aa112c1adba1..7ffe6b2d179b 100644 +--- a/drivers/md/bcache/bcache.h ++++ b/drivers/md/bcache/bcache.h +@@ -887,9 +887,6 @@ do { \ + + /* Looping macros */ + +-#define for_each_cache(ca, cs, iter) \ +- for (iter = 0; ca = cs->cache, iter < 1; iter++) +- + #define for_each_bucket(b, ca) \ + for (b = (ca)->buckets + (ca)->sb.first_bucket; \ + b < (ca)->buckets + (ca)->sb.nbuckets; b++) +@@ -931,11 +928,9 @@ static inline uint8_t 
bucket_gc_gen(struct bucket *b) + + static inline void wake_up_allocators(struct cache_set *c) + { +- struct cache *ca; +- unsigned int i; ++ struct cache *ca = c->cache; + +- for_each_cache(ca, c, i) +- wake_up_process(ca->alloc_thread); ++ wake_up_process(ca->alloc_thread); + } + + static inline void closure_bio_submit(struct cache_set *c, +diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c +index e2a719fed53b..0817ad510d9f 100644 +--- a/drivers/md/bcache/btree.c ++++ b/drivers/md/bcache/btree.c +@@ -1167,19 +1167,18 @@ static void make_btree_freeing_key(struct btree *b, struct bkey *k) + static int btree_check_reserve(struct btree *b, struct btree_op *op) + { + struct cache_set *c = b->c; +- struct cache *ca; +- unsigned int i, reserve = (c->root->level - b->level) * 2 + 1; ++ struct cache *ca = c->cache; ++ unsigned int reserve = (c->root->level - b->level) * 2 + 1; + + mutex_lock(&c->bucket_lock); + +- for_each_cache(ca, c, i) +- if (fifo_used(&ca->free[RESERVE_BTREE]) < reserve) { +- if (op) +- prepare_to_wait(&c->btree_cache_wait, &op->wait, +- TASK_UNINTERRUPTIBLE); +- mutex_unlock(&c->bucket_lock); +- return -EINTR; +- } ++ if (fifo_used(&ca->free[RESERVE_BTREE]) < reserve) { ++ if (op) ++ prepare_to_wait(&c->btree_cache_wait, &op->wait, ++ TASK_UNINTERRUPTIBLE); ++ mutex_unlock(&c->bucket_lock); ++ return -EINTR; ++ } + + mutex_unlock(&c->bucket_lock); + +@@ -1695,7 +1694,6 @@ static void btree_gc_start(struct cache_set *c) + { + struct cache *ca; + struct bucket *b; +- unsigned int i; + + if (!c->gc_mark_valid) + return; +@@ -1705,14 +1703,14 @@ static void btree_gc_start(struct cache_set *c) + c->gc_mark_valid = 0; + c->gc_done = ZERO_KEY; + +- for_each_cache(ca, c, i) +- for_each_bucket(b, ca) { +- b->last_gc = b->gen; +- if (!atomic_read(&b->pin)) { +- SET_GC_MARK(b, 0); +- SET_GC_SECTORS_USED(b, 0); +- } ++ ca = c->cache; ++ for_each_bucket(b, ca) { ++ b->last_gc = b->gen; ++ if (!atomic_read(&b->pin)) { ++ SET_GC_MARK(b, 0); ++ 
SET_GC_SECTORS_USED(b, 0); + } ++ } + + mutex_unlock(&c->bucket_lock); + } +@@ -1721,7 +1719,8 @@ static void bch_btree_gc_finish(struct cache_set *c) + { + struct bucket *b; + struct cache *ca; +- unsigned int i; ++ unsigned int i, j; ++ uint64_t *k; + + mutex_lock(&c->bucket_lock); + +@@ -1739,7 +1738,6 @@ static void bch_btree_gc_finish(struct cache_set *c) + struct bcache_device *d = c->devices[i]; + struct cached_dev *dc; + struct keybuf_key *w, *n; +- unsigned int j; + + if (!d || UUID_FLASH_ONLY(&c->uuids[i])) + continue; +@@ -1756,29 +1754,27 @@ static void bch_btree_gc_finish(struct cache_set *c) + rcu_read_unlock(); + + c->avail_nbuckets = 0; +- for_each_cache(ca, c, i) { +- uint64_t *i; + +- ca->invalidate_needs_gc = 0; ++ ca = c->cache; ++ ca->invalidate_needs_gc = 0; + +- for (i = ca->sb.d; i < ca->sb.d + ca->sb.keys; i++) +- SET_GC_MARK(ca->buckets + *i, GC_MARK_METADATA); ++ for (k = ca->sb.d; k < ca->sb.d + ca->sb.keys; k++) ++ SET_GC_MARK(ca->buckets + *k, GC_MARK_METADATA); + +- for (i = ca->prio_buckets; +- i < ca->prio_buckets + prio_buckets(ca) * 2; i++) +- SET_GC_MARK(ca->buckets + *i, GC_MARK_METADATA); ++ for (k = ca->prio_buckets; ++ k < ca->prio_buckets + prio_buckets(ca) * 2; k++) ++ SET_GC_MARK(ca->buckets + *k, GC_MARK_METADATA); + +- for_each_bucket(b, ca) { +- c->need_gc = max(c->need_gc, bucket_gc_gen(b)); ++ for_each_bucket(b, ca) { ++ c->need_gc = max(c->need_gc, bucket_gc_gen(b)); + +- if (atomic_read(&b->pin)) +- continue; ++ if (atomic_read(&b->pin)) ++ continue; + +- BUG_ON(!GC_MARK(b) && GC_SECTORS_USED(b)); ++ BUG_ON(!GC_MARK(b) && GC_SECTORS_USED(b)); + +- if (!GC_MARK(b) || GC_MARK(b) == GC_MARK_RECLAIMABLE) +- c->avail_nbuckets++; +- } ++ if (!GC_MARK(b) || GC_MARK(b) == GC_MARK_RECLAIMABLE) ++ c->avail_nbuckets++; + } + + mutex_unlock(&c->bucket_lock); +@@ -1830,12 +1826,10 @@ static void bch_btree_gc(struct cache_set *c) + + static bool gc_should_run(struct cache_set *c) + { +- struct cache *ca; +- unsigned int i; ++ 
struct cache *ca = c->cache; + +- for_each_cache(ca, c, i) +- if (ca->invalidate_needs_gc) +- return true; ++ if (ca->invalidate_needs_gc) ++ return true; + + if (atomic_read(&c->sectors_to_gc) < 0) + return true; +@@ -2081,9 +2075,8 @@ int bch_btree_check(struct cache_set *c) + + void bch_initial_gc_finish(struct cache_set *c) + { +- struct cache *ca; ++ struct cache *ca = c->cache; + struct bucket *b; +- unsigned int i; + + bch_btree_gc_finish(c); + +@@ -2098,20 +2091,18 @@ void bch_initial_gc_finish(struct cache_set *c) + * This is only safe for buckets that have no live data in them, which + * there should always be some of. + */ +- for_each_cache(ca, c, i) { +- for_each_bucket(b, ca) { +- if (fifo_full(&ca->free[RESERVE_PRIO]) && +- fifo_full(&ca->free[RESERVE_BTREE])) +- break; ++ for_each_bucket(b, ca) { ++ if (fifo_full(&ca->free[RESERVE_PRIO]) && ++ fifo_full(&ca->free[RESERVE_BTREE])) ++ break; + +- if (bch_can_invalidate_bucket(ca, b) && +- !GC_MARK(b)) { +- __bch_invalidate_one_bucket(ca, b); +- if (!fifo_push(&ca->free[RESERVE_PRIO], +- b - ca->buckets)) +- fifo_push(&ca->free[RESERVE_BTREE], +- b - ca->buckets); +- } ++ if (bch_can_invalidate_bucket(ca, b) && ++ !GC_MARK(b)) { ++ __bch_invalidate_one_bucket(ca, b); ++ if (!fifo_push(&ca->free[RESERVE_PRIO], ++ b - ca->buckets)) ++ fifo_push(&ca->free[RESERVE_BTREE], ++ b - ca->buckets); + } + } + +diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c +index 77fbfd52edcf..027d0f8c4daf 100644 +--- a/drivers/md/bcache/journal.c ++++ b/drivers/md/bcache/journal.c +@@ -179,112 +179,109 @@ int bch_journal_read(struct cache_set *c, struct list_head *list) + ret; \ + }) + +- struct cache *ca; +- unsigned int iter; ++ struct cache *ca = c->cache; + int ret = 0; ++ struct journal_device *ja = &ca->journal; ++ DECLARE_BITMAP(bitmap, SB_JOURNAL_BUCKETS); ++ unsigned int i, l, r, m; ++ uint64_t seq; + +- for_each_cache(ca, c, iter) { +- struct journal_device *ja = &ca->journal; +- 
DECLARE_BITMAP(bitmap, SB_JOURNAL_BUCKETS); +- unsigned int i, l, r, m; +- uint64_t seq; +- +- bitmap_zero(bitmap, SB_JOURNAL_BUCKETS); +- pr_debug("%u journal buckets\n", ca->sb.njournal_buckets); ++ bitmap_zero(bitmap, SB_JOURNAL_BUCKETS); ++ pr_debug("%u journal buckets\n", ca->sb.njournal_buckets); + ++ /* ++ * Read journal buckets ordered by golden ratio hash to quickly ++ * find a sequence of buckets with valid journal entries ++ */ ++ for (i = 0; i < ca->sb.njournal_buckets; i++) { + /* +- * Read journal buckets ordered by golden ratio hash to quickly +- * find a sequence of buckets with valid journal entries ++ * We must try the index l with ZERO first for ++ * correctness due to the scenario that the journal ++ * bucket is circular buffer which might have wrapped + */ +- for (i = 0; i < ca->sb.njournal_buckets; i++) { +- /* +- * We must try the index l with ZERO first for +- * correctness due to the scenario that the journal +- * bucket is circular buffer which might have wrapped +- */ +- l = (i * 2654435769U) % ca->sb.njournal_buckets; ++ l = (i * 2654435769U) % ca->sb.njournal_buckets; + +- if (test_bit(l, bitmap)) +- break; ++ if (test_bit(l, bitmap)) ++ break; + +- if (read_bucket(l)) +- goto bsearch; +- } ++ if (read_bucket(l)) ++ goto bsearch; ++ } + +- /* +- * If that fails, check all the buckets we haven't checked +- * already +- */ +- pr_debug("falling back to linear search\n"); ++ /* ++ * If that fails, check all the buckets we haven't checked ++ * already ++ */ ++ pr_debug("falling back to linear search\n"); + +- for_each_clear_bit(l, bitmap, ca->sb.njournal_buckets) +- if (read_bucket(l)) +- goto bsearch; ++ for_each_clear_bit(l, bitmap, ca->sb.njournal_buckets) ++ if (read_bucket(l)) ++ goto bsearch; + +- /* no journal entries on this device? */ +- if (l == ca->sb.njournal_buckets) +- continue; ++ /* no journal entries on this device? 
*/ ++ if (l == ca->sb.njournal_buckets) ++ goto out; + bsearch: +- BUG_ON(list_empty(list)); ++ BUG_ON(list_empty(list)); + +- /* Binary search */ +- m = l; +- r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1); +- pr_debug("starting binary search, l %u r %u\n", l, r); ++ /* Binary search */ ++ m = l; ++ r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1); ++ pr_debug("starting binary search, l %u r %u\n", l, r); + +- while (l + 1 < r) { +- seq = list_entry(list->prev, struct journal_replay, +- list)->j.seq; ++ while (l + 1 < r) { ++ seq = list_entry(list->prev, struct journal_replay, ++ list)->j.seq; + +- m = (l + r) >> 1; +- read_bucket(m); ++ m = (l + r) >> 1; ++ read_bucket(m); + +- if (seq != list_entry(list->prev, struct journal_replay, +- list)->j.seq) +- l = m; +- else +- r = m; +- } ++ if (seq != list_entry(list->prev, struct journal_replay, ++ list)->j.seq) ++ l = m; ++ else ++ r = m; ++ } + +- /* +- * Read buckets in reverse order until we stop finding more +- * journal entries +- */ +- pr_debug("finishing up: m %u njournal_buckets %u\n", +- m, ca->sb.njournal_buckets); +- l = m; ++ /* ++ * Read buckets in reverse order until we stop finding more ++ * journal entries ++ */ ++ pr_debug("finishing up: m %u njournal_buckets %u\n", ++ m, ca->sb.njournal_buckets); ++ l = m; + +- while (1) { +- if (!l--) +- l = ca->sb.njournal_buckets - 1; ++ while (1) { ++ if (!l--) ++ l = ca->sb.njournal_buckets - 1; + +- if (l == m) +- break; ++ if (l == m) ++ break; + +- if (test_bit(l, bitmap)) +- continue; ++ if (test_bit(l, bitmap)) ++ continue; + +- if (!read_bucket(l)) +- break; +- } ++ if (!read_bucket(l)) ++ break; ++ } + +- seq = 0; ++ seq = 0; + +- for (i = 0; i < ca->sb.njournal_buckets; i++) +- if (ja->seq[i] > seq) { +- seq = ja->seq[i]; +- /* +- * When journal_reclaim() goes to allocate for +- * the first time, it'll use the bucket after +- * ja->cur_idx +- */ +- ja->cur_idx = i; +- ja->last_idx = ja->discard_idx = (i + 1) % +- 
ca->sb.njournal_buckets; ++ for (i = 0; i < ca->sb.njournal_buckets; i++) ++ if (ja->seq[i] > seq) { ++ seq = ja->seq[i]; ++ /* ++ * When journal_reclaim() goes to allocate for ++ * the first time, it'll use the bucket after ++ * ja->cur_idx ++ */ ++ ja->cur_idx = i; ++ ja->last_idx = ja->discard_idx = (i + 1) % ++ ca->sb.njournal_buckets; + +- } +- } ++ } + ++out: + if (!list_empty(list)) + c->journal.seq = list_entry(list->prev, + struct journal_replay, +@@ -342,12 +339,10 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list) + + static bool is_discard_enabled(struct cache_set *s) + { +- struct cache *ca; +- unsigned int i; ++ struct cache *ca = s->cache; + +- for_each_cache(ca, s, i) +- if (ca->discard) +- return true; ++ if (ca->discard) ++ return true; + + return false; + } +@@ -633,9 +628,10 @@ static void do_journal_discard(struct cache *ca) + static void journal_reclaim(struct cache_set *c) + { + struct bkey *k = &c->journal.key; +- struct cache *ca; ++ struct cache *ca = c->cache; + uint64_t last_seq; +- unsigned int iter, n = 0; ++ unsigned int next; ++ struct journal_device *ja = &ca->journal; + atomic_t p __maybe_unused; + + atomic_long_inc(&c->reclaim); +@@ -647,46 +643,31 @@ static void journal_reclaim(struct cache_set *c) + + /* Update last_idx */ + +- for_each_cache(ca, c, iter) { +- struct journal_device *ja = &ca->journal; +- +- while (ja->last_idx != ja->cur_idx && +- ja->seq[ja->last_idx] < last_seq) +- ja->last_idx = (ja->last_idx + 1) % +- ca->sb.njournal_buckets; +- } ++ while (ja->last_idx != ja->cur_idx && ++ ja->seq[ja->last_idx] < last_seq) ++ ja->last_idx = (ja->last_idx + 1) % ++ ca->sb.njournal_buckets; + +- for_each_cache(ca, c, iter) +- do_journal_discard(ca); ++ do_journal_discard(ca); + + if (c->journal.blocks_free) + goto out; + +- /* +- * Allocate: +- * XXX: Sort by free journal space +- */ +- +- for_each_cache(ca, c, iter) { +- struct journal_device *ja = &ca->journal; +- unsigned int next = (ja->cur_idx + 1) % 
ca->sb.njournal_buckets; ++ next = (ja->cur_idx + 1) % ca->sb.njournal_buckets; ++ /* No space available on this device */ ++ if (next == ja->discard_idx) ++ goto out; + +- /* No space available on this device */ +- if (next == ja->discard_idx) +- continue; ++ ja->cur_idx = next; ++ k->ptr[0] = MAKE_PTR(0, ++ bucket_to_sector(c, ca->sb.d[ja->cur_idx]), ++ ca->sb.nr_this_dev); ++ atomic_long_inc(&c->reclaimed_journal_buckets); + +- ja->cur_idx = next; +- k->ptr[n++] = MAKE_PTR(0, +- bucket_to_sector(c, ca->sb.d[ja->cur_idx]), +- ca->sb.nr_this_dev); +- atomic_long_inc(&c->reclaimed_journal_buckets); +- } ++ bkey_init(k); ++ SET_KEY_PTRS(k, 1); ++ c->journal.blocks_free = c->sb.bucket_size >> c->block_bits; + +- if (n) { +- bkey_init(k); +- SET_KEY_PTRS(k, n); +- c->journal.blocks_free = c->sb.bucket_size >> c->block_bits; +- } + out: + if (!journal_full(&c->journal)) + __closure_wake_up(&c->journal.wait); +@@ -750,7 +731,7 @@ static void journal_write_unlocked(struct closure *cl) + __releases(c->journal.lock) + { + struct cache_set *c = container_of(cl, struct cache_set, journal.io); +- struct cache *ca; ++ struct cache *ca = c->cache; + struct journal_write *w = c->journal.cur; + struct bkey *k = &c->journal.key; + unsigned int i, sectors = set_blocks(w->data, block_bytes(c)) * +@@ -780,9 +761,7 @@ static void journal_write_unlocked(struct closure *cl) + bkey_copy(&w->data->btree_root, &c->root->key); + bkey_copy(&w->data->uuid_bucket, &c->uuid_bucket); + +- for_each_cache(ca, c, i) +- w->data->prio_bucket[ca->sb.nr_this_dev] = ca->prio_buckets[0]; +- ++ w->data->prio_bucket[ca->sb.nr_this_dev] = ca->prio_buckets[0]; + w->data->magic = jset_magic(&c->sb); + w->data->version = BCACHE_JSET_VERSION; + w->data->last_seq = last_seq(&c->journal); +diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c +index 5872d6470470..b9c3d27ec093 100644 +--- a/drivers/md/bcache/movinggc.c ++++ b/drivers/md/bcache/movinggc.c +@@ -196,50 +196,48 @@ static unsigned 
int bucket_heap_top(struct cache *ca) + + void bch_moving_gc(struct cache_set *c) + { +- struct cache *ca; ++ struct cache *ca = c->cache; + struct bucket *b; +- unsigned int i; ++ unsigned long sectors_to_move, reserve_sectors; + + if (!c->copy_gc_enabled) + return; + + mutex_lock(&c->bucket_lock); + +- for_each_cache(ca, c, i) { +- unsigned long sectors_to_move = 0; +- unsigned long reserve_sectors = ca->sb.bucket_size * ++ sectors_to_move = 0; ++ reserve_sectors = ca->sb.bucket_size * + fifo_used(&ca->free[RESERVE_MOVINGGC]); + +- ca->heap.used = 0; +- +- for_each_bucket(b, ca) { +- if (GC_MARK(b) == GC_MARK_METADATA || +- !GC_SECTORS_USED(b) || +- GC_SECTORS_USED(b) == ca->sb.bucket_size || +- atomic_read(&b->pin)) +- continue; +- +- if (!heap_full(&ca->heap)) { +- sectors_to_move += GC_SECTORS_USED(b); +- heap_add(&ca->heap, b, bucket_cmp); +- } else if (bucket_cmp(b, heap_peek(&ca->heap))) { +- sectors_to_move -= bucket_heap_top(ca); +- sectors_to_move += GC_SECTORS_USED(b); +- +- ca->heap.data[0] = b; +- heap_sift(&ca->heap, 0, bucket_cmp); +- } +- } ++ ca->heap.used = 0; ++ ++ for_each_bucket(b, ca) { ++ if (GC_MARK(b) == GC_MARK_METADATA || ++ !GC_SECTORS_USED(b) || ++ GC_SECTORS_USED(b) == ca->sb.bucket_size || ++ atomic_read(&b->pin)) ++ continue; + +- while (sectors_to_move > reserve_sectors) { +- heap_pop(&ca->heap, b, bucket_cmp); +- sectors_to_move -= GC_SECTORS_USED(b); ++ if (!heap_full(&ca->heap)) { ++ sectors_to_move += GC_SECTORS_USED(b); ++ heap_add(&ca->heap, b, bucket_cmp); ++ } else if (bucket_cmp(b, heap_peek(&ca->heap))) { ++ sectors_to_move -= bucket_heap_top(ca); ++ sectors_to_move += GC_SECTORS_USED(b); ++ ++ ca->heap.data[0] = b; ++ heap_sift(&ca->heap, 0, bucket_cmp); + } ++ } + +- while (heap_pop(&ca->heap, b, bucket_cmp)) +- SET_GC_MOVE(b, 1); ++ while (sectors_to_move > reserve_sectors) { ++ heap_pop(&ca->heap, b, bucket_cmp); ++ sectors_to_move -= GC_SECTORS_USED(b); + } + ++ while (heap_pop(&ca->heap, b, bucket_cmp)) ++ 
SET_GC_MOVE(b, 1); ++ + mutex_unlock(&c->bucket_lock); + + c->moving_gc_keys.last_scanned = ZERO_KEY; +diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c +index e9ccfa17beb8..91883d5c4b62 100644 +--- a/drivers/md/bcache/super.c ++++ b/drivers/md/bcache/super.c +@@ -343,8 +343,9 @@ static void bcache_write_super_unlock(struct closure *cl) + void bcache_write_super(struct cache_set *c) + { + struct closure *cl = &c->sb_write; +- struct cache *ca; +- unsigned int i, version = BCACHE_SB_VERSION_CDEV_WITH_UUID; ++ struct cache *ca = c->cache; ++ struct bio *bio = &ca->sb_bio; ++ unsigned int version = BCACHE_SB_VERSION_CDEV_WITH_UUID; + + down(&c->sb_write_mutex); + closure_init(cl, &c->cl); +@@ -354,23 +355,19 @@ void bcache_write_super(struct cache_set *c) + if (c->sb.version > version) + version = c->sb.version; + +- for_each_cache(ca, c, i) { +- struct bio *bio = &ca->sb_bio; +- +- ca->sb.version = version; +- ca->sb.seq = c->sb.seq; +- ca->sb.last_mount = c->sb.last_mount; ++ ca->sb.version = version; ++ ca->sb.seq = c->sb.seq; ++ ca->sb.last_mount = c->sb.last_mount; + +- SET_CACHE_SYNC(&ca->sb, CACHE_SYNC(&c->sb)); ++ SET_CACHE_SYNC(&ca->sb, CACHE_SYNC(&c->sb)); + +- bio_init(bio, ca->sb_bv, 1); +- bio_set_dev(bio, ca->bdev); +- bio->bi_end_io = write_super_endio; +- bio->bi_private = ca; ++ bio_init(bio, ca->sb_bv, 1); ++ bio_set_dev(bio, ca->bdev); ++ bio->bi_end_io = write_super_endio; ++ bio->bi_private = ca; + +- closure_get(cl); +- __write_super(&ca->sb, ca->sb_disk, bio); +- } ++ closure_get(cl); ++ __write_super(&ca->sb, ca->sb_disk, bio); + + closure_return_with_destructor(cl, bcache_write_super_unlock); + } +@@ -772,26 +769,22 @@ static void bcache_device_unlink(struct bcache_device *d) + lockdep_assert_held(&bch_register_lock); + + if (d->c && !test_and_set_bit(BCACHE_DEV_UNLINK_DONE, &d->flags)) { +- unsigned int i; +- struct cache *ca; ++ struct cache *ca = d->c->cache; + + sysfs_remove_link(&d->c->kobj, d->name); + 
sysfs_remove_link(&d->kobj, "cache"); + +- for_each_cache(ca, d->c, i) +- bd_unlink_disk_holder(ca->bdev, d->disk); ++ bd_unlink_disk_holder(ca->bdev, d->disk); + } + } + + static void bcache_device_link(struct bcache_device *d, struct cache_set *c, + const char *name) + { +- unsigned int i; +- struct cache *ca; ++ struct cache *ca = c->cache; + int ret; + +- for_each_cache(ca, d->c, i) +- bd_link_disk_holder(ca->bdev, d->disk); ++ bd_link_disk_holder(ca->bdev, d->disk); + + snprintf(d->name, BCACHEDEVNAME_SIZE, + "%s%u", name, d->id); +@@ -1663,7 +1656,6 @@ static void cache_set_free(struct closure *cl) + { + struct cache_set *c = container_of(cl, struct cache_set, cl); + struct cache *ca; +- unsigned int i; + + debugfs_remove(c->debug); + +@@ -1672,12 +1664,12 @@ static void cache_set_free(struct closure *cl) + bch_journal_free(c); + + mutex_lock(&bch_register_lock); +- for_each_cache(ca, c, i) +- if (ca) { +- ca->set = NULL; +- c->cache = NULL; +- kobject_put(&ca->kobj); +- } ++ ca = c->cache; ++ if (ca) { ++ ca->set = NULL; ++ c->cache = NULL; ++ kobject_put(&ca->kobj); ++ } + + bch_bset_sort_state_free(&c->sort); + free_pages((unsigned long) c->uuids, ilog2(meta_bucket_pages(&c->sb))); +@@ -1703,9 +1695,8 @@ static void cache_set_free(struct closure *cl) + static void cache_set_flush(struct closure *cl) + { + struct cache_set *c = container_of(cl, struct cache_set, caching); +- struct cache *ca; ++ struct cache *ca = c->cache; + struct btree *b; +- unsigned int i; + + bch_cache_accounting_destroy(&c->accounting); + +@@ -1730,9 +1721,8 @@ static void cache_set_flush(struct closure *cl) + mutex_unlock(&b->write_lock); + } + +- for_each_cache(ca, c, i) +- if (ca->alloc_thread) +- kthread_stop(ca->alloc_thread); ++ if (ca->alloc_thread) ++ kthread_stop(ca->alloc_thread); + + if (c->journal.cur) { + cancel_delayed_work_sync(&c->journal.work); +@@ -1973,16 +1963,14 @@ static int run_cache_set(struct cache_set *c) + { + const char *err = "cannot allocate memory"; + 
struct cached_dev *dc, *t; +- struct cache *ca; ++ struct cache *ca = c->cache; + struct closure cl; +- unsigned int i; + LIST_HEAD(journal); + struct journal_replay *l; + + closure_init_stack(&cl); + +- for_each_cache(ca, c, i) +- c->nbuckets += ca->sb.nbuckets; ++ c->nbuckets = ca->sb.nbuckets; + set_gc_sectors(c); + + if (CACHE_SYNC(&c->sb)) { +@@ -2002,10 +1990,8 @@ static int run_cache_set(struct cache_set *c) + j = &list_entry(journal.prev, struct journal_replay, list)->j; + + err = "IO error reading priorities"; +- for_each_cache(ca, c, i) { +- if (prio_read(ca, j->prio_bucket[ca->sb.nr_this_dev])) +- goto err; +- } ++ if (prio_read(ca, j->prio_bucket[ca->sb.nr_this_dev])) ++ goto err; + + /* + * If prio_read() fails it'll call cache_set_error and we'll +@@ -2049,9 +2035,8 @@ static int run_cache_set(struct cache_set *c) + bch_journal_next(&c->journal); + + err = "error starting allocator thread"; +- for_each_cache(ca, c, i) +- if (bch_cache_allocator_start(ca)) +- goto err; ++ if (bch_cache_allocator_start(ca)) ++ goto err; + + /* + * First place it's safe to allocate: btree_check() and +@@ -2070,28 +2055,23 @@ static int run_cache_set(struct cache_set *c) + if (bch_journal_replay(c, &journal)) + goto err; + } else { +- pr_notice("invalidating existing data\n"); +- +- for_each_cache(ca, c, i) { +- unsigned int j; ++ unsigned int j; + +- ca->sb.keys = clamp_t(int, ca->sb.nbuckets >> 7, +- 2, SB_JOURNAL_BUCKETS); ++ pr_notice("invalidating existing data\n"); ++ ca->sb.keys = clamp_t(int, ca->sb.nbuckets >> 7, ++ 2, SB_JOURNAL_BUCKETS); + +- for (j = 0; j < ca->sb.keys; j++) +- ca->sb.d[j] = ca->sb.first_bucket + j; +- } ++ for (j = 0; j < ca->sb.keys; j++) ++ ca->sb.d[j] = ca->sb.first_bucket + j; + + bch_initial_gc_finish(c); + + err = "error starting allocator thread"; +- for_each_cache(ca, c, i) +- if (bch_cache_allocator_start(ca)) +- goto err; ++ if (bch_cache_allocator_start(ca)) ++ goto err; + + mutex_lock(&c->bucket_lock); +- for_each_cache(ca, c, i) 
+- bch_prio_write(ca, true); ++ bch_prio_write(ca, true); + mutex_unlock(&c->bucket_lock); + + err = "cannot allocate new UUID bucket"; +@@ -2467,13 +2447,14 @@ static bool bch_is_open_backing(struct block_device *bdev) + static bool bch_is_open_cache(struct block_device *bdev) + { + struct cache_set *c, *tc; +- struct cache *ca; +- unsigned int i; + +- list_for_each_entry_safe(c, tc, &bch_cache_sets, list) +- for_each_cache(ca, c, i) +- if (ca->bdev == bdev) +- return true; ++ list_for_each_entry_safe(c, tc, &bch_cache_sets, list) { ++ struct cache *ca = c->cache; ++ ++ if (ca->bdev == bdev) ++ return true; ++ } ++ + return false; + } + +-- +2.26.2 + diff --git a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0004-bcache-add-set_uuid-in-struct-cache_set.patch b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0004-bcache-add-set_uuid-in-struct-cache_set.patch new file mode 100644 index 0000000..8573fcd --- /dev/null +++ b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0004-bcache-add-set_uuid-in-struct-cache_set.patch @@ -0,0 +1,172 @@ +From 5f709f50fb5302b446ab136dd4673a68051b9299 Mon Sep 17 00:00:00 2001 +From: Coly Li <colyli@suse.de> +Date: Fri, 14 Aug 2020 20:12:07 +0800 +Subject: [PATCH v2 04/19] bcache: add set_uuid in struct cache_set + +This patch adds a separated set_uuid[16] in struct cache_set, to store +the uuid of the cache set. This is the preparation to remove the +embedded struct cache_sb from struct cache_set. 
+ +Signed-off-by: Coly Li <colyli@suse.de> +--- + drivers/md/bcache/bcache.h | 1 + + drivers/md/bcache/debug.c | 2 +- + drivers/md/bcache/super.c | 24 ++++++++++++------------ + include/trace/events/bcache.h | 4 ++-- + 4 files changed, 16 insertions(+), 15 deletions(-) + +diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h +index 7ffe6b2d179b..94a62acac4fc 100644 +--- a/drivers/md/bcache/bcache.h ++++ b/drivers/md/bcache/bcache.h +@@ -668,6 +668,7 @@ struct cache_set { + struct mutex verify_lock; + #endif + ++ uint8_t set_uuid[16]; + unsigned int nr_uuids; + struct uuid_entry *uuids; + BKEY_PADDED(uuid_bucket); +diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c +index 336f43910383..0ccc1b0baa42 100644 +--- a/drivers/md/bcache/debug.c ++++ b/drivers/md/bcache/debug.c +@@ -238,7 +238,7 @@ void bch_debug_init_cache_set(struct cache_set *c) + if (!IS_ERR_OR_NULL(bcache_debug)) { + char name[50]; + +- snprintf(name, 50, "bcache-%pU", c->sb.set_uuid); ++ snprintf(name, 50, "bcache-%pU", c->set_uuid); + c->debug = debugfs_create_file(name, 0400, bcache_debug, c, + &cache_set_debug_ops); + } +diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c +index 91883d5c4b62..90a419ad6445 100644 +--- a/drivers/md/bcache/super.c ++++ b/drivers/md/bcache/super.c +@@ -1189,8 +1189,8 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, + struct cached_dev *exist_dc, *t; + int ret = 0; + +- if ((set_uuid && memcmp(set_uuid, c->sb.set_uuid, 16)) || +- (!set_uuid && memcmp(dc->sb.set_uuid, c->sb.set_uuid, 16))) ++ if ((set_uuid && memcmp(set_uuid, c->set_uuid, 16)) || ++ (!set_uuid && memcmp(dc->sb.set_uuid, c->set_uuid, 16))) + return -ENOENT; + + if (dc->disk.c) { +@@ -1262,7 +1262,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, + u->first_reg = u->last_reg = rtime; + bch_uuid_write(c); + +- memcpy(dc->sb.set_uuid, c->sb.set_uuid, 16); ++ memcpy(dc->sb.set_uuid, c->set_uuid, 16); + 
SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN); + + bch_write_bdev_super(dc, &cl); +@@ -1324,7 +1324,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, + pr_info("Caching %s as %s on set %pU\n", + dc->backing_dev_name, + dc->disk.disk->disk_name, +- dc->disk.c->sb.set_uuid); ++ dc->disk.c->set_uuid); + return 0; + } + +@@ -1632,7 +1632,7 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...) + vaf.va = &args; + + pr_err("error on %pU: %pV, disabling caching\n", +- c->sb.set_uuid, &vaf); ++ c->set_uuid, &vaf); + + va_end(args); + +@@ -1685,7 +1685,7 @@ static void cache_set_free(struct closure *cl) + list_del(&c->list); + mutex_unlock(&bch_register_lock); + +- pr_info("Cache set %pU unregistered\n", c->sb.set_uuid); ++ pr_info("Cache set %pU unregistered\n", c->set_uuid); + wake_up(&unregister_wait); + + closure_debug_destroy(&c->cl); +@@ -1755,7 +1755,7 @@ static void conditional_stop_bcache_device(struct cache_set *c, + { + if (dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_ALWAYS) { + pr_warn("stop_when_cache_set_failed of %s is \"always\", stop it for failed cache set %pU.\n", +- d->disk->disk_name, c->sb.set_uuid); ++ d->disk->disk_name, c->set_uuid); + bcache_device_stop(d); + } else if (atomic_read(&dc->has_dirty)) { + /* +@@ -1862,7 +1862,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) + + bch_cache_accounting_init(&c->accounting, &c->cl); + +- memcpy(c->sb.set_uuid, sb->set_uuid, 16); ++ memcpy(c->set_uuid, sb->set_uuid, 16); + c->sb.block_size = sb->block_size; + c->sb.bucket_size = sb->bucket_size; + c->sb.nr_in_set = sb->nr_in_set; +@@ -2145,7 +2145,7 @@ static const char *register_cache_set(struct cache *ca) + struct cache_set *c; + + list_for_each_entry(c, &bch_cache_sets, list) +- if (!memcmp(c->sb.set_uuid, ca->sb.set_uuid, 16)) { ++ if (!memcmp(c->set_uuid, ca->sb.set_uuid, 16)) { + if (c->cache) + return "duplicate cache set member"; + +@@ -2163,7 +2163,7 @@ static const char 
*register_cache_set(struct cache *ca) + return err; + + err = "error creating kobject"; +- if (kobject_add(&c->kobj, bcache_kobj, "%pU", c->sb.set_uuid) || ++ if (kobject_add(&c->kobj, bcache_kobj, "%pU", c->set_uuid) || + kobject_add(&c->internal, &c->kobj, "internal")) + goto err; + +@@ -2188,7 +2188,7 @@ static const char *register_cache_set(struct cache *ca) + */ + if (ca->sb.seq > c->sb.seq || c->sb.seq == 0) { + c->sb.version = ca->sb.version; +- memcpy(c->sb.set_uuid, ca->sb.set_uuid, 16); ++ memcpy(c->set_uuid, ca->sb.set_uuid, 16); + c->sb.flags = ca->sb.flags; + c->sb.seq = ca->sb.seq; + pr_debug("set version = %llu\n", c->sb.version); +@@ -2698,7 +2698,7 @@ static ssize_t bch_pending_bdevs_cleanup(struct kobject *k, + list_for_each_entry_safe(pdev, tpdev, &pending_devs, list) { + list_for_each_entry_safe(c, tc, &bch_cache_sets, list) { + char *pdev_set_uuid = pdev->dc->sb.set_uuid; +- char *set_uuid = c->sb.uuid; ++ char *set_uuid = c->set_uuid; + + if (!memcmp(pdev_set_uuid, set_uuid, 16)) { + list_del(&pdev->list); +diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h +index 0bddea663b3b..e41c611d6d3b 100644 +--- a/include/trace/events/bcache.h ++++ b/include/trace/events/bcache.h +@@ -164,7 +164,7 @@ TRACE_EVENT(bcache_write, + ), + + TP_fast_assign( +- memcpy(__entry->uuid, c->sb.set_uuid, 16); ++ memcpy(__entry->uuid, c->set_uuid, 16); + __entry->inode = inode; + __entry->sector = bio->bi_iter.bi_sector; + __entry->nr_sector = bio->bi_iter.bi_size >> 9; +@@ -200,7 +200,7 @@ DECLARE_EVENT_CLASS(cache_set, + ), + + TP_fast_assign( +- memcpy(__entry->uuid, c->sb.set_uuid, 16); ++ memcpy(__entry->uuid, c->set_uuid, 16); + ), + + TP_printk("%pU", __entry->uuid) +-- +2.26.2 + diff --git a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0005-bcache-only-use-block_bytes-on-struct-cache.patch b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0005-bcache-only-use-block_bytes-on-struct-cache.patch 
new file mode 100644 index 0000000..1c10d70 --- /dev/null +++ b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0005-bcache-only-use-block_bytes-on-struct-cache.patch @@ -0,0 +1,257 @@ +From 178fa57c56550568bf0d4140d8dc689cc6c11682 Mon Sep 17 00:00:00 2001 +From: Coly Li <colyli@suse.de> +Date: Fri, 14 Aug 2020 21:25:58 +0800 +Subject: [PATCH v2 05/19] bcache: only use block_bytes() on struct cache + +Because struct cache_set and struct cache both have struct cache_sb, +therefore macro block_bytes() can be used on both of them. When removing +the embedded struct cache_sb from struct cache_set, this macro won't be +used on struct cache_set anymore. + +This patch unifies all block_bytes() usage only on struct cache, this is +one of the preparation to remove the embedded struct cache_sb from +struct cache_set. + +Signed-off-by: Coly Li <colyli@suse.de> +--- + drivers/md/bcache/bcache.h | 2 +- + drivers/md/bcache/btree.c | 24 ++++++++++++------------ + drivers/md/bcache/debug.c | 8 ++++---- + drivers/md/bcache/journal.c | 8 ++++---- + drivers/md/bcache/request.c | 2 +- + drivers/md/bcache/super.c | 2 +- + drivers/md/bcache/sysfs.c | 2 +- + 7 files changed, 24 insertions(+), 24 deletions(-) + +diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h +index 94a62acac4fc..29bec61cafbb 100644 +--- a/drivers/md/bcache/bcache.h ++++ b/drivers/md/bcache/bcache.h +@@ -759,7 +759,7 @@ struct bbio { + + #define bucket_pages(c) ((c)->sb.bucket_size / PAGE_SECTORS) + #define bucket_bytes(c) ((c)->sb.bucket_size << 9) +-#define block_bytes(c) ((c)->sb.block_size << 9) ++#define block_bytes(ca) ((ca)->sb.block_size << 9) + + static inline unsigned int meta_bucket_pages(struct cache_sb *sb) + { +diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c +index 0817ad510d9f..c91b4d58a5b3 100644 +--- a/drivers/md/bcache/btree.c ++++ b/drivers/md/bcache/btree.c +@@ -104,7 +104,7 @@ + + static inline struct bset *write_block(struct btree *b) + { +- 
return ((void *) btree_bset_first(b)) + b->written * block_bytes(b->c); ++ return ((void *) btree_bset_first(b)) + b->written * block_bytes(b->c->cache); + } + + static void bch_btree_init_next(struct btree *b) +@@ -173,7 +173,7 @@ void bch_btree_node_read_done(struct btree *b) + goto err; + + err = "bad btree header"; +- if (b->written + set_blocks(i, block_bytes(b->c)) > ++ if (b->written + set_blocks(i, block_bytes(b->c->cache)) > + btree_blocks(b)) + goto err; + +@@ -199,13 +199,13 @@ void bch_btree_node_read_done(struct btree *b) + + bch_btree_iter_push(iter, i->start, bset_bkey_last(i)); + +- b->written += set_blocks(i, block_bytes(b->c)); ++ b->written += set_blocks(i, block_bytes(b->c->cache)); + } + + err = "corrupted btree"; + for (i = write_block(b); + bset_sector_offset(&b->keys, i) < KEY_SIZE(&b->key); +- i = ((void *) i) + block_bytes(b->c)) ++ i = ((void *) i) + block_bytes(b->c->cache)) + if (i->seq == b->keys.set[0].data->seq) + goto err; + +@@ -347,7 +347,7 @@ static void do_btree_node_write(struct btree *b) + + b->bio->bi_end_io = btree_node_write_endio; + b->bio->bi_private = cl; +- b->bio->bi_iter.bi_size = roundup(set_bytes(i), block_bytes(b->c)); ++ b->bio->bi_iter.bi_size = roundup(set_bytes(i), block_bytes(b->c->cache)); + b->bio->bi_opf = REQ_OP_WRITE | REQ_META | REQ_FUA; + bch_bio_map(b->bio, i); + +@@ -423,10 +423,10 @@ void __bch_btree_node_write(struct btree *b, struct closure *parent) + + do_btree_node_write(b); + +- atomic_long_add(set_blocks(i, block_bytes(b->c)) * b->c->sb.block_size, ++ atomic_long_add(set_blocks(i, block_bytes(b->c->cache)) * b->c->sb.block_size, + &PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written); + +- b->written += set_blocks(i, block_bytes(b->c)); ++ b->written += set_blocks(i, block_bytes(b->c->cache)); + } + + void bch_btree_node_write(struct btree *b, struct closure *parent) +@@ -1344,7 +1344,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, + + if (nodes < 2 || + 
__set_blocks(b->keys.set[0].data, keys, +- block_bytes(b->c)) > blocks * (nodes - 1)) ++ block_bytes(b->c->cache)) > blocks * (nodes - 1)) + return 0; + + for (i = 0; i < nodes; i++) { +@@ -1378,7 +1378,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, + k = bkey_next(k)) { + if (__set_blocks(n1, n1->keys + keys + + bkey_u64s(k), +- block_bytes(b->c)) > blocks) ++ block_bytes(b->c->cache)) > blocks) + break; + + last = k; +@@ -1394,7 +1394,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, + * though) + */ + if (__set_blocks(n1, n1->keys + n2->keys, +- block_bytes(b->c)) > ++ block_bytes(b->c->cache)) > + btree_blocks(new_nodes[i])) + goto out_unlock_nocoalesce; + +@@ -1403,7 +1403,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, + last = &r->b->key; + } + +- BUG_ON(__set_blocks(n1, n1->keys + keys, block_bytes(b->c)) > ++ BUG_ON(__set_blocks(n1, n1->keys + keys, block_bytes(b->c->cache)) > + btree_blocks(new_nodes[i])); + + if (last) +@@ -2210,7 +2210,7 @@ static int btree_split(struct btree *b, struct btree_op *op, + goto err; + + split = set_blocks(btree_bset_first(n1), +- block_bytes(n1->c)) > (btree_blocks(b) * 4) / 5; ++ block_bytes(n1->c->cache)) > (btree_blocks(b) * 4) / 5; + + if (split) { + unsigned int keys = 0; +diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c +index 0ccc1b0baa42..b00fd08d696b 100644 +--- a/drivers/md/bcache/debug.c ++++ b/drivers/md/bcache/debug.c +@@ -25,8 +25,8 @@ struct dentry *bcache_debug; + for (i = (start); \ + (void *) i < (void *) (start) + (KEY_SIZE(&b->key) << 9) &&\ + i->seq == (start)->seq; \ +- i = (void *) i + set_blocks(i, block_bytes(b->c)) * \ +- block_bytes(b->c)) ++ i = (void *) i + set_blocks(i, block_bytes(b->c->cache)) * \ ++ block_bytes(b->c->cache)) + + void bch_btree_verify(struct btree *b) + { +@@ -82,14 +82,14 @@ void bch_btree_verify(struct btree *b) + + for_each_written_bset(b, ondisk, i) { + unsigned int block = ((void *) 
i - (void *) ondisk) / +- block_bytes(b->c); ++ block_bytes(b->c->cache); + + pr_err("*** on disk block %u:\n", block); + bch_dump_bset(&b->keys, i, block); + } + + pr_err("*** block %zu not written\n", +- ((void *) i - (void *) ondisk) / block_bytes(b->c)); ++ ((void *) i - (void *) ondisk) / block_bytes(b->c->cache)); + + for (j = 0; j < inmemory->keys; j++) + if (inmemory->d[j] != sorted->d[j]) +diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c +index 027d0f8c4daf..ccd5de0ab0fe 100644 +--- a/drivers/md/bcache/journal.c ++++ b/drivers/md/bcache/journal.c +@@ -98,7 +98,7 @@ reread: left = ca->sb.bucket_size - offset; + return ret; + } + +- blocks = set_blocks(j, block_bytes(ca->set)); ++ blocks = set_blocks(j, block_bytes(ca)); + + /* + * Nodes in 'list' are in linear increasing order of +@@ -734,7 +734,7 @@ static void journal_write_unlocked(struct closure *cl) + struct cache *ca = c->cache; + struct journal_write *w = c->journal.cur; + struct bkey *k = &c->journal.key; +- unsigned int i, sectors = set_blocks(w->data, block_bytes(c)) * ++ unsigned int i, sectors = set_blocks(w->data, block_bytes(ca)) * + c->sb.block_size; + + struct bio *bio; +@@ -754,7 +754,7 @@ static void journal_write_unlocked(struct closure *cl) + return; + } + +- c->journal.blocks_free -= set_blocks(w->data, block_bytes(c)); ++ c->journal.blocks_free -= set_blocks(w->data, block_bytes(ca)); + + w->data->btree_level = c->root->level; + +@@ -847,7 +847,7 @@ static struct journal_write *journal_wait_for_write(struct cache_set *c, + struct journal_write *w = c->journal.cur; + + sectors = __set_blocks(w->data, w->data->keys + nkeys, +- block_bytes(c)) * c->sb.block_size; ++ block_bytes(c->cache)) * c->sb.block_size; + + if (sectors <= min_t(size_t, + c->journal.blocks_free * c->sb.block_size, +diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c +index c7cadaafa947..02408fdbf5bb 100644 +--- a/drivers/md/bcache/request.c ++++ b/drivers/md/bcache/request.c 
+@@ -99,7 +99,7 @@ static int bch_keylist_realloc(struct keylist *l, unsigned int u64s, + * bch_data_insert_keys() will insert the keys created so far + * and finish the rest when the keylist is empty. + */ +- if (newsize * sizeof(uint64_t) > block_bytes(c) - sizeof(struct jset)) ++ if (newsize * sizeof(uint64_t) > block_bytes(c->cache) - sizeof(struct jset)) + return -ENOMEM; + + return __bch_keylist_realloc(l, u64s); +diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c +index 90a419ad6445..36a538c2e960 100644 +--- a/drivers/md/bcache/super.c ++++ b/drivers/md/bcache/super.c +@@ -1528,7 +1528,7 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u) + + kobject_init(&d->kobj, &bch_flash_dev_ktype); + +- if (bcache_device_init(d, block_bytes(c), u->sectors, ++ if (bcache_device_init(d, block_bytes(c->cache), u->sectors, + NULL, &bcache_flash_ops)) + goto err; + +diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c +index ac06c0bc3c0a..b9f524ab5cc8 100644 +--- a/drivers/md/bcache/sysfs.c ++++ b/drivers/md/bcache/sysfs.c +@@ -714,7 +714,7 @@ SHOW(__bch_cache_set) + sysfs_print(synchronous, CACHE_SYNC(&c->sb)); + sysfs_print(journal_delay_ms, c->journal_delay_ms); + sysfs_hprint(bucket_size, bucket_bytes(c)); +- sysfs_hprint(block_size, block_bytes(c)); ++ sysfs_hprint(block_size, block_bytes(c->cache)); + sysfs_print(tree_depth, c->root->level); + sysfs_print(root_usage_percent, bch_root_usage(c)); + +-- +2.26.2 + diff --git a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0006-bcache-remove-useless-alloc_bucket_pages.patch b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0006-bcache-remove-useless-alloc_bucket_pages.patch new file mode 100644 index 0000000..38aae59 --- /dev/null +++ b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0006-bcache-remove-useless-alloc_bucket_pages.patch @@ -0,0 +1,29 @@ +From 811f8198f1d5337729bbd855bf0e381e60eeeca3 Mon Sep 17 
00:00:00 2001 +From: Coly Li <colyli@suse.de> +Date: Fri, 14 Aug 2020 21:28:23 +0800 +Subject: [PATCH v2 06/19] bcache: remove useless alloc_bucket_pages() + +Now no one uses alloc_bucket_pages() anymore, remove it from bcache.h. + +Signed-off-by: Coly Li <colyli@suse.de> +--- + drivers/md/bcache/super.c | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c +index 36a538c2e960..28257f11d835 100644 +--- a/drivers/md/bcache/super.c ++++ b/drivers/md/bcache/super.c +@@ -1832,9 +1832,6 @@ void bch_cache_set_unregister(struct cache_set *c) + bch_cache_set_stop(c); + } + +-#define alloc_bucket_pages(gfp, c) \ +- ((void *) __get_free_pages(__GFP_ZERO|__GFP_COMP|gfp, ilog2(bucket_pages(c)))) +- + #define alloc_meta_bucket_pages(gfp, sb) \ + ((void *) __get_free_pages(__GFP_ZERO|__GFP_COMP|gfp, ilog2(meta_bucket_pages(sb)))) + +-- +2.26.2 + diff --git a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0007-bcache-remove-useless-bucket_pages.patch b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0007-bcache-remove-useless-bucket_pages.patch new file mode 100644 index 0000000..2cfd09f --- /dev/null +++ b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0007-bcache-remove-useless-bucket_pages.patch @@ -0,0 +1,29 @@ +From a34562e8f936f77d726fcd94746a467db5f2bf04 Mon Sep 17 00:00:00 2001 +From: Coly Li <colyli@suse.de> +Date: Fri, 14 Aug 2020 21:15:28 +0800 +Subject: [PATCH v2 07/19] bcache: remove useless bucket_pages() + +It seems alloc_bucket_pages() is the only user of bucket_pages(). +Considering alloc_bucket_pages() is removed from bcache code, it is safe +to remove the useless macro bucket_pages() now. 
+ +Signed-off-by: Coly Li <colyli@suse.de> +--- + drivers/md/bcache/bcache.h | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h +index 29bec61cafbb..48a2585b6bbb 100644 +--- a/drivers/md/bcache/bcache.h ++++ b/drivers/md/bcache/bcache.h +@@ -757,7 +757,6 @@ struct bbio { + #define btree_default_blocks(c) \ + ((unsigned int) ((PAGE_SECTORS * (c)->btree_pages) >> (c)->block_bits)) + +-#define bucket_pages(c) ((c)->sb.bucket_size / PAGE_SECTORS) + #define bucket_bytes(c) ((c)->sb.bucket_size << 9) + #define block_bytes(ca) ((ca)->sb.block_size << 9) + +-- +2.26.2 + diff --git a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0008-bcache-only-use-bucket_bytes-on-struct-cache.patch b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0008-bcache-only-use-bucket_bytes-on-struct-cache.patch new file mode 100644 index 0000000..4cd89f1 --- /dev/null +++ b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0008-bcache-only-use-bucket_bytes-on-struct-cache.patch @@ -0,0 +1,49 @@ +From 964012dfcb5e4ae91630c5d92b51cfba698dc41d Mon Sep 17 00:00:00 2001 +From: Coly Li <colyli@suse.de> +Date: Fri, 14 Aug 2020 21:20:48 +0800 +Subject: [PATCH v2 08/19] bcache: only use bucket_bytes() on struct cache + +Because struct cache_set and struct cache both have struct cache_sb, +macro bucket_bytes() currently are used on both of them. When removing +the embedded struct cache_sb from struct cache_set, this macro won't be +used on struct cache_set anymore. + +This patch unifies all bucket_bytes() usage only on struct cache, this is +one of the preparation to remove the embedded struct cache_sb from +struct cache_set. 
+ +Signed-off-by: Coly Li <colyli@suse.de> +--- + drivers/md/bcache/bcache.h | 2 +- + drivers/md/bcache/sysfs.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h +index 48a2585b6bbb..94d4baf4c405 100644 +--- a/drivers/md/bcache/bcache.h ++++ b/drivers/md/bcache/bcache.h +@@ -757,7 +757,7 @@ struct bbio { + #define btree_default_blocks(c) \ + ((unsigned int) ((PAGE_SECTORS * (c)->btree_pages) >> (c)->block_bits)) + +-#define bucket_bytes(c) ((c)->sb.bucket_size << 9) ++#define bucket_bytes(ca) ((ca)->sb.bucket_size << 9) + #define block_bytes(ca) ((ca)->sb.block_size << 9) + + static inline unsigned int meta_bucket_pages(struct cache_sb *sb) +diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c +index b9f524ab5cc8..4bfe98faadcc 100644 +--- a/drivers/md/bcache/sysfs.c ++++ b/drivers/md/bcache/sysfs.c +@@ -713,7 +713,7 @@ SHOW(__bch_cache_set) + + sysfs_print(synchronous, CACHE_SYNC(&c->sb)); + sysfs_print(journal_delay_ms, c->journal_delay_ms); +- sysfs_hprint(bucket_size, bucket_bytes(c)); ++ sysfs_hprint(bucket_size, bucket_bytes(c->cache)); + sysfs_hprint(block_size, block_bytes(c->cache)); + sysfs_print(tree_depth, c->root->level); + sysfs_print(root_usage_percent, bch_root_usage(c)); +-- +2.26.2 + diff --git a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0009-bcache-avoid-data-copy-between-cache_set-sb-and-c.patch b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0009-bcache-avoid-data-copy-between-cache_set-sb-and-c.patch new file mode 100644 index 0000000..89899d4 --- /dev/null +++ b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0009-bcache-avoid-data-copy-between-cache_set-sb-and-c.patch @@ -0,0 +1,66 @@ +From 78c5a3367fe79f81efa030ef2cb2fc171009fc14 Mon Sep 17 00:00:00 2001 +From: Coly Li <colyli@suse.de> +Date: Fri, 14 Aug 2020 23:18:45 +0800 +Subject: [PATCH v2 09/19] bcache: avoid data copy between 
cache_set->sb and + cache->sb + +struct cache_sb embedded in struct cache_set is only partial used and +not a real copy from cache's in-memory super block. When removing the +embedded cache_set->sb, it is unncessary to copy data between these two +in-memory super blocks (cache_set->sb and cache->sb), it is sufficient +to just use cache->sb. + +This patch removes the data copy between these two in-memory super +blocks in bch_cache_set_alloc() and bcache_write_super(). In future +except for set_uuid, cache's super block will be referenced by cache +set, no copy any more. + +Signed-off-by: Coly Li <colyli@suse.de> +--- + drivers/md/bcache/super.c | 22 +++------------------- + 1 file changed, 3 insertions(+), 19 deletions(-) + +diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c +index 28257f11d835..20de004ab2ef 100644 +--- a/drivers/md/bcache/super.c ++++ b/drivers/md/bcache/super.c +@@ -350,16 +350,10 @@ void bcache_write_super(struct cache_set *c) + down(&c->sb_write_mutex); + closure_init(cl, &c->cl); + +- c->sb.seq++; ++ ca->sb.seq++; + +- if (c->sb.version > version) +- version = c->sb.version; +- +- ca->sb.version = version; +- ca->sb.seq = c->sb.seq; +- ca->sb.last_mount = c->sb.last_mount; +- +- SET_CACHE_SYNC(&ca->sb, CACHE_SYNC(&c->sb)); ++ if (ca->sb.version < version) ++ ca->sb.version = version; + + bio_init(bio, ca->sb_bv, 1); + bio_set_dev(bio, ca->bdev); +@@ -1860,16 +1854,6 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) + bch_cache_accounting_init(&c->accounting, &c->cl); + + memcpy(c->set_uuid, sb->set_uuid, 16); +- c->sb.block_size = sb->block_size; +- c->sb.bucket_size = sb->bucket_size; +- c->sb.nr_in_set = sb->nr_in_set; +- c->sb.last_mount = sb->last_mount; +- c->sb.version = sb->version; +- if (c->sb.version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES) { +- c->sb.feature_compat = sb->feature_compat; +- c->sb.feature_ro_compat = sb->feature_ro_compat; +- c->sb.feature_incompat = sb->feature_incompat; +- } + + 
c->bucket_bits = ilog2(sb->bucket_size); + c->block_bits = ilog2(sb->block_size); +-- +2.26.2 + diff --git a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0010-bcache-don-t-check-seq-numbers-in-register_cache_.patch b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0010-bcache-don-t-check-seq-numbers-in-register_cache_.patch new file mode 100644 index 0000000..1d5a20b --- /dev/null +++ b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0010-bcache-don-t-check-seq-numbers-in-register_cache_.patch @@ -0,0 +1,52 @@ +From 754956b7956b6c08c1d8e3eab0a2bda29e220115 Mon Sep 17 00:00:00 2001 +From: Coly Li <colyli@suse.de> +Date: Fri, 14 Aug 2020 23:28:26 +0800 +Subject: [PATCH v2 10/19] bcache: don't check seq numbers in + register_cache_set() + +In order to update the partial super block of cache set, the seq numbers +of cache and cache set are checked in register_cache_set(). If cache's +seq number is larger than cache set's seq number, cache set must update +its partial super block from cache's super block. It is unnecessary when +the embedded struct cache_sb is removed from struct cache set. + +This patch removed the seq numbers checking from register_cache_set(), +because later there will be no such partial super block in struct cache +set, the cache set will directly reference in-memory super block from +struct cache. This is a preparation patch for removing embedded struct +cache_sb from struct cache_set. 
+ +Signed-off-by: Coly Li <colyli@suse.de> +--- + drivers/md/bcache/super.c | 15 --------------- + 1 file changed, 15 deletions(-) + +diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c +index 20de004ab2ef..cdc1ebee5044 100644 +--- a/drivers/md/bcache/super.c ++++ b/drivers/md/bcache/super.c +@@ -2160,21 +2160,6 @@ static const char *register_cache_set(struct cache *ca) + sysfs_create_link(&c->kobj, &ca->kobj, buf)) + goto err; + +- /* +- * A special case is both ca->sb.seq and c->sb.seq are 0, +- * such condition happens on a new created cache device whose +- * super block is never flushed yet. In this case c->sb.version +- * and other members should be updated too, otherwise we will +- * have a mistaken super block version in cache set. +- */ +- if (ca->sb.seq > c->sb.seq || c->sb.seq == 0) { +- c->sb.version = ca->sb.version; +- memcpy(c->set_uuid, ca->sb.set_uuid, 16); +- c->sb.flags = ca->sb.flags; +- c->sb.seq = ca->sb.seq; +- pr_debug("set version = %llu\n", c->sb.version); +- } +- + kobject_get(&ca->kobj); + ca->set = c; + ca->set->cache = ca; +-- +2.26.2 + diff --git a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0011-bcache-remove-can_attach_cache.patch b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0011-bcache-remove-can_attach_cache.patch new file mode 100644 index 0000000..c01af45 --- /dev/null +++ b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0011-bcache-remove-can_attach_cache.patch @@ -0,0 +1,49 @@ +From aeb61b8c57e542123d0082054e6a65f10848a6f1 Mon Sep 17 00:00:00 2001 +From: Coly Li <colyli@suse.de> +Date: Fri, 14 Aug 2020 23:36:56 +0800 +Subject: [PATCH v2 11/19] bcache: remove can_attach_cache() + +After removing the embedded struct cache_sb from struct cache_set, cache +set will directly reference the in-memory super block of struct cache. 
+It is unnecessary to compare block_size, bucket_size and nr_in_set from +the identical in-memory super block in can_attach_cache(). + +This is a preparation patch for later removing cache_set->sb from +struct cache_set. + +Signed-off-by: Coly Li <colyli@suse.de> +--- + drivers/md/bcache/super.c | 10 ---------- + 1 file changed, 10 deletions(-) + +diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c +index cdc1ebee5044..80cfb9dfe93e 100644 +--- a/drivers/md/bcache/super.c ++++ b/drivers/md/bcache/super.c +@@ -2112,13 +2112,6 @@ static int run_cache_set(struct cache_set *c) + return -EIO; + } + +-static bool can_attach_cache(struct cache *ca, struct cache_set *c) +-{ +- return ca->sb.block_size == c->sb.block_size && +- ca->sb.bucket_size == c->sb.bucket_size && +- ca->sb.nr_in_set == c->sb.nr_in_set; +-} +- + static const char *register_cache_set(struct cache *ca) + { + char buf[12]; +@@ -2130,9 +2123,6 @@ static const char *register_cache_set(struct cache *ca) + if (c->cache) + return "duplicate cache set member"; + +- if (!can_attach_cache(ca, c)) +- return "cache sb does not match set"; +- + if (!CACHE_SYNC(&ca->sb)) + SET_CACHE_SYNC(&c->sb, false); + +-- +2.26.2 + diff --git a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0012-bcache-check-and-set-sync-status-on-cache-s-in-me.patch b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0012-bcache-check-and-set-sync-status-on-cache-s-in-me.patch new file mode 100644 index 0000000..a9920e2 --- /dev/null +++ b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0012-bcache-check-and-set-sync-status-on-cache-s-in-me.patch @@ -0,0 +1,109 @@ +From 9cbec8384422a47b76db64bfe880e1224893c193 Mon Sep 17 00:00:00 2001 +From: Coly Li <colyli@suse.de> +Date: Fri, 14 Aug 2020 23:53:52 +0800 +Subject: [PATCH v2 12/19] bcache: check and set sync status on cache's + in-memory super block + +Currently the cache's sync status is checked and set on cache set's in- 
+memory partial super block. After removing the embedded struct cache_sb +from cache set and reference cache's in-memory super block from struct +cache_set, the sync status can set and check directly on cache's super +block. + +This patch checks and sets the cache sync status directly on cache's +in-memory super block. This is a preparation for later removing embedded +struct cache_sb from struct cache_set. + +Signed-off-by: Coly Li <colyli@suse.de> +--- + drivers/md/bcache/alloc.c | 2 +- + drivers/md/bcache/journal.c | 2 +- + drivers/md/bcache/super.c | 7 ++----- + drivers/md/bcache/sysfs.c | 6 +++--- + 4 files changed, 7 insertions(+), 10 deletions(-) + +diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c +index 1b8310992dd0..65fdbdeb5134 100644 +--- a/drivers/md/bcache/alloc.c ++++ b/drivers/md/bcache/alloc.c +@@ -361,7 +361,7 @@ static int bch_allocator_thread(void *arg) + * new stuff to them: + */ + allocator_wait(ca, !atomic_read(&ca->set->prio_blocked)); +- if (CACHE_SYNC(&ca->set->sb)) { ++ if (CACHE_SYNC(&ca->sb)) { + /* + * This could deadlock if an allocation with a btree + * node locked ever blocked - having the btree node +diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c +index ccd5de0ab0fe..e2810668ede3 100644 +--- a/drivers/md/bcache/journal.c ++++ b/drivers/md/bcache/journal.c +@@ -915,7 +915,7 @@ atomic_t *bch_journal(struct cache_set *c, + if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags))) + return NULL; + +- if (!CACHE_SYNC(&c->sb)) ++ if (!CACHE_SYNC(&c->cache->sb)) + return NULL; + + w = journal_wait_for_write(c, bch_keylist_nkeys(keys)); +diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c +index 80cfb9dfe93e..6b94b396f9e9 100644 +--- a/drivers/md/bcache/super.c ++++ b/drivers/md/bcache/super.c +@@ -1954,7 +1954,7 @@ static int run_cache_set(struct cache_set *c) + c->nbuckets = ca->sb.nbuckets; + set_gc_sectors(c); + +- if (CACHE_SYNC(&c->sb)) { ++ if (CACHE_SYNC(&c->cache->sb)) { + 
struct bkey *k; + struct jset *j; + +@@ -2077,7 +2077,7 @@ static int run_cache_set(struct cache_set *c) + * everything is set up - fortunately journal entries won't be + * written until the SET_CACHE_SYNC() here: + */ +- SET_CACHE_SYNC(&c->sb, true); ++ SET_CACHE_SYNC(&c->cache->sb, true); + + bch_journal_next(&c->journal); + bch_journal_meta(c, &cl); +@@ -2123,9 +2123,6 @@ static const char *register_cache_set(struct cache *ca) + if (c->cache) + return "duplicate cache set member"; + +- if (!CACHE_SYNC(&ca->sb)) +- SET_CACHE_SYNC(&c->sb, false); +- + goto found; + } + +diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c +index 4bfe98faadcc..554e3afc9b68 100644 +--- a/drivers/md/bcache/sysfs.c ++++ b/drivers/md/bcache/sysfs.c +@@ -711,7 +711,7 @@ SHOW(__bch_cache_set) + { + struct cache_set *c = container_of(kobj, struct cache_set, kobj); + +- sysfs_print(synchronous, CACHE_SYNC(&c->sb)); ++ sysfs_print(synchronous, CACHE_SYNC(&c->cache->sb)); + sysfs_print(journal_delay_ms, c->journal_delay_ms); + sysfs_hprint(bucket_size, bucket_bytes(c->cache)); + sysfs_hprint(block_size, block_bytes(c->cache)); +@@ -812,8 +812,8 @@ STORE(__bch_cache_set) + if (attr == &sysfs_synchronous) { + bool sync = strtoul_or_return(buf); + +- if (sync != CACHE_SYNC(&c->sb)) { +- SET_CACHE_SYNC(&c->sb, sync); ++ if (sync != CACHE_SYNC(&c->cache->sb)) { ++ SET_CACHE_SYNC(&c->cache->sb, sync); + bcache_write_super(c); + } + } +-- +2.26.2 + diff --git a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0013-bcache-remove-embedded-struct-cache_sb-from-struc.patch b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0013-bcache-remove-embedded-struct-cache_sb-from-struc.patch new file mode 100644 index 0000000..0580cc8 --- /dev/null +++ b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0013-bcache-remove-embedded-struct-cache_sb-from-struc.patch @@ -0,0 +1,427 @@ +From f8c4f864ef0f99ba8d34a3254bc3d03c1bd12897 Mon Sep 17 
00:00:00 2001 +From: Coly Li <colyli@suse.de> +Date: Sat, 15 Aug 2020 00:20:00 +0800 +Subject: [PATCH v2 13/19] bcache: remove embedded struct cache_sb from struct + cache_set + +Since bcache code was merged into mainline kernel, each cache set only +has one single cache in it. The multiple caches framework is here but the +code is far from completed. Considering the multiple copies of cached +data can also be stored on e.g. md raid1 devices, it is unnecessary to +support multiple caches in one cache set indeed. + +The previous preparation patches fix the dependencies of explicitly +making a cache set only have a single cache. Now we don't have to maintain +an embedded partial super block in struct cache_set, the in-memory super +block can be directly referenced from struct cache. + +This patch removes the embedded struct cache_sb from struct cache_set, +and fixes all locations where the super block was referenced from this +removed super block by referencing the in-memory super block of struct +cache. 
+ +Signed-off-by: Coly Li <colyli@suse.de> +--- + drivers/md/bcache/alloc.c | 6 +++--- + drivers/md/bcache/bcache.h | 4 +--- + drivers/md/bcache/btree.c | 17 +++++++++-------- + drivers/md/bcache/btree.h | 2 +- + drivers/md/bcache/extents.c | 6 +++--- + drivers/md/bcache/features.c | 4 ++-- + drivers/md/bcache/io.c | 2 +- + drivers/md/bcache/journal.c | 11 ++++++----- + drivers/md/bcache/request.c | 4 ++-- + drivers/md/bcache/super.c | 22 ++++++++++++---------- + drivers/md/bcache/writeback.c | 2 +- + 11 files changed, 41 insertions(+), 39 deletions(-) + +diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c +index 65fdbdeb5134..8c371d5eef8e 100644 +--- a/drivers/md/bcache/alloc.c ++++ b/drivers/md/bcache/alloc.c +@@ -87,7 +87,7 @@ void bch_rescale_priorities(struct cache_set *c, int sectors) + { + struct cache *ca; + struct bucket *b; +- unsigned long next = c->nbuckets * c->sb.bucket_size / 1024; ++ unsigned long next = c->nbuckets * c->cache->sb.bucket_size / 1024; + int r; + + atomic_sub(sectors, &c->rescale); +@@ -583,7 +583,7 @@ static struct open_bucket *pick_data_bucket(struct cache_set *c, + struct open_bucket, list); + found: + if (!ret->sectors_free && KEY_PTRS(alloc)) { +- ret->sectors_free = c->sb.bucket_size; ++ ret->sectors_free = c->cache->sb.bucket_size; + bkey_copy(&ret->key, alloc); + bkey_init(alloc); + } +@@ -677,7 +677,7 @@ bool bch_alloc_sectors(struct cache_set *c, + &PTR_CACHE(c, &b->key, i)->sectors_written); + } + +- if (b->sectors_free < c->sb.block_size) ++ if (b->sectors_free < c->cache->sb.block_size) + b->sectors_free = 0; + + /* +diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h +index 94d4baf4c405..1d57f48307e6 100644 +--- a/drivers/md/bcache/bcache.h ++++ b/drivers/md/bcache/bcache.h +@@ -517,8 +517,6 @@ struct cache_set { + atomic_t idle_counter; + atomic_t at_max_writeback_rate; + +- struct cache_sb sb; +- + struct cache *cache; + + struct bcache_device **devices; +@@ -799,7 +797,7 @@ static 
inline sector_t bucket_to_sector(struct cache_set *c, size_t b) + + static inline sector_t bucket_remainder(struct cache_set *c, sector_t s) + { +- return s & (c->sb.bucket_size - 1); ++ return s & (c->cache->sb.bucket_size - 1); + } + + static inline struct cache *PTR_CACHE(struct cache_set *c, +diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c +index c91b4d58a5b3..d09103cc7da5 100644 +--- a/drivers/md/bcache/btree.c ++++ b/drivers/md/bcache/btree.c +@@ -117,7 +117,7 @@ static void bch_btree_init_next(struct btree *b) + + if (b->written < btree_blocks(b)) + bch_bset_init_next(&b->keys, write_block(b), +- bset_magic(&b->c->sb)); ++ bset_magic(&b->c->cache->sb)); + + } + +@@ -155,7 +155,7 @@ void bch_btree_node_read_done(struct btree *b) + * See the comment arount cache_set->fill_iter. + */ + iter = mempool_alloc(&b->c->fill_iter, GFP_NOIO); +- iter->size = b->c->sb.bucket_size / b->c->sb.block_size; ++ iter->size = b->c->cache->sb.bucket_size / b->c->cache->sb.block_size; + iter->used = 0; + + #ifdef CONFIG_BCACHE_DEBUG +@@ -178,7 +178,7 @@ void bch_btree_node_read_done(struct btree *b) + goto err; + + err = "bad magic"; +- if (i->magic != bset_magic(&b->c->sb)) ++ if (i->magic != bset_magic(&b->c->cache->sb)) + goto err; + + err = "bad checksum"; +@@ -219,7 +219,7 @@ void bch_btree_node_read_done(struct btree *b) + + if (b->written < btree_blocks(b)) + bch_bset_init_next(&b->keys, write_block(b), +- bset_magic(&b->c->sb)); ++ bset_magic(&b->c->cache->sb)); + out: + mempool_free(iter, &b->c->fill_iter); + return; +@@ -423,7 +423,7 @@ void __bch_btree_node_write(struct btree *b, struct closure *parent) + + do_btree_node_write(b); + +- atomic_long_add(set_blocks(i, block_bytes(b->c->cache)) * b->c->sb.block_size, ++ atomic_long_add(set_blocks(i, block_bytes(b->c->cache)) * b->c->cache->sb.block_size, + &PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written); + + b->written += set_blocks(i, block_bytes(b->c->cache)); +@@ -738,7 +738,7 @@ void 
bch_btree_cache_free(struct cache_set *c) + if (c->verify_data) + list_move(&c->verify_data->list, &c->btree_cache); + +- free_pages((unsigned long) c->verify_ondisk, ilog2(meta_bucket_pages(&c->sb))); ++ free_pages((unsigned long) c->verify_ondisk, ilog2(meta_bucket_pages(&c->cache->sb))); + #endif + + list_splice(&c->btree_cache_freeable, +@@ -785,7 +785,8 @@ int bch_btree_cache_alloc(struct cache_set *c) + mutex_init(&c->verify_lock); + + c->verify_ondisk = (void *) +- __get_free_pages(GFP_KERNEL|__GFP_COMP, ilog2(meta_bucket_pages(&c->sb))); ++ __get_free_pages(GFP_KERNEL|__GFP_COMP, ++ ilog2(meta_bucket_pages(&c->cache->sb))); + if (!c->verify_ondisk) { + /* + * Don't worry about the mca_rereserve buckets +@@ -1108,7 +1109,7 @@ struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op, + } + + b->parent = parent; +- bch_bset_init_next(&b->keys, b->keys.set->data, bset_magic(&b->c->sb)); ++ bch_bset_init_next(&b->keys, b->keys.set->data, bset_magic(&b->c->cache->sb)); + + mutex_unlock(&c->bucket_lock); + +diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h +index 257969980c49..50482107134f 100644 +--- a/drivers/md/bcache/btree.h ++++ b/drivers/md/bcache/btree.h +@@ -194,7 +194,7 @@ static inline unsigned int bset_block_offset(struct btree *b, struct bset *i) + + static inline void set_gc_sectors(struct cache_set *c) + { +- atomic_set(&c->sectors_to_gc, c->sb.bucket_size * c->nbuckets / 16); ++ atomic_set(&c->sectors_to_gc, c->cache->sb.bucket_size * c->nbuckets / 16); + } + + void bkey_put(struct cache_set *c, struct bkey *k); +diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c +index 9162af5bb6ec..f4658a1f37b8 100644 +--- a/drivers/md/bcache/extents.c ++++ b/drivers/md/bcache/extents.c +@@ -54,7 +54,7 @@ static bool __ptr_invalid(struct cache_set *c, const struct bkey *k) + size_t bucket = PTR_BUCKET_NR(c, k, i); + size_t r = bucket_remainder(c, PTR_OFFSET(k, i)); + +- if (KEY_SIZE(k) + r > 
c->sb.bucket_size || ++ if (KEY_SIZE(k) + r > c->cache->sb.bucket_size || + bucket < ca->sb.first_bucket || + bucket >= ca->sb.nbuckets) + return true; +@@ -75,7 +75,7 @@ static const char *bch_ptr_status(struct cache_set *c, const struct bkey *k) + size_t bucket = PTR_BUCKET_NR(c, k, i); + size_t r = bucket_remainder(c, PTR_OFFSET(k, i)); + +- if (KEY_SIZE(k) + r > c->sb.bucket_size) ++ if (KEY_SIZE(k) + r > c->cache->sb.bucket_size) + return "bad, length too big"; + if (bucket < ca->sb.first_bucket) + return "bad, short offset"; +@@ -136,7 +136,7 @@ static void bch_bkey_dump(struct btree_keys *keys, const struct bkey *k) + size_t n = PTR_BUCKET_NR(b->c, k, j); + + pr_cont(" bucket %zu", n); +- if (n >= b->c->sb.first_bucket && n < b->c->sb.nbuckets) ++ if (n >= b->c->cache->sb.first_bucket && n < b->c->cache->sb.nbuckets) + pr_cont(" prio %i", + PTR_BUCKET(b->c, k, j)->prio); + } +diff --git a/drivers/md/bcache/features.c b/drivers/md/bcache/features.c +index 4442df48d28c..6469223f0b77 100644 +--- a/drivers/md/bcache/features.c ++++ b/drivers/md/bcache/features.c +@@ -30,7 +30,7 @@ static struct feature feature_list[] = { + for (f = &feature_list[0]; f->compat != 0; f++) { \ + if (f->compat != BCH_FEATURE_ ## type) \ + continue; \ +- if (BCH_HAS_ ## type ## _FEATURE(&c->sb, f->mask)) { \ ++ if (BCH_HAS_ ## type ## _FEATURE(&c->cache->sb, f->mask)) { \ + if (first) { \ + out += snprintf(out, buf + size - out, \ + "["); \ +@@ -44,7 +44,7 @@ static struct feature feature_list[] = { + \ + out += snprintf(out, buf + size - out, "%s", f->string);\ + \ +- if (BCH_HAS_ ## type ## _FEATURE(&c->sb, f->mask)) \ ++ if (BCH_HAS_ ## type ## _FEATURE(&c->cache->sb, f->mask)) \ + out += snprintf(out, buf + size - out, "]"); \ + \ + first = false; \ +diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c +index a14a445618b4..dad71a6b7889 100644 +--- a/drivers/md/bcache/io.c ++++ b/drivers/md/bcache/io.c +@@ -26,7 +26,7 @@ struct bio *bch_bbio_alloc(struct cache_set *c) + 
struct bbio *b = mempool_alloc(&c->bio_meta, GFP_NOIO); + struct bio *bio = &b->bio; + +- bio_init(bio, bio->bi_inline_vecs, meta_bucket_pages(&c->sb)); ++ bio_init(bio, bio->bi_inline_vecs, meta_bucket_pages(&c->cache->sb)); + + return bio; + } +diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c +index e2810668ede3..c5526e5087ef 100644 +--- a/drivers/md/bcache/journal.c ++++ b/drivers/md/bcache/journal.c +@@ -666,7 +666,7 @@ static void journal_reclaim(struct cache_set *c) + + bkey_init(k); + SET_KEY_PTRS(k, 1); +- c->journal.blocks_free = c->sb.bucket_size >> c->block_bits; ++ c->journal.blocks_free = ca->sb.bucket_size >> c->block_bits; + + out: + if (!journal_full(&c->journal)) +@@ -735,7 +735,7 @@ static void journal_write_unlocked(struct closure *cl) + struct journal_write *w = c->journal.cur; + struct bkey *k = &c->journal.key; + unsigned int i, sectors = set_blocks(w->data, block_bytes(ca)) * +- c->sb.block_size; ++ ca->sb.block_size; + + struct bio *bio; + struct bio_list list; +@@ -762,7 +762,7 @@ static void journal_write_unlocked(struct closure *cl) + bkey_copy(&w->data->uuid_bucket, &c->uuid_bucket); + + w->data->prio_bucket[ca->sb.nr_this_dev] = ca->prio_buckets[0]; +- w->data->magic = jset_magic(&c->sb); ++ w->data->magic = jset_magic(&ca->sb); + w->data->version = BCACHE_JSET_VERSION; + w->data->last_seq = last_seq(&c->journal); + w->data->csum = csum_set(w->data); +@@ -838,6 +838,7 @@ static struct journal_write *journal_wait_for_write(struct cache_set *c, + size_t sectors; + struct closure cl; + bool wait = false; ++ struct cache *ca = c->cache; + + closure_init_stack(&cl); + +@@ -847,10 +848,10 @@ static struct journal_write *journal_wait_for_write(struct cache_set *c, + struct journal_write *w = c->journal.cur; + + sectors = __set_blocks(w->data, w->data->keys + nkeys, +- block_bytes(c->cache)) * c->sb.block_size; ++ block_bytes(ca)) * ca->sb.block_size; + + if (sectors <= min_t(size_t, +- c->journal.blocks_free * 
c->sb.block_size, ++ c->journal.blocks_free * ca->sb.block_size, + PAGE_SECTORS << JSET_BITS)) + return w; + +diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c +index 02408fdbf5bb..37e9cf8dbfc1 100644 +--- a/drivers/md/bcache/request.c ++++ b/drivers/md/bcache/request.c +@@ -394,8 +394,8 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio) + goto skip; + } + +- if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) || +- bio_sectors(bio) & (c->sb.block_size - 1)) { ++ if (bio->bi_iter.bi_sector & (c->cache->sb.block_size - 1) || ++ bio_sectors(bio) & (c->cache->sb.block_size - 1)) { + pr_debug("skipping unaligned io\n"); + goto skip; + } +diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c +index 6b94b396f9e9..ad87859d744a 100644 +--- a/drivers/md/bcache/super.c ++++ b/drivers/md/bcache/super.c +@@ -471,7 +471,7 @@ static int __uuid_write(struct cache_set *c) + { + BKEY_PADDED(key) k; + struct closure cl; +- struct cache *ca; ++ struct cache *ca = c->cache; + unsigned int size; + + closure_init_stack(&cl); +@@ -480,13 +480,12 @@ static int __uuid_write(struct cache_set *c) + if (bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, true)) + return 1; + +- size = meta_bucket_pages(&c->sb) * PAGE_SECTORS; ++ size = meta_bucket_pages(&ca->sb) * PAGE_SECTORS; + SET_KEY_SIZE(&k.key, size); + uuid_io(c, REQ_OP_WRITE, 0, &k.key, &cl); + closure_sync(&cl); + + /* Only one bucket used for uuid write */ +- ca = PTR_CACHE(c, &k.key, 0); + atomic_long_add(ca->sb.bucket_size, &ca->meta_sectors_written); + + bkey_copy(&c->uuid_bucket, &k.key); +@@ -1199,7 +1198,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, + return -EINVAL; + } + +- if (dc->sb.block_size < c->sb.block_size) { ++ if (dc->sb.block_size < c->cache->sb.block_size) { + /* Will die */ + pr_err("Couldn't attach %s: block size less than set's block size\n", + dc->backing_dev_name); +@@ -1666,7 +1665,7 @@ static void cache_set_free(struct 
closure *cl) + } + + bch_bset_sort_state_free(&c->sort); +- free_pages((unsigned long) c->uuids, ilog2(meta_bucket_pages(&c->sb))); ++ free_pages((unsigned long) c->uuids, ilog2(meta_bucket_pages(&c->cache->sb))); + + if (c->moving_gc_wq) + destroy_workqueue(c->moving_gc_wq); +@@ -1832,6 +1831,7 @@ void bch_cache_set_unregister(struct cache_set *c) + struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) + { + int iter_size; ++ struct cache *ca = container_of(sb, struct cache, sb); + struct cache_set *c = kzalloc(sizeof(struct cache_set), GFP_KERNEL); + + if (!c) +@@ -1855,12 +1855,14 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) + + memcpy(c->set_uuid, sb->set_uuid, 16); + ++ c->cache = ca; ++ c->cache->set = c; + c->bucket_bits = ilog2(sb->bucket_size); + c->block_bits = ilog2(sb->block_size); +- c->nr_uuids = meta_bucket_bytes(&c->sb) / sizeof(struct uuid_entry); ++ c->nr_uuids = meta_bucket_bytes(sb) / sizeof(struct uuid_entry); + c->devices_max_used = 0; + atomic_set(&c->attached_dev_nr, 0); +- c->btree_pages = meta_bucket_pages(&c->sb); ++ c->btree_pages = meta_bucket_pages(sb); + if (c->btree_pages > BTREE_MAX_PAGES) + c->btree_pages = max_t(int, c->btree_pages / 4, + BTREE_MAX_PAGES); +@@ -1898,7 +1900,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) + + if (mempool_init_kmalloc_pool(&c->bio_meta, 2, + sizeof(struct bbio) + +- sizeof(struct bio_vec) * meta_bucket_pages(&c->sb))) ++ sizeof(struct bio_vec) * meta_bucket_pages(sb))) + goto err; + + if (mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size)) +@@ -1908,7 +1910,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) + BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER)) + goto err; + +- c->uuids = alloc_meta_bucket_pages(GFP_KERNEL, &c->sb); ++ c->uuids = alloc_meta_bucket_pages(GFP_KERNEL, sb); + if (!c->uuids) + goto err; + +@@ -2088,7 +2090,7 @@ static int run_cache_set(struct cache_set *c) + goto err; + + closure_sync(&cl); +- c->sb.last_mount = 
(u32)ktime_get_real_seconds(); ++ c->cache->sb.last_mount = (u32)ktime_get_real_seconds(); + bcache_write_super(c); + + list_for_each_entry_safe(dc, t, &uncached_devices, list) +diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c +index 4f4ad6b3d43a..3c74996978da 100644 +--- a/drivers/md/bcache/writeback.c ++++ b/drivers/md/bcache/writeback.c +@@ -35,7 +35,7 @@ static uint64_t __calc_target_rate(struct cached_dev *dc) + * This is the size of the cache, minus the amount used for + * flash-only devices + */ +- uint64_t cache_sectors = c->nbuckets * c->sb.bucket_size - ++ uint64_t cache_sectors = c->nbuckets * c->cache->sb.bucket_size - + atomic_long_read(&c->flash_dev_dirty_sectors); + + /* +-- +2.26.2 + diff --git a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0014-bcache-move-struct-cache_sb-out-of-uapi-bcache.h.patch b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0014-bcache-move-struct-cache_sb-out-of-uapi-bcache.h.patch new file mode 100644 index 0000000..b5d1ce0 --- /dev/null +++ b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0014-bcache-move-struct-cache_sb-out-of-uapi-bcache.h.patch @@ -0,0 +1,261 @@ +From fa53715b39652e9f6de5d0dca377c71cd9e31ee4 Mon Sep 17 00:00:00 2001 +From: Coly Li <colyli@suse.de> +Date: Sat, 15 Aug 2020 00:49:17 +0800 +Subject: [PATCH v2 14/19] bcache: move struct cache_sb out of uapi bcache.h + +struct cache_sb does not exactly map to cache_sb_disk, it is only for +in-memory super block and doesn't belong to uapi bcache.h. + +This patch moves the struct cache_sb definition and other dependent +macros and inline routines from include/uapi/linux/bcache.h to +drivers/md/bcache/bcache.h, this is the proper location to have them. 
+ +Signed-off-by: Coly Li <colyli@suse.de> +--- + drivers/md/bcache/bcache.h | 99 +++++++++++++++++++++++++++++++++++++ + include/uapi/linux/bcache.h | 98 ------------------------------------ + 2 files changed, 99 insertions(+), 98 deletions(-) + +diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h +index 1d57f48307e6..b755bf7832ac 100644 +--- a/drivers/md/bcache/bcache.h ++++ b/drivers/md/bcache/bcache.h +@@ -279,6 +279,82 @@ struct bcache_device { + unsigned int cmd, unsigned long arg); + }; + ++/* ++ * This is for in-memory bcache super block. ++ * NOTE: cache_sb is NOT exactly mapping to cache_sb_disk, the member ++ * size, ordering and even whole struct size may be different ++ * from cache_sb_disk. ++ */ ++struct cache_sb { ++ __u64 offset; /* sector where this sb was written */ ++ __u64 version; ++ ++ __u8 magic[16]; ++ ++ __u8 uuid[16]; ++ union { ++ __u8 set_uuid[16]; ++ __u64 set_magic; ++ }; ++ __u8 label[SB_LABEL_SIZE]; ++ ++ __u64 flags; ++ __u64 seq; ++ ++ __u64 feature_compat; ++ __u64 feature_incompat; ++ __u64 feature_ro_compat; ++ ++ union { ++ struct { ++ /* Cache devices */ ++ __u64 nbuckets; /* device size */ ++ ++ __u16 block_size; /* sectors */ ++ __u16 nr_in_set; ++ __u16 nr_this_dev; ++ __u32 bucket_size; /* sectors */ ++ }; ++ struct { ++ /* Backing devices */ ++ __u64 data_offset; ++ ++ /* ++ * block_size from the cache device section is still used by ++ * backing devices, so don't add anything here until we fix ++ * things to not need it for backing devices anymore ++ */ ++ }; ++ }; ++ ++ __u32 last_mount; /* time overflow in y2106 */ ++ ++ __u16 first_bucket; ++ union { ++ __u16 njournal_buckets; ++ __u16 keys; ++ }; ++ __u64 d[SB_JOURNAL_BUCKETS]; /* journal buckets */ ++}; ++ ++BITMASK(CACHE_SYNC, struct cache_sb, flags, 0, 1); ++BITMASK(CACHE_DISCARD, struct cache_sb, flags, 1, 1); ++BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3); ++#define CACHE_REPLACEMENT_LRU 0U ++#define CACHE_REPLACEMENT_FIFO 1U 
++#define CACHE_REPLACEMENT_RANDOM 2U ++ ++BITMASK(BDEV_CACHE_MODE, struct cache_sb, flags, 0, 4); ++#define CACHE_MODE_WRITETHROUGH 0U ++#define CACHE_MODE_WRITEBACK 1U ++#define CACHE_MODE_WRITEAROUND 2U ++#define CACHE_MODE_NONE 3U ++BITMASK(BDEV_STATE, struct cache_sb, flags, 61, 2); ++#define BDEV_STATE_NONE 0U ++#define BDEV_STATE_CLEAN 1U ++#define BDEV_STATE_DIRTY 2U ++#define BDEV_STATE_STALE 3U ++ + struct io { + /* Used to track sequential IO so it can be skipped */ + struct hlist_node hash; +@@ -840,6 +916,13 @@ static inline bool ptr_available(struct cache_set *c, const struct bkey *k, + return (PTR_DEV(k, i) < MAX_CACHES_PER_SET) && PTR_CACHE(c, k, i); + } + ++static inline _Bool SB_IS_BDEV(const struct cache_sb *sb) ++{ ++ return sb->version == BCACHE_SB_VERSION_BDEV ++ || sb->version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET ++ || sb->version == BCACHE_SB_VERSION_BDEV_WITH_FEATURES; ++} ++ + /* Btree key macros */ + + /* +@@ -958,6 +1041,22 @@ static inline void wait_for_kthread_stop(void) + } + } + ++/* generate magic number */ ++static inline __u64 jset_magic(struct cache_sb *sb) ++{ ++ return sb->set_magic ^ JSET_MAGIC; ++} ++ ++static inline __u64 pset_magic(struct cache_sb *sb) ++{ ++ return sb->set_magic ^ PSET_MAGIC; ++} ++ ++static inline __u64 bset_magic(struct cache_sb *sb) ++{ ++ return sb->set_magic ^ BSET_MAGIC; ++} ++ + /* Forward declarations */ + + void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio); +diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h +index 52e8bcb33981..18166a3d8503 100644 +--- a/include/uapi/linux/bcache.h ++++ b/include/uapi/linux/bcache.h +@@ -216,89 +216,6 @@ struct cache_sb_disk { + __le16 bucket_size_hi; + }; + +-/* +- * This is for in-memory bcache super block. +- * NOTE: cache_sb is NOT exactly mapping to cache_sb_disk, the member +- * size, ordering and even whole struct size may be different +- * from cache_sb_disk. 
+- */ +-struct cache_sb { +- __u64 offset; /* sector where this sb was written */ +- __u64 version; +- +- __u8 magic[16]; +- +- __u8 uuid[16]; +- union { +- __u8 set_uuid[16]; +- __u64 set_magic; +- }; +- __u8 label[SB_LABEL_SIZE]; +- +- __u64 flags; +- __u64 seq; +- +- __u64 feature_compat; +- __u64 feature_incompat; +- __u64 feature_ro_compat; +- +- union { +- struct { +- /* Cache devices */ +- __u64 nbuckets; /* device size */ +- +- __u16 block_size; /* sectors */ +- __u16 nr_in_set; +- __u16 nr_this_dev; +- __u32 bucket_size; /* sectors */ +- }; +- struct { +- /* Backing devices */ +- __u64 data_offset; +- +- /* +- * block_size from the cache device section is still used by +- * backing devices, so don't add anything here until we fix +- * things to not need it for backing devices anymore +- */ +- }; +- }; +- +- __u32 last_mount; /* time overflow in y2106 */ +- +- __u16 first_bucket; +- union { +- __u16 njournal_buckets; +- __u16 keys; +- }; +- __u64 d[SB_JOURNAL_BUCKETS]; /* journal buckets */ +-}; +- +-static inline _Bool SB_IS_BDEV(const struct cache_sb *sb) +-{ +- return sb->version == BCACHE_SB_VERSION_BDEV +- || sb->version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET +- || sb->version == BCACHE_SB_VERSION_BDEV_WITH_FEATURES; +-} +- +-BITMASK(CACHE_SYNC, struct cache_sb, flags, 0, 1); +-BITMASK(CACHE_DISCARD, struct cache_sb, flags, 1, 1); +-BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3); +-#define CACHE_REPLACEMENT_LRU 0U +-#define CACHE_REPLACEMENT_FIFO 1U +-#define CACHE_REPLACEMENT_RANDOM 2U +- +-BITMASK(BDEV_CACHE_MODE, struct cache_sb, flags, 0, 4); +-#define CACHE_MODE_WRITETHROUGH 0U +-#define CACHE_MODE_WRITEBACK 1U +-#define CACHE_MODE_WRITEAROUND 2U +-#define CACHE_MODE_NONE 3U +-BITMASK(BDEV_STATE, struct cache_sb, flags, 61, 2); +-#define BDEV_STATE_NONE 0U +-#define BDEV_STATE_CLEAN 1U +-#define BDEV_STATE_DIRTY 2U +-#define BDEV_STATE_STALE 3U +- + /* + * Magic numbers + * +@@ -310,21 +227,6 @@ BITMASK(BDEV_STATE, struct cache_sb, flags, 
61, 2); + #define PSET_MAGIC 0x6750e15f87337f91ULL + #define BSET_MAGIC 0x90135c78b99e07f5ULL + +-static inline __u64 jset_magic(struct cache_sb *sb) +-{ +- return sb->set_magic ^ JSET_MAGIC; +-} +- +-static inline __u64 pset_magic(struct cache_sb *sb) +-{ +- return sb->set_magic ^ PSET_MAGIC; +-} +- +-static inline __u64 bset_magic(struct cache_sb *sb) +-{ +- return sb->set_magic ^ BSET_MAGIC; +-} +- + /* + * Journal + * +-- +2.26.2 + diff --git a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0015-bcache-share-register-sysfs-with-async-register.patch b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0015-bcache-share-register-sysfs-with-async-register.patch new file mode 100644 index 0000000..471148a --- /dev/null +++ b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0015-bcache-share-register-sysfs-with-async-register.patch @@ -0,0 +1,66 @@ +From c4b3c187fc4c454d67731164fb88783d8f038308 Mon Sep 17 00:00:00 2001 +From: Coly Li <colyli@suse.de> +Date: Sat, 15 Aug 2020 16:56:19 +0800 +Subject: [PATCH v2 15/19] bcache: share register sysfs with async register + +Previously the experimental async registration uses a separate sysfs +file register_async. Now the async registration code seems working well +for a while, we can do furtuher testing with it now. + +This patch changes the async bcache registration shares the same sysfs +file /sys/fs/bcache/register (and register_quiet). Async registration +will be default behavior if BCACHE_ASYNC_REGISTRATION is set in kernel +configure. By default, BCACHE_ASYNC_REGISTRATION is not configured yet. 
+ +Signed-off-by: Coly Li <colyli@suse.de> +--- + drivers/md/bcache/super.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c +index ad87859d744a..e24e999fea25 100644 +--- a/drivers/md/bcache/super.c ++++ b/drivers/md/bcache/super.c +@@ -2381,7 +2381,6 @@ static ssize_t bch_pending_bdevs_cleanup(struct kobject *k, + + kobj_attribute_write(register, register_bcache); + kobj_attribute_write(register_quiet, register_bcache); +-kobj_attribute_write(register_async, register_bcache); + kobj_attribute_write(pendings_cleanup, bch_pending_bdevs_cleanup); + + static bool bch_is_open_backing(struct block_device *bdev) +@@ -2505,6 +2504,11 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, + struct cache_sb_disk *sb_disk; + struct block_device *bdev; + ssize_t ret; ++ bool async_registration = false; ++ ++#ifdef CONFIG_BCACHE_ASYNC_REGISTRATION ++ async_registration = true; ++#endif + + ret = -EBUSY; + err = "failed to reference bcache module"; +@@ -2558,7 +2562,8 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, + goto out_blkdev_put; + + err = "failed to register device"; +- if (attr == &ksysfs_register_async) { ++ ++ if (async_registration) { + /* register in asynchronous way */ + struct async_reg_args *args = + kzalloc(sizeof(struct async_reg_args), GFP_KERNEL); +@@ -2821,9 +2826,6 @@ static int __init bcache_init(void) + static const struct attribute *files[] = { + &ksysfs_register.attr, + &ksysfs_register_quiet.attr, +-#ifdef CONFIG_BCACHE_ASYNC_REGISTRATION +- &ksysfs_register_async.attr, +-#endif + &ksysfs_pendings_cleanup.attr, + NULL + }; +-- +2.26.2 + diff --git a/for-next/0001-update-trusted-encrypted.rst.patch b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0016-docs-update-trusted-encrypted.rst.patch index c041b90..3d9fe08 100644 --- a/for-next/0001-update-trusted-encrypted.rst.patch +++ 
b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0016-docs-update-trusted-encrypted.rst.patch @@ -1,7 +1,7 @@ -From 1bdc2ca42785e9dd3f092838893f1aa4cb376761 Mon Sep 17 00:00:00 2001 +From 5254f03b38c7e640fb8cc6e104a03d4c9da484d6 Mon Sep 17 00:00:00 2001 From: Coly Li <colyli@suse.de> Date: Fri, 7 Aug 2020 16:41:14 +0800 -Subject: [PATCH] docs: update trusted-encrypted.rst +Subject: [PATCH v2 16/19] docs: update trusted-encrypted.rst The parameters in tmp2 commands are outdated, people are not able to create trusted key by the example commands. @@ -10,9 +10,11 @@ This patch updates the paramerters of tpm2 commands, they are verified by tpm2-tools-4.1 with Linux v5.8 kernel. Signed-off-by: Coly Li <colyli@suse.de> -Cc: Stefan Berger <stefanb@linux.ibm.com> Cc: Dan Williams <dan.j.williams@intel.com> +Cc: James Bottomley <jejb@linux.ibm.com> +Cc: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> Cc: Mimi Zohar <zohar@linux.ibm.com> +Cc: Stefan Berger <stefanb@linux.ibm.com> --- Documentation/security/keys/trusted-encrypted.rst | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0017-net-introduce-helper-sendpage_ok-in-include-linux.patch b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0017-net-introduce-helper-sendpage_ok-in-include-linux.patch new file mode 100644 index 0000000..62506f9 --- /dev/null +++ b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0017-net-introduce-helper-sendpage_ok-in-include-linux.patch @@ -0,0 +1,75 @@ +From 323f53faf7c202b647f7a8a2147215fa44129bac Mon Sep 17 00:00:00 2001 +From: Coly Li <colyli@suse.de> +Date: Sat, 15 Aug 2020 13:40:48 +0800 +Subject: [PATCH v2 17/19] net: introduce helper sendpage_ok() in + include/linux/net.h + +The original problem was from nvme-over-tcp code, who mistakenly uses +kernel_sendpage() to send pages allocated by __get_free_pages() without +__GFP_COMP 
flag. Such pages don't have refcount (page_count is 0) on +tail pages, sending them by kernel_sendpage() may trigger a kernel panic +from a corrupted kernel heap, because these pages are incorrectly freed +in network stack as page_count 0 pages. + +This patch introduces a helper sendpage_ok(), it returns true if the +checking page, +- is not slab page: PageSlab(page) is false. +- has page refcount: page_count(page) is not zero + +All drivers who want to send page to remote end by kernel_sendpage() +may use this helper to check whether the page is OK. If the helper does +not return true, the driver should try other non sendpage method (e.g. +sock_no_sendpage()) to handle the page. + +Signed-off-by: Coly Li <colyli@suse.de> +Cc: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com> +Cc: Christoph Hellwig <hch@lst.de> +Cc: Hannes Reinecke <hare@suse.de> +Cc: Jan Kara <jack@suse.com> +Cc: Jens Axboe <axboe@kernel.dk> +Cc: Mikhail Skorzhinskii <mskorzhinskiy@solarflare.com> +Cc: Philipp Reisner <philipp.reisner@linbit.com> +Cc: Sagi Grimberg <sagi@grimberg.me> +Cc: Vlastimil Babka <vbabka@suse.com> +Cc: stable@vger.kernel.org +--- + include/linux/net.h | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/include/linux/net.h b/include/linux/net.h +index d48ff1180879..a807fad31958 100644 +--- a/include/linux/net.h ++++ b/include/linux/net.h +@@ -21,6 +21,7 @@ + #include <linux/rcupdate.h> + #include <linux/once.h> + #include <linux/fs.h> ++#include <linux/mm.h> + #include <linux/sockptr.h> + + #include <uapi/linux/net.h> +@@ -286,6 +287,21 @@ do { \ + #define net_get_random_once_wait(buf, nbytes) \ + get_random_once_wait((buf), (nbytes)) + ++/* ++ * E.g. XFS meta- & log-data is in slab pages, or bcache meta ++ * data pages, or other high order pages allocated by ++ * __get_free_pages() without __GFP_COMP, which have a page_count ++ * of 0 and/or have PageSlab() set. 
We cannot use send_page for ++ * those, as that does get_page(); put_page(); and would cause ++ * either a VM_BUG directly, or __page_cache_release a page that ++ * would actually still be referenced by someone, leading to some ++ * obscure delayed Oops somewhere else. ++ */ ++static inline bool sendpage_ok(struct page *page) ++{ ++ return (!PageSlab(page) && page_count(page) >= 1); ++} ++ + int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, + size_t num, size_t len); + int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg, +-- +2.26.2 + diff --git a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0018-nvme-tcp-check-page-by-sendpage_ok-before-calling.patch b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0018-nvme-tcp-check-page-by-sendpage_ok-before-calling.patch new file mode 100644 index 0000000..d9e0649 --- /dev/null +++ b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0018-nvme-tcp-check-page-by-sendpage_ok-before-calling.patch @@ -0,0 +1,57 @@ +From 2a4fcbc0285d0a00e5c963a620dc625046f65002 Mon Sep 17 00:00:00 2001 +From: Coly Li <colyli@suse.de> +Date: Sat, 15 Aug 2020 15:32:59 +0800 +Subject: [PATCH v2 18/19] nvme-tcp: check page by sendpage_ok() before calling + kernel_sendpage() + +Currently nvme_tcp_try_send_data() doesn't use kernel_sendpage() to +send slab pages. But for pages allocated by __get_free_pages() without +__GFP_COMP, which also have refcount as 0, they are still sent by +kernel_sendpage() to remote end, this is problematic. + +The new introduced helper sendpage_ok() checks both PageSlab tag and +page_count counter, and returns true if the checking page is OK to be +sent by kernel_sendpage(). + +This patch fixes the page checking issue of nvme_tcp_try_send_data() +with sendpage_ok(). If sendpage_ok() returns true, send this page by +kernel_sendpage(), otherwise use sock_no_sendpage to handle this page. 
+ +Signed-off-by: Coly Li <colyli@suse.de> +Cc: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com> +Cc: Christoph Hellwig <hch@lst.de> +Cc: Hannes Reinecke <hare@suse.de> +Cc: Jan Kara <jack@suse.com> +Cc: Jens Axboe <axboe@kernel.dk> +Cc: Mikhail Skorzhinskii <mskorzhinskiy@solarflare.com> +Cc: Philipp Reisner <philipp.reisner@linbit.com> +Cc: Sagi Grimberg <sagi@grimberg.me> +Cc: Vlastimil Babka <vbabka@suse.com> +Cc: stable@vger.kernel.org +--- + drivers/nvme/host/tcp.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c +index 62fbaecdc960..902fe742762b 100644 +--- a/drivers/nvme/host/tcp.c ++++ b/drivers/nvme/host/tcp.c +@@ -912,12 +912,11 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) + else + flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; + +- /* can't zcopy slab pages */ +- if (unlikely(PageSlab(page))) { +- ret = sock_no_sendpage(queue->sock, page, offset, len, ++ if (sendpage_ok(page)) { ++ ret = kernel_sendpage(queue->sock, page, offset, len, + flags); + } else { +- ret = kernel_sendpage(queue->sock, page, offset, len, ++ ret = sock_no_sendpage(queue->sock, page, offset, len, + flags); + } + if (ret <= 0) +-- +2.26.2 + diff --git a/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0019-drbd-code-cleanup-by-using-sendpage_ok-to-check-p.patch b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0019-drbd-code-cleanup-by-using-sendpage_ok-to-check-p.patch new file mode 100644 index 0000000..46ac593 --- /dev/null +++ b/for-test/remove-multiple-cache-devices/single-cache-in-cache-set/v2-0019-drbd-code-cleanup-by-using-sendpage_ok-to-check-p.patch @@ -0,0 +1,42 @@ +From 4809568371583fd9e8b613f1717ef13ae12c3356 Mon Sep 17 00:00:00 2001 +From: Coly Li <colyli@suse.de> +Date: Sat, 15 Aug 2020 15:37:00 +0800 +Subject: [PATCH v2 19/19] drbd: code cleanup by using sendpage_ok() to check + page for kernel_sendpage() + +In _drbd_send_page() a page 
is checked by following code before sending +it by kernel_sendpage(), + (page_count(page) < 1) || PageSlab(page) +If the check is true, this page won't be send by kernel_sendpage() and +handled by sock_no_sendpage(). + +This kind of check is exactly what macro sendpage_ok() does, which is +introduced into include/linux/net.h to solve a similar send page issue +in nvme-tcp code. + +This patch uses macro sendpage_ok() to replace the open coded checks to +page type and refcount in _drbd_send_page(), as a code cleanup. + +Signed-off-by: Coly Li <colyli@suse.de> +Cc: Philipp Reisner <philipp.reisner@linbit.com> +Cc: Sagi Grimberg <sagi@grimberg.me> +--- + drivers/block/drbd/drbd_main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c +index cb687ccdbd96..55dc0c91781e 100644 +--- a/drivers/block/drbd/drbd_main.c ++++ b/drivers/block/drbd/drbd_main.c +@@ -1553,7 +1553,7 @@ static int _drbd_send_page(struct drbd_peer_device *peer_device, struct page *pa + * put_page(); and would cause either a VM_BUG directly, or + * __page_cache_release a page that would actually still be referenced + * by someone, leading to some obscure delayed Oops somewhere else. */ +- if (drbd_disable_sendpage || (page_count(page) < 1) || PageSlab(page)) ++ if (drbd_disable_sendpage || !sendpage_ok(page)) + return _drbd_no_send_page(peer_device, page, offset, size, msg_flags); + + msg_flags |= MSG_NOSIGNAL; +-- +2.26.2 + |