diff options
author | Coly Li <colyli@suse.de> | 2021-01-04 16:36:04 +0800 |
---|---|---|
committer | Coly Li <colyli@suse.de> | 2021-01-04 16:36:04 +0800 |
commit | b05a5df0e2f34a1901ab7337563ec1599d52f69a (patch) | |
tree | 87083ef841bc00a4b2292bccef2da69f2947b852 | |
parent | 605ac5ed4754d88ceab9ca1b651f0b9126bd0584 (diff) | |
download | bcache-patches-b05a5df0e2f34a1901ab7337563ec1599d52f69a.tar.gz |
for-test: add v1 set for nvdimm-meta
8 files changed, 1743 insertions, 0 deletions
diff --git a/for-test/nvdimm-meta/v1/v1-0001-bcache-add-initial-data-structures-for-nvm-pages.patch b/for-test/nvdimm-meta/v1/v1-0001-bcache-add-initial-data-structures-for-nvm-pages.patch new file mode 100644 index 0000000..965970d --- /dev/null +++ b/for-test/nvdimm-meta/v1/v1-0001-bcache-add-initial-data-structures-for-nvm-pages.patch @@ -0,0 +1,241 @@ +From 6f782c56a8f47df473728e9d6ad1499f7f7b0f4c Mon Sep 17 00:00:00 2001 +From: Qiaowei Ren <qiaowei.ren@intel.com> +Date: Wed, 23 Dec 2020 09:44:55 -0500 +Subject: [PATCH v1 1/8] bcache: add initial data structures for nvm pages + +This patch initializes the prototype data structures for nvm pages +allocator, + +- struct nvm_pages_sb +This is the super block allocated on each nvdimm name space. A nvdimm +set may have multiple namespaces, nvm_pages_sb->set_uuid is used to mark +which nvdimm set this name space belongs to. Normally we will use the +bcache's cache set UUID to initialize this uuid, to connect this nvdimm +set to a specified bcache cache set. + +- struct owner_list_head +This is a table for all heads of all owner lists. An owner list records +which page(s) are allocated to which owner. After reboot from power failure, +the owner may find all its requested and allocated pages from the owner +list by a handler which is converted by a UUID. + +- struct nvm_pages_owner_head +This is a head of an owner list. Each owner only has one owner list, +and a nvm page only belongs to a specific owner. uuid[] will be set to +owner's uuid, for bcache it is the bcache's cache set uuid. label is not +mandatory, it is a human-readable string for debug purpose. The pointer +*recs references a separate nvm page which holds the table of struct +nvm_pgalloc_rec. + +- struct nvm_pgalloc_recs +This structure occupies a whole page, owner_uuid should match the uuid +in struct nvm_pages_owner_head. recs[] is the real table which contains all +allocated records. 
+ +- struct nvm_pgalloc_rec +Each structure records a range of allocated nvm pages. pgoff is offset +in unit of page size of this allocated nvm page range. The adjacent page +ranges of same owner can be merged into a larger one, therefore nr +is NOT always a power of 2. + +Signed-off-by: Coly Li <colyli@suse.de> +Signed-off-by: Jianpeng Ma <jianpeng.ma@intel.com> +Signed-off-by: Qiaowei Ren <qiaowei.ren@intel.com> +--- + include/uapi/linux/bcache-nvm.h | 184 ++++++++++++++++++++++++++++++++ + 1 file changed, 184 insertions(+) + create mode 100644 include/uapi/linux/bcache-nvm.h + +diff --git a/include/uapi/linux/bcache-nvm.h b/include/uapi/linux/bcache-nvm.h +new file mode 100644 +index 000000000000..a3a8cdfc7096 +--- /dev/null ++++ b/include/uapi/linux/bcache-nvm.h +@@ -0,0 +1,184 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++#ifndef _UAPI_BCACHE_NVM_H ++#define _UAPI_BCACHE_NVM_H ++ ++/* ++ * Bcache on NVDIMM data structures ++ */ ++ ++/* ++ * - struct nvm_pages_sb ++ * This is the super block allocated on each nvdimm name space. A nvdimm ++ * set may have multiple namespaces, nvm_pages_sb->set_uuid is used to mark ++ * which nvdimm set this name space belongs to. Normally we will use the ++ * bcache's cache set UUID to initialize this uuid, to connect this nvdimm ++ * set to a specified bcache cache set. ++ * ++ * - struct owner_list_head ++ * This is a table for all heads of all owner lists. An owner list records ++ * which page(s) are allocated to which owner. After reboot from power failure, ++ * the owner may find all its requested and allocated pages from the owner ++ * list by a handler which is converted by a UUID. ++ * ++ * - struct nvm_pages_owner_head ++ * This is a head of an owner list. Each owner only has one owner list, ++ * and a nvm page only belongs to a specific owner. uuid[] will be set to ++ * owner's uuid, for bcache it is the bcache's cache set uuid. 
label is not ++ * mandatory, it is a human-readable string for debug purpose. The pointer ++ * recs references a separate nvm page which holds the table of struct ++ * nvm_pgalloc_rec. ++ * ++ * - struct nvm_pgalloc_recs ++ * This structure occupies a whole page, owner_uuid should match the uuid ++ * in struct nvm_pages_owner_head. recs[] is the real table which contains all ++ * allocated records. ++ * ++ * - struct nvm_pgalloc_rec ++ * Each structure records a range of allocated nvm pages. pgoff is offset ++ * in unit of page size of this allocated nvm page range. The adjacent page ++ * ranges of same owner can be merged into a larger one, therefore nr ++ * is NOT always a power of 2. ++ * ++ * ++ * Memory layout on nvdimm namespace 0 ++ * ++ * 0 +---------------------------------+ ++ * | | ++ * 4KB +---------------------------------+ ++ * | nvm_pages_sb | ++ * 8KB +---------------------------------+ <--- nvm_pages_sb.owner_list_head ++ * | owner_list_head | ++ * | | ++ * 16KB +---------------------------------+ <--- owner_list_head.heads[0].recs[0] ++ * | nvm_pgalloc_recs | ++ * | (nvm pages internal usage) | ++ * 24KB +---------------------------------+ ++ * | | ++ * | | ++ * 1MB +---------------------------------+ ++ * | allocable nvm pages | ++ * | for buddy allocator | ++ * end +---------------------------------+ ++ * ++ * ++ * ++ * Memory layout on nvdimm namespace N ++ * (doesn't have owner list) ++ * ++ * 0 +---------------------------------+ ++ * | | ++ * 4KB +---------------------------------+ ++ * | nvm_pages_sb | ++ * 8KB +---------------------------------+ ++ * | | ++ * | | ++ * | | ++ * | | ++ * | | ++ * | | ++ * 1MB +---------------------------------+ ++ * | allocable nvm pages | ++ * | for buddy allocator | ++ * end +---------------------------------+ ++ * ++ */ ++ ++#include <linux/types.h> ++ ++/* In bytes */ ++#define NVM_PAGES_SB_OFFSET 4096 ++#define NVM_PAGES_OFFSET (1 << 20) ++#define NVM_PAGES_NAMESPACE_SIZE (250UL << 30) ++ ++#define 
NVM_PAGES_LABEL_SIZE 32 ++#define NVM_PAGES_NAMESPACES_MAX 8 ++ ++#define NVM_PAGES_OWNER_LIST_HEAD_OFFSET (8<<10) ++#define NVM_PAGES_SYS_RECS_HEAD_OFFSET (16<<10) ++ ++#define NVM_PAGES_SB_VERSION 0 ++#define NVM_PAGES_SB_VERSION_MAX 0 ++ ++static const char nvm_pages_magic[] = { ++ 0x17, 0xbd, 0x53, 0x7f, 0x1b, 0x23, 0xd6, 0x83, ++ 0x46, 0xa4, 0xf8, 0x28, 0x17, 0xda, 0xec, 0xa9 }; ++static const char nvm_pages_pgalloc_magic[] = { ++ 0x39, 0x25, 0x3f, 0xf7, 0x27, 0x17, 0xd0, 0xb9, ++ 0x10, 0xe6, 0xd2, 0xda, 0x38, 0x68, 0x26, 0xae }; ++ ++struct nvm_pgalloc_rec { ++ __u32 pgoff; ++ __u32 nr; ++}; ++ ++struct nvm_pgalloc_recs { ++union { ++ struct { ++ struct nvm_pages_owner_head *owner; ++ struct nvm_pgalloc_recs *next; ++ __u8 magic[16]; ++ __u8 owner_uuid[16]; ++ __u32 size; ++ struct nvm_pgalloc_rec recs[]; ++ }; ++ __u8 pad[8192]; ++}; ++}; ++#define MAX_RECORD \ ++ ((sizeof(struct nvm_pgalloc_recs) - \ ++ offsetof(struct nvm_pgalloc_recs, recs)) / \ ++ sizeof(struct nvm_pgalloc_rec)) ++ ++struct nvm_pages_owner_head { ++ __u8 uuid[16]; ++ char label[NVM_PAGES_LABEL_SIZE]; ++ /* Per-namespace own lists */ ++ struct nvm_pgalloc_recs *recs[NVM_PAGES_NAMESPACES_MAX]; ++}; ++ ++/* heads[0] is always for nvm_pages internal usage */ ++struct owner_list_head { ++union { ++ struct { ++ __u32 size; ++ struct nvm_pages_owner_head heads[]; ++ }; ++ __u8 pad[8192]; ++}; ++}; ++#define MAX_OWNER_LIST \ ++ ((sizeof(struct owner_list_head) - \ ++ offsetof(struct owner_list_head, heads)) / \ ++ sizeof(struct nvm_pages_owner_head)) ++ ++/* The on-media bit order is local CPU order */ ++struct nvm_pages_sb { ++ __u64 csum; ++ __u64 sb_offset; ++ __u64 version; ++ __u8 magic[16]; ++ __u8 uuid[16]; ++ __u32 page_size; ++ __u32 total_namespaces_nr; ++ __u32 this_namespace_nr; ++ union { ++ __u8 set_uuid[16]; ++ __u64 set_magic; ++ }; ++ ++ __u64 flags; ++ __u64 seq; ++ ++ __u64 feature_compat; ++ __u64 feature_incompat; ++ __u64 feature_ro_compat; ++ ++ /* For allocable nvm pages 
from buddy systems */ ++ __u64 pages_offset; ++ __u64 pages_total; ++ ++ /* Only on the first name space */ ++ struct owner_list_head *owner_list_head; ++}; ++ ++#endif /* _UAPI_BCACHE_NVM_H */ +-- +2.26.2 + diff --git a/for-test/nvdimm-meta/v1/v1-0002-bcache-initialize-the-nvm-pages-allocator.patch b/for-test/nvdimm-meta/v1/v1-0002-bcache-initialize-the-nvm-pages-allocator.patch new file mode 100644 index 0000000..1296703 --- /dev/null +++ b/for-test/nvdimm-meta/v1/v1-0002-bcache-initialize-the-nvm-pages-allocator.patch @@ -0,0 +1,524 @@ +From 1ee1a1ce796bc67e224a756d4e26e8f2c5fb2ff3 Mon Sep 17 00:00:00 2001 +From: Qiaowei Ren <qiaowei.ren@intel.com> +Date: Wed, 23 Dec 2020 09:44:56 -0500 +Subject: [PATCH v1 2/8] bcache: initialize the nvm pages allocator + +This patch defines the prototype data structures in memory and initializes +the nvm pages allocator. + +The nvm address space which is managed by this allocator can consist of +many nvm namespaces, and some namespaces can compose into one nvm set, +like cache set. For this initial implementation, only one set can be +supported. + +The users of this nvm pages allocator need to call bch_register_namespace() +to register the nvdimm device (like /dev/pmemX) into this allocator as +the instance of struct bch_nvm_namespace. 
+ +Signed-off-by: Jianpeng Ma <jianpeng.ma@intel.com> +Signed-off-by: Qiaowei Ren <qiaowei.ren@intel.com> +--- + drivers/md/bcache/Kconfig | 6 + + drivers/md/bcache/Makefile | 2 +- + drivers/md/bcache/nvm-pages.c | 337 ++++++++++++++++++++++++++++++++++ + drivers/md/bcache/nvm-pages.h | 91 +++++++++ + drivers/md/bcache/super.c | 3 + + 5 files changed, 438 insertions(+), 1 deletion(-) + create mode 100644 drivers/md/bcache/nvm-pages.c + create mode 100644 drivers/md/bcache/nvm-pages.h + +diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig +index d1ca4d059c20..fdec9905ef40 100644 +--- a/drivers/md/bcache/Kconfig ++++ b/drivers/md/bcache/Kconfig +@@ -35,3 +35,9 @@ config BCACHE_ASYNC_REGISTRATION + device path into this file will returns immediately and the real + registration work is handled in kernel work queue in asynchronous + way. ++ ++config BCACHE_NVM_PAGES ++ bool "NVDIMM support for bcache (EXPERIMENTAL)" ++ depends on BCACHE ++ help ++ nvm pages allocator for bcache. 
+diff --git a/drivers/md/bcache/Makefile b/drivers/md/bcache/Makefile +index 5b87e59676b8..948e5ed2ca66 100644 +--- a/drivers/md/bcache/Makefile ++++ b/drivers/md/bcache/Makefile +@@ -4,4 +4,4 @@ obj-$(CONFIG_BCACHE) += bcache.o + + bcache-y := alloc.o bset.o btree.o closure.o debug.o extents.o\ + io.o journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\ +- util.o writeback.o features.o ++ util.o writeback.o features.o nvm-pages.o +diff --git a/drivers/md/bcache/nvm-pages.c b/drivers/md/bcache/nvm-pages.c +new file mode 100644 +index 000000000000..29ee4dbc504d +--- /dev/null ++++ b/drivers/md/bcache/nvm-pages.c +@@ -0,0 +1,337 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++ ++#include <linux/slab.h> ++#include <linux/list.h> ++#include <linux/mutex.h> ++#include <linux/dax.h> ++#include <linux/pfn_t.h> ++#include <linux/libnvdimm.h> ++#include <linux/mm_types.h> ++#include <linux/err.h> ++#include <linux/pagemap.h> ++#include <linux/bitmap.h> ++#include <linux/blkdev.h> ++#include "nvm-pages.h" ++ ++struct bch_nvm_set *only_set; ++ ++static struct bch_owner_list *alloc_owner_list(const char *owner_uuid, ++ const char *label, int total_namespaces) ++{ ++ struct bch_owner_list *owner_list; ++ ++ owner_list = kzalloc(sizeof(*owner_list), GFP_KERNEL); ++ if (!owner_list) ++ return NULL; ++ ++ owner_list->alloced_recs = kcalloc(total_namespaces, ++ sizeof(struct bch_nvm_alloced_recs *), GFP_KERNEL); ++ if (!owner_list->alloced_recs) { ++ kfree(owner_list); ++ return NULL; ++ } ++ ++ if (owner_uuid) ++ memcpy(owner_list->owner_uuid, owner_uuid, 16); ++ if (label) ++ memcpy(owner_list->label, label, NVM_PAGES_LABEL_SIZE); ++ ++ return owner_list; ++} ++ ++static void release_extents(struct bch_nvm_alloced_recs *extents) ++{ ++ struct list_head *list = extents->extent_head.next; ++ struct bch_extent *extent; ++ ++ while (list != &extents->extent_head) { ++ extent = container_of(list, struct bch_extent, list); ++ list_del(list); ++ kfree(extent); ++ list = 
extents->extent_head.next; ++ } ++ kfree(extents); ++} ++ ++static void release_owner_info(struct bch_nvm_set *nvm_set) ++{ ++ struct bch_owner_list *owner_list; ++ int i, j; ++ ++ for (i = 0; i < nvm_set->owner_list_size; i++) { ++ owner_list = nvm_set->owner_lists[i]; ++ for (j = 0; j < nvm_set->total_namespaces_nr; j++) { ++ if (owner_list->alloced_recs[j]) ++ release_extents(owner_list->alloced_recs[j]); ++ } ++ kfree(owner_list->alloced_recs); ++ kfree(owner_list); ++ } ++ kfree(nvm_set->owner_lists); ++} ++ ++static void release_nvm_namespaces(struct bch_nvm_set *nvm_set) ++{ ++ int i; ++ ++ for (i = 0; i < nvm_set->total_namespaces_nr; i++) ++ kfree(nvm_set->nss[i]); ++ ++ kfree(nvm_set->nss); ++} ++ ++static void release_nvm_set(struct bch_nvm_set *nvm_set) ++{ ++ release_nvm_namespaces(nvm_set); ++ release_owner_info(nvm_set); ++ kfree(nvm_set); ++} ++ ++static void *nvm_pgoff_to_vaddr(struct bch_nvm_namespace *ns, pgoff_t pgoff) ++{ ++ return ns->kaddr + ns->pages_offset + (pgoff << PAGE_SHIFT); ++} ++ ++static int init_owner_info(struct bch_nvm_namespace *ns) ++{ ++ struct owner_list_head *owner_list_head; ++ struct nvm_pages_owner_head *owner_head; ++ struct nvm_pgalloc_recs *nvm_pgalloc_recs; ++ struct bch_owner_list *owner_list; ++ struct bch_nvm_alloced_recs *extents; ++ struct bch_extent *extent; ++ u32 i, j, k; ++ ++ owner_list_head = (struct owner_list_head *) ++ (ns->kaddr + NVM_PAGES_OWNER_LIST_HEAD_OFFSET); ++ ++ mutex_lock(&only_set->lock); ++ only_set->owner_list_size = owner_list_head->size; ++ for (i = 0; i < owner_list_head->size; i++) { ++ owner_head = &owner_list_head->heads[i]; ++ owner_list = alloc_owner_list(owner_head->uuid, owner_head->label, ++ only_set->total_namespaces_nr); ++ if (!owner_list) { ++ mutex_unlock(&only_set->lock); ++ return -ENOMEM; ++ } ++ ++ for (j = 0; j < only_set->total_namespaces_nr; j++) { ++ if (!only_set->nss[j] || !owner_head->recs[j]) ++ continue; ++ ++ nvm_pgalloc_recs = (struct nvm_pgalloc_recs *) ++ 
((long)owner_head->recs[j] + ns->kaddr); ++ ++ extents = kzalloc(sizeof(*extents), GFP_KERNEL); ++ if (!extents) { ++ mutex_unlock(&only_set->lock); ++ return -ENOMEM; ++ } ++ ++ extents->ns = only_set->nss[j]; ++ INIT_LIST_HEAD(&extents->extent_head); ++ owner_list->alloced_recs[j] = extents; ++ ++ do { ++ struct nvm_pgalloc_rec *rec; ++ ++ for (k = 0; k < nvm_pgalloc_recs->size; k++) { ++ rec = &nvm_pgalloc_recs->recs[k]; ++ extent = kzalloc(sizeof(*extent), GFP_KERNEL); ++ if (!extents) { ++ mutex_unlock(&only_set->lock); ++ return -ENOMEM; ++ } ++ extent->kaddr = nvm_pgoff_to_vaddr(extents->ns, rec->pgoff); ++ extent->nr = rec->nr; ++ list_add_tail(&extent->list, &extents->extent_head); ++ ++ extents->ns->free -= rec->nr; ++ } ++ extents->size += nvm_pgalloc_recs->size; ++ ++ if (nvm_pgalloc_recs->next) ++ nvm_pgalloc_recs = (struct nvm_pgalloc_recs *) ++ ((long)nvm_pgalloc_recs->next + ns->kaddr); ++ else ++ nvm_pgalloc_recs = NULL; ++ } while (nvm_pgalloc_recs); ++ } ++ only_set->owner_lists[i] = owner_list; ++ owner_list->nvm_set = only_set; ++ } ++ mutex_unlock(&only_set->lock); ++ ++ return 0; ++} ++ ++static bool dev_dax_supported(struct block_device *bdev) ++{ ++ char buf[BDEVNAME_SIZE]; ++ struct page *page; ++ struct nvm_pages_sb *sb; ++ bool supported = false; ++ ++ page = read_cache_page_gfp(bdev->bd_inode->i_mapping, ++ NVM_PAGES_SB_OFFSET >> PAGE_SHIFT, GFP_KERNEL); ++ ++ if (IS_ERR(page)) ++ goto err; ++ ++ sb = page_address(page); ++ if (!bdev_dax_supported(bdev, sb->page_size)) ++ pr_info("DAX can't supported by %s\n", bdevname(bdev, buf)); ++ else ++ supported = true; ++ ++ put_page(page); ++err: ++ return supported; ++} ++ ++static bool attach_nvm_set(struct bch_nvm_namespace *ns) ++{ ++ bool rc = true; ++ ++ mutex_lock(&only_set->lock); ++ if (only_set->nss) { ++ if (memcmp(ns->sb->set_uuid, only_set->set_uuid, 16)) { ++ pr_info("namespace id does't match nvm set\n"); ++ rc = false; ++ goto unlock; ++ } ++ ++ if 
(only_set->nss[ns->sb->this_namespace_nr]) { ++ pr_info("already has the same position(%d) nvm\n", ++ ns->sb->this_namespace_nr); ++ rc = false; ++ goto unlock; ++ } ++ } else { ++ memcpy(only_set->set_uuid, ns->sb->set_uuid, 16); ++ only_set->total_namespaces_nr = ns->sb->total_namespaces_nr; ++ only_set->nss = kcalloc(only_set->total_namespaces_nr, ++ sizeof(struct bch_nvm_namespace *), GFP_KERNEL); ++ only_set->owner_lists = kcalloc(MAX_OWNER_LIST, ++ sizeof(struct nvm_pages_owner_head *), GFP_KERNEL); ++ if (!only_set->nss || !only_set->owner_lists) { ++ pr_info("can't alloc nss or owner_list\n"); ++ kfree(only_set->nss); ++ kfree(only_set->owner_lists); ++ rc = false; ++ goto unlock; ++ } ++ } ++ ++ only_set->nss[ns->sb->this_namespace_nr] = ns; ++ ++unlock: ++ mutex_unlock(&only_set->lock); ++ return rc; ++} ++ ++struct bch_nvm_namespace *bch_register_namespace(const char *dev_path) ++{ ++ struct bch_nvm_namespace *ns; ++ int err; ++ pgoff_t pgoff; ++ char buf[BDEVNAME_SIZE]; ++ struct block_device *bdev; ++ ++ bdev = blkdev_get_by_path(dev_path, FMODE_READ|FMODE_WRITE|FMODE_EXEC, NULL); ++ if (IS_ERR(bdev)) { ++ pr_info("get %s error\n", dev_path); ++ return ERR_PTR(PTR_ERR(bdev)); ++ } ++ ++ err = -EOPNOTSUPP; ++ if (!dev_dax_supported(bdev)) { ++ pr_info("%s don't support DAX\n", bdevname(bdev, buf)); ++ goto bdput; ++ } ++ ++ err = -EINVAL; ++ if (bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff)) { ++ pr_info("invalid offset of %s\n", bdevname(bdev, buf)); ++ goto bdput; ++ } ++ ++ err = -ENOMEM; ++ ns = kmalloc(sizeof(struct bch_nvm_namespace), GFP_KERNEL); ++ if (!ns) ++ goto bdput; ++ ++ err = -EINVAL; ++ ns->dax_dev = fs_dax_get_by_bdev(bdev); ++ if (!ns->dax_dev) { ++ pr_info("can't by dax device by %s\n", bdevname(bdev, buf)); ++ goto free_ns; ++ } ++ ++ if (dax_direct_access(ns->dax_dev, pgoff, 1, &ns->kaddr, &ns->start_pfn) < 0) { ++ pr_info("dax_direct_access error\n"); ++ goto free_ns; ++ } ++ ++ ns->sb = (struct nvm_pages_sb *)(ns->kaddr + 
NVM_PAGES_SB_OFFSET); ++ if (ns->sb->total_namespaces_nr != 1) { ++ pr_info("only one nvm device\n"); ++ goto free_ns; ++ } ++ ++ err = -EEXIST; ++ if (!attach_nvm_set(ns)) ++ goto free_ns; ++ ++ ns->page_size = ns->sb->page_size; ++ ns->pages_offset = ns->sb->pages_offset; ++ ns->pages_total = ns->sb->pages_total; ++ ns->start_pfn.val += ns->pages_offset >> PAGE_SHIFT; ++ ns->free = ns->pages_total; ++ ns->bdev = bdev; ++ ns->nvm_set = only_set; ++ ++ mutex_init(&ns->lock); ++ ++ if (ns->sb->this_namespace_nr == 0) { ++ pr_info("only first namespace contain owner info\n"); ++ err = init_owner_info(ns); ++ if (err < 0) { ++ pr_info("init_owner_info met error %d\n", err); ++ goto free_ns; ++ } ++ } ++ ++ return ns; ++ ++free_ns: ++ kfree(ns); ++bdput: ++ bdput(bdev); ++ ++ return ERR_PTR(err); ++} ++EXPORT_SYMBOL_GPL(bch_register_namespace); ++ ++int __init bch_nvm_init(void) ++{ ++ only_set = kzalloc(sizeof(*only_set), GFP_KERNEL); ++ if (!only_set) ++ return -ENOMEM; ++ ++ only_set->total_namespaces_nr = 0; ++ only_set->owner_lists = NULL; ++ only_set->nss = NULL; ++ ++ mutex_init(&only_set->lock); ++ ++ pr_info("bcache nvm init\n"); ++ return 0; ++} ++ ++void bch_nvm_exit(void) ++{ ++ release_nvm_set(only_set); ++ pr_info("bcache nvm exit\n"); ++} +diff --git a/drivers/md/bcache/nvm-pages.h b/drivers/md/bcache/nvm-pages.h +new file mode 100644 +index 000000000000..8850d00a4612 +--- /dev/null ++++ b/drivers/md/bcache/nvm-pages.h +@@ -0,0 +1,91 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++ ++#ifndef _BCACHE_NVM_PAGES_H ++#define _BCACHE_NVM_PAGES_H ++ ++#include <linux/bcache-nvm.h> ++ ++/* ++ * Bcache NVDIMM in memory data structures ++ */ ++ ++/* ++ * The following three structures in memory records which page(s) allocated ++ * to which owner. After reboot from power failure, they will be initialized ++ * based on nvm pages superblock in NVDIMM device. 
++ */ ++struct bch_extent { ++ void *kaddr; ++ u32 nr; ++ struct list_head list; ++}; ++ ++struct bch_nvm_alloced_recs { ++ u32 size; ++ struct bch_nvm_namespace *ns; ++ struct list_head extent_head; ++}; ++ ++struct bch_owner_list { ++ u8 owner_uuid[16]; ++ char label[NVM_PAGES_LABEL_SIZE]; ++ ++ struct bch_nvm_set *nvm_set; ++ struct bch_nvm_alloced_recs **alloced_recs; ++}; ++ ++struct bch_nvm_namespace { ++ void *kaddr; ++ ++ u8 uuid[16]; ++ u64 free; ++ u32 page_size; ++ u64 pages_offset; ++ u64 pages_total; ++ pfn_t start_pfn; ++ ++ struct dax_device *dax_dev; ++ struct block_device *bdev; ++ struct nvm_pages_sb *sb; ++ struct bch_nvm_set *nvm_set; ++ ++ struct mutex lock; ++}; ++ ++/* ++ * A set of namespaces. Currently only one set can be supported. ++ */ ++struct bch_nvm_set { ++ u8 set_uuid[16]; ++ u32 total_namespaces_nr; ++ ++ u32 owner_list_size; ++ struct bch_owner_list **owner_lists; ++ ++ struct bch_nvm_namespace **nss; ++ ++ struct mutex lock; ++}; ++extern struct bch_nvm_set *only_set; ++ ++#ifdef CONFIG_BCACHE_NVM_PAGES ++ ++struct bch_nvm_namespace *bch_register_namespace(const char *dev_path); ++int bch_nvm_init(void); ++void bch_nvm_exit(void); ++ ++#else ++ ++static inline struct bch_nvm_namespace *bch_register_namespace(const char *dev_path) ++{ ++ return NULL; ++} ++static inline int bch_nvm_init(void) ++{ ++ return 0; ++} ++static inline void bch_nvm_exit(void) { } ++ ++#endif /* CONFIG_BCACHE_NVM_PAGES */ ++ ++#endif /* _BCACHE_NVM_PAGES_H */ +diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c +index a4752ac410dc..f0c05ca9daab 100644 +--- a/drivers/md/bcache/super.c ++++ b/drivers/md/bcache/super.c +@@ -14,6 +14,7 @@ + #include "request.h" + #include "writeback.h" + #include "features.h" ++#include "nvm-pages.h" + + #include <linux/blkdev.h> + #include <linux/debugfs.h> +@@ -2770,6 +2771,7 @@ static void bcache_exit(void) + { + bch_debug_exit(); + bch_request_exit(); ++ bch_nvm_exit(); + if (bcache_kobj) + 
kobject_put(bcache_kobj); + if (bcache_wq) +@@ -2849,6 +2851,7 @@ static int __init bcache_init(void) + + bch_debug_init(); + closure_debug_init(); ++ bch_nvm_init(); + + bcache_is_reboot = false; + +-- +2.26.2 + diff --git a/for-test/nvdimm-meta/v1/v1-0003-bcache-initialization-of-the-buddy.patch b/for-test/nvdimm-meta/v1/v1-0003-bcache-initialization-of-the-buddy.patch new file mode 100644 index 0000000..450d6c2 --- /dev/null +++ b/for-test/nvdimm-meta/v1/v1-0003-bcache-initialization-of-the-buddy.patch @@ -0,0 +1,199 @@ +From a9842e61df772fa4cd78b0ba7990a9eb26df6437 Mon Sep 17 00:00:00 2001 +From: Qiaowei Ren <qiaowei.ren@intel.com> +Date: Wed, 23 Dec 2020 09:44:57 -0500 +Subject: [PATCH v1 3/8] bcache: initialization of the buddy + +This nvm pages allocator will implement the simple buddy to manage the +nvm address space. This patch initializes this buddy for new namespace. + +the unit of alloc/free of the buddy is page. DAX device has their +struct page(in dram or PMEM). + + struct { /* ZONE_DEVICE pages */ + /** @pgmap: Points to the hosting device page map. */ + struct dev_pagemap *pgmap; + void *zone_device_data; + /* + * ZONE_DEVICE private pages are counted as being + * mapped so the next 3 words hold the mapping, index, + * and private fields from the source anonymous or + * page cache page while the page is migrated to device + * private memory. + * ZONE_DEVICE MEMORY_DEVICE_FS_DAX pages also + * use the mapping, index, and private fields when + * pmem backed DAX files are mapped. + */ + }; + +ZONE_DEVICE pages only use pgmap. Other 4 words[16/32 bytes] don't use. +So the second/third word will be used as 'struct list_head ' which list +in buddy. The fourth word(that is normal struct page::index) store pgoff +which the page-offset in the dax device. And the fifth word (that is +normal struct page::private) store order of buddy. page_type will be used +to store buddy flags. 
+ +Signed-off-by: Jianpeng Ma <jianpeng.ma@intel.com> +Signed-off-by: Qiaowei Ren <qiaowei.ren@intel.com> +--- + drivers/md/bcache/nvm-pages.c | 82 +++++++++++++++++++++++++++++++++-- + drivers/md/bcache/nvm-pages.h | 3 ++ + 2 files changed, 82 insertions(+), 3 deletions(-) + +diff --git a/drivers/md/bcache/nvm-pages.c b/drivers/md/bcache/nvm-pages.c +index 29ee4dbc504d..ea36994b5b00 100644 +--- a/drivers/md/bcache/nvm-pages.c ++++ b/drivers/md/bcache/nvm-pages.c +@@ -74,8 +74,10 @@ static void release_nvm_namespaces(struct bch_nvm_set *nvm_set) + { + int i; + +- for (i = 0; i < nvm_set->total_namespaces_nr; i++) ++ for (i = 0; i < nvm_set->total_namespaces_nr; i++) { ++ kfree(nvm_set->nss[i]->pages_bitmap); + kfree(nvm_set->nss[i]); ++ } + + kfree(nvm_set->nss); + } +@@ -92,6 +94,17 @@ static void *nvm_pgoff_to_vaddr(struct bch_nvm_namespace *ns, pgoff_t pgoff) + return ns->kaddr + ns->pages_offset + (pgoff << PAGE_SHIFT); + } + ++static struct page *nvm_vaddr_to_page(struct bch_nvm_namespace *ns, void *addr) ++{ ++ return virt_to_page(addr); ++} ++ ++static inline void remove_owner_space(struct bch_nvm_namespace *ns, ++ pgoff_t pgoff, u32 nr) ++{ ++ bitmap_set(ns->pages_bitmap, pgoff, nr); ++} ++ + static int init_owner_info(struct bch_nvm_namespace *ns) + { + struct owner_list_head *owner_list_head; +@@ -146,6 +159,8 @@ static int init_owner_info(struct bch_nvm_namespace *ns) + extent->kaddr = nvm_pgoff_to_vaddr(extents->ns, rec->pgoff); + extent->nr = rec->nr; + list_add_tail(&extent->list, &extents->extent_head); ++ /*remove already alloced space*/ ++ remove_owner_space(extents->ns, rec->pgoff, rec->nr); + + extents->ns->free -= rec->nr; + } +@@ -166,6 +181,54 @@ static int init_owner_info(struct bch_nvm_namespace *ns) + return 0; + } + ++static void init_nvm_free_space(struct bch_nvm_namespace *ns) ++{ ++ unsigned int start, end, i; ++ struct page *page; ++ unsigned int pages; ++ pgoff_t pgoff_start; ++ ++ bitmap_for_each_clear_region(ns->pages_bitmap, start, 
end, 0, ns->pages_total) { ++ pgoff_start = start; ++ pages = end - start; ++ ++ while (pages) { ++ for (i = MAX_ORDER - 1; i >= 0 ; i--) { ++ if ((start % (1 << i) == 0) && (pages >= (1 << i))) ++ break; ++ } ++ ++ page = nvm_vaddr_to_page(ns, nvm_pgoff_to_vaddr(ns, pgoff_start)); ++ page->index = pgoff_start; ++ set_page_private(page, i); ++ __SetPageBuddy(page); ++ list_add((struct list_head *)&page->zone_device_data, &ns->free_area[i]); ++ ++ pgoff_start += 1 << i; ++ pages -= 1 << i; ++ } ++ } ++ ++ bitmap_for_each_set_region(ns->pages_bitmap, start, end, 0, ns->pages_total) { ++ pages = end - start; ++ pgoff_start = start; ++ ++ while (pages) { ++ for (i = MAX_ORDER - 1; i >= 0 ; i--) { ++ if ((start % (1 << i) == 0) && (pages >= (1 << i))) ++ break; ++ } ++ ++ page = nvm_vaddr_to_page(ns, nvm_pgoff_to_vaddr(ns, pgoff_start)); ++ page->index = pgoff_start; ++ page->private = i; ++ ++ pgoff_start += 1 << i; ++ pages -= 1 << i; ++ } ++ } ++} ++ + static bool dev_dax_supported(struct block_device *bdev) + { + char buf[BDEVNAME_SIZE]; +@@ -234,7 +297,7 @@ static bool attach_nvm_set(struct bch_nvm_namespace *ns) + struct bch_nvm_namespace *bch_register_namespace(const char *dev_path) + { + struct bch_nvm_namespace *ns; +- int err; ++ int i, err; + pgoff_t pgoff; + char buf[BDEVNAME_SIZE]; + struct block_device *bdev; +@@ -292,6 +355,15 @@ struct bch_nvm_namespace *bch_register_namespace(const char *dev_path) + ns->bdev = bdev; + ns->nvm_set = only_set; + ++ ns->pages_bitmap = bitmap_zalloc(ns->pages_total, GFP_KERNEL); ++ if (!ns->pages_bitmap) { ++ err = -ENOMEM; ++ goto free_ns; ++ } ++ ++ for (i = 0; i < MAX_ORDER; i++) ++ INIT_LIST_HEAD(&ns->free_area[i]); ++ + mutex_init(&ns->lock); + + if (ns->sb->this_namespace_nr == 0) { +@@ -299,12 +371,16 @@ struct bch_nvm_namespace *bch_register_namespace(const char *dev_path) + err = init_owner_info(ns); + if (err < 0) { + pr_info("init_owner_info met error %d\n", err); +- goto free_ns; ++ goto free_bitmap; + } ++ /* 
init buddy allocator */ ++ init_nvm_free_space(ns); + } + + return ns; + ++free_bitmap: ++ kfree(ns->pages_bitmap); + free_ns: + kfree(ns); + bdput: +diff --git a/drivers/md/bcache/nvm-pages.h b/drivers/md/bcache/nvm-pages.h +index 8850d00a4612..15aa0f15760f 100644 +--- a/drivers/md/bcache/nvm-pages.h ++++ b/drivers/md/bcache/nvm-pages.h +@@ -44,6 +44,9 @@ struct bch_nvm_namespace { + u64 pages_total; + pfn_t start_pfn; + ++ unsigned long *pages_bitmap; ++ struct list_head free_area[MAX_ORDER]; ++ + struct dax_device *dax_dev; + struct block_device *bdev; + struct nvm_pages_sb *sb; +-- +2.26.2 + diff --git a/for-test/nvdimm-meta/v1/v1-0004-bcache-bch_nvm_alloc_pages-of-the-buddy.patch b/for-test/nvdimm-meta/v1/v1-0004-bcache-bch_nvm_alloc_pages-of-the-buddy.patch new file mode 100644 index 0000000..91cda8b --- /dev/null +++ b/for-test/nvdimm-meta/v1/v1-0004-bcache-bch_nvm_alloc_pages-of-the-buddy.patch @@ -0,0 +1,187 @@ +From 47a0cdd59b4c55f2c9379c2d5ae03aeb97c587b7 Mon Sep 17 00:00:00 2001 +From: Qiaowei Ren <qiaowei.ren@intel.com> +Date: Wed, 23 Dec 2020 09:44:58 -0500 +Subject: [PATCH v1 4/8] bcache: bch_nvm_alloc_pages() of the buddy + +This patch implements the bch_nvm_alloc_pages() of the buddy. 
+ +Signed-off-by: Jianpeng Ma <jianpeng.ma@intel.com> +Signed-off-by: Qiaowei Ren <qiaowei.ren@intel.com> +--- + drivers/md/bcache/nvm-pages.c | 135 ++++++++++++++++++++++++++++++++++ + drivers/md/bcache/nvm-pages.h | 6 ++ + 2 files changed, 141 insertions(+) + +diff --git a/drivers/md/bcache/nvm-pages.c b/drivers/md/bcache/nvm-pages.c +index ea36994b5b00..2e91e1c8536f 100644 +--- a/drivers/md/bcache/nvm-pages.c ++++ b/drivers/md/bcache/nvm-pages.c +@@ -105,6 +105,141 @@ static inline void remove_owner_space(struct bch_nvm_namespace *ns, + bitmap_set(ns->pages_bitmap, pgoff, nr); + } + ++/* If not found, it will create if create == true */ ++static struct bch_owner_list *find_owner_list(const char *owner_uuid, bool create) ++{ ++ struct bch_owner_list *owner_list; ++ int i; ++ ++ for (i = 0; i < only_set->owner_list_size; i++) { ++ if (!memcmp(owner_uuid, only_set->owner_lists[i]->owner_uuid, 16)) ++ return only_set->owner_lists[i]; ++ } ++ ++ if (create) { ++ owner_list = alloc_owner_list(owner_uuid, NULL, only_set->total_namespaces_nr); ++ only_set->owner_lists[only_set->owner_list_size++] = owner_list; ++ return owner_list; ++ } else ++ return NULL; ++} ++ ++static struct bch_nvm_alloced_recs *find_nvm_alloced_recs(struct bch_owner_list *owner_list, ++ struct bch_nvm_namespace *ns, bool create) ++{ ++ int position = ns->sb->this_namespace_nr; ++ ++ if (create && !owner_list->alloced_recs[position]) { ++ struct bch_nvm_alloced_recs *alloced_recs = ++ kzalloc(sizeof(*alloced_recs), GFP_KERNEL|__GFP_NOFAIL); ++ ++ alloced_recs->ns = ns; ++ INIT_LIST_HEAD(&alloced_recs->extent_head); ++ owner_list->alloced_recs[position] = alloced_recs; ++ return alloced_recs; ++ } else ++ return owner_list->alloced_recs[position]; ++} ++ ++static inline void *extent_end_addr(struct bch_extent *extent) ++{ ++ return extent->kaddr + (extent->nr << PAGE_SHIFT); ++} ++ ++static void add_extent(struct bch_nvm_alloced_recs *alloced_recs, void *addr, int order) ++{ ++ struct list_head 
*list = alloced_recs->extent_head.next; ++ struct bch_extent *extent, *tmp; ++ void *end_addr = addr + ((1 << order) << PAGE_SHIFT); ++ ++ while (list != &alloced_recs->extent_head) { ++ extent = container_of(list, struct bch_extent, list); ++ if (end_addr == extent->kaddr) { ++ extent->kaddr = addr; ++ extent->nr += 1 << order; ++ break; ++ } else if (extent_end_addr(extent) == addr) { ++ extent->nr += 1 << order; ++ break; ++ } else if (end_addr < extent->kaddr) { ++ tmp = kzalloc(sizeof(*tmp), GFP_KERNEL|__GFP_NOFAIL); ++ tmp->kaddr = addr; ++ tmp->nr = 1 << order; ++ list_add_tail(&tmp->list, &extent->list); ++ alloced_recs->size++; ++ break; ++ } ++ list = list->next; ++ } ++ ++ if (list == &alloced_recs->extent_head) { ++ struct bch_extent *e = kzalloc(sizeof(*e), GFP_KERNEL); ++ ++ e->kaddr = addr; ++ e->nr = 1 << order; ++ list_add(&e->list, &alloced_recs->extent_head); ++ alloced_recs->size++; ++ } ++} ++ ++void *bch_nvm_alloc_pages(int order, const char *owner_uuid) ++{ ++ void *kaddr = NULL; ++ struct bch_owner_list *owner_list; ++ struct bch_nvm_alloced_recs *alloced_recs; ++ int i, j; ++ ++ mutex_lock(&only_set->lock); ++ owner_list = find_owner_list(owner_uuid, true); ++ ++ for (j = 0; j < only_set->total_namespaces_nr; j++) { ++ struct bch_nvm_namespace *ns = only_set->nss[j]; ++ ++ if (!ns || (ns->free < (1 << order))) ++ continue; ++ ++ for (i = order; i < MAX_ORDER; i++) { ++ struct list_head *list; ++ struct page *page, *buddy_page; ++ ++ if (list_empty(&ns->free_area[i])) ++ continue; ++ ++ list = ns->free_area[i].next; ++ page = container_of((void *)list, struct page, zone_device_data); ++ ++ list_del(list); ++ ++ while (i != order) { ++ buddy_page = nvm_vaddr_to_page(ns, ++ nvm_pgoff_to_vaddr(ns, page->index + (1 << (i - 1)))); ++ buddy_page->private = i - 1; ++ buddy_page->index = page->index + (1 << (i - 1)); ++ __SetPageBuddy(buddy_page); ++ list_add((struct list_head *)&buddy_page->zone_device_data, ++ &ns->free_area[i - 1]); ++ i--; ++ } 
++ ++ page->private = order; ++ __ClearPageBuddy(page); ++ ns->free -= 1 << order; ++ kaddr = nvm_pgoff_to_vaddr(ns, page->index); ++ break; ++ } ++ ++ if (i != MAX_ORDER) { ++ alloced_recs = find_nvm_alloced_recs(owner_list, ns, true); ++ add_extent(alloced_recs, kaddr, order); ++ break; ++ } ++ } ++ ++ mutex_unlock(&only_set->lock); ++ return kaddr; ++} ++EXPORT_SYMBOL_GPL(bch_nvm_alloc_pages); ++ + static int init_owner_info(struct bch_nvm_namespace *ns) + { + struct owner_list_head *owner_list_head; +diff --git a/drivers/md/bcache/nvm-pages.h b/drivers/md/bcache/nvm-pages.h +index 15aa0f15760f..e470c21b3075 100644 +--- a/drivers/md/bcache/nvm-pages.h ++++ b/drivers/md/bcache/nvm-pages.h +@@ -76,6 +76,7 @@ extern struct bch_nvm_set *only_set; + struct bch_nvm_namespace *bch_register_namespace(const char *dev_path); + int bch_nvm_init(void); + void bch_nvm_exit(void); ++void *bch_nvm_alloc_pages(int order, const char *owner_uuid); + + #else + +@@ -89,6 +90,11 @@ static inline int bch_nvm_init(void) + } + static inline void bch_nvm_exit(void) { } + ++static inline void *bch_nvm_alloc_pages(int order, const char *owner_uuid) ++{ ++ return NULL; ++} ++ + #endif /* CONFIG_BCACHE_NVM_PAGES */ + + #endif /* _BCACHE_NVM_PAGES_H */ +-- +2.26.2 + diff --git a/for-test/nvdimm-meta/v1/v1-0005-bcache-bch_nvm_free_pages-of-the-buddy.patch b/for-test/nvdimm-meta/v1/v1-0005-bcache-bch_nvm_free_pages-of-the-buddy.patch new file mode 100644 index 0000000..758e2f7 --- /dev/null +++ b/for-test/nvdimm-meta/v1/v1-0005-bcache-bch_nvm_free_pages-of-the-buddy.patch @@ -0,0 +1,192 @@ +From f9f3f44e35b3caa3b66bfe5f82c4c7550971e4df Mon Sep 17 00:00:00 2001 +From: Qiaowei Ren <qiaowei.ren@intel.com> +Date: Wed, 23 Dec 2020 09:44:59 -0500 +Subject: [PATCH v1 5/8] bcache: bch_nvm_free_pages() of the buddy + +This patch implements the bch_nvm_free_pages() of the buddy. 
+ +Signed-off-by: Jianpeng Ma <jianpeng.ma@intel.com> +Signed-off-by: Qiaowei Ren <qiaowei.ren@intel.com> +--- + drivers/md/bcache/nvm-pages.c | 142 ++++++++++++++++++++++++++++++++++ + drivers/md/bcache/nvm-pages.h | 4 +- + 2 files changed, 145 insertions(+), 1 deletion(-) + +diff --git a/drivers/md/bcache/nvm-pages.c b/drivers/md/bcache/nvm-pages.c +index 2e91e1c8536f..1dcb5012eccf 100644 +--- a/drivers/md/bcache/nvm-pages.c ++++ b/drivers/md/bcache/nvm-pages.c +@@ -182,6 +182,148 @@ static void add_extent(struct bch_nvm_alloced_recs *alloced_recs, void *addr, in + } + } + ++static inline void *nvm_end_addr(struct bch_nvm_namespace *ns) ++{ ++ return ns->kaddr + ns->pages_offset + (ns->pages_total << PAGE_SHIFT); ++} ++ ++static inline bool in_nvm_range(struct bch_nvm_namespace *ns, ++ void *start_addr, void *end_addr) ++{ ++ return (start_addr >= ns->kaddr) && (end_addr <= nvm_end_addr(ns)); ++} ++ ++static struct bch_nvm_namespace *find_nvm_by_addr(void *addr, int order) ++{ ++ int i; ++ struct bch_nvm_namespace *ns; ++ ++ for (i = 0; i < only_set->total_namespaces_nr; i++) { ++ ns = only_set->nss[i]; ++ if (ns && in_nvm_range(ns, addr, addr + (1 << order))) ++ return ns; ++ } ++ return NULL; ++} ++ ++static int remove_extent(struct bch_nvm_alloced_recs *alloced_recs, void *addr, int order) ++{ ++ struct list_head *list = alloced_recs->extent_head.next; ++ struct bch_extent *extent, *tmp; ++ void *end_addr = addr + ((1 << order) << PAGE_SHIFT); ++ ++ while (list != &alloced_recs->extent_head) { ++ extent = container_of(list, struct bch_extent, list); ++ ++ if (addr < extent->kaddr || end_addr > extent_end_addr(extent)) { ++ list = list->next; ++ continue; ++ } ++ ++ if (addr == extent->kaddr) { ++ if (extent->nr == (1 << order)) { ++ list_del(list); ++ kfree(extent); ++ alloced_recs->size--; ++ } else { ++ extent->kaddr = end_addr; ++ extent->nr -= 1 << order; ++ } ++ } else { ++ if (extent_end_addr(extent) > end_addr) { ++ tmp = kzalloc(sizeof(*tmp), 
GFP_KERNEL|__GFP_NOFAIL); ++ ++ tmp->kaddr = end_addr; ++ tmp->nr = (extent_end_addr(extent) - end_addr) >> PAGE_SHIFT; ++ list_add(&tmp->list, list); ++ alloced_recs->size++; ++ } ++ extent->nr = (addr - extent->kaddr) >> PAGE_SHIFT; ++ } ++ break; ++ } ++ return (list == &alloced_recs->extent_head) ? -ENOENT : 0; ++} ++ ++static void __free_space(struct bch_nvm_namespace *ns, void *addr, int order) ++{ ++ unsigned int add_pages = (1 << order); ++ pgoff_t pgoff; ++ struct page *page; ++ ++ page = nvm_vaddr_to_page(ns, addr); ++ WARN_ON(page->private != order); ++ pgoff = page->index; ++ ++ while (order < MAX_ORDER - 1) { ++ struct page *buddy_page; ++ ++ pgoff_t buddy_pgoff = pgoff ^ (1 << order); ++ pgoff_t parent_pgoff = pgoff & ~(1 << order); ++ ++ if ((parent_pgoff + (1 << (order + 1)) > ns->pages_total)) ++ break; ++ ++ buddy_page = nvm_vaddr_to_page(ns, nvm_pgoff_to_vaddr(ns, buddy_pgoff)); ++ ++ if (PageBuddy(buddy_page) && (buddy_page->private == order)) { ++ list_del((struct list_head *)&buddy_page->zone_device_data); ++ __ClearPageBuddy(buddy_page); ++ pgoff = parent_pgoff; ++ order++; ++ continue; ++ } ++ break; ++ } ++ ++ page = nvm_vaddr_to_page(ns, nvm_pgoff_to_vaddr(ns, pgoff)); ++ list_add((struct list_head *)&page->zone_device_data, &ns->free_area[order]); ++ page->index = pgoff; ++ page->private = order; ++ __SetPageBuddy(page); ++ ns->free = add_pages; ++} ++ ++void bch_nvm_free_pages(void *addr, int order, const char *owner_uuid) ++{ ++ struct bch_nvm_namespace *ns; ++ struct bch_owner_list *owner_list; ++ struct bch_nvm_alloced_recs *alloced_recs; ++ int r; ++ ++ mutex_lock(&only_set->lock); ++ ++ ns = find_nvm_by_addr(addr, order); ++ if (!ns) { ++ pr_info("can't find nvm_dev by kaddr %p\n", addr); ++ goto unlock; ++ } ++ ++ owner_list = find_owner_list(owner_uuid, false); ++ if (!owner_list) { ++ pr_info("can't found owner(uuid=%s)\n", owner_uuid); ++ goto unlock; ++ } ++ ++ alloced_recs = find_nvm_alloced_recs(owner_list, ns, false); ++ if 
(!alloced_recs) { ++ pr_info("can't find alloced_recs(uuid=%s)\n", ns->uuid); ++ goto unlock; ++ } ++ ++ r = remove_extent(alloced_recs, addr, order); ++ if (r < 0) { ++ pr_info("can't find extent\n"); ++ goto unlock; ++ } ++ ++ __free_space(ns, addr, order); ++ ++unlock: ++ mutex_unlock(&only_set->lock); ++} ++EXPORT_SYMBOL_GPL(bch_nvm_free_pages); ++ + void *bch_nvm_alloc_pages(int order, const char *owner_uuid) + { + void *kaddr = NULL; +diff --git a/drivers/md/bcache/nvm-pages.h b/drivers/md/bcache/nvm-pages.h +index e470c21b3075..6a56dd4a2ffc 100644 +--- a/drivers/md/bcache/nvm-pages.h ++++ b/drivers/md/bcache/nvm-pages.h +@@ -77,7 +77,7 @@ struct bch_nvm_namespace *bch_register_namespace(const char *dev_path); + int bch_nvm_init(void); + void bch_nvm_exit(void); + void *bch_nvm_alloc_pages(int order, const char *owner_uuid); +- ++void bch_nvm_free_pages(void *addr, int order, const char *owner_uuid); + #else + + static inline struct bch_nvm_namespace *bch_register_namespace(const char *dev_path) +@@ -95,6 +95,8 @@ static inline void *bch_nvm_alloc_pages(int order, const char *owner_uuid) + return NULL; + } + ++static inline void bch_nvm_free_pages(void *addr, int order, const char *owner_uuid) { } ++ + #endif /* CONFIG_BCACHE_NVM_PAGES */ + + #endif /* _BCACHE_NVM_PAGES_H */ +-- +2.26.2 + diff --git a/for-test/nvdimm-meta/v1/v1-0006-bcache-get-allocated-pages-from-specific-owner.patch b/for-test/nvdimm-meta/v1/v1-0006-bcache-get-allocated-pages-from-specific-owner.patch new file mode 100644 index 0000000..591fb3a --- /dev/null +++ b/for-test/nvdimm-meta/v1/v1-0006-bcache-get-allocated-pages-from-specific-owner.patch @@ -0,0 +1,91 @@ +From 2a5e6c49869e381e2a521f5cd39fee94541ee817 Mon Sep 17 00:00:00 2001 +From: Qiaowei Ren <qiaowei.ren@intel.com> +Date: Wed, 23 Dec 2020 09:45:00 -0500 +Subject: [PATCH v1 6/8] bcache: get allocated pages from specific owner + +This patch implements bch_get_allocated_pages() of the buddy to be used to +get allocated pages from 
specific owner. + +Signed-off-by: Jianpeng Ma <jianpeng.ma@intel.com> +Signed-off-by: Qiaowei Ren <qiaowei.ren@intel.com> +--- + drivers/md/bcache/nvm-pages.c | 38 +++++++++++++++++++++++++++++++++++ + drivers/md/bcache/nvm-pages.h | 6 ++++++ + 2 files changed, 44 insertions(+) + +diff --git a/drivers/md/bcache/nvm-pages.c b/drivers/md/bcache/nvm-pages.c +index 1dcb5012eccf..ff810110ee90 100644 +--- a/drivers/md/bcache/nvm-pages.c ++++ b/drivers/md/bcache/nvm-pages.c +@@ -382,6 +382,44 @@ void *bch_nvm_alloc_pages(int order, const char *owner_uuid) + } + EXPORT_SYMBOL_GPL(bch_nvm_alloc_pages); + ++struct bch_extent *bch_get_allocated_pages(const char *owner_uuid) ++{ ++ struct bch_owner_list *owner_list = find_owner_list(owner_uuid, false); ++ struct bch_nvm_alloced_recs *alloced_recs; ++ struct bch_extent *head = NULL, *e, *tmp; ++ int i; ++ ++ if (!owner_list) ++ return NULL; ++ ++ for (i = 0; i < only_set->total_namespaces_nr; i++) { ++ struct list_head *l; ++ ++ alloced_recs = owner_list->alloced_recs[i]; ++ ++ if (!alloced_recs || alloced_recs->size == 0) ++ continue; ++ ++ l = alloced_recs->extent_head.next; ++ while (l != &alloced_recs->extent_head) { ++ e = container_of(l, struct bch_extent, list); ++ tmp = kzalloc(sizeof(*tmp), GFP_KERNEL|__GFP_NOFAIL); ++ ++ tmp->kaddr = e->kaddr; ++ tmp->nr = e->nr; ++ ++ if (head) ++ list_add_tail(&tmp->list, &head->list); ++ else ++ head = tmp; ++ ++ l = l->next; ++ } ++ } ++ return head; ++} ++EXPORT_SYMBOL_GPL(bch_get_allocated_pages); ++ + static int init_owner_info(struct bch_nvm_namespace *ns) + { + struct owner_list_head *owner_list_head; +diff --git a/drivers/md/bcache/nvm-pages.h b/drivers/md/bcache/nvm-pages.h +index 6a56dd4a2ffc..4d0b3e0f1e73 100644 +--- a/drivers/md/bcache/nvm-pages.h ++++ b/drivers/md/bcache/nvm-pages.h +@@ -78,6 +78,7 @@ int bch_nvm_init(void); + void bch_nvm_exit(void); + void *bch_nvm_alloc_pages(int order, const char *owner_uuid); + void bch_nvm_free_pages(void *addr, int order, const 
char *owner_uuid); ++struct bch_extent *bch_get_allocated_pages(const char *owner_uuid); + #else + + static inline struct bch_nvm_namespace *bch_register_namespace(const char *dev_path) +@@ -97,6 +98,11 @@ static inline void *bch_nvm_alloc_pages(int order, const char *owner_uuid) + + static inline void bch_nvm_free_pages(void *addr, int order, const char *owner_uuid) { } + ++static inline struct bch_extent *bch_get_allocated_pages(const char *owner_uuid) ++{ ++ return NULL; ++} ++ + #endif /* CONFIG_BCACHE_NVM_PAGES */ + + #endif /* _BCACHE_NVM_PAGES_H */ +-- +2.26.2 + diff --git a/for-test/nvdimm-meta/v1/v1-0007-bcache-persist-owner-info-when-alloc-free-pages.patch b/for-test/nvdimm-meta/v1/v1-0007-bcache-persist-owner-info-when-alloc-free-pages.patch new file mode 100644 index 0000000..109427b --- /dev/null +++ b/for-test/nvdimm-meta/v1/v1-0007-bcache-persist-owner-info-when-alloc-free-pages.patch @@ -0,0 +1,141 @@ +From 1c0d20d1b1cd443b404379fc7ea487d48b3f8ae5 Mon Sep 17 00:00:00 2001 +From: Qiaowei Ren <qiaowei.ren@intel.com> +Date: Wed, 23 Dec 2020 09:45:01 -0500 +Subject: [PATCH v1 7/8] bcache: persist owner info when alloc/free pages. + +This patch implements persisting owner info on the nvdimm device +when allocating/freeing pages. 
+ +Signed-off-by: Jianpeng Ma <jianpeng.ma@intel.com> +Signed-off-by: Qiaowei Ren <qiaowei.ren@intel.com> +--- + drivers/md/bcache/nvm-pages.c | 92 ++++++++++++++++++++++++++++++++++- + 1 file changed, 91 insertions(+), 1 deletion(-) + +diff --git a/drivers/md/bcache/nvm-pages.c b/drivers/md/bcache/nvm-pages.c +index ff810110ee90..69b559a4c0b2 100644 +--- a/drivers/md/bcache/nvm-pages.c ++++ b/drivers/md/bcache/nvm-pages.c +@@ -206,6 +206,17 @@ static struct bch_nvm_namespace *find_nvm_by_addr(void *addr, int order) + return NULL; + } + ++static void init_pgalloc_recs(struct nvm_pgalloc_recs *recs, const char *owner_uuid) ++{ ++ memset(recs, 0, sizeof(struct nvm_pgalloc_recs)); ++ memcpy(recs->owner_uuid, owner_uuid, 16); ++} ++ ++static pgoff_t vaddr_to_nvm_pgoff(struct bch_nvm_namespace *ns, void *kaddr) ++{ ++ return (kaddr - ns->kaddr - ns->pages_offset) / PAGE_SIZE; ++} ++ + static int remove_extent(struct bch_nvm_alloced_recs *alloced_recs, void *addr, int order) + { + struct list_head *list = alloced_recs->extent_head.next; +@@ -245,6 +256,83 @@ static int remove_extent(struct bch_nvm_alloced_recs *alloced_recs, void *addr, + return (list == &alloced_recs->extent_head) ? -ENOENT : 0; + } + ++#define RECS_LEN (sizeof(struct nvm_pgalloc_recs)) ++ ++static void write_owner_info(void) ++{ ++ struct bch_owner_list *owner_list; ++ struct nvm_pgalloc_recs *recs; ++ struct bch_extent *extent; ++ struct bch_nvm_namespace *ns = only_set->nss[0]; ++ struct owner_list_head *owner_list_head; ++ struct nvm_pages_owner_head *owner_head; ++ bool update_owner = false; ++ u64 recs_pos = NVM_PAGES_SYS_RECS_HEAD_OFFSET; ++ struct list_head *list; ++ int i, j; ++ ++ owner_list_head = kzalloc(sizeof(*owner_list_head), GFP_KERNEL); ++ recs = kmalloc(sizeof(*recs), GFP_KERNEL); ++ if (!owner_list_head || !recs) { ++ pr_info("can't alloc memory\n"); ++ kfree(owner_list_head); ++ kfree(recs); ++ return; ++ } ++ ++ // in-memory owner maybe not contain alloced-pages. 
++ for (i = 0; i < only_set->owner_list_size; i++) { ++ owner_head = &owner_list_head->heads[owner_list_head->size]; ++ owner_list = only_set->owner_lists[i]; ++ ++ for (j = 0; j < only_set->total_namespaces_nr; j++) { ++ struct bch_nvm_alloced_recs *extents = owner_list->alloced_recs[j]; ++ ++ if (!extents || !extents->size) ++ continue; ++ ++ init_pgalloc_recs(recs, owner_list->owner_uuid); ++ ++ BUG_ON(recs_pos >= NVM_PAGES_OFFSET); ++ owner_head->recs[j] = (struct nvm_pgalloc_recs *)recs_pos; ++ ++ for (list = extents->extent_head.next; ++ list != &extents->extent_head; ++ list = list->next) { ++ extent = container_of(list, struct bch_extent, list); ++ ++ if (recs->size == MAX_RECORD) { ++ BUG_ON(recs_pos >= NVM_PAGES_OFFSET); ++ recs->next = ++ (struct nvm_pgalloc_recs *)(recs_pos + RECS_LEN); ++ memcpy_flushcache(ns->kaddr + recs_pos, recs, RECS_LEN); ++ init_pgalloc_recs(recs, owner_list->owner_uuid); ++ recs_pos += RECS_LEN; ++ } ++ ++ recs->recs[recs->size].pgoff = ++ vaddr_to_nvm_pgoff(only_set->nss[j], extent->kaddr); ++ recs->recs[recs->size].nr = extent->nr; ++ recs->size++; ++ } ++ ++ update_owner = true; ++ memcpy_flushcache(ns->kaddr + recs_pos, recs, RECS_LEN); ++ recs_pos += sizeof(struct nvm_pgalloc_recs); ++ } ++ ++ if (update_owner) { ++ memcpy(owner_head->uuid, owner_list->owner_uuid, 16); ++ owner_list_head->size++; ++ update_owner = false; ++ } ++ } ++ ++ memcpy_flushcache(ns->kaddr + NVM_PAGES_OWNER_LIST_HEAD_OFFSET, ++ (void *)owner_list_head, sizeof(struct owner_list_head)); ++ kfree(owner_list_head); ++} ++ + static void __free_space(struct bch_nvm_namespace *ns, void *addr, int order) + { + unsigned int add_pages = (1 << order); +@@ -318,6 +406,7 @@ void bch_nvm_free_pages(void *addr, int order, const char *owner_uuid) + } + + __free_space(ns, addr, order); ++ write_owner_info(); + + unlock: + mutex_unlock(&only_set->lock); +@@ -376,7 +465,8 @@ void *bch_nvm_alloc_pages(int order, const char *owner_uuid) + break; + } + } +- ++ if 
(kaddr) ++ write_owner_info(); + mutex_unlock(&only_set->lock); + return kaddr; + } +-- +2.26.2 + diff --git a/for-test/nvdimm-meta/v1/v1-0008-bcache-testing-module-for-nvm-pages-allocator.patch b/for-test/nvdimm-meta/v1/v1-0008-bcache-testing-module-for-nvm-pages-allocator.patch new file mode 100644 index 0000000..499ba9f --- /dev/null +++ b/for-test/nvdimm-meta/v1/v1-0008-bcache-testing-module-for-nvm-pages-allocator.patch @@ -0,0 +1,168 @@ +From cafe7a7d88c5585454eb1285218db1e60a174797 Mon Sep 17 00:00:00 2001 +From: Qiaowei Ren <qiaowei.ren@intel.com> +Date: Wed, 23 Dec 2020 09:45:02 -0500 +Subject: [PATCH v1 8/8] bcache: testing module for nvm pages allocator + +This patch creates the testing module for the nvm pages allocator. +Before this module is loaded, the super block needs to be written +into the nvdimm device (like /dev/pmemX). + +Signed-off-by: Jianpeng Ma <jianpeng.ma@intel.com> +Signed-off-by: Qiaowei Ren <qiaowei.ren@intel.com> +--- + drivers/md/bcache/Kconfig | 6 ++ + drivers/md/bcache/Makefile | 2 + + drivers/md/bcache/test-nvm.c | 117 +++++++++++++++++++++++++++++++++++ + 3 files changed, 125 insertions(+) + create mode 100644 drivers/md/bcache/test-nvm.c + +diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig +index fdec9905ef40..68302a9cd476 100644 +--- a/drivers/md/bcache/Kconfig ++++ b/drivers/md/bcache/Kconfig +@@ -41,3 +41,9 @@ config BCACHE_NVM_PAGES + depends on BCACHE + help + nvm pages allocator for bcache. ++ ++config BCACHE_NVM_PAGES_TEST ++ tristate "Testing for NVM pages" ++ depends on BCACHE_NVM_PAGES ++ help ++ Testing module for NVM pages allocator. 
+diff --git a/drivers/md/bcache/Makefile b/drivers/md/bcache/Makefile +index 948e5ed2ca66..7b7d3535f4ef 100644 +--- a/drivers/md/bcache/Makefile ++++ b/drivers/md/bcache/Makefile +@@ -5,3 +5,5 @@ obj-$(CONFIG_BCACHE) += bcache.o + bcache-y := alloc.o bset.o btree.o closure.o debug.o extents.o\ + io.o journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\ + util.o writeback.o features.o nvm-pages.o ++ ++obj-$(CONFIG_BCACHE_NVM_PAGES_TEST) += test-nvm.o +diff --git a/drivers/md/bcache/test-nvm.c b/drivers/md/bcache/test-nvm.c +new file mode 100644 +index 000000000000..8b9f2c1e7825 +--- /dev/null ++++ b/drivers/md/bcache/test-nvm.c +@@ -0,0 +1,117 @@ ++// SPDX-License-Identifier: GPL-2.0 ++ ++#include <linux/init.h> ++#include <linux/kernel.h> ++#include <linux/module.h> ++#include <linux/slab.h> ++#include <linux/crc32.h> ++#include <linux/uuid.h> ++#include <linux/prandom.h> ++#include <linux/pagemap.h> ++#include <linux/pfn_t.h> ++#include "nvm-pages.h" ++ ++static char *host = "NVDIMM device name"; ++module_param(host, charp, 0444); ++ ++#define MAX_OWNER 10 ++ ++static pgoff_t vaddr_to_nvm_pgoff(struct bch_nvm_namespace *ns, void *kaddr) ++{ ++ return (kaddr - ns->kaddr - ns->pages_offset) / PAGE_SIZE; ++} ++ ++static void print_nvm_extent(struct bch_nvm_alloced_recs *extents) ++{ ++ struct list_head *list = extents->extent_head.next; ++ struct bch_nvm_namespace *ns = extents->ns; ++ struct bch_extent *e; ++ pgoff_t pgoff; ++ ++ while (list != &extents->extent_head) { ++ e = container_of(list, struct bch_extent, list); ++ pgoff = vaddr_to_nvm_pgoff(ns, e->kaddr); ++ pr_info(" [%ld ~ %u)", pgoff, e->nr); ++ list = list->next; ++ } ++ pr_info("\n"); ++} ++ ++static void print_owner_list_info(struct bch_nvm_set *nvm_set, bool print_extent) ++{ ++ struct bch_owner_list *owner_list; ++ struct bch_nvm_alloced_recs *extents; ++ int i, j; ++ ++ for (i = 0; i < nvm_set->owner_list_size; i++) { ++ owner_list = nvm_set->owner_lists[i]; ++ pr_info("owner 
uuid=%pU\n", owner_list->owner_uuid); ++ for (j = 0; j < nvm_set->total_namespaces_nr; j++) { ++ if (owner_list->alloced_recs[j]) { ++ extents = owner_list->alloced_recs[j]; ++ pr_info("\t nvm uuid=%pU, allocated extents=%u\n", ++ extents->ns->uuid, extents->size); ++ if (print_extent) ++ print_nvm_extent(extents); ++ } ++ } ++ } ++} ++ ++static void test_case(struct bch_nvm_set *nvm_set, char **owner_uuids) ++{ ++ int i, order; ++ void *addr[MAX_OWNER]; ++ ++ for (i = 0; i < MAX_OWNER; i++) { ++ order = prandom_u32() % MAX_ORDER; ++ addr[i] = bch_nvm_alloc_pages(order, owner_uuids[i]); ++ } ++ print_owner_list_info(nvm_set, true); ++ for (i = 0; i < MAX_OWNER; i++) { ++ struct page *page = virt_to_page(addr[i]); ++ ++ bch_nvm_free_pages(addr[i], page->private, owner_uuids[i]); ++ } ++ print_owner_list_info(nvm_set, true); ++} ++ ++static int __init test_nvm_init(void) ++{ ++ char **owner_uuids; ++ struct bch_nvm_set *nvm_set; ++ struct bch_nvm_namespace *ns = bch_register_namespace(host); ++ int i, r = 0; ++ ++ pr_info("nvm pages test enter: %s\n", host); ++ if (IS_ERR(ns)) { ++ pr_info("failed to register namespace: %s\n", host); ++ r = -EINVAL; ++ goto err; ++ } ++ ++ owner_uuids = kcalloc(MAX_OWNER, sizeof(char *), GFP_KERNEL); ++ for (i = 0; i < MAX_OWNER; i++) { ++ owner_uuids[i] = kmalloc(16, GFP_KERNEL); ++ generate_random_uuid(owner_uuids[i]); ++ } ++ ++ nvm_set = ns->nvm_set; ++ test_case(nvm_set, owner_uuids); ++ ++ for (i = 0; i < MAX_OWNER; i++) ++ kfree(owner_uuids[i]); ++ kfree(owner_uuids); ++ ++err: ++ return r; ++} ++module_init(test_nvm_init); ++ ++static void __exit test_nvm_exit(void) ++{ ++ pr_info("nvm pages test exit\n"); ++} ++module_exit(test_nvm_exit); ++ ++MODULE_LICENSE("GPL v2"); +-- +2.26.2 + |