aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Ankit Kumar <ankit.kumar@samsung.com> 2024-02-13 21:03:12 +0530
committer Vincent Fu <vincent.fu@samsung.com> 2024-02-13 14:24:59 -0500
commit be5514e3827db9e1a34009a38d51b9df75fa9d03 (patch)
tree 562c686f92e72562652a9c2bd2d7c59fa6853b56
parent 7f6a38693b8a695f5b6d4789341c1946f77ace24 (diff)
download fio-be5514e3827db9e1a34009a38d51b9df75fa9d03.tar.gz
engines/xnvme: add support for metadata
This enables support for separate metadata buffers with the xnvme ioengine. This is done by providing the xnvme-specific option md_per_io_size, which, for the sake of consistency, is the same option used by the io_uring_cmd engine and SPDK's external ioengine.

Bump up the required xnvme version to v0.7.4.

Signed-off-by: Ankit Kumar <ankit.kumar@samsung.com>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Link: https://lore.kernel.org/r/20240213153315.134202-3-ankit.kumar@samsung.com
Signed-off-by: Vincent Fu <vincent.fu@samsung.com>
-rw-r--r-- HOWTO.rst 2
-rwxr-xr-x configure 2
-rw-r--r-- engines/xnvme.c 122
-rw-r--r-- fio.1 2
4 files changed, 114 insertions, 14 deletions
diff --git a/HOWTO.rst b/HOWTO.rst
index e5a5be037..04b055d94 100644
--- a/HOWTO.rst
+++ b/HOWTO.rst
@@ -2491,7 +2491,7 @@ with the caveat that when used on the command line, they must come after the
want fio to use placement identifier only at indices 0, 2 and 5 specify
``fdp_pli=0,2,5``.
-.. option:: md_per_io_size=int : [io_uring_cmd]
+.. option:: md_per_io_size=int : [io_uring_cmd] [xnvme]
Size in bytes for separate metadata buffer per IO. Default: 0.
diff --git a/configure b/configure
index becb193ec..3eef022b9 100755
--- a/configure
+++ b/configure
@@ -2697,7 +2697,7 @@ fi
##########################################
# Check if we have xnvme
if test "$xnvme" != "no" ; then
- if check_min_lib_version xnvme 0.7.0; then
+ if check_min_lib_version xnvme 0.7.4; then
xnvme="yes"
xnvme_cflags=$(pkg-config --cflags xnvme)
xnvme_libs=$(pkg-config --libs xnvme)
diff --git a/engines/xnvme.c b/engines/xnvme.c
index 88bbba4e2..da32678dd 100644
--- a/engines/xnvme.c
+++ b/engines/xnvme.c
@@ -30,8 +30,10 @@ struct xnvme_fioe_fwrap {
uint32_t ssw;
uint32_t lba_nbytes;
+ uint32_t md_nbytes;
+ uint32_t lba_pow2;
- uint8_t _pad[24];
+ uint8_t _pad[16];
};
XNVME_STATIC_ASSERT(sizeof(struct xnvme_fioe_fwrap) == 64, "Incorrect size")
@@ -58,19 +60,24 @@ struct xnvme_fioe_data {
uint64_t nallocated;
struct iovec *iovec;
-
- uint8_t _pad[8];
+ struct iovec *md_iovec;
struct xnvme_fioe_fwrap files[];
};
XNVME_STATIC_ASSERT(sizeof(struct xnvme_fioe_data) == 64, "Incorrect size")
+struct xnvme_fioe_request {
+ /* Separate metadata buffer pointer */
+ void *md_buf;
+};
+
struct xnvme_fioe_options {
void *padding;
unsigned int hipri;
unsigned int sqpoll_thread;
unsigned int xnvme_dev_nsid;
unsigned int xnvme_iovec;
+ unsigned int md_per_io_size;
char *xnvme_be;
char *xnvme_mem;
char *xnvme_async;
@@ -171,6 +178,16 @@ static struct fio_option options[] = {
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_XNVME,
},
+ {
+ .name = "md_per_io_size",
+ .lname = "Separate Metadata Buffer Size per I/O",
+ .type = FIO_OPT_INT,
+ .off1 = offsetof(struct xnvme_fioe_options, md_per_io_size),
+ .def = "0",
+ .help = "Size of separate metadata buffer per I/O (Default: 0)",
+ .category = FIO_OPT_C_ENGINE,
+ .group = FIO_OPT_G_XNVME,
+ },
{
.name = NULL,
@@ -249,6 +266,7 @@ static void xnvme_fioe_cleanup(struct thread_data *td)
free(xd->iocq);
free(xd->iovec);
+ free(xd->md_iovec);
free(xd);
td->io_ops_data = NULL;
}
@@ -297,6 +315,12 @@ static int _dev_open(struct thread_data *td, struct fio_file *f)
fwrap->ssw = xnvme_dev_get_ssw(fwrap->dev);
fwrap->lba_nbytes = fwrap->geo->lba_nbytes;
+ fwrap->md_nbytes = fwrap->geo->nbytes_oob;
+
+ if (fwrap->geo->lba_extended)
+ fwrap->lba_pow2 = 0;
+ else
+ fwrap->lba_pow2 = 1;
fwrap->fio_file = f;
fwrap->fio_file->filetype = FIO_TYPE_BLOCK;
@@ -358,6 +382,17 @@ static int xnvme_fioe_init(struct thread_data *td)
}
}
+ if (o->xnvme_iovec && o->md_per_io_size) {
+ xd->md_iovec = calloc(td->o.iodepth, sizeof(*xd->md_iovec));
+ if (!xd->md_iovec) {
+ free(xd->iocq);
+ free(xd->iovec);
+ free(xd);
+ log_err("ioeng->init(): !calloc(xd->md_iovec), err(%d)\n", errno);
+ return 1;
+ }
+ }
+
xd->prev = -1;
td->io_ops_data = xd;
@@ -365,8 +400,8 @@ static int xnvme_fioe_init(struct thread_data *td)
{
if (_dev_open(td, f)) {
/*
- * Note: We are not freeing xd, iocq and iovec. This
- * will be done as part of cleanup routine.
+ * Note: We are not freeing xd, iocq, iovec and md_iovec.
+ * This will be done as part of cleanup routine.
*/
log_err("ioeng->init(): failed; _dev_open(%s)\n", f->file_name);
return 1;
@@ -421,13 +456,61 @@ static void xnvme_fioe_iomem_free(struct thread_data *td)
static int xnvme_fioe_io_u_init(struct thread_data *td, struct io_u *io_u)
{
+ struct xnvme_fioe_request *fio_req;
+ struct xnvme_fioe_options *o = td->eo;
+ struct xnvme_fioe_data *xd = td->io_ops_data;
+ struct xnvme_fioe_fwrap *fwrap = &xd->files[0];
+
+ if (!fwrap->dev) {
+ log_err("ioeng->io_u_init(): failed; no dev-handle\n");
+ return 1;
+ }
+
io_u->mmap_data = td->io_ops_data;
+ io_u->engine_data = NULL;
+
+ fio_req = calloc(1, sizeof(*fio_req));
+ if (!fio_req) {
+ log_err("ioeng->io_u_init(): !calloc(fio_req), err(%d)\n", errno);
+ return 1;
+ }
+
+ if (o->md_per_io_size) {
+ fio_req->md_buf = xnvme_buf_alloc(fwrap->dev, o->md_per_io_size);
+ if (!fio_req->md_buf) {
+ free(fio_req);
+ return 1;
+ }
+ }
+
+ io_u->engine_data = fio_req;
return 0;
}
static void xnvme_fioe_io_u_free(struct thread_data *td, struct io_u *io_u)
{
+ struct xnvme_fioe_data *xd = NULL;
+ struct xnvme_fioe_fwrap *fwrap = NULL;
+ struct xnvme_fioe_request *fio_req = NULL;
+
+ if (!td->io_ops_data)
+ return;
+
+ xd = td->io_ops_data;
+ fwrap = &xd->files[0];
+
+ if (!fwrap->dev) {
+ log_err("ioeng->io_u_free(): failed no dev-handle\n");
+ return;
+ }
+
+ fio_req = io_u->engine_data;
+ if (fio_req->md_buf)
+ xnvme_buf_free(fwrap->dev, fio_req->md_buf);
+
+ free(fio_req);
+
io_u->mmap_data = NULL;
}
@@ -504,6 +587,7 @@ static enum fio_q_status xnvme_fioe_queue(struct thread_data *td, struct io_u *i
struct xnvme_fioe_data *xd = td->io_ops_data;
struct xnvme_fioe_fwrap *fwrap;
struct xnvme_cmd_ctx *ctx;
+ struct xnvme_fioe_request *fio_req = io_u->engine_data;
uint32_t nsid;
uint64_t slba;
uint16_t nlb;
@@ -516,8 +600,13 @@ static enum fio_q_status xnvme_fioe_queue(struct thread_data *td, struct io_u *i
fwrap = &xd->files[io_u->file->fileno];
nsid = xnvme_dev_get_nsid(fwrap->dev);
- slba = io_u->offset >> fwrap->ssw;
- nlb = (io_u->xfer_buflen >> fwrap->ssw) - 1;
+ if (fwrap->lba_pow2) {
+ slba = io_u->offset >> fwrap->ssw;
+ nlb = (io_u->xfer_buflen >> fwrap->ssw) - 1;
+ } else {
+ slba = io_u->offset / fwrap->lba_nbytes;
+ nlb = (io_u->xfer_buflen / fwrap->lba_nbytes) - 1;
+ }
ctx = xnvme_queue_get_cmd_ctx(fwrap->queue);
ctx->async.cb_arg = io_u;
@@ -551,11 +640,22 @@ static enum fio_q_status xnvme_fioe_queue(struct thread_data *td, struct io_u *i
if (vectored_io) {
xd->iovec[io_u->index].iov_base = io_u->xfer_buf;
xd->iovec[io_u->index].iov_len = io_u->xfer_buflen;
-
- err = xnvme_cmd_passv(ctx, &xd->iovec[io_u->index], 1, io_u->xfer_buflen, NULL, 0,
- 0);
+ if (fwrap->md_nbytes && fwrap->lba_pow2) {
+ xd->md_iovec[io_u->index].iov_base = fio_req->md_buf;
+ xd->md_iovec[io_u->index].iov_len = fwrap->md_nbytes * (nlb + 1);
+ err = xnvme_cmd_passv(ctx, &xd->iovec[io_u->index], 1, io_u->xfer_buflen,
+ &xd->md_iovec[io_u->index], 1,
+ fwrap->md_nbytes * (nlb + 1));
+ } else {
+ err = xnvme_cmd_passv(ctx, &xd->iovec[io_u->index], 1, io_u->xfer_buflen,
+ NULL, 0, 0);
+ }
} else {
- err = xnvme_cmd_pass(ctx, io_u->xfer_buf, io_u->xfer_buflen, NULL, 0);
+ if (fwrap->md_nbytes && fwrap->lba_pow2)
+ err = xnvme_cmd_pass(ctx, io_u->xfer_buf, io_u->xfer_buflen,
+ fio_req->md_buf, fwrap->md_nbytes * (nlb + 1));
+ else
+ err = xnvme_cmd_pass(ctx, io_u->xfer_buf, io_u->xfer_buflen, NULL, 0);
}
switch (err) {
case 0:
diff --git a/fio.1 b/fio.1
index d832dba2d..437fbebcf 100644
--- a/fio.1
+++ b/fio.1
@@ -2251,7 +2251,7 @@ By default, the job will cycle through all available Placement IDs, so use this
to isolate these identifiers to specific jobs. If you want fio to use placement
identifier only at indices 0, 2 and 5 specify, you would set `fdp_pli=0,2,5`.
.TP
-.BI (io_uring_cmd)md_per_io_size \fR=\fPint
+.BI (io_uring_cmd,xnvme)md_per_io_size \fR=\fPint
Size in bytes for separate metadata buffer per IO. Default: 0.
.TP
.BI (io_uring_cmd)pi_act \fR=\fPint