diff options
author | Jens Axboe <axboe@kernel.dk> | 2022-07-01 07:25:15 -0600 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2022-07-01 09:36:53 -0600 |
commit | 17bb6d996e72f85572af3cb8aa1ae92f3c2617c1 (patch) | |
tree | a2211519a66da81b51c87305f2f6e930f2a9221c | |
parent | 660879102e32a0ed3d3225afaebcc0d46625a4a6 (diff) | |
download | fio-random-fill.tar.gz |
io_u: use hw assisted random filling for buffers (branch: random-fill)
If we're using refill_buffers and not doing any specific dedupe or
verification that requires a persistent buffer via a seed, then we
can just use hw assisted crc32c for a nice speedup here.
This bumps perf by about 2.4x for me in fill rate on aarch64, and
about a 28% improvement on x86-64 (using a 12900K).
Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r-- | io_u.c | 15 |
1 file changed, 14 insertions(+), 1 deletion(-)
@@ -11,6 +11,7 @@ #include "lib/pow2.h" #include "minmax.h" #include "zbd.h" +#include "crc/crc32c.h" struct io_completion_data { int nr; /* input */ @@ -2221,6 +2222,18 @@ static void save_buf_state(struct thread_data *td, struct frand_state *rs) frand_copy(&td->buf_state_prev, rs); } +static void fill_random_buf_nostate(struct thread_data *td, void *buf, + unsigned int len) +{ +#if defined(ARCH_HAVE_CRC_CRYPTO) + crc32c_arm64(buf, len); +#elif defined(ARCH_HAVE_SSE4_2) + crc32c_intel(buf, len); +#else + fill_random_buf(&td->buf_state, buf, len); +#endif +} + void fill_io_buffer(struct thread_data *td, void *buf, unsigned long long min_write, unsigned long long max_bs) { @@ -2265,7 +2278,7 @@ void fill_io_buffer(struct thread_data *td, void *buf, unsigned long long min_wr else if (o->zero_buffers) memset(buf, 0, max_bs); else - fill_random_buf(get_buf_state(td), buf, max_bs); + fill_random_buf_nostate(td, buf, max_bs); } /* |