summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Chris Mason <clm@fb.com>, 2017-05-20 15:24:17 -0700
committer: Chris Mason <clm@fb.com>, 2017-05-31 09:56:00 -0700
commit: cdee006c84dac5e9e00f6eb908953d1a012c4753 (patch)
tree: 90d240088827d87d8d12af8daa6f99432cf3ba8d
parent: 8ba0813af2452d28b55a408787c7b1f29becd0dd (diff)
download: simoop-cdee006c84dac5e9e00f6eb908953d1a012c4753.tar.gz
add O_DIRECT support
Signed-off-by: Chris Mason <clm@fb.com>
-rw-r--r-- Makefile | 2
-rw-r--r-- simoop.c | 247
2 files changed, 210 insertions, 39 deletions
diff --git a/Makefile b/Makefile
index 4aa0fc4..254d705 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
CC = gcc
-CFLAGS = -Wall -O2 -g -W
+CFLAGS = -Wall -O0 -g -W
ALL_CFLAGS = $(CFLAGS) -D_GNU_SOURCE -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64
PROGS = simoop
diff --git a/simoop.c b/simoop.c
index b2025e4..697a096 100644
--- a/simoop.c
+++ b/simoop.c
@@ -38,10 +38,15 @@
/* how deep a directory chain to make */
#define DIR_LEVEL 64
-/* buffer size for reads and writes */
-#define BUF_SIZE (1 * 1024 * 1024)
+/* buffer size for reads and writes during filler */
+#define BUF_SIZE (10 * 1024 * 1024)
#define NAME_LEN 256
+#define FILES_SPLIT 8
+
+/*
+ * Fallback used only when the system headers did not already provide
+ * O_DIRECT.
+ * NOTE(review): the numeric value of O_DIRECT differs between Linux
+ * architectures; confirm this octal constant is right for every target
+ * that can actually reach this fallback.
+ */
+#ifndef O_DIRECT
+# define O_DIRECT 00040000
+#endif
/*
* we make a few different kinds of files, these are appended onto the
@@ -81,6 +86,8 @@ static int funksync = 0;
static int append_mode = 0;
/* randomize the write size */
static int oddsizes = 0;
+/* use odirect sometimes */
+static int odirect = 0;
/* -M how much memory we allocate to benchmark allocations */
static int mmap_size = 64 * 1024 * 1024;
@@ -408,7 +415,7 @@ unsigned long long parse_size(char *s)
return ret;
}
-char *option_string = "t:s:C:c:r:n:f:FR:T:m:W:M:w:i:D:oaI";
+char *option_string = "t:s:C:c:r:n:f:FR:T:m:W:M:w:i:D:oaO";
static struct option long_options[] = {
{"appendmode", required_argument, 0, 'a'},
{"mmapsize", required_argument, 0, 'M'},
@@ -428,6 +435,7 @@ static struct option long_options[] = {
{"memory", required_argument, 0, 'm'},
{"funksync", no_argument, 0, 'F'},
{"oddsizes", no_argument, 0, 'o'},
+ {"odirect", no_argument, 0, 'O'},
{"help", no_argument, 0, HELP_LONG_OPT},
{0, 0, 0, 0}
};
@@ -450,9 +458,10 @@ static void print_usage(void)
"\t-R (--readsize): amount to read from each file (2M)\n"
"\t-W (--writesize): amount to write to tmp files (2M)\n"
"\t-T (--rwthreads): how many threads to read/write (8)\n"
- "\t-D (--duthraeds): how many threads to scanning the working dirs (1)\n"
+ "\t-D (--duthraeds): how many threads to scanning the working dirs (0)\n"
"\t-F (--funksync): should we fsync;truncate(0);fsync after writes\n"
"\t-o (--oddsizes): randomize sizes to unaligned values\n"
+ "\t-O (--odirect): use O_DIRECT sometimes\n"
"\t dir1 [dir2 ... dirN]\n"
"\nall sizes are in bytes k,m,g,t modifiers can be used\n"
);
@@ -515,6 +524,7 @@ static void parse_options(int ac, char **av)
break;
case 'n':
num_files = atoi(optarg);
+ num_files = ((num_files + FILES_SPLIT - 1)/ FILES_SPLIT) * FILES_SPLIT;
break;
case 'R':
read_size = parse_size(optarg);
@@ -533,6 +543,9 @@ static void parse_options(int ac, char **av)
case 'o':
oddsizes = 1;
break;
+ case 'O':
+ odirect = 1;
+ break;
case '?':
case HELP_LONG_OPT:
print_usage();
@@ -867,19 +880,24 @@ static void join_path(char *name, char *path, int seq, char *postfix)
}
}
+static void read_whole_file(char *path, int seq, char *postfix,
+ char *buf, size_t buf_size);
+
/* unlink working files not part of the main dataset for a given filename. */
-static void unlink_extra(char *path, int seq)
+static void unlink_extra(char *path, int seq, char *buf, size_t buf_size)
{
char name[NAME_LEN];
int ret;
join_path(name, path, seq, RESULT_FILE);
+ read_whole_file(path, seq, RESULT_FILE, buf, buf_size);
ret = unlink(name);
if (ret < 0 && errno != ENOENT) {
perror("unlink");
exit(1);
}
join_path(name, path, seq, TMP_FILE);
+ read_whole_file(path, seq, TMP_FILE, buf, buf_size);
ret = unlink(name);
if (ret < 0 && errno != ENOENT) {
perror("unlink");
@@ -910,10 +928,96 @@ static loff_t randomize_size(int sz)
return rand() % sz;
}
+/*
+ * When the odirect option is enabled, possibly flip O_DIRECT on an open
+ * fd ahead of an IO so the same file sees a mix of buffered and direct IO.
+ *
+ * fd:    open file descriptor whose status flags may be changed
+ * start: file offset of the upcoming IO
+ * len:   length in bytes of the upcoming IO
+ *
+ * If start or len is not a multiple of 512, O_DIRECT is cleared (when it
+ * was set) and nothing else happens.  Otherwise the flag is flipped when
+ * rand() % 3 == 0.  Any fcntl() failure prints an error and exits.
+ *
+ * NOTE(review): only the offset and length are checked here, not the
+ * alignment of the caller's memory buffer, and 512 is assumed to be a
+ * sufficient alignment for the underlying device -- confirm.
+ */
+static void maybe_toggle_odirect(int fd, unsigned long start,
+				 unsigned long len)
+{
+	int flags;
+	int new_flags;
+
+	if (!odirect)
+		return;
+
+	/*
+	 * F_GETFL returns -1 on failure.  Without this check every flag bit
+	 * would look set and a garbage value would be handed to F_SETFL.
+	 */
+	flags = fcntl(fd, F_GETFL);
+	if (flags < 0) {
+		perror("fcntl");
+		exit(1);
+	}
+
+	if ((start & 511) || (len & 511)) {
+		/*
+		 * if we're doing an unaligned IO, make sure O_DIRECT is
+		 * off and exit
+		 */
+		if (!(flags & O_DIRECT))
+			return;
+		new_flags = flags & ~O_DIRECT;
+	} else {
+		/* aligned IO: only flip the flag some of the time */
+		if ((rand() % 3) != 0)
+			return;
+		new_flags = (flags & O_DIRECT) ? (flags & ~O_DIRECT) :
+						 (flags | O_DIRECT);
+	}
+
+	if (fcntl(fd, F_SETFL, new_flags) < 0) {
+		perror("fcntl");
+		exit(1);
+	}
+}
+
+/*
+ * pwrite() len bytes from buf at file offset off, looping over short
+ * writes until all of it has been submitted.  A pwrite() return of zero
+ * or less prints an error and aborts the process.
+ */
+static void send_pwrite_chunk(int fd, char *buf, ssize_t len, loff_t off)
+{
+	ssize_t ret;
+
+	while (len > 0) {
+		ret = pwrite(fd, buf, len, off);
+		if (ret <= 0) {
+			perror("pwrite");
+			abort();
+		}
+		buf += ret;
+		off += ret;
+		len -= ret;
+	}
+}
+
+/*
+ * Write bytes from buf to fd starting at file offset start, splitting the
+ * IO into up to three randomly sized leading pieces (each a multiple of
+ * VERIFY_ALIGNMENT) followed by one write for whatever is left.
+ *
+ * fd:    destination file descriptor
+ * buf:   data to write
+ * start: file offset of the first byte
+ * bytes: total number of bytes to write
+ *
+ * maybe_toggle_odirect() is consulted before every piece with the offset
+ * and length of that piece, so an unaligned piece never goes out with
+ * O_DIRECT still set from an earlier toggle.
+ */
+static void send_pwrite(int fd, char *buf, loff_t start, ssize_t bytes)
+{
+	ssize_t this_write;
+	int num_headers = bytes / VERIFY_ALIGNMENT;
+	int index;
+	int i;
+
+	/*
+	 * the goal here is to break up our huge IO into
+	 * something that isn't completely page aligned.
+	 *
+	 * num_headers == 0 means the IO is smaller than one
+	 * VERIFY_ALIGNMENT unit; skip the split entirely so we never
+	 * compute rand() % 0.  index is always < num_headers, so at
+	 * least one unit is left over for the final write below.
+	 */
+	for (i = 0; i < 3 && num_headers > 0; i++) {
+		index = rand() % num_headers;
+		num_headers -= index;
+		this_write = (ssize_t)index * VERIFY_ALIGNMENT;
+
+		/* size the piece first so the alignment check sees it */
+		maybe_toggle_odirect(fd, start, this_write);
+		send_pwrite_chunk(fd, buf, this_write, start);
+
+		buf += this_write;
+		start += this_write;
+		bytes -= this_write;
+	}
+
+	if (bytes > 0) {
+		maybe_toggle_odirect(fd, start, bytes);
+		send_pwrite_chunk(fd, buf, bytes, start);
+	}
+}
+
static void write_pattern(int fd, void *xxhash_state, char *buf,
int buffer_len, loff_t start, off_t length)
{
- ssize_t ret;
loff_t aligned_start;
char *p;
ssize_t this_write;
@@ -943,11 +1047,8 @@ static void write_pattern(int fd, void *xxhash_state, char *buf,
p += VERIFY_ALIGNMENT;
}
- ret = pwrite(fd, buf, this_write, aligned_start);
- if (ret != this_write) {
- perror("pwrite");
- exit(1);
- }
+ send_pwrite(fd, buf, aligned_start, this_write);
+
aligned_start += this_write;
length -= this_write;
}
@@ -972,9 +1073,11 @@ static void read_and_crc(int fd, char *filename,
this_read = buffer_len;
else
this_read = length;
+ maybe_toggle_odirect(fd, aligned_start, this_read);
ret = pread(fd, buf, this_read, aligned_start);
if (ret != this_read) {
perror("pread");
+ fprintf(stderr, "pread start %lu bytes %lu ret %d\n", aligned_start, this_read, errno);
exit(1);
}
p = buf;
@@ -992,12 +1095,12 @@ static void read_and_crc(int fd, char *filename,
}
/* helper for startup, do initial writes to a given fd */
-static void fill_one_file(int fd, void *xxhash_state)
+static void fill_one_file(int fd, void *xxhash_state, char *buf, size_t buf_size)
+
{
struct stat st;
int ret;
loff_t cur_size;
- char *buf;
loff_t this_size = randomize_size(file_size);
ret = fstat(fd, &st);
@@ -1010,14 +1113,7 @@ static void fill_one_file(int fd, void *xxhash_state)
if (cur_size >= this_size)
return;
- ret = posix_memalign((void **)(&buf), getpagesize(), BUF_SIZE);
- if (ret) {
- perror("posix_memalign");
- exit(1);
- }
-
- write_pattern(fd, xxhash_state, buf, BUF_SIZE, cur_size, this_size - cur_size);
- free(buf);
+ write_pattern(fd, xxhash_state, buf, buf_size, cur_size, this_size - cur_size);
}
/*
@@ -1126,6 +1222,42 @@ static void read_from_file(char *path, int seq, char *buf)
XXH32_digest(xxhash_state);
}
+/*
+ * Read one working file from offset 0 through its current size via
+ * read_and_crc(), using the caller's buffer.
+ *
+ * path:     main directory the file lives under
+ * seq:      sequence number used by join_path() to build the file name
+ * postfix:  file-kind suffix appended by join_path()
+ * buf:      IO buffer supplied by the caller
+ * buf_size: size of buf in bytes, passed to read_and_crc() as the
+ *           buffer length
+ *
+ * A file that cannot be opened or stat'ed is skipped without an error.
+ * When write_size is 0, dirty_an_inode(path) is called eight times after
+ * the read.  The hash digest is computed and discarded.
+ */
+static void read_whole_file(char *path, int seq, char *postfix,
+			    char *buf, size_t buf_size)
+{
+	int fd;
+	int i;
+	struct stat st;
+	void *xxhash_state;
+	char name[NAME_LEN];
+
+	join_path(name, path, seq, postfix);
+	fd = open(name, O_RDONLY);
+	if (fd < 0)
+		return;
+
+	/* don't leak the descriptor if we can't get the file size */
+	if (fstat(fd, &st) < 0) {
+		close(fd);
+		return;
+	}
+
+	xxhash_state = XXH32_init(global_rand_seed);
+	read_and_crc(fd, name, xxhash_state, buf, buf_size, 0, st.st_size);
+
+	/* if we don't have writers making dirty inodes, make some here */
+	if (!write_size) {
+		for (i = 0; i < 8; i++)
+			dirty_an_inode(path);
+	}
+	close(fd);
+	XXH32_digest(xxhash_state);
+}
+
/* creates a temp file in one of the subdirs and sends down write_bytes to it */
static void write_to_file(char *path, int seq, char *buf)
{
@@ -1134,8 +1266,11 @@ static void write_to_file(char *path, int seq, char *buf)
int write_bytes = randomize_size(write_size);
loff_t offset;
void *xxhash_state = XXH32_init(global_rand_seed);
+ char *postfix;
+ char name[NAME_LEN];
if (append_mode) {
+ postfix = DATA_FILE;
fd = open_path(path, seq, DATA_FILE, O_APPEND);
offset = lseek(fd, 0, SEEK_CUR);
if (offset < 0) {
@@ -1143,13 +1278,18 @@ static void write_to_file(char *path, int seq, char *buf)
exit(1);
}
} else {
+ postfix = RESULT_FILE;
fd = open_path(path, seq, RESULT_FILE, 0);
offset = 0;
}
- write_pattern(fd, xxhash_state, buf, write_bytes, offset, write_bytes);
+ write_pattern(fd, xxhash_state, buf, write_size, offset, write_bytes * 4);
XXH32_digest(xxhash_state);
+ join_path(name, path, seq, postfix);
+ read_and_crc(fd, name, xxhash_state, buf, write_size, offset,
+ write_bytes * 4);
+
close(fd);
/* make some dirty inodes */
@@ -1163,30 +1303,51 @@ static void write_to_file(char *path, int seq, char *buf)
}
/* make all the worker files under a main path */
+/*
+ * path:      main directory to populate
+ * seq_start: first file sequence number handled by this call
+ * seq_num:   number of consecutive sequence numbers to handle
+ *
+ * A single page-aligned buffer of BUF_SIZE bytes is allocated here, handed
+ * to every helper called from the loop, and freed before returning.
+ * Failure to allocate it prints an error and exits.
+ */
-static void make_files(char *path)
+static void make_files(char *path, unsigned long seq_start,
+ unsigned long seq_num)
{
unsigned long seq;
int fd;
void *xxhash_state = XXH32_init(global_rand_seed);
+ int ret;
+ char *buf;
+
+ /* page-aligned buffer shared by all the reads and writes below */
+ ret = posix_memalign((void **)(&buf), getpagesize(), BUF_SIZE);
+ if (ret) {
+ perror("posix_memalign");
+ exit(1);
+ }
+
+
+ for (seq = seq_start; seq < seq_start + seq_num; seq++) {
+ /* read back whatever is already in the data file before appending */
+ read_whole_file(path, seq, DATA_FILE, buf, BUF_SIZE);
- for (seq = 0; seq < num_files; seq++) {
fd = open_path(path, seq, DATA_FILE, O_APPEND);
- fill_one_file(fd, xxhash_state);
+ fill_one_file(fd, xxhash_state, buf, BUF_SIZE);
close(fd);
/* cleanup from the last run */
- unlink_extra(path, seq);
+ unlink_extra(path, seq, buf, BUF_SIZE);
}
+ free(buf);
/* just to free the state */
XXH32_digest(xxhash_state);
}
+/*
+ * Work description for one filler_thread().  It is allocated by
+ * run_filler_threads() and freed by the thread once it has finished.
+ */
+struct filler {
+ /* main directory to fill; points at an entry of paths[], not a copy */
+ char *path;
+ /* first file sequence number this thread is responsible for */
+ unsigned long seq_start;
+ /* how many consecutive sequence numbers this thread handles */
+ unsigned long seq_num;
+};
+
+/*
+ * Thread entry point for the initial fill.  arg is a struct filler that
+ * this thread takes ownership of: it logs the range to stderr, runs
+ * make_dirs() and make_files() for that range, and frees the struct
+ * before returning.  The return value is always 0.
+ */
void *filler_thread(void *arg)
{
- char *path = arg;
- make_dirs(path);
- make_files(path);
+ struct filler *filler = arg;
+ fprintf(stderr, "filling %s start %lu num %lu\n", filler->path, filler->seq_start, filler->seq_num);
+ /* every thread working on the same path calls make_dirs() on it */
+ make_dirs(filler->path);
+ make_files(filler->path, filler->seq_start, filler->seq_num);
+ /* the struct was allocated for us by the thread that created us */
+ free(filler);
return 0;
}
@@ -1195,26 +1356,36 @@ void run_filler_threads(void)
{
int i;
int ret;
+ int j;
pthread_t *tids;
- tids = malloc(sizeof(*tids) * total_paths);
+ tids = malloc(sizeof(*tids) * total_paths * FILES_SPLIT);
if (!tids) {
perror("malloc");
exit(1);
}
fprintf(stderr, "Creating working files\n");
for (i = 0; i < total_paths; i++) {
- pthread_t tid;
- ret = pthread_create(&tid, NULL, filler_thread,
- paths[i]);
- if (ret) {
- fprintf(stderr, "error %d from pthread_create\n", ret);
- exit(1);
+ for (j = 0; j < FILES_SPLIT; j++) {
+ pthread_t tid;
+ struct filler *filler;
+ filler = malloc(sizeof(*filler));
+
+ filler->path = paths[i];
+ filler->seq_start = j * (num_files / FILES_SPLIT);
+ filler->seq_num = num_files / FILES_SPLIT;
+ ret = pthread_create(&tid, NULL, filler_thread, filler);
+ if (ret) {
+ fprintf(stderr, "error %d from pthread_create\n", ret);
+ exit(1);
+ }
+ tids[i * j] = tid;
}
- tids[i] = tid;
}
for (i = 0; i < total_paths; i++) {
- pthread_join(tids[i], NULL);
+ for (j = 0; j < FILES_SPLIT; j++) {
+ pthread_join(tids[i * j], NULL);
+ }
}
fprintf(stderr, "done creating working files\n");
free(tids);