diff options
author | Chris Mason <clm@fb.com> | 2017-05-20 15:24:17 -0700 |
---|---|---|
committer | Chris Mason <clm@fb.com> | 2017-05-31 09:56:00 -0700 |
commit | cdee006c84dac5e9e00f6eb908953d1a012c4753 (patch) | |
tree | 90d240088827d87d8d12af8daa6f99432cf3ba8d | |
parent | 8ba0813af2452d28b55a408787c7b1f29becd0dd (diff) | |
download | simoop-cdee006c84dac5e9e00f6eb908953d1a012c4753.tar.gz |
add O_DIRECT support
Signed-off-by: Chris Mason <clm@fb.com>
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | simoop.c | 247 |
2 files changed, 210 insertions, 39 deletions
@@ -1,5 +1,5 @@ CC = gcc -CFLAGS = -Wall -O2 -g -W +CFLAGS = -Wall -O0 -g -W ALL_CFLAGS = $(CFLAGS) -D_GNU_SOURCE -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 PROGS = simoop @@ -38,10 +38,15 @@ /* how deep a directory chain to make */ #define DIR_LEVEL 64 -/* buffer size for reads and writes */ -#define BUF_SIZE (1 * 1024 * 1024) +/* buffer size for reads and writes during filler */ +#define BUF_SIZE (10 * 1024 * 1024) #define NAME_LEN 256 +#define FILES_SPLIT 8 + +#ifndef O_DIRECT +# define O_DIRECT 00040000 +#endif /* * we make a few different kinds of files, these are appended onto the @@ -81,6 +86,8 @@ static int funksync = 0; static int append_mode = 0; /* randomize the write size */ static int oddsizes = 0; +/* use odirect sometimes */ +static int odirect = 0; /* -M how much memory we allocate to benchmark allocations */ static int mmap_size = 64 * 1024 * 1024; @@ -408,7 +415,7 @@ unsigned long long parse_size(char *s) return ret; } -char *option_string = "t:s:C:c:r:n:f:FR:T:m:W:M:w:i:D:oaI"; +char *option_string = "t:s:C:c:r:n:f:FR:T:m:W:M:w:i:D:oaO"; static struct option long_options[] = { {"appendmode", required_argument, 0, 'a'}, {"mmapsize", required_argument, 0, 'M'}, @@ -428,6 +435,7 @@ static struct option long_options[] = { {"memory", required_argument, 0, 'm'}, {"funksync", no_argument, 0, 'F'}, {"oddsizes", no_argument, 0, 'o'}, + {"odirect", no_argument, 0, 'O'}, {"help", no_argument, 0, HELP_LONG_OPT}, {0, 0, 0, 0} }; @@ -450,9 +458,10 @@ static void print_usage(void) "\t-R (--readsize): amount to read from each file (2M)\n" "\t-W (--writesize): amount to write to tmp files (2M)\n" "\t-T (--rwthreads): how many threads to read/write (8)\n" - "\t-D (--duthraeds): how many threads to scanning the working dirs (1)\n" + "\t-D (--duthraeds): how many threads to scanning the working dirs (0)\n" "\t-F (--funksync): should we fsync;truncate(0);fsync after writes\n" "\t-o (--oddsizes): randomize sizes to unaligned values\n" + "\t-O (--odirect): use O_DIRECT sometimes\n" "\t dir1 [dir2 ... dirN]\n" "\nall sizes are in bytes k,m,g,t modifiers can be used\n" ); @@ -515,6 +524,7 @@ static void parse_options(int ac, char **av) break; case 'n': num_files = atoi(optarg); + num_files = ((num_files + FILES_SPLIT - 1)/ FILES_SPLIT) * FILES_SPLIT; break; case 'R': read_size = parse_size(optarg); @@ -533,6 +543,9 @@ static void parse_options(int ac, char **av) case 'o': oddsizes = 1; break; + case 'O': + odirect = 1; + break; case '?': case HELP_LONG_OPT: print_usage(); @@ -867,19 +880,24 @@ static void join_path(char *name, char *path, int seq, char *postfix) } } +static void read_whole_file(char *path, int seq, char *postfix, + char *buf, size_t buf_size); + /* unlink working files not part of the main dataset for a given filename. */ -static void unlink_extra(char *path, int seq) +static void unlink_extra(char *path, int seq, char *buf, size_t buf_size) { char name[NAME_LEN]; int ret; join_path(name, path, seq, RESULT_FILE); + read_whole_file(path, seq, RESULT_FILE, buf, buf_size); ret = unlink(name); if (ret < 0 && errno != ENOENT) { perror("unlink"); exit(1); } join_path(name, path, seq, TMP_FILE); + read_whole_file(path, seq, TMP_FILE, buf, buf_size); ret = unlink(name); if (ret < 0 && errno != ENOENT) { perror("unlink"); @@ -910,10 +928,96 @@ static loff_t randomize_size(int sz) return rand() % sz; } +static void maybe_toggle_odirect(int fd, unsigned long start, + unsigned long len) +{ + int flags; + int ret; + + if (!odirect) + return; + + flags = fcntl(fd, F_GETFL); + + /* + * if we're doing an unaligned IO, turn off O_DIRECT and + * exit + */ + if ((start & 511) || (len & 511)) { + if (flags & O_DIRECT) { + ret = fcntl(fd, F_SETFL, flags & (~O_DIRECT)); + if (ret) { + perror("fcntl"); + exit(1); + } + } + return; + } + + if ((rand() % 3) != 0) + return; + + if (flags & O_DIRECT) { + ret = fcntl(fd, F_SETFL, flags & (~O_DIRECT)); + } else { + ret = fcntl(fd, F_SETFL, flags | O_DIRECT); + } + + if (ret) { + perror("fcntl"); + exit(1); + } +} + +static void send_pwrite(int fd, char *buf, loff_t start, ssize_t bytes) +{ + ssize_t this_write; + int ret; + int num_headers = bytes / VERIFY_ALIGNMENT; + int index; + int i; + + for (i = 0; i < 3; i++) { + maybe_toggle_odirect(fd, start, this_write); + /* + * the goal here is to break up our huge IO into + * something that isn't completely page aligned. + */ + index = rand() % num_headers; + num_headers -= index; + this_write = index * VERIFY_ALIGNMENT; + bytes -= this_write; + while (this_write > 0) { + ret = pwrite(fd, buf, this_write, start); + if (ret <= 0) { + perror("pwrite"); + abort(); + exit(1); + } + start += ret; + this_write -= ret; + buf += ret; + } + if (bytes == 0) + break; + } + + while (bytes > 0) { + ret = pwrite(fd, buf, bytes, start); + if (ret <= 0) { + perror("pwrite"); + abort(); + exit(1); + } + start += ret; + bytes -= ret; + buf += ret; + } +} + static void write_pattern(int fd, void *xxhash_state, char *buf, int buffer_len, loff_t start, off_t length) { - ssize_t ret; loff_t aligned_start; char *p; ssize_t this_write; @@ -943,11 +1047,8 @@ static void write_pattern(int fd, void *xxhash_state, char *buf, p += VERIFY_ALIGNMENT; } - ret = pwrite(fd, buf, this_write, aligned_start); - if (ret != this_write) { - perror("pwrite"); - exit(1); - } + send_pwrite(fd, buf, aligned_start, this_write); + aligned_start += this_write; length -= this_write; } @@ -972,9 +1073,11 @@ static void read_and_crc(int fd, char *filename, this_read = buffer_len; else this_read = length; + maybe_toggle_odirect(fd, aligned_start, this_read); ret = pread(fd, buf, this_read, aligned_start); if (ret != this_read) { perror("pread"); + fprintf(stderr, "pread start %lu bytes %lu ret %d\n", aligned_start, this_read, errno); exit(1); } p = buf; @@ -992,12 +1095,12 @@ static void read_and_crc(int fd, char *filename, } /* helper for startup, do initial writes to a given fd */ -static void fill_one_file(int fd, void *xxhash_state) +static void fill_one_file(int fd, void *xxhash_state, char *buf, size_t buf_size) + { struct stat st; int ret; loff_t cur_size; - char *buf; loff_t this_size = randomize_size(file_size); ret = fstat(fd, &st); @@ -1010,14 +1113,7 @@ static void fill_one_file(int fd, void *xxhash_state) if (cur_size >= this_size) return; - ret = posix_memalign((void **)(&buf), getpagesize(), BUF_SIZE); - if (ret) { - perror("posix_memalign"); - exit(1); - } - - write_pattern(fd, xxhash_state, buf, BUF_SIZE, cur_size, this_size - cur_size); - free(buf); + write_pattern(fd, xxhash_state, buf, buf_size, cur_size, this_size - cur_size); } /* @@ -1126,6 +1222,42 @@ static void read_from_file(char *path, int seq, char *buf) XXH32_digest(xxhash_state); } +static void read_whole_file(char *path, int seq, char *postfix, + char *buf, size_t buf_size) +{ + int fd; + int ret; + int i; + off_t offset; + ssize_t read_bytes = buf_size; + struct stat st; + void *xxhash_state; + char name[NAME_LEN]; + + join_path(name, path, seq, postfix); + fd = open(name, O_RDONLY, 0600); + if (fd < 0) + return; + + ret = fstat(fd, &st); + if (ret < 0) + return; + xxhash_state = XXH32_init(global_rand_seed); + + offset = 0; + + read_and_crc(fd, name, xxhash_state, buf, read_bytes, offset, + st.st_size); + + /* if we don't have writers making dirty inodes, make some here */ + if (!write_size) { + for (i = 0; i < 8; i++) + dirty_an_inode(path); + } + close(fd); + XXH32_digest(xxhash_state); +} + /* creates a temp file in one of the subdirs and sends down write_bytes to it */ static void write_to_file(char *path, int seq, char *buf) { @@ -1134,8 +1266,11 @@ static void write_to_file(char *path, int seq, char *buf) int write_bytes = randomize_size(write_size); loff_t offset; void *xxhash_state = XXH32_init(global_rand_seed); + char *postfix; + char name[NAME_LEN]; if (append_mode) { + postfix = DATA_FILE; fd = open_path(path, seq, DATA_FILE, O_APPEND); offset = lseek(fd, 0, SEEK_CUR); if (offset < 0) { @@ -1143,13 +1278,18 @@ static void write_to_file(char *path, int seq, char *buf) exit(1); } } else { + postfix = RESULT_FILE; fd = open_path(path, seq, RESULT_FILE, 0); offset = 0; } - write_pattern(fd, xxhash_state, buf, write_bytes, offset, write_bytes); + write_pattern(fd, xxhash_state, buf, write_size, offset, write_bytes * 4); XXH32_digest(xxhash_state); + join_path(name, path, seq, postfix); + read_and_crc(fd, name, xxhash_state, buf, write_size, offset, + write_bytes * 4); + close(fd); /* make some dirty inodes */ @@ -1163,30 +1303,51 @@ static void write_to_file(char *path, int seq, char *buf) } /* make all the worker files under a main path */ -static void make_files(char *path) +static void make_files(char *path, unsigned long seq_start, + unsigned long seq_num) { unsigned long seq; int fd; void *xxhash_state = XXH32_init(global_rand_seed); + int ret; + char *buf; + + ret = posix_memalign((void **)(&buf), getpagesize(), BUF_SIZE); + if (ret) { + perror("posix_memalign"); + exit(1); + } + + + for (seq = seq_start; seq < seq_start + seq_num; seq++) { + read_whole_file(path, seq, DATA_FILE, buf, BUF_SIZE); - for (seq = 0; seq < num_files; seq++) { fd = open_path(path, seq, DATA_FILE, O_APPEND); - fill_one_file(fd, xxhash_state); + fill_one_file(fd, xxhash_state, buf, BUF_SIZE); close(fd); /* cleanup from the last run */ - unlink_extra(path, seq); + unlink_extra(path, seq, buf, BUF_SIZE); } + free(buf); /* just to free the state */ XXH32_digest(xxhash_state); } +struct filler { + char *path; + unsigned long seq_start; + unsigned long seq_num; +}; + void *filler_thread(void *arg) { - char *path = arg; - make_dirs(path); - make_files(path); + struct filler *filler = arg; + fprintf(stderr, "filling %s start %lu num %lu\n", filler->path, filler->seq_start, filler->seq_num); + make_dirs(filler->path); + make_files(filler->path, filler->seq_start, filler->seq_num); + free(filler); return 0; } @@ -1195,26 +1356,36 @@ void run_filler_threads(void) { int i; int ret; + int j; pthread_t *tids; - tids = malloc(sizeof(*tids) * total_paths); + tids = malloc(sizeof(*tids) * total_paths * FILES_SPLIT); if (!tids) { perror("malloc"); exit(1); } fprintf(stderr, "Creating working files\n"); for (i = 0; i < total_paths; i++) { - pthread_t tid; - ret = pthread_create(&tid, NULL, filler_thread, - paths[i]); - if (ret) { - fprintf(stderr, "error %d from pthread_create\n", ret); - exit(1); + for (j = 0; j < FILES_SPLIT; j++) { + pthread_t tid; + struct filler *filler; + filler = malloc(sizeof(*filler)); + + filler->path = paths[i]; + filler->seq_start = j * (num_files / FILES_SPLIT); + filler->seq_num = num_files / FILES_SPLIT; + ret = pthread_create(&tid, NULL, filler_thread, filler); + if (ret) { + fprintf(stderr, "error %d from pthread_create\n", ret); + exit(1); + } + tids[i * j] = tid; } - tids[i] = tid; } for (i = 0; i < total_paths; i++) { - pthread_join(tids[i], NULL); + for (j = 0; j < FILES_SPLIT; j++) { + pthread_join(tids[i * j], NULL); + } } fprintf(stderr, "done creating working files\n"); free(tids); |