From: Jens Axboe
Date: Mon, 7 Nov 2005 15:29:54 +0000 (+0100)
Subject: [PATCH] fio: Switch to bytes instead and add map of pending random io
X-Git-Url: https://git.kernel.dk/?a=commitdiff_plain;h=49d2caabf5bf7b7da6dd69f325395ab0351ed997;p=disktools.git

[PATCH] fio: Switch to bytes instead and add map of pending random io

Eliminated overlaps, random verify should now work for any bsrange setting.
---
diff --git a/fio.c b/fio.c
index 748b6c4..b46c01c 100644
--- a/fio.c
+++ b/fio.c
@@ -31,6 +31,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -40,6 +41,7 @@
 #include
 #include
 #include
+#include
 #include "list.h"
 #include "md5.h"
@@ -225,6 +227,10 @@ struct verify_header {
 #define td_read(td)	((td)->ddir == DDIR_READ)
 #define should_fsync(td)	(!td_read(td) && !(td)->odirect)
+#define BLOCKS_PER_MAP	(8 * sizeof(long))
+#define RAND_MAP_IDX(sector)	((sector) / BLOCKS_PER_MAP)
+#define RAND_MAP_BIT(sector)	((sector) & (BLOCKS_PER_MAP - 1))
+
 struct thread_data {
 	char file_name[256];
 	int thread_number;
@@ -281,18 +287,21 @@ struct thread_data {
 	unsigned int ratecycle;
 	unsigned long rate_usec_cycle;
 	long rate_pending_usleep;
-	unsigned long rate_sectors;
+	unsigned long rate_bytes;
 	struct timeval lastrate;
 	unsigned long runtime;	/* sec */
-	unsigned long sectors;
+	unsigned long long io_size;
 	unsigned long io_blocks;
-	unsigned long io_sectors;
-	unsigned long this_io_sectors;
-	unsigned long last_sectors;
+	unsigned long io_bytes;
+	unsigned long this_io_bytes;
+	unsigned long last_bytes;
 	sem_t mutex;
+	struct drand48_data random_state;
+	unsigned long *file_map;
+	unsigned int num_maps;
 	/*
 	 * bandwidth and latency stats
@@ -301,7 +310,7 @@ struct thread_data {
 	struct io_stat slat_stat;	/* submission latency */
 	struct io_stat bw_stat;	/* bandwidth stats */
-	unsigned long stat_io_sectors;
+	unsigned long stat_io_bytes;
 	struct timeval stat_sample_time;
 	struct io_log *lat_log;
@@ -332,7 +341,7 @@ static void sig_handler(int sig)
 static int init_random_state(struct thread_data *td)
 {
 	unsigned long seed;
-	int fd;
+	int fd, num_maps, blocks;
 	fd = open("/dev/random", O_RDONLY);
 	if (fd == -1) {
@@ -357,6 +366,12 @@ static int init_random_state(struct thread_data *td)
 	if (repeatable)
 		seed = DEF_RANDSEED;
+	blocks = (td->io_size + td->min_bs - 1) / td->min_bs;
+	num_maps = blocks / BLOCKS_PER_MAP;
+	td->file_map = malloc(num_maps * sizeof(long));
+	td->num_maps = num_maps;
+	memset(td->file_map, 0, num_maps * sizeof(long));
+
 	srand48_r(seed, &td->random_state);
 	return 0;
 }
@@ -407,22 +422,82 @@ static inline unsigned long msec_now(struct timeval *s)
 	return s->tv_sec * 1000 + s->tv_usec / 1000;
 }
-static unsigned long long get_next_offset(struct thread_data *td)
+static int random_map_free(struct thread_data *td, unsigned long long block)
+{
+	unsigned int idx = RAND_MAP_IDX(block);
+	unsigned int bit = RAND_MAP_BIT(block);
+
+	return (td->file_map[idx] & (1UL << bit)) == 0;
+}
+
+static int get_next_free_block(struct thread_data *td, unsigned long long *b)
 {
-	unsigned long long kb;
+	int i;
+
+	*b = 0;
+	i = 0;
+	while ((*b) * td->min_bs < td->io_size) {
+		if (td->file_map[i] != -1UL) {
+			*b += ffz(td->file_map[i]);
+			return 0;
+		}
+
+		*b += BLOCKS_PER_MAP;
+		i++;
+	}
+
+	return 1;
+}
+
+static void mark_random_map(struct thread_data *td, struct io_u *io_u)
+{
+	unsigned long block = io_u->offset / td->min_bs;
+	unsigned int blocks = 0;
+
+	while (blocks < (io_u->buflen / td->min_bs)) {
+		int idx, bit;
+
+		if (!random_map_free(td, block))
+			break;
+
+		idx = RAND_MAP_IDX(block);
+		bit = RAND_MAP_BIT(block);
+
+		assert(idx < td->num_maps);
+
+		td->file_map[idx] |= (1UL << bit);
+		block++;
+		blocks++;
+	}
+
+	if ((blocks * td->min_bs) < io_u->buflen)
+		io_u->buflen = blocks * td->min_bs;
+}
+
+static int get_next_offset(struct thread_data *td, unsigned long long *offset)
+{
+	unsigned long long b;
 	long r;
 	if (!td->sequential) {
-		int min_bs_kb = td->min_bs >> 10;
-		unsigned long max_kb = td->sectors << 1;
-
-		lrand48_r(&td->random_state, &r);
-		kb = (1+(double) (max_kb-1) * r / (RAND_MAX+1.0));
-		kb = (kb + min_bs_kb - 1) & ~(min_bs_kb - 1);
+		unsigned long max_blocks = td->io_size / td->min_bs;
+		int loops = 50;
+
+		do {
+			lrand48_r(&td->random_state, &r);
+			b = ((max_blocks - 1) * r / (RAND_MAX+1.0));
+			loops--;
+		} while (!random_map_free(td, b) && loops);
+
+		if (!loops) {
+			if (get_next_free_block(td, &b))
+				return 1;
+		}
 	} else
-		kb = td->last_sectors << 1;
+		b = td->last_bytes / td->min_bs;
-	return (kb << 10) + td->file_offset;
+	*offset = (b * td->min_bs) + td->file_offset;
+	return 0;
 }
 static unsigned int get_next_buflen(struct thread_data *td)
@@ -438,10 +513,9 @@ static unsigned int get_next_buflen(struct thread_data *td)
 		buflen = (buflen + td->min_bs - 1) & ~(td->min_bs - 1);
 	}
-	if (buflen > ((td->sectors - td->this_io_sectors) << 9))
-		buflen = (td->sectors - td->this_io_sectors) << 9;
+	if (buflen > td->io_size - td->this_io_bytes)
+		buflen = td->io_size - td->this_io_bytes;
-	td->last_sectors += buflen >> 9;
 	return buflen;
 }
@@ -494,14 +568,14 @@ static void add_bw_sample(struct thread_data *td)
 	if (spent < td->bw_avg_time)
 		return;
-	rate = ((td->this_io_sectors - td->stat_io_sectors) << 9) / spent;
+	rate = (td->this_io_bytes - td->stat_io_bytes) / spent;
 	add_stat_sample(td, &td->bw_stat, rate);
 	if (td->bw_log)
 		add_log_sample(td, td->bw_log, rate);
 	gettimeofday(&td->stat_sample_time, NULL);
-	td->stat_io_sectors = td->this_io_sectors;
+	td->stat_io_bytes = td->this_io_bytes;
 }
 static void usec_sleep(int usec)
@@ -558,12 +632,12 @@ static int check_min_rate(struct thread_data *td, struct timeval *now)
 	/*
 	 * if rate blocks is set, sample is running
 	 */
-	if (td->rate_sectors) {
+	if (td->rate_bytes) {
 		spent = mtime_since(&td->lastrate, now);
 		if (spent < td->ratecycle)
 			return 0;
-		rate = ((td->this_io_sectors - td->rate_sectors) << 9) / spent;
+		rate = (td->this_io_bytes - td->rate_bytes) / spent;
 		if (rate < td->ratemin) {
 			printf("Client%d: min rate %d not met, got %ldKiB/sec\n", td->thread_number, td->ratemin, rate);
 			if (rate_quit)
@@ -572,7 +646,7 @@ static int check_min_rate(struct thread_data *td, struct timeval *now)
 		}
 	}
-	td->rate_sectors = td->this_io_sectors;
+	td->rate_bytes = td->this_io_bytes;
 	memcpy(&td->lastrate, now, sizeof(*now));
 	return 0;
 }
@@ -694,13 +768,23 @@ static struct io_u *get_io_u(struct thread_data *td)
 	if (!io_u)
 		return NULL;
-	io_u->offset = get_next_offset(td);
+	if (get_next_offset(td, &io_u->offset))
+		return NULL;
+
 	io_u->buflen = get_next_buflen(td);
 	if (!io_u->buflen) {
 		put_io_u(td, io_u);
 		return NULL;
 	}
+	if (io_u->buflen + io_u->offset > td->io_size)
+		io_u->buflen = td->io_size - io_u->offset;
+
+	if (!td->sequential)
+		mark_random_map(td, io_u);
+
+	td->last_bytes += io_u->buflen;
+
 	if (td->verify)
 		populate_io_u(td, io_u);
@@ -750,38 +834,24 @@ static void prune_io_piece_log(struct thread_data *td)
 	}
 }
-/*
- * if ipo's overlap, kill old ipo
- */
-static int ipo_overlap(struct io_piece *old, struct io_piece *new)
-{
-	unsigned long long old_end = old->offset + old->len;
-	unsigned long long new_end = new->offset + new->len;
-
-	if ((new->offset > old->offset && new->offset < old_end) ||
-	    (new_end > old->offset && new_end < old_end)) {
-		list_add(&new->list, &old->list);
-		list_del(&old->list);
-		free(old);
-		return 1;
-	}
-
-	return 0;
-}
-
 /*
  * log a succesful write, so we can unwind the log for verify
  */
 static void log_io_piece(struct thread_data *td, struct io_u *io_u)
 {
-	struct io_piece *ipo = malloc(sizeof(*ipo));
+	struct io_piece *ipo = malloc(sizeof(struct io_piece));
 	struct list_head *entry;
 	INIT_LIST_HEAD(&ipo->list);
 	ipo->offset = io_u->offset;
 	ipo->len = io_u->buflen;
-	if (td->sequential) {
+	/*
+	 * for random io where the writes extend the file, it will typically
+	 * be laid out with the block scattered as written. it's faster to
+	 * read them in in that order again, so don't sort
+	 */
+	if (td->sequential || !td->overwrite) {
 		list_add_tail(&ipo->list, &td->io_hist_list);
 		return;
 	}
@@ -793,9 +863,6 @@ static void log_io_piece(struct thread_data *td, struct io_u *io_u)
 	while ((entry = entry->prev) != &td->io_hist_list) {
 		struct io_piece *__ipo = list_entry(entry, struct io_piece, list);
-		if (ipo_overlap(__ipo, ipo))
-			return;
-
 		if (__ipo->offset < ipo->offset)
 			break;
 	}
@@ -814,9 +881,7 @@ static void do_sync_verify(struct thread_data *td)
 	io_u = __get_io_u(td);
 	if (!td->odirect) {
-		unsigned long size = td->sectors << 9;
-
-		if (fadvise(td->fd, 0, size, POSIX_FADV_DONTNEED) < 0) {
+		if (fadvise(td->fd, td->file_offset, td->io_size, POSIX_FADV_DONTNEED) < 0) {
			td->error = errno;
			goto out;
		}
@@ -868,7 +933,7 @@ static void do_sync_io(struct thread_data *td)
 	struct io_u *io_u = NULL;
 	struct timeval e;
-	while (td->this_io_sectors < td->sectors) {
+	while (td->this_io_bytes < td->io_size) {
 		int ret;
 		if (td->terminate)
@@ -900,8 +965,8 @@ static void do_sync_io(struct thread_data *td)
 			log_io_piece(td, io_u);
 		td->io_blocks++;
-		td->io_sectors += io_u->buflen >> 9;
-		td->this_io_sectors += io_u->buflen >> 9;
+		td->io_bytes += io_u->buflen;
+		td->this_io_bytes += io_u->buflen;
 		td->cur_off = io_u->offset + io_u->buflen;
 		gettimeofday(&e, NULL);
@@ -976,8 +1041,8 @@ static int ios_completed(struct thread_data *td, int nr)
 		io_u = ev_to_iou(td->aio_events + i);
 		td->io_blocks++;
-		td->io_sectors += io_u->buflen >> 9;
-		td->this_io_sectors += io_u->buflen >> 9;
+		td->io_bytes += io_u->buflen;
+		td->this_io_bytes += io_u->buflen;
 		msec = mtime_since(&io_u->issue_time, &e);
@@ -1112,7 +1177,7 @@ static void do_async_io(struct thread_data *td)
 	struct timeval s, e;
 	unsigned long usec;
-	while (td->this_io_sectors < td->sectors) {
+	while (td->this_io_bytes < td->io_size) {
 		struct timespec ts = { .tv_sec = 0, .tv_nsec = 0};
 		struct timespec *timeout;
 		int ret, min_evts = 0;
@@ -1334,7 +1399,7 @@ static int create_file(struct thread_data *td)
 		return 1;
 	}
-	td->sectors = td->file_size >> 9;
+	td->io_size = td->file_size;
 	b = malloc(td->max_bs);
 	memset(b, 0, td->max_bs);
@@ -1430,8 +1495,13 @@ static int setup_file(struct thread_data *td)
 		st.st_size = td->file_size;
 	}
-	td->sectors = (st.st_size - td->file_offset) / 1024;
-	if (!td->sectors) {
+	if (td->file_offset > st.st_size) {
+		fprintf(stderr, "Client%d: offset larger than length\n", td->thread_number);
+		return 1;
+	}
+
+	td->io_size = st.st_size - td->file_offset;
+	if (td->io_size == 0) {
 		fprintf(stderr, "Client%d: no io blocks\n", td->thread_number);
 		td->error = EINVAL;
 		return 1;
 	}
@@ -1453,9 +1523,12 @@ static void clear_io_state(struct thread_data *td)
 	lseek(td->fd, SEEK_SET, 0);
 	td->cur_off = 0;
-	td->last_sectors = 0;
-	td->stat_io_sectors = 0;
-	td->this_io_sectors = 0;
+	td->last_bytes = 0;
+	td->stat_io_bytes = 0;
+	td->this_io_bytes = 0;
+
+	if (td->file_map)
+		memset(td->file_map, 0, td->num_maps * sizeof(long));
 }
 static void *thread_main(int shm_id, int offset, char *argv[])
@@ -1488,9 +1561,6 @@ static void *thread_main(int shm_id, int offset, char *argv[])
 	if (td->use_aio && init_aio(td))
 		goto err;
-	if (init_random_state(td))
-		goto err;
-
 	if (td->ioprio) {
 		if (ioprio_set(IOPRIO_WHO_PROCESS, 0, td->ioprio) == -1) {
 			td->error = errno;
@@ -1504,6 +1574,9 @@ static void *thread_main(int shm_id, int offset, char *argv[])
 	if (!td->create_serialize && setup_file(td))
 		goto err;
+	if (init_random_state(td))
+		goto err;
+
 	gettimeofday(&td->start, NULL);
 	while (td->loops--) {
@@ -1599,16 +1672,16 @@ static void show_thread_status(struct thread_data *td)
 	unsigned long min, max, bw = 0;
 	double mean, dev;
-	if (!td->io_sectors && !td->error)
+	if (!td->io_bytes && !td->error)
 		return;
 	if (td->runtime)
-		bw = (td->io_sectors << 9) / td->runtime;
+		bw = td->io_bytes / td->runtime;
 	prio = td->ioprio & 0xff;
 	prio_class = td->ioprio >> IOPRIO_CLASS_SHIFT;
-	printf("Client%d: err=%2d, io=%6luMiB, bw=%6luKiB/s, runt=%6lumsec\n", td->thread_number, td->error, td->io_sectors << 9, bw, td->runtime);
+	printf("Client%d: err=%2d, io=%6luMiB, bw=%6luKiB/s, runt=%6lumsec\n", td->thread_number, td->error, td->io_bytes >> 20, bw, td->runtime);
 	if (calc_lat(&td->slat_stat, &min, &max, &mean, &dev))
 		printf(" slat (msec): min=%5lu, max=%5lu, avg=%5.02f, dev=%5.02f\n", min, max, mean, dev);
@@ -1630,7 +1703,7 @@ static int setup_rate(struct thread_data *td)
 		return -1;
 	}
-	nr_reads_per_sec = td->rate * 1024 / td->min_bs;
+	nr_reads_per_sec = (td->rate * 1024) / td->min_bs;
 	td->rate_usec_cycle = 1000000 / nr_reads_per_sec;
 	td->rate_pending_usleep = 0;
 	return 0;
 }
@@ -2422,22 +2495,22 @@ int main(int argc, char *argv[])
 				max_run[td->ddir] = td->runtime;
 			if (td->runtime)
-				bw = (td->io_sectors << 9) / td->runtime;
+				bw = td->io_bytes / td->runtime;
 			if (bw < min_bw[td->ddir])
 				min_bw[td->ddir] = bw;
 			if (bw > max_bw[td->ddir])
 				max_bw[td->ddir] = bw;
-			io_mb[td->ddir] += td->io_sectors >> 9;
+			io_mb[td->ddir] += td->io_bytes >> 20;
 		}
 		show_thread_status(td);
 	}
 	if (max_run[0])
-		agg[0] = io_mb[0] * 1024 * 1000 / max_run[0];
+		agg[0] = (io_mb[0] * 1024 * 1000) / max_run[0];
 	if (max_run[1])
-		agg[1] = io_mb[1] * 1024 * 1000 / max_run[1];
+		agg[1] = (io_mb[1] * 1024 * 1000) / max_run[1];
 	printf("\nRun status:\n");
 	if (max_run[DDIR_READ])
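
For readers following the change, the heart of the patch is the per-block allocation bitmap that keeps random I/O from overlapping: the file is divided into min_bs-sized blocks, each issued block sets one bit, and get_next_offset() retries random probes before falling back to a linear scan for the first free bit. Below is a minimal standalone sketch of that idea, assuming a file split into min_bs-sized blocks; struct rmap, rmap_init(), block_free(), mark_block() and next_free_block() are illustrative names only, not fio code.

/*
 * Illustrative sketch only -- not the fio implementation.  One bit per
 * min_bs-sized block; a set bit means the block has already been handed
 * out, so a random workload never issues two I/Os to the same block.
 */
#include <stdlib.h>

#define BITS_PER_WORD	(8 * sizeof(unsigned long))

struct rmap {
	unsigned long *map;		/* one bit per block */
	unsigned long long nr_blocks;
};

static int rmap_init(struct rmap *r, unsigned long long io_size, unsigned int min_bs)
{
	r->nr_blocks = (io_size + min_bs - 1) / min_bs;
	r->map = calloc((r->nr_blocks + BITS_PER_WORD - 1) / BITS_PER_WORD,
			sizeof(unsigned long));
	return r->map ? 0 : -1;
}

static int block_free(struct rmap *r, unsigned long long b)
{
	return !(r->map[b / BITS_PER_WORD] & (1UL << (b % BITS_PER_WORD)));
}

static void mark_block(struct rmap *r, unsigned long long b)
{
	r->map[b / BITS_PER_WORD] |= 1UL << (b % BITS_PER_WORD);
}

/*
 * Pick a free block: a bounded number of random probes, then a linear
 * scan for the first clear bit (the role ffz() plays in the patch).
 * Returns 0 with the chosen block in *b, or 1 if every block is used.
 */
static int next_free_block(struct rmap *r, unsigned long long *b)
{
	int loops = 50;
	unsigned long long i;

	while (loops--) {
		*b = (unsigned long long) (r->nr_blocks * (rand() / (RAND_MAX + 1.0)));
		if (block_free(r, *b)) {
			mark_block(r, *b);
			return 0;
		}
	}

	for (i = 0; i < r->nr_blocks; i++) {
		if (block_free(r, i)) {
			mark_block(r, i);
			*b = i;
			return 0;
		}
	}

	return 1;
}

The patch's get_next_offset() follows the same shape: up to 50 random probes against td->file_map, then get_next_free_block() scans word by word with ffz(). That bounded-probe-plus-scan structure is why a pass over the file always terminates and why verify no longer sees overlapping writes for any bsrange setting.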