X-Git-Url: https://git.kernel.dk/?p=fio.git;a=blobdiff_plain;f=io_u.c;h=ca33e589c6500f36a76e75cbdadc98a73ef4750f;hp=1a45706f160556f2c153bf82505370b484d7055e;hb=629f1d7150b142d146f8a3ee3cf0a08d1843ca2e;hpb=2b13e716c0921356c0930522718e00b8df34293a diff --git a/io_u.c b/io_u.c index 1a45706f..ca33e589 100644 --- a/io_u.c +++ b/io_u.c @@ -13,6 +13,7 @@ struct io_completion_data { int nr; /* input */ + int account; /* input */ int error; /* output */ unsigned long bytes_done[2]; /* output */ @@ -30,7 +31,7 @@ static int random_map_free(struct fio_file *f, const unsigned long long block) dprint(FD_RANDOM, "free: b=%llu, idx=%u, bit=%u\n", block, idx, bit); - return (f->file_map[idx] & (1 << bit)) == 0; + return (f->file_map[idx] & (1UL << bit)) == 0; } /* @@ -50,8 +51,8 @@ static void mark_random_map(struct thread_data *td, struct io_u *io_u) busy_check = !(io_u->flags & IO_U_F_BUSY_OK); while (nr_blocks) { - unsigned int this_blocks, mask; unsigned int idx, bit; + unsigned long mask, this_blocks; /* * If we have a mixed random workload, we may @@ -75,9 +76,9 @@ static void mark_random_map(struct thread_data *td, struct io_u *io_u) do { if (this_blocks == BLOCKS_PER_MAP) - mask = -1U; + mask = -1UL; else - mask = ((1U << this_blocks) - 1) << bit; + mask = ((1UL << this_blocks) - 1) << bit; if (!(f->file_map[idx] & mask)) break; @@ -113,6 +114,9 @@ static unsigned long long last_block(struct thread_data *td, struct fio_file *f, if (max_size > f->real_file_size) max_size = f->real_file_size; + if (td->o.zone_range) + max_size = td->o.zone_range; + max_blocks = max_size / (unsigned long long) td->o.ba[ddir]; if (!max_blocks) return 0; @@ -126,7 +130,7 @@ static unsigned long long last_block(struct thread_data *td, struct fio_file *f, static int get_next_free_block(struct thread_data *td, struct fio_file *f, enum fio_ddir ddir, unsigned long long *b) { - unsigned long long min_bs = td->o.rw_min_bs, lastb; + unsigned long long block, min_bs = td->o.rw_min_bs, lastb; int i; lastb = last_block(td, f, ddir); @@ -134,18 +138,19 @@ static int get_next_free_block(struct thread_data *td, struct fio_file *f, return 1; i = f->last_free_lookup; - *b = (i * BLOCKS_PER_MAP); - while ((*b) * min_bs < f->real_file_size && - (*b) * min_bs < f->io_size) { - if (f->file_map[i] != (unsigned int) -1) { - *b += ffz(f->file_map[i]); - if (*b > lastb) + block = i * BLOCKS_PER_MAP; + while (block * min_bs < f->real_file_size && + block * min_bs < f->io_size) { + if (f->file_map[i] != -1UL) { + block += ffz(f->file_map[i]); + if (block > lastb) break; f->last_free_lookup = i; + *b = block; return 0; } - *b += BLOCKS_PER_MAP; + block += BLOCKS_PER_MAP; i++; } @@ -156,34 +161,47 @@ static int get_next_free_block(struct thread_data *td, struct fio_file *f, static int get_next_rand_offset(struct thread_data *td, struct fio_file *f, enum fio_ddir ddir, unsigned long long *b) { - unsigned long long r, lastb; + unsigned long long rmax, r, lastb; int loops = 5; lastb = last_block(td, f, ddir); if (!lastb) return 1; + if (f->failed_rands >= 200) + goto ffz; + + rmax = td->o.use_os_rand ? OS_RAND_MAX : FRAND_MAX; do { - r = os_random_long(&td->random_state); + if (td->o.use_os_rand) + r = os_random_long(&td->random_state); + else + r = __rand(&td->__random_state); + + *b = (lastb - 1) * (r / ((unsigned long long) rmax + 1.0)); + dprint(FD_RANDOM, "off rand %llu\n", r); - *b = (lastb - 1) * (r / ((unsigned long long) OS_RAND_MAX + 1.0)); + /* * if we are not maintaining a random map, we are done. */ if (!file_randommap(td, f)) - return 0; + goto ret_good; /* * calculate map offset and check if it's free */ if (random_map_free(f, *b)) - return 0; + goto ret_good; dprint(FD_RANDOM, "get_next_rand_offset: offset %llu busy\n", *b); } while (--loops); + if (!f->failed_rands++) + f->last_free_lookup = 0; + /* * we get here, if we didn't suceed in looking up a block. generate * a random start offset into the filemap, and find the first free @@ -192,18 +210,29 @@ static int get_next_rand_offset(struct thread_data *td, struct fio_file *f, loops = 10; do { f->last_free_lookup = (f->num_maps - 1) * - (r / (OS_RAND_MAX + 1.0)); + (r / ((unsigned long long) rmax + 1.0)); if (!get_next_free_block(td, f, ddir, b)) - return 0; + goto ret; - r = os_random_long(&td->random_state); + if (td->o.use_os_rand) + r = os_random_long(&td->random_state); + else + r = __rand(&td->__random_state); } while (--loops); /* * that didn't work either, try exhaustive search from the start */ f->last_free_lookup = 0; +ffz: + if (!get_next_free_block(td, f, ddir, b)) + return 0; + f->last_free_lookup = 0; return get_next_free_block(td, f, ddir, b); +ret_good: + f->failed_rands = 0; +ret: + return 0; } static int get_next_rand_block(struct thread_data *td, struct fio_file *f, @@ -224,7 +253,16 @@ static int get_next_seq_block(struct thread_data *td, struct fio_file *f, assert(ddir_rw(ddir)); if (f->last_pos < f->real_file_size) { - *b = (f->last_pos - f->file_offset) / td->o.min_bs[ddir]; + unsigned long long pos; + + if (f->last_pos == f->file_offset && td->o.ddir_seq_add < 0) + f->last_pos = f->real_file_size; + + pos = f->last_pos - f->file_offset; + if (pos) + pos += td->o.ddir_seq_add; + + *b = pos / td->o.min_bs[ddir]; return 0; } @@ -316,12 +354,20 @@ static int get_next_offset(struct thread_data *td, struct io_u *io_u) return __get_next_offset(td, io_u); } +static inline int io_u_fits(struct thread_data *td, struct io_u *io_u, + unsigned int buflen) +{ + struct fio_file *f = io_u->file; + + return io_u->offset + buflen <= f->io_size + td->o.start_offset; +} + static unsigned int __get_next_buflen(struct thread_data *td, struct io_u *io_u) { const int ddir = io_u->ddir; unsigned int uninitialized_var(buflen); unsigned int minbs, maxbs; - long r; + unsigned long r, rand_max; assert(ddir_rw(ddir)); @@ -329,12 +375,28 @@ static unsigned int __get_next_buflen(struct thread_data *td, struct io_u *io_u) maxbs = td->o.max_bs[ddir]; if (minbs == maxbs) - buflen = minbs; - else { - r = os_random_long(&td->bsrange_state); + return minbs; + + /* + * If we can't satisfy the min block size from here, then fail + */ + if (!io_u_fits(td, io_u, minbs)) + return 0; + + if (td->o.use_os_rand) + rand_max = OS_RAND_MAX; + else + rand_max = FRAND_MAX; + + do { + if (td->o.use_os_rand) + r = os_random_long(&td->bsrange_state); + else + r = __rand(&td->__bsrange_state); + if (!td->o.bssplit_nr[ddir]) { buflen = 1 + (unsigned int) ((double) maxbs * - (r / (OS_RAND_MAX + 1.0))); + (r / (rand_max + 1.0))); if (buflen < minbs) buflen = minbs; } else { @@ -346,19 +408,16 @@ static unsigned int __get_next_buflen(struct thread_data *td, struct io_u *io_u) buflen = bsp->bs; perc += bsp->perc; - if (r <= ((OS_RAND_MAX / 100L) * perc)) + if ((r <= ((rand_max / 100L) * perc)) && + io_u_fits(td, io_u, buflen)) break; } } + if (!td->o.bs_unaligned && is_power_of_2(minbs)) buflen = (buflen + minbs - 1) & ~(minbs - 1); - } - if (io_u->offset + buflen > io_u->file->real_file_size) { - dprint(FD_IO, "lower buflen %u -> %u (ddir=%d)\n", buflen, - minbs, ddir); - buflen = minbs; - } + } while (!io_u_fits(td, io_u, buflen)); return buflen; } @@ -389,10 +448,16 @@ static void set_rwmix_bytes(struct thread_data *td) static inline enum fio_ddir get_rand_ddir(struct thread_data *td) { unsigned int v; - long r; + unsigned long r; + + if (td->o.use_os_rand) { + r = os_random_long(&td->rwmix_state); + v = 1 + (int) (100.0 * (r / (OS_RAND_MAX + 1.0))); + } else { + r = __rand(&td->__rwmix_state); + v = 1 + (int) (100.0 * (r / (FRAND_MAX + 1.0))); + } - r = os_random_long(&td->rwmix_state); - v = 1 + (int) (100.0 * (r / (OS_RAND_MAX + 1.0))); if (v <= td->o.rwmix[DDIR_READ]) return DDIR_READ; @@ -435,6 +500,21 @@ static enum fio_ddir rate_ddir(struct thread_data *td, enum fio_ddir ddir) } else usec = td->rate_pending_usleep[ddir]; + /* + * We are going to sleep, ensure that we flush anything pending as + * not to skew our latency numbers. + * + * Changed to only monitor 'in flight' requests here instead of the + * td->cur_depth, b/c td->cur_depth does not accurately represent + * io's that have been actually submitted to an async engine, + * and cur_depth is meaningless for sync engines. + */ + if (td->io_u_in_flight) { + int fio_unused ret; + + ret = io_u_queued_complete(td, td->io_u_in_flight, NULL); + } + fio_gettime(&t, NULL); usec_sleep(td, usec); usec = utime_since_now(&t); @@ -531,13 +611,12 @@ void put_io_u(struct thread_data *td, struct io_u *io_u) { td_io_u_lock(td); - io_u->flags |= IO_U_F_FREE; - io_u->flags &= ~IO_U_F_FREE_DEF; - - if (io_u->file) + if (io_u->file && !(io_u->flags & IO_U_F_FREE_DEF)) put_file_log(td, io_u->file); - io_u->file = NULL; + io_u->flags &= ~IO_U_F_FREE_DEF; + io_u->flags |= IO_U_F_FREE; + if (io_u->flags & IO_U_F_IN_CUR_DEPTH) td->cur_depth--; flist_del_init(&io_u->list); @@ -589,9 +668,10 @@ static int fill_io_u(struct thread_data *td, struct io_u *io_u) /* * See if it's time to switch to a new zone */ - if (td->zone_bytes >= td->o.zone_size) { + if (td->zone_bytes >= td->o.zone_size && td->o.zone_skip) { td->zone_bytes = 0; - io_u->file->last_pos += td->o.zone_skip; + io_u->file->file_offset += td->o.zone_range + td->o.zone_skip; + io_u->file->last_pos = io_u->file->file_offset; td->io_skip_bytes += td->o.zone_skip; } @@ -808,11 +888,19 @@ static struct fio_file *get_next_file_rand(struct thread_data *td, int fno; do { - long r = os_random_long(&td->next_file_state); int opened = 0; + unsigned long r; + + if (td->o.use_os_rand) { + r = os_random_long(&td->next_file_state); + fno = (unsigned int) ((double) td->o.nr_files + * (r / (OS_RAND_MAX + 1.0))); + } else { + r = __rand(&td->__next_file_state); + fno = (unsigned int) ((double) td->o.nr_files + * (r / (FRAND_MAX + 1.0))); + } - fno = (unsigned int) ((double) td->o.nr_files - * (r / (OS_RAND_MAX + 1.0))); f = td->files[fno]; if (fio_file_done(f)) continue; @@ -1029,10 +1117,9 @@ static int check_get_verify(struct thread_data *td, struct io_u *io_u) if (td->o.verify_backlog && td->io_hist_len) { int get_verify = 0; - if (td->verify_batch) { - td->verify_batch--; + if (td->verify_batch) get_verify = 1; - } else if (!(td->io_hist_len % td->o.verify_backlog) && + else if (!(td->io_hist_len % td->o.verify_backlog) && td->last_ddir != DDIR_READ) { td->verify_batch = td->o.verify_batch; if (!td->verify_batch) @@ -1040,13 +1127,54 @@ static int check_get_verify(struct thread_data *td, struct io_u *io_u) get_verify = 1; } - if (get_verify && !get_next_verify(td, io_u)) + if (get_verify && !get_next_verify(td, io_u)) { + td->verify_batch--; return 1; + } } return 0; } +/* + * Fill offset and start time into the buffer content, to prevent too + * easy compressible data for simple de-dupe attempts. Do this for every + * 512b block in the range, since that should be the smallest block size + * we can expect from a device. + */ +static void small_content_scramble(struct io_u *io_u) +{ + unsigned int i, nr_blocks = io_u->buflen / 512; + unsigned long long boffset; + unsigned int offset; + void *p, *end; + + if (!nr_blocks) + return; + + p = io_u->xfer_buf; + boffset = io_u->offset; + io_u->buf_filled_len = 0; + + for (i = 0; i < nr_blocks; i++) { + /* + * Fill the byte offset into a "random" start offset of + * the buffer, given by the product of the usec time + * and the actual offset. + */ + offset = (io_u->start_time.tv_usec ^ boffset) & 511; + offset &= ~(sizeof(unsigned long long) - 1); + if (offset >= 512 - sizeof(unsigned long long)) + offset -= sizeof(unsigned long long); + memcpy(p + offset, &boffset, sizeof(boffset)); + + end = p + 512 - sizeof(io_u->start_time); + memcpy(end, &io_u->start_time, sizeof(io_u->start_time)); + p += 512; + boffset += 512; + } +} + /* * Return an io_u to be processed. Gets a buflen and offset, sets direction, * etc. The returned io_u is fully ready to be prepped and submitted. @@ -1055,6 +1183,7 @@ struct io_u *get_io_u(struct thread_data *td) { struct fio_file *f; struct io_u *io_u; + int do_scramble = 0; io_u = __get_io_u(td); if (!io_u) { @@ -1096,11 +1225,17 @@ struct io_u *get_io_u(struct thread_data *td) f->last_start = io_u->offset; f->last_pos = io_u->offset + io_u->buflen; - if (td->o.verify != VERIFY_NONE && io_u->ddir == DDIR_WRITE) - populate_verify_io_u(td, io_u); - else if (td->o.refill_buffers && io_u->ddir == DDIR_WRITE) - io_u_fill_buffer(td, io_u, io_u->xfer_buflen); - else if (io_u->ddir == DDIR_READ) { + if (io_u->ddir == DDIR_WRITE) { + if (td->o.refill_buffers) { + io_u_fill_buffer(td, io_u, + io_u->xfer_buflen, io_u->xfer_buflen); + } else if (td->o.scramble_buffers) + do_scramble = 1; + if (td->o.verify != VERIFY_NONE) { + populate_verify_io_u(td, io_u); + do_scramble = 0; + } + } else if (io_u->ddir == DDIR_READ) { /* * Reset the buf_filled parameters so next time if the * buffer is used for writes it is refilled. @@ -1120,6 +1255,8 @@ out: if (!td_io_prep(td, io_u)) { if (!td->o.disable_slat) fio_gettime(&io_u->start_time, NULL); + if (do_scramble) + small_content_scramble(io_u); return io_u; } err_put: @@ -1149,6 +1286,46 @@ void io_u_log_error(struct thread_data *td, struct io_u *io_u) td_verror(td, io_u->error, "io_u error"); } +static void account_io_completion(struct thread_data *td, struct io_u *io_u, + struct io_completion_data *icd, + const enum fio_ddir idx, unsigned int bytes) +{ + unsigned long uninitialized_var(lusec); + + if (!icd->account) + return; + + if (!td->o.disable_clat || !td->o.disable_bw) + lusec = utime_since(&io_u->issue_time, &icd->time); + + if (!td->o.disable_lat) { + unsigned long tusec; + + tusec = utime_since(&io_u->start_time, &icd->time); + add_lat_sample(td, idx, tusec, bytes); + } + + if (!td->o.disable_clat) { + add_clat_sample(td, idx, lusec, bytes); + io_u_mark_latency(td, lusec); + } + + if (!td->o.disable_bw) + add_bw_sample(td, idx, bytes, &icd->time); + + add_iops_sample(td, idx, &icd->time); +} + +static long long usec_for_io(struct thread_data *td, enum fio_ddir ddir) +{ + unsigned long long secs, remainder, bps, bytes; + bytes = td->this_io_bytes[ddir]; + bps = td->rate_bps[ddir]; + secs = bytes / bps; + remainder = bytes % bps; + return remainder * 1000000 / bps + secs * 1000000; +} + static void io_completed(struct thread_data *td, struct io_u *io_u, struct io_completion_data *icd) { @@ -1186,6 +1363,7 @@ static void io_completed(struct thread_data *td, struct io_u *io_u, int ret; td->io_blocks[idx]++; + td->this_io_blocks[idx]++; td->io_bytes[idx] += bytes; td->this_io_bytes[idx] += bytes; @@ -1201,35 +1379,18 @@ static void io_completed(struct thread_data *td, struct io_u *io_u, } } - if (ramp_time_over(td)) { - unsigned long uninitialized_var(lusec); - - if (!td->o.disable_clat || !td->o.disable_bw) - lusec = utime_since(&io_u->issue_time, - &icd->time); - if (!td->o.disable_lat) { - unsigned long tusec; + if (ramp_time_over(td) && (td->runstate == TD_RUNNING || + td->runstate == TD_VERIFYING)) { + account_io_completion(td, io_u, icd, idx, bytes); - tusec = utime_since(&io_u->start_time, - &icd->time); - add_lat_sample(td, idx, tusec, bytes); - } - if (!td->o.disable_clat) { - add_clat_sample(td, idx, lusec, bytes); - io_u_mark_latency(td, lusec); - } - if (!td->o.disable_bw) - add_bw_sample(td, idx, bytes, &icd->time); if (__should_check_rate(td, idx)) { td->rate_pending_usleep[idx] = - ((td->this_io_bytes[idx] * - td->rate_nsec_cycle[idx]) / 1000 - + (usec_for_io(td, idx) - utime_since_now(&td->start)); } - if (__should_check_rate(td, idx ^ 1)) + if (__should_check_rate(td, odx)) td->rate_pending_usleep[odx] = - ((td->this_io_bytes[odx] * - td->rate_nsec_cycle[odx]) / 1000 - + (usec_for_io(td, odx) - utime_since_now(&td->start)); } @@ -1249,8 +1410,8 @@ static void io_completed(struct thread_data *td, struct io_u *io_u, icd->error = io_u->error; io_u_log_error(td, io_u); } - if (td->o.continue_on_error && icd->error && - td_non_fatal_error(icd->error)) { + if (icd->error && td_non_fatal_error(icd->error) && + (td->o.continue_on_error & td_error_type(io_u->ddir, icd->error))) { /* * If there is a non_fatal error, then add to the error count * and clear all the errors. @@ -1269,6 +1430,7 @@ static void init_icd(struct thread_data *td, struct io_completion_data *icd, fio_gettime(&icd->time, NULL); icd->nr = nr; + icd->account = 1; icd->error = 0; icd->bytes_done[0] = icd->bytes_done[1] = 0; @@ -1287,6 +1449,8 @@ static void ios_completed(struct thread_data *td, if (!(io_u->flags & IO_U_F_FREE_DEF)) put_io_u(td, io_u); + + icd->account = 0; } } @@ -1372,12 +1536,18 @@ void io_u_queued(struct thread_data *td, struct io_u *io_u) * "randomly" fill the buffer contents */ void io_u_fill_buffer(struct thread_data *td, struct io_u *io_u, - unsigned int max_bs) + unsigned int min_write, unsigned int max_bs) { io_u->buf_filled_len = 0; - if (!td->o.zero_buffers) - fill_random_buf(io_u->buf, max_bs); - else + if (!td->o.zero_buffers) { + unsigned int perc = td->o.compress_percentage; + + if (perc) { + fill_random_buf_percentage(&td->buf_state, io_u->buf, + perc, min_write, max_bs); + } else + fill_random_buf(&td->buf_state, io_u->buf, max_bs); + } else memset(io_u->buf, 0, max_bs); }