X-Git-Url: https://git.kernel.dk/?p=fio.git;a=blobdiff_plain;f=io_u.c;h=db0a6dc50d2d509c5445e5cde26899628ea03e1d;hp=e9ce37ef6804670371d86e4833181c6cbc6abc64;hb=6eaf09d6e9ca1f8accb057cdb18620b7e53ae33f;hpb=2615cc4b28e7d0e436a625dff92e6a71ccc6c49b diff --git a/io_u.c b/io_u.c index e9ce37ef..db0a6dc5 100644 --- a/io_u.c +++ b/io_u.c @@ -15,7 +15,7 @@ struct io_completion_data { int nr; /* input */ int error; /* output */ - unsigned long bytes_done[2]; /* output */ + unsigned long bytes_done[DDIR_RWDIR_CNT]; /* output */ struct timeval time; /* output */ }; @@ -113,6 +113,9 @@ static unsigned long long last_block(struct thread_data *td, struct fio_file *f, if (max_size > f->real_file_size) max_size = f->real_file_size; + if (td->o.zone_range) + max_size = td->o.zone_range; + max_blocks = max_size / (unsigned long long) td->o.ba[ddir]; if (!max_blocks) return 0; @@ -157,7 +160,7 @@ static int get_next_free_block(struct thread_data *td, struct fio_file *f, static int get_next_rand_offset(struct thread_data *td, struct fio_file *f, enum fio_ddir ddir, unsigned long long *b) { - unsigned long long r, lastb; + unsigned long long rmax, r, lastb; int loops = 5; lastb = last_block(td, f, ddir); @@ -167,14 +170,14 @@ static int get_next_rand_offset(struct thread_data *td, struct fio_file *f, if (f->failed_rands >= 200) goto ffz; + rmax = td->o.use_os_rand ? OS_RAND_MAX : FRAND_MAX; do { - if (td->o.use_os_rand) { + if (td->o.use_os_rand) r = os_random_long(&td->random_state); - *b = (lastb - 1) * (r / ((unsigned long long) OS_RAND_MAX + 1.0)); - } else { + else r = __rand(&td->__random_state); - *b = (lastb - 1) * (r / ((unsigned long long) FRAND_MAX + 1.0)); - } + + *b = (lastb - 1) * (r / ((unsigned long long) rmax + 1.0)); dprint(FD_RANDOM, "off rand %llu\n", r); @@ -206,11 +209,14 @@ static int get_next_rand_offset(struct thread_data *td, struct fio_file *f, loops = 10; do { f->last_free_lookup = (f->num_maps - 1) * - (r / (OS_RAND_MAX + 1.0)); + (r / ((unsigned long long) rmax + 1.0)); if (!get_next_free_block(td, f, ddir, b)) goto ret; - r = os_random_long(&td->random_state); + if (td->o.use_os_rand) + r = os_random_long(&td->random_state); + else + r = __rand(&td->__random_state); } while (--loops); /* @@ -231,22 +237,39 @@ ret: static int get_next_rand_block(struct thread_data *td, struct fio_file *f, enum fio_ddir ddir, unsigned long long *b) { - if (get_next_rand_offset(td, f, ddir, b)) { - dprint(FD_IO, "%s: rand offset failed, last=%llu, size=%llu\n", - f->file_name, f->last_pos, f->real_file_size); - return 1; + if (!get_next_rand_offset(td, f, ddir, b)) + return 0; + + if (td->o.time_based) { + fio_file_reset(f); + if (!get_next_rand_offset(td, f, ddir, b)) + return 0; } - return 0; + dprint(FD_IO, "%s: rand offset failed, last=%llu, size=%llu\n", + f->file_name, f->last_pos, f->real_file_size); + return 1; } -static int get_next_seq_block(struct thread_data *td, struct fio_file *f, - enum fio_ddir ddir, unsigned long long *b) +static int get_next_seq_offset(struct thread_data *td, struct fio_file *f, + enum fio_ddir ddir, unsigned long long *offset) { assert(ddir_rw(ddir)); + if (f->last_pos >= f->io_size + get_start_offset(td) && td->o.time_based) + f->last_pos = f->last_pos - f->io_size; + if (f->last_pos < f->real_file_size) { - *b = (f->last_pos - f->file_offset) / td->o.min_bs[ddir]; + unsigned long long pos; + + if (f->last_pos == f->file_offset && td->o.ddir_seq_add < 0) + f->last_pos = f->real_file_size; + + pos = f->last_pos - f->file_offset; + if (pos) + pos += td->o.ddir_seq_add; + + *offset = pos; return 0; } @@ -254,31 +277,33 @@ static int get_next_seq_block(struct thread_data *td, struct fio_file *f, } static int get_next_block(struct thread_data *td, struct io_u *io_u, - enum fio_ddir ddir, int rw_seq, unsigned long long *b) + enum fio_ddir ddir, int rw_seq) { struct fio_file *f = io_u->file; + unsigned long long b, offset; int ret; assert(ddir_rw(ddir)); + b = offset = -1ULL; + if (rw_seq) { if (td_random(td)) - ret = get_next_rand_block(td, f, ddir, b); + ret = get_next_rand_block(td, f, ddir, &b); else - ret = get_next_seq_block(td, f, ddir, b); + ret = get_next_seq_offset(td, f, ddir, &offset); } else { io_u->flags |= IO_U_F_BUSY_OK; if (td->o.rw_seq == RW_SEQ_SEQ) { - ret = get_next_seq_block(td, f, ddir, b); + ret = get_next_seq_offset(td, f, ddir, &offset); if (ret) - ret = get_next_rand_block(td, f, ddir, b); + ret = get_next_rand_block(td, f, ddir, &b); } else if (td->o.rw_seq == RW_SEQ_IDENT) { if (f->last_start != -1ULL) - *b = (f->last_start - f->file_offset) - / td->o.min_bs[ddir]; + offset = f->last_start - f->file_offset; else - *b = 0; + offset = 0; ret = 0; } else { log_err("fio: unknown rw_seq=%d\n", td->o.rw_seq); @@ -286,6 +311,17 @@ static int get_next_block(struct thread_data *td, struct io_u *io_u, } } + if (!ret) { + if (offset != -1ULL) + io_u->offset = offset; + else if (b != -1ULL) + io_u->offset = b * td->o.ba[ddir]; + else { + log_err("fio: bug in offset generation\n"); + ret = 1; + } + } + return ret; } @@ -297,7 +333,6 @@ static int get_next_block(struct thread_data *td, struct io_u *io_u, static int __get_next_offset(struct thread_data *td, struct io_u *io_u) { struct fio_file *f = io_u->file; - unsigned long long b; enum fio_ddir ddir = io_u->ddir; int rw_seq_hit = 0; @@ -308,10 +343,9 @@ static int __get_next_offset(struct thread_data *td, struct io_u *io_u) td->ddir_seq_nr = td->o.ddir_seq_nr; } - if (get_next_block(td, io_u, ddir, rw_seq_hit, &b)) + if (get_next_block(td, io_u, ddir, rw_seq_hit)) return 1; - io_u->offset = b * td->o.ba[ddir]; if (io_u->offset >= f->io_size) { dprint(FD_IO, "get_next_offset: offset %llu >= io_size %llu\n", io_u->offset, f->io_size); @@ -338,12 +372,20 @@ static int get_next_offset(struct thread_data *td, struct io_u *io_u) return __get_next_offset(td, io_u); } +static inline int io_u_fits(struct thread_data *td, struct io_u *io_u, + unsigned int buflen) +{ + struct fio_file *f = io_u->file; + + return io_u->offset + buflen <= f->io_size + get_start_offset(td); +} + static unsigned int __get_next_buflen(struct thread_data *td, struct io_u *io_u) { const int ddir = io_u->ddir; unsigned int uninitialized_var(buflen); unsigned int minbs, maxbs; - long r; + unsigned long r, rand_max; assert(ddir_rw(ddir)); @@ -351,12 +393,28 @@ static unsigned int __get_next_buflen(struct thread_data *td, struct io_u *io_u) maxbs = td->o.max_bs[ddir]; if (minbs == maxbs) - buflen = minbs; - else { - r = os_random_long(&td->bsrange_state); + return minbs; + + /* + * If we can't satisfy the min block size from here, then fail + */ + if (!io_u_fits(td, io_u, minbs)) + return 0; + + if (td->o.use_os_rand) + rand_max = OS_RAND_MAX; + else + rand_max = FRAND_MAX; + + do { + if (td->o.use_os_rand) + r = os_random_long(&td->bsrange_state); + else + r = __rand(&td->__bsrange_state); + if (!td->o.bssplit_nr[ddir]) { buflen = 1 + (unsigned int) ((double) maxbs * - (r / (OS_RAND_MAX + 1.0))); + (r / (rand_max + 1.0))); if (buflen < minbs) buflen = minbs; } else { @@ -368,19 +426,16 @@ static unsigned int __get_next_buflen(struct thread_data *td, struct io_u *io_u) buflen = bsp->bs; perc += bsp->perc; - if (r <= ((OS_RAND_MAX / 100L) * perc)) + if ((r <= ((rand_max / 100L) * perc)) && + io_u_fits(td, io_u, buflen)) break; } } + if (!td->o.bs_unaligned && is_power_of_2(minbs)) buflen = (buflen + minbs - 1) & ~(minbs - 1); - } - if (io_u->offset + buflen > io_u->file->real_file_size) { - dprint(FD_IO, "lower buflen %u -> %u (ddir=%d)\n", buflen, - minbs, ddir); - buflen = minbs; - } + } while (!io_u_fits(td, io_u, buflen)); return buflen; } @@ -411,10 +466,16 @@ static void set_rwmix_bytes(struct thread_data *td) static inline enum fio_ddir get_rand_ddir(struct thread_data *td) { unsigned int v; - long r; + unsigned long r; + + if (td->o.use_os_rand) { + r = os_random_long(&td->rwmix_state); + v = 1 + (int) (100.0 * (r / (OS_RAND_MAX + 1.0))); + } else { + r = __rand(&td->__rwmix_state); + v = 1 + (int) (100.0 * (r / (FRAND_MAX + 1.0))); + } - r = os_random_long(&td->rwmix_state); - v = 1 + (int) (100.0 * (r / (OS_RAND_MAX + 1.0))); if (v <= td->o.rwmix[DDIR_READ]) return DDIR_READ; @@ -457,6 +518,21 @@ static enum fio_ddir rate_ddir(struct thread_data *td, enum fio_ddir ddir) } else usec = td->rate_pending_usleep[ddir]; + /* + * We are going to sleep, ensure that we flush anything pending as + * not to skew our latency numbers. + * + * Changed to only monitor 'in flight' requests here instead of the + * td->cur_depth, b/c td->cur_depth does not accurately represent + * io's that have been actually submitted to an async engine, + * and cur_depth is meaningless for sync engines. + */ + if (td->io_u_in_flight) { + int fio_unused ret; + + ret = io_u_queued_complete(td, td->io_u_in_flight, NULL); + } + fio_gettime(&t, NULL); usec_sleep(td, usec); usec = utime_since_now(&t); @@ -467,6 +543,8 @@ static enum fio_ddir rate_ddir(struct thread_data *td, enum fio_ddir ddir) if (td_rw(td) && __should_check_rate(td, odir)) td->rate_pending_usleep[odir] -= usec; + if (ddir_trim(ddir)) + return ddir; return ddir; } @@ -523,8 +601,10 @@ static enum fio_ddir get_rw_ddir(struct thread_data *td) ddir = td->rwmix_ddir; } else if (td_read(td)) ddir = DDIR_READ; - else + else if (td_write(td)) ddir = DDIR_WRITE; + else + ddir = DDIR_TRIM; td->rwmix_ddir = rate_ddir(td, ddir); return td->rwmix_ddir; @@ -553,13 +633,12 @@ void put_io_u(struct thread_data *td, struct io_u *io_u) { td_io_u_lock(td); - io_u->flags |= IO_U_F_FREE; - io_u->flags &= ~IO_U_F_FREE_DEF; - - if (io_u->file) + if (io_u->file && !(io_u->flags & IO_U_F_FREE_DEF)) put_file_log(td, io_u->file); - io_u->file = NULL; + io_u->flags &= ~IO_U_F_FREE_DEF; + io_u->flags |= IO_U_F_FREE; + if (io_u->flags & IO_U_F_IN_CUR_DEPTH) td->cur_depth--; flist_del_init(&io_u->list); @@ -611,9 +690,10 @@ static int fill_io_u(struct thread_data *td, struct io_u *io_u) /* * See if it's time to switch to a new zone */ - if (td->zone_bytes >= td->o.zone_size) { + if (td->zone_bytes >= td->o.zone_size && td->o.zone_skip) { td->zone_bytes = 0; - io_u->file->last_pos += td->o.zone_skip; + io_u->file->file_offset += td->o.zone_range + td->o.zone_skip; + io_u->file->last_pos = io_u->file->file_offset; td->io_skip_bytes += td->o.zone_skip; } @@ -830,11 +910,19 @@ static struct fio_file *get_next_file_rand(struct thread_data *td, int fno; do { - long r = os_random_long(&td->next_file_state); int opened = 0; + unsigned long r; + + if (td->o.use_os_rand) { + r = os_random_long(&td->next_file_state); + fno = (unsigned int) ((double) td->o.nr_files + * (r / (OS_RAND_MAX + 1.0))); + } else { + r = __rand(&td->__next_file_state); + fno = (unsigned int) ((double) td->o.nr_files + * (r / (FRAND_MAX + 1.0))); + } - fno = (unsigned int) ((double) td->o.nr_files - * (r / (OS_RAND_MAX + 1.0))); f = td->files[fno]; if (fio_file_done(f)) continue; @@ -1004,6 +1092,7 @@ again: assert(io_u->flags & IO_U_F_FREE); io_u->flags &= ~(IO_U_F_FREE | IO_U_F_FREE_DEF); io_u->flags &= ~(IO_U_F_TRIMMED | IO_U_F_BARRIER); + io_u->flags &= ~IO_U_F_VER_LIST; io_u->error = 0; flist_del(&io_u->list); @@ -1051,10 +1140,9 @@ static int check_get_verify(struct thread_data *td, struct io_u *io_u) if (td->o.verify_backlog && td->io_hist_len) { int get_verify = 0; - if (td->verify_batch) { - td->verify_batch--; + if (td->verify_batch) get_verify = 1; - } else if (!(td->io_hist_len % td->o.verify_backlog) && + else if (!(td->io_hist_len % td->o.verify_backlog) && td->last_ddir != DDIR_READ) { td->verify_batch = td->o.verify_batch; if (!td->verify_batch) @@ -1062,13 +1150,54 @@ static int check_get_verify(struct thread_data *td, struct io_u *io_u) get_verify = 1; } - if (get_verify && !get_next_verify(td, io_u)) + if (get_verify && !get_next_verify(td, io_u)) { + td->verify_batch--; return 1; + } } return 0; } +/* + * Fill offset and start time into the buffer content, to prevent too + * easy compressible data for simple de-dupe attempts. Do this for every + * 512b block in the range, since that should be the smallest block size + * we can expect from a device. + */ +static void small_content_scramble(struct io_u *io_u) +{ + unsigned int i, nr_blocks = io_u->buflen / 512; + unsigned long long boffset; + unsigned int offset; + void *p, *end; + + if (!nr_blocks) + return; + + p = io_u->xfer_buf; + boffset = io_u->offset; + io_u->buf_filled_len = 0; + + for (i = 0; i < nr_blocks; i++) { + /* + * Fill the byte offset into a "random" start offset of + * the buffer, given by the product of the usec time + * and the actual offset. + */ + offset = (io_u->start_time.tv_usec ^ boffset) & 511; + offset &= ~(sizeof(unsigned long long) - 1); + if (offset >= 512 - sizeof(unsigned long long)) + offset -= sizeof(unsigned long long); + memcpy(p + offset, &boffset, sizeof(boffset)); + + end = p + 512 - sizeof(io_u->start_time); + memcpy(end, &io_u->start_time, sizeof(io_u->start_time)); + p += 512; + boffset += 512; + } +} + /* * Return an io_u to be processed. Gets a buflen and offset, sets direction, * etc. The returned io_u is fully ready to be prepped and submitted. @@ -1077,6 +1206,7 @@ struct io_u *get_io_u(struct thread_data *td) { struct fio_file *f; struct io_u *io_u; + int do_scramble = 0; io_u = __get_io_u(td); if (!io_u) { @@ -1118,11 +1248,17 @@ struct io_u *get_io_u(struct thread_data *td) f->last_start = io_u->offset; f->last_pos = io_u->offset + io_u->buflen; - if (td->o.verify != VERIFY_NONE && io_u->ddir == DDIR_WRITE) - populate_verify_io_u(td, io_u); - else if (td->o.refill_buffers && io_u->ddir == DDIR_WRITE) - io_u_fill_buffer(td, io_u, io_u->xfer_buflen); - else if (io_u->ddir == DDIR_READ) { + if (io_u->ddir == DDIR_WRITE) { + if (td->o.refill_buffers) { + io_u_fill_buffer(td, io_u, + io_u->xfer_buflen, io_u->xfer_buflen); + } else if (td->o.scramble_buffers) + do_scramble = 1; + if (td->o.verify != VERIFY_NONE) { + populate_verify_io_u(td, io_u); + do_scramble = 0; + } + } else if (io_u->ddir == DDIR_READ) { /* * Reset the buf_filled parameters so next time if the * buffer is used for writes it is refilled. @@ -1142,6 +1278,8 @@ out: if (!td_io_prep(td, io_u)) { if (!td->o.disable_slat) fio_gettime(&io_u->start_time, NULL); + if (do_scramble) + small_content_scramble(io_u); return io_u; } err_put: @@ -1171,6 +1309,43 @@ void io_u_log_error(struct thread_data *td, struct io_u *io_u) td_verror(td, io_u->error, "io_u error"); } +static void account_io_completion(struct thread_data *td, struct io_u *io_u, + struct io_completion_data *icd, + const enum fio_ddir idx, unsigned int bytes) +{ + unsigned long uninitialized_var(lusec); + + if (!td->o.disable_clat || !td->o.disable_bw) + lusec = utime_since(&io_u->issue_time, &icd->time); + + if (!td->o.disable_lat) { + unsigned long tusec; + + tusec = utime_since(&io_u->start_time, &icd->time); + add_lat_sample(td, idx, tusec, bytes); + } + + if (!td->o.disable_clat) { + add_clat_sample(td, idx, lusec, bytes); + io_u_mark_latency(td, lusec); + } + + if (!td->o.disable_bw) + add_bw_sample(td, idx, bytes, &icd->time); + + add_iops_sample(td, idx, &icd->time); +} + +static long long usec_for_io(struct thread_data *td, enum fio_ddir ddir) +{ + unsigned long long secs, remainder, bps, bytes; + bytes = td->this_io_bytes[ddir]; + bps = td->rate_bps[ddir]; + secs = bytes / bps; + remainder = bytes % bps; + return remainder * 1000000 / bps + secs * 1000000; +} + static void io_completed(struct thread_data *td, struct io_u *io_u, struct io_completion_data *icd) { @@ -1208,8 +1383,11 @@ static void io_completed(struct thread_data *td, struct io_u *io_u, int ret; td->io_blocks[idx]++; + td->this_io_blocks[idx]++; td->io_bytes[idx] += bytes; - td->this_io_bytes[idx] += bytes; + + if (!(io_u->flags & IO_U_F_VER_LIST)) + td->this_io_bytes[idx] += bytes; if (idx == DDIR_WRITE) { f = io_u->file; @@ -1223,35 +1401,18 @@ static void io_completed(struct thread_data *td, struct io_u *io_u, } } - if (ramp_time_over(td)) { - unsigned long uninitialized_var(lusec); + if (ramp_time_over(td) && (td->runstate == TD_RUNNING || + td->runstate == TD_VERIFYING)) { + account_io_completion(td, io_u, icd, idx, bytes); - if (!td->o.disable_clat || !td->o.disable_bw) - lusec = utime_since(&io_u->issue_time, - &icd->time); - if (!td->o.disable_lat) { - unsigned long tusec; - - tusec = utime_since(&io_u->start_time, - &icd->time); - add_lat_sample(td, idx, tusec, bytes); - } - if (!td->o.disable_clat) { - add_clat_sample(td, idx, lusec, bytes); - io_u_mark_latency(td, lusec); - } - if (!td->o.disable_bw) - add_bw_sample(td, idx, bytes, &icd->time); if (__should_check_rate(td, idx)) { td->rate_pending_usleep[idx] = - ((td->this_io_bytes[idx] * - td->rate_nsec_cycle[idx]) / 1000 - + (usec_for_io(td, idx) - utime_since_now(&td->start)); } - if (__should_check_rate(td, idx ^ 1)) + if (idx != DDIR_TRIM && __should_check_rate(td, odx)) td->rate_pending_usleep[odx] = - ((td->this_io_bytes[odx] * - td->rate_nsec_cycle[odx]) / 1000 - + (usec_for_io(td, odx) - utime_since_now(&td->start)); } @@ -1271,8 +1432,8 @@ static void io_completed(struct thread_data *td, struct io_u *io_u, icd->error = io_u->error; io_u_log_error(td, io_u); } - if (td->o.continue_on_error && icd->error && - td_non_fatal_error(icd->error)) { + if (icd->error && td_non_fatal_error(icd->error) && + (td->o.continue_on_error & td_error_type(io_u->ddir, icd->error))) { /* * If there is a non_fatal error, then add to the error count * and clear all the errors. @@ -1287,13 +1448,15 @@ static void io_completed(struct thread_data *td, struct io_u *io_u, static void init_icd(struct thread_data *td, struct io_completion_data *icd, int nr) { + int ddir; if (!td->o.disable_clat || !td->o.disable_bw) fio_gettime(&icd->time, NULL); icd->nr = nr; icd->error = 0; - icd->bytes_done[0] = icd->bytes_done[1] = 0; + for (ddir = DDIR_READ; ddir < DDIR_RWDIR_CNT; ddir++) + icd->bytes_done[ddir] = 0; } static void ios_completed(struct thread_data *td, @@ -1332,8 +1495,10 @@ int io_u_sync_complete(struct thread_data *td, struct io_u *io_u, } if (bytes) { - bytes[0] += icd.bytes_done[0]; - bytes[1] += icd.bytes_done[1]; + int ddir; + + for (ddir = DDIR_READ; ddir < DDIR_RWDIR_CNT; ddir++) + bytes[ddir] += icd.bytes_done[ddir]; } return 0; @@ -1370,8 +1535,10 @@ int io_u_queued_complete(struct thread_data *td, int min_evts, } if (bytes) { - bytes[0] += icd.bytes_done[0]; - bytes[1] += icd.bytes_done[1]; + int ddir; + + for (ddir = DDIR_READ; ddir < DDIR_RWDIR_CNT; ddir++) + bytes[ddir] += icd.bytes_done[ddir]; } return 0; @@ -1394,12 +1561,21 @@ void io_u_queued(struct thread_data *td, struct io_u *io_u) * "randomly" fill the buffer contents */ void io_u_fill_buffer(struct thread_data *td, struct io_u *io_u, - unsigned int max_bs) + unsigned int min_write, unsigned int max_bs) { io_u->buf_filled_len = 0; - if (!td->o.zero_buffers) - fill_random_buf(io_u->buf, max_bs); - else + if (!td->o.zero_buffers) { + unsigned int perc = td->o.compress_percentage; + + if (perc) { + unsigned int seg = min_write; + + seg = min(min_write, td->o.compress_chunk); + fill_random_buf_percentage(&td->buf_state, io_u->buf, + perc, seg, max_bs); + } else + fill_random_buf(&td->buf_state, io_u->buf, max_bs); + } else memset(io_u->buf, 0, max_bs); }