X-Git-Url: https://git.kernel.dk/?p=fio.git;a=blobdiff_plain;f=io_u.c;h=8a033485e7cf9eee2adbb893ab2d13bd2695dde0;hp=ea0d46c5db3dd0d4e32da6b726b0abda9e249bd9;hb=ecc35cf45f60e658856c6f0cbd5b7785753981bb;hpb=0d29de831183dfd049c97a03008d425ce21e2fa4 diff --git a/io_u.c b/io_u.c index ea0d46c5..8a033485 100644 --- a/io_u.c +++ b/io_u.c @@ -13,6 +13,7 @@ struct io_completion_data { int nr; /* input */ + int account; /* input */ int error; /* output */ unsigned long bytes_done[2]; /* output */ @@ -30,7 +31,7 @@ static int random_map_free(struct fio_file *f, const unsigned long long block) dprint(FD_RANDOM, "free: b=%llu, idx=%u, bit=%u\n", block, idx, bit); - return (f->file_map[idx] & (1 << bit)) == 0; + return (f->file_map[idx] & (1UL << bit)) == 0; } /* @@ -50,8 +51,8 @@ static void mark_random_map(struct thread_data *td, struct io_u *io_u) busy_check = !(io_u->flags & IO_U_F_BUSY_OK); while (nr_blocks) { - unsigned int this_blocks, mask; unsigned int idx, bit; + unsigned long mask, this_blocks; /* * If we have a mixed random workload, we may @@ -75,9 +76,9 @@ static void mark_random_map(struct thread_data *td, struct io_u *io_u) do { if (this_blocks == BLOCKS_PER_MAP) - mask = -1U; + mask = -1UL; else - mask = ((1U << this_blocks) - 1) << bit; + mask = ((1UL << this_blocks) - 1) << bit; if (!(f->file_map[idx] & mask)) break; @@ -113,6 +114,9 @@ static unsigned long long last_block(struct thread_data *td, struct fio_file *f, if (max_size > f->real_file_size) max_size = f->real_file_size; + if (td->o.zone_range) + max_size = td->o.zone_range; + max_blocks = max_size / (unsigned long long) td->o.ba[ddir]; if (!max_blocks) return 0; @@ -126,22 +130,27 @@ static unsigned long long last_block(struct thread_data *td, struct fio_file *f, static int get_next_free_block(struct thread_data *td, struct fio_file *f, enum fio_ddir ddir, unsigned long long *b) { - unsigned long long min_bs = td->o.rw_min_bs; + unsigned long long block, min_bs = td->o.rw_min_bs, lastb; int i; + lastb = last_block(td, f, ddir); + if (!lastb) + return 1; + i = f->last_free_lookup; - *b = (i * BLOCKS_PER_MAP); - while ((*b) * min_bs < f->real_file_size && - (*b) * min_bs < f->io_size) { - if (f->file_map[i] != (unsigned int) -1) { - *b += ffz(f->file_map[i]); - if (*b > last_block(td, f, ddir)) + block = i * BLOCKS_PER_MAP; + while (block * min_bs < f->real_file_size && + block * min_bs < f->io_size) { + if (f->file_map[i] != -1UL) { + block += ffz(f->file_map[i]); + if (block > lastb) break; f->last_free_lookup = i; + *b = block; return 0; } - *b += BLOCKS_PER_MAP; + block += BLOCKS_PER_MAP; i++; } @@ -152,31 +161,47 @@ static int get_next_free_block(struct thread_data *td, struct fio_file *f, static int get_next_rand_offset(struct thread_data *td, struct fio_file *f, enum fio_ddir ddir, unsigned long long *b) { - unsigned long long r; + unsigned long long rmax, r, lastb; int loops = 5; + lastb = last_block(td, f, ddir); + if (!lastb) + return 1; + + if (f->failed_rands >= 200) + goto ffz; + + rmax = td->o.use_os_rand ? OS_RAND_MAX : FRAND_MAX; do { - r = os_random_long(&td->random_state); + if (td->o.use_os_rand) + r = os_random_long(&td->random_state); + else + r = __rand(&td->__random_state); + + *b = (lastb - 1) * (r / ((unsigned long long) rmax + 1.0)); + dprint(FD_RANDOM, "off rand %llu\n", r); - *b = (last_block(td, f, ddir) - 1) - * (r / ((unsigned long long) OS_RAND_MAX + 1.0)); + /* * if we are not maintaining a random map, we are done. */ if (!file_randommap(td, f)) - return 0; + goto ret_good; /* * calculate map offset and check if it's free */ if (random_map_free(f, *b)) - return 0; + goto ret_good; dprint(FD_RANDOM, "get_next_rand_offset: offset %llu busy\n", *b); } while (--loops); + if (!f->failed_rands++) + f->last_free_lookup = 0; + /* * we get here, if we didn't suceed in looking up a block. generate * a random start offset into the filemap, and find the first free @@ -185,18 +210,29 @@ static int get_next_rand_offset(struct thread_data *td, struct fio_file *f, loops = 10; do { f->last_free_lookup = (f->num_maps - 1) * - (r / (OS_RAND_MAX + 1.0)); + (r / ((unsigned long long) rmax + 1.0)); if (!get_next_free_block(td, f, ddir, b)) - return 0; + goto ret; - r = os_random_long(&td->random_state); + if (td->o.use_os_rand) + r = os_random_long(&td->random_state); + else + r = __rand(&td->__random_state); } while (--loops); /* * that didn't work either, try exhaustive search from the start */ f->last_free_lookup = 0; +ffz: + if (!get_next_free_block(td, f, ddir, b)) + return 0; + f->last_free_lookup = 0; return get_next_free_block(td, f, ddir, b); +ret_good: + f->failed_rands = 0; +ret: + return 0; } static int get_next_rand_block(struct thread_data *td, struct fio_file *f, @@ -217,7 +253,16 @@ static int get_next_seq_block(struct thread_data *td, struct fio_file *f, assert(ddir_rw(ddir)); if (f->last_pos < f->real_file_size) { - *b = (f->last_pos - f->file_offset) / td->o.min_bs[ddir]; + unsigned long long pos; + + if (f->last_pos == f->file_offset && td->o.ddir_seq_add < 0) + f->last_pos = f->real_file_size; + + pos = f->last_pos - f->file_offset; + if (pos) + pos += td->o.ddir_seq_add; + + *b = pos / td->o.min_bs[ddir]; return 0; } @@ -246,7 +291,8 @@ static int get_next_block(struct thread_data *td, struct io_u *io_u, ret = get_next_rand_block(td, f, ddir, b); } else if (td->o.rw_seq == RW_SEQ_IDENT) { if (f->last_start != -1ULL) - *b = (f->last_start - f->file_offset) / td->o.min_bs[ddir]; + *b = (f->last_start - f->file_offset) + / td->o.min_bs[ddir]; else *b = 0; ret = 0; @@ -278,10 +324,8 @@ static int __get_next_offset(struct thread_data *td, struct io_u *io_u) td->ddir_seq_nr = td->o.ddir_seq_nr; } - if (get_next_block(td, io_u, ddir, rw_seq_hit, &b)) { - printf("fail\n"); + if (get_next_block(td, io_u, ddir, rw_seq_hit, &b)) return 1; - } io_u->offset = b * td->o.ba[ddir]; if (io_u->offset >= f->io_size) { @@ -310,12 +354,20 @@ static int get_next_offset(struct thread_data *td, struct io_u *io_u) return __get_next_offset(td, io_u); } +static inline int io_u_fits(struct thread_data *td, struct io_u *io_u, + unsigned int buflen) +{ + struct fio_file *f = io_u->file; + + return io_u->offset + buflen <= f->io_size + td->o.start_offset; +} + static unsigned int __get_next_buflen(struct thread_data *td, struct io_u *io_u) { const int ddir = io_u->ddir; unsigned int uninitialized_var(buflen); unsigned int minbs, maxbs; - long r; + unsigned long r, rand_max; assert(ddir_rw(ddir)); @@ -323,12 +375,28 @@ static unsigned int __get_next_buflen(struct thread_data *td, struct io_u *io_u) maxbs = td->o.max_bs[ddir]; if (minbs == maxbs) - buflen = minbs; - else { - r = os_random_long(&td->bsrange_state); + return minbs; + + /* + * If we can't satisfy the min block size from here, then fail + */ + if (!io_u_fits(td, io_u, minbs)) + return 0; + + if (td->o.use_os_rand) + rand_max = OS_RAND_MAX; + else + rand_max = FRAND_MAX; + + do { + if (td->o.use_os_rand) + r = os_random_long(&td->bsrange_state); + else + r = __rand(&td->__bsrange_state); + if (!td->o.bssplit_nr[ddir]) { buflen = 1 + (unsigned int) ((double) maxbs * - (r / (OS_RAND_MAX + 1.0))); + (r / (rand_max + 1.0))); if (buflen < minbs) buflen = minbs; } else { @@ -340,19 +408,16 @@ static unsigned int __get_next_buflen(struct thread_data *td, struct io_u *io_u) buflen = bsp->bs; perc += bsp->perc; - if (r <= ((OS_RAND_MAX / 100L) * perc)) + if ((r <= ((rand_max / 100L) * perc)) && + io_u_fits(td, io_u, buflen)) break; } } + if (!td->o.bs_unaligned && is_power_of_2(minbs)) buflen = (buflen + minbs - 1) & ~(minbs - 1); - } - if (io_u->offset + buflen > io_u->file->real_file_size) { - dprint(FD_IO, "lower buflen %u -> %u (ddir=%d)\n", buflen, - minbs, ddir); - buflen = minbs; - } + } while (!io_u_fits(td, io_u, buflen)); return buflen; } @@ -383,10 +448,16 @@ static void set_rwmix_bytes(struct thread_data *td) static inline enum fio_ddir get_rand_ddir(struct thread_data *td) { unsigned int v; - long r; + unsigned long r; + + if (td->o.use_os_rand) { + r = os_random_long(&td->rwmix_state); + v = 1 + (int) (100.0 * (r / (OS_RAND_MAX + 1.0))); + } else { + r = __rand(&td->__rwmix_state); + v = 1 + (int) (100.0 * (r / (FRAND_MAX + 1.0))); + } - r = os_random_long(&td->rwmix_state); - v = 1 + (int) (100.0 * (r / (OS_RAND_MAX + 1.0))); if (v <= td->o.rwmix[DDIR_READ]) return DDIR_READ; @@ -429,6 +500,21 @@ static enum fio_ddir rate_ddir(struct thread_data *td, enum fio_ddir ddir) } else usec = td->rate_pending_usleep[ddir]; + /* + * We are going to sleep, ensure that we flush anything pending as + * not to skew our latency numbers. + * + * Changed to only monitor 'in flight' requests here instead of the + * td->cur_depth, b/c td->cur_depth does not accurately represent + * io's that have been actually submitted to an async engine, + * and cur_depth is meaningless for sync engines. + */ + if (td->io_u_in_flight) { + int fio_unused ret; + + ret = io_u_queued_complete(td, td->io_u_in_flight, NULL); + } + fio_gettime(&t, NULL); usec_sleep(td, usec); usec = utime_since_now(&t); @@ -502,6 +588,17 @@ static enum fio_ddir get_rw_ddir(struct thread_data *td) return td->rwmix_ddir; } +static void set_rw_ddir(struct thread_data *td, struct io_u *io_u) +{ + io_u->ddir = get_rw_ddir(td); + + if (io_u->ddir == DDIR_WRITE && (td->io_ops->flags & FIO_BARRIER) && + td->o.barrier_blocks && + !(td->io_issues[DDIR_WRITE] % td->o.barrier_blocks) && + td->io_issues[DDIR_WRITE]) + io_u->flags |= IO_U_F_BARRIER; +} + void put_file_log(struct thread_data *td, struct fio_file *f) { int ret = put_file(td, f); @@ -514,13 +611,12 @@ void put_io_u(struct thread_data *td, struct io_u *io_u) { td_io_u_lock(td); - io_u->flags |= IO_U_F_FREE; - io_u->flags &= ~IO_U_F_FREE_DEF; - - if (io_u->file) + if (io_u->file && !(io_u->flags & IO_U_F_FREE_DEF)) put_file_log(td, io_u->file); - io_u->file = NULL; + io_u->flags &= ~IO_U_F_FREE_DEF; + io_u->flags |= IO_U_F_FREE; + if (io_u->flags & IO_U_F_IN_CUR_DEPTH) td->cur_depth--; flist_del_init(&io_u->list); @@ -561,7 +657,7 @@ static int fill_io_u(struct thread_data *td, struct io_u *io_u) if (td->io_ops->flags & FIO_NOIO) goto out; - io_u->ddir = get_rw_ddir(td); + set_rw_ddir(td, io_u); /* * fsync() or fdatasync() or trim etc, we are done @@ -574,7 +670,8 @@ static int fill_io_u(struct thread_data *td, struct io_u *io_u) */ if (td->zone_bytes >= td->o.zone_size) { td->zone_bytes = 0; - io_u->file->last_pos += td->o.zone_skip; + io_u->file->file_offset += td->o.zone_range + td->o.zone_skip; + io_u->file->last_pos = io_u->file->file_offset; td->io_skip_bytes += td->o.zone_skip; } @@ -618,31 +715,31 @@ out: static void __io_u_mark_map(unsigned int *map, unsigned int nr) { - int index = 0; + int idx = 0; switch (nr) { default: - index = 6; + idx = 6; break; case 33 ... 64: - index = 5; + idx = 5; break; case 17 ... 32: - index = 4; + idx = 4; break; case 9 ... 16: - index = 3; + idx = 3; break; case 5 ... 8: - index = 2; + idx = 2; break; case 1 ... 4: - index = 1; + idx = 1; case 0: break; } - map[index]++; + map[idx]++; } void io_u_mark_submit(struct thread_data *td, unsigned int nr) @@ -659,117 +756,117 @@ void io_u_mark_complete(struct thread_data *td, unsigned int nr) void io_u_mark_depth(struct thread_data *td, unsigned int nr) { - int index = 0; + int idx = 0; switch (td->cur_depth) { default: - index = 6; + idx = 6; break; case 32 ... 63: - index = 5; + idx = 5; break; case 16 ... 31: - index = 4; + idx = 4; break; case 8 ... 15: - index = 3; + idx = 3; break; case 4 ... 7: - index = 2; + idx = 2; break; case 2 ... 3: - index = 1; + idx = 1; case 1: break; } - td->ts.io_u_map[index] += nr; + td->ts.io_u_map[idx] += nr; } static void io_u_mark_lat_usec(struct thread_data *td, unsigned long usec) { - int index = 0; + int idx = 0; assert(usec < 1000); switch (usec) { case 750 ... 999: - index = 9; + idx = 9; break; case 500 ... 749: - index = 8; + idx = 8; break; case 250 ... 499: - index = 7; + idx = 7; break; case 100 ... 249: - index = 6; + idx = 6; break; case 50 ... 99: - index = 5; + idx = 5; break; case 20 ... 49: - index = 4; + idx = 4; break; case 10 ... 19: - index = 3; + idx = 3; break; case 4 ... 9: - index = 2; + idx = 2; break; case 2 ... 3: - index = 1; + idx = 1; case 0 ... 1: break; } - assert(index < FIO_IO_U_LAT_U_NR); - td->ts.io_u_lat_u[index]++; + assert(idx < FIO_IO_U_LAT_U_NR); + td->ts.io_u_lat_u[idx]++; } static void io_u_mark_lat_msec(struct thread_data *td, unsigned long msec) { - int index = 0; + int idx = 0; switch (msec) { default: - index = 11; + idx = 11; break; case 1000 ... 1999: - index = 10; + idx = 10; break; case 750 ... 999: - index = 9; + idx = 9; break; case 500 ... 749: - index = 8; + idx = 8; break; case 250 ... 499: - index = 7; + idx = 7; break; case 100 ... 249: - index = 6; + idx = 6; break; case 50 ... 99: - index = 5; + idx = 5; break; case 20 ... 49: - index = 4; + idx = 4; break; case 10 ... 19: - index = 3; + idx = 3; break; case 4 ... 9: - index = 2; + idx = 2; break; case 2 ... 3: - index = 1; + idx = 1; case 0 ... 1: break; } - assert(index < FIO_IO_U_LAT_M_NR); - td->ts.io_u_lat_m[index]++; + assert(idx < FIO_IO_U_LAT_M_NR); + td->ts.io_u_lat_m[idx]++; } static void io_u_mark_latency(struct thread_data *td, unsigned long usec) @@ -791,11 +888,19 @@ static struct fio_file *get_next_file_rand(struct thread_data *td, int fno; do { - long r = os_random_long(&td->next_file_state); int opened = 0; + unsigned long r; + + if (td->o.use_os_rand) { + r = os_random_long(&td->next_file_state); + fno = (unsigned int) ((double) td->o.nr_files + * (r / (OS_RAND_MAX + 1.0))); + } else { + r = __rand(&td->__next_file_state); + fno = (unsigned int) ((double) td->o.nr_files + * (r / (FRAND_MAX + 1.0))); + } - fno = (unsigned int) ((double) td->o.nr_files - * (r / (OS_RAND_MAX + 1.0))); f = td->files[fno]; if (fio_file_done(f)) continue; @@ -964,6 +1069,7 @@ again: if (io_u) { assert(io_u->flags & IO_U_F_FREE); io_u->flags &= ~(IO_U_F_FREE | IO_U_F_FREE_DEF); + io_u->flags &= ~(IO_U_F_TRIMMED | IO_U_F_BARRIER); io_u->error = 0; flist_del(&io_u->list); @@ -1029,6 +1135,45 @@ static int check_get_verify(struct thread_data *td, struct io_u *io_u) return 0; } +/* + * Fill offset and start time into the buffer content, to prevent too + * easy compressible data for simple de-dupe attempts. Do this for every + * 512b block in the range, since that should be the smallest block size + * we can expect from a device. + */ +static void small_content_scramble(struct io_u *io_u) +{ + unsigned int i, nr_blocks = io_u->buflen / 512; + unsigned long long boffset; + unsigned int offset; + void *p, *end; + + if (!nr_blocks) + return; + + p = io_u->xfer_buf; + boffset = io_u->offset; + io_u->buf_filled_len = 0; + + for (i = 0; i < nr_blocks; i++) { + /* + * Fill the byte offset into a "random" start offset of + * the buffer, given by the product of the usec time + * and the actual offset. + */ + offset = (io_u->start_time.tv_usec ^ boffset) & 511; + offset &= ~(sizeof(unsigned long long) - 1); + if (offset >= 512 - sizeof(unsigned long long)) + offset -= sizeof(unsigned long long); + memcpy(p + offset, &boffset, sizeof(boffset)); + + end = p + 512 - sizeof(io_u->start_time); + memcpy(end, &io_u->start_time, sizeof(io_u->start_time)); + p += 512; + boffset += 512; + } +} + /* * Return an io_u to be processed. Gets a buflen and offset, sets direction, * etc. The returned io_u is fully ready to be prepped and submitted. @@ -1037,6 +1182,7 @@ struct io_u *get_io_u(struct thread_data *td) { struct fio_file *f; struct io_u *io_u; + int do_scramble = 0; io_u = __get_io_u(td); if (!io_u) { @@ -1078,11 +1224,14 @@ struct io_u *get_io_u(struct thread_data *td) f->last_start = io_u->offset; f->last_pos = io_u->offset + io_u->buflen; - if (td->o.verify != VERIFY_NONE && io_u->ddir == DDIR_WRITE) - populate_verify_io_u(td, io_u); - else if (td->o.refill_buffers && io_u->ddir == DDIR_WRITE) - io_u_fill_buffer(td, io_u, io_u->xfer_buflen); - else if (io_u->ddir == DDIR_READ) { + if (io_u->ddir == DDIR_WRITE) { + if (td->o.verify != VERIFY_NONE) + populate_verify_io_u(td, io_u); + else if (td->o.refill_buffers) + io_u_fill_buffer(td, io_u, io_u->xfer_buflen); + else if (td->o.scramble_buffers) + do_scramble = 1; + } else if (io_u->ddir == DDIR_READ) { /* * Reset the buf_filled parameters so next time if the * buffer is used for writes it is refilled. @@ -1102,6 +1251,8 @@ out: if (!td_io_prep(td, io_u)) { if (!td->o.disable_slat) fio_gettime(&io_u->start_time, NULL); + if (do_scramble) + small_content_scramble(io_u); return io_u; } err_put: @@ -1131,6 +1282,46 @@ void io_u_log_error(struct thread_data *td, struct io_u *io_u) td_verror(td, io_u->error, "io_u error"); } +static void account_io_completion(struct thread_data *td, struct io_u *io_u, + struct io_completion_data *icd, + const enum fio_ddir idx, unsigned int bytes) +{ + unsigned long uninitialized_var(lusec); + + if (!icd->account) + return; + + if (!td->o.disable_clat || !td->o.disable_bw) + lusec = utime_since(&io_u->issue_time, &icd->time); + + if (!td->o.disable_lat) { + unsigned long tusec; + + tusec = utime_since(&io_u->start_time, &icd->time); + add_lat_sample(td, idx, tusec, bytes); + } + + if (!td->o.disable_clat) { + add_clat_sample(td, idx, lusec, bytes); + io_u_mark_latency(td, lusec); + } + + if (!td->o.disable_bw) + add_bw_sample(td, idx, bytes, &icd->time); + + add_iops_sample(td, idx, &icd->time); +} + +static long long usec_for_io(struct thread_data *td, enum fio_ddir ddir) +{ + unsigned long long secs, remainder, bps, bytes; + bytes = td->this_io_bytes[ddir]; + bps = td->rate_bps[ddir]; + secs = bytes / bps; + remainder = bytes % bps; + return remainder * 1000000 / bps + secs * 1000000; +} + static void io_completed(struct thread_data *td, struct io_u *io_u, struct io_completion_data *icd) { @@ -1168,6 +1359,7 @@ static void io_completed(struct thread_data *td, struct io_u *io_u, int ret; td->io_blocks[idx]++; + td->this_io_blocks[idx]++; td->io_bytes[idx] += bytes; td->this_io_bytes[idx] += bytes; @@ -1183,35 +1375,17 @@ static void io_completed(struct thread_data *td, struct io_u *io_u, } } - if (ramp_time_over(td)) { - unsigned long uninitialized_var(lusec); - - if (!td->o.disable_clat || !td->o.disable_bw) - lusec = utime_since(&io_u->issue_time, - &icd->time); - if (!td->o.disable_lat) { - unsigned long tusec; + if (ramp_time_over(td) && td->runstate == TD_RUNNING) { + account_io_completion(td, io_u, icd, idx, bytes); - tusec = utime_since(&io_u->start_time, - &icd->time); - add_lat_sample(td, idx, tusec, bytes); - } - if (!td->o.disable_clat) { - add_clat_sample(td, idx, lusec, bytes); - io_u_mark_latency(td, lusec); - } - if (!td->o.disable_bw) - add_bw_sample(td, idx, bytes, &icd->time); if (__should_check_rate(td, idx)) { td->rate_pending_usleep[idx] = - ((td->this_io_bytes[idx] * - td->rate_nsec_cycle[idx]) / 1000 - + (usec_for_io(td, idx) - utime_since_now(&td->start)); } - if (__should_check_rate(td, idx ^ 1)) + if (__should_check_rate(td, odx)) td->rate_pending_usleep[odx] = - ((td->this_io_bytes[odx] * - td->rate_nsec_cycle[odx]) / 1000 - + (usec_for_io(td, odx) - utime_since_now(&td->start)); } @@ -1231,8 +1405,8 @@ static void io_completed(struct thread_data *td, struct io_u *io_u, icd->error = io_u->error; io_u_log_error(td, io_u); } - if (td->o.continue_on_error && icd->error && - td_non_fatal_error(icd->error)) { + if (icd->error && td_non_fatal_error(icd->error) && + (td->o.continue_on_error & td_error_type(io_u->ddir, icd->error))) { /* * If there is a non_fatal error, then add to the error count * and clear all the errors. @@ -1251,6 +1425,7 @@ static void init_icd(struct thread_data *td, struct io_completion_data *icd, fio_gettime(&icd->time, NULL); icd->nr = nr; + icd->account = 1; icd->error = 0; icd->bytes_done[0] = icd->bytes_done[1] = 0; @@ -1269,6 +1444,8 @@ static void ios_completed(struct thread_data *td, if (!(io_u->flags & IO_U_F_FREE_DEF)) put_io_u(td, io_u); + + icd->account = 0; } } @@ -1356,8 +1533,10 @@ void io_u_queued(struct thread_data *td, struct io_u *io_u) void io_u_fill_buffer(struct thread_data *td, struct io_u *io_u, unsigned int max_bs) { + io_u->buf_filled_len = 0; + if (!td->o.zero_buffers) - fill_random_buf(io_u->buf, max_bs); + fill_random_buf(&td->buf_state, io_u->buf, max_bs); else memset(io_u->buf, 0, max_bs); }