From: Jens Axboe Date: Fri, 3 Aug 2018 15:16:10 +0000 (-0600) Subject: Merge branch 'read_iolog-from-unix-socket' of https://github.com/aclamk/fio X-Git-Tag: fio-3.9~50 X-Git-Url: https://git.kernel.dk/?p=fio.git;a=commitdiff_plain;h=a6d7953002576946bd7d6703fca698a16ad454a5;hp=2f8f4821ef614ce3e0b221701c36a6d2d2044bb2 Merge branch 'read_iolog-from-unix-socket' of https://github.com/aclamk/fio * 'read_iolog-from-unix-socket' of https://github.com/aclamk/fio: iolog: allow to read_iolog from unix socket --- diff --git a/FIO-VERSION-GEN b/FIO-VERSION-GEN index b28a1f3a..99261fba 100755 --- a/FIO-VERSION-GEN +++ b/FIO-VERSION-GEN @@ -1,7 +1,7 @@ #!/bin/sh GVF=FIO-VERSION-FILE -DEF_VER=fio-3.7 +DEF_VER=fio-3.8 LF=' ' diff --git a/HOWTO b/HOWTO index 70eed280..804d93e4 100644 --- a/HOWTO +++ b/HOWTO @@ -991,13 +991,15 @@ I/O type **write** Sequential writes. **trim** - Sequential trims (Linux block devices only). + Sequential trims (Linux block devices and SCSI + character devices only). **randread** Random reads. **randwrite** Random writes. **randtrim** - Random trims (Linux block devices only). + Random trims (Linux block devices and SCSI + character devices only). **rw,readwrite** Sequential mixed reads and writes. **randrw** @@ -1748,7 +1750,7 @@ I/O engine ioctl, or if the target is an sg character device we use :manpage:`read(2)` and :manpage:`write(2)` for asynchronous I/O. Requires :option:`filename` option to specify either block or - character devices. + character devices. This engine supports trim operations. The sg engine includes engine specific options. **null** @@ -2082,6 +2084,7 @@ with the caveat that when used on the command line, they must come after the the force unit access (fua) flag. Default is 0. .. option:: sg_write_mode=str : [sg] + Specify the type of write commands to issue. 
This option can take three values: **write** diff --git a/backend.c b/backend.c index a7e91843..3c45e789 100644 --- a/backend.c +++ b/backend.c @@ -454,7 +454,7 @@ int io_queue_event(struct thread_data *td, struct io_u *io_u, int *ret, *ret = -io_u->error; clear_io_u(td, io_u); } else if (io_u->resid) { - int bytes = io_u->xfer_buflen - io_u->resid; + long long bytes = io_u->xfer_buflen - io_u->resid; struct fio_file *f = io_u->file; if (bytes_issued) @@ -583,7 +583,7 @@ static bool in_flight_overlap(struct io_u_queue *q, struct io_u *io_u) if (x1 < y2 && y1 < x2) { overlap = true; - dprint(FD_IO, "in-flight overlap: %llu/%lu, %llu/%lu\n", + dprint(FD_IO, "in-flight overlap: %llu/%llu, %llu/%llu\n", x1, io_u->buflen, y1, check_io_u->buflen); break; @@ -1033,7 +1033,7 @@ static void do_io(struct thread_data *td, uint64_t *bytes_done) log_io_piece(td, io_u); if (td->o.io_submit_mode == IO_MODE_OFFLOAD) { - const unsigned long blen = io_u->xfer_buflen; + const unsigned long long blen = io_u->xfer_buflen; const enum fio_ddir __ddir = acct_ddir(io_u); if (td->error) @@ -1199,7 +1199,7 @@ static void cleanup_io_u(struct thread_data *td) static int init_io_u(struct thread_data *td) { struct io_u *io_u; - unsigned int max_bs, min_write; + unsigned long long max_bs, min_write; int cl_align, i, max_units; int data_xfer = 1, err; char *p; @@ -1234,7 +1234,7 @@ static int init_io_u(struct thread_data *td) td->orig_buffer_size += page_mask + td->o.mem_align; if (td->o.mem_type == MEM_SHMHUGE || td->o.mem_type == MEM_MMAPHUGE) { - unsigned long bs; + unsigned long long bs; bs = td->orig_buffer_size + td->o.hugepage_size - 1; td->orig_buffer_size = bs & ~(td->o.hugepage_size - 1); diff --git a/cconv.c b/cconv.c index bfd699da..534bfb07 100644 --- a/cconv.c +++ b/cconv.c @@ -110,16 +110,16 @@ void convert_thread_options_to_cpu(struct thread_options *o, o->start_offset_percent = le32_to_cpu(top->start_offset_percent); for (i = 0; i < DDIR_RWDIR_CNT; i++) { - o->bs[i] = 
le32_to_cpu(top->bs[i]); - o->ba[i] = le32_to_cpu(top->ba[i]); - o->min_bs[i] = le32_to_cpu(top->min_bs[i]); - o->max_bs[i] = le32_to_cpu(top->max_bs[i]); + o->bs[i] = le64_to_cpu(top->bs[i]); + o->ba[i] = le64_to_cpu(top->ba[i]); + o->min_bs[i] = le64_to_cpu(top->min_bs[i]); + o->max_bs[i] = le64_to_cpu(top->max_bs[i]); o->bssplit_nr[i] = le32_to_cpu(top->bssplit_nr[i]); if (o->bssplit_nr[i]) { o->bssplit[i] = malloc(o->bssplit_nr[i] * sizeof(struct bssplit)); for (j = 0; j < o->bssplit_nr[i]; j++) { - o->bssplit[i][j].bs = le32_to_cpu(top->bssplit[i][j].bs); + o->bssplit[i][j].bs = le64_to_cpu(top->bssplit[i][j].bs); o->bssplit[i][j].perc = le32_to_cpu(top->bssplit[i][j].perc); } } @@ -203,7 +203,7 @@ void convert_thread_options_to_cpu(struct thread_options *o, o->gauss_dev.u.f = fio_uint64_to_double(le64_to_cpu(top->gauss_dev.u.i)); o->random_generator = le32_to_cpu(top->random_generator); o->hugepage_size = le32_to_cpu(top->hugepage_size); - o->rw_min_bs = le32_to_cpu(top->rw_min_bs); + o->rw_min_bs = le64_to_cpu(top->rw_min_bs); o->thinktime = le32_to_cpu(top->thinktime); o->thinktime_spin = le32_to_cpu(top->thinktime_spin); o->thinktime_blocks = le32_to_cpu(top->thinktime_blocks); @@ -410,7 +410,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top, top->gauss_dev.u.i = __cpu_to_le64(fio_double_to_uint64(o->gauss_dev.u.f)); top->random_generator = cpu_to_le32(o->random_generator); top->hugepage_size = cpu_to_le32(o->hugepage_size); - top->rw_min_bs = cpu_to_le32(o->rw_min_bs); + top->rw_min_bs = __cpu_to_le64(o->rw_min_bs); top->thinktime = cpu_to_le32(o->thinktime); top->thinktime_spin = cpu_to_le32(o->thinktime_spin); top->thinktime_blocks = cpu_to_le32(o->thinktime_blocks); @@ -488,10 +488,10 @@ void convert_thread_options_to_net(struct thread_options_pack *top, top->write_hist_log = cpu_to_le32(o->write_hist_log); for (i = 0; i < DDIR_RWDIR_CNT; i++) { - top->bs[i] = cpu_to_le32(o->bs[i]); - top->ba[i] = cpu_to_le32(o->ba[i]); - 
top->min_bs[i] = cpu_to_le32(o->min_bs[i]); - top->max_bs[i] = cpu_to_le32(o->max_bs[i]); + top->bs[i] = __cpu_to_le64(o->bs[i]); + top->ba[i] = __cpu_to_le64(o->ba[i]); + top->min_bs[i] = __cpu_to_le64(o->min_bs[i]); + top->max_bs[i] = __cpu_to_le64(o->max_bs[i]); top->bssplit_nr[i] = cpu_to_le32(o->bssplit_nr[i]); if (o->bssplit_nr[i]) { @@ -502,7 +502,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top, bssplit_nr = BSSPLIT_MAX; } for (j = 0; j < bssplit_nr; j++) { - top->bssplit[i][j].bs = cpu_to_le32(o->bssplit[i][j].bs); + top->bssplit[i][j].bs = cpu_to_le64(o->bssplit[i][j].bs); top->bssplit[i][j].perc = cpu_to_le32(o->bssplit[i][j].perc); } } diff --git a/client.c b/client.c index 2a86ea97..e2525c81 100644 --- a/client.c +++ b/client.c @@ -1357,8 +1357,8 @@ static void client_flush_hist_samples(FILE *f, int hist_coarseness, void *sample entry = s->data.plat_entry; io_u_plat = entry->io_u_plat; - fprintf(f, "%lu, %u, %u, ", (unsigned long) s->time, - io_sample_ddir(s), s->bs); + fprintf(f, "%lu, %u, %llu, ", (unsigned long) s->time, + io_sample_ddir(s), (unsigned long long) s->bs); for (j = 0; j < FIO_IO_U_PLAT_NR - stride; j += stride) { fprintf(f, "%llu, ", (unsigned long long)hist_sum(j, stride, io_u_plat, NULL)); } @@ -1647,7 +1647,7 @@ static struct cmd_iolog_pdu *convert_iolog(struct fio_net_cmd *cmd, s->time = le64_to_cpu(s->time); s->data.val = le64_to_cpu(s->data.val); s->__ddir = le32_to_cpu(s->__ddir); - s->bs = le32_to_cpu(s->bs); + s->bs = le64_to_cpu(s->bs); if (ret->log_offset) { struct io_sample_offset *so = (void *) s; diff --git a/compiler/compiler.h b/compiler/compiler.h index dacb7379..ddfbcc12 100644 --- a/compiler/compiler.h +++ b/compiler/compiler.h @@ -28,7 +28,7 @@ */ #define typecheck(type,x) \ ({ type __dummy; \ - typeof(x) __dummy2; \ + __typeof__(x) __dummy2; \ (void)(&__dummy == &__dummy2); \ 1; \ }) @@ -70,7 +70,7 @@ #ifdef FIO_INTERNAL #define ARRAY_SIZE(x) (sizeof((x)) / (sizeof((x)[0]))) -#define 
FIELD_SIZE(s, f) (sizeof(((typeof(s))0)->f)) +#define FIELD_SIZE(s, f) (sizeof(((__typeof__(s))0)->f)) #endif #endif diff --git a/doc/fio-histo-log-pctiles.pdf b/doc/fio-histo-log-pctiles.pdf new file mode 100644 index 00000000..069ab994 Binary files /dev/null and b/doc/fio-histo-log-pctiles.pdf differ diff --git a/engines/glusterfs_sync.c b/engines/glusterfs_sync.c index a10e0ed6..099a5af1 100644 --- a/engines/glusterfs_sync.c +++ b/engines/glusterfs_sync.c @@ -34,7 +34,7 @@ static enum fio_q_status fio_gf_queue(struct thread_data *td, struct io_u *io_u) struct gf_data *g = td->io_ops_data; int ret = 0; - dprint(FD_FILE, "fio queue len %lu\n", io_u->xfer_buflen); + dprint(FD_FILE, "fio queue len %llu\n", io_u->xfer_buflen); fio_ro_check(td, io_u); if (io_u->ddir == DDIR_READ) @@ -50,7 +50,7 @@ static enum fio_q_status fio_gf_queue(struct thread_data *td, struct io_u *io_u) io_u->error = EINVAL; return FIO_Q_COMPLETED; } - dprint(FD_FILE, "fio len %lu ret %d\n", io_u->xfer_buflen, ret); + dprint(FD_FILE, "fio len %llu ret %d\n", io_u->xfer_buflen, ret); if (io_u->file && ret >= 0 && ddir_rw(io_u->ddir)) LAST_POS(io_u->file) = io_u->offset + ret; diff --git a/engines/libaio.c b/engines/libaio.c index dae2a70b..7ac36b23 100644 --- a/engines/libaio.c +++ b/engines/libaio.c @@ -207,6 +207,8 @@ static enum fio_q_status fio_libaio_queue(struct thread_data *td, return FIO_Q_BUSY; do_io_u_trim(td, io_u); + io_u_mark_submit(td, 1); + io_u_mark_complete(td, 1); return FIO_Q_COMPLETED; } diff --git a/engines/libpmem.c b/engines/libpmem.c index 21ff4f66..4ef3094e 100644 --- a/engines/libpmem.c +++ b/engines/libpmem.c @@ -499,7 +499,7 @@ static int fio_libpmem_init(struct thread_data *td) { struct thread_options *o = &td->o; - dprint(FD_IO,"o->rw_min_bs %d \n o->fsync_blocks %d \n o->fdatasync_blocks %d \n", + dprint(FD_IO,"o->rw_min_bs %llu \n o->fsync_blocks %d \n o->fdatasync_blocks %d \n", o->rw_min_bs,o->fsync_blocks,o->fdatasync_blocks); dprint(FD_IO, "DEBUG 
fio_libpmem_init\n"); diff --git a/engines/sg.c b/engines/sg.c index 06cd1946..7741f838 100644 --- a/engines/sg.c +++ b/engines/sg.c @@ -3,6 +3,51 @@ * * IO engine that uses the Linux SG v3 interface to talk to SCSI devices * + * This ioengine can operate in two modes: + * sync with block devices (/dev/sdX) or + * with character devices (/dev/sgY) with direct=1 or sync=1 + * async with character devices with direct=0 and sync=0 + * + * What value does queue() return for the different cases? + * queue() return value + * In sync mode: + * /dev/sdX RWT FIO_Q_COMPLETED + * /dev/sgY RWT FIO_Q_COMPLETED + * with direct=1 or sync=1 + * + * In async mode: + * /dev/sgY RWT FIO_Q_QUEUED + * direct=0 and sync=0 + * + * Because FIO_SYNCIO is set for this ioengine td_io_queue() will fill in + * issue_time *before* each IO is sent to queue() + * + * Where are the IO counting functions called for the different cases? + * + * In sync mode: + * /dev/sdX (commit==NULL) + * RWT + * io_u_mark_depth() called in td_io_queue() + * io_u_mark_submit/complete() called in td_io_queue() + * issue_time set in td_io_queue() + * + * /dev/sgY with direct=1 or sync=1 (commit does nothing) + * RWT + * io_u_mark_depth() called in td_io_queue() + * io_u_mark_submit/complete() called in queue() + * issue_time set in td_io_queue() + * + * In async mode: + * /dev/sgY with direct=0 and sync=0 + * RW: read and write operations are submitted in queue() + * io_u_mark_depth() called in td_io_commit() + * io_u_mark_submit() called in queue() + * issue_time set in td_io_queue() + * T: trim operations are queued in queue() and submitted in commit() + * io_u_mark_depth() called in td_io_commit() + * io_u_mark_submit() called in commit() + * issue_time set in commit() + * */ #include #include @@ -81,6 +126,9 @@ static struct fio_option options[] = { #define MAX_10B_LBA 0xFFFFFFFFULL #define SCSI_TIMEOUT_MS 30000 // 30 second timeout; currently no method to override #define MAX_SB 64 // sense block maximum return 
size +/* +#define FIO_SGIO_DEBUG +*/ struct sgio_cmd { unsigned char cdb[16]; // enhanced from 10 to support 16 byte commands @@ -88,6 +136,12 @@ struct sgio_cmd { int nr; }; +struct sgio_trim { + char *unmap_param; + unsigned int unmap_range_count; + struct io_u **trim_io_us; +}; + struct sgio_data { struct sgio_cmd *cmds; struct io_u **events; @@ -96,8 +150,18 @@ struct sgio_data { void *sgbuf; unsigned int bs; int type_checked; + struct sgio_trim **trim_queues; + int current_queue; +#ifdef FIO_SGIO_DEBUG + unsigned int *trim_queue_map; +#endif }; +static inline bool sgio_unbuffered(struct thread_data *td) +{ + return (td->o.odirect || td->o.sync_io); +} + static void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr, struct io_u *io_u, int fs) { @@ -113,6 +177,7 @@ static void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr, hdr->mx_sb_len = sizeof(sc->sb); hdr->pack_id = io_u->index; hdr->usr_ptr = io_u; + hdr->timeout = SCSI_TIMEOUT_MS; if (fs) { hdr->dxferp = io_u->xfer_buf; @@ -165,10 +230,11 @@ static int fio_sgio_getevents(struct thread_data *td, unsigned int min, const struct timespec fio_unused *t) { struct sgio_data *sd = td->io_ops_data; - int left = max, eventNum, ret, r = 0; + int left = max, eventNum, ret, r = 0, trims = 0; void *buf = sd->sgbuf; - unsigned int i, events; + unsigned int i, j, events; struct fio_file *f; + struct io_u *io_u; /* * Fill in the file descriptors @@ -186,10 +252,20 @@ static int fio_sgio_getevents(struct thread_data *td, unsigned int min, sd->pfds[i].events = POLLIN; } - while (left) { + /* + ** There are two counters here: + ** - number of SCSI commands completed + ** - number of io_us completed + ** + ** These are the same with reads and writes, but + ** could differ with trim/unmap commands because + ** a single unmap can include multiple io_us + */ + + while (left > 0) { char *p; - dprint(FD_IO, "sgio_getevents: sd %p: left=%d\n", sd, left); + dprint(FD_IO, "sgio_getevents: sd %p: min=%d, max=%d, 
left=%d\n", sd, min, max, left); do { if (!min) @@ -217,15 +293,21 @@ re_read: for_each_file(td, f, i) { for (eventNum = 0; eventNum < left; eventNum++) { ret = sg_fd_read(f->fd, p, sizeof(struct sg_io_hdr)); - dprint(FD_IO, "sgio_getevents: ret: %d\n", ret); + dprint(FD_IO, "sgio_getevents: sg_fd_read ret: %d\n", ret); if (ret) { r = -ret; td_verror(td, r, "sg_read"); break; } + io_u = ((struct sg_io_hdr *)p)->usr_ptr; + if (io_u->ddir == DDIR_TRIM) { + events += sd->trim_queues[io_u->index]->unmap_range_count; + eventNum += sd->trim_queues[io_u->index]->unmap_range_count - 1; + } else + events++; + p += sizeof(struct sg_io_hdr); - events++; - dprint(FD_IO, "sgio_getevents: events: %d\n", events); + dprint(FD_IO, "sgio_getevents: events: %d, eventNum: %d, left: %d\n", events, eventNum, left); } } @@ -241,14 +323,38 @@ re_read: for (i = 0; i < events; i++) { struct sg_io_hdr *hdr = (struct sg_io_hdr *) buf + i; - sd->events[i] = hdr->usr_ptr; + sd->events[i + trims] = hdr->usr_ptr; + io_u = (struct io_u *)(hdr->usr_ptr); - /* record if an io error occurred, ignore resid */ if (hdr->info & SG_INFO_CHECK) { - struct io_u *io_u; - io_u = (struct io_u *)(hdr->usr_ptr); + /* record if an io error occurred, ignore resid */ memcpy(&io_u->hdr, hdr, sizeof(struct sg_io_hdr)); - sd->events[i]->error = EIO; + sd->events[i + trims]->error = EIO; + } + + if (io_u->ddir == DDIR_TRIM) { + struct sgio_trim *st = sd->trim_queues[io_u->index]; +#ifdef FIO_SGIO_DEBUG + assert(st->trim_io_us[0] == io_u); + assert(sd->trim_queue_map[io_u->index] == io_u->index); + dprint(FD_IO, "sgio_getevents: reaping %d io_us from trim queue %d\n", st->unmap_range_count, io_u->index); + dprint(FD_IO, "sgio_getevents: reaped io_u %d and stored in events[%d]\n", io_u->index, i+trims); +#endif + for (j = 1; j < st->unmap_range_count; j++) { + ++trims; + sd->events[i + trims] = st->trim_io_us[j]; +#ifdef FIO_SGIO_DEBUG + dprint(FD_IO, "sgio_getevents: reaped io_u %d and stored in events[%d]\n", 
st->trim_io_us[j]->index, i+trims); + assert(sd->trim_queue_map[st->trim_io_us[j]->index] == io_u->index); +#endif + if (hdr->info & SG_INFO_CHECK) { + /* record if an io error occurred, ignore resid */ + memcpy(&st->trim_io_us[j]->hdr, hdr, sizeof(struct sg_io_hdr)); + sd->events[i + trims]->error = EIO; + } + } + events -= st->unmap_range_count - 1; + st->unmap_range_count = 0; } } } @@ -287,7 +393,8 @@ static enum fio_q_status fio_sgio_ioctl_doio(struct thread_data *td, return FIO_Q_COMPLETED; } -static int fio_sgio_rw_doio(struct fio_file *f, struct io_u *io_u, int do_sync) +static enum fio_q_status fio_sgio_rw_doio(struct fio_file *f, + struct io_u *io_u, int do_sync) { struct sg_io_hdr *hdr = &io_u->hdr; int ret; @@ -311,10 +418,11 @@ static int fio_sgio_rw_doio(struct fio_file *f, struct io_u *io_u, int do_sync) return FIO_Q_QUEUED; } -static int fio_sgio_doio(struct thread_data *td, struct io_u *io_u, int do_sync) +static enum fio_q_status fio_sgio_doio(struct thread_data *td, + struct io_u *io_u, int do_sync) { struct fio_file *f = io_u->file; - int ret; + enum fio_q_status ret; if (f->filetype == FIO_TYPE_BLOCK) { ret = fio_sgio_ioctl_doio(td, f, io_u); @@ -328,12 +436,41 @@ static int fio_sgio_doio(struct thread_data *td, struct io_u *io_u, int do_sync) return ret; } +static void fio_sgio_rw_lba(struct sg_io_hdr *hdr, unsigned long long lba, + unsigned long long nr_blocks) +{ + if (lba < MAX_10B_LBA) { + hdr->cmdp[2] = (unsigned char) ((lba >> 24) & 0xff); + hdr->cmdp[3] = (unsigned char) ((lba >> 16) & 0xff); + hdr->cmdp[4] = (unsigned char) ((lba >> 8) & 0xff); + hdr->cmdp[5] = (unsigned char) (lba & 0xff); + hdr->cmdp[7] = (unsigned char) ((nr_blocks >> 8) & 0xff); + hdr->cmdp[8] = (unsigned char) (nr_blocks & 0xff); + } else { + hdr->cmdp[2] = (unsigned char) ((lba >> 56) & 0xff); + hdr->cmdp[3] = (unsigned char) ((lba >> 48) & 0xff); + hdr->cmdp[4] = (unsigned char) ((lba >> 40) & 0xff); + hdr->cmdp[5] = (unsigned char) ((lba >> 32) & 0xff); + 
hdr->cmdp[6] = (unsigned char) ((lba >> 24) & 0xff); + hdr->cmdp[7] = (unsigned char) ((lba >> 16) & 0xff); + hdr->cmdp[8] = (unsigned char) ((lba >> 8) & 0xff); + hdr->cmdp[9] = (unsigned char) (lba & 0xff); + hdr->cmdp[10] = (unsigned char) ((nr_blocks >> 32) & 0xff); + hdr->cmdp[11] = (unsigned char) ((nr_blocks >> 16) & 0xff); + hdr->cmdp[12] = (unsigned char) ((nr_blocks >> 8) & 0xff); + hdr->cmdp[13] = (unsigned char) (nr_blocks & 0xff); + } + + return; +} + static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u) { struct sg_io_hdr *hdr = &io_u->hdr; struct sg_options *o = td->eo; struct sgio_data *sd = td->io_ops_data; - long long nr_blocks, lba; + unsigned long long nr_blocks, lba; + int offset; if (io_u->xfer_buflen & (sd->bs - 1)) { log_err("read/write not sector aligned\n"); @@ -355,6 +492,8 @@ static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u) if (o->readfua) hdr->cmdp[1] |= 0x08; + fio_sgio_rw_lba(hdr, lba, nr_blocks); + } else if (io_u->ddir == DDIR_WRITE) { sgio_hdr_init(sd, hdr, io_u, 1); @@ -383,58 +522,111 @@ static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u) hdr->cmdp[0] = 0x93; // write same(16) break; }; - } else { + + fio_sgio_rw_lba(hdr, lba, nr_blocks); + + } else if (io_u->ddir == DDIR_TRIM) { + struct sgio_trim *st; + + if (sd->current_queue == -1) { + sgio_hdr_init(sd, hdr, io_u, 0); + + hdr->cmd_len = 10; + hdr->dxfer_direction = SG_DXFER_TO_DEV; + hdr->cmdp[0] = 0x42; // unmap + sd->current_queue = io_u->index; + st = sd->trim_queues[sd->current_queue]; + hdr->dxferp = st->unmap_param; +#ifdef FIO_SGIO_DEBUG + assert(sd->trim_queues[io_u->index]->unmap_range_count == 0); + dprint(FD_IO, "sg: creating new queue based on io_u %d\n", io_u->index); +#endif + } + else + st = sd->trim_queues[sd->current_queue]; + + dprint(FD_IO, "sg: adding io_u %d to trim queue %d\n", io_u->index, sd->current_queue); + st->trim_io_us[st->unmap_range_count] = io_u; +#ifdef FIO_SGIO_DEBUG + 
sd->trim_queue_map[io_u->index] = sd->current_queue; +#endif + + offset = 8 + 16 * st->unmap_range_count; + st->unmap_param[offset] = (unsigned char) ((lba >> 56) & 0xff); + st->unmap_param[offset+1] = (unsigned char) ((lba >> 48) & 0xff); + st->unmap_param[offset+2] = (unsigned char) ((lba >> 40) & 0xff); + st->unmap_param[offset+3] = (unsigned char) ((lba >> 32) & 0xff); + st->unmap_param[offset+4] = (unsigned char) ((lba >> 24) & 0xff); + st->unmap_param[offset+5] = (unsigned char) ((lba >> 16) & 0xff); + st->unmap_param[offset+6] = (unsigned char) ((lba >> 8) & 0xff); + st->unmap_param[offset+7] = (unsigned char) (lba & 0xff); + st->unmap_param[offset+8] = (unsigned char) ((nr_blocks >> 32) & 0xff); + st->unmap_param[offset+9] = (unsigned char) ((nr_blocks >> 16) & 0xff); + st->unmap_param[offset+10] = (unsigned char) ((nr_blocks >> 8) & 0xff); + st->unmap_param[offset+11] = (unsigned char) (nr_blocks & 0xff); + + st->unmap_range_count++; + + } else if (ddir_sync(io_u->ddir)) { sgio_hdr_init(sd, hdr, io_u, 0); hdr->dxfer_direction = SG_DXFER_NONE; if (lba < MAX_10B_LBA) hdr->cmdp[0] = 0x35; // synccache(10) else hdr->cmdp[0] = 0x91; // synccache(16) - } + } else + assert(0); - /* - * for synccache, we leave lba and length to 0 to sync all - * blocks on medium. 
- */ - if (hdr->dxfer_direction != SG_DXFER_NONE) { - if (lba < MAX_10B_LBA) { - hdr->cmdp[2] = (unsigned char) ((lba >> 24) & 0xff); - hdr->cmdp[3] = (unsigned char) ((lba >> 16) & 0xff); - hdr->cmdp[4] = (unsigned char) ((lba >> 8) & 0xff); - hdr->cmdp[5] = (unsigned char) (lba & 0xff); - hdr->cmdp[7] = (unsigned char) ((nr_blocks >> 8) & 0xff); - hdr->cmdp[8] = (unsigned char) (nr_blocks & 0xff); - } else { - hdr->cmdp[2] = (unsigned char) ((lba >> 56) & 0xff); - hdr->cmdp[3] = (unsigned char) ((lba >> 48) & 0xff); - hdr->cmdp[4] = (unsigned char) ((lba >> 40) & 0xff); - hdr->cmdp[5] = (unsigned char) ((lba >> 32) & 0xff); - hdr->cmdp[6] = (unsigned char) ((lba >> 24) & 0xff); - hdr->cmdp[7] = (unsigned char) ((lba >> 16) & 0xff); - hdr->cmdp[8] = (unsigned char) ((lba >> 8) & 0xff); - hdr->cmdp[9] = (unsigned char) (lba & 0xff); - hdr->cmdp[10] = (unsigned char) ((nr_blocks >> 32) & 0xff); - hdr->cmdp[11] = (unsigned char) ((nr_blocks >> 16) & 0xff); - hdr->cmdp[12] = (unsigned char) ((nr_blocks >> 8) & 0xff); - hdr->cmdp[13] = (unsigned char) (nr_blocks & 0xff); - } - } - - hdr->timeout = SCSI_TIMEOUT_MS; return 0; } +static void fio_sgio_unmap_setup(struct sg_io_hdr *hdr, struct sgio_trim *st) +{ + hdr->dxfer_len = st->unmap_range_count * 16 + 8; + hdr->cmdp[7] = (unsigned char) (((st->unmap_range_count * 16 + 8) >> 8) & 0xff); + hdr->cmdp[8] = (unsigned char) ((st->unmap_range_count * 16 + 8) & 0xff); + + st->unmap_param[0] = (unsigned char) (((16 * st->unmap_range_count + 6) >> 8) & 0xff); + st->unmap_param[1] = (unsigned char) ((16 * st->unmap_range_count + 6) & 0xff); + st->unmap_param[2] = (unsigned char) (((16 * st->unmap_range_count) >> 8) & 0xff); + st->unmap_param[3] = (unsigned char) ((16 * st->unmap_range_count) & 0xff); + + return; +} + static enum fio_q_status fio_sgio_queue(struct thread_data *td, struct io_u *io_u) { struct sg_io_hdr *hdr = &io_u->hdr; + struct sgio_data *sd = td->io_ops_data; int ret, do_sync = 0; fio_ro_check(td, io_u); - if 
(td->o.sync_io || td->o.odirect || ddir_sync(io_u->ddir)) + if (sgio_unbuffered(td) || ddir_sync(io_u->ddir)) do_sync = 1; + if (io_u->ddir == DDIR_TRIM) { + if (do_sync || io_u->file->filetype == FIO_TYPE_BLOCK) { + struct sgio_trim *st = sd->trim_queues[sd->current_queue]; + + /* finish cdb setup for unmap because we are + ** doing unmap commands synchronously */ +#ifdef FIO_SGIO_DEBUG + assert(st->unmap_range_count == 1); + assert(io_u == st->trim_io_us[0]); +#endif + hdr = &io_u->hdr; + + fio_sgio_unmap_setup(hdr, st); + + st->unmap_range_count = 0; + sd->current_queue = -1; + } else + /* queue up trim ranges and submit in commit() */ + return FIO_Q_QUEUED; + } + ret = fio_sgio_doio(td, io_u, do_sync); if (ret < 0) @@ -442,6 +634,14 @@ static enum fio_q_status fio_sgio_queue(struct thread_data *td, else if (hdr->status) { io_u->resid = hdr->resid; io_u->error = EIO; + } else if (td->io_ops->commit != NULL) { + if (do_sync && !ddir_sync(io_u->ddir)) { + io_u_mark_submit(td, 1); + io_u_mark_complete(td, 1); + } else if (io_u->ddir == DDIR_READ || io_u->ddir == DDIR_WRITE) { + io_u_mark_submit(td, 1); + io_u_queued(td, io_u); + } } if (io_u->error) { @@ -452,6 +652,61 @@ static enum fio_q_status fio_sgio_queue(struct thread_data *td, return ret; } +static int fio_sgio_commit(struct thread_data *td) +{ + struct sgio_data *sd = td->io_ops_data; + struct sgio_trim *st; + struct io_u *io_u; + struct sg_io_hdr *hdr; + struct timespec now; + unsigned int i; + int ret; + + if (sd->current_queue == -1) + return 0; + + st = sd->trim_queues[sd->current_queue]; + io_u = st->trim_io_us[0]; + hdr = &io_u->hdr; + + fio_sgio_unmap_setup(hdr, st); + + sd->current_queue = -1; + + ret = fio_sgio_rw_doio(io_u->file, io_u, 0); + + if (ret < 0) + for (i = 0; i < st->unmap_range_count; i++) + st->trim_io_us[i]->error = errno; + else if (hdr->status) + for (i = 0; i < st->unmap_range_count; i++) { + st->trim_io_us[i]->resid = hdr->resid; + st->trim_io_us[i]->error = EIO; + } + else { + 
if (fio_fill_issue_time(td)) { + fio_gettime(&now, NULL); + for (i = 0; i < st->unmap_range_count; i++) { + struct io_u *io_u = st->trim_io_us[i]; + + memcpy(&io_u->issue_time, &now, sizeof(now)); + io_u_queued(td, io_u); + } + } + io_u_mark_submit(td, st->unmap_range_count); + } + + if (io_u->error) { + td_verror(td, io_u->error, "xfer"); + return 0; + } + + if (ret == FIO_Q_QUEUED) + return 0; + else + return ret; +} + static struct io_u *fio_sgio_event(struct thread_data *td, int event) { struct sgio_data *sd = td->io_ops_data; @@ -553,6 +808,7 @@ static int fio_sgio_read_capacity(struct thread_data *td, unsigned int *bs, static void fio_sgio_cleanup(struct thread_data *td) { struct sgio_data *sd = td->io_ops_data; + int i; if (sd) { free(sd->events); @@ -560,6 +816,17 @@ static void fio_sgio_cleanup(struct thread_data *td) free(sd->fd_flags); free(sd->pfds); free(sd->sgbuf); +#ifdef FIO_SGIO_DEBUG + free(sd->trim_queue_map); +#endif + + for (i = 0; i < td->o.iodepth; i++) { + free(sd->trim_queues[i]->unmap_param); + free(sd->trim_queues[i]->trim_io_us); + free(sd->trim_queues[i]); + } + + free(sd->trim_queues); free(sd); } } @@ -567,20 +834,30 @@ static void fio_sgio_cleanup(struct thread_data *td) static int fio_sgio_init(struct thread_data *td) { struct sgio_data *sd; + struct sgio_trim *st; + int i; - sd = malloc(sizeof(*sd)); - memset(sd, 0, sizeof(*sd)); - sd->cmds = malloc(td->o.iodepth * sizeof(struct sgio_cmd)); - memset(sd->cmds, 0, td->o.iodepth * sizeof(struct sgio_cmd)); - sd->events = malloc(td->o.iodepth * sizeof(struct io_u *)); - memset(sd->events, 0, td->o.iodepth * sizeof(struct io_u *)); - sd->pfds = malloc(sizeof(struct pollfd) * td->o.nr_files); - memset(sd->pfds, 0, sizeof(struct pollfd) * td->o.nr_files); - sd->fd_flags = malloc(sizeof(int) * td->o.nr_files); - memset(sd->fd_flags, 0, sizeof(int) * td->o.nr_files); - sd->sgbuf = malloc(sizeof(struct sg_io_hdr) * td->o.iodepth); - memset(sd->sgbuf, 0, sizeof(struct sg_io_hdr) * 
td->o.iodepth); + sd = calloc(1, sizeof(*sd)); + sd->cmds = calloc(td->o.iodepth, sizeof(struct sgio_cmd)); + sd->sgbuf = calloc(td->o.iodepth, sizeof(struct sg_io_hdr)); + sd->events = calloc(td->o.iodepth, sizeof(struct io_u *)); + sd->pfds = calloc(td->o.nr_files, sizeof(struct pollfd)); + sd->fd_flags = calloc(td->o.nr_files, sizeof(int)); sd->type_checked = 0; + + sd->trim_queues = calloc(td->o.iodepth, sizeof(struct sgio_trim *)); + sd->current_queue = -1; +#ifdef FIO_SGIO_DEBUG + sd->trim_queue_map = calloc(td->o.iodepth, sizeof(int)); +#endif + for (i = 0; i < td->o.iodepth; i++) { + sd->trim_queues[i] = calloc(1, sizeof(struct sgio_trim)); + st = sd->trim_queues[i]; + st->unmap_param = calloc(td->o.iodepth + 1, sizeof(char[16])); + st->unmap_range_count = 0; + st->trim_io_us = calloc(td->o.iodepth, sizeof(struct io_u *)); + } + td->io_ops_data = sd; /* @@ -632,6 +909,12 @@ static int fio_sgio_type_check(struct thread_data *td, struct fio_file *f) if (f->filetype == FIO_TYPE_BLOCK) { td->io_ops->getevents = NULL; td->io_ops->event = NULL; + td->io_ops->commit = NULL; + /* + ** Setting these functions to null may cause problems + ** with filename=/dev/sda:/dev/sg0 since we are only + ** considering a single file + */ } sd->type_checked = 1; @@ -848,6 +1131,23 @@ static char *fio_sgio_errdetails(struct io_u *io_u) snprintf(msgchunk, MAXMSGCHUNK, "SG Driver: %d bytes out of %d not transferred. ", hdr->resid, hdr->dxfer_len); strlcat(msg, msgchunk, MAXERRDETAIL); } + if (hdr->cmdp) { + strlcat(msg, "cdb:", MAXERRDETAIL); + for (i = 0; i < hdr->cmd_len; i++) { + snprintf(msgchunk, MAXMSGCHUNK, " %02x", hdr->cmdp[i]); + strlcat(msg, msgchunk, MAXERRDETAIL); + } + strlcat(msg, ". 
", MAXERRDETAIL); + if (io_u->ddir == DDIR_TRIM) { + unsigned char *param_list = hdr->dxferp; + strlcat(msg, "dxferp:", MAXERRDETAIL); + for (i = 0; i < hdr->dxfer_len; i++) { + snprintf(msgchunk, MAXMSGCHUNK, " %02x", param_list[i]); + strlcat(msg, msgchunk, MAXERRDETAIL); + } + strlcat(msg, ". ", MAXERRDETAIL); + } + } } if (!(hdr->info & SG_INFO_CHECK) && !strlen(msg)) @@ -906,6 +1206,7 @@ static struct ioengine_ops ioengine = { .init = fio_sgio_init, .prep = fio_sgio_prep, .queue = fio_sgio_queue, + .commit = fio_sgio_commit, .getevents = fio_sgio_getevents, .errdetails = fio_sgio_errdetails, .event = fio_sgio_event, diff --git a/engines/solarisaio.c b/engines/solarisaio.c index 151f31d4..21e95935 100644 --- a/engines/solarisaio.c +++ b/engines/solarisaio.c @@ -105,7 +105,7 @@ static struct io_u *fio_solarisaio_event(struct thread_data *td, int event) return sd->aio_events[event]; } -static int fio_solarisaio_queue(struct thread_data fio_unused *td, +static enum fio_q_status fio_solarisaio_queue(struct thread_data fio_unused *td, struct io_u *io_u) { struct solarisaio_data *sd = td->io_ops_data; diff --git a/file.h b/file.h index 8fd34b13..c0a547eb 100644 --- a/file.h +++ b/file.h @@ -86,7 +86,7 @@ struct fio_file { */ unsigned int major, minor; int fileno; - int bs; + unsigned long long bs; char *file_name; /* diff --git a/filesetup.c b/filesetup.c index a2427a1a..accb67ac 100644 --- a/filesetup.c +++ b/filesetup.c @@ -107,7 +107,7 @@ static int extend_file(struct thread_data *td, struct fio_file *f) { int new_layout = 0, unlink_file = 0, flags; unsigned long long left; - unsigned int bs; + unsigned long long bs; char *b = NULL; if (read_only) { @@ -260,7 +260,7 @@ static bool pre_read_file(struct thread_data *td, struct fio_file *f) { int r, did_open = 0, old_runstate; unsigned long long left; - unsigned int bs; + unsigned long long bs; bool ret = true; char *b; @@ -900,7 +900,7 @@ int setup_files(struct thread_data *td) unsigned int i, nr_fs_extra = 0; int 
err = 0, need_extend; int old_state; - const unsigned int bs = td_min_bs(td); + const unsigned long long bs = td_min_bs(td); uint64_t fs = 0; dprint(FD_FILE, "setup files\n"); diff --git a/fio.1 b/fio.1 index 6d2eba67..a446aba5 100644 --- a/fio.1 +++ b/fio.1 @@ -757,7 +757,7 @@ Sequential reads. Sequential writes. .TP .B trim -Sequential trims (Linux block devices only). +Sequential trims (Linux block devices and SCSI character devices only). .TP .B randread Random reads. @@ -766,7 +766,7 @@ Random reads. Random writes. .TP .B randtrim -Random trims (Linux block devices only). +Random trims (Linux block devices and SCSI character devices only). .TP .B rw,readwrite Sequential mixed reads and writes. @@ -1524,7 +1524,8 @@ SCSI generic sg v3 I/O. May either be synchronous using the SG_IO ioctl, or if the target is an sg character device we use \fBread\fR\|(2) and \fBwrite\fR\|(2) for asynchronous I/O. Requires \fBfilename\fR option to specify either block or -character devices. The sg engine includes engine specific options. +character devices. This engine supports trim operations. The +sg engine includes engine specific options. .TP .B null Doesn't transfer any data, just pretends to. 
This is mainly used to diff --git a/fio.h b/fio.h index 3ac552b2..685aab19 100644 --- a/fio.h +++ b/fio.h @@ -736,17 +736,17 @@ static inline bool should_check_rate(struct thread_data *td) return ddir_rw_sum(td->bytes_done) != 0; } -static inline unsigned int td_max_bs(struct thread_data *td) +static inline unsigned long long td_max_bs(struct thread_data *td) { - unsigned int max_bs; + unsigned long long max_bs; max_bs = max(td->o.max_bs[DDIR_READ], td->o.max_bs[DDIR_WRITE]); return max(td->o.max_bs[DDIR_TRIM], max_bs); } -static inline unsigned int td_min_bs(struct thread_data *td) +static inline unsigned long long td_min_bs(struct thread_data *td) { - unsigned int min_bs; + unsigned long long min_bs; min_bs = min(td->o.min_bs[DDIR_READ], td->o.min_bs[DDIR_WRITE]); return min(td->o.min_bs[DDIR_TRIM], min_bs); diff --git a/flist.h b/flist.h index 2ca3d777..5437cd80 100644 --- a/flist.h +++ b/flist.h @@ -4,8 +4,8 @@ #include #include -#define container_of(ptr, type, member) ({ \ - const typeof( ((type *)0)->member ) *__mptr = (ptr); \ +#define container_of(ptr, type, member) ({ \ + const __typeof__( ((type *)0)->member ) *__mptr = (ptr); \ (type *)( (char *)__mptr - offsetof(type,member) );}) /* diff --git a/gclient.c b/gclient.c index bcd7a880..7e5071d6 100644 --- a/gclient.c +++ b/gclient.c @@ -1,4 +1,4 @@ -#include +#include #include #include diff --git a/gerror.c b/gerror.c index 43bdabae..1ebcb272 100644 --- a/gerror.c +++ b/gerror.c @@ -1,5 +1,5 @@ #include -#include +#include #include #include diff --git a/gfio.c b/gfio.c index d222a1c2..f59238cd 100644 --- a/gfio.c +++ b/gfio.c @@ -22,8 +22,9 @@ * */ #include -#include +#include #include +#include #include #include diff --git a/goptions.c b/goptions.c index 16938eda..f44254bf 100644 --- a/goptions.c +++ b/goptions.c @@ -1,5 +1,5 @@ #include -#include +#include #include #include diff --git a/graph.c b/graph.c index f82b52ad..7a174170 100644 --- a/graph.c +++ b/graph.c @@ -21,7 +21,7 @@ * */ #include -#include 
+#include #include #include #include diff --git a/init.c b/init.c index af4cc6b7..ede0a8b9 100644 --- a/init.c +++ b/init.c @@ -531,7 +531,7 @@ static void put_job(struct thread_data *td) static int __setup_rate(struct thread_data *td, enum fio_ddir ddir) { - unsigned int bs = td->o.min_bs[ddir]; + unsigned long long bs = td->o.min_bs[ddir]; assert(ddir_rw(ddir)); @@ -891,7 +891,7 @@ static int fixup_options(struct thread_data *td) * If size is set but less than the min block size, complain */ if (o->size && o->size < td_min_bs(td)) { - log_err("fio: size too small, must not be less than minimum block size: %llu < %u\n", + log_err("fio: size too small, must not be less than minimum block size: %llu < %llu\n", (unsigned long long) o->size, td_min_bs(td)); ret |= 1; } @@ -2158,7 +2158,7 @@ static void usage(const char *name) printf(" --showcmd\t\tTurn a job file into command line options\n"); printf(" --eta=when\t\tWhen ETA estimate should be printed\n"); printf(" \t\tMay be \"always\", \"never\" or \"auto\"\n"); - printf(" --eta-newline=time\tForce a new line for every 'time'"); + printf(" --eta-newline=t\tForce a new line for every 't'"); printf(" period passed\n"); printf(" --status-interval=t\tForce full status dump every"); printf(" 't' period passed\n"); diff --git a/io_u.c b/io_u.c index 5221a78c..c58dcf04 100644 --- a/io_u.c +++ b/io_u.c @@ -33,9 +33,9 @@ static bool random_map_free(struct fio_file *f, const uint64_t block) */ static void mark_random_map(struct thread_data *td, struct io_u *io_u) { - unsigned int min_bs = td->o.min_bs[io_u->ddir]; + unsigned long long min_bs = td->o.min_bs[io_u->ddir]; struct fio_file *f = io_u->file; - unsigned int nr_blocks; + unsigned long long nr_blocks; uint64_t block; block = (io_u->offset - f->file_offset) / (uint64_t) min_bs; @@ -503,19 +503,19 @@ static int get_next_offset(struct thread_data *td, struct io_u *io_u, } static inline bool io_u_fits(struct thread_data *td, struct io_u *io_u, - unsigned int buflen) + 
unsigned long long buflen) { struct fio_file *f = io_u->file; return io_u->offset + buflen <= f->io_size + get_start_offset(td, f); } -static unsigned int get_next_buflen(struct thread_data *td, struct io_u *io_u, +static unsigned long long get_next_buflen(struct thread_data *td, struct io_u *io_u, bool is_random) { int ddir = io_u->ddir; - unsigned int buflen = 0; - unsigned int minbs, maxbs; + unsigned long long buflen = 0; + unsigned long long minbs, maxbs; uint64_t frand_max, r; bool power_2; @@ -541,7 +541,7 @@ static unsigned int get_next_buflen(struct thread_data *td, struct io_u *io_u, r = __rand(&td->bsrange_state[ddir]); if (!td->o.bssplit_nr[ddir]) { - buflen = minbs + (unsigned int) ((double) maxbs * + buflen = minbs + (unsigned long long) ((double) maxbs * (r / (frand_max + 1.0))); } else { long long perc = 0; @@ -891,7 +891,7 @@ static int fill_io_u(struct thread_data *td, struct io_u *io_u) } if (io_u->offset + io_u->buflen > io_u->file->real_file_size) { - dprint(FD_IO, "io_u %p, off=0x%llx + len=0x%lx exceeds file size=0x%llx\n", + dprint(FD_IO, "io_u %p, off=0x%llx + len=0x%llx exceeds file size=0x%llx\n", io_u, (unsigned long long) io_u->offset, io_u->buflen, (unsigned long long) io_u->file->real_file_size); @@ -1582,7 +1582,7 @@ static bool check_get_verify(struct thread_data *td, struct io_u *io_u) */ static void small_content_scramble(struct io_u *io_u) { - unsigned int i, nr_blocks = io_u->buflen >> 9; + unsigned long long i, nr_blocks = io_u->buflen >> 9; unsigned int offset; uint64_t boffset, *iptr; char *p; @@ -1726,7 +1726,7 @@ static void __io_u_log_error(struct thread_data *td, struct io_u *io_u) if (td_non_fatal_error(td, eb, io_u->error) && !td->o.error_dump) return; - log_err("fio: io_u error%s%s: %s: %s offset=%llu, buflen=%lu\n", + log_err("fio: io_u error%s%s: %s: %s offset=%llu, buflen=%llu\n", io_u->file ? " on file " : "", io_u->file ? 
io_u->file->file_name : "", strerror(io_u->error), @@ -1892,7 +1892,7 @@ static void io_completed(struct thread_data *td, struct io_u **io_u_ptr, td->last_ddir = ddir; if (!io_u->error && ddir_rw(ddir)) { - unsigned int bytes = io_u->buflen - io_u->resid; + unsigned long long bytes = io_u->buflen - io_u->resid; int ret; td->io_blocks[ddir]++; @@ -2082,8 +2082,8 @@ static void save_buf_state(struct thread_data *td, struct frand_state *rs) frand_copy(&td->buf_state_prev, rs); } -void fill_io_buffer(struct thread_data *td, void *buf, unsigned int min_write, - unsigned int max_bs) +void fill_io_buffer(struct thread_data *td, void *buf, unsigned long long min_write, + unsigned long long max_bs) { struct thread_options *o = &td->o; @@ -2093,8 +2093,8 @@ void fill_io_buffer(struct thread_data *td, void *buf, unsigned int min_write, if (o->compress_percentage || o->dedupe_percentage) { unsigned int perc = td->o.compress_percentage; struct frand_state *rs; - unsigned int left = max_bs; - unsigned int this_write; + unsigned long long left = max_bs; + unsigned long long this_write; do { rs = get_buf_state(td); @@ -2103,7 +2103,7 @@ void fill_io_buffer(struct thread_data *td, void *buf, unsigned int min_write, if (perc) { this_write = min_not_zero(min_write, - td->o.compress_chunk); + (unsigned long long) td->o.compress_chunk); fill_random_buf_percentage(rs, buf, perc, this_write, this_write, @@ -2130,7 +2130,7 @@ void fill_io_buffer(struct thread_data *td, void *buf, unsigned int min_write, * "randomly" fill the buffer contents */ void io_u_fill_buffer(struct thread_data *td, struct io_u *io_u, - unsigned int min_write, unsigned int max_bs) + unsigned long long min_write, unsigned long long max_bs) { io_u->buf_filled_len = 0; fill_io_buffer(td, io_u->buf, min_write, max_bs); diff --git a/io_u.h b/io_u.h index 4f433c3f..9a423b24 100644 --- a/io_u.h +++ b/io_u.h @@ -51,7 +51,7 @@ struct io_u { /* * Allocated/set buffer and length */ - unsigned long buflen; + unsigned long long 
buflen; unsigned long long offset; void *buf; @@ -65,13 +65,13 @@ struct io_u { * partial transfers / residual data counts */ void *xfer_buf; - unsigned long xfer_buflen; + unsigned long long xfer_buflen; /* * Parameter related to pre-filled buffers and * their size to handle variable block sizes. */ - unsigned long buf_filled_len; + unsigned long long buf_filled_len; struct io_piece *ipo; @@ -134,8 +134,8 @@ extern void io_u_queued(struct thread_data *, struct io_u *); extern int io_u_quiesce(struct thread_data *); extern void io_u_log_error(struct thread_data *, struct io_u *); extern void io_u_mark_depth(struct thread_data *, unsigned int); -extern void fill_io_buffer(struct thread_data *, void *, unsigned int, unsigned int); -extern void io_u_fill_buffer(struct thread_data *td, struct io_u *, unsigned int, unsigned int); +extern void fill_io_buffer(struct thread_data *, void *, unsigned long long, unsigned long long); +extern void io_u_fill_buffer(struct thread_data *td, struct io_u *, unsigned long long, unsigned long long); void io_u_mark_complete(struct thread_data *, unsigned int); void io_u_mark_submit(struct thread_data *, unsigned int); bool queue_full(const struct thread_data *); @@ -149,13 +149,13 @@ static inline void dprint_io_u(struct io_u *io_u, const char *p) struct fio_file *f = io_u->file; if (f) - dprint(FD_IO, "%s: io_u %p: off=0x%llx,len=0x%lx,ddir=%d,file=%s\n", + dprint(FD_IO, "%s: io_u %p: off=0x%llx,len=0x%llx,ddir=%d,file=%s\n", p, io_u, (unsigned long long) io_u->offset, io_u->buflen, io_u->ddir, f->file_name); else - dprint(FD_IO, "%s: io_u %p: off=0x%llx,len=0x%lx,ddir=%d\n", + dprint(FD_IO, "%s: io_u %p: off=0x%llx,len=0x%llx,ddir=%d\n", p, io_u, (unsigned long long) io_u->offset, io_u->buflen, io_u->ddir); diff --git a/ioengines.c b/ioengines.c index d579682f..e5fbcd43 100644 --- a/ioengines.c +++ b/ioengines.c @@ -279,7 +279,7 @@ out: enum fio_q_status td_io_queue(struct thread_data *td, struct io_u *io_u) { const enum fio_ddir 
ddir = acct_ddir(io_u); - unsigned long buflen = io_u->xfer_buflen; + unsigned long long buflen = io_u->xfer_buflen; enum fio_q_status ret; dprint_io_u(io_u, "queue"); @@ -350,7 +350,7 @@ enum fio_q_status td_io_queue(struct thread_data *td, struct io_u *io_u) "invalid block size. Try setting direct=0.\n"); } - if (!td->io_ops->commit || io_u->ddir == DDIR_TRIM) { + if (!td->io_ops->commit) { io_u_mark_submit(td, 1); io_u_mark_complete(td, 1); } diff --git a/iolog.c b/iolog.c index 3b04195e..eb38027e 100644 --- a/iolog.c +++ b/iolog.c @@ -42,7 +42,7 @@ void log_io_u(const struct thread_data *td, const struct io_u *io_u) if (!td->o.write_iolog_file) return; - fprintf(td->iolog_f, "%s %s %llu %lu\n", io_u->file->file_name, + fprintf(td->iolog_f, "%s %s %llu %llu\n", io_u->file->file_name, io_ddir_name(io_u->ddir), io_u->offset, io_u->buflen); } @@ -168,7 +168,7 @@ int read_iolog_get(struct thread_data *td, struct io_u *io_u) io_u->buflen = ipo->len; io_u->file = td->files[ipo->fileno]; get_file(io_u->file); - dprint(FD_IO, "iolog: get %llu/%lu/%s\n", io_u->offset, + dprint(FD_IO, "iolog: get %llu/%llu/%s\n", io_u->offset, io_u->buflen, io_u->file->file_name); if (ipo->delay) iolog_delay(td, ipo->delay); @@ -774,8 +774,8 @@ static void flush_hist_samples(FILE *f, int hist_coarseness, void *samples, entry_before = flist_first_entry(&entry->list, struct io_u_plat_entry, list); io_u_plat_before = entry_before->io_u_plat; - fprintf(f, "%lu, %u, %u, ", (unsigned long) s->time, - io_sample_ddir(s), s->bs); + fprintf(f, "%lu, %u, %llu, ", (unsigned long) s->time, + io_sample_ddir(s), (unsigned long long) s->bs); for (j = 0; j < FIO_IO_U_PLAT_NR - stride; j += stride) { fprintf(f, "%llu, ", (unsigned long long) hist_sum(j, stride, io_u_plat, io_u_plat_before)); @@ -807,17 +807,17 @@ void flush_samples(FILE *f, void *samples, uint64_t sample_size) s = __get_sample(samples, log_offset, i); if (!log_offset) { - fprintf(f, "%lu, %" PRId64 ", %u, %u\n", + fprintf(f, "%lu, %" 
PRId64 ", %u, %llu\n", (unsigned long) s->time, s->data.val, - io_sample_ddir(s), s->bs); + io_sample_ddir(s), (unsigned long long) s->bs); } else { struct io_sample_offset *so = (void *) s; - fprintf(f, "%lu, %" PRId64 ", %u, %u, %llu\n", + fprintf(f, "%lu, %" PRId64 ", %u, %llu, %llu\n", (unsigned long) s->time, s->data.val, - io_sample_ddir(s), s->bs, + io_sample_ddir(s), (unsigned long long) s->bs, (unsigned long long) so->offset); } } diff --git a/iolog.h b/iolog.h index a4e335ab..3b8c9014 100644 --- a/iolog.h +++ b/iolog.h @@ -42,7 +42,7 @@ struct io_sample { uint64_t time; union io_sample_data data; uint32_t __ddir; - uint32_t bs; + uint64_t bs; }; struct io_sample_offset { diff --git a/lib/axmap.c b/lib/axmap.c index 4047f236..454af0b9 100644 --- a/lib/axmap.c +++ b/lib/axmap.c @@ -156,10 +156,10 @@ static bool axmap_handler(struct axmap *axmap, uint64_t bit_nr, void *), void *data) { struct axmap_level *al; + uint64_t index = bit_nr; int i; for (i = 0; i < axmap->nr_levels; i++) { - unsigned long index = ulog64(bit_nr, i); unsigned long offset = index >> UNIT_SHIFT; unsigned int bit = index & BLOCKS_PER_UNIT_MASK; @@ -167,6 +167,9 @@ static bool axmap_handler(struct axmap *axmap, uint64_t bit_nr, if (func(al, offset, bit, data)) return true; + + if (index) + index >>= UNIT_SHIFT; } return false; diff --git a/minmax.h b/minmax.h index afc78f02..ec0848c0 100644 --- a/minmax.h +++ b/minmax.h @@ -3,23 +3,23 @@ #ifndef min #define min(x,y) ({ \ - typeof(x) _x = (x); \ - typeof(y) _y = (y); \ + __typeof__(x) _x = (x); \ + __typeof__(y) _y = (y); \ (void) (&_x == &_y); \ _x < _y ? _x : _y; }) #endif #ifndef max #define max(x,y) ({ \ - typeof(x) _x = (x); \ - typeof(y) _y = (y); \ + __typeof__(x) _x = (x); \ + __typeof__(y) _y = (y); \ (void) (&_x == &_y); \ _x > _y ? _x : _y; }) #endif #define min_not_zero(x, y) ({ \ - typeof(x) __x = (x); \ - typeof(y) __y = (y); \ + __typeof__(x) __x = (x); \ + __typeof__(y) __y = (y); \ __x == 0 ? __y : ((__y == 0) ? 
__x : min(__x, __y)); }) #endif diff --git a/options.c b/options.c index a174e2cd..4b464028 100644 --- a/options.c +++ b/options.c @@ -52,7 +52,7 @@ static int bs_cmp(const void *p1, const void *p2) struct split { unsigned int nr; - unsigned int val1[ZONESPLIT_MAX]; + unsigned long long val1[ZONESPLIT_MAX]; unsigned long long val2[ZONESPLIT_MAX]; }; @@ -119,7 +119,7 @@ static int bssplit_ddir(struct thread_options *o, enum fio_ddir ddir, char *str, bool data) { unsigned int i, perc, perc_missing; - unsigned int max_bs, min_bs; + unsigned long long max_bs, min_bs; struct split split; memset(&split, 0, sizeof(split)); @@ -2112,7 +2112,7 @@ struct fio_option fio_options[FIO_MAX_OPTS] = { .name = "bs", .lname = "Block size", .alias = "blocksize", - .type = FIO_OPT_INT, + .type = FIO_OPT_ULL, .off1 = offsetof(struct thread_options, bs[DDIR_READ]), .off2 = offsetof(struct thread_options, bs[DDIR_WRITE]), .off3 = offsetof(struct thread_options, bs[DDIR_TRIM]), @@ -2129,7 +2129,7 @@ struct fio_option fio_options[FIO_MAX_OPTS] = { .name = "ba", .lname = "Block size align", .alias = "blockalign", - .type = FIO_OPT_INT, + .type = FIO_OPT_ULL, .off1 = offsetof(struct thread_options, ba[DDIR_READ]), .off2 = offsetof(struct thread_options, ba[DDIR_WRITE]), .off3 = offsetof(struct thread_options, ba[DDIR_TRIM]), @@ -2163,7 +2163,7 @@ struct fio_option fio_options[FIO_MAX_OPTS] = { { .name = "bssplit", .lname = "Block size split", - .type = FIO_OPT_STR, + .type = FIO_OPT_STR_ULL, .cb = str_bssplit_cb, .off1 = offsetof(struct thread_options, bssplit), .help = "Set a specific mix of block sizes", diff --git a/os/windows/posix.c b/os/windows/posix.c old mode 100755 new mode 100644 diff --git a/oslib/libmtd_common.h b/oslib/libmtd_common.h index 87f93b61..4ed9f0ba 100644 --- a/oslib/libmtd_common.h +++ b/oslib/libmtd_common.h @@ -49,18 +49,18 @@ extern "C" { #define min(a, b) MIN(a, b) /* glue for linux kernel source */ #define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) -#define 
ALIGN(x,a) __ALIGN_MASK(x,(typeof(x))(a)-1) +#define ALIGN(x,a) __ALIGN_MASK(x,(__typeof__(x))(a)-1) #define __ALIGN_MASK(x,mask) (((x)+(mask))&~(mask)) #define min_t(t,x,y) ({ \ - typeof((x)) _x = (x); \ - typeof((y)) _y = (y); \ + __typeof__((x)) _x = (x); \ + __typeof__((y)) _y = (y); \ (_x < _y) ? _x : _y; \ }) #define max_t(t,x,y) ({ \ - typeof((x)) _x = (x); \ - typeof((y)) _y = (y); \ + __typeof__((x)) _x = (x); \ + __typeof__((y)) _y = (y); \ (_x > _y) ? _x : _y; \ }) diff --git a/parse.c b/parse.c index 6261fca2..194ad594 100644 --- a/parse.c +++ b/parse.c @@ -26,12 +26,14 @@ static const char *opt_type_names[] = { "OPT_INVALID", "OPT_STR", + "OPT_STR_ULL", "OPT_STR_MULTI", "OPT_STR_VAL", "OPT_STR_VAL_TIME", "OPT_STR_STORE", "OPT_RANGE", "OPT_INT", + "OPT_ULL", "OPT_BOOL", "OPT_FLOAT_LIST", "OPT_STR_SET", @@ -438,7 +440,7 @@ void strip_blank_end(char *p) *(s + 1) = '\0'; } -static int check_range_bytes(const char *str, long *val, void *data) +static int check_range_bytes(const char *str, long long *val, void *data) { long long __val; @@ -507,7 +509,8 @@ static int __handle_option(const struct fio_option *o, const char *ptr, int il=0, *ilp; fio_fp64_t *flp; long long ull, *ullp; - long ul1, ul2; + long ul2; + long long ull1, ull2; double uf; char **cp = NULL; int ret = 0, is_time = 0; @@ -525,6 +528,7 @@ static int __handle_option(const struct fio_option *o, const char *ptr, switch (o->type) { case FIO_OPT_STR: + case FIO_OPT_STR_ULL: case FIO_OPT_STR_MULTI: { fio_opt_str_fn *fn = o->cb; @@ -540,7 +544,11 @@ static int __handle_option(const struct fio_option *o, const char *ptr, break; if (!strncmp(vp->ival, ptr, str_match_len(vp, ptr))) { ret = 0; - if (o->off1) + if (!o->off1) + continue; + if (o->type == FIO_OPT_STR_ULL) + val_store(ullp, vp->oval, o->off1, vp->orval, data, o); + else val_store(ilp, vp->oval, o->off1, vp->orval, data, o); continue; } @@ -554,6 +562,8 @@ static int __handle_option(const struct fio_option *o, const char *ptr, } case 
FIO_OPT_STR_VAL_TIME: is_time = 1; + /* fall through */ + case FIO_OPT_ULL: case FIO_OPT_INT: case FIO_OPT_STR_VAL: { fio_opt_str_val_fn *fn = o->cb; @@ -584,7 +594,7 @@ static int __handle_option(const struct fio_option *o, const char *ptr, if (o->maxval && ull > o->maxval) { log_err("max value out of range: %llu" - " (%u max)\n", ull, o->maxval); + " (%llu max)\n", ull, o->maxval); return 1; } if (o->minval && ull < o->minval) { @@ -636,6 +646,27 @@ static int __handle_option(const struct fio_option *o, const char *ptr, val_store(ilp, ull, o->off3, 0, data, o); } } + } else if (o->type == FIO_OPT_ULL) { + if (first) + val_store(ullp, ull, o->off1, 0, data, o); + if (curr == 1) { + if (o->off2) + val_store(ullp, ull, o->off2, 0, data, o); + } + if (curr == 2) { + if (o->off3) + val_store(ullp, ull, o->off3, 0, data, o); + } + if (!more) { + if (curr < 1) { + if (o->off2) + val_store(ullp, ull, o->off2, 0, data, o); + } + if (curr < 2) { + if (o->off3) + val_store(ullp, ull, o->off3, 0, data, o); + } + } } else { if (first) val_store(ullp, ull, o->off1, 0, data, o); @@ -790,43 +821,43 @@ static int __handle_option(const struct fio_option *o, const char *ptr, p1 = tmp; ret = 1; - if (!check_range_bytes(p1, &ul1, data) && - !check_range_bytes(p2, &ul2, data)) { + if (!check_range_bytes(p1, &ull1, data) && + !check_range_bytes(p2, &ull2, data)) { ret = 0; - if (ul1 > ul2) { - unsigned long foo = ul1; + if (ull1 > ull2) { + unsigned long long foo = ull1; - ul1 = ul2; - ul2 = foo; + ull1 = ull2; + ull2 = foo; } if (first) { - val_store(ilp, ul1, o->off1, 0, data, o); - val_store(ilp, ul2, o->off2, 0, data, o); + val_store(ullp, ull1, o->off1, 0, data, o); + val_store(ullp, ull2, o->off2, 0, data, o); } if (curr == 1) { if (o->off3 && o->off4) { - val_store(ilp, ul1, o->off3, 0, data, o); - val_store(ilp, ul2, o->off4, 0, data, o); + val_store(ullp, ull1, o->off3, 0, data, o); + val_store(ullp, ull2, o->off4, 0, data, o); } } if (curr == 2) { if (o->off5 && o->off6) { - 
val_store(ilp, ul1, o->off5, 0, data, o); - val_store(ilp, ul2, o->off6, 0, data, o); + val_store(ullp, ull1, o->off5, 0, data, o); + val_store(ullp, ull2, o->off6, 0, data, o); } } if (!more) { if (curr < 1) { if (o->off3 && o->off4) { - val_store(ilp, ul1, o->off3, 0, data, o); - val_store(ilp, ul2, o->off4, 0, data, o); + val_store(ullp, ull1, o->off3, 0, data, o); + val_store(ullp, ull2, o->off4, 0, data, o); } } if (curr < 2) { if (o->off5 && o->off6) { - val_store(ilp, ul1, o->off5, 0, data, o); - val_store(ilp, ul2, o->off6, 0, data, o); + val_store(ullp, ull1, o->off5, 0, data, o); + val_store(ullp, ull2, o->off6, 0, data, o); } } } @@ -851,7 +882,7 @@ static int __handle_option(const struct fio_option *o, const char *ptr, break; if (o->maxval && il > (int) o->maxval) { - log_err("max value out of range: %d (%d max)\n", + log_err("max value out of range: %d (%llu max)\n", il, o->maxval); return 1; } @@ -878,6 +909,7 @@ static int __handle_option(const struct fio_option *o, const char *ptr, } case FIO_OPT_DEPRECATED: ret = 1; + /* fall through */ case FIO_OPT_SOFT_DEPRECATED: log_info("Option %s is deprecated\n", o->name); break; @@ -1325,6 +1357,10 @@ static void option_init(struct fio_option *o) if (!o->maxval) o->maxval = UINT_MAX; } + if (o->type == FIO_OPT_ULL) { + if (!o->maxval) + o->maxval = ULLONG_MAX; + } if (o->type == FIO_OPT_STR_SET && o->def && !o->no_warn_def) { log_err("Option %s: string set option with" " default will always be true\n", o->name); diff --git a/parse.h b/parse.h index 4de5e77d..b47a02c7 100644 --- a/parse.h +++ b/parse.h @@ -10,12 +10,14 @@ enum fio_opt_type { FIO_OPT_INVALID = 0, FIO_OPT_STR, + FIO_OPT_STR_ULL, FIO_OPT_STR_MULTI, FIO_OPT_STR_VAL, FIO_OPT_STR_VAL_TIME, FIO_OPT_STR_STORE, FIO_OPT_RANGE, FIO_OPT_INT, + FIO_OPT_ULL, FIO_OPT_BOOL, FIO_OPT_FLOAT_LIST, FIO_OPT_STR_SET, @@ -29,7 +31,7 @@ enum fio_opt_type { */ struct value_pair { const char *ival; /* string option */ - unsigned int oval; /* output value */ + unsigned 
long long oval;/* output value */ const char *help; /* help text for sub option */ int orval; /* OR value */ void *cb; /* sub-option callback */ @@ -52,7 +54,7 @@ struct fio_option { unsigned int off4; unsigned int off5; unsigned int off6; - unsigned int maxval; /* max and min value */ + unsigned long long maxval; /* max and min value */ int minval; double maxfp; /* max and min floating value */ double minfp; diff --git a/server.c b/server.c index 7e7ffedc..b966c66c 100644 --- a/server.c +++ b/server.c @@ -1985,7 +1985,7 @@ int fio_send_iolog(struct thread_data *td, struct io_log *log, const char *name) s->time = cpu_to_le64(s->time); s->data.val = cpu_to_le64(s->data.val); s->__ddir = cpu_to_le32(s->__ddir); - s->bs = cpu_to_le32(s->bs); + s->bs = cpu_to_le64(s->bs); if (log->log_offset) { struct io_sample_offset *so = (void *) s; diff --git a/server.h b/server.h index b48bbe16..37d2f76a 100644 --- a/server.h +++ b/server.h @@ -48,7 +48,7 @@ struct fio_net_cmd_reply { }; enum { - FIO_SERVER_VER = 73, + FIO_SERVER_VER = 74, FIO_SERVER_MAX_FRAGMENT_PDU = 1024, FIO_SERVER_MAX_CMD_MB = 2048, diff --git a/stat.c b/stat.c index a308eb88..82e79dfc 100644 --- a/stat.c +++ b/stat.c @@ -619,8 +619,8 @@ static int block_state_category(int block_state) static int compare_block_infos(const void *bs1, const void *bs2) { - uint32_t block1 = *(uint32_t *)bs1; - uint32_t block2 = *(uint32_t *)bs2; + uint64_t block1 = *(uint64_t *)bs1; + uint64_t block2 = *(uint64_t *)bs2; int state1 = BLOCK_INFO_STATE(block1); int state2 = BLOCK_INFO_STATE(block2); int bscat1 = block_state_category(state1); @@ -1295,13 +1295,8 @@ static struct json_object *show_thread_status_json(struct thread_stat *ts, json_object_add_value_int(root, "majf", ts->majf); json_object_add_value_int(root, "minf", ts->minf); - - /* Calc % distribution of IO depths, usecond, msecond latency */ + /* Calc % distribution of IO depths */ stat_calc_dist(ts->io_u_map, ddir_rw_sum(ts->total_io_u), io_u_dist); - 
stat_calc_lat_n(ts, io_u_lat_n); - stat_calc_lat_u(ts, io_u_lat_u); - stat_calc_lat_m(ts, io_u_lat_m); - tmp = json_create_object(); json_object_add_value_object(root, "iodepth_level", tmp); /* Only show fixed 7 I/O depth levels*/ @@ -1314,6 +1309,44 @@ static struct json_object *show_thread_status_json(struct thread_stat *ts, json_object_add_value_float(tmp, (const char *)name, io_u_dist[i]); } + /* Calc % distribution of submit IO depths */ + stat_calc_dist(ts->io_u_submit, ts->total_submit, io_u_dist); + tmp = json_create_object(); + json_object_add_value_object(root, "iodepth_submit", tmp); + /* Only show fixed 7 I/O depth levels*/ + for (i = 0; i < 7; i++) { + char name[20]; + if (i == 0) + snprintf(name, 20, "0"); + else if (i < 6) + snprintf(name, 20, "%d", 1 << (i+1)); + else + snprintf(name, 20, ">=%d", 1 << i); + json_object_add_value_float(tmp, (const char *)name, io_u_dist[i]); + } + + /* Calc % distribution of completion IO depths */ + stat_calc_dist(ts->io_u_complete, ts->total_complete, io_u_dist); + tmp = json_create_object(); + json_object_add_value_object(root, "iodepth_complete", tmp); + /* Only show fixed 7 I/O depth levels*/ + for (i = 0; i < 7; i++) { + char name[20]; + if (i == 0) + snprintf(name, 20, "0"); + else if (i < 6) + snprintf(name, 20, "%d", 1 << (i+1)); + else + snprintf(name, 20, ">=%d", 1 << i); + json_object_add_value_float(tmp, (const char *)name, io_u_dist[i]); + } + + /* Calc % distribution of nsecond, usecond, msecond latency */ + stat_calc_dist(ts->io_u_map, ddir_rw_sum(ts->total_io_u), io_u_dist); + stat_calc_lat_n(ts, io_u_lat_n); + stat_calc_lat_u(ts, io_u_lat_u); + stat_calc_lat_m(ts, io_u_lat_m); + /* Nanosecond latency */ tmp = json_create_object(); json_object_add_value_object(root, "latency_ns", tmp); @@ -2220,7 +2253,7 @@ static struct io_logs *get_cur_log(struct io_log *iolog) } static void __add_log_sample(struct io_log *iolog, union io_sample_data data, - enum fio_ddir ddir, unsigned int bs, + enum fio_ddir 
ddir, unsigned long long bs, unsigned long t, uint64_t offset) { struct io_logs *cur_log; @@ -2338,7 +2371,7 @@ static void _add_stat_to_log(struct io_log *iolog, unsigned long elapsed, static unsigned long add_log_sample(struct thread_data *td, struct io_log *iolog, union io_sample_data data, - enum fio_ddir ddir, unsigned int bs, + enum fio_ddir ddir, unsigned long long bs, uint64_t offset) { unsigned long elapsed, this_window; @@ -2400,7 +2433,7 @@ void finalize_logs(struct thread_data *td, bool unit_logs) _add_stat_to_log(td->iops_log, elapsed, td->o.log_max != 0); } -void add_agg_sample(union io_sample_data data, enum fio_ddir ddir, unsigned int bs) +void add_agg_sample(union io_sample_data data, enum fio_ddir ddir, unsigned long long bs) { struct io_log *iolog; @@ -2430,7 +2463,8 @@ static void add_clat_percentile_sample(struct thread_stat *ts, } void add_clat_sample(struct thread_data *td, enum fio_ddir ddir, - unsigned long long nsec, unsigned int bs, uint64_t offset) + unsigned long long nsec, unsigned long long bs, + uint64_t offset) { unsigned long elapsed, this_window; struct thread_stat *ts = &td->ts; @@ -2489,7 +2523,7 @@ void add_clat_sample(struct thread_data *td, enum fio_ddir ddir, } void add_slat_sample(struct thread_data *td, enum fio_ddir ddir, - unsigned long usec, unsigned int bs, uint64_t offset) + unsigned long usec, unsigned long long bs, uint64_t offset) { struct thread_stat *ts = &td->ts; @@ -2507,7 +2541,8 @@ void add_slat_sample(struct thread_data *td, enum fio_ddir ddir, } void add_lat_sample(struct thread_data *td, enum fio_ddir ddir, - unsigned long long nsec, unsigned int bs, uint64_t offset) + unsigned long long nsec, unsigned long long bs, + uint64_t offset) { struct thread_stat *ts = &td->ts; @@ -2590,7 +2625,7 @@ static int __add_samples(struct thread_data *td, struct timespec *parent_tv, add_stat_sample(&stat[ddir], rate); if (log) { - unsigned int bs = 0; + unsigned long long bs = 0; if (td->o.min_bs[ddir] == 
td->o.max_bs[ddir]) bs = td->o.min_bs[ddir]; diff --git a/stat.h b/stat.h index c5b81854..5dcaae02 100644 --- a/stat.h +++ b/stat.h @@ -308,12 +308,12 @@ extern void update_rusage_stat(struct thread_data *); extern void clear_rusage_stat(struct thread_data *); extern void add_lat_sample(struct thread_data *, enum fio_ddir, unsigned long long, - unsigned int, uint64_t); + unsigned long long, uint64_t); extern void add_clat_sample(struct thread_data *, enum fio_ddir, unsigned long long, - unsigned int, uint64_t); + unsigned long long, uint64_t); extern void add_slat_sample(struct thread_data *, enum fio_ddir, unsigned long, - unsigned int, uint64_t); -extern void add_agg_sample(union io_sample_data, enum fio_ddir, unsigned int); + unsigned long long, uint64_t); +extern void add_agg_sample(union io_sample_data, enum fio_ddir, unsigned long long); extern void add_iops_sample(struct thread_data *, struct io_u *, unsigned int); extern void add_bw_sample(struct thread_data *, struct io_u *, diff --git a/t/sgunmap-perf.py b/t/sgunmap-perf.py new file mode 100755 index 00000000..fadbb859 --- /dev/null +++ b/t/sgunmap-perf.py @@ -0,0 +1,115 @@ +#!/usr/bin/python2.7 +# +# sgunmap-test.py +# +# Basic performance testing using fio's sg ioengine +# +# USAGE +# sgunmap-perf.py char-device block-device fio-executable +# +# EXAMPLE +# t/sgunmap-perf.py /dev/sg1 /dev/sdb ./fio +# +# REQUIREMENTS +# Python 2.6+ +# +# + +from __future__ import absolute_import +from __future__ import print_function +import sys +import json +import argparse +import subprocess +from six.moves import range + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument('cdev', + help='character device target (e.g., /dev/sg0)') + parser.add_argument('bdev', + help='block device target (e.g., /dev/sda)') + parser.add_argument('fioc', + help='path to candidate fio executable (e.g., ./fio)') + parser.add_argument('fior', + help='path to reference fio executable (e.g., ./fio)') + args = 
parser.parse_args() + + return args + + +def fulldevice(fio, dev, ioengine='psync', rw='trim', bs='1M'): + parameters = ["--name=test", + "--output-format=json", + "--random_generator=lfsr", + "--bs={0}".format(bs), + "--rw={0}".format(rw), + "--ioengine={0}".format(ioengine), + "--filename={0}".format(dev)] + + output = subprocess.check_output([fio] + parameters) + jsondata = json.loads(output) + jobdata = jsondata['jobs'][0] + return jobdata + + +def runtest(fio, dev, rw, qd, batch, bs='512', runtime='30s'): + parameters = ["--name=test", + "--random_generator=tausworthe64", + "--time_based", + "--runtime={0}".format(runtime), + "--output-format=json", + "--ioengine=sg", + "--blocksize={0}".format(bs), + "--rw={0}".format(rw), + "--filename={0}".format(dev), + "--iodepth={0}".format(qd), + "--iodepth_batch={0}".format(batch)] + + output = subprocess.check_output([fio] + parameters) + jsondata = json.loads(output) + jobdata = jsondata['jobs'][0] +# print(parameters) + + return jobdata + + +def runtests(fio, dev, qd, batch, rw, bs='512', trials=5): + iops = [] + for x in range(trials): + jd = runtest(fio, dev, rw, qd, batch, bs=bs) + total = jd['read']['iops'] + jd['write']['iops'] + jd['trim']['iops'] +# print(total) + iops.extend([total]) + return iops, (sum(iops) / trials) + +if __name__ == '__main__': + args = parse_args() + + print("Trimming full device {0}".format(args.cdev)) + fulldevice(args.fior, args.cdev, ioengine='sg') + + print("Running rand read tests on {0}" + " with fio candidate build {1}".format(args.cdev, args.fioc)) + randread, rrmean = runtests(args.fioc, args.cdev, 16, 1, 'randread', + trials=5) + print("IOPS mean {0}, trials {1}".format(rrmean, randread)) + + print("Running rand read tests on {0}" + " with fio reference build {1}".format(args.cdev, args.fior)) + randread, rrmean = runtests(args.fior, args.cdev, 16, 1, 'randread', + trials=5) + print("IOPS mean {0}, trials {1}".format(rrmean, randread)) + + print("Running rand write tests on 
{0}" + " with fio candidate build {1}".format(args.cdev, args.fioc)) + randwrite, rwmean = runtests(args.fioc, args.cdev, 16, 1, 'randwrite', + trials=5) + print("IOPS mean {0}, trials {1}".format(rwmean, randwrite)) + + print("Running rand write tests on {0}" + " with fio reference build {1}".format(args.cdev, args.fior)) + randwrite, rwmean = runtests(args.fior, args.cdev, 16, 1, 'randwrite', + trials=5) + print("IOPS mean {0}, trials {1}".format(rwmean, randwrite)) diff --git a/t/sgunmap-test.py b/t/sgunmap-test.py new file mode 100755 index 00000000..d2caa5fd --- /dev/null +++ b/t/sgunmap-test.py @@ -0,0 +1,173 @@ +#!/usr/bin/python2.7 +# Note: this script is python2 and python 3 compatible. +# +# sgunmap-test.py +# +# Limited functonality test for trim workloads using fio's sg ioengine +# This checks only the three sets of reported iodepths +# +# !!!WARNING!!! +# This script carries out destructive tests. Be sure that +# there is no data you want to keep on the supplied devices. +# +# USAGE +# sgunmap-test.py char-device block-device fio-executable +# +# EXAMPLE +# t/sgunmap-test.py /dev/sg1 /dev/sdb ./fio +# +# REQUIREMENTS +# Python 2.6+ +# +# TEST MATRIX +# For both char-dev and block-dev these are the expected +# submit/complete IO depths +# +# blockdev chardev +# iodepth iodepth +# R QD1 sub/comp: 1-4=100% sub/comp: 1-4=100% +# W QD1 sub/comp: 1-4=100% sub/comp: 1-4=100% +# T QD1 sub/comp: 1-4=100% sub/comp: 1-4=100% +# +# R QD16, batch8 sub/comp: 1-4=100% sub/comp: 1-4=100% +# W QD16, batch8 sub/comp: 1-4=100% sub/comp: 1-4=100% +# T QD16, batch8 sub/comp: 1-4=100% sub/comp: 5-8=100% +# +# R QD16, batch16 sub/comp: 1-4=100% sub/comp: 1-4=100% +# W QD16, batch16 sub/comp: 1-4=100% sub/comp: 1-4=100% +# T QD16, batch16 sub/comp: 1-4=100% sub/comp: 9-16=100% +# + +from __future__ import absolute_import +from __future__ import print_function +import sys +import json +import argparse +import traceback +import subprocess +from six.moves import range + + +def 
parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument('chardev', + help='character device target (e.g., /dev/sg0)') + parser.add_argument('blockdev', + help='block device target (e.g., /dev/sda)') + parser.add_argument('fio', + help='path to fio executable (e.g., ./fio)') + args = parser.parse_args() + + return args + +# +# With block devices, +# iodepth = 1 always +# submit = complete = 1-4 always +# With character devices, +# RW +# iodepth = qd +# submit = 1-4 +# complete = 1-4 except for the IOs in flight +# when the job is ending +# T +# iodepth = qd +# submit = qdbatch +# complete = qdbatch except for the IOs in flight +# when the job is ending +# + + +def check(jsondata, parameters, block, qd, qdbatch, rw): + iodepth = jsondata['iodepth_level'] + submit = jsondata['iodepth_submit'] + complete = jsondata['iodepth_complete'] + + try: + if block: + assert iodepth['1'] == 100.0 + assert submit['4'] == 100.0 + assert complete['4'] == 100.0 + elif 'read' in rw or 'write' in rw: + assert iodepth[str(qd)] > 99.9 + assert submit['4'] == 100.0 + assert complete['4'] > 99.9 + else: + if qdbatch <= 4: + batchkey = '4' + elif qdbatch > 64: + batchkey = '>=64' + else: + batchkey = str(qdbatch) + if qd >= 64: + qdkey = ">=64" + else: + qdkey = str(qd) + assert iodepth[qdkey] > 99 + assert submit[batchkey] == 100.0 + assert complete[batchkey] > 99 + except AssertionError: + print("Assertion failed") + traceback.print_exc() + print(jsondata) + return + + print("**********passed*********") + + +def runalltests(args, qd, batch): + block = False + for dev in [args.chardev, args.blockdev]: + for rw in ["randread", "randwrite", "randtrim"]: + parameters = ["--name=test", + "--time_based", + "--runtime=30s", + "--output-format=json", + "--ioengine=sg", + "--rw={0}".format(rw), + "--filename={0}".format(dev), + "--iodepth={0}".format(qd), + "--iodepth_batch={0}".format(batch)] + + print(parameters) + output = subprocess.check_output([args.fio] + parameters) + 
jsondata = json.loads(output) + jobdata = jsondata['jobs'][0] + check(jobdata, parameters, block, qd, batch, rw) + block = True + + +def runcdevtrimtest(args, qd, batch): + parameters = ["--name=test", + "--time_based", + "--runtime=30s", + "--output-format=json", + "--ioengine=sg", + "--rw=randtrim", + "--filename={0}".format(args.chardev), + "--iodepth={0}".format(qd), + "--iodepth_batch={0}".format(batch)] + + print(parameters) + output = subprocess.check_output([args.fio] + parameters) + jsondata = json.loads(output) + jobdata = jsondata['jobs'][0] + check(jobdata, parameters, False, qd, batch, "randtrim") + + +if __name__ == '__main__': + args = parse_args() + + runcdevtrimtest(args, 32, 2) + runcdevtrimtest(args, 32, 4) + runcdevtrimtest(args, 32, 8) + runcdevtrimtest(args, 64, 4) + runcdevtrimtest(args, 64, 8) + runcdevtrimtest(args, 64, 16) + runcdevtrimtest(args, 128, 8) + runcdevtrimtest(args, 128, 16) + runcdevtrimtest(args, 128, 32) + + runalltests(args, 1, 1) + runalltests(args, 16, 2) + runalltests(args, 16, 16) diff --git a/thread_options.h b/thread_options.h index 8d13b79a..8adba48c 100644 --- a/thread_options.h +++ b/thread_options.h @@ -29,7 +29,7 @@ enum fio_memtype { #define ZONESPLIT_MAX 256 struct bssplit { - uint32_t bs; + uint64_t bs; uint32_t perc; }; @@ -82,10 +82,10 @@ struct thread_options { unsigned long long start_offset; unsigned long long start_offset_align; - unsigned int bs[DDIR_RWDIR_CNT]; - unsigned int ba[DDIR_RWDIR_CNT]; - unsigned int min_bs[DDIR_RWDIR_CNT]; - unsigned int max_bs[DDIR_RWDIR_CNT]; + unsigned long long bs[DDIR_RWDIR_CNT]; + unsigned long long ba[DDIR_RWDIR_CNT]; + unsigned long long min_bs[DDIR_RWDIR_CNT]; + unsigned long long max_bs[DDIR_RWDIR_CNT]; struct bssplit *bssplit[DDIR_RWDIR_CNT]; unsigned int bssplit_nr[DDIR_RWDIR_CNT]; @@ -164,7 +164,8 @@ struct thread_options { unsigned int perc_rand[DDIR_RWDIR_CNT]; unsigned int hugepage_size; - unsigned int rw_min_bs; + unsigned long long rw_min_bs; + unsigned int 
pad2; unsigned int thinktime; unsigned int thinktime_spin; unsigned int thinktime_blocks; @@ -363,10 +364,10 @@ struct thread_options_pack { uint64_t start_offset; uint64_t start_offset_align; - uint32_t bs[DDIR_RWDIR_CNT]; - uint32_t ba[DDIR_RWDIR_CNT]; - uint32_t min_bs[DDIR_RWDIR_CNT]; - uint32_t max_bs[DDIR_RWDIR_CNT]; + uint64_t bs[DDIR_RWDIR_CNT]; + uint64_t ba[DDIR_RWDIR_CNT]; + uint64_t min_bs[DDIR_RWDIR_CNT]; + uint64_t max_bs[DDIR_RWDIR_CNT]; struct bssplit bssplit[DDIR_RWDIR_CNT][BSSPLIT_MAX]; uint32_t bssplit_nr[DDIR_RWDIR_CNT]; @@ -443,7 +444,8 @@ struct thread_options_pack { uint32_t perc_rand[DDIR_RWDIR_CNT]; uint32_t hugepage_size; - uint32_t rw_min_bs; + uint64_t rw_min_bs; + uint32_t pad2; uint32_t thinktime; uint32_t thinktime_spin; uint32_t thinktime_blocks; diff --git a/tickmarks.c b/tickmarks.c index 808de676..88bace09 100644 --- a/tickmarks.c +++ b/tickmarks.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include /* diff --git a/tools/hist/fio-histo-log-pctiles.py b/tools/hist/fio-histo-log-pctiles.py new file mode 100755 index 00000000..c398113c --- /dev/null +++ b/tools/hist/fio-histo-log-pctiles.py @@ -0,0 +1,657 @@ +#!/usr/bin/env python + +# module to parse fio histogram log files, not using pandas +# runs in python v2 or v3 +# to get help with the CLI: $ python fio-histo-log-pctiles.py -h +# this can be run standalone as a script but is callable +# assumes all threads run for same time duration +# assumes all threads are doing the same thing for the entire run + +# percentiles: +# 0 - min latency +# 50 - median +# 100 - max latency + +# TO-DO: +# separate read and write stats for randrw mixed workload +# report average latency if needed +# prove that it works (partially done with unit tests) + +# to run unit tests, set UNITTEST environment variable to anything +# if you do this, don't pass normal CLI parameters to it +# otherwise it runs the CLI + +import sys, os, math, copy +from copy import deepcopy +import argparse +import 
unittest2 + +msec_per_sec = 1000 +nsec_per_usec = 1000 + +class FioHistoLogExc(Exception): + pass + +# if there is an error, print message, and exit with error status + +def myabort(msg): + print('ERROR: ' + msg) + sys.exit(1) + +# convert histogram log file into a list of +# (time_ms, direction, bsz, buckets) tuples where +# - time_ms is the time in msec at which the log record was written +# - direction is 0 (read) or 1 (write) +# - bsz is block size (not used) +# - buckets is a CSV list of counters that make up the histogram +# caller decides if the expected number of counters are present + + +def exception_suffix( record_num, pathname ): + return 'in histogram record %d file %s' % (record_num+1, pathname) + +# log file parser raises FioHistoLogExc exceptions +# it returns histogram buckets in whatever unit fio uses + +def parse_hist_file(logfn, buckets_per_interval): + max_timestamp_ms = 0.0 + + with open(logfn, 'r') as f: + records = [ l.strip() for l in f.readlines() ] + intervals = [] + for k, r in enumerate(records): + if r == '': + continue + tokens = r.split(',') + try: + int_tokens = [ int(t) for t in tokens ] + except ValueError as e: + raise FioHistoLogExc('non-integer value %s' % exception_suffix(k+1, logfn)) + + neg_ints = list(filter( lambda tk : tk < 0, int_tokens )) + if len(neg_ints) > 0: + raise FioHistoLogExc('negative integer value %s' % exception_suffix(k+1, logfn)) + + if len(int_tokens) < 3: + raise FioHistoLogExc('too few numbers %s' % exception_suffix(k+1, logfn)) + + time_ms = int_tokens[0] + if time_ms > max_timestamp_ms: + max_timestamp_ms = time_ms + + direction = int_tokens[1] + if direction != 0 and direction != 1: + raise FioHistoLogExc('invalid I/O direction %s' % exception_suffix(k+1, logfn)) + + bsz = int_tokens[2] + if bsz > (1 << 24): + raise FioHistoLogExc('block size too large %s' % exception_suffix(k+1, logfn)) + + buckets = int_tokens[3:] + if len(buckets) != buckets_per_interval: + raise FioHistoLogExc('%d buckets per 
interval but %d expected in %s' % + (len(buckets), buckets_per_interval, exception_suffix(k+1, logfn))) + intervals.append((time_ms, direction, bsz, buckets)) + if len(intervals) == 0: + raise FioHistoLogExc('no records in %s' % logfn) + return (intervals, max_timestamp_ms) + + +# compute time range for each bucket index in histogram record +# see comments in https://github.com/axboe/fio/blob/master/stat.h +# for description of bucket groups and buckets +# fio v3 bucket ranges are in nanosec (since response times are measured in nanosec) +# but we convert fio v3 nanosecs to floating-point microseconds + +def time_ranges(groups, counters_per_group, fio_version=3): + bucket_width = 1 + bucket_base = 0 + bucket_intervals = [] + for g in range(0, groups): + for b in range(0, counters_per_group): + rmin = float(bucket_base) + rmax = rmin + bucket_width + if fio_version == 3: + rmin /= nsec_per_usec + rmax /= nsec_per_usec + bucket_intervals.append( [rmin, rmax] ) + bucket_base += bucket_width + if g != 0: + bucket_width *= 2 + return bucket_intervals + + +# compute number of time quantum intervals in the test + +def get_time_intervals(time_quantum, max_timestamp_ms): + # round down to nearest second + max_timestamp = max_timestamp_ms // msec_per_sec + # round up to nearest whole multiple of time_quantum + time_interval_count = (max_timestamp + time_quantum) // time_quantum + end_time = time_interval_count * time_quantum + return (end_time, time_interval_count) + +# align raw histogram log data to time quantum so +# we can then combine histograms from different threads with addition +# for randrw workload we count both reads and writes in same output bucket +# but we separate reads and writes for purposes of calculating +# end time for histogram record. 
+# this requires us to weight a raw histogram bucket by the +# fraction of time quantum that the bucket overlaps the current +# time quantum interval +# for example, if we have a bucket with 515 samples for time interval +# [ 1010, 2014 ] msec since start of test, and time quantum is 1 sec, then +# for time quantum interval [ 1000, 2000 ] msec, the overlap is +# (2000 - 1010) / (2000 - 1000) = 0.99 +# so the contribution of this bucket to this time quantum is +# 515 x 0.99 = 509.85 + +def align_histo_log(raw_histogram_log, time_quantum, bucket_count, max_timestamp_ms): + + # slice up test time into intervals of time_quantum seconds + + (end_time, time_interval_count) = get_time_intervals(time_quantum, max_timestamp_ms) + time_qtm_ms = time_quantum * msec_per_sec + end_time_ms = end_time * msec_per_sec + aligned_intervals = [] + for j in range(0, time_interval_count): + aligned_intervals.append(( + j * time_qtm_ms, + [ 0.0 for j in range(0, bucket_count) ] )) + + log_record_count = len(raw_histogram_log) + for k, record in enumerate(raw_histogram_log): + + # find next record with same direction to get end-time + # have to avoid going past end of array + # for fio randrw workload, + # we have read and write records on same time interval + # sometimes read and write records are in opposite order + # assertion checks that next read/write record + # can be separated by at most 2 other records + + (time_msec, direction, sz, interval_buckets) = record + if k+1 < log_record_count: + (time_msec_end, direction2, _, _) = raw_histogram_log[k+1] + if direction2 != direction: + if k+2 < log_record_count: + (time_msec_end, direction2, _, _) = raw_histogram_log[k+2] + if direction2 != direction: + if k+3 < log_record_count: + (time_msec_end, direction2, _, _) = raw_histogram_log[k+3] + assert direction2 == direction + else: + time_msec_end = end_time_ms + else: + time_msec_end = end_time_ms + else: + time_msec_end = end_time_ms + + # calculate first quantum that overlaps this 
histogram record + + qtm_start_ms = (time_msec // time_qtm_ms) * time_qtm_ms + qtm_end_ms = ((time_msec + time_qtm_ms) // time_qtm_ms) * time_qtm_ms + qtm_index = qtm_start_ms // time_qtm_ms + + # for each quantum that overlaps this histogram record's time interval + + while qtm_start_ms < time_msec_end: # while quantum overlaps record + + # calculate fraction of time that this quantum + # overlaps histogram record's time interval + + overlap_start = max(qtm_start_ms, time_msec) + overlap_end = min(qtm_end_ms, time_msec_end) + weight = float(overlap_end - overlap_start) + weight /= (time_msec_end - time_msec) + (_,aligned_histogram) = aligned_intervals[qtm_index] + for bx, b in enumerate(interval_buckets): + weighted_bucket = weight * b + aligned_histogram[bx] += weighted_bucket + + # advance to the next time quantum + + qtm_start_ms += time_qtm_ms + qtm_end_ms += time_qtm_ms + qtm_index += 1 + + return aligned_intervals + +# add histogram in "source" to histogram in "target" +# it is assumed that the 2 histograms are precisely time-aligned + +def add_to_histo_from( target, source ): + for b in range(0, len(source)): + target[b] += source[b] + +# compute percentiles +# inputs: +# buckets: histogram bucket array +# wanted: list of floating-pt percentiles to calculate +# time_ranges: [tmin,tmax) time interval for each bucket +# returns None if no I/O reported. 
+# otherwise we would be dividing by zero +# think of buckets as probability distribution function +# and this loop is integrating to get cumulative distribution function + +def get_pctiles(buckets, wanted, time_ranges): + + # get total of IO requests done + total_ios = 0 + for io_count in buckets: + total_ios += io_count + + # don't return percentiles if no I/O was done during interval + if total_ios == 0.0: + return None + + pctile_count = len(wanted) + + # results returned as dictionary keyed by percentile + pctile_result = {} + + # index of next percentile in list + pctile_index = 0 + + # next percentile + next_pctile = wanted[pctile_index] + + # no one is interested in percentiles bigger than this but not 100.0 + # this prevents floating-point error from preventing loop exit + almost_100 = 99.9999 + + # pct is the percentile corresponding to + # all I/O requests up through bucket b + pct = 0.0 + total_so_far = 0 + for b, io_count in enumerate(buckets): + if io_count == 0: + continue + total_so_far += io_count + # last_pct_lt is the percentile corresponding to + # all I/O requests up to, but not including, bucket b + last_pct = pct + pct = 100.0 * float(total_so_far) / total_ios + # a single bucket could satisfy multiple pctiles + # so this must be a while loop + # for 100-percentile (max latency) case, no bucket exceeds it + # so we must stop there. 
+ while ((next_pctile == 100.0 and pct >= almost_100) or + (next_pctile < 100.0 and pct > next_pctile)): + # interpolate between min and max time for bucket time interval + # we keep the time_ranges access inside this loop, + # even though it could be above the loop, + # because in many cases we will not be even entering + # the loop so we optimize out these accesses + range_max_time = time_ranges[b][1] + range_min_time = time_ranges[b][0] + offset_frac = (next_pctile - last_pct)/(pct - last_pct) + interpolation = range_min_time + (offset_frac*(range_max_time - range_min_time)) + pctile_result[next_pctile] = interpolation + pctile_index += 1 + if pctile_index == pctile_count: + break + next_pctile = wanted[pctile_index] + if pctile_index == pctile_count: + break + assert pctile_index == pctile_count + return pctile_result + + +# this is really the main program + +def compute_percentiles_from_logs(): + parser = argparse.ArgumentParser() + parser.add_argument("--fio-version", dest="fio_version", + default="3", choices=[2,3], type=int, + help="fio version (default=3)") + parser.add_argument("--bucket-groups", dest="bucket_groups", default="29", type=int, + help="fio histogram bucket groups (default=29)") + parser.add_argument("--bucket-bits", dest="bucket_bits", + default="6", type=int, + help="fio histogram buckets-per-group bits (default=6 means 64 buckets/group)") + parser.add_argument("--percentiles", dest="pctiles_wanted", + default=[ 0., 50., 95., 99., 100.], type=float, nargs='+', + help="fio histogram buckets-per-group bits (default=6 means 64 buckets/group)") + parser.add_argument("--time-quantum", dest="time_quantum", + default="1", type=int, + help="time quantum in seconds (default=1)") + parser.add_argument("--output-unit", dest="output_unit", + default="usec", type=str, + help="Latency percentile output unit: msec|usec|nsec (default usec)") + parser.add_argument("file_list", nargs='+', + help='list of files, preceded by " -- " if necessary') + args = 
parser.parse_args() + + # default changes based on fio version + if args.fio_version == 2: + args.bucket_groups = 19 + + # print parameters + + print('fio version = %d' % args.fio_version) + print('bucket groups = %d' % args.bucket_groups) + print('bucket bits = %d' % args.bucket_bits) + print('time quantum = %d sec' % args.time_quantum) + print('percentiles = %s' % ','.join([ str(p) for p in args.pctiles_wanted ])) + buckets_per_group = 1 << args.bucket_bits + print('buckets per group = %d' % buckets_per_group) + buckets_per_interval = buckets_per_group * args.bucket_groups + print('buckets per interval = %d ' % buckets_per_interval) + bucket_index_range = range(0, buckets_per_interval) + if args.time_quantum == 0: + print('ERROR: time-quantum must be a positive number of seconds') + print('output unit = ' + args.output_unit) + if args.output_unit == 'msec': + time_divisor = 1000.0 + elif args.output_unit == 'usec': + time_divisor = 1.0 + + # calculate response time interval associated with each histogram bucket + + bucket_times = time_ranges(args.bucket_groups, buckets_per_group, fio_version=args.fio_version) + + # construct template for each histogram bucket array with buckets all zeroes + # we just copy this for each new histogram + + zeroed_buckets = [ 0.0 for r in bucket_index_range ] + + # print CSV header just like fiologparser_hist does + + header = 'msec, ' + for p in args.pctiles_wanted: + header += '%3.1f, ' % p + print('time (millisec), percentiles in increasing order with values in ' + args.output_unit) + print(header) + + # parse the histogram logs + # assumption: each bucket has a monotonically increasing time + # assumption: time ranges do not overlap for a single thread's records + # (exception: if randrw workload, then there is a read and a write + # record for the same time interval) + + max_timestamp_all_logs = 0 + hist_files = {} + for fn in args.file_list: + try: + (hist_files[fn], max_timestamp_ms) = parse_hist_file(fn, buckets_per_interval) 
+ except FioHistoLogExc as e: + myabort(str(e)) + max_timestamp_all_logs = max(max_timestamp_all_logs, max_timestamp_ms) + + (end_time, time_interval_count) = get_time_intervals(args.time_quantum, max_timestamp_all_logs) + all_threads_histograms = [ ((j*args.time_quantum*msec_per_sec), deepcopy(zeroed_buckets)) + for j in range(0, time_interval_count) ] + + for logfn in hist_files.keys(): + aligned_per_thread = align_histo_log(hist_files[logfn], + args.time_quantum, + buckets_per_interval, + max_timestamp_all_logs) + for t in range(0, time_interval_count): + (_, all_threads_histo_t) = all_threads_histograms[t] + (_, log_histo_t) = aligned_per_thread[t] + add_to_histo_from( all_threads_histo_t, log_histo_t ) + + # calculate percentiles across aggregate histogram for all threads + + for (t_msec, all_threads_histo_t) in all_threads_histograms: + record = '%d, ' % t_msec + pct = get_pctiles(all_threads_histo_t, args.pctiles_wanted, bucket_times) + if not pct: + for w in args.pctiles_wanted: + record += ', ' + else: + pct_keys = [ k for k in pct.keys() ] + pct_values = [ str(pct[wanted]/time_divisor) for wanted in sorted(pct_keys) ] + record += ', '.join(pct_values) + print(record) + + + +#end of MAIN PROGRAM + + + +##### below are unit tests ############## + +import tempfile, shutil +from os.path import join +should_not_get_here = False + +class Test(unittest2.TestCase): + tempdir = None + + # a little less typing please + def A(self, boolean_val): + self.assertTrue(boolean_val) + + # initialize unit test environment + + @classmethod + def setUpClass(cls): + d = tempfile.mkdtemp() + Test.tempdir = d + + # remove anything left by unit test environment + # unless user sets UNITTEST_LEAVE_FILES environment variable + + @classmethod + def tearDownClass(cls): + if not os.getenv("UNITTEST_LEAVE_FILES"): + shutil.rmtree(cls.tempdir) + + def setUp(self): + self.fn = join(Test.tempdir, self.id()) + + def test_a_add_histos(self): + a = [ 1.0, 2.0 ] + b = [ 1.5, 2.5 ] + 
add_to_histo_from( a, b ) + self.A(a == [2.5, 4.5]) + self.A(b == [1.5, 2.5]) + + def test_b1_parse_log(self): + with open(self.fn, 'w') as f: + f.write('1234, 0, 4096, 1, 2, 3, 4\n') + f.write('5678,1,16384,5,6,7,8 \n') + (raw_histo_log, max_timestamp) = parse_hist_file(self.fn, 4) # 4 buckets per interval + self.A(len(raw_histo_log) == 2 and max_timestamp == 5678) + (time_ms, direction, bsz, histo) = raw_histo_log[0] + self.A(time_ms == 1234 and direction == 0 and bsz == 4096 and histo == [ 1, 2, 3, 4 ]) + (time_ms, direction, bsz, histo) = raw_histo_log[1] + self.A(time_ms == 5678 and direction == 1 and bsz == 16384 and histo == [ 5, 6, 7, 8 ]) + + def test_b2_parse_empty_log(self): + with open(self.fn, 'w') as f: + pass + try: + (raw_histo_log, max_timestamp_ms) = parse_hist_file(self.fn, 4) + self.A(should_not_get_here) + except FioHistoLogExc as e: + self.A(str(e).startswith('no records')) + + def test_b3_parse_empty_records(self): + with open(self.fn, 'w') as f: + f.write('\n') + f.write('1234, 0, 4096, 1, 2, 3, 4\n') + f.write('5678,1,16384,5,6,7,8 \n') + f.write('\n') + (raw_histo_log, max_timestamp_ms) = parse_hist_file(self.fn, 4) + self.A(len(raw_histo_log) == 2 and max_timestamp_ms == 5678) + (time_ms, direction, bsz, histo) = raw_histo_log[0] + self.A(time_ms == 1234 and direction == 0 and bsz == 4096 and histo == [ 1, 2, 3, 4 ]) + (time_ms, direction, bsz, histo) = raw_histo_log[1] + self.A(time_ms == 5678 and direction == 1 and bsz == 16384 and histo == [ 5, 6, 7, 8 ]) + + def test_b4_parse_non_int(self): + with open(self.fn, 'w') as f: + f.write('12, 0, 4096, 1a, 2, 3, 4\n') + try: + (raw_histo_log, _) = parse_hist_file(self.fn, 4) + self.A(False) + except FioHistoLogExc as e: + self.A(str(e).startswith('non-integer')) + + def test_b5_parse_neg_int(self): + with open(self.fn, 'w') as f: + f.write('-12, 0, 4096, 1, 2, 3, 4\n') + try: + (raw_histo_log, _) = parse_hist_file(self.fn, 4) + self.A(False) + except FioHistoLogExc as e: + 
self.A(str(e).startswith('negative integer')) + + def test_b6_parse_too_few_int(self): + with open(self.fn, 'w') as f: + f.write('0, 0\n') + try: + (raw_histo_log, _) = parse_hist_file(self.fn, 4) + self.A(False) + except FioHistoLogExc as e: + self.A(str(e).startswith('too few numbers')) + + def test_b7_parse_invalid_direction(self): + with open(self.fn, 'w') as f: + f.write('100, 2, 4096, 1, 2, 3, 4\n') + try: + (raw_histo_log, _) = parse_hist_file(self.fn, 4) + self.A(False) + except FioHistoLogExc as e: + self.A(str(e).startswith('invalid I/O direction')) + + def test_b8_parse_bsz_too_big(self): + with open(self.fn+'_good', 'w') as f: + f.write('100, 1, %d, 1, 2, 3, 4\n' % (1<<24)) + (raw_histo_log, max_timestamp_ms) = parse_hist_file(self.fn+'_good', 4) + with open(self.fn+'_bad', 'w') as f: + f.write('100, 1, 20000000, 1, 2, 3, 4\n') + try: + (raw_histo_log, _) = parse_hist_file(self.fn+'_bad', 4) + self.A(False) + except FioHistoLogExc as e: + self.A(str(e).startswith('block size too large')) + + def test_b9_parse_wrong_bucket_count(self): + with open(self.fn, 'w') as f: + f.write('100, 1, %d, 1, 2, 3, 4, 5\n' % (1<<24)) + try: + (raw_histo_log, _) = parse_hist_file(self.fn, 4) + self.A(False) + except FioHistoLogExc as e: + self.A(str(e).__contains__('buckets per interval')) + + def test_c1_time_ranges(self): + ranges = time_ranges(3, 2) # fio_version defaults to 3 + expected_ranges = [ # fio_version 3 is in nanoseconds + [0.000, 0.001], [0.001, 0.002], # first group + [0.002, 0.003], [0.003, 0.004], # second group same width + [0.004, 0.006], [0.006, 0.008]] # subsequent groups double width + self.A(ranges == expected_ranges) + ranges = time_ranges(3, 2, fio_version=3) + self.A(ranges == expected_ranges) + ranges = time_ranges(3, 2, fio_version=2) + expected_ranges_v2 = [ [ 1000.0 * min_or_max for min_or_max in time_range ] + for time_range in expected_ranges ] + self.A(ranges == expected_ranges_v2) + # see fio V3 stat.h for why 29 groups and 2^6 
buckets/group + normal_ranges_v3 = time_ranges(29, 64) + # for v3, bucket time intervals are measured in nanoseconds + self.A(len(normal_ranges_v3) == 29 * 64 and normal_ranges_v3[-1][1] == 64*(1<<(29-1))/1000.0) + normal_ranges_v2 = time_ranges(19, 64, fio_version=2) + # for v2, bucket time intervals are measured in microseconds so we have fewer buckets + self.A(len(normal_ranges_v2) == 19 * 64 and normal_ranges_v2[-1][1] == 64*(1<<(19-1))) + + def test_d1_align_histo_log_1_quantum(self): + with open(self.fn, 'w') as f: + f.write('100, 1, 4096, 1, 2, 3, 4') + (raw_histo_log, max_timestamp_ms) = parse_hist_file(self.fn, 4) + self.A(max_timestamp_ms == 100) + aligned_log = align_histo_log(raw_histo_log, 5, 4, max_timestamp_ms) + self.A(len(aligned_log) == 1) + (time_ms0, h) = aligned_log[0] + self.A(time_ms0 == 0 and h == [1.0, 2.0, 3.0, 4.0]) + + # we need this to compare 2 lists of floating point numbers for equality + # because of floating-point imprecision + + def compare_2_floats(self, x, y): + if x == 0.0 or y == 0.0: + return (x+y) < 0.0000001 + else: + return (math.fabs(x-y)/x) < 0.00001 + + def is_close(self, buckets, buckets_expected): + if len(buckets) != len(buckets_expected): + return False + compare_buckets = lambda k: self.compare_2_floats(buckets[k], buckets_expected[k]) + indices_close = list(filter(compare_buckets, range(0, len(buckets)))) + return len(indices_close) == len(buckets) + + def test_d2_align_histo_log_2_quantum(self): + with open(self.fn, 'w') as f: + f.write('2000, 1, 4096, 1, 2, 3, 4\n') + f.write('7000, 1, 4096, 1, 2, 3, 4\n') + (raw_histo_log, max_timestamp_ms) = parse_hist_file(self.fn, 4) + self.A(max_timestamp_ms == 7000) + (_, _, _, raw_buckets1) = raw_histo_log[0] + (_, _, _, raw_buckets2) = raw_histo_log[1] + aligned_log = align_histo_log(raw_histo_log, 5, 4, max_timestamp_ms) + self.A(len(aligned_log) == 2) + (time_ms1, h1) = aligned_log[0] + (time_ms2, h2) = aligned_log[1] + # because first record is from time interval 
[2000, 7000] + # we weight it accordingly + expect1 = [float(b) * 0.6 for b in raw_buckets1] + expect2 = [float(b) * 0.4 for b in raw_buckets1] + for e in range(0, len(expect2)): + expect2[e] += raw_buckets2[e] + self.A(time_ms1 == 0 and self.is_close(h1, expect1)) + self.A(time_ms2 == 5000 and self.is_close(h2, expect2)) + + # what to expect if histogram buckets are all equal + def test_e1_get_pctiles_flat_histo(self): + with open(self.fn, 'w') as f: + buckets = [ 100 for j in range(0, 128) ] + f.write('9000, 1, 4096, %s\n' % ', '.join([str(b) for b in buckets])) + (raw_histo_log, max_timestamp_ms) = parse_hist_file(self.fn, 128) + self.A(max_timestamp_ms == 9000) + aligned_log = align_histo_log(raw_histo_log, 5, 128, max_timestamp_ms) + time_intervals = time_ranges(4, 32) + # since buckets are all equal, then median is halfway through time_intervals + # and max latency interval is at end of time_intervals + self.A(time_intervals[64][1] == 0.066 and time_intervals[127][1] == 0.256) + pctiles_wanted = [ 0, 50, 100 ] + pct_vs_time = [] + for (time_ms, histo) in aligned_log: + pct_vs_time.append(get_pctiles(histo, pctiles_wanted, time_intervals)) + self.A(pct_vs_time[0] == None) # no I/O in this time interval + expected_pctiles = { 0:0.000, 50:0.064, 100:0.256 } + self.A(pct_vs_time[1] == expected_pctiles) + + # what to expect if just the highest histogram bucket is used + def test_e2_get_pctiles_highest_pct(self): + fio_v3_bucket_count = 29 * 64 + with open(self.fn, 'w') as f: + # make an empty fio v3 histogram + buckets = [ 0 for j in range(0, fio_v3_bucket_count) ] + # add one I/O request to last bucket + buckets[-1] = 1 + f.write('9000, 1, 4096, %s\n' % ', '.join([str(b) for b in buckets])) + (raw_histo_log, max_timestamp_ms) = parse_hist_file(self.fn, fio_v3_bucket_count) + self.A(max_timestamp_ms == 9000) + aligned_log = align_histo_log(raw_histo_log, 5, fio_v3_bucket_count, max_timestamp_ms) + (time_ms, histo) = aligned_log[1] + time_intervals = time_ranges(29, 
64) + expected_pctiles = { 100.0:(64*(1<<28))/1000.0 } + pct = get_pctiles( histo, [ 100.0 ], time_intervals ) + self.A(pct == expected_pctiles) + +# we are using this module as a standalone program + +if __name__ == '__main__': + if os.getenv('UNITTEST'): + sys.exit(unittest2.main()) + else: + compute_percentiles_from_logs() + diff --git a/verify.c b/verify.c index 40d484b5..01492f24 100644 --- a/verify.c +++ b/verify.c @@ -801,7 +801,7 @@ static int verify_trimmed_io_u(struct thread_data *td, struct io_u *io_u) mem_is_zero_slow(io_u->buf, io_u->buflen, &offset); - log_err("trim: verify failed at file %s offset %llu, length %lu" + log_err("trim: verify failed at file %s offset %llu, length %llu" ", block offset %lu\n", io_u->file->file_name, io_u->offset, io_u->buflen, (unsigned long) offset); @@ -1517,7 +1517,7 @@ int paste_blockoff(char *buf, unsigned int len, void *priv) struct io_u *io = priv; unsigned long long off; - typecheck(typeof(off), io->offset); + typecheck(__typeof__(off), io->offset); off = cpu_to_le64((uint64_t)io->offset); len = min(len, (unsigned int)sizeof(off)); memcpy(buf, &off, len);