- fprintf(td->iolog_f, "%d,%llu,%u\n", io_u->ddir, io_u->offset, io_u->buflen);
-}
-
-/*
- * Consume the first entry of td->io_log_list: copy its offset, length and
- * data direction into io_u, then unlink and free the entry.
- *
- * Returns 0 if io_u was filled in, 1 if the list is empty.
- */
-static int read_iolog_get(struct thread_data *td, struct io_u *io_u)
-{
-	struct io_piece *next;
-
-	if (list_empty(&td->io_log_list))
-		return 1;
-
-	next = list_entry(td->io_log_list.next, struct io_piece, list);
-	list_del(&next->list);
-
-	io_u->offset = next->offset;
-	io_u->buflen = next->len;
-	io_u->ddir = next->ddir;
-
-	free(next);
-	return 0;
-}
-
-/*
- * Decide what the next io_u should do: its offset, length and direction.
- *
- * Returns 0 if io_u was filled in, 1 if there is nothing (more) to issue.
- */
-static int fill_io_u(struct thread_data *td, struct io_u *io_u)
-{
-	/*
-	 * Replaying an iolog: the log alone dictates the next piece.
-	 */
-	if (td->read_iolog)
-		return read_iolog_get(td, io_u);
-
-	/*
-	 * No log: the seq/rand engine picks the position first, then the
-	 * size. Either one coming up empty means we are done.
-	 */
-	if (get_next_offset(td, &io_u->offset))
-		return 1;
-
-	io_u->buflen = get_next_buflen(td);
-	if (!io_u->buflen)
-		return 1;
-
-	io_u->ddir = get_rw_ddir(td);
-
-	/*
-	 * When recording a write iolog, store this entry.
-	 */
-	if (td->write_iolog)
-		write_iolog_put(td, io_u);
-
-	return 0;
-}
-
-/* true when the free list is empty, i.e. no io_u can be handed out */
-#define queue_full(td) (list_empty(&(td)->io_u_freelist))
-
-/*
- * Move the first io_u from the free list to the busy list, clear its
- * error/resid state and account for it in td->cur_depth.
- *
- * Returns the io_u, or NULL if the free list is empty.
- */
-struct io_u *__get_io_u(struct thread_data *td)
-{
-	struct io_u *unit;
-
-	if (queue_full(td))
-		return NULL;
-
-	unit = list_entry(td->io_u_freelist.next, struct io_u, list);
-	list_del(&unit->list);
-	list_add(&unit->list, &td->io_u_busylist);
-
-	unit->error = 0;
-	unit->resid = 0;
-
-	td->cur_depth++;
-	return unit;
-}
-
-/*
- * Grab a free io_u and set it up completely for the next I/O: position,
- * length, direction, optional verify payload and engine-specific prep.
- * start_time is stamped last, right before the unit is returned.
- *
- * Returns the prepared io_u, or NULL if no unit is free, there is nothing
- * left to issue, or prep failed (the unit is handed back via put_io_u()).
- */
-static struct io_u *get_io_u(struct thread_data *td)
-{
-	struct io_u *io_u = __get_io_u(td);
-
-	if (!io_u)
-		return NULL;
-
-	/* current zone is used up: restart the count and skip ahead */
-	if (td->zone_bytes >= td->zone_size) {
-		td->zone_bytes = 0;
-		td->last_pos += td->zone_skip;
-	}
-
-	if (fill_io_u(td, io_u))
-		goto out_put;
-
-	/*
-	 * Trim the request so it does not run past the end of the file.
-	 * NOTE(review): if offset can exceed real_file_size (e.g. from a
-	 * replayed iolog) this subtraction wraps - confirm against callers.
-	 */
-	if (io_u->buflen + io_u->offset > td->real_file_size)
-		io_u->buflen = td->real_file_size - io_u->offset;
-
-	if (!io_u->buflen)
-		goto out_put;
-
-	if (!(td->read_iolog || td->sequential))
-		mark_random_map(td, io_u);
-
-	td->last_pos += io_u->buflen;
-
-	if (td->verify != VERIFY_NONE)
-		populate_io_u(td, io_u);
-
-	if (td_io_prep(td, io_u))
-		goto out_put;
-
-	gettimeofday(&io_u->start_time, NULL);
-	return io_u;
-
-out_put:
-	put_io_u(td, io_u);
-	return NULL;
-}
-
-/*
- * Switch the thread to a new run state, remembering the one it had before
- * in td->old_runstate.
- */
-static inline void td_set_runstate(struct thread_data *td, int runstate)
-{
-	const int previous = td->runstate;
-
-	td->old_runstate = previous;
-	td->runstate = runstate;
-}
-
-/*
- * Turn the first entry of td->io_hist_list into a read of the same
- * offset and length, so the data can be read back. The history entry is
- * unlinked and freed.
- *
- * Returns 0 if io_u was filled in, 1 if the history list is empty.
- */
-static int get_next_verify(struct thread_data *td, struct io_u *io_u)
-{
-	if (!list_empty(&td->io_hist_list)) {
-		struct io_piece *piece;
-
-		piece = list_entry(td->io_hist_list.next, struct io_piece, list);
-		list_del(&piece->list);
-
-		io_u->ddir = DDIR_READ;
-		io_u->offset = piece->offset;
-		io_u->buflen = piece->len;
-
-		free(piece);
-		return 0;
-	}
-
-	return 1;
-}
-
-/*
- * Drop every entry still queued on td->io_hist_list, freeing each one.
- */
-static void prune_io_piece_log(struct thread_data *td)
-{
-	struct list_head *hist = &td->io_hist_list;
-
-	while (!list_empty(hist)) {
-		struct io_piece *first;
-
-		first = list_entry(hist->next, struct io_piece, list);
-		list_del(&first->list);
-		free(first);
-	}
-}
-
-/*
- * Log a successful write, so we can unwind the log for verify.
- *
- * A new io_piece holding io_u's offset, length and direction is put on
- * td->io_hist_list. If the piece cannot be allocated nothing is logged
- * and ENOMEM is reported through td_verror(), since that write can no
- * longer be read back from the history list.
- */
-static void log_io_piece(struct thread_data *td, struct io_u *io_u)
-{
-	struct io_piece *ipo = malloc(sizeof(*ipo));
-	struct list_head *entry;
-
-	if (!ipo) {
-		td_verror(td, ENOMEM);
-		return;
-	}
-
-	INIT_LIST_HEAD(&ipo->list);
-	ipo->offset = io_u->offset;
-	ipo->len = io_u->buflen;
-	/* keep every field of the piece defined */
-	ipo->ddir = io_u->ddir;
-
-	/*
-	 * for random io where the writes extend the file, it will typically
-	 * be laid out with the block scattered as written. it's faster to
-	 * read them in in that order again, so don't sort
-	 */
-	if (td->sequential || !td->overwrite) {
-		list_add_tail(&ipo->list, &td->io_hist_list);
-		return;
-	}
-
-	/*
-	 * for random io, sort the list so verify will run faster. walk
-	 * backwards from the tail until an entry with a smaller offset is
-	 * found and insert right after it; if none is found, entry ends up
-	 * being the list head and the piece goes to the front.
-	 */
-	entry = &td->io_hist_list;
-	while ((entry = entry->prev) != &td->io_hist_list) {
-		struct io_piece *__ipo = list_entry(entry, struct io_piece, list);
-
-		if (__ipo->offset < ipo->offset)
-			break;
-	}
-
-	list_add(&ipo->list, entry);
-}
-
-/*
- * Flush and close the write iolog stream, then release the buffer that
- * was attached to it. The buffer is freed only after fclose() because the
- * stream uses it until it is closed.
- */
-static void write_iolog_close(struct thread_data *td)
-{
-	FILE *log = td->iolog_f;
-
-	fflush(log);
-	fclose(log);
-
-	free(td->iolog_buf);
-}
-
-/*
- * Set up the iolog named by td->iolog_file, if one is configured.
- *
- * With td->write_iolog set, the file is opened and given an 8192 byte
- * fully buffered stream (td->iolog_f / td->iolog_buf).
- *
- * With only td->read_iolog set, every "ddir,offset,len" line is parsed
- * into an io_piece and appended to td->io_log_list. Malformed lines and
- * unknown directions are reported on stderr and skipped. td->max_bs is
- * raised to the largest length seen, and td->ddir (or td->iomix for a
- * mixed log) is set from the directions found.
- *
- * Returns 0 on success or when no iolog is in use, 1 if the file cannot
- * be opened, memory runs out, or the log holds no usable entry. Entries
- * queued before an out-of-memory failure stay on td->io_log_list.
- */
-static int init_iolog(struct thread_data *td)
-{
-	unsigned long long offset;
-	unsigned int bytes;
-	char *str, *p;
-	FILE *f;
-	int rw, i, reads, writes;
-	int ret = 0;
-
-	if (!td->read_iolog && !td->write_iolog)
-		return 0;
-
-	if (td->read_iolog)
-		f = fopen(td->iolog_file, "r");
-	else
-		f = fopen(td->iolog_file, "w");
-
-	if (!f) {
-		perror("fopen iolog");
-		printf("file %s, %d/%d\n", td->iolog_file, td->read_iolog, td->write_iolog);
-		return 1;
-	}
-
-	/*
-	 * That's it for writing, setup a log buffer and we're done.
-	 * A failed malloc is harmless here: setvbuf() with a NULL buffer
-	 * lets the library allocate one, and free(NULL) is a no-op.
-	 */
-	if (td->write_iolog) {
-		td->iolog_f = f;
-		td->iolog_buf = malloc(8192);
-		setvbuf(f, td->iolog_buf, _IOFBF, 8192);
-		return 0;
-	}
-
-	/*
-	 * Read in the read iolog and store it, reuse the infrastructure
-	 * for doing verifications.
-	 */
-	str = malloc(4096);
-	if (!str) {
-		fprintf(stderr, "iolog: out of memory\n");
-		fclose(f);
-		return 1;
-	}
-
-	reads = writes = i = 0;
-	while ((p = fgets(str, 4096, f)) != NULL) {
-		struct io_piece *ipo;
-
-		if (sscanf(p, "%d,%llu,%u", &rw, &offset, &bytes) != 3) {
-			fprintf(stderr, "bad iolog: %s\n", p);
-			continue;
-		}
-		if (rw == DDIR_READ)
-			reads++;
-		else if (rw == DDIR_WRITE)
-			writes++;
-		else {
-			fprintf(stderr, "bad ddir: %d\n", rw);
-			continue;
-		}
-
-		ipo = malloc(sizeof(*ipo));
-		if (!ipo) {
-			fprintf(stderr, "iolog: out of memory\n");
-			ret = 1;
-			break;
-		}
-
-		INIT_LIST_HEAD(&ipo->list);
-		ipo->offset = offset;
-		ipo->len = bytes;
-		if (bytes > td->max_bs)
-			td->max_bs = bytes;
-		ipo->ddir = rw;
-		list_add_tail(&ipo->list, &td->io_log_list);
-		i++;
-	}
-
-	free(str);
-	fclose(f);
-
-	if (ret)
-		return ret;
-
-	if (!i)
-		return 1;
-
-	/*
-	 * A log with a single direction fixes td->ddir to that direction;
-	 * a log with both is a mixed workload.
-	 */
-	if (reads && !writes)
-		td->ddir = DDIR_READ;
-	else if (!reads && writes)
-		td->ddir = DDIR_WRITE;
-	else
-		td->iomix = 1;
-
-	return 0;
-}
-
-/*
- * Call the io_sync hook if one is set and return its result;
- * without a hook this is a no-op that returns 0.
- */
-static int sync_td(struct thread_data *td)
-{
-	return td->io_sync ? td->io_sync(td) : 0;
-}
-
-/*
- * Forward to the io_getevents hook with the given min/max event counts
- * and timeout, returning whatever the hook returns.
- */
-static int io_u_getevents(struct thread_data *td, int min, int max,
-			  struct timespec *t)
-{
-	int nr_events = td->io_getevents(td, min, max, t);
-
-	return nr_events;
-}
-
-/*
- * Stamp io_u->issue_time with the current time and hand the unit to the
- * io_queue hook. Returns the hook's result.
- */
-static int io_u_queue(struct thread_data *td, struct io_u *io_u)
-{
-	int ret;
-
-	gettimeofday(&io_u->issue_time, NULL);
-	ret = td->io_queue(td, io_u);
-
-	return ret;
-}
-
-#define iocb_time(iocb)	((unsigned long) (iocb)->data)
-
-/*
- * Account for one finished io_u.
- *
- * On error, the error is copied to icd->error and nothing else is
- * touched. Otherwise the transferred byte count (buflen - resid) is added
- * to the per-direction block/byte counters and to icd->bytes_done, a
- * completion latency sample and a bandwidth sample are recorded, and
- * writes are logged with log_io_piece() for later verification.
- */
-static void io_completed(struct thread_data *td, struct io_u *io_u,
-			 struct io_completion_data *icd)
-{
-	struct timeval now;
-	unsigned long msec;
-	unsigned int done;
-	int ddir;
-
-	gettimeofday(&now, NULL);
-
-	if (io_u->error) {
-		icd->error = io_u->error;
-		return;
-	}
-
-	done = io_u->buflen - io_u->resid;
-	ddir = io_u->ddir;
-
-	td->io_blocks[ddir]++;
-	td->io_bytes[ddir] += done;
-	td->zone_bytes += done;
-	td->this_io_bytes[ddir] += done;
-
-	/* completion latency runs from issue to now */
-	msec = mtime_since(&io_u->issue_time, &now);
-
-	add_clat_sample(td, ddir, msec);
-	add_bw_sample(td, ddir);
-
-	if ((td_rw(td) || td_write(td)) && ddir == DDIR_WRITE)
-		log_io_piece(td, io_u);
-
-	icd->bytes_done[ddir] += done;
-}
-
-/*
- * Process the icd->nr events the engine has ready: reset the error and
- * byte counters in icd, then account for each event's io_u and hand it
- * back with put_io_u().
- */
-static void ios_completed(struct thread_data *td,struct io_completion_data *icd)
-{
-	int event = 0;
-
-	icd->error = 0;
-	icd->bytes_done[1] = 0;
-	icd->bytes_done[0] = 0;
-
-	while (event < icd->nr) {
-		struct io_u *done = td->io_event(td, event);
-
-		io_completed(td, done, icd);
-		put_io_u(td, done);
-		event++;
-	}
-}
-
-/*
- * Drain whatever I/O is still outstanding, in three steps: reap events
- * that are ready right now, cancel what is left on the busy list if the
- * engine has an io_cancel hook, then wait for everything still counted
- * in td->cur_depth.
- */
-static void cleanup_pending_aio(struct thread_data *td)
-{
-	struct timespec no_wait = { .tv_sec = 0, .tv_nsec = 0};
-	struct io_completion_data icd;
-	struct list_head *pos, *tmp;
-	int nr;
-
-	/*
-	 * get immediately available events, if any
-	 */
-	nr = io_u_getevents(td, 0, td->cur_depth, &no_wait);
-	if (nr > 0) {
-		icd.nr = nr;
-		ios_completed(td, &icd);
-	}
-
-	/*
-	 * now cancel remaining active events; a unit is only handed back
-	 * when its cancel returned 0
-	 */
-	if (td->io_cancel) {
-		list_for_each_safe(pos, tmp, &td->io_u_busylist) {
-			struct io_u *busy = list_entry(pos, struct io_u, list);
-
-			if (td->io_cancel(td, busy) == 0)
-				put_io_u(td, busy);
-		}
-	}
-
-	if (!td->cur_depth)
-		return;
-
-	/* whatever is still in flight has to be waited for */
-	nr = io_u_getevents(td, td->cur_depth, td->cur_depth, NULL);
-	if (nr > 0) {
-		icd.nr = nr;
-		ios_completed(td, &icd);
-	}
-}
-
-/*
- * Verify the io_u that *io_u points at, if any, then hand it back with
- * put_io_u() and clear *io_u.
- *
- * Returns the result of verify_io_u(), or 0 if there was nothing pending.
- */
-static int do_io_u_verify(struct thread_data *td, struct io_u **io_u)
-{
-	struct io_u *pending = *io_u;
-	int ret;
-
-	if (!pending)
-		return 0;
-
-	ret = verify_io_u(pending);
-	put_io_u(td, pending);
-	*io_u = NULL;
-
-	return ret;
-}
-
-/*
- * Read back and verify everything logged on td->io_hist_list.
- *
- * Runs in the TD_VERIFYING state and switches to TD_RUNNING when done.
- * Each pass queues a read for the next history entry, verifies the unit
- * completed in the previous pass while that read is in flight, then waits
- * for exactly one completion. The loop stops on terminate, when the
- * runtime is exceeded, when the history or the free io_u list runs out,
- * or on the first prep, queue, completion or verify failure.
- */
-static void do_verify(struct thread_data *td)
-{
-	struct timeval t;
-	struct io_u *io_u, *v_io_u = NULL;
-	struct io_completion_data icd;
-	int ret;
-
-	td_set_runstate(td, TD_VERIFYING);
-
-	do {
-		if (td->terminate)
-			break;
-
-		gettimeofday(&t, NULL);
-		if (runtime_exceeded(td, &t))
-			break;
-
-		io_u = __get_io_u(td);
-		if (!io_u)
-			break;
-
-		if (get_next_verify(td, io_u)) {
-			put_io_u(td, io_u);
-			break;
-		}
-
-		if (td_io_prep(td, io_u)) {
-			put_io_u(td, io_u);
-			break;
-		}
-
-		ret = io_u_queue(td, io_u);
-		if (ret) {
-			put_io_u(td, io_u);
-			td_verror(td, ret);
-			break;
-		}
-
-		/*
-		 * we have one pending to verify, do that while
-		 * we are doing io on the next one
-		 */
-		if (do_io_u_verify(td, &v_io_u))
-			break;
-
-		ret = io_u_getevents(td, 1, 1, NULL);
-		if (ret != 1) {
-			if (ret < 0)
-				td_verror(td, ret);
-			break;
-		}
-
-		v_io_u = td->io_event(td, 0);
-		icd.nr = 1;
-		icd.error = 0;
-		/*
-		 * io_completed() adds to bytes_done, so the counters must
-		 * start from a defined value
-		 */
-		icd.bytes_done[0] = icd.bytes_done[1] = 0;
-		io_completed(td, v_io_u, &icd);
-
-		if (icd.error) {
-			td_verror(td, icd.error);
-			put_io_u(td, v_io_u);
-			v_io_u = NULL;
-			break;
-		}
-
-		/*
-		 * if we can't submit more io, we need to verify now
-		 */
-		if (queue_full(td) && do_io_u_verify(td, &v_io_u))
-			break;
-
-	} while (1);
-
-	/* verify the last completed unit, if the loop left one behind */
-	do_io_u_verify(td, &v_io_u);
-
-	if (td->cur_depth)
-		cleanup_pending_aio(td);
-
-	td_set_runstate(td, TD_RUNNING);
-}
-
-/*
- * Main I/O loop: keep issuing io_u's until td->io_size bytes have been
- * done in direction td->ddir, the thread is told to terminate, no further
- * io_u can be had, the runtime is exceeded or an error occurs.
- *
- * After the loop any I/O still in flight is drained, and the file is
- * synced if should_fsync() and td->end_fsync say so.
- */
-static void do_io(struct thread_data *td)
-{
-	struct io_completion_data icd;
-	struct timeval start, now;
-	unsigned long usec;
-
-	while (td->this_io_bytes[td->ddir] < td->io_size) {
-		struct timespec no_wait = { .tv_sec = 0, .tv_nsec = 0};
-		struct timespec *timeout;
-		struct io_u *io_u;
-		int ret, min_evts;
-
-		if (td->terminate)
-			break;
-
-		io_u = get_io_u(td);
-		if (!io_u)
-			break;
-
-		/* keep the start stamp; it is used for rate throttling below */
-		start = io_u->start_time;
-
-		ret = io_u_queue(td, io_u);
-		if (ret) {
-			put_io_u(td, io_u);
-			td_verror(td, ret);
-			break;
-		}
-
-		add_slat_sample(td, io_u->ddir, mtime_since(&io_u->start_time, &io_u->issue_time));
-
-		/*
-		 * queue at full depth: block until at least one event is
-		 * in. otherwise just poll so more io can be queued.
-		 */
-		if (td->cur_depth >= td->iodepth) {
-			timeout = NULL;
-			min_evts = 1;
-		} else {
-			timeout = &no_wait;
-			min_evts = 0;
-		}
-
-		ret = io_u_getevents(td, min_evts, td->cur_depth, timeout);
-		if (ret < 0) {
-			td_verror(td, ret);
-			break;
-		}
-		if (ret == 0)
-			continue;
-
-		icd.nr = ret;
-		ios_completed(td, &icd);
-		if (icd.error) {
-			td_verror(td, icd.error);
-			break;
-		}
-
-		/*
-		 * the rate is batched for now, it should work for batches
-		 * of completions except the very first one which may look
-		 * a little bursty
-		 */
-		gettimeofday(&now, NULL);
-		usec = utime_since(&start, &now);
-
-		rate_throttle(td, usec, icd.bytes_done[td->ddir]);
-
-		if (check_min_rate(td, &now)) {
-			td_verror(td, ENOMEM);
-			break;
-		}
-
-		if (runtime_exceeded(td, &now))
-			break;
-
-		if (td->thinktime)
-			usec_sleep(td, td->thinktime);
-
-		/* sync each time the write block count hits a multiple of fsync_blocks */
-		if (should_fsync(td) && td->fsync_blocks &&
-		    (td->io_blocks[DDIR_WRITE] % td->fsync_blocks) == 0)
-			sync_td(td);
-	}
-
-	if (td->cur_depth)
-		cleanup_pending_aio(td);
-
-	if (should_fsync(td) && td->end_fsync)
-		sync_td(td);
-}
-
-/*
- * Run the io_cleanup hook, if one is set.
- */
-static void cleanup_io(struct thread_data *td)
-{
-	if (!td->io_cleanup)
-		return;
-
-	td->io_cleanup(td);
-}
-
-/*
- * Dispatch to the init routine of the io engine selected in
- * td->io_engine and return its result. An unknown engine is reported on
- * stderr and yields 1.
- */
-static int init_io(struct thread_data *td)
-{
-	switch (td->io_engine) {
-	case FIO_SYNCIO:
-		return fio_syncio_init(td);
-	case FIO_MMAPIO:
-		return fio_mmapio_init(td);
-	case FIO_LIBAIO:
-		return fio_libaio_init(td);
-	case FIO_POSIXAIO:
-		return fio_posixaio_init(td);
-	case FIO_SGIO:
-		return fio_sgio_init(td);
-	case FIO_SPLICEIO:
-		return fio_spliceio_init(td);
-	default:
-		break;
-	}
-
-	fprintf(stderr, "bad io_engine %d\n", td->io_engine);
-	return 1;
-}
-
-/*
- * Free every io_u on the free list and release the data buffer in the
- * way that matches td->mem_type (free, shmdt + IPC_RMID, or munmap).
- * An unknown memory type is reported on stderr. td->orig_buffer is
- * cleared in all cases.
- */
-static void cleanup_io_u(struct thread_data *td)
-{
-	struct list_head *pos, *tmp;
-
-	list_for_each_safe(pos, tmp, &td->io_u_freelist) {
-		struct io_u *unit = list_entry(pos, struct io_u, list);
-
-		list_del(&unit->list);
-		free(unit);
-	}
-
-	switch (td->mem_type) {
-	case MEM_MALLOC:
-		free(td->orig_buffer);
-		break;
-	case MEM_SHM: {
-		struct shmid_ds sbuf;
-
-		/* detach first, then mark the segment for removal */
-		shmdt(td->orig_buffer);
-		shmctl(td->shm_id, IPC_RMID, &sbuf);
-		break;
-	}
-	case MEM_MMAP:
-		munmap(td->orig_buffer, td->orig_buffer_size);
-		break;
-	default:
-		fprintf(stderr, "Bad memory type %d\n", td->mem_type);
-		break;
-	}
-
-	td->orig_buffer = NULL;
-}
-
-/*
- * Allocate the data buffer and the io_u units for this thread.
- *
- * One unit is created when the FIO_SYNCIO bit is set in td->io_engine,
- * td->iodepth units otherwise. The buffer holds max_bs bytes per unit
- * plus MASK bytes of slack so the start can be aligned with ALIGN(). It
- * comes from malloc, SysV shared memory or an anonymous mmap, depending
- * on td->mem_type. Each unit is zeroed, pointed at its slice of the
- * buffer and put on td->io_u_freelist.
- *
- * Returns 0 on success, 1 on failure (error reported via td_verror()).
- * Units already on the free list and the buffer are not released here on
- * failure; cleanup_io_u() does that.
- * NOTE(review): confirm callers run cleanup_io_u() after a failed init.
- */
-static int init_io_u(struct thread_data *td)
-{
-	struct io_u *io_u;
-	int i, max_units;
-	char *p;
-
-	if (td->io_engine & FIO_SYNCIO)
-		max_units = 1;
-	else
-		max_units = td->iodepth;
-
-	td->orig_buffer_size = td->max_bs * max_units + MASK;
-
-	if (td->mem_type == MEM_MALLOC) {
-		td->orig_buffer = malloc(td->orig_buffer_size);
-		if (!td->orig_buffer) {
-			td_verror(td, ENOMEM);
-			fprintf(stderr, "malloc: out of memory\n");
-			return 1;
-		}
-	} else if (td->mem_type == MEM_SHM) {
-		td->shm_id = shmget(IPC_PRIVATE, td->orig_buffer_size, IPC_CREAT | 0600);
-		if (td->shm_id < 0) {
-			td_verror(td, errno);
-			perror("shmget");
-			return 1;
-		}
-
-		td->orig_buffer = shmat(td->shm_id, NULL, 0);
-		if (td->orig_buffer == (void *) -1) {
-			td_verror(td, errno);
-			perror("shmat");
-			td->orig_buffer = NULL;
-			return 1;
-		}
-	} else if (td->mem_type == MEM_MMAP) {
-		/* anonymous mapping: pass fd -1, which portable code must do */
-		td->orig_buffer = mmap(NULL, td->orig_buffer_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | OS_MAP_ANON, -1, 0);
-		if (td->orig_buffer == MAP_FAILED) {
-			td_verror(td, errno);
-			perror("mmap");
-			td->orig_buffer = NULL;
-			return 1;
-		}
-	} else {
-		/* same diagnostic cleanup_io_u() gives for an unknown type */
-		fprintf(stderr, "Bad memory type %d\n", td->mem_type);
-		td_verror(td, EINVAL);
-		return 1;
-	}
-
-	p = ALIGN(td->orig_buffer);
-	for (i = 0; i < max_units; i++) {
-		/* calloc hands back a zeroed unit */
-		io_u = calloc(1, sizeof(*io_u));
-		if (!io_u) {
-			td_verror(td, ENOMEM);
-			fprintf(stderr, "io_u: out of memory\n");
-			return 1;
-		}
-		INIT_LIST_HEAD(&io_u->list);
-
-		io_u->buf = p + td->max_bs * i;
-		io_u->index = i;
-		list_add(&io_u->list, &td->io_u_freelist);
-	}
-
-	return 0;
-}
-
-static int create_file(struct thread_data *td, unsigned long long size,
- int extend)
-{
- unsigned long long left;
- unsigned int bs;
- int r, oflags;
- char *b;
-
- /*
- * unless specifically asked for overwrite, let normal io extend it
- */
- if (td_write(td) && !td->overwrite)
- return 0;
-
- if (!size) {
- fprintf(stderr, "Need size for create\n");
- td_verror(td, EINVAL);
- return 1;
- }
-
- if (!extend) {
- oflags = O_CREAT | O_TRUNC;
- printf("Client%d: Laying out IO file (%LuMiB)\n", td->thread_number, size >> 20);
- } else {
- oflags = O_APPEND;
- printf("Client%d: Extending IO file (%Lu -> %LuMiB)\n", td->thread_number, (td->file_size - size) >> 20, td->file_size >> 20);
- }
-
- td->fd = open(td->file_name, O_WRONLY | oflags, 0644);
- if (td->fd < 0) {
- td_verror(td, errno);
- return 1;
- }