-/*
- * Check whether the thread is sustaining its configured minimum rate.
- *
- * Nothing is checked during the first 2 seconds after td->start. After
- * that, once a baseline sample exists (td->rate_bytes is non-zero), the
- * bytes moved in the thread's data direction since the baseline are
- * divided by the milliseconds elapsed since td->lastrate and compared
- * against td->ratemin. A fresh baseline is recorded whenever the check
- * passes or no baseline existed yet.
- *
- * Returns 1 if the measured rate is below td->ratemin (the thread group
- * is terminated first when rate_quit is set), 0 otherwise.
- */
-static int check_min_rate(struct thread_data *td, struct timeval *now)
-{
-	unsigned long spent;
-	unsigned long rate;
-	int ddir = td->ddir;
-
-	/*
-	 * allow a 2 second settle period in the beginning
-	 */
-	if (mtime_since(&td->start, now) < 2000)
-		return 0;
-
-	/*
-	 * if rate_bytes is set, a sample is running
-	 */
-	if (td->rate_bytes) {
-		spent = mtime_since(&td->lastrate, now);
-
-		/*
-		 * Wait for a full cycle. Also bail out when no time has
-		 * passed at all: with a ratecycle of 0 the division below
-		 * would otherwise divide by zero.
-		 */
-		if (!spent || spent < td->ratecycle)
-			return 0;
-
-		rate = (td->this_io_bytes[ddir] - td->rate_bytes) / spent;
-		if (rate < td->ratemin) {
-			/* rate is unsigned long, so print it with %lu */
-			printf("Client%d: min rate %d not met, got %luKiB/sec\n", td->thread_number, td->ratemin, rate);
-			if (rate_quit)
-				terminate_threads(td->groupid);
-			return 1;
-		}
-	}
-
-	/*
-	 * start a new sample window from here
-	 */
-	td->rate_bytes = td->this_io_bytes[ddir];
-	memcpy(&td->lastrate, now, sizeof(*now));
-	return 0;
-}
-
-/*
- * Returns 1 once at least td->timeout * 1000 milliseconds lie between
- * td->epoch and t. Returns 0 otherwise, or when no timeout is set.
- */
-static inline int runtime_exceeded(struct thread_data *td, struct timeval *t)
-{
-	if (td->timeout && mtime_since(&td->epoch, t) >= td->timeout * 1000)
-		return 1;
-
-	return 0;
-}
-
-/*
- * Fill len bytes at p with pseudo-random data drawn from the thread's
- * verify_state generator.
- */
-static void fill_random_bytes(struct thread_data *td,
-			      unsigned char *p, unsigned int len)
-{
-	unsigned int chunk;
-	double rnd;
-
-	for (; len; len -= chunk, p += chunk) {
-		/*
-		 * lrand48_r seems to be broken and only fill the bottom
-		 * 32-bits, even on 64-bit archs with 64-bit longs, so the
-		 * raw bytes of a drand48_r double are used instead
-		 */
-		drand48_r(&td->verify_state, &rnd);
-
-		/* copy a whole double, or whatever is left of the buffer */
-		chunk = len < sizeof(rnd) ? len : sizeof(rnd);
-		memcpy(p, &rnd, chunk);
-	}
-}
-
-/*
- * Print len bytes of buffer to stdout as two-digit lowercase hex values
- * with no separators, followed by a newline.
- */
-static void hexdump(void *buffer, int len)
-{
-	unsigned char *byte = buffer;
-
-	while (len-- > 0)
-		printf("%02x", *byte++);
-
-	printf("\n");
-}
-
-/*
- * Recompute the crc32 over the payload that follows the verify header in
- * io_u->buf and compare it with the value stored in the header.
- *
- * Returns 0 on a match. On a mismatch, reports both values on stderr
- * and returns 1.
- */
-static int verify_io_u_crc32(struct verify_header *hdr, struct io_u *io_u)
-{
-	unsigned char *payload = (unsigned char *) io_u->buf + sizeof(*hdr);
-	unsigned long sum;
-
-	sum = crc32(payload, hdr->len - sizeof(*hdr));
-	if (sum == hdr->crc32)
-		return 0;
-
-	fprintf(stderr, "crc32: verify failed at %llu/%u\n", io_u->offset, io_u->buflen);
-	fprintf(stderr, "crc32: wanted %lx, got %lx\n", hdr->crc32, sum);
-	return 1;
-}
-
-/*
- * Recompute the md5 hash over the payload that follows the verify header
- * in io_u->buf and compare it with the digest stored in the header.
- *
- * Returns 0 on a match. On a mismatch, dumps the stored and the computed
- * digest and returns the non-zero memcmp() result.
- */
-static int verify_io_u_md5(struct verify_header *hdr, struct io_u *io_u)
-{
-	unsigned char *payload = (unsigned char *) io_u->buf + sizeof(*hdr);
-	struct md5_ctx ctx;
-	int diff;
-
-	memset(&ctx, 0, sizeof(ctx));
-	md5_update(&ctx, payload, hdr->len - sizeof(*hdr));
-
-	diff = memcmp(hdr->md5_digest, ctx.hash, sizeof(ctx.hash));
-	if (!diff)
-		return 0;
-
-	fprintf(stderr, "md5: verify failed at %llu/%u\n", io_u->offset, io_u->buflen);
-	hexdump(hdr->md5_digest, sizeof(hdr->md5_digest));
-	hexdump(ctx.hash, sizeof(ctx.hash));
-	return diff;
-}
-
-/*
- * Verify the contents of io_u->buf against the verify header stored at
- * the start of the buffer.
- *
- * Returns 0 if the payload matches its checksum. Returns non-zero if the
- * buffer is too small to hold a header, the magic or the recorded length
- * is wrong, the verify type is unknown, or the checksum does not match.
- */
-static int verify_io_u(struct io_u *io_u)
-{
-	struct verify_header *hdr = (struct verify_header *) io_u->buf;
-	int ret;
-
-	/*
-	 * a buffer shorter than the header cannot be inspected at all
-	 */
-	if (io_u->buflen < sizeof(*hdr))
-		return 1;
-
-	if (hdr->fio_magic != FIO_HDR_MAGIC)
-		return 1;
-
-	/*
-	 * hdr->len was read back from the medium and cannot be trusted.
-	 * The checksum helpers hash hdr->len - sizeof(*hdr) bytes, so a
-	 * length below the header size would wrap around and a length
-	 * above buflen would read past the end of the buffer.
-	 */
-	if (hdr->len < sizeof(*hdr) || hdr->len > io_u->buflen) {
-		fprintf(stderr, "Bad verify header len %lu at %llu/%u\n", (unsigned long) hdr->len, io_u->offset, io_u->buflen);
-		return 1;
-	}
-
-	if (hdr->verify_type == VERIFY_MD5)
-		ret = verify_io_u_md5(hdr, io_u);
-	else if (hdr->verify_type == VERIFY_CRC32)
-		ret = verify_io_u_crc32(hdr, io_u);
-	else {
-		fprintf(stderr, "Bad verify type %d\n", hdr->verify_type);
-		ret = 1;
-	}
-
-	return ret;
-}
-
-/*
- * Store the crc32 of the len bytes at p in the verify header.
- */
-static void fill_crc32(struct verify_header *hdr, void *p, unsigned int len)
-{
-	hdr->crc32 = crc32(p, len);
-}
-
-/*
- * Hash the len bytes at p with md5, starting from a zeroed context, and
- * store the resulting hash in the verify header's digest field.
- */
-static void fill_md5(struct verify_header *hdr, void *p, unsigned int len)
-{
-	struct md5_ctx ctx;
-
-	memset(&ctx, 0, sizeof(ctx));
-	md5_update(&ctx, p, len);
-
-	memcpy(hdr->md5_digest, ctx.hash, sizeof(ctx.hash));
-}
-
-/*
- * Pick the data direction for the next io.
- *
- * Pure read or pure write jobs always return their fixed direction. For
- * mixed jobs the direction is re-rolled once td->rwmixcycle milliseconds
- * have passed since the last switch: a random value in [0, 100) below
- * td->rwmixread selects a read, anything else a write.
- */
-static int get_rw_ddir(struct thread_data *td)
-{
-	struct timeval now;
-	unsigned long elapsed;
-
-	if (!td_rw(td))
-		return td_read(td) ? DDIR_READ : DDIR_WRITE;
-
-	/*
-	 * Sample the clock once and use that single timestamp both for
-	 * the elapsed check and as the recorded switch time, instead of
-	 * reading the clock a second time for the elapsed computation.
-	 */
-	gettimeofday(&now, NULL);
-	elapsed = mtime_since(&td->rwmix_switch, &now);
-
-	/*
-	 * Check if it's time to seed a new data direction.
-	 */
-	if (elapsed >= td->rwmixcycle) {
-		unsigned long v;
-		long r;
-
-		lrand48_r(&td->random_state, &r);
-		v = 100UL * r / (unsigned long) (RAND_MAX + 1.0);
-		if (v < td->rwmixread)
-			td->rwmix_ddir = DDIR_READ;
-		else
-			td->rwmix_ddir = DDIR_WRITE;
-		memcpy(&td->rwmix_switch, &now, sizeof(now));
-	}
-
-	return td->rwmix_ddir;
-}
-
-/*
- * Fill the body of io_u->buf with random data and prepend a verify
- * header carrying the magic, the buffer length, and the md5 or crc32
- * checksum of that data (md5 when td->verify is VERIFY_MD5, crc32
- * otherwise).
- *
- * A buffer too small to hold the header is left untouched.
- */
-static void populate_io_u(struct thread_data *td, struct io_u *io_u)
-{
-	unsigned char *p = (unsigned char *) io_u->buf;
-	struct verify_header hdr;
-	unsigned int body_len;
-
-	/*
-	 * buflen - sizeof(hdr) is unsigned: a short buffer would wrap to
-	 * a huge length and overrun io_u->buf
-	 */
-	if (io_u->buflen < sizeof(hdr))
-		return;
-
-	body_len = io_u->buflen - sizeof(hdr);
-
-	/*
-	 * Only one of the checksum fields gets filled in below. Clear the
-	 * header first so no uninitialized stack bytes are copied into
-	 * the io buffer.
-	 */
-	memset(&hdr, 0, sizeof(hdr));
-	hdr.fio_magic = FIO_HDR_MAGIC;
-	hdr.len = io_u->buflen;
-
-	p += sizeof(hdr);
-	fill_random_bytes(td, p, body_len);
-
-	if (td->verify == VERIFY_MD5) {
-		fill_md5(&hdr, p, body_len);
-		hdr.verify_type = VERIFY_MD5;
-	} else {
-		fill_crc32(&hdr, p, body_len);
-		hdr.verify_type = VERIFY_CRC32;
-	}
-
-	memcpy(io_u->buf, &hdr, sizeof(hdr));
-}
-
-/*
- * Run the io engine's optional prep hook on io_u.
- *
- * Returns 1 if a hook is installed and returned non-zero, 0 otherwise.
- */
-static int td_io_prep(struct thread_data *td, struct io_u *io_u)
-{
-	if (!td->io_prep)
-		return 0;
-
-	return td->io_prep(td, io_u) ? 1 : 0;
-}
-
-/*
- * Release io_u: unlink it from whatever list it is on, put it on the
- * thread's freelist, and lower the count of io_us in use.
- */
-void put_io_u(struct thread_data *td, struct io_u *io_u)
-{
-	struct list_head *node = &io_u->list;
-
-	td->cur_depth--;
-
-	list_del(node);
-	list_add(node, &td->io_u_freelist);
-}
-
-/*
- * Append one "ddir,offset,buflen" line describing io_u to the thread's
- * write iolog.
- */
-static void write_iolog_put(struct thread_data *td, struct io_u *io_u)
-{
-	FILE *log = td->iolog_f;
-
-	fprintf(log, "%d,%llu,%u\n", io_u->ddir, io_u->offset, io_u->buflen);
-}
-
-/*
- * Pop the first entry off the thread's replay log and copy its offset,
- * length and direction into io_u. The entry is freed.
- *
- * Returns 0 on success, 1 if the log is empty.
- */
-static int read_iolog_get(struct thread_data *td, struct io_u *io_u)
-{
-	struct io_piece *piece;
-
-	if (list_empty(&td->io_log_list))
-		return 1;
-
-	piece = list_entry(td->io_log_list.next, struct io_piece, list);
-	list_del(&piece->list);
-
-	io_u->ddir = piece->ddir;
-	io_u->offset = piece->offset;
-	io_u->buflen = piece->len;
-
-	free(piece);
-	return 0;
-}
-
-/*
- * Decide offset, length and direction of the next io and store them in
- * io_u, either from the replay log or from the offset/length generators.
- *
- * Returns 0 when io_u was filled in, 1 when there is nothing to issue.
- */
-static int fill_io_u(struct thread_data *td, struct io_u *io_u)
-{
-	/*
-	 * Replaying an iolog: the next piece, if any, dictates the io.
-	 */
-	if (td->read_iolog)
-		return read_iolog_get(td, io_u);
-
-	/*
-	 * No log, so ask the seq/rand engine for the next position.
-	 */
-	if (get_next_offset(td, &io_u->offset))
-		return 1;
-
-	io_u->buflen = get_next_buflen(td);
-	if (!io_u->buflen)
-		return 1;
-
-	io_u->ddir = get_rw_ddir(td);
-
-	/*
-	 * Record the entry when an iolog is being written.
-	 */
-	if (td->write_iolog)
-		write_iolog_put(td, io_u);
-
-	return 0;
-}
-
-/*
- * The queue is full when no io_u is left on the thread's freelist.
- */
-#define queue_full(td) (list_empty(&(td)->io_u_freelist))
-
-/*
- * Take the first io_u off the freelist, clear its error and residual
- * fields, move it to the busylist and raise the count of io_us in use.
- *
- * Returns NULL when the freelist is empty.
- */
-struct io_u *__get_io_u(struct thread_data *td)
-{
-	struct io_u *unit;
-
-	if (queue_full(td))
-		return NULL;
-
-	unit = list_entry(td->io_u_freelist.next, struct io_u, list);
-	unit->resid = 0;
-	unit->error = 0;
-
-	list_del(&unit->list);
-	list_add(&unit->list, &td->io_u_busylist);
-	td->cur_depth++;
-
-	return unit;
-}
-
-/*
- * Get a free io_u and set it up for the next io of this thread: choose
- * offset, length and direction, clamp the length to the end of the
- * file, fill in verify data if enabled, run the engine's prep hook and
- * stamp the start time.
- *
- * Returns the prepared io_u, or NULL if no io_u is free, there is no
- * more io to do, the io would start at or beyond the end of the file,
- * or the prep hook failed. On failure the io_u is put back on the
- * freelist.
- */
-static struct io_u *get_io_u(struct thread_data *td)
-{
-	struct io_u *io_u;
-
-	io_u = __get_io_u(td);
-	if (!io_u)
-		return NULL;
-
-	/*
-	 * current zone is done, skip ahead to the next one
-	 */
-	if (td->zone_bytes >= td->zone_size) {
-		td->zone_bytes = 0;
-		td->last_pos += td->zone_skip;
-	}
-
-	if (fill_io_u(td, io_u))
-		goto err_put;
-
-	/*
-	 * An offset past the end of the file (possible when replaying an
-	 * iolog) would make the clamp below wrap around and produce a
-	 * bogus length, so reject it up front.
-	 */
-	if (io_u->offset >= td->real_file_size)
-		goto err_put;
-
-	if (io_u->buflen + io_u->offset > td->real_file_size)
-		io_u->buflen = td->real_file_size - io_u->offset;
-
-	if (!io_u->buflen)
-		goto err_put;
-
-	if (!td->read_iolog && !td->sequential)
-		mark_random_map(td, io_u);
-
-	td->last_pos += io_u->buflen;
-
-	if (td->verify != VERIFY_NONE)
-		populate_io_u(td, io_u);
-
-	if (td_io_prep(td, io_u))
-		goto err_put;
-
-	gettimeofday(&io_u->start_time, NULL);
-	return io_u;
-
-err_put:
-	put_io_u(td, io_u);
-	return NULL;
-}
-
-/*
- * Switch the thread to a new run state, remembering the state it was in
- * before in td->old_runstate.
- */
-static inline void td_set_runstate(struct thread_data *td, int runstate)
-{
-	/* save the current state before it is overwritten */
-	td->old_runstate = td->runstate;
-
-	td->runstate = runstate;
-}
-
-/*
- * Pop the first entry off the thread's io history and turn io_u into a
- * read of that same offset and length. The entry is freed.
- *
- * Returns 0 on success, 1 if the history is empty.
- */
-static int get_next_verify(struct thread_data *td, struct io_u *io_u)
-{
-	struct io_piece *piece;
-
-	if (list_empty(&td->io_hist_list))
-		return 1;
-
-	piece = list_entry(td->io_hist_list.next, struct io_piece, list);
-	list_del(&piece->list);
-
-	io_u->ddir = DDIR_READ;
-	io_u->offset = piece->offset;
-	io_u->buflen = piece->len;
-
-	free(piece);
-	return 0;
-}
-
-/*
- * Unlink and free every entry left on the thread's io history list.
- */
-static void prune_io_piece_log(struct thread_data *td)
-{
-	struct list_head *pos, *next;
-
-	list_for_each_safe(pos, next, &td->io_hist_list) {
-		struct io_piece *piece = list_entry(pos, struct io_piece, list);
-
-		list_del(&piece->list);
-		free(piece);
-	}
-}
-
-/*
- * Log a successful write, so the log can be unwound later for verify.
- *
- * A new io_piece holding the offset, length and direction of io_u is
- * added to td->io_hist_list. If the allocation fails, the write is
- * reported on stderr and left out of the history.
- */
-static void log_io_piece(struct thread_data *td, struct io_u *io_u)
-{
-	struct io_piece *ipo = malloc(sizeof(*ipo));
-	struct list_head *entry;
-
-	if (!ipo) {
-		fprintf(stderr, "log_io_piece: out of memory, %llu/%u not logged\n", io_u->offset, io_u->buflen);
-		return;
-	}
-
-	INIT_LIST_HEAD(&ipo->list);
-	ipo->offset = io_u->offset;
-	ipo->len = io_u->buflen;
-	/* don't leave the direction field uninitialized */
-	ipo->ddir = io_u->ddir;
-
-	/*
-	 * for random io where the writes extend the file, it will typically
-	 * be laid out with the block scattered as written. it's faster to
-	 * read them in in that order again, so don't sort
-	 */
-	if (td->sequential || !td->overwrite) {
-		list_add_tail(&ipo->list, &td->io_hist_list);
-		return;
-	}
-
-	/*
-	 * for random io, sort the list so verify will run faster: walk
-	 * backwards from the tail to the first entry with a smaller
-	 * offset and insert right after it
-	 */
-	entry = &td->io_hist_list;
-	while ((entry = entry->prev) != &td->io_hist_list) {
-		struct io_piece *__ipo = list_entry(entry, struct io_piece, list);
-
-		if (__ipo->offset < ipo->offset)
-			break;
-	}
-
-	list_add(&ipo->list, entry);
-}
-
-/*
- * Flush and close the thread's write iolog, then free the stream buffer
- * that was attached to it.
- */
-static void write_iolog_close(struct thread_data *td)
-{
-	FILE *log = td->iolog_f;
-
-	fflush(log);
-	fclose(log);
-
-	/* the buffer may only go away once the stream is closed */
-	free(td->iolog_buf);
-}
-
-/*
- * Set up io logging for the thread.
- *
- * When writing an iolog, the file is opened for writing and given an
- * 8192 byte stream buffer. When reading one, every "ddir,offset,bytes"
- * line is parsed into an io_piece on td->io_log_list, td->max_bs is
- * raised to the largest logged length, and the thread's data direction
- * is derived from the entries seen. Malformed lines are reported and
- * skipped.
- *
- * Returns 0 on success or when no iolog is in use. Returns 1 if the
- * file cannot be opened, memory runs out, or a read log has no usable
- * entries.
- */
-static int init_iolog(struct thread_data *td)
-{
-	unsigned long long offset;
-	unsigned int bytes;
-	char *str, *p;
-	FILE *f;
-	int rw, i, reads, writes;
-	int ret = 0;
-
-	if (!td->read_iolog && !td->write_iolog)
-		return 0;
-
-	if (td->read_iolog)
-		f = fopen(td->iolog_file, "r");
-	else
-		f = fopen(td->iolog_file, "w");
-
-	if (!f) {
-		perror("fopen iolog");
-		printf("file %s, %d/%d\n", td->iolog_file, td->read_iolog, td->write_iolog);
-		return 1;
-	}
-
-	/*
-	 * That's it for writing, setup a log buffer and we're done.
-	 * If the allocation fails, setvbuf() is handed a NULL buffer,
-	 * which leaves buffer allocation to stdio.
-	 */
-	if (td->write_iolog) {
-		td->iolog_f = f;
-		td->iolog_buf = malloc(8192);
-		setvbuf(f, td->iolog_buf, _IOFBF, 8192);
-		return 0;
-	}
-
-	/*
-	 * Read in the read iolog and store it, reuse the infrastructure
-	 * for doing verifications.
-	 */
-	str = malloc(4096);
-	if (!str) {
-		perror("malloc iolog");
-		fclose(f);
-		return 1;
-	}
-
-	reads = writes = i = 0;
-	while ((p = fgets(str, 4096, f)) != NULL) {
-		struct io_piece *ipo;
-
-		if (sscanf(p, "%d,%llu,%u", &rw, &offset, &bytes) != 3) {
-			fprintf(stderr, "bad iolog: %s\n", p);
-			continue;
-		}
-		if (rw == DDIR_READ)
-			reads++;
-		else if (rw == DDIR_WRITE)
-			writes++;
-		else {
-			fprintf(stderr, "bad ddir: %d\n", rw);
-			continue;
-		}
-
-		ipo = malloc(sizeof(*ipo));
-		if (!ipo) {
-			perror("malloc iolog");
-			ret = 1;
-			break;
-		}
-
-		INIT_LIST_HEAD(&ipo->list);
-		ipo->offset = offset;
-		ipo->len = bytes;
-		if (bytes > td->max_bs)
-			td->max_bs = bytes;
-		ipo->ddir = rw;
-		list_add_tail(&ipo->list, &td->io_log_list);
-		i++;
-	}
-
-	free(str);
-	fclose(f);
-
-	if (ret)
-		return ret;
-
-	if (!i)
-		return 1;
-
-	/*
-	 * A log holding only writes makes this a write job. It used to
-	 * be flagged as a read job by mistake.
-	 */
-	if (reads && !writes)
-		td->ddir = DDIR_READ;
-	else if (!reads && writes)
-		td->ddir = DDIR_WRITE;
-	else
-		td->iomix = 1;
-
-	return 0;
-}
-
-/*
- * Call the io engine's sync hook if it has one and return its result.
- * Returns 0 when no hook is installed.
- */
-static int sync_td(struct thread_data *td)
-{
-	if (!td->io_sync)
-		return 0;
-
-	return td->io_sync(td);
-}
-
-/*
- * Thin wrapper around the io engine's getevents hook: passes min, max
- * and the timeout t straight through and returns the hook's result.
- */
-static int io_u_getevents(struct thread_data *td, int min, int max,
-			  struct timespec *t)
-{
-	return td->io_getevents(td, min, max, t);
-}
-
-/*
- * Stamp io_u with its issue time and hand it to the io engine's queue
- * hook. Returns the hook's result.
- */
-static int io_u_queue(struct thread_data *td, struct io_u *io_u)
-{
-	/* take the issue time right before queueing */
-	gettimeofday(&io_u->issue_time, NULL);
-
-	return td->io_queue(td, io_u);
-}
-
-#define iocb_time(iocb)	((unsigned long) (iocb)->data)
-
-/*
- * Account for one completed io_u.
- *
- * On success the thread's block and byte counters are updated, a
- * completion latency sample (issue time to now, in milliseconds) and a
- * bandwidth sample are added, writes of jobs that write are logged for
- * later verify, and the bytes are added to icd->bytes_done[]. On
- * failure only icd->error is set from the io_u's error.
- */
-static void io_completed(struct thread_data *td, struct io_u *io_u,
-			 struct io_completion_data *icd)
-{
-	struct timeval done;
-	unsigned long msec;
-	unsigned int bytes;
-	int ddir;
-
-	gettimeofday(&done, NULL);
-
-	if (io_u->error) {
-		icd->error = io_u->error;
-		return;
-	}
-
-	ddir = io_u->ddir;
-	bytes = io_u->buflen - io_u->resid;
-
-	td->io_blocks[ddir]++;
-	td->io_bytes[ddir] += bytes;
-	td->zone_bytes += bytes;
-	td->this_io_bytes[ddir] += bytes;
-
-	msec = mtime_since(&io_u->issue_time, &done);
-
-	add_clat_sample(td, ddir, msec);
-	add_bw_sample(td, ddir);
-
-	if ((td_rw(td) || td_write(td)) && ddir == DDIR_WRITE)
-		log_io_piece(td, io_u);
-
-	icd->bytes_done[ddir] += bytes;
-}
-
-/*
- * Process the icd->nr events the engine has ready: reset the error and
- * byte counters in icd, then account for each event's io_u and put it
- * back on the freelist.
- */
-static void ios_completed(struct thread_data *td,struct io_completion_data *icd)
-{
-	int event;
-
-	icd->bytes_done[0] = 0;
-	icd->bytes_done[1] = 0;
-	icd->error = 0;
-
-	for (event = 0; event < icd->nr; event++) {
-		struct io_u *io_u = td->io_event(td, event);
-
-		io_completed(td, io_u, icd);
-		put_io_u(td, io_u);
-	}
-}
-
-/*
- * Drain the io still in flight for this thread: reap what has already
- * completed, try to cancel the rest if the engine supports it, then
- * wait for whatever is still outstanding.
- */
-static void cleanup_pending_aio(struct thread_data *td)
-{
-	struct timespec no_wait = { .tv_sec = 0, .tv_nsec = 0};
-	struct io_completion_data icd;
-	struct list_head *pos, *next;
-	int nr;
-
-	/*
-	 * pick up events that are available right now, without waiting
-	 */
-	nr = io_u_getevents(td, 0, td->cur_depth, &no_wait);
-	if (nr > 0) {
-		icd.nr = nr;
-		ios_completed(td, &icd);
-	}
-
-	/*
-	 * cancel what is still active; a cancelled io_u goes straight
-	 * back to the freelist
-	 */
-	if (td->io_cancel) {
-		list_for_each_safe(pos, next, &td->io_u_busylist) {
-			struct io_u *io_u = list_entry(pos, struct io_u, list);
-
-			if (td->io_cancel(td, io_u) == 0)
-				put_io_u(td, io_u);
-		}
-	}
-
-	if (!td->cur_depth)
-		return;
-
-	/*
-	 * wait for everything that could not be cancelled
-	 */
-	nr = io_u_getevents(td, td->cur_depth, td->cur_depth, NULL);
-	if (nr > 0) {
-		icd.nr = nr;
-		ios_completed(td, &icd);
-	}
-}
-
-/*
- * Verify the io_u that *io_u points to, if any, then release it and
- * clear *io_u.
- *
- * Returns the verify result, or 0 when there was nothing to verify.
- */
-static int do_io_u_verify(struct thread_data *td, struct io_u **io_u)
-{
-	struct io_u *pending = *io_u;
-	int ret;
-
-	if (!pending)
-		return 0;
-
-	ret = verify_io_u(pending);
-	put_io_u(td, pending);
-	*io_u = NULL;
-
-	return ret;
-}
-
-/*
- * Read back and verify everything recorded in the thread's io history.
- *
- * The loop is pipelined: while the read for one history entry is in
- * flight, the previously completed buffer (v_io_u) is verified. It
- * stops on termination, runtime expiry, an empty history, an io error
- * or a verify failure. Anything still pending afterwards is cleaned up
- * and the thread goes back to the running state.
- */
-static void do_verify(struct thread_data *td)
-{
-	struct timeval t;
-	struct io_u *io_u, *v_io_u = NULL;
-	struct io_completion_data icd;
-	int ret;
-
-	td_set_runstate(td, TD_VERIFYING);
-
-	do {
-		if (td->terminate)
-			break;
-
-		gettimeofday(&t, NULL);
-		if (runtime_exceeded(td, &t))
-			break;
-
-		io_u = __get_io_u(td);
-		if (!io_u)
-			break;
-
-		if (get_next_verify(td, io_u)) {
-			put_io_u(td, io_u);
-			break;
-		}
-
-		if (td_io_prep(td, io_u)) {
-			put_io_u(td, io_u);
-			break;
-		}
-
-		ret = io_u_queue(td, io_u);
-		if (ret) {
-			put_io_u(td, io_u);
-			td_verror(td, ret);
-			break;
-		}
-
-		/*
-		 * we have one pending to verify, do that while
-		 * we are doing io on the next one
-		 */
-		if (do_io_u_verify(td, &v_io_u))
-			break;
-
-		ret = io_u_getevents(td, 1, 1, NULL);
-		if (ret != 1) {
-			if (ret < 0)
-				td_verror(td, ret);
-			break;
-		}
-
-		v_io_u = td->io_event(td, 0);
-		icd.nr = 1;
-		icd.error = 0;
-		/*
-		 * io_completed() adds to bytes_done[], so the counters
-		 * must not be left uninitialized
-		 */
-		icd.bytes_done[0] = icd.bytes_done[1] = 0;
-		io_completed(td, v_io_u, &icd);
-
-		if (icd.error) {
-			td_verror(td, icd.error);
-			put_io_u(td, v_io_u);
-			v_io_u = NULL;
-			break;
-		}
-
-		/*
-		 * if we can't submit more io, we need to verify now
-		 */
-		if (queue_full(td) && do_io_u_verify(td, &v_io_u))
-			break;
-
-	} while (1);
-
-	/*
-	 * verify the last completed buffer, if one is still held
-	 */
-	do_io_u_verify(td, &v_io_u);
-
-	if (td->cur_depth)
-		cleanup_pending_aio(td);
-
-	td_set_runstate(td, TD_RUNNING);
-}
-
-/*
- * Main io loop of a thread. Keeps issuing io until td->io_size bytes
- * have been moved in the thread's data direction, or until termination,
- * an error, a missed minimum rate or runtime expiry stops it. Pending
- * io is drained afterwards and a final sync is issued if configured.
- */
-static void do_io(struct thread_data *td)
-{
-	struct io_completion_data icd;
-	struct timeval start, end;
-	unsigned long usec;
-
-	while (td->this_io_bytes[td->ddir] < td->io_size) {
-		struct timespec no_wait = { .tv_sec = 0, .tv_nsec = 0};
-		struct timespec *timeout = NULL;
-		struct io_u *io_u;
-		int min_evts = 1;
-		int ret;
-
-		if (td->terminate)
-			break;
-
-		io_u = get_io_u(td);
-		if (!io_u)
-			break;
-
-		start = io_u->start_time;
-
-		ret = io_u_queue(td, io_u);
-		if (ret) {
-			put_io_u(td, io_u);
-			td_verror(td, ret);
-			break;
-		}
-
-		add_slat_sample(td, io_u->ddir, mtime_since(&io_u->start_time, &io_u->issue_time));
-
-		/*
-		 * while the queue has room, only poll for completions;
-		 * once it is full, block until at least one event arrives
-		 */
-		if (td->cur_depth < td->iodepth) {
-			timeout = &no_wait;
-			min_evts = 0;
-		}
-
-		ret = io_u_getevents(td, min_evts, td->cur_depth, timeout);
-		if (ret < 0) {
-			td_verror(td, ret);
-			break;
-		}
-		if (!ret)
-			continue;
-
-		icd.nr = ret;
-		ios_completed(td, &icd);
-		if (icd.error) {
-			td_verror(td, icd.error);
-			break;
-		}
-
-		/*
-		 * the rate is batched for now, it should work for batches
-		 * of completions except the very first one which may look
-		 * a little bursty
-		 */
-		gettimeofday(&end, NULL);
-		usec = utime_since(&start, &end);
-
-		rate_throttle(td, usec, icd.bytes_done[td->ddir]);
-
-		if (check_min_rate(td, &end)) {
-			td_verror(td, ENOMEM);
-			break;
-		}
-
-		if (runtime_exceeded(td, &end))
-			break;
-
-		if (td->thinktime)
-			usec_sleep(td, td->thinktime);
-
-		/*
-		 * sync every fsync_blocks written blocks, if asked to
-		 */
-		if (should_fsync(td) && td->fsync_blocks &&
-		    (td->io_blocks[DDIR_WRITE] % td->fsync_blocks) == 0)
-			sync_td(td);
-	}
-
-	if (td->cur_depth)
-		cleanup_pending_aio(td);
-
-	if (should_fsync(td) && td->end_fsync)
-		sync_td(td);
-}
-
-/*
- * Call the io engine's cleanup hook, if it has one.
- */
-static void cleanup_io(struct thread_data *td)
-{
-	if (!td->io_cleanup)
-		return;
-
-	td->io_cleanup(td);
-}
-
-/*
- * Initialize the io engine selected by td->io_engine.
- *
- * Returns the engine init function's result, or 1 after reporting an
- * unknown engine on stderr.
- */
-static int init_io(struct thread_data *td)
-{
-	switch (td->io_engine) {
-	case FIO_SYNCIO:
-		return fio_syncio_init(td);
-	case FIO_MMAPIO:
-		return fio_mmapio_init(td);
-	case FIO_LIBAIO:
-		return fio_libaio_init(td);
-	case FIO_POSIXAIO:
-		return fio_posixaio_init(td);
-	case FIO_SGIO:
-		return fio_sgio_init(td);
-	case FIO_SPLICEIO:
-		return fio_spliceio_init(td);
-	default:
-		fprintf(stderr, "bad io_engine %d\n", td->io_engine);
-		return 1;
-	}
-}
-
-/*
- * Free every io_u on the thread's freelist and release the io buffer in
- * the way that matches td->mem_type (malloc, SysV shared memory or
- * mmap). td->orig_buffer is cleared afterwards.
- */
-static void cleanup_io_u(struct thread_data *td)
-{
-	struct list_head *pos, *next;
-
-	list_for_each_safe(pos, next, &td->io_u_freelist) {
-		struct io_u *io_u = list_entry(pos, struct io_u, list);
-
-		list_del(&io_u->list);
-		free(io_u);
-	}
-
-	switch (td->mem_type) {
-	case MEM_MALLOC:
-		free(td->orig_buffer);
-		break;
-	case MEM_SHM: {
-		struct shmid_ds sbuf;
-
-		/* detach first, then mark the segment for removal */
-		shmdt(td->orig_buffer);
-		shmctl(td->shm_id, IPC_RMID, &sbuf);
-		break;
-	}
-	case MEM_MMAP:
-		munmap(td->orig_buffer, td->orig_buffer_size);
-		break;
-	default:
-		fprintf(stderr, "Bad memory type %d\n", td->mem_type);
-		break;
-	}
-
-	td->orig_buffer = NULL;
-}
-
-static int init_io_u(struct thread_data *td)