#include "os.h"

static unsigned long page_mask;
#define ALIGN(buf) \
	(char *) (((unsigned long) (buf) + page_mask) & ~page_mask)
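/*
 * ALIGN() rounds a buffer address up to the next page boundary,
 * assuming page_mask holds page_size - 1. Example with 4096-byte
 * pages (page_mask == 0xfff): 0x804a011 becomes 0x804b000.
 */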

int groupid = 0;
int thread_number = 0;
int shm_id = 0;
int temp_stall_ts;

static volatile int startup_sem;
static volatile int fio_abort;

struct io_log *agg_io_log[2];

#define TERMINATE_ALL		(-1)
#define JOB_START_TIMEOUT	(5 * 1000)

static inline void td_set_runstate(struct thread_data *td, int runstate)
{
	td->runstate = runstate;
}

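/*
 * Flag every thread in the given group (or all threads, for
 * TERMINATE_ALL) for termination. With forced_kill set, the threads
 * are also marked as exited immediately instead of winding down.
 */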
static void terminate_threads(int group_id, int forced_kill)
{
	struct thread_data *td;
	int i;

	for_each_td(td, i) {
		if (group_id == TERMINATE_ALL || group_id == td->groupid) {
			td->terminate = 1;
			td->start_delay = 0;
			if (forced_kill)
				td_set_runstate(td, TD_EXITED);
		}
	}
}

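/*
 * SIGALRM drives the periodic disk utilization and status updates.
 * SIGSEGV force-kills all threads and exits; any other signal asks
 * the threads to terminate gracefully.
 */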
static void sig_handler(int sig)
{
	switch (sig) {
	case SIGALRM:
		update_io_ticks();
		disk_util_timer_arm();
		print_thread_status();
		break;
	case SIGSEGV:
		fprintf(stderr, "fio: got segfault, aborting\n");
		terminate_threads(TERMINATE_ALL, 1);
		fio_abort = 1;
		exit(0);
	default:
		printf("\nfio: terminating on signal %d\n", sig);
		fflush(stdout);
		terminate_threads(TERMINATE_ALL, 0);
		break;
	}
}

/*
 * Check whether we are sustaining the minimum rate the user asked for.
 */
static int check_min_rate(struct thread_data *td, struct timeval *now)
{
	unsigned long spent;
	unsigned long rate;
	int ddir = td->ddir;

	/*
	 * allow a 2 second settle period in the beginning
	 */
	if (mtime_since(&td->start, now) < 2000)
		return 0;

	/*
	 * if rate_bytes is set, we have taken at least one sample already
	 */
	if (td->rate_bytes) {
		spent = mtime_since(&td->lastrate, now);
		if (spent < td->ratecycle)
			return 0;

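		/*
		 * bytes divided by msecs gives a rate in (roughly)
		 * KiB/sec: e.g. 4096000 bytes moved in 2000 msecs
		 * works out to rate == 2048.
		 */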
		rate = (td->this_io_bytes[ddir] - td->rate_bytes) / spent;
		if (rate < td->ratemin) {
			fprintf(f_out, "%s: min rate %u not met, got %luKiB/sec\n", td->name, td->ratemin, rate);
			return 1;
		}
	}

	td->rate_bytes = td->this_io_bytes[ddir];
	memcpy(&td->lastrate, now, sizeof(*now));
	return 0;
}

static inline int runtime_exceeded(struct thread_data *td, struct timeval *t)
{
	if (!td->timeout)
		return 0;
	if (mtime_since(&td->epoch, t) >= td->timeout * 1000)
		return 1;

	return 0;
}

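/*
 * Return the next open file, round-robin from where the last pick
 * left off. Closed files (fd == -1) are skipped; NULL is returned
 * if no open file remains.
 */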
static struct fio_file *get_next_file(struct thread_data *td)
{
	unsigned int old_next_file = td->next_file;
	struct fio_file *f;

	do {
		f = &td->files[td->next_file];

		td->next_file++;
		if (td->next_file >= td->nr_files)
			td->next_file = 0;

		if (f->fd != -1)
			break;

		f = NULL;
	} while (td->next_file != old_next_file);

	return f;
}

/*
 * When job exits, we can cancel the in-flight IO if we are using async
 * io. Attempt to do so.
 */
static void cleanup_pending_aio(struct thread_data *td)
{
	struct timespec ts = { .tv_sec = 0, .tv_nsec = 0 };
	struct list_head *entry, *n;
	struct io_completion_data icd;
	struct io_u *io_u;
	int r;

	/*
	 * get immediately available events, if any
	 */
	r = td_io_getevents(td, 0, td->cur_depth, &ts);
	if (r > 0) {
		icd.nr = r;
		ios_completed(td, &icd);
	}

	/*
	 * now cancel remaining active events
	 */
	if (td->io_ops->cancel) {
		list_for_each_safe(entry, n, &td->io_u_busylist) {
			io_u = list_entry(entry, struct io_u, list);

			r = td->io_ops->cancel(td, io_u);
			if (!r)
				put_io_u(td, io_u);
		}
	}

	if (td->cur_depth) {
		r = td_io_getevents(td, td->cur_depth, td->cur_depth, NULL);
		if (r > 0) {
			icd.nr = r;
			ios_completed(td, &icd);
		}
	}
}

/*
 * Helper to handle the final sync of a file. Works just like the normal
 * io path, just does everything sync.
 */
static int fio_io_sync(struct thread_data *td, struct fio_file *f)
{
	struct io_u *io_u = __get_io_u(td);
	struct io_completion_data icd;
	int ret;

	if (!io_u)
		return 1;

	io_u->ddir = DDIR_SYNC;
	io_u->file = f;

	if (td_io_prep(td, io_u)) {
		put_io_u(td, io_u);
		return 1;
	}

	ret = td_io_queue(td, io_u);
	if (ret) {
		td_verror(td, io_u->error);
		put_io_u(td, io_u);
		return 1;
	}

	ret = td_io_getevents(td, 1, td->cur_depth, NULL);
	if (ret < 0) {
		td_verror(td, ret);
		return 1;
	}

	icd.nr = ret;
	ios_completed(td, &icd);
	if (icd.error) {
		td_verror(td, icd.error);
		return 1;
	}

	return 0;
}

/*
 * The main verify engine. Runs over the writes we previously submitted,
 * reads the blocks back in, and checks the crc/md5 of the data.
 */
static void do_verify(struct thread_data *td)
{
	struct io_u *io_u, *v_io_u = NULL;
	struct io_completion_data icd;
	struct fio_file *f;
	int ret, i;

	/*
	 * sync io first and invalidate cache, to make sure we really
	 * read from disk.
	 */
	for_each_file(td, f, i) {
		fio_io_sync(td, f);
		file_invalidate_cache(td, f);
	}

	td_set_runstate(td, TD_VERIFYING);

	do {
		if (td->terminate)
			break;

		io_u = __get_io_u(td);
		if (!io_u)
			break;

		if (runtime_exceeded(td, &io_u->start_time)) {
			put_io_u(td, io_u);
			break;
		}

		if (get_next_verify(td, io_u)) {
			put_io_u(td, io_u);
			break;
		}

		f = get_next_file(td);
		if (!f)
			break;

		io_u->file = f;

		if (td_io_prep(td, io_u)) {
			put_io_u(td, io_u);
			break;
		}

		ret = td_io_queue(td, io_u);
		if (ret) {
			td_verror(td, io_u->error);
			put_io_u(td, io_u);
			break;
		}

		/*
		 * we have one pending to verify, do that while
		 * we are doing io on the next one
		 */
		if (do_io_u_verify(td, &v_io_u))
			break;

		ret = td_io_getevents(td, 1, 1, NULL);
		if (ret != 1) {
			if (ret < 0)
				td_verror(td, ret);
			break;
		}

		v_io_u = td->io_ops->event(td, 0);
		icd.nr = 1;
		icd.error = 0;
		fio_gettime(&icd.time, NULL);
		io_completed(td, v_io_u, &icd);

		if (icd.error) {
			td_verror(td, icd.error);
			put_io_u(td, v_io_u);
			v_io_u = NULL;
			break;
		}

		/*
		 * if we can't submit more io, we need to verify now
		 */
		if (queue_full(td) && do_io_u_verify(td, &v_io_u))
			break;

	} while (1);

	do_io_u_verify(td, &v_io_u);

	if (td->cur_depth)
		cleanup_pending_aio(td);

	td_set_runstate(td, TD_RUNNING);
}

/*
 * Not really an io thread, all it does is burn CPU cycles in the specified
 * manner.
 */
static void do_cpuio(struct thread_data *td)
{
	struct timeval e;
	int split = 100 / td->cpuload;
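	/*
	 * Example: cpuload=25 gives split=4, so the __usec_sleep()
	 * branch below is taken on one in every four iterations.
	 */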
	int i = 0;

	while (!td->terminate) {
		fio_gettime(&e, NULL);

		if (runtime_exceeded(td, &e))
			break;

		if (!(i % split))
			__usec_sleep(10000);
		else
			usec_sleep(td, 10000);

		i++;
	}
}

/*
 * Main IO worker function. It retrieves io_u's to process, queues and
 * reaps them, checking for rate limits and errors along the way.
 */
static void do_io(struct thread_data *td)
{
	struct io_completion_data icd;
	struct timeval s;
	unsigned long usec;
	struct fio_file *f;
	int i, ret = 0;

	td_set_runstate(td, TD_RUNNING);

	while (td->this_io_bytes[td->ddir] < td->io_size) {
		struct timespec *timeout;
		int min_evts = 0;
		struct io_u *io_u;

		if (td->terminate)
			break;

		f = get_next_file(td);
		if (!f)
			break;

		io_u = get_io_u(td, f);
		if (!io_u)
			break;

		memcpy(&s, &io_u->start_time, sizeof(s));

		ret = td_io_queue(td, io_u);
		if (ret) {
			td_verror(td, io_u->error);
			put_io_u(td, io_u);
			break;
		}

		add_slat_sample(td, io_u->ddir, mtime_since(&io_u->start_time, &io_u->issue_time));

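		/*
		 * If there is still room in the queue, just poll for
		 * completions (minimum zero events, zero timeout).
		 * Once the queue is full, block until at least one
		 * event completes.
		 */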
		if (td->cur_depth < td->iodepth) {
			struct timespec ts = { .tv_sec = 0, .tv_nsec = 0 };

			timeout = &ts;
			min_evts = 0;
		} else {
			timeout = NULL;
			min_evts = 1;
		}

		ret = td_io_getevents(td, min_evts, td->cur_depth, timeout);
		if (ret < 0) {
			td_verror(td, ret);
			break;
		} else if (!ret)
			continue;

		icd.nr = ret;
		ios_completed(td, &icd);
		if (icd.error) {
			td_verror(td, icd.error);
			break;
		}

		/*
		 * The rate check is batched for now: it works on batches
		 * of completions, though the very first batch may look
		 * a little bursty.
		 */
		usec = utime_since(&s, &icd.time);

		rate_throttle(td, usec, icd.bytes_done[td->ddir], td->ddir);

		if (check_min_rate(td, &icd.time)) {
			if (exitall_on_terminate)
				terminate_threads(td->groupid, 0);
			td_verror(td, ENODATA);
			break;
		}

		if (runtime_exceeded(td, &icd.time))
			break;

		if (td->thinktime) {
			unsigned long long b;

			b = td->io_blocks[0] + td->io_blocks[1];
			if (!(b % td->thinktime_blocks))
				usec_sleep(td, td->thinktime);
		}
	}

	if (!td->error) {
		if (td->cur_depth)
			cleanup_pending_aio(td);

		if (should_fsync(td) && td->end_fsync) {
			td_set_runstate(td, TD_FSYNCING);
			for_each_file(td, f, i)
				fio_io_sync(td, f);
		}
	}
}

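/*
 * Free all io_u structures still sitting on the free list, then
 * release the backing io memory.
 */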
static void cleanup_io_u(struct thread_data *td)
{
	struct list_head *entry, *n;
	struct io_u *io_u;

	list_for_each_safe(entry, n, &td->io_u_freelist) {
		io_u = list_entry(entry, struct io_u, list);

		list_del(&io_u->list);
		free(io_u);
	}

	free_io_mem(td);
}

/*
 * "randomly" fill the buffer contents
 */
static void fill_rand_buf(struct io_u *io_u, int max_bs)
{
	int *ptr = io_u->buf;

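	/*
	 * 0x9e370001 appears to be the same constant the Linux kernel
	 * uses as its 32-bit golden ratio hash prime; multiplying the
	 * rand() output by it spreads the randomness across all bits.
	 */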
	while ((void *) ptr - io_u->buf < max_bs) {
		*ptr = rand() * 0x9e370001;
		ptr++;
	}
}

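/*
 * Set up the io_u pool: allocate one contiguous, page-aligned buffer
 * region, carve out a max_bs-sized chunk per io_u unit, and put each
 * unit on the free list.
 */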
static int init_io_u(struct thread_data *td)
{
	struct io_u *io_u;
	unsigned int max_bs;
	int i, max_units;
	char *p;

	if (td->io_ops->flags & FIO_CPUIO)
		return 0;

	if (td->io_ops->flags & FIO_SYNCIO)
		max_units = 1;
	else
		max_units = td->iodepth;

	max_bs = max(td->max_bs[DDIR_READ], td->max_bs[DDIR_WRITE]);
	td->orig_buffer_size = max_bs * max_units;

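	/*
	 * Huge page backed memory is handed out in whole huge pages,
	 * so round the size up to a huge page multiple: e.g. with
	 * 2 MiB huge pages, a 3 MiB buffer becomes 4 MiB. For regular
	 * pages we just add slack for the later page alignment.
	 */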
	if (td->mem_type == MEM_SHMHUGE || td->mem_type == MEM_MMAPHUGE)
		td->orig_buffer_size = (td->orig_buffer_size + td->hugepage_size - 1) & ~(td->hugepage_size - 1);
	else
		td->orig_buffer_size += page_mask;

	if (allocate_io_mem(td))
		return 1;

	p = ALIGN(td->orig_buffer);
	for (i = 0; i < max_units; i++) {
		io_u = malloc(sizeof(*io_u));
		memset(io_u, 0, sizeof(*io_u));
		INIT_LIST_HEAD(&io_u->list);

		io_u->buf = p + max_bs * i;
		if (td_write(td) || td_rw(td))
			fill_rand_buf(io_u, max_bs);

		io_u->index = i;
		list_add(&io_u->list, &td->io_u_freelist);
	}

	return 0;
}

static int switch_ioscheduler(struct thread_data *td)