Autodetect cgroup blkio mount point
[fio.git] / fio.c
diff --git a/fio.c b/fio.c
index 63d9a1b7ab434ca4eba416ab2fe71bbc9e71385a..7d4c96a2b8a5045cda742f4be7842deb66529637 100644 (file)
--- a/fio.c
+++ b/fio.c
@@ -39,6 +39,7 @@
 #include "smalloc.h"
 #include "verify.h"
 #include "diskutil.h"
+#include "cgroup.h"
 
 unsigned long page_mask;
 unsigned long page_size;
@@ -60,6 +61,8 @@ static volatile int fio_abort;
 static int exit_value;
 static struct itimerval itimer;
 static pthread_t gtod_thread;
+static struct flist_head *cgroup_list;
+static char *cgroup_mnt;
 
 struct io_log *agg_io_log[2];
 
@@ -223,7 +226,7 @@ static int __check_min_rate(struct thread_data *td, struct timeval *now,
                                if (rate < ratemin ||
                                    bytes < td->rate_bytes[ddir]) {
                                        log_err("%s: min rate %u not met, got"
-                                               " %luKiB/sec\n", td->o.name,
+                                               " %luKB/sec\n", td->o.name,
                                                        ratemin, rate);
                                        return 1;
                                }
@@ -388,21 +391,29 @@ static int break_on_this_error(struct thread_data *td, int *retptr)
                else
                        err = td->error;
 
-               update_error_count(td, err);
-
                if (td_non_fatal_error(err)) {
                        /*
                         * Continue with the I/Os in case of
                         * a non fatal error.
                         */
+                       update_error_count(td, err);
                        td_clear_error(td);
                        *retptr = 0;
                        return 0;
+               } else if (td->o.fill_device && err == ENOSPC) {
+                       /*
+                        * We expect to hit this error if
+                        * fill_device option is set.
+                        */
+                       td_clear_error(td);
+                       td->terminate = 1;
+                       return 1;
                } else {
                        /*
                         * Stop the I/O in case of a fatal
                         * error.
                         */
+                       update_error_count(td, err);
                        return 1;
                }
        }
@@ -464,7 +475,10 @@ static void do_verify(struct thread_data *td)
                        break;
                }
 
-               io_u->end_io = verify_io_u;
+               if (td->o.verify_async)
+                       io_u->end_io = verify_io_u_async;
+               else
+                       io_u->end_io = verify_io_u;
 
                ret = td_io_queue(td, io_u);
                switch (ret) {
@@ -525,7 +539,8 @@ sync_done:
                 */
                full = queue_full(td) || ret == FIO_Q_BUSY;
                if (full || !td->o.iodepth_batch_complete) {
-                       min_events = td->o.iodepth_batch_complete;
+                       min_events = min(td->o.iodepth_batch_complete,
+                                        td->cur_depth);
                        if (full && !min_events)
                                min_events = 1;
 
@@ -570,7 +585,8 @@ static void do_io(struct thread_data *td)
        else
                td_set_runstate(td, TD_RUNNING);
 
-       while ((td->this_io_bytes[0] + td->this_io_bytes[1]) < td->o.size) {
+       while ( (td->o.read_iolog_file && !flist_empty(&td->io_log_list)) ||
+               ((td->this_io_bytes[0] + td->this_io_bytes[1]) < td->o.size) ) {
                struct timeval comp_time;
                unsigned long bytes_done[2] = { 0, 0 };
                int min_evts = 0;
@@ -595,8 +611,12 @@ static void do_io(struct thread_data *td)
                 * Add verification end_io handler, if asked to verify
                 * a previously written file.
                 */
-               if (td->o.verify != VERIFY_NONE && io_u->ddir == DDIR_READ) {
-                       io_u->end_io = verify_io_u;
+               if (td->o.verify != VERIFY_NONE && io_u->ddir == DDIR_READ &&
+                   !td_rw(td)) {
+                       if (td->o.verify_async)
+                               io_u->end_io = verify_io_u_async;
+                       else
+                               io_u->end_io = verify_io_u;
                        td_set_runstate(td, TD_VERIFYING);
                } else if (in_ramp_time(td))
                        td_set_runstate(td, TD_RAMP);
@@ -672,7 +692,8 @@ sync_done:
                 */
                full = queue_full(td) || ret == FIO_Q_BUSY;
                if (full || !td->o.iodepth_batch_complete) {
-                       min_evts = td->o.iodepth_batch_complete;
+                       min_evts = min(td->o.iodepth_batch_complete,
+                                       td->cur_depth);
                        if (full && !min_evts)
                                min_evts = 1;
 
@@ -791,7 +812,7 @@ static int init_io_u(struct thread_data *td)
        if (allocate_io_mem(td))
                return 1;
 
-       if (td->o.mem_align)
+       if (td->o.odirect || td->o.mem_align)
                p = PAGE_ALIGN(td->orig_buffer) + td->o.mem_align;
        else
                p = td->orig_buffer;
@@ -984,6 +1005,7 @@ static void *thread_main(void *data)
 {
        unsigned long long runtime[2], elapsed;
        struct thread_data *td = data;
+       pthread_condattr_t attr;
        int clear_state;
 
        if (!td->o.use_thread)
@@ -998,8 +1020,14 @@ static void *thread_main(void *data)
        INIT_FLIST_HEAD(&td->io_u_requeues);
        INIT_FLIST_HEAD(&td->io_log_list);
        INIT_FLIST_HEAD(&td->io_hist_list);
+       INIT_FLIST_HEAD(&td->verify_list);
+       pthread_mutex_init(&td->io_u_lock, NULL);
        td->io_hist_tree = RB_ROOT;
 
+       pthread_condattr_init(&attr);
+       pthread_cond_init(&td->verify_cond, &attr);
+       pthread_cond_init(&td->free_cond, &attr);
+
        td_set_runstate(td, TD_INITIALIZED);
        dprint(FD_MUTEX, "up startup_mutex\n");
        fio_mutex_up(startup_mutex);
@@ -1023,7 +1051,10 @@ static void *thread_main(void *data)
        if (init_io_u(td))
                goto err;
 
-       if (td->o.cpumask_set && fio_setaffinity(td) == -1) {
+       if (td->o.verify_async && verify_async_init(td))
+               goto err;
+
+       if (td->o.cpumask_set && fio_setaffinity(td->pid, td->o.cpumask) == -1) {
                td_verror(td, errno, "cpu_set_affinity");
                goto err;
        }
@@ -1034,7 +1065,7 @@ static void *thread_main(void *data)
         */
        if (td->o.gtod_cpu) {
                fio_cpu_clear(&td->o.cpumask, td->o.gtod_cpu);
-               if (fio_setaffinity(td) == -1) {
+               if (fio_setaffinity(td->pid, td->o.cpumask) == -1) {
                        td_verror(td, errno, "cpu_set_affinity");
                        goto err;
                }
@@ -1047,6 +1078,9 @@ static void *thread_main(void *data)
                }
        }
 
+       if (td->o.cgroup_weight && cgroup_setup(td, cgroup_list, &cgroup_mnt))
+               goto err;
+
        if (nice(td->o.nice) == -1) {
                td_verror(td, errno, "nice");
                goto err;
@@ -1081,7 +1115,10 @@ static void *thread_main(void *data)
        clear_state = 0;
        while (keep_running(td)) {
                fio_gettime(&td->start, NULL);
-               memcpy(&td->ts.stat_sample_time, &td->start, sizeof(td->start));
+               memcpy(&td->ts.stat_sample_time[0], &td->start,
+                               sizeof(td->start));
+               memcpy(&td->ts.stat_sample_time[1], &td->start,
+                               sizeof(td->start));
                memcpy(&td->tv_cache, &td->start, sizeof(td->start));
 
                if (td->o.ratemin[0] || td->o.ratemin[1])
@@ -1166,9 +1203,14 @@ err:
        if (td->error)
                printf("fio: pid=%d, err=%d/%s\n", (int) td->pid, td->error,
                                                        td->verror);
+
+       if (td->o.verify_async)
+               verify_async_exit(td);
+
        close_and_free_files(td);
        close_ioengine(td);
        cleanup_io_u(td);
+       cgroup_shutdown(td, &cgroup_mnt);
 
        if (td->o.cpumask_set) {
                int ret = fio_cpuset_exit(&td->o.cpumask);
@@ -1492,7 +1534,13 @@ static void run_threads(void)
                                        *fio_debug_jobp = pid;
                        }
                        dprint(FD_MUTEX, "wait on startup_mutex\n");
-                       fio_mutex_down(startup_mutex);
+                       if (fio_mutex_down_timeout(startup_mutex, 10)) {
+                               log_err("fio: job startup hung? exiting.\n");
+                               terminate_threads(TERMINATE_ALL);
+                               fio_abort = 1;
+                               nr_started--;
+                               break;
+                       }
                        dprint(FD_MUTEX, "done waiting on startup_mutex\n");
                }
 
@@ -1582,12 +1630,6 @@ int main(int argc, char *argv[])
        if (!getenv("LC_NUMERIC"))
                setlocale(LC_NUMERIC, "en_US");
 
-       if (parse_options(argc, argv))
-               return 1;
-
-       if (!thread_number)
-               return 0;
-
        ps = sysconf(_SC_PAGESIZE);
        if (ps < 0) {
                log_err("Failed to get page size\n");
@@ -1597,6 +1639,14 @@ int main(int argc, char *argv[])
        page_size = ps;
        page_mask = ps - 1;
 
+       fio_keywords_init();
+
+       if (parse_options(argc, argv))
+               return 1;
+
+       if (!thread_number)
+               return 0;
+
        if (write_bw_log) {
                setup_log(&agg_io_log[DDIR_READ]);
                setup_log(&agg_io_log[DDIR_WRITE]);
@@ -1609,6 +1659,9 @@ int main(int argc, char *argv[])
 
        status_timer_arm();
 
+       cgroup_list = smalloc(sizeof(*cgroup_list));
+       INIT_FLIST_HEAD(cgroup_list);
+
        run_threads();
 
        if (!fio_abort) {
@@ -1620,6 +1673,11 @@ int main(int argc, char *argv[])
                }
        }
 
+       cgroup_kill(cgroup_list);
+       sfree(cgroup_list);
+       if (cgroup_mnt)
+               sfree(cgroup_mnt);
+
        fio_mutex_remove(startup_mutex);
        fio_mutex_remove(writeout_mutex);
        return exit_value;