Add support for cpus_allowed_policy
[fio.git] / backend.c
index 3ac72e771cb9eaa44a17fabe70d9aee718ccd6a5..12c76d8545ef53a02b95223ca8d53237b3175ef4 100644 (file)
--- a/backend.c
+++ b/backend.c
@@ -52,6 +52,7 @@
 #include "server.h"
 #include "lib/getrusage.h"
 #include "idletime.h"
+#include "err.h"
 
 static pthread_t disk_util_thread;
 static struct fio_mutex *disk_thread_mutex;
@@ -345,7 +346,7 @@ static inline int runtime_exceeded(struct thread_data *td, struct timeval *t)
                return 0;
        if (!td->o.timeout)
                return 0;
-       if (mtime_since(&td->epoch, t) >= td->o.timeout * 1000)
+       if (utime_since(&td->epoch, t) >= td->o.timeout)
                return 1;
 
        return 0;
@@ -478,6 +479,12 @@ static void do_verify(struct thread_data *td, uint64_t verify_bytes)
                                break;
 
                        while ((io_u = get_io_u(td)) != NULL) {
+                               if (IS_ERR(io_u)) {
+                                       io_u = NULL;
+                                       ret = FIO_Q_BUSY;
+                                       goto reap;
+                               }
+
                                /*
                                 * We are only interested in the places where
                                 * we wrote or trimmed IOs. Turn those into
@@ -574,6 +581,7 @@ sync_done:
                 * completed io_u's first. Note that we can get BUSY even
                 * without IO queued, if the system is resource starved.
                 */
+reap:
                full = queue_full(td) || (ret == FIO_Q_BUSY && td->cur_depth);
                if (full || !td->o.iodepth_batch_complete) {
                        min_events = min(td->o.iodepth_batch_complete,
@@ -651,8 +659,14 @@ static uint64_t do_io(struct thread_data *td)
 
        lat_target_init(td);
 
+       /*
+        * If verify_backlog is enabled, we'll run the verify in this
+        * handler as well. For that case, we may need up to twice the
+        * amount of bytes.
+        */
        total_bytes = td->o.size;
-       if (td->o.verify != VERIFY_NONE && td_write(td))
+       if (td->o.verify != VERIFY_NONE &&
+          (td_write(td) && td->o.verify_backlog))
                total_bytes += td->o.size;
 
        while ((td->o.read_iolog_file && !flist_empty(&td->io_log_list)) ||
@@ -686,7 +700,14 @@ static uint64_t do_io(struct thread_data *td)
                        break;
 
                io_u = get_io_u(td);
-               if (!io_u) {
+               if (IS_ERR_OR_NULL(io_u)) {
+                       int err = PTR_ERR(io_u);
+
+                       io_u = NULL;
+                       if (err == -EBUSY) {
+                               ret = FIO_Q_BUSY;
+                               goto reap;
+                       }
                        if (td->o.latency_target)
                                goto reap;
                        break;
@@ -718,6 +739,11 @@ static uint64_t do_io(struct thread_data *td)
                else
                        td_set_runstate(td, TD_RUNNING);
 
+               /*
+                * Always log IO before it's issued, so we know the exact
+                * order in which IOs were issued. The logged unit will track
+                * when the IO has completed.
+                */
                if (td_write(td) && io_u->ddir == DDIR_WRITE &&
                    td->o.do_verify &&
                    td->o.verify != VERIFY_NONE &&
@@ -1113,6 +1139,9 @@ static int keep_running(struct thread_data *td)
                if (diff < td_max_bs(td))
                        return 0;
 
+               if (fio_files_done(td))
+                       return 0;
+
                return 1;
        }
 
@@ -1224,13 +1253,6 @@ static void *thread_main(void *data)
        fio_mutex_down(td->mutex);
        dprint(FD_MUTEX, "done waiting on td->mutex\n");
 
-       /*
-        * the ->mutex mutex is now no longer used, close it to avoid
-        * eating a file descriptor
-        */
-       fio_mutex_remove(td->mutex);
-       td->mutex = NULL;
-
        /*
         * A new gid requires privilege, so we need to do this before setting
         * the uid.
@@ -1256,6 +1278,15 @@ static void *thread_main(void *data)
         * allocations.
         */
        if (o->cpumask_set) {
+               if (o->cpus_allowed_policy == FIO_CPUS_SPLIT) {
+                       ret = fio_cpus_split(&o->cpumask, td->thread_number);
+                       if (!ret) {
+                               log_err("fio: no CPUs set\n");
+                               log_err("fio: Try increasing number of available CPUs\n");
+                               td_verror(td, EINVAL, "cpus_split");
+                               goto err;
+                       }
+               }
                ret = fio_setaffinity(td->pid, o->cpumask);
                if (ret == -1) {
                        td_verror(td, errno, "cpu_set_affinity");
@@ -1439,6 +1470,7 @@ static void *thread_main(void *data)
        fio_unpin_memory(td);
 
        fio_mutex_down(writeout_mutex);
+       finalize_logs(td);
        if (td->bw_log) {
                if (o->bw_log_file) {
                        finish_log_named(td, td->bw_log,
@@ -1510,6 +1542,9 @@ err:
        fio_mutex_remove(td->rusage_sem);
        td->rusage_sem = NULL;
 
+       fio_mutex_remove(td->mutex);
+       td->mutex = NULL;
+
        td_set_runstate(td, TD_EXITED);
        return (void *) (uintptr_t) td->error;
 }
@@ -1657,8 +1692,8 @@ static void do_usleep(unsigned int usecs)
 static void run_threads(void)
 {
        struct thread_data *td;
-       unsigned long spent;
        unsigned int i, todo, nr_running, m_rate, t_rate, nr_started;
+       uint64_t spent;
 
        if (fio_gtod_offload && fio_start_gtod_thread())
                return;
@@ -1756,9 +1791,9 @@ static void run_threads(void)
                        }
 
                        if (td->o.start_delay) {
-                               spent = mtime_since_genesis();
+                               spent = utime_since_genesis();
 
-                               if (td->o.start_delay * 1000 > spent)
+                               if (td->o.start_delay > spent)
                                        continue;
                        }