Add support for cpus_allowed_policy

[fio.git] / backend.c
diff --git a/backend.c b/backend.c

index 3ac72e771cb9eaa44a17fabe70d9aee718ccd6a5..12c76d8545ef53a02b95223ca8d53237b3175ef4 100644 (file)
--- a/backend.c
+++ b/backend.c
@@ -52,6 +52,7 @@
  #include "server.h"
  #include "lib/getrusage.h"
  #include "idletime.h"
+#include "err.h"
  
  static pthread_t disk_util_thread;
  static struct fio_mutex *disk_thread_mutex;
@@ -345,7 +346,7 @@ static inline int runtime_exceeded(struct thread_data *td, struct timeval *t)
                 return 0;
         if (!td->o.timeout)
                 return 0;
-       if (mtime_since(&td->epoch, t) >= td->o.timeout * 1000)
+       if (utime_since(&td->epoch, t) >= td->o.timeout)
                 return 1;
  
         return 0;
@@ -478,6 +479,12 @@ static void do_verify(struct thread_data *td, uint64_t verify_bytes)
                                 break;
  
                         while ((io_u = get_io_u(td)) != NULL) {
+                               if (IS_ERR(io_u)) {
+                                       io_u = NULL;
+                                       ret = FIO_Q_BUSY;
+                                       goto reap;
+                               }
+
                                 /*
                                  * We are only interested in the places where
                                  * we wrote or trimmed IOs. Turn those into
@@ -574,6 +581,7 @@ sync_done:
                  * completed io_u's first. Note that we can get BUSY even
                  * without IO queued, if the system is resource starved.
                  */
+reap:
                 full = queue_full(td) || (ret == FIO_Q_BUSY && td->cur_depth);
                 if (full || !td->o.iodepth_batch_complete) {
                         min_events = min(td->o.iodepth_batch_complete,
@@ -651,8 +659,14 @@ static uint64_t do_io(struct thread_data *td)
  
         lat_target_init(td);
  
+       /*
+        * If verify_backlog is enabled, we'll also run verification in
+        * this handler. In that case, we may need up to twice the
+        * number of bytes.
+        */
         total_bytes = td->o.size;
-       if (td->o.verify != VERIFY_NONE && td_write(td))
+       if (td->o.verify != VERIFY_NONE &&
+          (td_write(td) && td->o.verify_backlog))
                 total_bytes += td->o.size;
  
         while ((td->o.read_iolog_file && !flist_empty(&td->io_log_list)) ||
@@ -686,7 +700,14 @@ static uint64_t do_io(struct thread_data *td)
                         break;
  
                 io_u = get_io_u(td);
-               if (!io_u) {
+               if (IS_ERR_OR_NULL(io_u)) {
+                       int err = PTR_ERR(io_u);
+
+                       io_u = NULL;
+                       if (err == -EBUSY) {
+                               ret = FIO_Q_BUSY;
+                               goto reap;
+                       }
                         if (td->o.latency_target)
                                 goto reap;
                         break;
@@ -718,6 +739,11 @@ static uint64_t do_io(struct thread_data *td)
                 else
                         td_set_runstate(td, TD_RUNNING);
  
+               /*
+                * Always log IO before it's issued, so we know the exact
+                * order in which it was issued. The logged unit will track
+                * when the IO has completed.
+                */
                 if (td_write(td) && io_u->ddir == DDIR_WRITE &&
                     td->o.do_verify &&
                     td->o.verify != VERIFY_NONE &&
@@ -1113,6 +1139,9 @@ static int keep_running(struct thread_data *td)
                 if (diff < td_max_bs(td))
                         return 0;
  
+               if (fio_files_done(td))
+                       return 0;
+
                 return 1;
         }
  
@@ -1224,13 +1253,6 @@ static void *thread_main(void *data)
         fio_mutex_down(td->mutex);
         dprint(FD_MUTEX, "done waiting on td->mutex\n");
  
-       /*
-        * the ->mutex mutex is now no longer used, close it to avoid
-        * eating a file descriptor
-        */
-       fio_mutex_remove(td->mutex);
-       td->mutex = NULL;
-
         /*
          * A new gid requires privilege, so we need to do this before setting
          * the uid.
@@ -1256,6 +1278,15 @@ static void *thread_main(void *data)
          * allocations.
          */
         if (o->cpumask_set) {
+               if (o->cpus_allowed_policy == FIO_CPUS_SPLIT) {
+                       ret = fio_cpus_split(&o->cpumask, td->thread_number);
+                       if (!ret) {
+                               log_err("fio: no CPUs set\n");
+                               log_err("fio: Try increasing number of available CPUs\n");
+                               td_verror(td, EINVAL, "cpus_split");
+                               goto err;
+                       }
+               }
                 ret = fio_setaffinity(td->pid, o->cpumask);
                 if (ret == -1) {
                         td_verror(td, errno, "cpu_set_affinity");
@@ -1439,6 +1470,7 @@ static void *thread_main(void *data)
         fio_unpin_memory(td);
  
         fio_mutex_down(writeout_mutex);
+       finalize_logs(td);
         if (td->bw_log) {
                 if (o->bw_log_file) {
                         finish_log_named(td, td->bw_log,
@@ -1510,6 +1542,9 @@ err:
         fio_mutex_remove(td->rusage_sem);
         td->rusage_sem = NULL;
  
+       fio_mutex_remove(td->mutex);
+       td->mutex = NULL;
+
         td_set_runstate(td, TD_EXITED);
         return (void *) (uintptr_t) td->error;
  }
@@ -1657,8 +1692,8 @@ static void do_usleep(unsigned int usecs)
  static void run_threads(void)
  {
         struct thread_data *td;
-       unsigned long spent;
         unsigned int i, todo, nr_running, m_rate, t_rate, nr_started;
+       uint64_t spent;
  
         if (fio_gtod_offload && fio_start_gtod_thread())
                 return;
@@ -1756,9 +1791,9 @@ static void run_threads(void)
                         }
  
                         if (td->o.start_delay) {
-                               spent = mtime_since_genesis();
+                               spent = utime_since_genesis();
  
-                               if (td->o.start_delay * 1000 > spent)
+                               if (td->o.start_delay > spent)
                                         continue;
                         }