Warn more gently if the kernel is too old for io scheduler switching
diff --git a/fio.c b/fio.c
index de5c976e85cb0d636eadd21768fefaf601a66c95..160d533583b6850c30bacbd98e1ef345db5c7680 100644
--- a/fio.c
+++ b/fio.c
@@ -35,7 +35,6 @@
 #include <sys/mman.h>
 
 #include "fio.h"
-#include "os.h"
 
 unsigned long page_mask;
 unsigned long page_size;
@@ -342,10 +341,17 @@ static void do_verify(struct thread_data *td)
                                        put_io_u(td, io_u);
                                        break;
                                }
+
                                io_u->xfer_buflen = io_u->resid;
                                io_u->xfer_buf += bytes;
+                               io_u->offset += bytes;
+
+                               if (io_u->offset == io_u->file->real_file_size)
+                                       goto sync_done;
+
                                requeue_io_u(td, &io_u);
                        } else {
+sync_done:
                                ret = io_u_sync_complete(td, io_u);
                                if (ret < 0)
                                        break;
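
The short-transfer handling above (and its twin in do_io() below) advances both the buffer pointer and the file offset past the bytes that did complete, and jumps to the sync_done label instead of requeueing once the offset reaches end-of-file. A minimal standalone sketch of that pattern, with illustrative types rather than fio's io_u:

/*
 * Sketch only: after a partial transfer, skip the bytes that did
 * complete, and either finish (at EOF) or requeue the remainder.
 * The struct and helper name here are made up for illustration.
 */
struct io {
        char *buf;                      /* next byte to transfer */
        unsigned long long offset;      /* file offset matching buf */
        unsigned long long resid;       /* bytes still outstanding */
};

static int handle_short_io(struct io *io, unsigned long long bytes,
                           unsigned long long real_file_size)
{
        io->buf += bytes;
        io->offset += bytes;
        io->resid -= bytes;

        if (io->offset == real_file_size)
                return 0;       /* hit EOF: complete now, don't requeue */

        return 1;               /* requeue the remaining residual */
}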
@@ -412,7 +418,7 @@ static void do_io(struct thread_data *td)
 
        td_set_runstate(td, TD_RUNNING);
 
-       while ((td->this_io_bytes[0] + td->this_io_bytes[1]) < td->io_size) {
+       while ((td->this_io_bytes[0] + td->this_io_bytes[1]) < td->o.size) {
                struct timeval comp_time;
                long bytes_done = 0;
                int min_evts = 0;
@@ -452,8 +458,14 @@ static void do_io(struct thread_data *td)
 
                                io_u->xfer_buflen = io_u->resid;
                                io_u->xfer_buf += bytes;
+                               io_u->offset += bytes;
+
+                               if (io_u->offset == io_u->file->real_file_size)
+                                       goto sync_done;
+
                                requeue_io_u(td, &io_u);
                        } else {
+sync_done:
                                fio_gettime(&comp_time, NULL);
                                bytes_done = io_u_sync_complete(td, io_u);
                                if (bytes_done < 0)
@@ -588,6 +600,7 @@ static void fill_rand_buf(struct io_u *io_u, int max_bs)
 
 static int init_io_u(struct thread_data *td)
 {
+       unsigned long long buf_size;
        struct io_u *io_u;
        unsigned int max_bs;
        int i, max_units;
@@ -599,12 +612,19 @@ static int init_io_u(struct thread_data *td)
                max_units = td->o.iodepth;
 
        max_bs = max(td->o.max_bs[DDIR_READ], td->o.max_bs[DDIR_WRITE]);
-       td->orig_buffer_size = max_bs * max_units;
+       buf_size = (unsigned long long) max_bs * (unsigned long long) max_units;
+       buf_size += page_mask;
+       if (buf_size != (size_t) buf_size) {
+               log_err("fio: IO memory too large. Reduce max_bs or iodepth\n");
+               return 1;
+       }
+
+       td->orig_buffer_size = buf_size;
 
        if (td->o.mem_type == MEM_SHMHUGE || td->o.mem_type == MEM_MMAPHUGE)
                td->orig_buffer_size = (td->orig_buffer_size + td->o.hugepage_size - 1) & ~(td->o.hugepage_size - 1);
-       else
-               td->orig_buffer_size += page_mask;
+       else if (td->orig_buffer_size & page_mask)
+               td->orig_buffer_size = (td->orig_buffer_size + page_mask) & ~page_mask;
 
        if (allocate_io_mem(td))
                return 1;
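
The new guard in init_io_u() does the buffer-size arithmetic in unsigned long long and rejects any result that would be truncated when handed to the allocator as a size_t. A compressed sketch of the same check, with hypothetical parameter names:

#include <stdio.h>

/*
 * Sketch of the overflow guard above: compute wide, verify the value
 * survives narrowing to size_t, then page-align. Names are illustrative.
 */
static int check_buf_size(unsigned int max_bs, unsigned int max_units,
                          unsigned long page_mask, size_t *out)
{
        unsigned long long buf_size;

        /* headroom for alignment, added before the truncation test */
        buf_size = (unsigned long long) max_bs * max_units + page_mask;
        if (buf_size != (size_t) buf_size) {
                fprintf(stderr, "fio: IO memory too large. Reduce max_bs or iodepth\n");
                return 1;
        }

        /* classic (x + mask) & ~mask round-up to a page boundary */
        *out = (size_t) buf_size & ~page_mask;
        return 0;
}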
@@ -642,7 +662,11 @@ static int switch_ioscheduler(struct thread_data *td)
 
        f = fopen(tmp, "r+");
        if (!f) {
-               td_verror(td, errno, "fopen");
+               if (errno == ENOENT) {
+                       log_err("fio: os or kernel doesn't support IO scheduler switching\n");
+                       return 0;
+               }
+               td_verror(td, errno, "fopen iosched");
                return 1;
        }
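
This hunk is the change named in the commit title: a missing /sys/.../queue/scheduler file (ENOENT) now produces a warning and a successful return instead of a hard error. Roughly the same pattern as a standalone helper; the function name and FILE-out convention here are made up:

#include <stdio.h>
#include <errno.h>
#include <string.h>

/*
 * Sketch of the gentler failure mode: ENOENT means the kernel simply
 * has no scheduler switching, which is worth a warning, not an error.
 */
static int open_iosched(const char *path, FILE **out)
{
        FILE *f = fopen(path, "r+");

        if (!f) {
                if (errno == ENOENT) {
                        /* old kernel: warn gently and carry on */
                        fprintf(stderr, "fio: os or kernel doesn't support "
                                "IO scheduler switching\n");
                        *out = NULL;
                        return 0;
                }
                fprintf(stderr, "fopen %s: %s\n", path, strerror(errno));
                return 1;
        }

        *out = f;
        return 0;
}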
 
@@ -691,6 +715,7 @@ static int clear_io_state(struct thread_data *td)
        td->zone_bytes = 0;
        td->rate_bytes = 0;
        td->rate_blocks = 0;
+       td->rw_end_set[0] = td->rw_end_set[1] = 0;
 
        td->last_was_sync = 0;
 
@@ -715,6 +740,7 @@ static void *thread_main(void *data)
 {
        unsigned long long runtime[2];
        struct thread_data *td = data;
+       unsigned long elapsed;
        int clear_state;
 
        if (!td->o.use_thread)
@@ -725,8 +751,9 @@ static void *thread_main(void *data)
        INIT_LIST_HEAD(&td->io_u_freelist);
        INIT_LIST_HEAD(&td->io_u_busylist);
        INIT_LIST_HEAD(&td->io_u_requeues);
-       INIT_LIST_HEAD(&td->io_hist_list);
        INIT_LIST_HEAD(&td->io_log_list);
+       INIT_LIST_HEAD(&td->io_hist_list);
+       td->io_hist_tree = RB_ROOT;
 
        if (init_io_u(td))
                goto err_sem;
@@ -773,6 +800,9 @@ static void *thread_main(void *data)
        if (open_files(td))
                goto err;
 
+       if (init_random_map(td))
+               goto err;
+
        if (td->o.exec_prerun) {
                if (system(td->o.exec_prerun) < 0)
                        goto err;
@@ -800,10 +830,22 @@ static void *thread_main(void *data)
 
                clear_state = 1;
 
-               if (td_read(td) && td->io_bytes[DDIR_READ])
-                       runtime[DDIR_READ] += utime_since_now(&td->start);
-               if (td_write(td) && td->io_bytes[DDIR_WRITE])
-                       runtime[DDIR_WRITE] += utime_since_now(&td->start);
+               if (td_read(td) && td->io_bytes[DDIR_READ]) {
+                       if (td->rw_end_set[DDIR_READ])
+                               elapsed = utime_since(&td->start, &td->rw_end[DDIR_READ]);
+                       else
+                               elapsed = utime_since_now(&td->start);
+
+                       runtime[DDIR_READ] += elapsed;
+               }
+               if (td_write(td) && td->io_bytes[DDIR_WRITE]) {
+                       if (td->rw_end_set[DDIR_WRITE])
+                               elapsed = utime_since(&td->start, &td->rw_end[DDIR_WRITE]);
+                       else
+                               elapsed = utime_since_now(&td->start);
+
+                       runtime[DDIR_WRITE] += elapsed;
+               }
                
                if (td->error || td->terminate)
                        break;
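
The accounting above now prefers the per-direction end timestamp in rw_end (when rw_end_set says one was recorded, presumably at the moment that direction finished) over sampling the clock at loop exit, so a direction that completed early is not credited with idle time. The same logic could be factored roughly like this, using fio's utime helpers (the wrapper name is hypothetical):

/* assumes fio.h for struct thread_data and the utime helpers */
static unsigned long phase_runtime(struct thread_data *td, const int ddir)
{
        if (td->rw_end_set[ddir])
                return utime_since(&td->start, &td->rw_end[ddir]);

        /* no recorded end for this direction: fall back to "now" */
        return utime_since_now(&td->start);
}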
@@ -853,6 +895,7 @@ err:
        close_files(td);
        close_ioengine(td);
        cleanup_io_u(td);
+       options_mem_free(td);
        td_set_runstate(td, TD_EXITED);
        return (void *) (unsigned long) td->error;
 err_sem:
@@ -951,6 +994,7 @@ static void reap_threads(int *nr_running, int *t_rate, int *m_rate)
                /*
                 * thread is not dead, continue
                 */
+               pending++;
                continue;
 reaped:
                if (td->o.use_thread) {
@@ -963,6 +1007,7 @@ reaped:
                (*nr_running)--;
                (*m_rate) -= td->o.ratemin;
                (*t_rate) -= td->o.rate;
+               pending--;
 
                if (td->error)
                        exit_value++;
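
The pending++/pending-- pair makes reap_threads() keep an honest count of threads that were inspected but not yet reaped, so the caller knows whether another pass is needed. A simplified, hypothetical loop showing the shape of that bookkeeping (thread_is_dead() and join_thread() are stand-ins, not fio functions):

static int reap(struct thread *threads, int nr)
{
        int i, pending = 0;

        for (i = 0; i < nr; i++) {
                if (!thread_is_dead(&threads[i])) {
                        pending++;      /* still running, check next pass */
                        continue;
                }
                join_thread(&threads[i]);       /* reaped, not pending */
        }

        return pending;         /* caller loops until this reaches zero */
}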
@@ -1170,10 +1215,8 @@ int main(int argc, char *argv[])
        if (parse_options(argc, argv))
                return 1;
 
-       if (!thread_number) {
-               log_err("Nothing to do\n");
-               return 1;
-       }
+       if (!thread_number)
+               return 0;
 
        ps = sysconf(_SC_PAGESIZE);
        if (ps < 0) {