Merge branch 'master' into gfio
authorJens Axboe <axboe@kernel.dk>
Thu, 31 Jan 2013 12:23:40 +0000 (13:23 +0100)
committerJens Axboe <axboe@kernel.dk>
Thu, 31 Jan 2013 12:23:40 +0000 (13:23 +0100)
Conflicts:
Makefile
client.c
configure
fio.c
fio.h
server.c
server.h

Signed-off-by: Jens Axboe <axboe@kernel.dk>
35 files changed:
HOWTO
Makefile
README
backend.c
cconv.c
cgroup.c
client.c
configure
diskutil.c
diskutil.h
engines/falloc.c
engines/net.c
eta.c
filesetup.c
fio.1
fio.c
fio.h
flow.c
gettime-thread.c
idletime.c [new file with mode: 0644]
idletime.h [new file with mode: 0644]
init.c
iolog.c
log.c
options.c
os/os-linux.h
os/os-windows.h
os/windows/posix/include/netinet/tcp.h [new file with mode: 0644]
server.c
server.h
stat.c
stat.h
t/log.c
thread_options.h
verify.c

diff --git a/HOWTO b/HOWTO
index c9c1d8c..f7948c3 100644 (file)
--- a/HOWTO
+++ b/HOWTO
@@ -272,17 +272,17 @@ filename=str      Fio normally makes up a filename based on the job name,
                can specify a number of files by separating the names with a
                ':' colon. So if you wanted a job to open /dev/sda and /dev/sdb
                as the two working files, you would use
-               filename=/dev/sda:/dev/sdb. On Windows, disk devices are accessed
-               as \\.\PhysicalDrive0 for the first device, \\.\PhysicalDrive1
-               for the second etc.
-               Note: Windows and FreeBSD prevent write access to areas of the disk
-               containing in-use data (e.g. filesystems).
-               If the wanted filename does need to include a colon, then escape that
-               with a '\' character.
-               For instance, if the filename is "/dev/dsk/foo@3,0:c",
-               then you would use filename="/dev/dsk/foo@3,0\:c".
-               '-' is a reserved name, meaning stdin or stdout. Which of the
-               two depends on the read/write direction set.
+               filename=/dev/sda:/dev/sdb. On Windows, disk devices are
+               accessed as \\.\PhysicalDrive0 for the first device,
+               \\.\PhysicalDrive1 for the second etc. Note: Windows and
+               FreeBSD prevent write access to areas of the disk containing
+               in-use data (e.g. filesystems).
+               If the wanted filename does need to include a colon, then
+               escape that with a '\' character. For instance, if the filename
+               is "/dev/dsk/foo@3,0:c", then you would use
+               filename="/dev/dsk/foo@3,0\:c". '-' is a reserved name, meaning
+               stdin or stdout. Which of the two depends on the read/write
+               direction set.
 
 opendir=str    Tell fio to recursively add any file it can find in this
                directory and down the file system tree.
@@ -353,6 +353,12 @@ kb_base=int        The base unit for a kilobyte. The defacto base is 2^10, 1024.
                ten unit instead, for obvious reasons. Allow values are
                1024 or 1000, with 1024 being the default.
 
+unified_rw_reporting=bool      Fio normally reports statistics on a per
+               data direction basis, meaning that read, write, and trim are
+               accounted and reported separately. If this option is set,
+               fio will sum the results and report them as "mixed"
+               instead.
+
 randrepeat=bool        For random IO workloads, seed the generator in a predictable
                way so that results are repeatable across repetitions.
 
@@ -1686,3 +1692,18 @@ write      Write 'length' bytes beginning from 'offset'
 sync       fsync() the file
 datasync   fdatasync() the file
 trim       trim the given file from the given 'offset' for 'length' bytes
+
+
+9.0 CPU idleness profiling
+
+In some cases, we want to understand CPU overhead in a test. For example,
+we may test patches specifically to see whether they reduce CPU usage.
+fio implements a balloon approach: it creates a thread per CPU that runs at
+idle priority, meaning that it only runs when nobody else needs the cpu.
+By measuring the amount of work completed by the thread, idleness of each
+CPU can be derived accordingly.
+
+A unit of work is defined as touching a full page of unsigned characters.
+The mean and standard deviation of the time to complete a unit of work are
+reported in the "unit work" section. Options can be chosen to report detailed
+percpu idleness or overall system idleness by aggregating percpu stats.
index 038eacf..0e79720 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -33,7 +33,7 @@ SOURCE := gettime.c ioengines.c init.c stat.c log.c time.c filesetup.c \
                memalign.c server.c client.c iolog.c backend.c libfio.c flow.c \
                cconv.c lib/prio_tree.c json.c lib/zipf.c lib/axmap.c \
                lib/lfsr.c gettime-thread.c helpers.c lib/flist_sort.c \
-               lib/hweight.c lib/getrusage.c
+               lib/hweight.c lib/getrusage.c idletime.c
 
 ifdef CONFIG_64BIT_LLP64
   CFLAGS += -DBITS_PER_LONG=32
diff --git a/README b/README
index 7c4552d..c43b795 100644 (file)
--- a/README
+++ b/README
@@ -145,6 +145,9 @@ $ fio
        --max-jobs              Maximum number of threads/processes to support
        --server=args           Start backend server. See Client/Server section.
        --client=host           Connect to specified backend.
+       --idle-prof=option      Report cpu idleness on a system or percpu basis
+                               (option=system,percpu) or run unit work
+                               calibration only (option=calibrate).
 
 
 Any parameters following the options will be assumed to be job files,
index 9aa2a28..49d6bc7 100644 (file)
--- a/backend.c
+++ b/backend.c
@@ -51,6 +51,7 @@
 #include "memalign.h"
 #include "server.h"
 #include "lib/getrusage.h"
+#include "idletime.h"
 
 static pthread_t disk_util_thread;
 static struct fio_mutex *disk_thread_mutex;
@@ -1508,6 +1509,8 @@ static void run_threads(void)
 
        if (fio_gtod_offload && fio_start_gtod_thread())
                return;
+       
+       fio_idle_prof_init();
 
        set_sig_handlers();
 
@@ -1573,6 +1576,9 @@ static void run_threads(void)
                }
        }
 
+       /* start idle threads before io threads start to run */
+       fio_idle_prof_start();
+
        set_genesis_time();
 
        while (todo) {
@@ -1727,6 +1733,8 @@ static void run_threads(void)
                usleep(10000);
        }
 
+       fio_idle_prof_stop();
+
        update_io_ticks();
 }
 
diff --git a/cconv.c b/cconv.c
index e7339d0..3a8572e 100644 (file)
--- a/cconv.c
+++ b/cconv.c
@@ -172,6 +172,7 @@ void convert_thread_options_to_cpu(struct thread_options *o,
        o->disable_clat = le32_to_cpu(top->disable_clat);
        o->disable_slat = le32_to_cpu(top->disable_slat);
        o->disable_bw = le32_to_cpu(top->disable_bw);
+       o->unified_rw_rep = le32_to_cpu(top->unified_rw_rep);
        o->gtod_reduce = le32_to_cpu(top->gtod_reduce);
        o->gtod_cpu = le32_to_cpu(top->gtod_cpu);
        o->gtod_offload = le32_to_cpu(top->gtod_offload);
@@ -317,6 +318,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
        top->disable_clat = cpu_to_le32(o->disable_clat);
        top->disable_slat = cpu_to_le32(o->disable_slat);
        top->disable_bw = cpu_to_le32(o->disable_bw);
+       top->unified_rw_rep = cpu_to_le32(o->unified_rw_rep);
        top->gtod_reduce = cpu_to_le32(o->gtod_reduce);
        top->gtod_cpu = cpu_to_le32(o->gtod_cpu);
        top->gtod_offload = cpu_to_le32(o->gtod_offload);
index 86d4d5e..34b61de 100644 (file)
--- a/cgroup.c
+++ b/cgroup.c
@@ -52,9 +52,22 @@ static void add_cgroup(struct thread_data *td, const char *name,
 {
        struct cgroup_member *cm;
 
+       if (!lock)
+               return;
+
        cm = smalloc(sizeof(*cm));
+       if (!cm) {
+err:
+               log_err("fio: failed to allocate cgroup member\n");
+               return;
+       }
+
        INIT_FLIST_HEAD(&cm->list);
        cm->root = smalloc_strdup(name);
+       if (!cm->root) {
+               sfree(cm);
+               goto err;
+       }
        if (td->o.cgroup_nodelete)
                cm->cgroup_nodelete = 1;
        fio_mutex_down(lock);
@@ -67,6 +80,9 @@ void cgroup_kill(struct flist_head *clist)
        struct flist_head *n, *tmp;
        struct cgroup_member *cm;
 
+       if (!lock)
+               return;
+
        fio_mutex_down(lock);
 
        flist_for_each_safe(n, tmp, clist) {
@@ -183,6 +199,8 @@ void cgroup_shutdown(struct thread_data *td, char **mnt)
 static void fio_init cgroup_init(void)
 {
        lock = fio_mutex_init(FIO_MUTEX_UNLOCKED);
+       if (!lock)
+               log_err("fio: failed to allocate cgroup lock\n");
 }
 
 static void fio_exit cgroup_exit(void)
index c280762..fe6d75e 100644 (file)
--- a/client.c
+++ b/client.c
@@ -699,6 +699,7 @@ static void convert_ts(struct thread_stat *dst, struct thread_stat *src)
        dst->groupid            = le32_to_cpu(src->groupid);
        dst->pid                = le32_to_cpu(src->pid);
        dst->members            = le32_to_cpu(src->members);
+       dst->unified_rw_rep     = le32_to_cpu(src->unified_rw_rep);
 
        for (i = 0; i < DDIR_RWDIR_CNT; i++) {
                convert_io_stat(&dst->clat_stat[i], &src->clat_stat[i]);
@@ -736,7 +737,7 @@ static void convert_ts(struct thread_stat *dst, struct thread_stat *src)
                for (j = 0; j < FIO_IO_U_PLAT_NR; j++)
                        dst->io_u_plat[i][j] = le32_to_cpu(src->io_u_plat[i][j]);
 
-       for (i = 0; i < 3; i++) {
+       for (i = 0; i < DDIR_RWDIR_CNT; i++) {
                dst->total_io_u[i]      = le64_to_cpu(src->total_io_u[i]);
                dst->short_io_u[i]      = le64_to_cpu(src->short_io_u[i]);
        }
@@ -771,6 +772,7 @@ static void convert_gs(struct group_run_stats *dst, struct group_run_stats *src)
 
        dst->kb_base    = le32_to_cpu(src->kb_base);
        dst->groupid    = le32_to_cpu(src->groupid);
+       dst->unified_rw_rep     = le32_to_cpu(src->unified_rw_rep);
 }
 
 static void handle_ts(struct fio_client *client, struct fio_net_cmd *cmd)
@@ -789,6 +791,7 @@ static void handle_ts(struct fio_client *client, struct fio_net_cmd *cmd)
        client_ts.members++;
        client_ts.thread_number = p->ts.thread_number;
        client_ts.groupid = p->ts.groupid;
+       client_ts.unified_rw_rep = p->ts.unified_rw_rep;
 
        if (++sum_stat_nr == sum_stat_clients) {
                strcpy(client_ts.name, "All clients");
@@ -878,8 +881,6 @@ static void convert_jobs_eta(struct jobs_eta *je)
                je->t_rate[i]   = le32_to_cpu(je->t_rate[i]);
                je->m_iops[i]   = le32_to_cpu(je->m_iops[i]);
                je->t_iops[i]   = le32_to_cpu(je->t_iops[i]);
-               je->rate[i]     = le32_to_cpu(je->rate[i]);
-               je->iops[i]     = le32_to_cpu(je->iops[i]);
        }
 
        je->elapsed_sec         = le64_to_cpu(je->elapsed_sec);
@@ -902,8 +903,6 @@ void fio_client_sum_jobs_eta(struct jobs_eta *dst, struct jobs_eta *je)
                dst->t_rate[i]  += je->t_rate[i];
                dst->m_iops[i]  += je->m_iops[i];
                dst->t_iops[i]  += je->t_iops[i];
-               dst->rate[i]    += je->rate[i];
-               dst->iops[i]    += je->iops[i];
        }
 
        dst->elapsed_sec        += je->elapsed_sec;
index ff87acc..880d113 100755 (executable)
--- a/configure
+++ b/configure
@@ -208,6 +208,8 @@ CYGWIN*)
   output_sym "CONFIG_FDATASYNC"
   output_sym "CONFIG_GETTIMEOFDAY"
   output_sym "CONFIG_CLOCK_GETTIME"
+  output_sym "CONFIG_SCHED_IDLE"
+  output_sym "CONFIG_TCP_NODELAY"
   echo "CC=$CC" >> $config_host_mak
   echo "EXTFLAGS=$CFLAGS -include config-host.h -D_GNU_SOURCE" >> $config_host_mak
   exit 0
@@ -888,6 +890,40 @@ if compile_prog "" "" "RUSAGE_THREAD"; then
 fi
 echo "RUSAGE_THREAD                 $rusage_thread"
 
+##########################################
+# Check whether we have SCHED_IDLE
+sched_idle="no"
+cat > $TMPC << EOF
+#include <sched.h>
+int main(int argc, char **argv)
+{
+  struct sched_param p;
+  return sched_setscheduler(0, SCHED_IDLE, &p);
+}
+EOF
+if compile_prog "" "" "SCHED_IDLE"; then
+  sched_idle="yes"
+fi
+echo "SCHED_IDLE                    $sched_idle"
+
+##########################################
+# Check whether we have TCP_NODELAY
+tcp_nodelay="no"
+cat > $TMPC << EOF
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/tcp.h>
+int main(int argc, char **argv)
+{
+  return getsockopt(0, 0, TCP_NODELAY, NULL, NULL);
+}
+EOF
+if compile_prog "" "" "TCP_NODELAY"; then
+  tcp_nodelay="yes"
+fi
+echo "TCP_NODELAY                   $tcp_nodelay"
+
 #############################################################################
 
 echo "# Automatically generated by configure - do not modify" > $config_host_mak
@@ -994,6 +1030,12 @@ fi
 if test "$gfio" = "yes" ; then
   echo "CONFIG_GFIO=y" >> $config_host_mak
 fi
+if test "$sched_idle" = "yes" ; then
+  output_sym "CONFIG_SCHED_IDLE"
+fi
+if test "$tcp_nodelay" = "yes" ; then
+  output_sym "CONFIG_TCP_NODELAY"
+fi
 
 echo "LIBS+=$LIBS" >> $config_host_mak
 echo "CFLAGS+=$CFLAGS" >> $config_host_mak
index fbc4268..e29d1c3 100644 (file)
@@ -276,13 +276,25 @@ static struct disk_util *disk_util_add(struct thread_data *td, int majdev,
 {
        struct disk_util *du, *__du;
        struct flist_head *entry;
+       int l;
 
        dprint(FD_DISKUTIL, "add maj/min %d/%d: %s\n", majdev, mindev, path);
 
        du = smalloc(sizeof(*du));
+       if (!du) {
+               log_err("fio: smalloc() pool exhausted\n");
+               return NULL;
+       }
+
        memset(du, 0, sizeof(*du));
        INIT_FLIST_HEAD(&du->list);
-       sprintf(du->path, "%s/stat", path);
+       l = snprintf(du->path, sizeof(du->path), "%s/stat", path);
+       if (l < 0 || l >= sizeof(du->path)) {
+               log_err("constructed path \"%.100s[...]/stat\" larger than buffer (%zu bytes)\n",
+                       path, sizeof(du->path) - 1);
+               sfree(du);
+               return NULL;
+       }
        strncpy((char *) du->dus.name, basename(path), FIO_DU_NAME_SZ);
        du->sysfs_root = path;
        du->major = majdev;
index b89aacc..ddd6471 100644 (file)
@@ -42,7 +42,7 @@ struct disk_util {
 
        char *name;
        char *sysfs_root;
-       char path[256];
+       char path[PATH_MAX];
        int major, minor;
 
        struct disk_util_stat dus;
index 525a0aa..4654fe8 100644 (file)
@@ -44,7 +44,7 @@ open_again:
        if (f->fd == -1) {
                char buf[FIO_VERROR_SIZE];
                int __e = errno;
-               snprintf(buf, sizeof(buf) - 1, "open(%s)", f->file_name);
+               snprintf(buf, sizeof(buf), "open(%s)", f->file_name);
                td_verror(td, __e, buf);
        }
 
index 9451186..12f49a2 100644 (file)
@@ -11,6 +11,7 @@
 #include <errno.h>
 #include <assert.h>
 #include <netinet/in.h>
+#include <netinet/tcp.h>
 #include <arpa/inet.h>
 #include <netdb.h>
 #include <sys/poll.h>
@@ -35,6 +36,7 @@ struct netio_options {
        unsigned int proto;
        unsigned int listen;
        unsigned int pingpong;
+       unsigned int nodelay;
 };
 
 struct udp_close_msg {
@@ -96,6 +98,14 @@ static struct fio_option options[] = {
                          },
                },
        },
+#ifdef CONFIG_TCP_NODELAY
+       {
+               .name   = "nodelay",
+               .type   = FIO_OPT_BOOL,
+               .off1   = offsetof(struct netio_options, nodelay),
+               .help   = "Use TCP_NODELAY on TCP connections",
+       },
+#endif
        {
                .name   = "listen",
                .lname  = "net engine listen",
@@ -456,7 +466,7 @@ static int fio_netio_connect(struct thread_data *td, struct fio_file *f)
 {
        struct netio_data *nd = td->io_ops->data;
        struct netio_options *o = td->eo;
-       int type, domain;
+       int type, domain, optval;
 
        if (o->proto == FIO_TYPE_TCP) {
                domain = AF_INET;
@@ -479,6 +489,16 @@ static int fio_netio_connect(struct thread_data *td, struct fio_file *f)
                return 1;
        }
 
+#ifdef CONFIG_TCP_NODELAY
+       if (o->nodelay && o->proto == FIO_TYPE_TCP) {
+               optval = 1;
+               if (setsockopt(f->fd, IPPROTO_TCP, TCP_NODELAY, (void *) &optval, sizeof(int)) < 0) {
+                       log_err("fio: cannot set TCP_NODELAY option on socket (%s), disable with 'nodelay=0'\n", strerror(errno));
+                       return 1;
+               }
+       }
+#endif
+
        if (o->proto == FIO_TYPE_UDP)
                return 0;
        else if (o->proto == FIO_TYPE_TCP) {
@@ -510,7 +530,7 @@ static int fio_netio_accept(struct thread_data *td, struct fio_file *f)
        struct netio_data *nd = td->io_ops->data;
        struct netio_options *o = td->eo;
        socklen_t socklen = sizeof(nd->addr);
-       int state;
+       int state, optval;
 
        if (o->proto == FIO_TYPE_UDP) {
                f->fd = nd->listenfd;
@@ -531,6 +551,16 @@ static int fio_netio_accept(struct thread_data *td, struct fio_file *f)
                goto err;
        }
 
+#ifdef CONFIG_TCP_NODELAY
+       if (o->nodelay && o->proto == FIO_TYPE_TCP) {
+               optval = 1;
+               if (setsockopt(f->fd, IPPROTO_TCP, TCP_NODELAY, (void *) &optval, sizeof(int)) < 0) {
+                       log_err("fio: cannot set TCP_NODELAY option on socket (%s), disable with 'nodelay=0'\n", strerror(errno));
+                       return 1;
+               }
+       }
+#endif
+
        reset_all_stats(td);
        td_set_runstate(td, state);
        return 0;
@@ -751,12 +781,12 @@ static int fio_netio_setup_listen_inet(struct thread_data *td, short port)
        }
 
        opt = 1;
-       if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (void*)&opt, sizeof(opt)) < 0) {
+       if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (void *) &opt, sizeof(opt)) < 0) {
                td_verror(td, errno, "setsockopt");
                return 1;
        }
 #ifdef SO_REUSEPORT
-       if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)) < 0) {
+       if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, (void *) &opt, sizeof(opt)) < 0) {
                td_verror(td, errno, "setsockopt");
                return 1;
        }
diff --git a/eta.c b/eta.c
index 066a732..238a0af 100644 (file)
--- a/eta.c
+++ b/eta.c
@@ -226,7 +226,8 @@ static int thread_eta(struct thread_data *td)
        return eta_sec;
 }
 
-static void calc_rate(unsigned long mtime, unsigned long long *io_bytes,
+static void calc_rate(int unified_rw_rep, unsigned long mtime,
+                     unsigned long long *io_bytes,
                      unsigned long long *prev_io_bytes, unsigned int *rate)
 {
        int i;
@@ -235,19 +236,32 @@ static void calc_rate(unsigned long mtime, unsigned long long *io_bytes,
                unsigned long long diff;
 
                diff = io_bytes[i] - prev_io_bytes[i];
-               rate[i] = ((1000 * diff) / mtime) / 1024;
+               if (unified_rw_rep) {
+                       rate[i] = 0;
+                       rate[0] += ((1000 * diff) / mtime) / 1024;
+               } else
+                       rate[i] = ((1000 * diff) / mtime) / 1024;
 
                prev_io_bytes[i] = io_bytes[i];
        }
 }
 
-static void calc_iops(unsigned long mtime, unsigned long long *io_iops,
+static void calc_iops(int unified_rw_rep, unsigned long mtime,
+                     unsigned long long *io_iops,
                      unsigned long long *prev_io_iops, unsigned int *iops)
 {
        int i;
 
        for (i = 0; i < DDIR_RWDIR_CNT; i++) {
-               iops[i] = ((io_iops[i] - prev_io_iops[i]) * 1000) / mtime;
+               unsigned long long diff;
+
+               diff = io_iops[i] - prev_io_iops[i];
+               if (unified_rw_rep) {
+                       iops[i] = 0;
+                       iops[0] += (diff * 1000) / mtime;
+               } else
+                       iops[i] = (diff * 1000) / mtime;
+
                prev_io_iops[i] = io_iops[i];
        }
 }
@@ -259,7 +273,7 @@ static void calc_iops(unsigned long mtime, unsigned long long *io_iops,
 int calc_thread_status(struct jobs_eta *je, int force)
 {
        struct thread_data *td;
-       int i;
+       int i, unified_rw_rep;
        unsigned long rate_time, disp_time, bw_avg_time, *eta_secs;
        unsigned long long io_bytes[DDIR_RWDIR_CNT];
        unsigned long long io_iops[DDIR_RWDIR_CNT];
@@ -293,7 +307,9 @@ int calc_thread_status(struct jobs_eta *je, int force)
        io_bytes[DDIR_READ] = io_bytes[DDIR_WRITE] = io_bytes[DDIR_TRIM] = 0;
        io_iops[DDIR_READ] = io_iops[DDIR_WRITE] = io_iops[DDIR_TRIM] = 0;
        bw_avg_time = ULONG_MAX;
+       unified_rw_rep = 0;
        for_each_td(td, i) {
+               unified_rw_rep += td->o.unified_rw_rep;
                if (is_power_of_2(td->o.kb_base))
                        je->is_pow2 = 1;
                if (td->o.bw_avg_time < bw_avg_time)
@@ -339,9 +355,15 @@ int calc_thread_status(struct jobs_eta *je, int force)
 
                if (td->runstate > TD_RAMP) {
                        int ddir;
+
                        for (ddir = DDIR_READ; ddir < DDIR_RWDIR_CNT; ddir++) {
-                               io_bytes[ddir] += td->io_bytes[ddir];
-                               io_iops[ddir] += td->io_blocks[ddir];
+                               if (unified_rw_rep) {
+                                       io_bytes[0] += td->io_bytes[ddir];
+                                       io_iops[0] += td->io_blocks[ddir];
+                               } else {
+                                       io_bytes[ddir] += td->io_bytes[ddir];
+                                       io_iops[ddir] += td->io_blocks[ddir];
+                               }
                        }
                }
        }
@@ -367,7 +389,8 @@ int calc_thread_status(struct jobs_eta *je, int force)
        rate_time = mtime_since(&rate_prev_time, &now);
 
        if (write_bw_log && rate_time > bw_avg_time && !in_ramp_time(td)) {
-               calc_rate(rate_time, io_bytes, rate_io_bytes, je->rate);
+               calc_rate(unified_rw_rep, rate_time, io_bytes, rate_io_bytes,
+                               je->rate);
                memcpy(&rate_prev_time, &now, sizeof(now));
                add_agg_sample(je->rate[DDIR_READ], DDIR_READ, 0);
                add_agg_sample(je->rate[DDIR_WRITE], DDIR_WRITE, 0);
@@ -382,8 +405,8 @@ int calc_thread_status(struct jobs_eta *je, int force)
        if (!force && disp_time < 900)
                return 0;
 
-       calc_rate(disp_time, io_bytes, disp_io_bytes, je->rate);
-       calc_iops(disp_time, io_iops, disp_io_iops, je->iops);
+       calc_rate(unified_rw_rep, disp_time, io_bytes, disp_io_bytes, je->rate);
+       calc_iops(unified_rw_rep, disp_time, io_iops, disp_io_iops, je->iops);
 
        memcpy(&disp_prev_time, &now, sizeof(now));
 
index 8262ed3..3054d9d 100644 (file)
@@ -563,7 +563,7 @@ open_again:
                if (__e == EMFILE && file_close_shadow_fds(td))
                        goto open_again;
 
-               snprintf(buf, sizeof(buf) - 1, "open(%s)", f->file_name);
+               snprintf(buf, sizeof(buf), "open(%s)", f->file_name);
 
                if (__e == EINVAL && (flags & OS_O_DIRECT)) {
                        log_err("fio: looks like your file system does not " \
@@ -1250,7 +1250,7 @@ static int recurse_dir(struct thread_data *td, const char *dirname)
        if (!D) {
                char buf[FIO_VERROR_SIZE];
 
-               snprintf(buf, FIO_VERROR_SIZE - 1, "opendir(%s)", dirname);
+               snprintf(buf, FIO_VERROR_SIZE, "opendir(%s)", dirname);
                td_verror(td, errno, buf);
                return 1;
        }
diff --git a/fio.1 b/fio.1
index d224d54..c665591 100644 (file)
--- a/fio.1
+++ b/fio.1
@@ -77,6 +77,9 @@ Background a fio server, writing the pid to the given pid file.
 .TP
 .BI \-\-client \fR=\fPhost
 Instead of running the jobs locally, send and run them on the given host.
+.TP
+.BI \-\-idle\-prof \fR=\fPoption
+Report cpu idleness on a system or percpu basis (\fIoption\fP=system,percpu) or run unit work calibration only (\fIoption\fP=calibrate).
 .SH "JOB FILE FORMAT"
 Job files are in `ini' format. They consist of one or more
 job definitions, which begin with a job name in square brackets and
@@ -240,6 +243,11 @@ The base unit for a kilobyte. The defacto base is 2^10, 1024.  Storage
 manufacturers like to use 10^3 or 1000 as a base ten unit instead, for obvious
 reasons. Allow values are 1024 or 1000, with 1024 being the default.
 .TP
+.BI unified_rw_reporting \fR=\fPbool
+Fio normally reports statistics on a per data direction basis, meaning that
+read, write, and trim are accounted and reported separately. If this option is
+set, fio will sum the results and report them as "mixed" instead.
+.TP
 .BI randrepeat \fR=\fPbool
 Seed the random number generator in a predictable way so results are repeatable
 across runs.  Default: true.
diff --git a/fio.c b/fio.c
index 755a4d5..7e6b06d 100644 (file)
--- a/fio.c
+++ b/fio.c
 #include <time.h>
 
 #include "fio.h"
-#include "hash.h"
 #include "smalloc.h"
-#include "verify.h"
-#include "trim.h"
-#include "diskutil.h"
-#include "profile.h"
-#include "lib/rand.h"
-#include "memalign.h"
-#include "client.h"
-#include "server.h"
 
 int main(int argc, char *argv[], char *envp[])
 {
diff --git a/fio.h b/fio.h
index f0babaf..5bfa438 100644 (file)
--- a/fio.h
+++ b/fio.h
@@ -325,7 +325,7 @@ enum {
                int e = (err);                                          \
                (td)->error = e;                                        \
                if (!(td)->first_error)                                 \
-                       snprintf(td->verror, sizeof(td->verror) - 1, "file:%s:%d, func=%s, error=%s", __FILE__, __LINE__, (func), (msg));               \
+                       snprintf(td->verror, sizeof(td->verror), "file:%s:%d, func=%s, error=%s", __FILE__, __LINE__, (func), (msg));           \
        } while (0)
 
 
diff --git a/flow.c b/flow.c
index 2993f4e..b7a2fb1 100644 (file)
--- a/flow.c
+++ b/flow.c
@@ -39,6 +39,9 @@ static struct fio_flow *flow_get(unsigned int id)
        struct fio_flow *flow = NULL;
        struct flist_head *n;
 
+       if (!flow_lock)
+               return NULL;
+
        fio_mutex_down(flow_lock);
 
        flist_for_each(n, flow_list) {
@@ -51,6 +54,10 @@ static struct fio_flow *flow_get(unsigned int id)
 
        if (!flow) {
                flow = smalloc(sizeof(*flow));
+               if (!flow) {
+                       log_err("fio: smalloc pool exhausted\n");
+                       return NULL;
+               }
                flow->refs = 0;
                INIT_FLIST_HEAD(&flow->list);
                flow->id = id;
@@ -66,6 +73,9 @@ static struct fio_flow *flow_get(unsigned int id)
 
 static void flow_put(struct fio_flow *flow)
 {
+       if (!flow_lock)
+               return;
+
        fio_mutex_down(flow_lock);
 
        if (!--flow->refs) {
@@ -92,13 +102,28 @@ void flow_exit_job(struct thread_data *td)
 
 void flow_init(void)
 {
-       flow_lock = fio_mutex_init(FIO_MUTEX_UNLOCKED);
        flow_list = smalloc(sizeof(*flow_list));
+       if (!flow_list) {
+               log_err("fio: smalloc pool exhausted\n");
+               return;
+       }
+
+       flow_lock = fio_mutex_init(FIO_MUTEX_UNLOCKED);
+       if (!flow_lock) {
+               log_err("fio: failed to allocate flow lock\n");
+               sfree(flow_list);
+               /* clear it so flow_exit() does not free it a second time */
+               flow_list = NULL;
+               return;
+       }
+
        INIT_FLIST_HEAD(flow_list);
 }
 
 void flow_exit(void)
 {
-       fio_mutex_remove(flow_lock);
-       sfree(flow_list);
+       if (flow_lock)
+               fio_mutex_remove(flow_lock);
+       if (flow_list)
+               sfree(flow_list);
 }
index da40904..c1b4b09 100644 (file)
@@ -14,12 +14,14 @@ static pthread_t gtod_thread;
 void fio_gtod_init(void)
 {
        fio_tv = smalloc(sizeof(struct timeval));
-       assert(fio_tv);
+       if (!fio_tv)
+               log_err("fio: smalloc pool exhausted\n");
 }
 
 static void fio_gtod_update(void)
 {
-       gettimeofday(fio_tv, NULL);
+       if (fio_tv)
+               gettimeofday(fio_tv, NULL);
 }
 
 static void *gtod_thread_main(void *data)
diff --git a/idletime.c b/idletime.c
new file mode 100644 (file)
index 0000000..244723f
--- /dev/null
@@ -0,0 +1,475 @@
+#include <math.h>
+#include "json.h"
+#include "idletime.h"
+
+static volatile struct idle_prof_common ipc;
+
+/*
+ * Get the time to complete one unit of work (touching page_size bytes of
+ * 'data') on the calling cpu.
+ *
+ * CALIBRATE_RUNS runs are timed, each doing CALIBRATE_SCALE units. The
+ * fastest run with a non-zero elapsed time is kept and divided by
+ * CALIBRATE_SCALE. Returns 0.0 if every run measured zero elapsed time or
+ * if ipc.status turned into IDLE_PROF_STATUS_PROF_STOP while calibrating.
+ */
+static double calibrate_unit(unsigned char *data)
+{
+       unsigned long t, i, j, k;
+       struct timeval tps;
+       double tunit = 0.0;
+
+       for (i = 0; i < CALIBRATE_RUNS; i++) {
+
+               fio_gettime(&tps, NULL);
+               /* scale for less variance */
+               for (j = 0; j < CALIBRATE_SCALE; j++) {
+                       /* unit of work */
+                       for (k = 0; k < page_size; k++) {
+                               data[(k + j) % page_size] = k % 256;
+                               /*
+                                * we won't see STOP here. this is to match
+                                * the same statement in the profiling loop.
+                                */
+                               if (ipc.status == IDLE_PROF_STATUS_PROF_STOP)
+                                       return 0.0;
+                       }
+               }
+
+               t = utime_since_now(&tps);
+               if (!t)
+                       continue;
+
+               /*
+                * get the minimum time to complete CALIBRATE_SCALE units.
+                * tunit == 0.0 means "no valid sample yet"; testing that
+                * rather than i == 0 keeps the minimum correct when the
+                * first run was skipped above because it measured zero.
+                */
+               if ((tunit == 0.0) || ((double)t < tunit))
+                       tunit = (double)t;
+       }
+
+       return tunit / CALIBRATE_SCALE;
+}
+
+/*
+ * Body of the per-cpu profiling thread; 'data' is its idle_prof_thread.
+ *
+ * Sequence:
+ *  1. block on init_lock until the main thread has spawned every thread,
+ *  2. bind to ipt->cpu, calibrate the unit of work, switch to the idle
+ *     scheduling class, then report TD_INITIALIZED through ipt->cond,
+ *  3. block on start_lock until fio_idle_prof_start() releases it,
+ *  4. spin doing units of work until ipc.status becomes
+ *     IDLE_PROF_STATUS_PROF_STOP, then record the loop count.
+ *
+ * Every exit path marks the thread TD_EXITED and releases the lock it
+ * holds, so the main thread can never block on a mutex owned by a thread
+ * that has already returned. Always returns NULL.
+ */
+static void *idle_prof_thread_fn(void *data)
+{
+       int retval;
+       unsigned long j, k;
+       struct idle_prof_thread *ipt = data;
+
+       /* wait until all threads are spawned */
+       pthread_mutex_lock(&ipt->init_lock);
+
+       /* exit if any other thread failed to start */
+       if (ipc.status == IDLE_PROF_STATUS_ABORT) {
+               ipt->state = TD_EXITED;
+               pthread_mutex_unlock(&ipt->init_lock);
+               return NULL;
+       }
+
+#if defined(FIO_HAVE_CPU_AFFINITY)
+       os_cpu_mask_t cpu_mask;
+       memset(&cpu_mask, 0, sizeof(cpu_mask));
+       fio_cpu_set(&cpu_mask, ipt->cpu);
+
+       if ((retval = fio_setaffinity(gettid(), cpu_mask)) == -1)
+               log_err("fio: fio_setaffinity failed\n");
+#else
+       retval = -1;
+       log_err("fio: fio_setaffinity not supported\n");
+#endif
+       if (retval == -1) {
+               ipt->state = TD_EXITED;
+               pthread_mutex_unlock(&ipt->init_lock);
+               return NULL;
+       }
+
+       ipt->cali_time = calibrate_unit(ipt->data);
+
+       /* delay to set IDLE class till now for better calibration accuracy */
+#if defined(CONFIG_SCHED_IDLE)
+       if ((retval = fio_set_sched_idle()))
+               log_err("fio: fio_set_sched_idle failed\n");
+#else
+       retval = -1;
+       log_err("fio: fio_set_sched_idle not supported\n");
+#endif
+       if (retval == -1) {
+               ipt->state = TD_EXITED;
+               pthread_mutex_unlock(&ipt->init_lock);
+               return NULL;
+       }
+
+       ipt->state = TD_INITIALIZED;
+
+       /* signal the main thread that calibration is done */
+       pthread_cond_signal(&ipt->cond);
+       pthread_mutex_unlock(&ipt->init_lock);
+
+       /* wait for other calibration to finish */
+       pthread_mutex_lock(&ipt->start_lock);
+
+       /*
+        * exit if other threads failed to initialize, or if we are doing
+        * calibration only. start_lock must be dropped first: the main
+        * thread takes it again in fio_idle_prof_stop().
+        */
+       if ((ipc.status == IDLE_PROF_STATUS_ABORT) ||
+           (ipc.status == IDLE_PROF_STATUS_CALI_STOP)) {
+               ipt->state = TD_EXITED;
+               pthread_mutex_unlock(&ipt->start_lock);
+               return NULL;
+       }
+
+       fio_gettime(&ipt->tps, NULL);
+       ipt->state = TD_RUNNING;
+
+       j = 0;
+       while (1) {
+               for (k = 0; k < page_size; k++) {
+                       ipt->data[(k + j) % page_size] = k % 256;
+                       if (ipc.status == IDLE_PROF_STATUS_PROF_STOP) {
+                               fio_gettime(&ipt->tpe, NULL);
+                               goto idle_prof_done;
+                       }
+               }
+               j++;
+       }
+
+idle_prof_done:
+
+       /* completed units plus the fraction of the interrupted one */
+       ipt->loops = j + (double) k / page_size;
+       ipt->state = TD_EXITED;
+       pthread_mutex_unlock(&ipt->start_lock);
+
+       return NULL;
+}
+
+/*
+ * Calculate the mean and the sample standard deviation of the per-cpu
+ * calibration times (time to complete a unit of work) and store them in
+ * ipc.cali_mean and ipc.cali_stddev.
+ *
+ * The standard deviation uses the (n - 1) divisor, so it is reported as
+ * 0.0 when fewer than two cpus contributed a sample instead of dividing
+ * by zero.
+ */
+static void calibration_stats(void)
+{
+       int i;
+       double sum = 0.0, var = 0.0;
+       struct idle_prof_thread *ipt;
+
+       /* nothing to average */
+       if (ipc.nr_cpus <= 0) {
+               ipc.cali_mean = ipc.cali_stddev = 0.0;
+               return;
+       }
+
+       for (i = 0; i < ipc.nr_cpus; i++) {
+               ipt = &ipc.ipts[i];
+               sum += ipt->cali_time;
+       }
+
+       ipc.cali_mean = sum / ipc.nr_cpus;
+
+       /* a single sample has no spread; avoid 0.0 / 0 == NaN */
+       if (ipc.nr_cpus < 2) {
+               ipc.cali_stddev = 0.0;
+               return;
+       }
+
+       for (i = 0; i < ipc.nr_cpus; i++) {
+               ipt = &ipc.ipts[i];
+               var += pow(ipt->cali_time - ipc.cali_mean, 2);
+       }
+
+       ipc.cali_stddev = sqrt(var / (ipc.nr_cpus - 1));
+}
+
+/*
+ * Set up idle profiling: allocate the per-cpu bookkeeping, spawn one
+ * detached thread per online cpu and wait for every thread to finish its
+ * calibration.
+ *
+ * Does nothing beyond recording nr_cpus/status when profiling is off
+ * (ipc.opt == IDLE_PROF_OPT_NONE). If the thread attributes or the memory
+ * cannot be set up, profiling is switched off (ipc.opt is reset to
+ * IDLE_PROF_OPT_NONE) so that the start/stop/report functions become
+ * no-ops instead of touching memory that was never allocated. If a thread
+ * cannot be created or fails to initialize, ipc.status is set to
+ * IDLE_PROF_STATUS_ABORT.
+ */
+void fio_idle_prof_init(void)
+{
+       int i, ret;
+       struct timeval tp;
+       struct timespec ts;
+       pthread_attr_t tattr;
+       struct idle_prof_thread *ipt;
+
+       ipc.nr_cpus = cpus_online();
+       ipc.status = IDLE_PROF_STATUS_OK;
+
+       if (ipc.opt == IDLE_PROF_OPT_NONE)
+               return;
+
+       if ((ret = pthread_attr_init(&tattr))) {
+               log_err("fio: pthread_attr_init %s\n", strerror(ret));
+               goto err_disable;
+       }
+       if ((ret = pthread_attr_setscope(&tattr, PTHREAD_SCOPE_SYSTEM))) {
+               log_err("fio: pthread_attr_setscope %s\n", strerror(ret));
+               goto err_attr;
+       }
+
+       ipc.ipts = malloc(ipc.nr_cpus * sizeof(struct idle_prof_thread));
+       if (!ipc.ipts) {
+               log_err("fio: malloc failed\n");
+               goto err_attr;
+       }
+
+       ipc.buf = malloc(ipc.nr_cpus * page_size);
+       if (!ipc.buf) {
+               log_err("fio: malloc failed\n");
+               free(ipc.ipts);
+               /* fio_idle_prof_cleanup() must not free this again */
+               ipc.ipts = NULL;
+               goto err_attr;
+       }
+
+       /*
+        * profiling aborts on any single thread failure since the
+        * result won't be accurate if any cpu is not used.
+        */
+       for (i = 0; i < ipc.nr_cpus; i++) {
+               ipt = &ipc.ipts[i];
+
+               ipt->cpu = i;
+               ipt->state = TD_NOT_CREATED;
+               /* each thread gets its own page_size slice of ipc.buf */
+               ipt->data = (unsigned char *)ipc.buf + page_size * i;
+
+               if ((ret = pthread_mutex_init(&ipt->init_lock, NULL))) {
+                       ipc.status = IDLE_PROF_STATUS_ABORT;
+                       log_err("fio: pthread_mutex_init %s\n", strerror(ret));
+                       break;
+               }
+
+               if ((ret = pthread_mutex_init(&ipt->start_lock, NULL))) {
+                       ipc.status = IDLE_PROF_STATUS_ABORT;
+                       log_err("fio: pthread_mutex_init %s\n", strerror(ret));
+                       break;
+               }
+
+               if ((ret = pthread_cond_init(&ipt->cond, NULL))) {
+                       ipc.status = IDLE_PROF_STATUS_ABORT;
+                       log_err("fio: pthread_cond_init %s\n", strerror(ret));
+                       break;
+               }
+
+               /* make sure all threads are spawned before they start */
+               pthread_mutex_lock(&ipt->init_lock);
+
+               /* make sure all threads finish init before profiling starts */
+               pthread_mutex_lock(&ipt->start_lock);
+
+               if ((ret = pthread_create(&ipt->thread, &tattr, idle_prof_thread_fn, ipt))) {
+                       ipc.status = IDLE_PROF_STATUS_ABORT;
+                       log_err("fio: pthread_create %s\n", strerror(ret));
+                       break;
+               } else
+                       ipt->state = TD_CREATED;
+
+               if ((ret = pthread_detach(ipt->thread))) {
+                       /* log error and let the thread spin */
+                       log_err("fio: pthread_detach %s\n", strerror(ret));
+               }
+       }
+
+       /* the attribute object is not needed once thread creation is over */
+       pthread_attr_destroy(&tattr);
+
+       /*
+        * let good threads continue so that they can exit
+        * if errors on other threads occurred previously.
+        *
+        * NOTE(review): after an early break above, entries past the
+        * failing index were never initialized, yet they are unlocked
+        * here as well -- confirm whether that needs tightening.
+        */
+       for (i = 0; i < ipc.nr_cpus; i++) {
+               ipt = &ipc.ipts[i];
+               pthread_mutex_unlock(&ipt->init_lock);
+       }
+
+       if (ipc.status == IDLE_PROF_STATUS_ABORT)
+               return;
+
+       /* wait for calibration to finish */
+       for (i = 0; i < ipc.nr_cpus; i++) {
+               ipt = &ipc.ipts[i];
+               pthread_mutex_lock(&ipt->init_lock);
+               while ((ipt->state != TD_EXITED) &&
+                      (ipt->state != TD_INITIALIZED)) {
+                       /* re-check at least once a second */
+                       fio_gettime(&tp, NULL);
+                       ts.tv_sec = tp.tv_sec + 1;
+                       ts.tv_nsec = tp.tv_usec * 1000;
+                       pthread_cond_timedwait(&ipt->cond, &ipt->init_lock, &ts);
+               }
+               pthread_mutex_unlock(&ipt->init_lock);
+
+               /*
+                * any thread failed to initialize would abort other threads
+                * later after fio_idle_prof_start.
+                */
+               if (ipt->state == TD_EXITED)
+                       ipc.status = IDLE_PROF_STATUS_ABORT;
+       }
+
+       if (ipc.status != IDLE_PROF_STATUS_ABORT)
+               calibration_stats();
+       else
+               ipc.cali_mean = ipc.cali_stddev = 0.0;
+
+       if (ipc.opt == IDLE_PROF_OPT_CALI)
+               ipc.status = IDLE_PROF_STATUS_CALI_STOP;
+
+       return;
+
+err_attr:
+       pthread_attr_destroy(&tattr);
+err_disable:
+       /* nothing usable was set up: turn idle profiling off entirely */
+       ipc.opt = IDLE_PROF_OPT_NONE;
+}
+
+/*
+ * Release every profiling thread from its start_lock so that it either
+ * begins profiling or exits, depending on ipc.status.
+ *
+ * No-op when idle profiling is off or when the per-thread array was never
+ * allocated (fio_idle_prof_init() failed early).
+ */
+void fio_idle_prof_start(void)
+{
+       int i;
+       struct idle_prof_thread *ipt;
+
+       if (ipc.opt == IDLE_PROF_OPT_NONE)
+               return;
+
+       /* init could not allocate the thread array: nothing to release */
+       if (!ipc.ipts)
+               return;
+
+       /* unlock regardless abort is set or not */
+       for (i = 0; i < ipc.nr_cpus; i++) {
+               ipt = &ipc.ipts[i];
+               pthread_mutex_unlock(&ipt->start_lock);
+       }
+}
+
+/*
+ * Stop profiling: set ipc.status to IDLE_PROF_STATUS_PROF_STOP, wait for
+ * every thread to leave its profiling loop and compute each cpu's idleness
+ * as loops * cali_mean / elapsed time.
+ *
+ * No-op when idle profiling is off, in calibration-only mode, or when the
+ * per-thread array was never allocated. Idleness is reported as 0.0 when
+ * there is no calibration result or the measured run time is zero.
+ */
+void fio_idle_prof_stop(void)
+{
+       int i;
+       uint64_t runt;
+       struct timeval tp;
+       struct timespec ts;
+       struct idle_prof_thread *ipt;
+
+       if (ipc.opt == IDLE_PROF_OPT_NONE)
+               return;
+
+       if (ipc.opt == IDLE_PROF_OPT_CALI)
+               return;
+
+       /* init could not allocate the thread array: nothing to wait for */
+       if (!ipc.ipts)
+               return;
+
+       ipc.status = IDLE_PROF_STATUS_PROF_STOP;
+
+       /* wait for all threads to exit from profiling */
+       for (i = 0; i < ipc.nr_cpus; i++) {
+               ipt = &ipc.ipts[i];
+               pthread_mutex_lock(&ipt->start_lock);
+               while ((ipt->state != TD_EXITED) &&
+                      (ipt->state != TD_NOT_CREATED)) {
+                       fio_gettime(&tp, NULL);
+                       ts.tv_sec = tp.tv_sec + 1;
+                       ts.tv_nsec = tp.tv_usec * 1000;
+                       /* timed wait in case a signal is not received */
+                       pthread_cond_timedwait(&ipt->cond, &ipt->start_lock, &ts);
+               }
+               pthread_mutex_unlock(&ipt->start_lock);
+
+               /* calculate idleness */
+               if (ipc.cali_mean != 0.0) {
+                       runt = utime_since(&ipt->tps, &ipt->tpe);
+                       /* a zero run time would turn the ratio into inf/NaN */
+                       if (runt)
+                               ipt->idleness = ipt->loops * ipc.cali_mean / runt;
+                       else
+                               ipt->idleness = 0.0;
+               } else
+                       ipt->idleness = 0.0;
+       }
+
+       /*
+        * memory allocations are freed via explicit fio_idle_prof_cleanup
+        * after profiling stats are collected by apps.
+        */
+}
+
+/*
+ * return system idle percentage when cpu is -1;
+ * return one cpu idle percentage otherwise.
+ *
+ * Returns 0.0 when idle profiling is off, when the per-cpu data is not
+ * (or no longer) allocated, when there are no cpus to average over, or
+ * when 'cpu' is out of range (an error is logged in the last case).
+ */
+static double fio_idle_prof_cpu_stat(int cpu)
+{
+       int i, nr_cpus = ipc.nr_cpus;
+       struct idle_prof_thread *ipt;
+       double p = 0.0;
+
+       if (ipc.opt == IDLE_PROF_OPT_NONE)
+               return 0.0;
+
+       /*
+        * the per-cpu array is gone once fio_idle_prof_cleanup() has run
+        * (or was never allocated); don't dereference it in that case.
+        */
+       if (!ipc.ipts)
+               return 0.0;
+
+       if ((cpu >= nr_cpus) || (cpu < -1)) {
+               log_err("fio: idle profiling invalid cpu index\n");
+               return 0.0;
+       }
+
+       if (cpu == -1) {
+               /* avoid 0.0 / 0 below */
+               if (nr_cpus <= 0)
+                       return 0.0;
+
+               for (i = 0; i < nr_cpus; i++) {
+                       ipt = &ipc.ipts[i];
+                       p += ipt->idleness;
+               }
+               p /= nr_cpus;
+       } else {
+               ipt = &ipc.ipts[cpu];
+               p = ipt->idleness;
+       }
+
+       return p * 100.0;
+}
+
+/*
+ * Release the per-thread array and the shared data buffer and reset both
+ * pointers, so that calling this more than once is harmless.
+ */
+void fio_idle_prof_cleanup(void)
+{
+       /* free(NULL) is a no-op, so no need to test the pointers first */
+       free(ipc.ipts);
+       ipc.ipts = NULL;
+
+       free(ipc.buf);
+       ipc.buf = NULL;
+}
+
+/*
+ * Parse the argument of the idle-prof command line option.
+ *
+ * "calibrate" runs the calibration right away, prints its result and
+ * returns 1; "system" and "percpu" only record the reporting mode and
+ * return 0. Returns -1 (leaving profiling off) on a missing or unknown
+ * argument, or when the platform lacks cpu affinity or idle scheduling
+ * support.
+ */
+int fio_idle_prof_parse_opt(const char *args)
+{
+       ipc.opt = IDLE_PROF_OPT_NONE; /* default */
+
+       if (!args) {
+               log_err("fio: empty idle-prof option string\n");
+               return -1;
+       }
+
+#if defined(FIO_HAVE_CPU_AFFINITY) && defined(CONFIG_SCHED_IDLE)
+       if (strcmp("calibrate", args) == 0) {
+               ipc.opt = IDLE_PROF_OPT_CALI;
+               fio_idle_prof_init();
+               fio_idle_prof_start();
+               fio_idle_prof_stop();
+               show_idle_prof_stats(FIO_OUTPUT_NORMAL, NULL);
+               return 1;
+       } else if (strcmp("system", args) == 0) {
+               ipc.opt = IDLE_PROF_OPT_SYSTEM;
+               return 0;
+       } else if (strcmp("percpu", args) == 0) {
+               ipc.opt = IDLE_PROF_OPT_PERCPU;
+               return 0;
+       } else {
+               /* report the offending value; it used to be passed but never printed */
+               log_err("fio: incorrect idle-prof option: %s\n", args);
+               return -1;
+       }
+#else
+       log_err("fio: idle-prof not supported on this platform\n");
+       return -1;
+#endif
+}
+
+/*
+ * Report the idle profiling results and then free the profiling data.
+ *
+ * FIO_OUTPUT_NORMAL prints through log_info(); FIO_OUTPUT_JSON adds a
+ * "cpu_idleness" object to 'parent'. Nothing is reported or freed when
+ * idle profiling is off, and the JSON path also does nothing without a
+ * parent object.
+ */
+void show_idle_prof_stats(int output, struct json_object *parent)
+{
+       const int opt = ipc.opt;
+       const int nr_cpus = ipc.nr_cpus;
+       struct json_object *obj;
+       char key[MAX_CPU_STR_LEN];
+       int cpu;
+
+       if (output == FIO_OUTPUT_NORMAL) {
+               /* profiling off: no output, nothing to free */
+               if (opt == IDLE_PROF_OPT_NONE)
+                       return;
+
+               /* calibration-only output has no leading blank line */
+               if (opt == IDLE_PROF_OPT_CALI)
+                       log_info("CPU idleness:\n");
+               else if (opt > IDLE_PROF_OPT_CALI)
+                       log_info("\nCPU idleness:\n");
+
+               if (opt >= IDLE_PROF_OPT_SYSTEM)
+                       log_info("  system: %3.2f%%\n", fio_idle_prof_cpu_stat(-1));
+
+               if (opt == IDLE_PROF_OPT_PERCPU) {
+                       log_info("  percpu: %3.2f%%", fio_idle_prof_cpu_stat(0));
+                       for (cpu = 1; cpu < nr_cpus; cpu++)
+                               log_info(", %3.2f%%", fio_idle_prof_cpu_stat(cpu));
+                       log_info("\n");
+               }
+
+               if (opt >= IDLE_PROF_OPT_CALI) {
+                       log_info("  unit work: mean=%3.2fus,", ipc.cali_mean);
+                       log_info(" stddev=%3.2f\n", ipc.cali_stddev);
+               }
+
+               /* dynamic mem allocations can now be freed */
+               fio_idle_prof_cleanup();
+               return;
+       }
+
+       if ((opt == IDLE_PROF_OPT_NONE) || (output != FIO_OUTPUT_JSON))
+               return;
+
+       if (!parent)
+               return;
+
+       obj = json_create_object();
+       if (!obj)
+               return;
+
+       json_object_add_value_object(parent, "cpu_idleness", obj);
+       json_object_add_value_float(obj, "system", fio_idle_prof_cpu_stat(-1));
+
+       if (opt == IDLE_PROF_OPT_PERCPU) {
+               for (cpu = 0; cpu < nr_cpus; cpu++) {
+                       snprintf(key, MAX_CPU_STR_LEN, "cpu-%d", cpu);
+                       json_object_add_value_float(obj, key, fio_idle_prof_cpu_stat(cpu));
+               }
+       }
+
+       json_object_add_value_float(obj, "unit_mean", ipc.cali_mean);
+       json_object_add_value_float(obj, "unit_stddev", ipc.cali_stddev);
+
+       fio_idle_prof_cleanup();
+}
diff --git a/idletime.h b/idletime.h
new file mode 100644 (file)
index 0000000..acb8407
--- /dev/null
@@ -0,0 +1,57 @@
+#ifndef FIO_IDLETIME_H
+#define FIO_IDLETIME_H
+
+#include "fio.h"
+
+#define CALIBRATE_RUNS  10
+#define CALIBRATE_SCALE 1000
+#define MAX_CPU_STR_LEN 32
+
+/*
+ * Idle profiling mode, stored in idle_prof_common.opt.
+ * The order matters: the reporting code compares these values with
+ * relational operators (>, >=).
+ */
+enum {
+       IDLE_PROF_OPT_NONE,                /* profiling disabled (default) */
+       IDLE_PROF_OPT_CALI,                /* calibration only */
+       IDLE_PROF_OPT_SYSTEM,              /* report system-wide idleness */
+       IDLE_PROF_OPT_PERCPU               /* report system and per-cpu idleness */
+};
+
+/*
+ * Global profiling status, stored in idle_prof_common.status and polled
+ * by the profiling threads.
+ */
+enum {
+        IDLE_PROF_STATUS_OK,            /* set when initialization begins */
+        IDLE_PROF_STATUS_CALI_STOP,     /* calibration only: threads exit instead of profiling */
+        IDLE_PROF_STATUS_PROF_STOP,     /* threads must leave the profiling loop */
+        IDLE_PROF_STATUS_ABORT          /* a thread failed to start or initialize */
+};
+
+/* Per-cpu state of one idle profiling thread. */
+struct idle_prof_thread {
+       pthread_t thread;
+       int cpu;                         /* index of the cpu this thread is bound to */
+       int state;                       /* TD_* value tracking the thread's progress */
+       struct timeval tps;              /* taken when the thread starts profiling */
+       struct timeval tpe;              /* taken when the thread sees the stop status */
+       double cali_time; /* microseconds to finish a unit of work */
+       double loops;                    /* units of work done while profiling (may be fractional) */
+       double idleness;                 /* loops * calibration mean / run time */
+       unsigned char *data;             /* bytes to be touched */
+       pthread_cond_t  cond;            /* signalled once calibration is done */
+       pthread_mutex_t init_lock;       /* holds the thread back until all threads are spawned */
+       pthread_mutex_t start_lock;      /* holds the thread back until profiling is started */
+};
+
+/* State shared by the main thread and all idle profiling threads. */
+struct idle_prof_common {
+       struct idle_prof_thread *ipts;   /* array of nr_cpus per-cpu entries */
+       int nr_cpus;                     /* number of online cpus found at init */
+       int status;                      /* one of IDLE_PROF_STATUS_* */
+       int opt;                         /* one of IDLE_PROF_OPT_* */
+       double cali_mean;                /* mean calibration time over all cpus */
+       double cali_stddev;              /* standard deviation of the calibration times */
+       void *buf;    /* single data allocation for all threads */
+};
+
+extern int fio_idle_prof_parse_opt(const char *);
+
+extern void fio_idle_prof_init(void);
+extern void fio_idle_prof_start(void);
+extern void fio_idle_prof_stop(void);
+
+extern void show_idle_prof_stats(int, struct json_object *);
+
+#endif
diff --git a/init.c b/init.c
index 688c8ef..f5a1693 100644 (file)
--- a/init.c
+++ b/init.c
@@ -22,6 +22,7 @@
 #include "verify.h"
 #include "profile.h"
 #include "server.h"
+#include "idletime.h"
 
 #include "lib/getopt.h"
 
@@ -211,6 +212,11 @@ static struct option l_opts[FIO_NR_OPTIONS] = {
                .has_arg        = no_argument,
                .val            = 'T',
        },
+       {
+               .name           = (char *) "idle-prof",
+               .has_arg        = required_argument,
+               .val            = 'I',
+       },
        {
                .name           = NULL,
        },
@@ -628,7 +634,7 @@ char *fio_uint_to_kmg(unsigned int val)
                p++;
        } while (*p);
 
-       snprintf(buf, 31, "%u%c", val, *p);
+       snprintf(buf, 32, "%u%c", val, *p);
        return buf;
 }
 
@@ -1290,6 +1296,9 @@ static void usage(const char *name)
        printf("  --server=args\t\tStart a backend fio server\n");
        printf("  --daemonize=pidfile\tBackground fio server, write pid to file\n");
        printf("  --client=hostname\tTalk to remote backend fio server at hostname\n");
+       printf("  --idle-prof=option\tReport cpu idleness on a system or percpu basis\n"
+               "\t\t\t(option=system,percpu) or run unit work\n"
+               "\t\t\tcalibration only (option=calibrate)\n");
        printf("\nFio was written by Jens Axboe <jens.axboe@oracle.com>");
        printf("\n                   Jens Axboe <jaxboe@fusionio.com>\n");
 }
@@ -1653,6 +1662,14 @@ int parse_cmd_line(int argc, char *argv[], int client_type)
                case 'D':
                        pid_file = strdup(optarg);
                        break;
+               case 'I':
+                       if ((ret = fio_idle_prof_parse_opt(optarg))) {
+                               /* exit on error and calibration only */
+                               do_exit++;
+                               if (ret == -1) 
+                                       exit_val = 1;
+                       }
+                       break;
                case 'C':
                        if (is_backend) {
                                log_err("fio: can't be both client and server\n");
diff --git a/iolog.c b/iolog.c
index 137c1e9..e54016d 100644 (file)
--- a/iolog.c
+++ b/iolog.c
@@ -534,7 +534,7 @@ void finish_log_named(struct thread_data *td, struct io_log *log,
 {
        char file_name[256], *p;
 
-       snprintf(file_name, 200, "%s_%s.log", prefix, postfix);
+       snprintf(file_name, sizeof(file_name), "%s_%s.log", prefix, postfix);
        p = basename(file_name);
 
        if (td->client_type == FIO_CLIENT_TYPE_GUI) {
diff --git a/log.c b/log.c
index a26dee8..d481edf 100644 (file)
--- a/log.c
+++ b/log.c
@@ -12,6 +12,7 @@ int log_valist(const char *str, va_list args)
        size_t len;
 
        len = vsnprintf(buffer, sizeof(buffer), str, args);
+       len = min(len, sizeof(buffer) - 1);
 
        if (log_syslog)
                syslog(LOG_INFO, "%s", buffer);
@@ -40,6 +41,7 @@ int log_local(const char *format, ...)
        va_start(args, format);
        len = vsnprintf(buffer, sizeof(buffer), format, args);
        va_end(args);
+       len = min(len, sizeof(buffer) - 1);
 
        if (log_syslog)
                syslog(LOG_INFO, "%s", buffer);
@@ -58,6 +60,7 @@ int log_info(const char *format, ...)
        va_start(args, format);
        len = vsnprintf(buffer, sizeof(buffer), format, args);
        va_end(args);
+       len = min(len, sizeof(buffer) - 1);
 
        if (is_backend)
                return fio_server_text_output(FIO_LOG_INFO, buffer, len);
@@ -77,6 +80,7 @@ int log_err(const char *format, ...)
        va_start(args, format);
        len = vsnprintf(buffer, sizeof(buffer), format, args);
        va_end(args);
+       len = min(len, sizeof(buffer) - 1);
 
        if (is_backend)
                return fio_server_text_output(FIO_LOG_ERR, buffer, len);
index 9d49ff1..c39a6b4 100644 (file)
--- a/options.c
+++ b/options.c
@@ -2941,6 +2941,13 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
                .category = FIO_OPT_C_GENERAL,
                .group  = FIO_OPT_G_CLOCK,
        },
+       {
+               .name   = "unified_rw_reporting",
+               .type   = FIO_OPT_BOOL,
+               .off1   = td_var_offset(unified_rw_rep),
+               .help   = "Unify reporting across data direction",
+               .def    = "0",
+       },
        {
                .name   = "continue_on_error",
                .lname  = "Continue on error",
index 75964dc..869a25d 100644 (file)
@@ -253,4 +253,12 @@ static inline int os_trim(int fd, unsigned long long start,
        return errno;
 }
 
+#ifdef CONFIG_SCHED_IDLE
+/*
+ * Move the calling thread to the SCHED_IDLE scheduling class.
+ * Returns whatever sched_setscheduler() returns.
+ */
+static inline int fio_set_sched_idle(void)
+{
+       struct sched_param param = { .sched_priority = 0 };
+
+       return sched_setscheduler(gettid(), SCHED_IDLE, &param);
+}
+#endif
+
 #endif
index ef71dd7..98f9030 100644 (file)
@@ -247,4 +247,11 @@ static inline int init_random_state(struct thread_data *td, unsigned long *rand_
 }
 
 
+/*
+ * Drop the calling thread to THREAD_PRIORITY_IDLE.
+ * Returns 0 on success and -1 on failure.
+ */
+static inline int fio_set_sched_idle(void)
+{
+       /* SetThreadPriority returns nonzero for success */
+       if (SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_IDLE))
+               return 0;
+
+       return -1;
+}
+
+
 #endif /* FIO_OS_WINDOWS_H */
diff --git a/os/windows/posix/include/netinet/tcp.h b/os/windows/posix/include/netinet/tcp.h
new file mode 100644 (file)
index 0000000..250c4c3
--- /dev/null
@@ -0,0 +1,4 @@
+#ifndef NETINET_TCP_H
+#define NETINET_TCP_H
+
+#endif
index 514306d..8e5ca50 100644 (file)
--- a/server.c
+++ b/server.c
@@ -666,8 +666,6 @@ static int handle_send_eta_cmd(struct fio_net_cmd *cmd)
                je->t_rate[i]   = cpu_to_le32(je->t_rate[i]);
                je->m_iops[i]   = cpu_to_le32(je->m_iops[i]);
                je->t_iops[i]   = cpu_to_le32(je->t_iops[i]);
-               je->rate[i]     = cpu_to_le32(je->rate[i]);
-               je->iops[i]     = cpu_to_le32(je->iops[i]);
        }
 
        je->elapsed_sec         = cpu_to_le64(je->elapsed_sec);
@@ -938,6 +936,7 @@ static void convert_gs(struct group_run_stats *dst, struct group_run_stats *src)
 
        dst->kb_base    = cpu_to_le32(src->kb_base);
        dst->groupid    = cpu_to_le32(src->groupid);
+       dst->unified_rw_rep     = cpu_to_le32(src->unified_rw_rep);
 }
 
 /*
@@ -962,6 +961,7 @@ void fio_server_send_ts(struct thread_stat *ts, struct group_run_stats *rs)
        p.ts.groupid            = cpu_to_le32(ts->groupid);
        p.ts.pid                = cpu_to_le32(ts->pid);
        p.ts.members            = cpu_to_le32(ts->members);
+       p.ts.unified_rw_rep     = cpu_to_le32(ts->unified_rw_rep);
 
        for (i = 0; i < DDIR_RWDIR_CNT; i++) {
                convert_io_stat(&p.ts.clat_stat[i], &ts->clat_stat[i]);
@@ -999,7 +999,7 @@ void fio_server_send_ts(struct thread_stat *ts, struct group_run_stats *rs)
                for (j = 0; j < FIO_IO_U_PLAT_NR; j++)
                        p.ts.io_u_plat[i][j] = cpu_to_le32(ts->io_u_plat[i][j]);
 
-       for (i = 0; i < 3; i++) {
+       for (i = 0; i < DDIR_RWDIR_CNT; i++) {
                p.ts.total_io_u[i]      = cpu_to_le64(ts->total_io_u[i]);
                p.ts.short_io_u[i]      = cpu_to_le64(ts->short_io_u[i]);
        }
index fa88d24..e84a709 100644 (file)
--- a/server.h
+++ b/server.h
@@ -38,7 +38,7 @@ struct fio_net_cmd_reply {
 };
 
 enum {
-       FIO_SERVER_VER                  = 19,
+       FIO_SERVER_VER                  = 20,
 
        FIO_SERVER_MAX_FRAGMENT_PDU     = 1024,
 
diff --git a/stat.c b/stat.c
index 7cf66f0..26b3133 100644 (file)
--- a/stat.c
+++ b/stat.c
@@ -12,6 +12,7 @@
 #include "lib/ieee754.h"
 #include "json.h"
 #include "lib/getrusage.h"
+#include "idletime.h"
 
 void update_rusage_stat(struct thread_data *td)
 {
@@ -275,9 +276,9 @@ void show_group_stats(struct group_run_stats *rs)
                p4 = num2str(rs->max_bw[i], 6, rs->kb_base, i2p);
 
                log_info("%s: io=%sB, aggrb=%sB/s, minb=%sB/s, maxb=%sB/s,"
-                        " mint=%llumsec, maxt=%llumsec\n", ddir_str[i], p1, p2,
-                                               p3, p4, rs->min_run[i],
-                                               rs->max_run[i]);
+                        " mint=%llumsec, maxt=%llumsec\n",
+                               rs->unified_rw_rep ? "  MIXED" : ddir_str[i],
+                               p1, p2, p3, p4, rs->min_run[i], rs->max_run[i]);
 
                free(p1);
                free(p2);
@@ -379,8 +380,8 @@ static void show_ddir_status(struct group_run_stats *rs, struct thread_stat *ts,
        iops_p = num2str(iops, 6, 1, 0);
 
        log_info("  %s: io=%sB, bw=%sB/s, iops=%s, runt=%6llumsec\n",
-                                       ddir_str[ddir], io_p, bw_p, iops_p,
-                                       ts->runtime[ddir]);
+                               rs->unified_rw_rep ? "mixed" : ddir_str[ddir],
+                               io_p, bw_p, iops_p, ts->runtime[ddir]);
 
        free(io_p);
        free(bw_p);
@@ -654,8 +655,12 @@ static void add_ddir_status_json(struct thread_stat *ts,
 
        assert(ddir_rw(ddir));
 
+       if (ts->unified_rw_rep && ddir != DDIR_READ)
+               return;
+
        dir_object = json_create_object();
-       json_object_add_value_object(parent, ddirname[ddir], dir_object);
+       json_object_add_value_object(parent,
+               ts->unified_rw_rep ? "mixed" : ddirname[ddir], dir_object);
 
        iops = bw = 0;
        if (ts->runtime[ddir]) {
@@ -707,7 +712,7 @@ static void add_ddir_status_json(struct thread_stat *ts,
                        json_object_add_value_int(percentile_object, "0.00", 0);
                        continue;
                }
-               snprintf(buf, sizeof(buf) - 1, "%2.2f", ts->percentile_list[i].u.f);
+               snprintf(buf, sizeof(buf), "%2.2f", ts->percentile_list[i].u.f);
                json_object_add_value_int(percentile_object, (const char *)buf, ovals[i]);
        }
 
@@ -913,9 +918,9 @@ static struct json_object *show_thread_status_json(struct thread_stat *ts,
        for (i = 0; i < 7; i++) {
                char name[20];
                if (i < 6)
-                       snprintf(name, 19, "%d", 1 << i);
+                       snprintf(name, 20, "%d", 1 << i);
                else
-                       snprintf(name, 19, ">=%d", 1 << i);
+                       snprintf(name, 20, ">=%d", 1 << i);
                json_object_add_value_float(tmp, (const char *)name, io_u_dist[i]);
        }
 
@@ -1021,15 +1026,27 @@ void sum_thread_stats(struct thread_stat *dst, struct thread_stat *src, int nr)
        int l, k;
 
        for (l = 0; l < DDIR_RWDIR_CNT; l++) {
-               sum_stat(&dst->clat_stat[l], &src->clat_stat[l], nr);
-               sum_stat(&dst->slat_stat[l], &src->slat_stat[l], nr);
-               sum_stat(&dst->lat_stat[l], &src->lat_stat[l], nr);
-               sum_stat(&dst->bw_stat[l], &src->bw_stat[l], nr);
-
-               dst->io_bytes[l] += src->io_bytes[l];
-
-               if (dst->runtime[l] < src->runtime[l])
-                       dst->runtime[l] = src->runtime[l];
+               if (!dst->unified_rw_rep) {
+                       sum_stat(&dst->clat_stat[l], &src->clat_stat[l], nr);
+                       sum_stat(&dst->slat_stat[l], &src->slat_stat[l], nr);
+                       sum_stat(&dst->lat_stat[l], &src->lat_stat[l], nr);
+                       sum_stat(&dst->bw_stat[l], &src->bw_stat[l], nr);
+
+                       dst->io_bytes[l] += src->io_bytes[l];
+
+                       if (dst->runtime[l] < src->runtime[l])
+                               dst->runtime[l] = src->runtime[l];
+               } else {
+                       sum_stat(&dst->clat_stat[0], &src->clat_stat[l], nr);
+                       sum_stat(&dst->slat_stat[0], &src->slat_stat[l], nr);
+                       sum_stat(&dst->lat_stat[0], &src->lat_stat[l], nr);
+                       sum_stat(&dst->bw_stat[0], &src->bw_stat[l], nr);
+
+                       dst->io_bytes[0] += src->io_bytes[l];
+
+                       if (dst->runtime[0] < src->runtime[l])
+                               dst->runtime[0] = src->runtime[l];
+               }
        }
 
        dst->usr_time += src->usr_time;
@@ -1050,14 +1067,24 @@ void sum_thread_stats(struct thread_stat *dst, struct thread_stat *src, int nr)
                dst->io_u_lat_m[k] += src->io_u_lat_m[k];
 
        for (k = 0; k < DDIR_RWDIR_CNT; k++) {
-               dst->total_io_u[k] += src->total_io_u[k];
-               dst->short_io_u[k] += src->short_io_u[k];
+               if (!dst->unified_rw_rep) {
+                       dst->total_io_u[k] += src->total_io_u[k];
+                       dst->short_io_u[k] += src->short_io_u[k];
+               } else {
+                       dst->total_io_u[0] += src->total_io_u[k];
+                       dst->short_io_u[0] += src->short_io_u[k];
+               }
        }
 
        for (k = 0; k < DDIR_RWDIR_CNT; k++) {
                int m;
-               for (m = 0; m < FIO_IO_U_PLAT_NR; m++)
-                       dst->io_u_plat[k][m] += src->io_u_plat[k][m];
+
+               for (m = 0; m < FIO_IO_U_PLAT_NR; m++) {
+                       if (!dst->unified_rw_rep)
+                               dst->io_u_plat[k][m] += src->io_u_plat[k][m];
+                       else
+                               dst->io_u_plat[0][m] += src->io_u_plat[k][m];
+               }
        }
 
        dst->total_run_time += src->total_run_time;
@@ -1174,6 +1201,7 @@ void show_run_stats(void)
                        ts->pid = td->pid;
 
                        ts->kb_base = td->o.kb_base;
+                       ts->unified_rw_rep = td->o.unified_rw_rep;
                } else if (ts->kb_base != td->o.kb_base && !kb_base_warned) {
                        log_info("fio: kb_base differs for jobs in group, using"
                                 " %u as the base\n", ts->kb_base);
@@ -1203,6 +1231,7 @@ void show_run_stats(void)
                ts = &threadstats[i];
                rs = &runstats[ts->groupid];
                rs->kb_base = ts->kb_base;
+               rs->unified_rw_rep += ts->unified_rw_rep;
 
                for (j = 0; j < DDIR_RWDIR_CNT; j++) {
                        if (!ts->runtime[j])
@@ -1271,6 +1300,8 @@ void show_run_stats(void)
                /* disk util stats, if any */
                show_disk_util(1, root);
 
+               show_idle_prof_stats(FIO_OUTPUT_JSON, root);
+
                json_print_object(root);
                log_info("\n");
                json_free_object(root);
@@ -1291,6 +1322,8 @@ void show_run_stats(void)
        else if (output_format == FIO_OUTPUT_NORMAL)
                show_disk_util(0, NULL);
 
+       show_idle_prof_stats(FIO_OUTPUT_NORMAL, NULL);
+
        free(runstats);
        free(threadstats);
 }
diff --git a/stat.h b/stat.h
index f35f1f6..ba4c2bf 100644 (file)
--- a/stat.h
+++ b/stat.h
@@ -10,6 +10,7 @@ struct group_run_stats {
        uint64_t agg[DDIR_RWDIR_CNT];
        uint32_t kb_base;
        uint32_t groupid;
+       uint32_t unified_rw_rep;
 };
 
 /*
@@ -123,6 +124,7 @@ struct thread_stat {
        uint32_t pid;
        char description[FIO_JOBNAME_SIZE];
        uint32_t members;
+       uint32_t unified_rw_rep;
 
        /*
         * bandwidth and latency stats
diff --git a/t/log.c b/t/log.c
index ac02303..76ae68e 100644 (file)
--- a/t/log.c
+++ b/t/log.c
@@ -10,6 +10,7 @@ int log_err(const char *format, ...)
        va_start(args, format);
        len = vsnprintf(buffer, sizeof(buffer), format, args);
        va_end(args);
+       len = min(len, sizeof(buffer) - 1);
 
        return fwrite(buffer, len, 1, stderr);
 }
@@ -23,6 +24,7 @@ int log_info(const char *format, ...)
        va_start(args, format);
        len = vsnprintf(buffer, sizeof(buffer), format, args);
        va_end(args);
+       len = min(len, sizeof(buffer) - 1);
 
        return fwrite(buffer, len, 1, stdout);
 }
diff --git a/thread_options.h b/thread_options.h
index 21c1dac..ae83f08 100644 (file)
--- a/thread_options.h
+++ b/thread_options.h
@@ -186,6 +186,7 @@ struct thread_options {
        unsigned int disable_clat;
        unsigned int disable_slat;
        unsigned int disable_bw;
+       unsigned int unified_rw_rep;
        unsigned int gtod_reduce;
        unsigned int gtod_cpu;
        unsigned int gtod_offload;
@@ -385,6 +386,7 @@ struct thread_options_pack {
        uint32_t disable_clat;
        uint32_t disable_slat;
        uint32_t disable_bw;
+       uint32_t unified_rw_rep;
        uint32_t gtod_reduce;
        uint32_t gtod_cpu;
        uint32_t gtod_offload;
diff --git a/verify.c b/verify.c
index fa24702..787cc37 100644 (file)
--- a/verify.c
+++ b/verify.c
@@ -10,7 +10,6 @@
 
 #include "fio.h"
 #include "verify.h"
-#include "smalloc.h"
 #include "trim.h"
 #include "lib/rand.h"
 #include "lib/hweight.h"