From: Jens Axboe Date: Thu, 31 Jan 2013 12:23:40 +0000 (+0100) Subject: Merge branch 'master' into gfio X-Git-Tag: fio-2.1~57^2~26 X-Git-Url: https://git.kernel.dk/?p=fio.git;a=commitdiff_plain;h=95820b6e6c92025df8d89c0bf39b174e53137c41;hp=135be493d843d4cae2966a35cbd22a3058ec8e4b Merge branch 'master' into gfio Conflicts: Makefile client.c configure fio.c fio.h server.c server.h Signed-off-by: Jens Axboe --- diff --git a/HOWTO b/HOWTO index c9c1d8c6..f7948c3f 100644 --- a/HOWTO +++ b/HOWTO @@ -272,17 +272,17 @@ filename=str Fio normally makes up a filename based on the job name, can specify a number of files by separating the names with a ':' colon. So if you wanted a job to open /dev/sda and /dev/sdb as the two working files, you would use - filename=/dev/sda:/dev/sdb. On Windows, disk devices are accessed - as \\.\PhysicalDrive0 for the first device, \\.\PhysicalDrive1 - for the second etc. - Note: Windows and FreeBSD prevent write access to areas of the disk - containing in-use data (e.g. filesystems). - If the wanted filename does need to include a colon, then escape that - with a '\' character. - For instance, if the filename is "/dev/dsk/foo@3,0:c", - then you would use filename="/dev/dsk/foo@3,0\:c". - '-' is a reserved name, meaning stdin or stdout. Which of the - two depends on the read/write direction set. + filename=/dev/sda:/dev/sdb. On Windows, disk devices are + accessed as \\.\PhysicalDrive0 for the first device, + \\.\PhysicalDrive1 for the second etc. Note: Windows and + FreeBSD prevent write access to areas of the disk containing + in-use data (e.g. filesystems). + If the wanted filename does need to include a colon, then + escape that with a '\' character. For instance, if the filename + is "/dev/dsk/foo@3,0:c", then you would use + filename="/dev/dsk/foo@3,0\:c". '-' is a reserved name, meaning + stdin or stdout. Which of the two depends on the read/write + direction set. 
opendir=str Tell fio to recursively add any file it can find in this directory and down the file system tree. @@ -353,6 +353,12 @@ kb_base=int The base unit for a kilobyte. The de facto base is 2^10, 1024. ten unit instead, for obvious reasons. Allowed values are 1024 or 1000, with 1024 being the default. +unified_rw_reporting=bool Fio normally reports statistics on a per + data direction basis, meaning that read, write, and trim are + accounted and reported separately. If this option is set, + fio will sum the results and report them as "mixed" + instead. + randrepeat=bool For random IO workloads, seed the generator in a predictable way so that results are repeatable across repetitions. @@ -1686,3 +1692,18 @@ write Write 'length' bytes beginning from 'offset' sync fsync() the file datasync fdatasync() the file trim trim the given file from the given 'offset' for 'length' bytes + + +9.0 CPU idleness profiling + +In some cases, we want to understand CPU overhead in a test. For example, +we test patches specifically to see whether they reduce CPU usage. +fio implements a balloon approach to create a thread per CPU that runs at +idle priority, meaning that it only runs when nobody else needs the cpu. +By measuring the amount of work completed by the thread, idleness of each +CPU can be derived accordingly. + +A unit of work is defined as touching a full page of unsigned characters. Mean +and standard deviation of time to complete a unit of work are reported in the "unit +work" section. Options can be chosen to report detailed percpu idleness or +overall system idleness by aggregating percpu stats. 
diff --git a/Makefile b/Makefile index 038eacf1..0e79720b 100644 --- a/Makefile +++ b/Makefile @@ -33,7 +33,7 @@ SOURCE := gettime.c ioengines.c init.c stat.c log.c time.c filesetup.c \ memalign.c server.c client.c iolog.c backend.c libfio.c flow.c \ cconv.c lib/prio_tree.c json.c lib/zipf.c lib/axmap.c \ lib/lfsr.c gettime-thread.c helpers.c lib/flist_sort.c \ - lib/hweight.c lib/getrusage.c + lib/hweight.c lib/getrusage.c idletime.c ifdef CONFIG_64BIT_LLP64 CFLAGS += -DBITS_PER_LONG=32 diff --git a/README b/README index 7c4552d9..c43b795c 100644 --- a/README +++ b/README @@ -145,6 +145,9 @@ $ fio --max-jobs Maximum number of threads/processes to support --server=args Start backend server. See Client/Server section. --client=host Connect to specified backend. + --idle-prof=option Report cpu idleness on a system or percpu basis + (option=system,percpu) or run unit work + calibration only (option=calibrate). Any parameters following the options will be assumed to be job files, diff --git a/backend.c b/backend.c index 9aa2a285..49d6bc7c 100644 --- a/backend.c +++ b/backend.c @@ -51,6 +51,7 @@ #include "memalign.h" #include "server.h" #include "lib/getrusage.h" +#include "idletime.h" static pthread_t disk_util_thread; static struct fio_mutex *disk_thread_mutex; @@ -1508,6 +1509,8 @@ static void run_threads(void) if (fio_gtod_offload && fio_start_gtod_thread()) return; + + fio_idle_prof_init(); set_sig_handlers(); @@ -1573,6 +1576,9 @@ static void run_threads(void) } } + /* start idle threads before io threads start to run */ + fio_idle_prof_start(); + set_genesis_time(); while (todo) { @@ -1727,6 +1733,8 @@ static void run_threads(void) usleep(10000); } + fio_idle_prof_stop(); + update_io_ticks(); } diff --git a/cconv.c b/cconv.c index e7339d04..3a8572ec 100644 --- a/cconv.c +++ b/cconv.c @@ -172,6 +172,7 @@ void convert_thread_options_to_cpu(struct thread_options *o, o->disable_clat = le32_to_cpu(top->disable_clat); o->disable_slat = le32_to_cpu(top->disable_slat); 
o->disable_bw = le32_to_cpu(top->disable_bw); + o->unified_rw_rep = le32_to_cpu(top->unified_rw_rep); o->gtod_reduce = le32_to_cpu(top->gtod_reduce); o->gtod_cpu = le32_to_cpu(top->gtod_cpu); o->gtod_offload = le32_to_cpu(top->gtod_offload); @@ -317,6 +318,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top, top->disable_clat = cpu_to_le32(o->disable_clat); top->disable_slat = cpu_to_le32(o->disable_slat); top->disable_bw = cpu_to_le32(o->disable_bw); + top->unified_rw_rep = cpu_to_le32(o->unified_rw_rep); top->gtod_reduce = cpu_to_le32(o->gtod_reduce); top->gtod_cpu = cpu_to_le32(o->gtod_cpu); top->gtod_offload = cpu_to_le32(o->gtod_offload); diff --git a/cgroup.c b/cgroup.c index 86d4d5ea..34b61ded 100644 --- a/cgroup.c +++ b/cgroup.c @@ -52,9 +52,22 @@ static void add_cgroup(struct thread_data *td, const char *name, { struct cgroup_member *cm; + if (!lock) + return; + cm = smalloc(sizeof(*cm)); + if (!cm) { +err: + log_err("fio: failed to allocate cgroup member\n"); + return; + } + INIT_FLIST_HEAD(&cm->list); cm->root = smalloc_strdup(name); + if (!cm->root) { + sfree(cm); + goto err; + } if (td->o.cgroup_nodelete) cm->cgroup_nodelete = 1; fio_mutex_down(lock); @@ -67,6 +80,9 @@ void cgroup_kill(struct flist_head *clist) struct flist_head *n, *tmp; struct cgroup_member *cm; + if (!lock) + return; + fio_mutex_down(lock); flist_for_each_safe(n, tmp, clist) { @@ -183,6 +199,8 @@ void cgroup_shutdown(struct thread_data *td, char **mnt) static void fio_init cgroup_init(void) { lock = fio_mutex_init(FIO_MUTEX_UNLOCKED); + if (!lock) + log_err("fio: failed to allocate cgroup lock\n"); } static void fio_exit cgroup_exit(void) diff --git a/client.c b/client.c index c2807622..fe6d75ef 100644 --- a/client.c +++ b/client.c @@ -699,6 +699,7 @@ static void convert_ts(struct thread_stat *dst, struct thread_stat *src) dst->groupid = le32_to_cpu(src->groupid); dst->pid = le32_to_cpu(src->pid); dst->members = le32_to_cpu(src->members); + dst->unified_rw_rep = 
le32_to_cpu(src->unified_rw_rep); for (i = 0; i < DDIR_RWDIR_CNT; i++) { convert_io_stat(&dst->clat_stat[i], &src->clat_stat[i]); @@ -736,7 +737,7 @@ static void convert_ts(struct thread_stat *dst, struct thread_stat *src) for (j = 0; j < FIO_IO_U_PLAT_NR; j++) dst->io_u_plat[i][j] = le32_to_cpu(src->io_u_plat[i][j]); - for (i = 0; i < 3; i++) { + for (i = 0; i < DDIR_RWDIR_CNT; i++) { dst->total_io_u[i] = le64_to_cpu(src->total_io_u[i]); dst->short_io_u[i] = le64_to_cpu(src->short_io_u[i]); } @@ -771,6 +772,7 @@ static void convert_gs(struct group_run_stats *dst, struct group_run_stats *src) dst->kb_base = le32_to_cpu(src->kb_base); dst->groupid = le32_to_cpu(src->groupid); + dst->unified_rw_rep = le32_to_cpu(src->unified_rw_rep); } static void handle_ts(struct fio_client *client, struct fio_net_cmd *cmd) @@ -789,6 +791,7 @@ static void handle_ts(struct fio_client *client, struct fio_net_cmd *cmd) client_ts.members++; client_ts.thread_number = p->ts.thread_number; client_ts.groupid = p->ts.groupid; + client_ts.unified_rw_rep = p->ts.unified_rw_rep; if (++sum_stat_nr == sum_stat_clients) { strcpy(client_ts.name, "All clients"); @@ -878,8 +881,6 @@ static void convert_jobs_eta(struct jobs_eta *je) je->t_rate[i] = le32_to_cpu(je->t_rate[i]); je->m_iops[i] = le32_to_cpu(je->m_iops[i]); je->t_iops[i] = le32_to_cpu(je->t_iops[i]); - je->rate[i] = le32_to_cpu(je->rate[i]); - je->iops[i] = le32_to_cpu(je->iops[i]); } je->elapsed_sec = le64_to_cpu(je->elapsed_sec); @@ -902,8 +903,6 @@ void fio_client_sum_jobs_eta(struct jobs_eta *dst, struct jobs_eta *je) dst->t_rate[i] += je->t_rate[i]; dst->m_iops[i] += je->m_iops[i]; dst->t_iops[i] += je->t_iops[i]; - dst->rate[i] += je->rate[i]; - dst->iops[i] += je->iops[i]; } dst->elapsed_sec += je->elapsed_sec; diff --git a/configure b/configure index ff87acc5..880d113d 100755 --- a/configure +++ b/configure @@ -208,6 +208,8 @@ CYGWIN*) output_sym "CONFIG_FDATASYNC" output_sym "CONFIG_GETTIMEOFDAY" output_sym "CONFIG_CLOCK_GETTIME" 
+ output_sym "CONFIG_SCHED_IDLE" + output_sym "CONFIG_TCP_NODELAY" echo "CC=$CC" >> $config_host_mak echo "EXTFLAGS=$CFLAGS -include config-host.h -D_GNU_SOURCE" >> $config_host_mak exit 0 @@ -888,6 +890,40 @@ if compile_prog "" "" "RUSAGE_THREAD"; then fi echo "RUSAGE_THREAD $rusage_thread" +########################################## +# Check whether we have SCHED_IDLE +sched_idle="no" +cat > $TMPC << EOF +#include +int main(int argc, char **argv) +{ + struct sched_param p; + return sched_setscheduler(0, SCHED_IDLE, &p); +} +EOF +if compile_prog "" "" "SCHED_IDLE"; then + sched_idle="yes" +fi +echo "SCHED_IDLE $sched_idle" + +########################################## +# Check whether we have TCP_NODELAY +tcp_nodelay="no" +cat > $TMPC << EOF +#include +#include +#include +#include +int main(int argc, char **argv) +{ + return getsockopt(0, 0, TCP_NODELAY, NULL, NULL); +} +EOF +if compile_prog "" "" "TCP_NODELAY"; then + tcp_nodelay="yes" +fi +echo "TCP_NODELAY $tcp_nodelay" + ############################################################################# echo "# Automatically generated by configure - do not modify" > $config_host_mak @@ -994,6 +1030,12 @@ fi if test "$gfio" = "yes" ; then echo "CONFIG_GFIO=y" >> $config_host_mak fi +if test "$sched_idle" = "yes" ; then + output_sym "CONFIG_SCHED_IDLE" +fi +if test "$tcp_nodelay" = "yes" ; then + output_sym "CONFIG_TCP_NODELAY" +fi echo "LIBS+=$LIBS" >> $config_host_mak echo "CFLAGS+=$CFLAGS" >> $config_host_mak diff --git a/diskutil.c b/diskutil.c index fbc4268e..e29d1c34 100644 --- a/diskutil.c +++ b/diskutil.c @@ -276,13 +276,25 @@ static struct disk_util *disk_util_add(struct thread_data *td, int majdev, { struct disk_util *du, *__du; struct flist_head *entry; + int l; dprint(FD_DISKUTIL, "add maj/min %d/%d: %s\n", majdev, mindev, path); du = smalloc(sizeof(*du)); + if (!du) { + log_err("fio: smalloc() pool exhausted\n"); + return NULL; + } + memset(du, 0, sizeof(*du)); INIT_FLIST_HEAD(&du->list); - 
sprintf(du->path, "%s/stat", path); + l = snprintf(du->path, sizeof(du->path), "%s/stat", path); + if (l < 0 || l >= sizeof(du->path)) { + log_err("constructed path \"%.100s[...]/stat\" larger than buffer (%zu bytes)\n", + path, sizeof(du->path) - 1); + sfree(du); + return NULL; + } strncpy((char *) du->dus.name, basename(path), FIO_DU_NAME_SZ); du->sysfs_root = path; du->major = majdev; diff --git a/diskutil.h b/diskutil.h index b89aaccb..ddd64719 100644 --- a/diskutil.h +++ b/diskutil.h @@ -42,7 +42,7 @@ struct disk_util { char *name; char *sysfs_root; - char path[256]; + char path[PATH_MAX]; int major, minor; struct disk_util_stat dus; diff --git a/engines/falloc.c b/engines/falloc.c index 525a0aae..4654fe81 100644 --- a/engines/falloc.c +++ b/engines/falloc.c @@ -44,7 +44,7 @@ open_again: if (f->fd == -1) { char buf[FIO_VERROR_SIZE]; int __e = errno; - snprintf(buf, sizeof(buf) - 1, "open(%s)", f->file_name); + snprintf(buf, sizeof(buf), "open(%s)", f->file_name); td_verror(td, __e, buf); } diff --git a/engines/net.c b/engines/net.c index 94511869..12f49a2a 100644 --- a/engines/net.c +++ b/engines/net.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -35,6 +36,7 @@ struct netio_options { unsigned int proto; unsigned int listen; unsigned int pingpong; + unsigned int nodelay; }; struct udp_close_msg { @@ -96,6 +98,14 @@ static struct fio_option options[] = { }, }, }, +#ifdef CONFIG_TCP_NODELAY + { + .name = "nodelay", + .type = FIO_OPT_BOOL, + .off1 = offsetof(struct netio_options, nodelay), + .help = "Use TCP_NODELAY on TCP connections", + }, +#endif { .name = "listen", .lname = "net engine listen", @@ -456,7 +466,7 @@ static int fio_netio_connect(struct thread_data *td, struct fio_file *f) { struct netio_data *nd = td->io_ops->data; struct netio_options *o = td->eo; - int type, domain; + int type, domain, optval; if (o->proto == FIO_TYPE_TCP) { domain = AF_INET; @@ -479,6 +489,16 @@ static int fio_netio_connect(struct 
thread_data *td, struct fio_file *f) return 1; } +#ifdef CONFIG_TCP_NODELAY + if (o->nodelay && o->proto == FIO_TYPE_TCP) { + optval = 1; + if (setsockopt(f->fd, IPPROTO_TCP, TCP_NODELAY, (void *) &optval, sizeof(int)) < 0) { + log_err("fio: cannot set TCP_NODELAY option on socket (%s), disable with 'nodelay=0'\n", strerror(errno)); + return 1; + } + } +#endif + if (o->proto == FIO_TYPE_UDP) return 0; else if (o->proto == FIO_TYPE_TCP) { @@ -510,7 +530,7 @@ static int fio_netio_accept(struct thread_data *td, struct fio_file *f) struct netio_data *nd = td->io_ops->data; struct netio_options *o = td->eo; socklen_t socklen = sizeof(nd->addr); - int state; + int state, optval; if (o->proto == FIO_TYPE_UDP) { f->fd = nd->listenfd; @@ -531,6 +551,16 @@ static int fio_netio_accept(struct thread_data *td, struct fio_file *f) goto err; } +#ifdef CONFIG_TCP_NODELAY + if (o->nodelay && o->proto == FIO_TYPE_TCP) { + optval = 1; + if (setsockopt(f->fd, IPPROTO_TCP, TCP_NODELAY, (void *) &optval, sizeof(int)) < 0) { + log_err("fio: cannot set TCP_NODELAY option on socket (%s), disable with 'nodelay=0'\n", strerror(errno)); + return 1; + } + } +#endif + reset_all_stats(td); td_set_runstate(td, state); return 0; @@ -751,12 +781,12 @@ static int fio_netio_setup_listen_inet(struct thread_data *td, short port) } opt = 1; - if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (void*)&opt, sizeof(opt)) < 0) { + if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (void *) &opt, sizeof(opt)) < 0) { td_verror(td, errno, "setsockopt"); return 1; } #ifdef SO_REUSEPORT - if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)) < 0) { + if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, (void *) &opt, sizeof(opt)) < 0) { td_verror(td, errno, "setsockopt"); return 1; } diff --git a/eta.c b/eta.c index 066a732f..238a0af1 100644 --- a/eta.c +++ b/eta.c @@ -226,7 +226,8 @@ static int thread_eta(struct thread_data *td) return eta_sec; } -static void calc_rate(unsigned long mtime, unsigned long long *io_bytes, 
+static void calc_rate(int unified_rw_rep, unsigned long mtime, + unsigned long long *io_bytes, unsigned long long *prev_io_bytes, unsigned int *rate) { int i; @@ -235,19 +236,32 @@ static void calc_rate(unsigned long mtime, unsigned long long *io_bytes, unsigned long long diff; diff = io_bytes[i] - prev_io_bytes[i]; - rate[i] = ((1000 * diff) / mtime) / 1024; + if (unified_rw_rep) { + rate[i] = 0; + rate[0] += ((1000 * diff) / mtime) / 1024; + } else + rate[i] = ((1000 * diff) / mtime) / 1024; prev_io_bytes[i] = io_bytes[i]; } } -static void calc_iops(unsigned long mtime, unsigned long long *io_iops, +static void calc_iops(int unified_rw_rep, unsigned long mtime, + unsigned long long *io_iops, unsigned long long *prev_io_iops, unsigned int *iops) { int i; for (i = 0; i < DDIR_RWDIR_CNT; i++) { - iops[i] = ((io_iops[i] - prev_io_iops[i]) * 1000) / mtime; + unsigned long long diff; + + diff = io_iops[i] - prev_io_iops[i]; + if (unified_rw_rep) { + iops[i] = 0; + iops[0] += (diff * 1000) / mtime; + } else + iops[i] = (diff * 1000) / mtime; + prev_io_iops[i] = io_iops[i]; } } @@ -259,7 +273,7 @@ static void calc_iops(unsigned long mtime, unsigned long long *io_iops, int calc_thread_status(struct jobs_eta *je, int force) { struct thread_data *td; - int i; + int i, unified_rw_rep; unsigned long rate_time, disp_time, bw_avg_time, *eta_secs; unsigned long long io_bytes[DDIR_RWDIR_CNT]; unsigned long long io_iops[DDIR_RWDIR_CNT]; @@ -293,7 +307,9 @@ int calc_thread_status(struct jobs_eta *je, int force) io_bytes[DDIR_READ] = io_bytes[DDIR_WRITE] = io_bytes[DDIR_TRIM] = 0; io_iops[DDIR_READ] = io_iops[DDIR_WRITE] = io_iops[DDIR_TRIM] = 0; bw_avg_time = ULONG_MAX; + unified_rw_rep = 0; for_each_td(td, i) { + unified_rw_rep += td->o.unified_rw_rep; if (is_power_of_2(td->o.kb_base)) je->is_pow2 = 1; if (td->o.bw_avg_time < bw_avg_time) @@ -339,9 +355,15 @@ int calc_thread_status(struct jobs_eta *je, int force) if (td->runstate > TD_RAMP) { int ddir; + for (ddir = DDIR_READ; 
ddir < DDIR_RWDIR_CNT; ddir++) { - io_bytes[ddir] += td->io_bytes[ddir]; - io_iops[ddir] += td->io_blocks[ddir]; + if (unified_rw_rep) { + io_bytes[0] += td->io_bytes[ddir]; + io_iops[0] += td->io_blocks[ddir]; + } else { + io_bytes[ddir] += td->io_bytes[ddir]; + io_iops[ddir] += td->io_blocks[ddir]; + } } } } @@ -367,7 +389,8 @@ int calc_thread_status(struct jobs_eta *je, int force) rate_time = mtime_since(&rate_prev_time, &now); if (write_bw_log && rate_time > bw_avg_time && !in_ramp_time(td)) { - calc_rate(rate_time, io_bytes, rate_io_bytes, je->rate); + calc_rate(unified_rw_rep, rate_time, io_bytes, rate_io_bytes, + je->rate); memcpy(&rate_prev_time, &now, sizeof(now)); add_agg_sample(je->rate[DDIR_READ], DDIR_READ, 0); add_agg_sample(je->rate[DDIR_WRITE], DDIR_WRITE, 0); @@ -382,8 +405,8 @@ int calc_thread_status(struct jobs_eta *je, int force) if (!force && disp_time < 900) return 0; - calc_rate(disp_time, io_bytes, disp_io_bytes, je->rate); - calc_iops(disp_time, io_iops, disp_io_iops, je->iops); + calc_rate(unified_rw_rep, disp_time, io_bytes, disp_io_bytes, je->rate); + calc_iops(unified_rw_rep, disp_time, io_iops, disp_io_iops, je->iops); memcpy(&disp_prev_time, &now, sizeof(now)); diff --git a/filesetup.c b/filesetup.c index 8262ed3d..3054d9da 100644 --- a/filesetup.c +++ b/filesetup.c @@ -563,7 +563,7 @@ open_again: if (__e == EMFILE && file_close_shadow_fds(td)) goto open_again; - snprintf(buf, sizeof(buf) - 1, "open(%s)", f->file_name); + snprintf(buf, sizeof(buf), "open(%s)", f->file_name); if (__e == EINVAL && (flags & OS_O_DIRECT)) { log_err("fio: looks like your file system does not " \ @@ -1250,7 +1250,7 @@ static int recurse_dir(struct thread_data *td, const char *dirname) if (!D) { char buf[FIO_VERROR_SIZE]; - snprintf(buf, FIO_VERROR_SIZE - 1, "opendir(%s)", dirname); + snprintf(buf, FIO_VERROR_SIZE, "opendir(%s)", dirname); td_verror(td, errno, buf); return 1; } diff --git a/fio.1 b/fio.1 index d224d540..c6655913 100644 --- a/fio.1 +++ 
b/fio.1 @@ -77,6 +77,9 @@ Background a fio server, writing the pid to the given pid file. .TP .BI \-\-client \fR=\fPhost Instead of running the jobs locally, send and run them on the given host. +.TP +.BI \-\-idle\-prof \fR=\fPoption +Report cpu idleness on a system or percpu basis (\fIoption\fP=system,percpu) or run unit work calibration only (\fIoption\fP=calibrate). .SH "JOB FILE FORMAT" Job files are in `ini' format. They consist of one or more job definitions, which begin with a job name in square brackets and @@ -240,6 +243,11 @@ The base unit for a kilobyte. The de facto base is 2^10, 1024. Storage manufacturers like to use 10^3 or 1000 as a base ten unit instead, for obvious reasons. Allowed values are 1024 or 1000, with 1024 being the default. .TP +.BI unified_rw_reporting \fR=\fPbool +Fio normally reports statistics on a per data direction basis, meaning that +read, write, and trim are accounted and reported separately. If this option is +set, fio will sum the results and report them as "mixed" instead. +.TP .BI randrepeat \fR=\fPbool Seed the random number generator in a predictable way so results are repeatable across runs. Default: true. 
diff --git a/fio.c b/fio.c index 755a4d52..7e6b06d3 100644 --- a/fio.c +++ b/fio.c @@ -26,16 +26,7 @@ #include #include "fio.h" -#include "hash.h" #include "smalloc.h" -#include "verify.h" -#include "trim.h" -#include "diskutil.h" -#include "profile.h" -#include "lib/rand.h" -#include "memalign.h" -#include "client.h" -#include "server.h" int main(int argc, char *argv[], char *envp[]) { diff --git a/fio.h b/fio.h index f0babafe..5bfa4389 100644 --- a/fio.h +++ b/fio.h @@ -325,7 +325,7 @@ enum { int e = (err); \ (td)->error = e; \ if (!(td)->first_error) \ - snprintf(td->verror, sizeof(td->verror) - 1, "file:%s:%d, func=%s, error=%s", __FILE__, __LINE__, (func), (msg)); \ + snprintf(td->verror, sizeof(td->verror), "file:%s:%d, func=%s, error=%s", __FILE__, __LINE__, (func), (msg)); \ } while (0) diff --git a/flow.c b/flow.c index 2993f4e8..b7a2fb12 100644 --- a/flow.c +++ b/flow.c @@ -39,6 +39,9 @@ static struct fio_flow *flow_get(unsigned int id) struct fio_flow *flow = NULL; struct flist_head *n; + if (!flow_lock) + return NULL; + fio_mutex_down(flow_lock); flist_for_each(n, flow_list) { @@ -51,6 +54,10 @@ static struct fio_flow *flow_get(unsigned int id) if (!flow) { flow = smalloc(sizeof(*flow)); + if (!flow) { + log_err("fio: smalloc pool exhausted\n"); + return NULL; + } flow->refs = 0; INIT_FLIST_HEAD(&flow->list); flow->id = id; @@ -66,6 +73,9 @@ static struct fio_flow *flow_get(unsigned int id) static void flow_put(struct fio_flow *flow) { + if (!flow_lock) + return; + fio_mutex_down(flow_lock); if (!--flow->refs) { @@ -92,13 +102,26 @@ void flow_exit_job(struct thread_data *td) void flow_init(void) { - flow_lock = fio_mutex_init(FIO_MUTEX_UNLOCKED); flow_list = smalloc(sizeof(*flow_list)); + if (!flow_list) { + log_err("fio: smalloc pool exhausted\n"); + return; + } + + flow_lock = fio_mutex_init(FIO_MUTEX_UNLOCKED); + if (!flow_lock) { + log_err("fio: failed to allocate flow lock\n"); + sfree(flow_list); + return; + } + INIT_FLIST_HEAD(flow_list); } void 
flow_exit(void) { - fio_mutex_remove(flow_lock); - sfree(flow_list); + if (flow_lock) + fio_mutex_remove(flow_lock); + if (flow_list) + sfree(flow_list); } diff --git a/gettime-thread.c b/gettime-thread.c index da409042..c1b4b096 100644 --- a/gettime-thread.c +++ b/gettime-thread.c @@ -14,12 +14,14 @@ static pthread_t gtod_thread; void fio_gtod_init(void) { fio_tv = smalloc(sizeof(struct timeval)); - assert(fio_tv); + if (!fio_tv) + log_err("fio: smalloc pool exhausted\n"); } static void fio_gtod_update(void) { - gettimeofday(fio_tv, NULL); + if (fio_tv) + gettimeofday(fio_tv, NULL); } static void *gtod_thread_main(void *data) diff --git a/idletime.c b/idletime.c new file mode 100644 index 00000000..244723f0 --- /dev/null +++ b/idletime.c @@ -0,0 +1,475 @@ +#include +#include "json.h" +#include "idletime.h" + +static volatile struct idle_prof_common ipc; + +/* + * Get time to complete an unit work on a particular cpu. + * The minimum number in CALIBRATE_RUNS runs is returned. + */ +static double calibrate_unit(unsigned char *data) +{ + unsigned long t, i, j, k; + struct timeval tps; + double tunit = 0.0; + + for (i = 0; i < CALIBRATE_RUNS; i++) { + + fio_gettime(&tps, NULL); + /* scale for less variance */ + for (j = 0; j < CALIBRATE_SCALE; j++) { + /* unit of work */ + for (k=0; k < page_size; k++) { + data[(k + j) % page_size] = k % 256; + /* + * we won't see STOP here. this is to match + * the same statement in the profiling loop. 
+ */ + if (ipc.status == IDLE_PROF_STATUS_PROF_STOP) + return 0.0; + } + } + + t = utime_since_now(&tps); + if (!t) + continue; + + /* get the minimum time to complete CALIBRATE_SCALE units */ + if ((i == 0) || ((double)t < tunit)) + tunit = (double)t; + } + + return tunit / CALIBRATE_SCALE; +} + +static void *idle_prof_thread_fn(void *data) +{ + int retval; + unsigned long j, k; + struct idle_prof_thread *ipt = data; + + /* wait for all threads are spawned */ + pthread_mutex_lock(&ipt->init_lock); + + /* exit if any other thread failed to start */ + if (ipc.status == IDLE_PROF_STATUS_ABORT) + return NULL; + +#if defined(FIO_HAVE_CPU_AFFINITY) + os_cpu_mask_t cpu_mask; + memset(&cpu_mask, 0, sizeof(cpu_mask)); + fio_cpu_set(&cpu_mask, ipt->cpu); + + if ((retval=fio_setaffinity(gettid(), cpu_mask)) == -1) + log_err("fio: fio_setaffinity failed\n"); +#else + retval = -1; + log_err("fio: fio_setaffinity not supported\n"); +#endif + if (retval == -1) { + ipt->state = TD_EXITED; + pthread_mutex_unlock(&ipt->init_lock); + return NULL; + } + + ipt->cali_time = calibrate_unit(ipt->data); + + /* delay to set IDLE class till now for better calibration accuracy */ +#if defined(CONFIG_SCHED_IDLE) + if ((retval = fio_set_sched_idle())) + log_err("fio: fio_set_sched_idle failed\n"); +#else + retval = -1; + log_err("fio: fio_set_sched_idle not supported\n"); +#endif + if (retval == -1) { + ipt->state = TD_EXITED; + pthread_mutex_unlock(&ipt->init_lock); + return NULL; + } + + ipt->state = TD_INITIALIZED; + + /* signal the main thread that calibration is done */ + pthread_cond_signal(&ipt->cond); + pthread_mutex_unlock(&ipt->init_lock); + + /* wait for other calibration to finish */ + pthread_mutex_lock(&ipt->start_lock); + + /* exit if other threads failed to initialize */ + if (ipc.status == IDLE_PROF_STATUS_ABORT) + return NULL; + + /* exit if we are doing calibration only */ + if (ipc.status == IDLE_PROF_STATUS_CALI_STOP) + return NULL; + + fio_gettime(&ipt->tps, NULL); + 
ipt->state = TD_RUNNING; + + j = 0; + while (1) { + for (k = 0; k < page_size; k++) { + ipt->data[(k + j) % page_size] = k % 256; + if (ipc.status == IDLE_PROF_STATUS_PROF_STOP) { + fio_gettime(&ipt->tpe, NULL); + goto idle_prof_done; + } + } + j++; + } + +idle_prof_done: + + ipt->loops = j + (double) k / page_size; + ipt->state = TD_EXITED; + pthread_mutex_unlock(&ipt->start_lock); + + return NULL; +} + +/* calculate mean and standard deviation to complete an unit of work */ +static void calibration_stats(void) +{ + int i; + double sum = 0.0, var = 0.0; + struct idle_prof_thread *ipt; + + for (i = 0; i < ipc.nr_cpus; i++) { + ipt = &ipc.ipts[i]; + sum += ipt->cali_time; + } + + ipc.cali_mean = sum/ipc.nr_cpus; + + for (i = 0; i < ipc.nr_cpus; i++) { + ipt = &ipc.ipts[i]; + var += pow(ipt->cali_time-ipc.cali_mean, 2); + } + + ipc.cali_stddev = sqrt(var/(ipc.nr_cpus-1)); +} + +void fio_idle_prof_init(void) +{ + int i, ret; + struct timeval tp; + struct timespec ts; + pthread_attr_t tattr; + struct idle_prof_thread *ipt; + + ipc.nr_cpus = cpus_online(); + ipc.status = IDLE_PROF_STATUS_OK; + + if (ipc.opt == IDLE_PROF_OPT_NONE) + return; + + if ((ret = pthread_attr_init(&tattr))) { + log_err("fio: pthread_attr_init %s\n", strerror(ret)); + return; + } + if ((ret = pthread_attr_setscope(&tattr, PTHREAD_SCOPE_SYSTEM))) { + log_err("fio: pthread_attr_setscope %s\n", strerror(ret)); + return; + } + + ipc.ipts = malloc(ipc.nr_cpus * sizeof(struct idle_prof_thread)); + if (!ipc.ipts) { + log_err("fio: malloc failed\n"); + return; + } + + ipc.buf = malloc(ipc.nr_cpus * page_size); + if (!ipc.buf) { + log_err("fio: malloc failed\n"); + free(ipc.ipts); + return; + } + + /* + * profiling aborts on any single thread failure since the + * result won't be accurate if any cpu is not used. 
+ */ + for (i = 0; i < ipc.nr_cpus; i++) { + ipt = &ipc.ipts[i]; + + ipt->cpu = i; + ipt->state = TD_NOT_CREATED; + ipt->data = (unsigned char *)(ipc.buf + page_size * i); + + if ((ret = pthread_mutex_init(&ipt->init_lock, NULL))) { + ipc.status = IDLE_PROF_STATUS_ABORT; + log_err("fio: pthread_mutex_init %s\n", strerror(ret)); + break; + } + + if ((ret = pthread_mutex_init(&ipt->start_lock, NULL))) { + ipc.status = IDLE_PROF_STATUS_ABORT; + log_err("fio: pthread_mutex_init %s\n", strerror(ret)); + break; + } + + if ((ret = pthread_cond_init(&ipt->cond, NULL))) { + ipc.status = IDLE_PROF_STATUS_ABORT; + log_err("fio: pthread_cond_init %s\n", strerror(ret)); + break; + } + + /* make sure all threads are spawned before they start */ + pthread_mutex_lock(&ipt->init_lock); + + /* make sure all threads finish init before profiling starts */ + pthread_mutex_lock(&ipt->start_lock); + + if ((ret = pthread_create(&ipt->thread, &tattr, idle_prof_thread_fn, ipt))) { + ipc.status = IDLE_PROF_STATUS_ABORT; + log_err("fio: pthread_create %s\n", strerror(ret)); + break; + } else + ipt->state = TD_CREATED; + + if ((ret = pthread_detach(ipt->thread))) { + /* log error and let the thread spin */ + log_err("fio: pthread_detatch %s\n", strerror(ret)); + } + } + + /* + * let good threads continue so that they can exit + * if errors on other threads occurred previously. 
+ */ + for (i = 0; i < ipc.nr_cpus; i++) { + ipt = &ipc.ipts[i]; + pthread_mutex_unlock(&ipt->init_lock); + } + + if (ipc.status == IDLE_PROF_STATUS_ABORT) + return; + + /* wait for calibration to finish */ + for (i = 0; i < ipc.nr_cpus; i++) { + ipt = &ipc.ipts[i]; + pthread_mutex_lock(&ipt->init_lock); + while ((ipt->state != TD_EXITED) && + (ipt->state!=TD_INITIALIZED)) { + fio_gettime(&tp, NULL); + ts.tv_sec = tp.tv_sec + 1; + ts.tv_nsec = tp.tv_usec * 1000; + pthread_cond_timedwait(&ipt->cond, &ipt->init_lock, &ts); + } + pthread_mutex_unlock(&ipt->init_lock); + + /* + * any thread failed to initialize would abort other threads + * later after fio_idle_prof_start. + */ + if (ipt->state == TD_EXITED) + ipc.status = IDLE_PROF_STATUS_ABORT; + } + + if (ipc.status != IDLE_PROF_STATUS_ABORT) + calibration_stats(); + else + ipc.cali_mean = ipc.cali_stddev = 0.0; + + if (ipc.opt == IDLE_PROF_OPT_CALI) + ipc.status = IDLE_PROF_STATUS_CALI_STOP; +} + +void fio_idle_prof_start(void) +{ + int i; + struct idle_prof_thread *ipt; + + if (ipc.opt == IDLE_PROF_OPT_NONE) + return; + + /* unlock regardless abort is set or not */ + for (i = 0; i < ipc.nr_cpus; i++) { + ipt = &ipc.ipts[i]; + pthread_mutex_unlock(&ipt->start_lock); + } +} + +void fio_idle_prof_stop(void) +{ + int i; + uint64_t runt; + struct timeval tp; + struct timespec ts; + struct idle_prof_thread *ipt; + + if (ipc.opt == IDLE_PROF_OPT_NONE) + return; + + if (ipc.opt == IDLE_PROF_OPT_CALI) + return; + + ipc.status = IDLE_PROF_STATUS_PROF_STOP; + + /* wait for all threads to exit from profiling */ + for (i = 0; i < ipc.nr_cpus; i++) { + ipt = &ipc.ipts[i]; + pthread_mutex_lock(&ipt->start_lock); + while ((ipt->state != TD_EXITED) && + (ipt->state!=TD_NOT_CREATED)) { + fio_gettime(&tp, NULL); + ts.tv_sec = tp.tv_sec + 1; + ts.tv_nsec = tp.tv_usec * 1000; + /* timed wait in case a signal is not received */ + pthread_cond_timedwait(&ipt->cond, &ipt->start_lock, &ts); + } + pthread_mutex_unlock(&ipt->start_lock); + 
+ /* calculate idleness */ + if (ipc.cali_mean != 0.0) { + runt = utime_since(&ipt->tps, &ipt->tpe); + ipt->idleness = ipt->loops * ipc.cali_mean / runt; + } else + ipt->idleness = 0.0; + } + + /* + * memory allocations are freed via explicit fio_idle_prof_cleanup + * after profiling stats are collected by apps. + */ +} + +/* + * return system idle percentage when cpu is -1; + * return one cpu idle percentage otherwise. + */ +static double fio_idle_prof_cpu_stat(int cpu) +{ + int i, nr_cpus = ipc.nr_cpus; + struct idle_prof_thread *ipt; + double p = 0.0; + + if (ipc.opt == IDLE_PROF_OPT_NONE) + return 0.0; + + if ((cpu >= nr_cpus) || (cpu < -1)) { + log_err("fio: idle profiling invalid cpu index\n"); + return 0.0; + } + + if (cpu == -1) { + for (i = 0; i < nr_cpus; i++) { + ipt = &ipc.ipts[i]; + p += ipt->idleness; + } + p /= nr_cpus; + } else { + ipt = &ipc.ipts[cpu]; + p = ipt->idleness; + } + + return p * 100.0; +} + +void fio_idle_prof_cleanup(void) +{ + if (ipc.ipts) { + free(ipc.ipts); + ipc.ipts = NULL; + } + + if (ipc.buf) { + free(ipc.buf); + ipc.buf = NULL; + } +} + +int fio_idle_prof_parse_opt(const char *args) +{ + ipc.opt = IDLE_PROF_OPT_NONE; /* default */ + + if (!args) { + log_err("fio: empty idle-prof option string\n"); + return -1; + } + +#if defined(FIO_HAVE_CPU_AFFINITY) && defined(CONFIG_SCHED_IDLE) + if (strcmp("calibrate", args) == 0) { + ipc.opt = IDLE_PROF_OPT_CALI; + fio_idle_prof_init(); + fio_idle_prof_start(); + fio_idle_prof_stop(); + show_idle_prof_stats(FIO_OUTPUT_NORMAL, NULL); + return 1; + } else if (strcmp("system", args) == 0) { + ipc.opt = IDLE_PROF_OPT_SYSTEM; + return 0; + } else if (strcmp("percpu", args) == 0) { + ipc.opt = IDLE_PROF_OPT_PERCPU; + return 0; + } else { + log_err("fio: incorrect idle-prof option\n", args); + return -1; + } +#else + log_err("fio: idle-prof not supported on this platform\n"); + return -1; +#endif +} + +void show_idle_prof_stats(int output, struct json_object *parent) +{ + int i, nr_cpus = 
ipc.nr_cpus; + struct json_object *tmp; + char s[MAX_CPU_STR_LEN]; + + if (output == FIO_OUTPUT_NORMAL) { + if (ipc.opt > IDLE_PROF_OPT_CALI) + log_info("\nCPU idleness:\n"); + else if (ipc.opt == IDLE_PROF_OPT_CALI) + log_info("CPU idleness:\n"); + + if (ipc.opt >= IDLE_PROF_OPT_SYSTEM) + log_info(" system: %3.2f%%\n", fio_idle_prof_cpu_stat(-1)); + + if (ipc.opt == IDLE_PROF_OPT_PERCPU) { + log_info(" percpu: %3.2f%%", fio_idle_prof_cpu_stat(0)); + for (i = 1; i < nr_cpus; i++) + log_info(", %3.2f%%", fio_idle_prof_cpu_stat(i)); + log_info("\n"); + } + + if (ipc.opt >= IDLE_PROF_OPT_CALI) { + log_info(" unit work: mean=%3.2fus,", ipc.cali_mean); + log_info(" stddev=%3.2f\n", ipc.cali_stddev); + } + + /* dynamic mem allocations can now be freed */ + if (ipc.opt != IDLE_PROF_OPT_NONE) + fio_idle_prof_cleanup(); + + return; + } + + if ((ipc.opt != IDLE_PROF_OPT_NONE) && (output == FIO_OUTPUT_JSON)) { + if (!parent) + return; + + tmp = json_create_object(); + if (!tmp) + return; + + json_object_add_value_object(parent, "cpu_idleness", tmp); + json_object_add_value_float(tmp, "system", fio_idle_prof_cpu_stat(-1)); + + if (ipc.opt == IDLE_PROF_OPT_PERCPU) { + for (i = 0; i < nr_cpus; i++) { + snprintf(s, MAX_CPU_STR_LEN, "cpu-%d", i); + json_object_add_value_float(tmp, s, fio_idle_prof_cpu_stat(i)); + } + } + + json_object_add_value_float(tmp, "unit_mean", ipc.cali_mean); + json_object_add_value_float(tmp, "unit_stddev", ipc.cali_stddev); + + fio_idle_prof_cleanup(); + } +} diff --git a/idletime.h b/idletime.h new file mode 100644 index 00000000..acb8407d --- /dev/null +++ b/idletime.h @@ -0,0 +1,57 @@ +#ifndef FIO_IDLETIME_H +#define FIO_IDLETIME_H + +#include "fio.h" + +#define CALIBRATE_RUNS 10 +#define CALIBRATE_SCALE 1000 +#define MAX_CPU_STR_LEN 32 + +enum { + IDLE_PROF_OPT_NONE, + IDLE_PROF_OPT_CALI, /* calibration only */ + IDLE_PROF_OPT_SYSTEM, + IDLE_PROF_OPT_PERCPU +}; + +enum { + IDLE_PROF_STATUS_OK, + IDLE_PROF_STATUS_CALI_STOP, + 
IDLE_PROF_STATUS_PROF_STOP, + IDLE_PROF_STATUS_ABORT +}; + +struct idle_prof_thread { + pthread_t thread; + int cpu; + int state; + struct timeval tps; + struct timeval tpe; + double cali_time; /* microseconds to finish a unit work */ + double loops; /* presumably unit works completed while profiling -- confirm */ + double idleness; /* loops * cali_mean / runtime; reported as a percentage */ + unsigned char *data; /* bytes to be touched */ + pthread_cond_t cond; + pthread_mutex_t init_lock; + pthread_mutex_t start_lock; +}; + +struct idle_prof_common { + struct idle_prof_thread *ipts; + int nr_cpus; + int status; + int opt; + double cali_mean; + double cali_stddev; + void *buf; /* single data allocation for all threads */ +}; + +extern int fio_idle_prof_parse_opt(const char *); + +extern void fio_idle_prof_init(void); +extern void fio_idle_prof_start(void); +extern void fio_idle_prof_stop(void); + +extern void show_idle_prof_stats(int, struct json_object *); + +#endif diff --git a/init.c b/init.c index 688c8efa..f5a1693f 100644 --- a/init.c +++ b/init.c @@ -22,6 +22,7 @@ #include "verify.h" #include "profile.h" #include "server.h" +#include "idletime.h" #include "lib/getopt.h" @@ -211,6 +212,11 @@ static struct option l_opts[FIO_NR_OPTIONS] = { .has_arg = no_argument, .val = 'T', }, + { + .name = (char *) "idle-prof", + .has_arg = required_argument, + .val = 'I', + }, { .name = NULL, }, @@ -628,7 +634,7 @@ char *fio_uint_to_kmg(unsigned int val) p++; } while (*p); - snprintf(buf, 31, "%u%c", val, *p); + snprintf(buf, 32, "%u%c", val, *p); return buf; } @@ -1290,6 +1296,9 @@ static void usage(const char *name) printf(" --server=args\t\tStart a backend fio server\n"); printf(" --daemonize=pidfile\tBackground fio server, write pid to file\n"); printf(" --client=hostname\tTalk to remote backend fio server at hostname\n"); + printf(" --idle-prof=option\tReport cpu idleness on a system or percpu basis\n" + "\t\t\t(option=system,percpu) or run unit work\n" + "\t\t\tcalibration only (option=calibrate)\n"); printf("\nFio was written by Jens Axboe "); printf("\n Jens Axboe \n"); } @@ -1653,6
+1662,14 @@ int parse_cmd_line(int argc, char *argv[], int client_type) case 'D': pid_file = strdup(optarg); break; + case 'I': + if ((ret = fio_idle_prof_parse_opt(optarg))) { + /* exit on error and calibration only */ + do_exit++; + if (ret == -1) + exit_val = 1; + } + break; case 'C': if (is_backend) { log_err("fio: can't be both client and server\n"); diff --git a/iolog.c b/iolog.c index 137c1e98..e54016d6 100644 --- a/iolog.c +++ b/iolog.c @@ -534,7 +534,7 @@ void finish_log_named(struct thread_data *td, struct io_log *log, { char file_name[256], *p; - snprintf(file_name, 200, "%s_%s.log", prefix, postfix); + snprintf(file_name, sizeof(file_name), "%s_%s.log", prefix, postfix); p = basename(file_name); if (td->client_type == FIO_CLIENT_TYPE_GUI) { diff --git a/log.c b/log.c index a26dee87..d481edf2 100644 --- a/log.c +++ b/log.c @@ -12,6 +12,7 @@ int log_valist(const char *str, va_list args) size_t len; len = vsnprintf(buffer, sizeof(buffer), str, args); + len = min(len, sizeof(buffer) - 1); if (log_syslog) syslog(LOG_INFO, "%s", buffer); @@ -40,6 +41,7 @@ int log_local(const char *format, ...) va_start(args, format); len = vsnprintf(buffer, sizeof(buffer), format, args); va_end(args); + len = min(len, sizeof(buffer) - 1); if (log_syslog) syslog(LOG_INFO, "%s", buffer); @@ -58,6 +60,7 @@ int log_info(const char *format, ...) va_start(args, format); len = vsnprintf(buffer, sizeof(buffer), format, args); va_end(args); + len = min(len, sizeof(buffer) - 1); if (is_backend) return fio_server_text_output(FIO_LOG_INFO, buffer, len); @@ -77,6 +80,7 @@ int log_err(const char *format, ...) 
va_start(args, format); len = vsnprintf(buffer, sizeof(buffer), format, args); va_end(args); + len = min(len, sizeof(buffer) - 1); if (is_backend) return fio_server_text_output(FIO_LOG_ERR, buffer, len); diff --git a/options.c b/options.c index 9d49ff17..c39a6b40 100644 --- a/options.c +++ b/options.c @@ -2941,6 +2941,13 @@ struct fio_option fio_options[FIO_MAX_OPTS] = { .category = FIO_OPT_C_GENERAL, .group = FIO_OPT_G_CLOCK, }, + { + .name = "unified_rw_reporting", + .type = FIO_OPT_BOOL, + .off1 = td_var_offset(unified_rw_rep), + .help = "Unify reporting across data direction", + .def = "0", + }, { .name = "continue_on_error", .lname = "Continue on error", diff --git a/os/os-linux.h b/os/os-linux.h index 75964dca..869a25d8 100644 --- a/os/os-linux.h +++ b/os/os-linux.h @@ -253,4 +253,12 @@ static inline int os_trim(int fd, unsigned long long start, return errno; } +#ifdef CONFIG_SCHED_IDLE +static inline int fio_set_sched_idle(void) +{ + struct sched_param p = { .sched_priority = 0, }; + return sched_setscheduler(gettid(), SCHED_IDLE, &p); +} +#endif + #endif diff --git a/os/os-windows.h b/os/os-windows.h index ef71dd70..98f90305 100644 --- a/os/os-windows.h +++ b/os/os-windows.h @@ -247,4 +247,11 @@ static inline int init_random_state(struct thread_data *td, unsigned long *rand_ } +static inline int fio_set_sched_idle(void) +{ + /* SetThreadPriority returns nonzero for success */ + return (SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_IDLE))? 
0 : -1; +} + + #endif /* FIO_OS_WINDOWS_H */ diff --git a/os/windows/posix/include/netinet/tcp.h b/os/windows/posix/include/netinet/tcp.h new file mode 100644 index 00000000..250c4c35 --- /dev/null +++ b/os/windows/posix/include/netinet/tcp.h @@ -0,0 +1,4 @@ +#ifndef NETINET_TCP_H +#define NETINET_TCP_H + +#endif diff --git a/server.c b/server.c index 514306dc..8e5ca50f 100644 --- a/server.c +++ b/server.c @@ -666,8 +666,6 @@ static int handle_send_eta_cmd(struct fio_net_cmd *cmd) je->t_rate[i] = cpu_to_le32(je->t_rate[i]); je->m_iops[i] = cpu_to_le32(je->m_iops[i]); je->t_iops[i] = cpu_to_le32(je->t_iops[i]); - je->rate[i] = cpu_to_le32(je->rate[i]); - je->iops[i] = cpu_to_le32(je->iops[i]); } je->elapsed_sec = cpu_to_le64(je->elapsed_sec); @@ -938,6 +936,7 @@ static void convert_gs(struct group_run_stats *dst, struct group_run_stats *src) dst->kb_base = cpu_to_le32(src->kb_base); dst->groupid = cpu_to_le32(src->groupid); + dst->unified_rw_rep = cpu_to_le32(src->unified_rw_rep); } /* @@ -962,6 +961,7 @@ void fio_server_send_ts(struct thread_stat *ts, struct group_run_stats *rs) p.ts.groupid = cpu_to_le32(ts->groupid); p.ts.pid = cpu_to_le32(ts->pid); p.ts.members = cpu_to_le32(ts->members); + p.ts.unified_rw_rep = cpu_to_le32(ts->unified_rw_rep); for (i = 0; i < DDIR_RWDIR_CNT; i++) { convert_io_stat(&p.ts.clat_stat[i], &ts->clat_stat[i]); @@ -999,7 +999,7 @@ void fio_server_send_ts(struct thread_stat *ts, struct group_run_stats *rs) for (j = 0; j < FIO_IO_U_PLAT_NR; j++) p.ts.io_u_plat[i][j] = cpu_to_le32(ts->io_u_plat[i][j]); - for (i = 0; i < 3; i++) { + for (i = 0; i < DDIR_RWDIR_CNT; i++) { p.ts.total_io_u[i] = cpu_to_le64(ts->total_io_u[i]); p.ts.short_io_u[i] = cpu_to_le64(ts->short_io_u[i]); } diff --git a/server.h b/server.h index fa88d241..e84a7095 100644 --- a/server.h +++ b/server.h @@ -38,7 +38,7 @@ struct fio_net_cmd_reply { }; enum { - FIO_SERVER_VER = 19, + FIO_SERVER_VER = 20, FIO_SERVER_MAX_FRAGMENT_PDU = 1024, diff --git a/stat.c b/stat.c index 
7cf66f00..26b3133a 100644 --- a/stat.c +++ b/stat.c @@ -12,6 +12,7 @@ #include "lib/ieee754.h" #include "json.h" #include "lib/getrusage.h" +#include "idletime.h" void update_rusage_stat(struct thread_data *td) { @@ -275,9 +276,9 @@ void show_group_stats(struct group_run_stats *rs) p4 = num2str(rs->max_bw[i], 6, rs->kb_base, i2p); log_info("%s: io=%sB, aggrb=%sB/s, minb=%sB/s, maxb=%sB/s," - " mint=%llumsec, maxt=%llumsec\n", ddir_str[i], p1, p2, - p3, p4, rs->min_run[i], - rs->max_run[i]); + " mint=%llumsec, maxt=%llumsec\n", + rs->unified_rw_rep ? " MIXED" : ddir_str[i], + p1, p2, p3, p4, rs->min_run[i], rs->max_run[i]); free(p1); free(p2); @@ -379,8 +380,8 @@ static void show_ddir_status(struct group_run_stats *rs, struct thread_stat *ts, iops_p = num2str(iops, 6, 1, 0); log_info(" %s: io=%sB, bw=%sB/s, iops=%s, runt=%6llumsec\n", - ddir_str[ddir], io_p, bw_p, iops_p, - ts->runtime[ddir]); + rs->unified_rw_rep ? "mixed" : ddir_str[ddir], + io_p, bw_p, iops_p, ts->runtime[ddir]); free(io_p); free(bw_p); @@ -654,8 +655,12 @@ static void add_ddir_status_json(struct thread_stat *ts, assert(ddir_rw(ddir)); + if (ts->unified_rw_rep && ddir != DDIR_READ) + return; + dir_object = json_create_object(); - json_object_add_value_object(parent, ddirname[ddir], dir_object); + json_object_add_value_object(parent, + ts->unified_rw_rep ? 
"mixed" : ddirname[ddir], dir_object); iops = bw = 0; if (ts->runtime[ddir]) { @@ -707,7 +712,7 @@ static void add_ddir_status_json(struct thread_stat *ts, json_object_add_value_int(percentile_object, "0.00", 0); continue; } - snprintf(buf, sizeof(buf) - 1, "%2.2f", ts->percentile_list[i].u.f); + snprintf(buf, sizeof(buf), "%2.2f", ts->percentile_list[i].u.f); json_object_add_value_int(percentile_object, (const char *)buf, ovals[i]); } @@ -913,9 +918,9 @@ static struct json_object *show_thread_status_json(struct thread_stat *ts, for (i = 0; i < 7; i++) { char name[20]; if (i < 6) - snprintf(name, 19, "%d", 1 << i); + snprintf(name, 20, "%d", 1 << i); else - snprintf(name, 19, ">=%d", 1 << i); + snprintf(name, 20, ">=%d", 1 << i); json_object_add_value_float(tmp, (const char *)name, io_u_dist[i]); } @@ -1021,15 +1026,27 @@ void sum_thread_stats(struct thread_stat *dst, struct thread_stat *src, int nr) int l, k; for (l = 0; l < DDIR_RWDIR_CNT; l++) { - sum_stat(&dst->clat_stat[l], &src->clat_stat[l], nr); - sum_stat(&dst->slat_stat[l], &src->slat_stat[l], nr); - sum_stat(&dst->lat_stat[l], &src->lat_stat[l], nr); - sum_stat(&dst->bw_stat[l], &src->bw_stat[l], nr); - - dst->io_bytes[l] += src->io_bytes[l]; - - if (dst->runtime[l] < src->runtime[l]) - dst->runtime[l] = src->runtime[l]; + if (!dst->unified_rw_rep) { + sum_stat(&dst->clat_stat[l], &src->clat_stat[l], nr); + sum_stat(&dst->slat_stat[l], &src->slat_stat[l], nr); + sum_stat(&dst->lat_stat[l], &src->lat_stat[l], nr); + sum_stat(&dst->bw_stat[l], &src->bw_stat[l], nr); + + dst->io_bytes[l] += src->io_bytes[l]; + + if (dst->runtime[l] < src->runtime[l]) + dst->runtime[l] = src->runtime[l]; + } else { + sum_stat(&dst->clat_stat[0], &src->clat_stat[l], nr); + sum_stat(&dst->slat_stat[0], &src->slat_stat[l], nr); + sum_stat(&dst->lat_stat[0], &src->lat_stat[l], nr); + sum_stat(&dst->bw_stat[0], &src->bw_stat[l], nr); + + dst->io_bytes[0] += src->io_bytes[l]; + + if (dst->runtime[0] < src->runtime[l]) + 
dst->runtime[0] = src->runtime[l]; + } } dst->usr_time += src->usr_time; @@ -1050,14 +1067,24 @@ void sum_thread_stats(struct thread_stat *dst, struct thread_stat *src, int nr) dst->io_u_lat_m[k] += src->io_u_lat_m[k]; for (k = 0; k < DDIR_RWDIR_CNT; k++) { - dst->total_io_u[k] += src->total_io_u[k]; - dst->short_io_u[k] += src->short_io_u[k]; + if (!dst->unified_rw_rep) { + dst->total_io_u[k] += src->total_io_u[k]; + dst->short_io_u[k] += src->short_io_u[k]; + } else { + dst->total_io_u[0] += src->total_io_u[k]; + dst->short_io_u[0] += src->short_io_u[k]; + } } for (k = 0; k < DDIR_RWDIR_CNT; k++) { int m; - for (m = 0; m < FIO_IO_U_PLAT_NR; m++) - dst->io_u_plat[k][m] += src->io_u_plat[k][m]; + + for (m = 0; m < FIO_IO_U_PLAT_NR; m++) { + if (!dst->unified_rw_rep) + dst->io_u_plat[k][m] += src->io_u_plat[k][m]; + else + dst->io_u_plat[0][m] += src->io_u_plat[k][m]; + } } dst->total_run_time += src->total_run_time; @@ -1174,6 +1201,7 @@ void show_run_stats(void) ts->pid = td->pid; ts->kb_base = td->o.kb_base; + ts->unified_rw_rep = td->o.unified_rw_rep; } else if (ts->kb_base != td->o.kb_base && !kb_base_warned) { log_info("fio: kb_base differs for jobs in group, using" " %u as the base\n", ts->kb_base); @@ -1203,6 +1231,7 @@ void show_run_stats(void) ts = &threadstats[i]; rs = &runstats[ts->groupid]; rs->kb_base = ts->kb_base; + rs->unified_rw_rep += ts->unified_rw_rep; for (j = 0; j < DDIR_RWDIR_CNT; j++) { if (!ts->runtime[j]) @@ -1271,6 +1300,8 @@ void show_run_stats(void) /* disk util stats, if any */ show_disk_util(1, root); + show_idle_prof_stats(FIO_OUTPUT_JSON, root); + json_print_object(root); log_info("\n"); json_free_object(root); @@ -1291,6 +1322,8 @@ void show_run_stats(void) else if (output_format == FIO_OUTPUT_NORMAL) show_disk_util(0, NULL); + show_idle_prof_stats(FIO_OUTPUT_NORMAL, NULL); + free(runstats); free(threadstats); } diff --git a/stat.h b/stat.h index f35f1f60..ba4c2bf2 100644 --- a/stat.h +++ b/stat.h @@ -10,6 +10,7 @@ struct 
group_run_stats { uint64_t agg[DDIR_RWDIR_CNT]; uint32_t kb_base; uint32_t groupid; + uint32_t unified_rw_rep; }; /* @@ -123,6 +124,7 @@ struct thread_stat { uint32_t pid; char description[FIO_JOBNAME_SIZE]; uint32_t members; + uint32_t unified_rw_rep; /* * bandwidth and latency stats diff --git a/t/log.c b/t/log.c index ac023032..76ae68ed 100644 --- a/t/log.c +++ b/t/log.c @@ -10,6 +10,7 @@ int log_err(const char *format, ...) va_start(args, format); len = vsnprintf(buffer, sizeof(buffer), format, args); va_end(args); + len = min(len, sizeof(buffer) - 1); return fwrite(buffer, len, 1, stderr); } @@ -23,6 +24,7 @@ int log_info(const char *format, ...) va_start(args, format); len = vsnprintf(buffer, sizeof(buffer), format, args); va_end(args); + len = min(len, sizeof(buffer) - 1); return fwrite(buffer, len, 1, stdout); } diff --git a/thread_options.h b/thread_options.h index 21c1dac5..ae83f08a 100644 --- a/thread_options.h +++ b/thread_options.h @@ -186,6 +186,7 @@ struct thread_options { unsigned int disable_clat; unsigned int disable_slat; unsigned int disable_bw; + unsigned int unified_rw_rep; unsigned int gtod_reduce; unsigned int gtod_cpu; unsigned int gtod_offload; @@ -385,6 +386,7 @@ struct thread_options_pack { uint32_t disable_clat; uint32_t disable_slat; uint32_t disable_bw; + uint32_t unified_rw_rep; uint32_t gtod_reduce; uint32_t gtod_cpu; uint32_t gtod_offload; diff --git a/verify.c b/verify.c index fa24702b..787cc377 100644 --- a/verify.c +++ b/verify.c @@ -10,7 +10,6 @@ #include "fio.h" #include "verify.h" -#include "smalloc.h" #include "trim.h" #include "lib/rand.h" #include "lib/hweight.h"