t/zbd: Align block size to zone capacity
[fio.git] / stat.c
diff --git a/stat.c b/stat.c
index 55d83fcc903e62e5e91fb49ac35caf24535cac6a..30f9b5c1e995d49174aebc2fd48ebe6e51d34acf 100644 (file)
--- a/stat.c
+++ b/stat.c
 #include "zbd.h"
 #include "oslib/asprintf.h"
 
+#ifdef WIN32
+#define LOG_MSEC_SLACK 2
+#else
 #define LOG_MSEC_SLACK 1
+#endif
 
 struct fio_sem *stat_sem;
 
@@ -159,7 +163,7 @@ unsigned int calc_clat_percentiles(uint64_t *io_u_plat, unsigned long long nr,
         * isn't a worry. Also note that this does not work for NaN values.
         */
        if (len > 1)
-               qsort((void *)plist, len, sizeof(plist[0]), double_cmp);
+               qsort(plist, len, sizeof(plist[0]), double_cmp);
 
        ovals = malloc(len * sizeof(*ovals));
        if (!ovals)
@@ -211,7 +215,7 @@ static void show_clat_percentiles(uint64_t *io_u_plat, unsigned long long nr,
 
        len = calc_clat_percentiles(io_u_plat, nr, plist, &ovals, &maxv, &minv);
        if (!len || !ovals)
-               goto out;
+               return;
 
        /*
         * We default to nsecs, but if the value range is such that we
@@ -258,9 +262,7 @@ static void show_clat_percentiles(uint64_t *io_u_plat, unsigned long long nr,
                        log_buf(out, "\n");
        }
 
-out:
-       if (ovals)
-               free(ovals);
+       free(ovals);
 }
 
 bool calc_lat(struct io_stat *is, unsigned long long *min,
@@ -283,6 +285,46 @@ bool calc_lat(struct io_stat *is, unsigned long long *min,
        return true;
 }
 
+/*
+ * Emit one "MIXED" summary line for a group by folding together every
+ * data direction whose max_run is non-zero: io bytes and aggregate
+ * bandwidth are summed, while the bandwidth and runtime ranges are
+ * widened to cover all contributing directions.
+ */
+void show_mixed_group_stats(struct group_run_stats *rs, struct buf_output *out)
+{
+       const int pow2 = is_power_of_2(rs->kb_base);
+       /* the "low" trackers start at the largest uint64_t value */
+       uint64_t bw_lo = -1, bw_hi = 0;
+       uint64_t run_lo = -1, run_hi = 0;
+       uint64_t total_io = 0, total_agg = 0;
+       char *io_str, *io_alt, *agg_str, *agg_alt;
+       char *lo_str, *lo_alt, *hi_str, *hi_alt;
+       int d;
+
+       for (d = 0; d < DDIR_RWDIR_CNT; d++) {
+               /* directions that never ran do not contribute */
+               if (rs->max_run[d]) {
+                       total_io += rs->iobytes[d];
+                       total_agg += rs->agg[d];
+
+                       if (!(bw_lo < rs->min_bw[d]))
+                               bw_lo = rs->min_bw[d];
+                       if (!(bw_hi > rs->max_bw[d]))
+                               bw_hi = rs->max_bw[d];
+                       if (!(run_lo < rs->min_run[d]))
+                               run_lo = rs->min_run[d];
+                       if (!(run_hi > rs->max_run[d]))
+                               run_hi = rs->max_run[d];
+               }
+       }
+
+       /* each value is rendered twice: native base and the alternate base */
+       io_str = num2str(total_io, rs->sig_figs, 1, pow2, N2S_BYTE);
+       io_alt = num2str(total_io, rs->sig_figs, 1, !pow2, N2S_BYTE);
+       agg_str = num2str(total_agg, rs->sig_figs, 1, pow2, rs->unit_base);
+       agg_alt = num2str(total_agg, rs->sig_figs, 1, !pow2, rs->unit_base);
+       lo_str = num2str(bw_lo, rs->sig_figs, 1, pow2, rs->unit_base);
+       lo_alt = num2str(bw_lo, rs->sig_figs, 1, !pow2, rs->unit_base);
+       hi_str = num2str(bw_hi, rs->sig_figs, 1, pow2, rs->unit_base);
+       hi_alt = num2str(bw_hi, rs->sig_figs, 1, !pow2, rs->unit_base);
+
+       log_buf(out, "  MIXED: bw=%s (%s), %s-%s (%s-%s), io=%s (%s), run=%llu-%llumsec\n",
+                       agg_str, agg_alt, lo_str, hi_str, lo_alt, hi_alt,
+                       io_str, io_alt,
+                       (unsigned long long) run_lo,
+                       (unsigned long long) run_hi);
+
+       free(io_str);
+       free(io_alt);
+       free(agg_str);
+       free(agg_alt);
+       free(lo_str);
+       free(lo_alt);
+       free(hi_str);
+       free(hi_alt);
+}
+
 void show_group_stats(struct group_run_stats *rs, struct buf_output *out)
 {
        char *io, *agg, *min, *max;
@@ -307,7 +349,7 @@ void show_group_stats(struct group_run_stats *rs, struct buf_output *out)
                max = num2str(rs->max_bw[i], rs->sig_figs, 1, i2p, rs->unit_base);
                maxalt = num2str(rs->max_bw[i], rs->sig_figs, 1, !i2p, rs->unit_base);
                log_buf(out, "%s: bw=%s (%s), %s-%s (%s-%s), io=%s (%s), run=%llu-%llumsec\n",
-                               rs->unified_rw_rep ? "  MIXED" : str[i],
+                               (rs->unified_rw_rep == UNIFIED_MIXED) ? "  MIXED" : str[i],
                                agg, aggalt, min, max, minalt, maxalt, io, ioalt,
                                (unsigned long long) rs->min_run[i],
                                (unsigned long long) rs->max_run[i]);
@@ -321,6 +363,10 @@ void show_group_stats(struct group_run_stats *rs, struct buf_output *out)
                free(minalt);
                free(maxalt);
        }
+       
+       /* Need to aggregate statistics to show mixed values */
+       if (rs->unified_rw_rep == UNIFIED_BOTH) 
+               show_mixed_group_stats(rs, out);
 }
 
 void stat_calc_dist(uint64_t *map, unsigned long total, double *io_u_dist)
@@ -415,6 +461,180 @@ static void display_lat(const char *name, unsigned long long min,
        free(maxp);
 }
 
+/*
+ * Return @mean as a percentage of rs->agg[ddir] / 1024, capped at 100.
+ * When @rs is NULL or the aggregate is not above 1024, 100 is returned.
+ *
+ * NOTE(review): @mean is an int while the visible callers pass a double,
+ * so the fractional part is dropped before the division - confirm that
+ * this truncation is intended.
+ */
+static double convert_agg_kbytes_percent(struct group_run_stats *rs, int ddir, int mean)
+{
+       double pct;
+
+       if (!rs || !(rs->agg[ddir] > 1024))
+               return 100.0;
+
+       pct = mean * 100.0 / (double) (rs->agg[ddir] / 1024.0);
+
+       return pct > 100.0 ? 100.0 : pct;
+}
+
+/*
+ * Print the human-readable "mixed" section for one job: reads, writes and
+ * trims from @ts are summed into a scratch thread_stat and the result is
+ * reported from slot 0 (IOPS/BW line, latencies, percentiles, bw and iops
+ * sample statistics).
+ *
+ * @rs:  group stats, used for kb_base/unit_base and the aggregate share
+ * @ts:  source per-job stats; also supplies the percentile settings
+ * @out: output buffer
+ *
+ * The scratch copy is released on every exit path. If it cannot be
+ * allocated the section is skipped.
+ */
+static void show_mixed_ddir_status(struct group_run_stats *rs, struct thread_stat *ts,
+                            struct buf_output *out)
+{
+       unsigned long runt;
+       unsigned long long min, max, bw, iops;
+       double mean, dev;
+       char *io_p, *bw_p, *bw_p_alt, *iops_p;
+       struct thread_stat *ts_lcl;
+
+       int i2p;
+       int ddir = 0, i;
+
+       /* Handle aggregation of Reads (ddir = 0), Writes (ddir = 1), and Trims (ddir = 2) */
+       ts_lcl = calloc(1, sizeof(*ts_lcl));
+       if (!ts_lcl)
+               return;
+
+       ts_lcl->unified_rw_rep = UNIFIED_MIXED;               /* calculate mixed stats  */
+       for (i = 0; i < DDIR_RWDIR_CNT; i++) {
+               ts_lcl->clat_stat[i].min_val = ULONG_MAX;
+               ts_lcl->slat_stat[i].min_val = ULONG_MAX;
+               ts_lcl->lat_stat[i].min_val = ULONG_MAX;
+               ts_lcl->bw_stat[i].min_val = ULONG_MAX;
+               ts_lcl->iops_stat[i].min_val = ULONG_MAX;
+               ts_lcl->clat_high_prio_stat[i].min_val = ULONG_MAX;
+               ts_lcl->clat_low_prio_stat[i].min_val = ULONG_MAX;
+       }
+       ts_lcl->sync_stat.min_val = ULONG_MAX;
+
+       sum_thread_stats(ts_lcl, ts, 1);
+
+       assert(ddir_rw(ddir));
+
+       /* nothing ran: leave through the common exit so ts_lcl is freed */
+       if (!ts_lcl->runtime[ddir])
+               goto out;
+
+       i2p = is_power_of_2(rs->kb_base);
+       runt = ts_lcl->runtime[ddir];
+
+       bw = (1000 * ts_lcl->io_bytes[ddir]) / runt;
+       io_p = num2str(ts_lcl->io_bytes[ddir], ts->sig_figs, 1, i2p, N2S_BYTE);
+       bw_p = num2str(bw, ts->sig_figs, 1, i2p, ts->unit_base);
+       bw_p_alt = num2str(bw, ts->sig_figs, 1, !i2p, ts->unit_base);
+
+       iops = (1000 * ts_lcl->total_io_u[ddir]) / runt;
+       iops_p = num2str(iops, ts->sig_figs, 1, 0, N2S_NONE);
+
+       /* the trailing %s is kept for format parity; it is always empty here */
+       log_buf(out, "  mixed: IOPS=%s, BW=%s (%s)(%s/%llumsec)%s\n",
+                       iops_p, bw_p, bw_p_alt, io_p,
+                       (unsigned long long) ts_lcl->runtime[ddir],
+                       "");
+
+       free(io_p);
+       free(bw_p);
+       free(bw_p_alt);
+       free(iops_p);
+
+       if (calc_lat(&ts_lcl->slat_stat[ddir], &min, &max, &mean, &dev))
+               display_lat("slat", min, max, mean, dev, out);
+       if (calc_lat(&ts_lcl->clat_stat[ddir], &min, &max, &mean, &dev))
+               display_lat("clat", min, max, mean, dev, out);
+       if (calc_lat(&ts_lcl->lat_stat[ddir], &min, &max, &mean, &dev))
+               display_lat(" lat", min, max, mean, dev, out);
+       /*
+        * The label is chosen from ts, not ts_lcl: this function never
+        * sets the percentile flags on the zero-filled scratch copy, and
+        * every other percentile decision below reads them from ts.
+        */
+       if (calc_lat(&ts_lcl->clat_high_prio_stat[ddir], &min, &max, &mean, &dev)) {
+               display_lat(ts->lat_percentiles ? "high prio_lat" : "high prio_clat",
+                               min, max, mean, dev, out);
+               if (calc_lat(&ts_lcl->clat_low_prio_stat[ddir], &min, &max, &mean, &dev))
+                       display_lat(ts->lat_percentiles ? "low prio_lat" : "low prio_clat",
+                                       min, max, mean, dev, out);
+       }
+
+       if (ts->slat_percentiles && ts_lcl->slat_stat[ddir].samples > 0)
+               show_clat_percentiles(ts_lcl->io_u_plat[FIO_SLAT][ddir],
+                               ts_lcl->slat_stat[ddir].samples,
+                               ts->percentile_list,
+                               ts->percentile_precision, "slat", out);
+       if (ts->clat_percentiles && ts_lcl->clat_stat[ddir].samples > 0)
+               show_clat_percentiles(ts_lcl->io_u_plat[FIO_CLAT][ddir],
+                               ts_lcl->clat_stat[ddir].samples,
+                               ts->percentile_list,
+                               ts->percentile_precision, "clat", out);
+       if (ts->lat_percentiles && ts_lcl->lat_stat[ddir].samples > 0)
+               show_clat_percentiles(ts_lcl->io_u_plat[FIO_LAT][ddir],
+                               ts_lcl->lat_stat[ddir].samples,
+                               ts->percentile_list,
+                               ts->percentile_precision, "lat", out);
+
+       if (ts->clat_percentiles || ts->lat_percentiles) {
+               const char *name = ts->lat_percentiles ? "lat" : "clat";
+               char prio_name[32];
+               uint64_t samples;
+
+               if (ts->lat_percentiles)
+                       samples = ts_lcl->lat_stat[ddir].samples;
+               else
+                       samples = ts_lcl->clat_stat[ddir].samples;
+
+               /* Only print this if some high and low priority stats were collected */
+               if (ts_lcl->clat_high_prio_stat[ddir].samples > 0 &&
+                               ts_lcl->clat_low_prio_stat[ddir].samples > 0)
+               {
+                       /* bounded write: the label can never overrun prio_name */
+                       snprintf(prio_name, sizeof(prio_name), "high prio (%.2f%%) %s",
+                                       100. * (double) ts_lcl->clat_high_prio_stat[ddir].samples / (double) samples,
+                                       name);
+                       show_clat_percentiles(ts_lcl->io_u_plat_high_prio[ddir],
+                                       ts_lcl->clat_high_prio_stat[ddir].samples,
+                                       ts->percentile_list,
+                                       ts->percentile_precision, prio_name, out);
+
+                       snprintf(prio_name, sizeof(prio_name), "low prio (%.2f%%) %s",
+                                       100. * (double) ts_lcl->clat_low_prio_stat[ddir].samples / (double) samples,
+                                       name);
+                       show_clat_percentiles(ts_lcl->io_u_plat_low_prio[ddir],
+                                       ts_lcl->clat_low_prio_stat[ddir].samples,
+                                       ts->percentile_list,
+                                       ts->percentile_precision, prio_name, out);
+               }
+       }
+
+       if (calc_lat(&ts_lcl->bw_stat[ddir], &min, &max, &mean, &dev)) {
+               double p_of_agg, fkb_base = (double)rs->kb_base;
+               const char *bw_str;
+
+               if ((rs->unit_base == 1) && i2p)
+                       bw_str = "Kibit";
+               else if (rs->unit_base == 1)
+                       bw_str = "kbit";
+               else if (i2p)
+                       bw_str = "KiB";
+               else
+                       bw_str = "kB";
+
+               /* share of the group aggregate, computed before unit scaling */
+               p_of_agg = convert_agg_kbytes_percent(rs, ddir, mean);
+
+               /* unit_base == 1 selects the bit-based labels above, so scale by 8 */
+               if (rs->unit_base == 1) {
+                       min *= 8.0;
+                       max *= 8.0;
+                       mean *= 8.0;
+                       dev *= 8.0;
+               }
+
+               /* large means are shown one unit step up */
+               if (mean > fkb_base * fkb_base) {
+                       min /= fkb_base;
+                       max /= fkb_base;
+                       mean /= fkb_base;
+                       dev /= fkb_base;
+                       bw_str = (rs->unit_base == 1 ? "Mibit" : "MiB");
+               }
+
+               log_buf(out, "   bw (%5s/s): min=%5llu, max=%5llu, per=%3.2f%%, "
+                       "avg=%5.02f, stdev=%5.02f, samples=%" PRIu64 "\n",
+                       bw_str, min, max, p_of_agg, mean, dev,
+                       (&ts_lcl->bw_stat[ddir])->samples);
+       }
+       if (calc_lat(&ts_lcl->iops_stat[ddir], &min, &max, &mean, &dev)) {
+               log_buf(out, "   iops        : min=%5llu, max=%5llu, "
+                       "avg=%5.02f, stdev=%5.02f, samples=%" PRIu64 "\n",
+                       min, max, mean, dev, (&ts_lcl->iops_stat[ddir])->samples);
+       }
+
+out:
+       free(ts_lcl);
+}
+
 static void show_ddir_status(struct group_run_stats *rs, struct thread_stat *ts,
                             int ddir, struct buf_output *out)
 {
@@ -466,7 +686,7 @@ static void show_ddir_status(struct group_run_stats *rs, struct thread_stat *ts,
        }
 
        log_buf(out, "  %s: IOPS=%s, BW=%s (%s)(%s/%llumsec)%s\n",
-                       rs->unified_rw_rep ? "mixed" : io_ddir_name(ddir),
+                       (ts->unified_rw_rep == UNIFIED_MIXED) ? "mixed" : io_ddir_name(ddir),
                        iops_p, bw_p, bw_p_alt, io_p,
                        (unsigned long long) ts->runtime[ddir],
                        post_st ? : "");
@@ -483,21 +703,62 @@ static void show_ddir_status(struct group_run_stats *rs, struct thread_stat *ts,
                display_lat("clat", min, max, mean, dev, out);
        if (calc_lat(&ts->lat_stat[ddir], &min, &max, &mean, &dev))
                display_lat(" lat", min, max, mean, dev, out);
+       if (calc_lat(&ts->clat_high_prio_stat[ddir], &min, &max, &mean, &dev)) {
+               display_lat(ts->lat_percentiles ? "high prio_lat" : "high prio_clat",
+                               min, max, mean, dev, out);
+               if (calc_lat(&ts->clat_low_prio_stat[ddir], &min, &max, &mean, &dev))
+                       display_lat(ts->lat_percentiles ? "low prio_lat" : "low prio_clat",
+                                       min, max, mean, dev, out);
+       }
+
+       if (ts->slat_percentiles && ts->slat_stat[ddir].samples > 0)
+               show_clat_percentiles(ts->io_u_plat[FIO_SLAT][ddir],
+                                       ts->slat_stat[ddir].samples,
+                                       ts->percentile_list,
+                                       ts->percentile_precision, "slat", out);
+       if (ts->clat_percentiles && ts->clat_stat[ddir].samples > 0)
+               show_clat_percentiles(ts->io_u_plat[FIO_CLAT][ddir],
+                                       ts->clat_stat[ddir].samples,
+                                       ts->percentile_list,
+                                       ts->percentile_precision, "clat", out);
+       if (ts->lat_percentiles && ts->lat_stat[ddir].samples > 0)
+               show_clat_percentiles(ts->io_u_plat[FIO_LAT][ddir],
+                                       ts->lat_stat[ddir].samples,
+                                       ts->percentile_list,
+                                       ts->percentile_precision, "lat", out);
 
        if (ts->clat_percentiles || ts->lat_percentiles) {
-               const char *name = ts->clat_percentiles ? "clat" : " lat";
+               const char *name = ts->lat_percentiles ? "lat" : "clat";
+               char prio_name[32];
                uint64_t samples;
 
-               if (ts->clat_percentiles)
-                       samples = ts->clat_stat[ddir].samples;
-               else
+               if (ts->lat_percentiles)
                        samples = ts->lat_stat[ddir].samples;
+               else
+                       samples = ts->clat_stat[ddir].samples;
 
-               show_clat_percentiles(ts->io_u_plat[ddir],
-                                       samples,
-                                       ts->percentile_list,
-                                       ts->percentile_precision, name, out);
+               /* Only print this if some high and low priority stats were collected */
+               if (ts->clat_high_prio_stat[ddir].samples > 0 &&
+                       ts->clat_low_prio_stat[ddir].samples > 0)
+               {
+                       sprintf(prio_name, "high prio (%.2f%%) %s",
+                                       100. * (double) ts->clat_high_prio_stat[ddir].samples / (double) samples,
+                                       name);
+                       show_clat_percentiles(ts->io_u_plat_high_prio[ddir],
+                                               ts->clat_high_prio_stat[ddir].samples,
+                                               ts->percentile_list,
+                                               ts->percentile_precision, prio_name, out);
+
+                       sprintf(prio_name, "low prio (%.2f%%) %s",
+                                       100. * (double) ts->clat_low_prio_stat[ddir].samples / (double) samples,
+                                       name);
+                       show_clat_percentiles(ts->io_u_plat_low_prio[ddir],
+                                               ts->clat_low_prio_stat[ddir].samples,
+                                               ts->percentile_list,
+                                               ts->percentile_precision, prio_name, out);
+               }
        }
+
        if (calc_lat(&ts->bw_stat[ddir], &min, &max, &mean, &dev)) {
                double p_of_agg = 100.0, fkb_base = (double)rs->kb_base;
                const char *bw_str;
@@ -511,11 +772,7 @@ static void show_ddir_status(struct group_run_stats *rs, struct thread_stat *ts,
                else
                        bw_str = "kB";
 
-               if (rs->agg[ddir]) {
-                       p_of_agg = mean * 100 / (double) (rs->agg[ddir] / 1024);
-                       if (p_of_agg > 100.0)
-                               p_of_agg = 100.0;
-               }
+               p_of_agg = convert_agg_kbytes_percent(rs, ddir, mean);
 
                if (rs->unit_base == 1) {
                        min *= 8.0;
@@ -684,7 +941,7 @@ static int calc_block_percentiles(int nr_block_infos, uint32_t *block_infos,
         * isn't a worry. Also note that this does not work for NaN values.
         */
        if (len > 1)
-               qsort((void *)plist, len, sizeof(plist[0]), double_cmp);
+               qsort(plist, len, sizeof(plist[0]), double_cmp);
 
        /* Start only after the uninit entries end */
        for (nr_uninit = 0;
@@ -914,7 +1171,7 @@ void json_array_add_disk_util(struct disk_util_stat *dus,
        obj = json_create_object();
        json_array_add_value_object(array, obj);
 
-       json_object_add_value_string(obj, "name", dus->name);
+       json_object_add_value_string(obj, "name", (const char *)dus->name);
        json_object_add_value_int(obj, "read_ios", dus->s.ios[0]);
        json_object_add_value_int(obj, "write_ios", dus->s.ios[1]);
        json_object_add_value_int(obj, "read_merges", dus->s.merges[0]);
@@ -1010,7 +1267,7 @@ static void show_thread_status_normal(struct thread_stat *ts,
 
        if (!ddir_rw_sum(ts->io_bytes) && !ddir_rw_sum(ts->total_io_u))
                return;
-               
+
        memset(time_buf, 0, sizeof(time_buf));
 
        time(&time_p);
@@ -1030,12 +1287,13 @@ static void show_thread_status_normal(struct thread_stat *ts,
        if (strlen(ts->description))
                log_buf(out, "  Description  : [%s]\n", ts->description);
 
-       if (ts->io_bytes[DDIR_READ])
-               show_ddir_status(rs, ts, DDIR_READ, out);
-       if (ts->io_bytes[DDIR_WRITE])
-               show_ddir_status(rs, ts, DDIR_WRITE, out);
-       if (ts->io_bytes[DDIR_TRIM])
-               show_ddir_status(rs, ts, DDIR_TRIM, out);
+       for_each_rw_ddir(ddir) {
+               if (ts->io_bytes[ddir])
+                       show_ddir_status(rs, ts, ddir, out);
+       }
+
+       if (ts->unified_rw_rep == UNIFIED_BOTH)
+               show_mixed_ddir_status(rs, ts, out);
 
        show_latencies(ts, out);
 
@@ -1147,14 +1405,19 @@ static void show_ddir_status_terse(struct thread_stat *ts,
        else
                log_buf(out, ";%llu;%llu;%f;%f", 0ULL, 0ULL, 0.0, 0.0);
 
-       if (ts->clat_percentiles || ts->lat_percentiles) {
-               len = calc_clat_percentiles(ts->io_u_plat[ddir],
+       if (ts->lat_percentiles)
+               len = calc_clat_percentiles(ts->io_u_plat[FIO_LAT][ddir],
+                                       ts->lat_stat[ddir].samples,
+                                       ts->percentile_list, &ovals, &maxv,
+                                       &minv);
+       else if (ts->clat_percentiles)
+               len = calc_clat_percentiles(ts->io_u_plat[FIO_CLAT][ddir],
                                        ts->clat_stat[ddir].samples,
                                        ts->percentile_list, &ovals, &maxv,
                                        &minv);
-       else
+       else
                len = 0;
-
+       
        for (i = 0; i < FIO_IO_U_LIST_MAX_LEN; i++) {
                if (i >= len) {
                        log_buf(out, ";0%%=0");
@@ -1168,8 +1431,7 @@ static void show_ddir_status_terse(struct thread_stat *ts,
        else
                log_buf(out, ";%llu;%llu;%f;%f", 0ULL, 0ULL, 0.0, 0.0);
 
-       if (ovals)
-               free(ovals);
+       free(ovals);
 
        bw_stat = calc_lat(&ts->bw_stat[ddir], &min, &max, &mean, &dev);
        if (bw_stat) {
@@ -1199,27 +1461,107 @@ static void show_ddir_status_terse(struct thread_stat *ts,
        }
 }
 
+/*
+ * Emit the terse-format "mixed" record for one job: reads, writes and
+ * trims from @ts are summed into a scratch thread_stat, which is then
+ * printed through show_ddir_status_terse() as DDIR_READ.
+ *
+ * The percentile settings are copied from @ts so that the terse printer
+ * sees the same configuration on the scratch copy. If the scratch copy
+ * cannot be allocated, nothing is emitted.
+ */
+static void show_mixed_ddir_status_terse(struct thread_stat *ts,
+                                  struct group_run_stats *rs,
+                                  int ver, struct buf_output *out)
+{
+       struct thread_stat *ts_lcl;
+       int i;
+
+       /* Handle aggregation of Reads (ddir = 0), Writes (ddir = 1), and Trims (ddir = 2) */
+       ts_lcl = calloc(1, sizeof(*ts_lcl));
+       if (!ts_lcl)
+               return;
+
+       ts_lcl->unified_rw_rep = UNIFIED_MIXED;               /* calculate mixed stats  */
+       for (i = 0; i < DDIR_RWDIR_CNT; i++) {
+               ts_lcl->clat_stat[i].min_val = ULONG_MAX;
+               ts_lcl->slat_stat[i].min_val = ULONG_MAX;
+               ts_lcl->lat_stat[i].min_val = ULONG_MAX;
+               ts_lcl->bw_stat[i].min_val = ULONG_MAX;
+               ts_lcl->iops_stat[i].min_val = ULONG_MAX;
+               ts_lcl->clat_high_prio_stat[i].min_val = ULONG_MAX;
+               ts_lcl->clat_low_prio_stat[i].min_val = ULONG_MAX;
+       }
+       ts_lcl->sync_stat.min_val = ULONG_MAX;
+       ts_lcl->lat_percentiles = ts->lat_percentiles;
+       ts_lcl->clat_percentiles = ts->clat_percentiles;
+       ts_lcl->slat_percentiles = ts->slat_percentiles;
+       ts_lcl->percentile_precision = ts->percentile_precision;
+       memcpy(ts_lcl->percentile_list, ts->percentile_list, sizeof(ts->percentile_list));
+
+       sum_thread_stats(ts_lcl, ts, 1);
+
+       /* print the aggregated stats in terse format */
+       show_ddir_status_terse(ts_lcl, rs, DDIR_READ, ver, out);
+       free(ts_lcl);
+}
+
+/*
+ * Build and return a JSON object describing one latency statistic:
+ * "min", "max", "mean", "stddev" and "N" are always present (zeros when
+ * calc_lat() reports no data). When @percentiles is non-zero and samples
+ * exist, a "percentile" child is added, plus a "bins" child of the
+ * non-zero histogram buckets when JSON+ output is selected.
+ *
+ * The caller attaches the returned object to its parent.
+ */
+static struct json_object *add_ddir_lat_json(struct thread_stat *ts, uint32_t percentiles,
+               struct io_stat *lat_stat, uint64_t *io_u_plat)
+{
+       struct json_object *root, *pct_obj, *bins_obj;
+       unsigned long long lo, hi, maxv, minv, *ovals = NULL;
+       double avg, sdev;
+       unsigned int idx, count;
+       char key[120];
+
+       if (!calc_lat(lat_stat, &lo, &hi, &avg, &sdev)) {
+               lo = 0;
+               hi = 0;
+               avg = 0.0;
+               sdev = 0.0;
+       }
+
+       root = json_create_object();
+       json_object_add_value_int(root, "min", lo);
+       json_object_add_value_int(root, "max", hi);
+       json_object_add_value_float(root, "mean", avg);
+       json_object_add_value_float(root, "stddev", sdev);
+       json_object_add_value_int(root, "N", lat_stat->samples);
+
+       /* summary only: percentiles not requested or nothing was sampled */
+       if (!percentiles || !lat_stat->samples)
+               return root;
+
+       count = calc_clat_percentiles(io_u_plat, lat_stat->samples,
+                       ts->percentile_list, &ovals, &maxv, &minv);
+       if (count > FIO_IO_U_LIST_MAX_LEN)
+               count = FIO_IO_U_LIST_MAX_LEN;
+
+       pct_obj = json_create_object();
+       json_object_add_value_object(root, "percentile", pct_obj);
+       /* keys are the configured percentile values formatted with %f */
+       for (idx = 0; idx < count; idx++) {
+               snprintf(key, sizeof(key), "%f", ts->percentile_list[idx].u.f);
+               json_object_add_value_int(pct_obj, key, ovals[idx]);
+       }
+       free(ovals);
+
+       if (!(output_format & FIO_OUTPUT_JSON_PLUS))
+               return root;
+
+       bins_obj = json_create_object();
+       json_object_add_value_object(root, "bins", bins_obj);
+       /* only buckets with a non-zero count are written */
+       for (idx = 0; idx < FIO_IO_U_PLAT_NR; idx++) {
+               if (!io_u_plat[idx])
+                       continue;
+               snprintf(key, sizeof(key), "%llu", plat_idx_to_val(idx));
+               json_object_add_value_int(bins_obj, key, io_u_plat[idx]);
+       }
+
+       return root;
+}
+
 static void add_ddir_status_json(struct thread_stat *ts,
                struct group_run_stats *rs, int ddir, struct json_object *parent)
 {
-       unsigned long long min, max, minv, maxv;
+       unsigned long long min, max;
        unsigned long long bw_bytes, bw;
-       unsigned long long *ovals = NULL;
        double mean, dev, iops;
-       unsigned int len;
-       int i;
-       struct json_object *dir_object, *tmp_object, *percentile_object, *clat_bins_object = NULL;
-       char buf[120];
+       struct json_object *dir_object, *tmp_object;
        double p_of_agg = 100.0;
 
        assert(ddir_rw(ddir) || ddir_sync(ddir));
 
-       if (ts->unified_rw_rep && ddir != DDIR_READ)
+       if ((ts->unified_rw_rep == UNIFIED_MIXED) && ddir != DDIR_READ)
                return;
 
        dir_object = json_create_object();
        json_object_add_value_object(parent,
-               ts->unified_rw_rep ? "mixed" : io_ddir_name(ddir), dir_object);
+               (ts->unified_rw_rep == UNIFIED_MIXED) ? "mixed" : io_ddir_name(ddir), dir_object);
 
        if (ddir_rw(ddir)) {
                bw_bytes = 0;
@@ -1243,125 +1585,55 @@ static void add_ddir_status_json(struct thread_stat *ts,
                json_object_add_value_int(dir_object, "short_ios", ts->short_io_u[ddir]);
                json_object_add_value_int(dir_object, "drop_ios", ts->drop_io_u[ddir]);
 
-               if (!calc_lat(&ts->slat_stat[ddir], &min, &max, &mean, &dev)) {
-                       min = max = 0;
-                       mean = dev = 0.0;
-               }
-               tmp_object = json_create_object();
+               tmp_object = add_ddir_lat_json(ts, ts->slat_percentiles,
+                               &ts->slat_stat[ddir], ts->io_u_plat[FIO_SLAT][ddir]);
                json_object_add_value_object(dir_object, "slat_ns", tmp_object);
-               json_object_add_value_int(tmp_object, "min", min);
-               json_object_add_value_int(tmp_object, "max", max);
-               json_object_add_value_float(tmp_object, "mean", mean);
-               json_object_add_value_float(tmp_object, "stddev", dev);
-
-               if (!calc_lat(&ts->clat_stat[ddir], &min, &max, &mean, &dev)) {
-                       min = max = 0;
-                       mean = dev = 0.0;
-               }
-               tmp_object = json_create_object();
+
+               tmp_object = add_ddir_lat_json(ts, ts->clat_percentiles,
+                               &ts->clat_stat[ddir], ts->io_u_plat[FIO_CLAT][ddir]);
                json_object_add_value_object(dir_object, "clat_ns", tmp_object);
-               json_object_add_value_int(tmp_object, "min", min);
-               json_object_add_value_int(tmp_object, "max", max);
-               json_object_add_value_float(tmp_object, "mean", mean);
-               json_object_add_value_float(tmp_object, "stddev", dev);
-       } else {
-               if (!calc_lat(&ts->sync_stat, &min, &max, &mean, &dev)) {
-                       min = max = 0;
-                       mean = dev = 0.0;
-               }
 
-               tmp_object = json_create_object();
+               tmp_object = add_ddir_lat_json(ts, ts->lat_percentiles,
+                               &ts->lat_stat[ddir], ts->io_u_plat[FIO_LAT][ddir]);
                json_object_add_value_object(dir_object, "lat_ns", tmp_object);
+       } else {
                json_object_add_value_int(dir_object, "total_ios", ts->total_io_u[DDIR_SYNC]);
-               json_object_add_value_int(tmp_object, "min", min);
-               json_object_add_value_int(tmp_object, "max", max);
-               json_object_add_value_float(tmp_object, "mean", mean);
-               json_object_add_value_float(tmp_object, "stddev", dev);
+               tmp_object = add_ddir_lat_json(ts, ts->lat_percentiles | ts->clat_percentiles,
+                               &ts->sync_stat, ts->io_u_sync_plat);
+               json_object_add_value_object(dir_object, "lat_ns", tmp_object);
        }
 
-       if (ts->clat_percentiles || ts->lat_percentiles) {
-               if (ddir_rw(ddir)) {
-                       uint64_t samples;
+       if (!ddir_rw(ddir))
+               return;
 
-                       if (ts->clat_percentiles)
-                               samples = ts->clat_stat[ddir].samples;
-                       else
-                               samples = ts->lat_stat[ddir].samples;
+       /* Only print PRIO latencies if some high priority samples were gathered */
+       if (ts->clat_high_prio_stat[ddir].samples > 0) {
+               const char *high, *low;
 
-                       len = calc_clat_percentiles(ts->io_u_plat[ddir],
-                                       samples, ts->percentile_list, &ovals,
-                                       &maxv, &minv);
+               if (ts->lat_percentiles) {
+                       high = "lat_high_prio";
+                       low = "lat_low_prio";
                } else {
-                       len = calc_clat_percentiles(ts->io_u_sync_plat,
-                                       ts->sync_stat.samples,
-                                       ts->percentile_list, &ovals, &maxv,
-                                       &minv);
-               }
-
-               if (len > FIO_IO_U_LIST_MAX_LEN)
-                       len = FIO_IO_U_LIST_MAX_LEN;
-       } else
-               len = 0;
-
-       percentile_object = json_create_object();
-       if (ts->clat_percentiles)
-               json_object_add_value_object(tmp_object, "percentile", percentile_object);
-       for (i = 0; i < len; i++) {
-               snprintf(buf, sizeof(buf), "%f", ts->percentile_list[i].u.f);
-               json_object_add_value_int(percentile_object, (const char *)buf, ovals[i]);
-       }
-
-       if (output_format & FIO_OUTPUT_JSON_PLUS) {
-               clat_bins_object = json_create_object();
-               if (ts->clat_percentiles)
-                       json_object_add_value_object(tmp_object, "bins", clat_bins_object);
-
-               for(i = 0; i < FIO_IO_U_PLAT_NR; i++) {
-                       if (ddir_rw(ddir)) {
-                               if (ts->io_u_plat[ddir][i]) {
-                                       snprintf(buf, sizeof(buf), "%llu", plat_idx_to_val(i));
-                                       json_object_add_value_int(clat_bins_object, (const char *)buf, ts->io_u_plat[ddir][i]);
-                               }
-                       } else {
-                               if (ts->io_u_sync_plat[i]) {
-                                       snprintf(buf, sizeof(buf), "%llu", plat_idx_to_val(i));
-                                       json_object_add_value_int(clat_bins_object, (const char *)buf, ts->io_u_sync_plat[i]);
-                               }
-                       }
+                       high = "clat_high_prio";
+                       low = "clat_low_prio";
                }
-       }
 
-       if (!ddir_rw(ddir))
-               return;
+               tmp_object = add_ddir_lat_json(ts, ts->clat_percentiles | ts->lat_percentiles,
+                               &ts->clat_high_prio_stat[ddir], ts->io_u_plat_high_prio[ddir]);
+               json_object_add_value_object(dir_object, high, tmp_object);
 
-       if (!calc_lat(&ts->lat_stat[ddir], &min, &max, &mean, &dev)) {
-               min = max = 0;
-               mean = dev = 0.0;
+               tmp_object = add_ddir_lat_json(ts, ts->clat_percentiles | ts->lat_percentiles,
+                               &ts->clat_low_prio_stat[ddir], ts->io_u_plat_low_prio[ddir]);
+               json_object_add_value_object(dir_object, low, tmp_object);
        }
-       tmp_object = json_create_object();
-       json_object_add_value_object(dir_object, "lat_ns", tmp_object);
-       json_object_add_value_int(tmp_object, "min", min);
-       json_object_add_value_int(tmp_object, "max", max);
-       json_object_add_value_float(tmp_object, "mean", mean);
-       json_object_add_value_float(tmp_object, "stddev", dev);
-       if (ts->lat_percentiles)
-               json_object_add_value_object(tmp_object, "percentile", percentile_object);
-       if (output_format & FIO_OUTPUT_JSON_PLUS && ts->lat_percentiles)
-               json_object_add_value_object(tmp_object, "bins", clat_bins_object);
-
-       if (ovals)
-               free(ovals);
 
        if (calc_lat(&ts->bw_stat[ddir], &min, &max, &mean, &dev)) {
-               if (rs->agg[ddir]) {
-                       p_of_agg = mean * 100 / (double) (rs->agg[ddir] / 1024);
-                       if (p_of_agg > 100.0)
-                               p_of_agg = 100.0;
-               }
+               p_of_agg = convert_agg_kbytes_percent(rs, ddir, mean);
        } else {
                min = max = 0;
                p_of_agg = mean = dev = 0.0;
        }
+
        json_object_add_value_int(dir_object, "bw_min", min);
        json_object_add_value_int(dir_object, "bw_max", max);
        json_object_add_value_float(dir_object, "bw_agg", p_of_agg);
@@ -1392,6 +1664,39 @@ static void add_ddir_status_json(struct thread_stat *ts,
        }
 }
 
+/*
+ * Add a JSON entry holding the read, write and trim statistics of 'ts'
+ * summed into a single direction.
+ *
+ * A zeroed scratch thread_stat is marked UNIFIED_MIXED so that
+ * sum_thread_stats() accumulates every direction of 'ts' into index 0;
+ * that index is then emitted under 'parent' by add_ddir_status_json().
+ * If the scratch copy cannot be allocated, an error is logged and nothing
+ * is added to 'parent'.
+ */
+static void add_mixed_ddir_status_json(struct thread_stat *ts,
+               struct group_run_stats *rs, struct json_object *parent)
+{
+       struct thread_stat *ts_lcl;
+       int i;
+
+       /* Handle aggregation of Reads (ddir = 0), Writes (ddir = 1), and Trims (ddir = 2) */
+       ts_lcl = calloc(1, sizeof(*ts_lcl));
+       if (!ts_lcl) {
+               log_err("fio: failed to allocate local thread stat\n");
+               return;
+       }
+
+       ts_lcl->unified_rw_rep = UNIFIED_MIXED;         /* calculate mixed stats */
+
+       /* Same starting minimum that init_thread_stat() assigns (-1UL) */
+       for (i = 0; i < DDIR_RWDIR_CNT; i++) {
+               ts_lcl->clat_stat[i].min_val = ULONG_MAX;
+               ts_lcl->slat_stat[i].min_val = ULONG_MAX;
+               ts_lcl->lat_stat[i].min_val = ULONG_MAX;
+               ts_lcl->bw_stat[i].min_val = ULONG_MAX;
+               ts_lcl->iops_stat[i].min_val = ULONG_MAX;
+               ts_lcl->clat_high_prio_stat[i].min_val = ULONG_MAX;
+               ts_lcl->clat_low_prio_stat[i].min_val = ULONG_MAX;
+       }
+       ts_lcl->sync_stat.min_val = ULONG_MAX;
+
+       /* Carry over the percentile settings the JSON writer reads */
+       ts_lcl->lat_percentiles = ts->lat_percentiles;
+       ts_lcl->clat_percentiles = ts->clat_percentiles;
+       ts_lcl->slat_percentiles = ts->slat_percentiles;
+       ts_lcl->percentile_precision = ts->percentile_precision;
+       memcpy(ts_lcl->percentile_list, ts->percentile_list, sizeof(ts->percentile_list));
+
+       sum_thread_stats(ts_lcl, ts, 1);
+
+       /* add the aggregated stats to json parent */
+       add_ddir_status_json(ts_lcl, rs, DDIR_READ, parent);
+       free(ts_lcl);
+}
+
 static void show_thread_status_terse_all(struct thread_stat *ts,
                                         struct group_run_stats *rs, int ver,
                                         struct buf_output *out)
@@ -1409,14 +1714,17 @@ static void show_thread_status_terse_all(struct thread_stat *ts,
                log_buf(out, "%d;%s;%s;%d;%d", ver, fio_version_string,
                        ts->name, ts->groupid, ts->error);
 
-       /* Log Read Status */
+       /* Log Read Status, or mixed if unified_rw_rep = 1 */
        show_ddir_status_terse(ts, rs, DDIR_READ, ver, out);
-       /* Log Write Status */
-       show_ddir_status_terse(ts, rs, DDIR_WRITE, ver, out);
-       /* Log Trim Status */
-       if (ver == 2 || ver == 4 || ver == 5)
-               show_ddir_status_terse(ts, rs, DDIR_TRIM, ver, out);
-
+       if (ts->unified_rw_rep != UNIFIED_MIXED) {
+               /* Log Write Status */
+               show_ddir_status_terse(ts, rs, DDIR_WRITE, ver, out);
+               /* Log Trim Status */
+               if (ver == 2 || ver == 4 || ver == 5)
+                       show_ddir_status_terse(ts, rs, DDIR_TRIM, ver, out);
+       }
+       if (ts->unified_rw_rep == UNIFIED_BOTH)
+               show_mixed_ddir_status_terse(ts, rs, ver, out);
        /* CPU Usage */
        if (ts->total_run_time) {
                double runt = (double) ts->total_run_time;
@@ -1482,12 +1790,8 @@ static void json_add_job_opts(struct json_object *root, const char *name,
        json_object_add_value_object(root, name, dir_object);
 
        flist_for_each(entry, opt_list) {
-               const char *pos = "";
-
                p = flist_entry(entry, struct print_option, list);
-               if (p->value)
-                       pos = p->value;
-               json_object_add_value_string(dir_object, p->name, pos);
+               json_object_add_value_string(dir_object, p->name, p->value);
        }
 }
 
@@ -1525,6 +1829,9 @@ static struct json_object *show_thread_status_json(struct thread_stat *ts,
        add_ddir_status_json(ts, rs, DDIR_TRIM, root);
        add_ddir_status_json(ts, rs, DDIR_SYNC, root);
 
+       if (ts->unified_rw_rep == UNIFIED_BOTH)
+               add_mixed_ddir_status_json(ts, rs, root);
+
        /* CPU Usage */
        if (ts->total_run_time) {
                double runt = (double) ts->total_run_time;
@@ -1661,7 +1968,7 @@ static struct json_object *show_thread_status_json(struct thread_stat *ts,
                                snprintf(buf, sizeof(buf), "%f",
                                         ts->percentile_list[i].u.f);
                                json_object_add_value_int(percentile_object,
-                                                         (const char *)buf,
+                                                         buf,
                                                          percentiles[i]);
                        }
 
@@ -1850,11 +2157,13 @@ void sum_group_stats(struct group_run_stats *dst, struct group_run_stats *src)
 void sum_thread_stats(struct thread_stat *dst, struct thread_stat *src,
                      bool first)
 {
-       int l, k;
+       int k, l, m;
 
        for (l = 0; l < DDIR_RWDIR_CNT; l++) {
-               if (!dst->unified_rw_rep) {
+               if (!(dst->unified_rw_rep == UNIFIED_MIXED)) {
                        sum_stat(&dst->clat_stat[l], &src->clat_stat[l], first, false);
+                       sum_stat(&dst->clat_high_prio_stat[l], &src->clat_high_prio_stat[l], first, false);
+                       sum_stat(&dst->clat_low_prio_stat[l], &src->clat_low_prio_stat[l], first, false);
                        sum_stat(&dst->slat_stat[l], &src->slat_stat[l], first, false);
                        sum_stat(&dst->lat_stat[l], &src->lat_stat[l], first, false);
                        sum_stat(&dst->bw_stat[l], &src->bw_stat[l], first, true);
@@ -1866,6 +2175,8 @@ void sum_thread_stats(struct thread_stat *dst, struct thread_stat *src,
                                dst->runtime[l] = src->runtime[l];
                } else {
                        sum_stat(&dst->clat_stat[0], &src->clat_stat[l], first, false);
+                       sum_stat(&dst->clat_high_prio_stat[0], &src->clat_high_prio_stat[l], first, false);
+                       sum_stat(&dst->clat_low_prio_stat[0], &src->clat_low_prio_stat[l], first, false);
                        sum_stat(&dst->slat_stat[0], &src->slat_stat[l], first, false);
                        sum_stat(&dst->lat_stat[0], &src->lat_stat[l], first, false);
                        sum_stat(&dst->bw_stat[0], &src->bw_stat[l], first, true);
@@ -1904,11 +2215,8 @@ void sum_thread_stats(struct thread_stat *dst, struct thread_stat *src,
        for (k = 0; k < FIO_IO_U_LAT_M_NR; k++)
                dst->io_u_lat_m[k] += src->io_u_lat_m[k];
 
-       for (k = 0; k < FIO_IO_U_PLAT_NR; k++)
-               dst->io_u_sync_plat[k] += src->io_u_sync_plat[k];
-
        for (k = 0; k < DDIR_RWDIR_CNT; k++) {
-               if (!dst->unified_rw_rep) {
+               if (!(dst->unified_rw_rep == UNIFIED_MIXED)) {
                        dst->total_io_u[k] += src->total_io_u[k];
                        dst->short_io_u[k] += src->short_io_u[k];
                        dst->drop_io_u[k] += src->drop_io_u[k];
@@ -1921,14 +2229,27 @@ void sum_thread_stats(struct thread_stat *dst, struct thread_stat *src,
 
        dst->total_io_u[DDIR_SYNC] += src->total_io_u[DDIR_SYNC];
 
-       for (k = 0; k < DDIR_RWDIR_CNT; k++) {
-               int m;
+       for (k = 0; k < FIO_LAT_CNT; k++)
+               for (l = 0; l < DDIR_RWDIR_CNT; l++)
+                       for (m = 0; m < FIO_IO_U_PLAT_NR; m++)
+                               if (!(dst->unified_rw_rep == UNIFIED_MIXED))
+                                       dst->io_u_plat[k][l][m] += src->io_u_plat[k][l][m];
+                               else
+                                       dst->io_u_plat[k][0][m] += src->io_u_plat[k][l][m];
 
+       for (k = 0; k < FIO_IO_U_PLAT_NR; k++)
+               dst->io_u_sync_plat[k] += src->io_u_sync_plat[k];
+
+       for (k = 0; k < DDIR_RWDIR_CNT; k++) {
                for (m = 0; m < FIO_IO_U_PLAT_NR; m++) {
-                       if (!dst->unified_rw_rep)
-                               dst->io_u_plat[k][m] += src->io_u_plat[k][m];
-                       else
-                               dst->io_u_plat[0][m] += src->io_u_plat[k][m];
+                       if (!(dst->unified_rw_rep == UNIFIED_MIXED)) {
+                               dst->io_u_plat_high_prio[k][m] += src->io_u_plat_high_prio[k][m];
+                               dst->io_u_plat_low_prio[k][m] += src->io_u_plat_low_prio[k][m];
+                       } else {
+                               dst->io_u_plat_high_prio[0][m] += src->io_u_plat_high_prio[k][m];
+                               dst->io_u_plat_low_prio[0][m] += src->io_u_plat_low_prio[k][m];
+                       }
+
                }
        }
 
@@ -1961,6 +2282,8 @@ void init_thread_stat(struct thread_stat *ts)
                ts->slat_stat[j].min_val = -1UL;
                ts->bw_stat[j].min_val = -1UL;
                ts->iops_stat[j].min_val = -1UL;
+               ts->clat_high_prio_stat[j].min_val = -1UL;
+               ts->clat_low_prio_stat[j].min_val = -1UL;
        }
        ts->sync_stat.min_val = -1UL;
        ts->groupid = -1;
@@ -2030,6 +2353,7 @@ void __show_run_stats(void)
 
                ts->clat_percentiles = td->o.clat_percentiles;
                ts->lat_percentiles = td->o.lat_percentiles;
+               ts->slat_percentiles = td->o.slat_percentiles;
                ts->percentile_precision = td->o.percentile_precision;
                memcpy(ts->percentile_list, td->o.percentile_list, sizeof(td->o.percentile_list));
                opt_lists[j] = &td->opt_list;
@@ -2127,7 +2451,7 @@ void __show_run_stats(void)
                rs->kb_base = ts->kb_base;
                rs->unit_base = ts->unit_base;
                rs->sig_figs = ts->sig_figs;
-               rs->unified_rw_rep += ts->unified_rw_rep;
+               rs->unified_rw_rep |= ts->unified_rw_rep;
 
                for (j = 0; j < DDIR_RWDIR_CNT; j++) {
                        if (!ts->runtime[j])
@@ -2260,7 +2584,7 @@ void __show_run_stats(void)
        free(opt_lists);
 }
 
-void __show_running_run_stats(void)
+int __show_running_run_stats(void)
 {
        struct thread_data *td;
        unsigned long long *rt;
@@ -2274,9 +2598,9 @@ void __show_running_run_stats(void)
 
        for_each_td(td, i) {
                td->update_rusage = 1;
-               td->ts.io_bytes[DDIR_READ] = td->io_bytes[DDIR_READ];
-               td->ts.io_bytes[DDIR_WRITE] = td->io_bytes[DDIR_WRITE];
-               td->ts.io_bytes[DDIR_TRIM] = td->io_bytes[DDIR_TRIM];
+               for_each_rw_ddir(ddir) {
+                       td->ts.io_bytes[ddir] = td->io_bytes[ddir];
+               }
                td->ts.total_run_time = mtime_since(&td->epoch, &ts);
 
                rt[i] = mtime_since(&td->start, &ts);
@@ -2311,10 +2635,10 @@ void __show_running_run_stats(void)
 
        free(rt);
        fio_sem_up(stat_sem);
+
+       return 0;
 }
 
-static bool status_interval_init;
-static struct timespec status_time;
 static bool status_file_disabled;
 
 #define FIO_STATUS_FILE                "fio-dump-status"
@@ -2357,16 +2681,6 @@ static int check_status_file(void)
 
 void check_for_running_stats(void)
 {
-       if (status_interval) {
-               if (!status_interval_init) {
-                       fio_gettime(&status_time, NULL);
-                       status_interval_init = true;
-               } else if (mtime_since_now(&status_time) >= status_interval) {
-                       show_running_run_stats();
-                       fio_gettime(&status_time, NULL);
-                       return;
-               }
-       }
        if (check_status_file()) {
                show_running_run_stats();
                return;
@@ -2507,6 +2821,14 @@ void regrow_logs(struct thread_data *td)
        td->flags &= ~TD_F_REGROW_LOGS;
 }
 
+/*
+ * Call regrow_log() on every entry of agg_io_log[], one per data
+ * direction below DDIR_RWDIR_CNT.
+ */
+void regrow_agg_logs(void)
+{
+       enum fio_ddir dir = 0;
+
+       while (dir < DDIR_RWDIR_CNT) {
+               regrow_log(agg_io_log[dir]);
+               dir++;
+       }
+}
+
 static struct io_logs *get_cur_log(struct io_log *iolog)
 {
        struct io_logs *cur_log;
@@ -2541,7 +2863,8 @@ static struct io_logs *get_cur_log(struct io_log *iolog)
 
 static void __add_log_sample(struct io_log *iolog, union io_sample_data data,
                             enum fio_ddir ddir, unsigned long long bs,
-                            unsigned long t, uint64_t offset)
+                            unsigned long t, uint64_t offset,
+                            unsigned int priority)
 {
        struct io_logs *cur_log;
 
@@ -2560,6 +2883,7 @@ static void __add_log_sample(struct io_log *iolog, union io_sample_data data,
                s->time = t + (iolog->td ? iolog->td->unix_epoch : 0);
                io_sample_set_ddir(iolog, s, ddir);
                s->bs = bs;
+               s->priority = priority;
 
                if (iolog->log_offset) {
                        struct io_sample_offset *so = (void *) s;
@@ -2584,9 +2908,11 @@ static inline void reset_io_stat(struct io_stat *ios)
 void reset_io_stats(struct thread_data *td)
 {
        struct thread_stat *ts = &td->ts;
-       int i, j;
+       int i, j, k;
 
        for (i = 0; i < DDIR_RWDIR_CNT; i++) {
+               reset_io_stat(&ts->clat_high_prio_stat[i]);
+               reset_io_stat(&ts->clat_low_prio_stat[i]);
                reset_io_stat(&ts->clat_stat[i]);
                reset_io_stat(&ts->slat_stat[i]);
                reset_io_stat(&ts->lat_stat[i]);
@@ -2600,12 +2926,18 @@ void reset_io_stats(struct thread_data *td)
                ts->drop_io_u[i] = 0;
 
                for (j = 0; j < FIO_IO_U_PLAT_NR; j++) {
-                       ts->io_u_plat[i][j] = 0;
+                       ts->io_u_plat_high_prio[i][j] = 0;
+                       ts->io_u_plat_low_prio[i][j] = 0;
                        if (!i)
                                ts->io_u_sync_plat[j] = 0;
                }
        }
 
+       for (i = 0; i < FIO_LAT_CNT; i++)
+               for (j = 0; j < DDIR_RWDIR_CNT; j++)
+                       for (k = 0; k < FIO_IO_U_PLAT_NR; k++)
+                               ts->io_u_plat[i][j][k] = 0;
+
        ts->total_io_u[DDIR_SYNC] = 0;
 
        for (i = 0; i < FIO_IO_U_MAP_NR; i++) {
@@ -2643,7 +2975,7 @@ static void __add_stat_to_log(struct io_log *iolog, enum fio_ddir ddir,
                else
                        data.val = iolog->avg_window[ddir].mean.u.f + 0.50;
 
-               __add_log_sample(iolog, data, ddir, 0, elapsed, 0);
+               __add_log_sample(iolog, data, ddir, 0, elapsed, 0, 0);
        }
 
        reset_io_stat(&iolog->avg_window[ddir]);
@@ -2662,7 +2994,7 @@ static unsigned long add_log_sample(struct thread_data *td,
                                    struct io_log *iolog,
                                    union io_sample_data data,
                                    enum fio_ddir ddir, unsigned long long bs,
-                                   uint64_t offset)
+                                   uint64_t offset, unsigned int ioprio)
 {
        unsigned long elapsed, this_window;
 
@@ -2675,7 +3007,8 @@ static unsigned long add_log_sample(struct thread_data *td,
         * If no time averaging, just add the log sample.
         */
        if (!iolog->avg_msec) {
-               __add_log_sample(iolog, data, ddir, bs, elapsed, offset);
+               __add_log_sample(iolog, data, ddir, bs, elapsed, offset,
+                                ioprio);
                return 0;
        }
 
@@ -2701,7 +3034,8 @@ static unsigned long add_log_sample(struct thread_data *td,
 
        __add_stat_to_log(iolog, ddir, elapsed, td->o.log_max != 0);
 
-       iolog->avg_last[ddir] = elapsed - (this_window - iolog->avg_msec);
+       iolog->avg_last[ddir] = elapsed - (elapsed % iolog->avg_msec);
+
        return iolog->avg_msec;
 }
 
@@ -2723,7 +3057,8 @@ void finalize_logs(struct thread_data *td, bool unit_logs)
                _add_stat_to_log(td->iops_log, elapsed, td->o.log_max != 0);
 }
 
-void add_agg_sample(union io_sample_data data, enum fio_ddir ddir, unsigned long long bs)
+void add_agg_sample(union io_sample_data data, enum fio_ddir ddir,
+                   unsigned long long bs)
 {
        struct io_log *iolog;
 
@@ -2731,7 +3066,7 @@ void add_agg_sample(union io_sample_data data, enum fio_ddir ddir, unsigned long
                return;
 
        iolog = agg_io_log[ddir];
-       __add_log_sample(iolog, data, ddir, bs, mtime_since_genesis(), 0);
+       __add_log_sample(iolog, data, ddir, bs, mtime_since_genesis(), 0, 0);
 }
 
 void add_sync_clat_sample(struct thread_stat *ts, unsigned long long nsec)
@@ -2743,18 +3078,32 @@ void add_sync_clat_sample(struct thread_stat *ts, unsigned long long nsec)
        add_stat_sample(&ts->sync_stat, nsec);
 }
 
-static void add_clat_percentile_sample(struct thread_stat *ts,
-                               unsigned long long nsec, enum fio_ddir ddir)
+/*
+ * Count one latency sample in the histogram kept for latency type 'lat'
+ * and data direction 'ddir'. plat_val_to_idx() selects the bucket for
+ * 'nsec'; the resulting index is asserted to be below FIO_IO_U_PLAT_NR.
+ * The per-priority histograms are not touched here.
+ */
+static void add_lat_percentile_sample_noprio(struct thread_stat *ts,
+                               unsigned long long nsec, enum fio_ddir ddir, enum fio_lat lat)
 {
        unsigned int idx = plat_val_to_idx(nsec);
        assert(idx < FIO_IO_U_PLAT_NR);
 
-       ts->io_u_plat[ddir][idx]++;
+       ts->io_u_plat[lat][ddir][idx]++;
+}
+
+/*
+ * Count one latency sample in the histogram for latency type 'lat' (via
+ * add_lat_percentile_sample_noprio()) and also in the high- or
+ * low-priority histogram for 'ddir', chosen by 'high_prio'.
+ */
+static void add_lat_percentile_sample(struct thread_stat *ts,
+                               unsigned long long nsec, enum fio_ddir ddir,
+                               bool high_prio, enum fio_lat lat)
+{
+       unsigned int idx = plat_val_to_idx(nsec);
+
+       /* The helper asserts the same bucket index is in range */
+       add_lat_percentile_sample_noprio(ts, nsec, ddir, lat);
+
+       if (!high_prio)
+               ts->io_u_plat_low_prio[ddir][idx]++;
+       else
+               ts->io_u_plat_high_prio[ddir][idx]++;
 }
 
 void add_clat_sample(struct thread_data *td, enum fio_ddir ddir,
                     unsigned long long nsec, unsigned long long bs,
-                    uint64_t offset)
+                    uint64_t offset, unsigned int ioprio, bool high_prio)
 {
        const bool needs_lock = td_async_processing(td);
        unsigned long elapsed, this_window;
@@ -2766,12 +3115,23 @@ void add_clat_sample(struct thread_data *td, enum fio_ddir ddir,
 
        add_stat_sample(&ts->clat_stat[ddir], nsec);
 
+       if (!ts->lat_percentiles) {
+               if (high_prio)
+                       add_stat_sample(&ts->clat_high_prio_stat[ddir], nsec);
+               else
+                       add_stat_sample(&ts->clat_low_prio_stat[ddir], nsec);
+       }
+
        if (td->clat_log)
                add_log_sample(td, td->clat_log, sample_val(nsec), ddir, bs,
-                              offset);
+                              offset, ioprio);
 
-       if (ts->clat_percentiles)
-               add_clat_percentile_sample(ts, nsec, ddir);
+       if (ts->clat_percentiles) {
+               if (ts->lat_percentiles)
+                       add_lat_percentile_sample_noprio(ts, nsec, ddir, FIO_CLAT);
+               else
+                       add_lat_percentile_sample(ts, nsec, ddir, high_prio, FIO_CLAT);
+       }
 
        if (iolog && iolog->hist_msec) {
                struct io_hist *hw = &iolog->hist_window[ddir];
@@ -2781,7 +3141,7 @@ void add_clat_sample(struct thread_data *td, enum fio_ddir ddir,
                if (!hw->hist_last)
                        hw->hist_last = elapsed;
                this_window = elapsed - hw->hist_last;
-               
+
                if (this_window >= iolog->hist_msec) {
                        uint64_t *io_u_plat;
                        struct io_u_plat_entry *dst;
@@ -2793,13 +3153,13 @@ void add_clat_sample(struct thread_data *td, enum fio_ddir ddir,
                         * located in iolog.c after printing this sample to the
                         * log file.
                         */
-                       io_u_plat = (uint64_t *) td->ts.io_u_plat[ddir];
+                       io_u_plat = (uint64_t *) td->ts.io_u_plat[FIO_CLAT][ddir];
                        dst = malloc(sizeof(struct io_u_plat_entry));
                        memcpy(&(dst->io_u_plat), io_u_plat,
                                FIO_IO_U_PLAT_NR * sizeof(uint64_t));
                        flist_add(&dst->list, &hw->list);
                        __add_log_sample(iolog, sample_plat(dst), ddir, bs,
-                                               elapsed, offset);
+                                        elapsed, offset, ioprio);
 
                        /*
                         * Update the last time we recorded as being now, minus
@@ -2816,7 +3176,8 @@ void add_clat_sample(struct thread_data *td, enum fio_ddir ddir,
 }
 
 void add_slat_sample(struct thread_data *td, enum fio_ddir ddir,
-                    unsigned long usec, unsigned long long bs, uint64_t offset)
+                    unsigned long long nsec, unsigned long long bs,
+                    uint64_t offset, unsigned int ioprio)
 {
        const bool needs_lock = td_async_processing(td);
        struct thread_stat *ts = &td->ts;
@@ -2827,10 +3188,14 @@ void add_slat_sample(struct thread_data *td, enum fio_ddir ddir,
        if (needs_lock)
                __td_io_u_lock(td);
 
-       add_stat_sample(&ts->slat_stat[ddir], usec);
+       add_stat_sample(&ts->slat_stat[ddir], nsec);
 
        if (td->slat_log)
-               add_log_sample(td, td->slat_log, sample_val(usec), ddir, bs, offset);
+               add_log_sample(td, td->slat_log, sample_val(nsec), ddir, bs,
+                              offset, ioprio);
+
+       if (ts->slat_percentiles)
+               add_lat_percentile_sample_noprio(ts, nsec, ddir, FIO_SLAT);
 
        if (needs_lock)
                __td_io_u_unlock(td);
@@ -2838,7 +3203,7 @@ void add_slat_sample(struct thread_data *td, enum fio_ddir ddir,
 
 void add_lat_sample(struct thread_data *td, enum fio_ddir ddir,
                    unsigned long long nsec, unsigned long long bs,
-                   uint64_t offset)
+                   uint64_t offset, unsigned int ioprio, bool high_prio)
 {
        const bool needs_lock = td_async_processing(td);
        struct thread_stat *ts = &td->ts;
@@ -2853,11 +3218,16 @@ void add_lat_sample(struct thread_data *td, enum fio_ddir ddir,
 
        if (td->lat_log)
                add_log_sample(td, td->lat_log, sample_val(nsec), ddir, bs,
-                              offset);
+                              offset, ioprio);
 
-       if (ts->lat_percentiles)
-               add_clat_percentile_sample(ts, nsec, ddir);
+       if (ts->lat_percentiles) {
+               add_lat_percentile_sample(ts, nsec, ddir, high_prio, FIO_LAT);
+               if (high_prio)
+                       add_stat_sample(&ts->clat_high_prio_stat[ddir], nsec);
+               else
+                       add_stat_sample(&ts->clat_low_prio_stat[ddir], nsec);
 
+       }
        if (needs_lock)
                __td_io_u_unlock(td);
 }
@@ -2881,7 +3251,7 @@ void add_bw_sample(struct thread_data *td, struct io_u *io_u,
 
        if (td->bw_log)
                add_log_sample(td, td->bw_log, sample_val(rate), io_u->ddir,
-                              bytes, io_u->offset);
+                              bytes, io_u->offset, io_u->ioprio);
 
        td->stat_io_bytes[io_u->ddir] = td->this_io_bytes[io_u->ddir];
 
@@ -2903,7 +3273,7 @@ static int __add_samples(struct thread_data *td, struct timespec *parent_tv,
        next_log = avg_time;
 
        spent = mtime_since(parent_tv, t);
-       if (spent < avg_time && avg_time - spent >= LOG_MSEC_SLACK)
+       if (spent < avg_time && avg_time - spent > LOG_MSEC_SLACK)
                return avg_time - spent;
 
        if (needs_lock)
@@ -2935,14 +3305,15 @@ static int __add_samples(struct thread_data *td, struct timespec *parent_tv,
                        if (td->o.min_bs[ddir] == td->o.max_bs[ddir])
                                bs = td->o.min_bs[ddir];
 
-                       next = add_log_sample(td, log, sample_val(rate), ddir, bs, 0);
+                       next = add_log_sample(td, log, sample_val(rate), ddir,
+                                             bs, 0, 0);
                        next_log = min(next_log, next);
                }
 
                stat_io_bytes[ddir] = this_io_bytes[ddir];
        }
 
-       timespec_add_msec(parent_tv, avg_time);
+       *parent_tv = *t;
 
        if (needs_lock)
                __td_io_u_unlock(td);
@@ -2975,7 +3346,7 @@ void add_iops_sample(struct thread_data *td, struct io_u *io_u,
 
        if (td->iops_log)
                add_log_sample(td, td->iops_log, sample_val(1), io_u->ddir,
-                              bytes, io_u->offset);
+                              bytes, io_u->offset, io_u->ioprio);
 
        td->stat_io_blocks[io_u->ddir] = td->this_io_blocks[io_u->ddir];
 
@@ -2996,13 +3367,16 @@ static int add_iops_samples(struct thread_data *td, struct timespec *t)
 int calc_log_samples(void)
 {
        struct thread_data *td;
-       unsigned int next = ~0U, tmp;
+       unsigned int next = ~0U, tmp = 0, next_mod = 0, log_avg_msec_min = -1U;
        struct timespec now;
        int i;
+       long elapsed_time = 0;
 
        fio_gettime(&now, NULL);
 
        for_each_td(td, i) {
+               elapsed_time = mtime_since_now(&td->epoch);
+
                if (!td->o.stats)
                        continue;
                if (in_ramp_time(td) ||
@@ -3013,17 +3387,34 @@ int calc_log_samples(void)
                if (!td->bw_log ||
                        (td->bw_log && !per_unit_log(td->bw_log))) {
                        tmp = add_bw_samples(td, &now);
-                       if (tmp < next)
-                               next = tmp;
+
+                       if (td->bw_log)
+                               log_avg_msec_min = min(log_avg_msec_min, (unsigned int)td->bw_log->avg_msec);
                }
                if (!td->iops_log ||
                        (td->iops_log && !per_unit_log(td->iops_log))) {
                        tmp = add_iops_samples(td, &now);
-                       if (tmp < next)
-                               next = tmp;
+
+                       if (td->iops_log)
+                               log_avg_msec_min = min(log_avg_msec_min, (unsigned int)td->iops_log->avg_msec);
                }
+
+               if (tmp < next)
+                       next = tmp;
        }
 
+       /* if log_avg_msec_min has not been changed, set it to 0 */
+       if (log_avg_msec_min == -1U)
+               log_avg_msec_min = 0;
+
+       if (log_avg_msec_min == 0)
+               next_mod = elapsed_time;
+       else
+               next_mod = elapsed_time % log_avg_msec_min;
+
+       /* correction to keep the time on the log avg msec boundary */
+       next = min(next, (log_avg_msec_min - next_mod));
+
        return next == ~0U ? 0 : next;
 }
 
@@ -3060,3 +3451,4 @@ uint32_t *io_u_block_info(struct thread_data *td, struct io_u *io_u)
        assert(idx < td->ts.nr_block_infos);
        return info;
 }
+