Improve rwmix buffered split
author Jens Axboe <jens.axboe@oracle.com>
Fri, 16 Mar 2007 19:27:27 +0000 (20:27 +0100)
committer Jens Axboe <jens.axboe@oracle.com>
Fri, 16 Mar 2007 19:27:27 +0000 (20:27 +0100)
For buffered IO, the rwmix split can often be grossly unfair
towards reads, because dirtying tons of memory is done much
faster than reading data. Improve the split for such workloads
by looking at bytes transferred as well as elapsed time.

Note that it'll still be somewhat unfair; there's only so much
we can reliably do. But it's better.
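
To make the new rule concrete: a direction switch is now gated on both
the elapsed rwmix cycle time and the byte count of the current
direction. A minimal C sketch of that gate (same field names as the
patch, standalone illustration only, not the fio code itself):

        /* time alone is no longer enough to trigger a switch; the
         * current direction must also have moved its share of bytes */
        if (elapsed >= cycle &&
            td->io_bytes[td->rwmix_ddir] >= td->rwmix_bytes) {
                /* reseed the data direction from the rwmixread percentage */
        }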

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
fio.h
io_u.c

diff --git a/fio.h b/fio.h
index 6e9311be9517347892e1adc4dd8037f57676c31d..bd78e85b6365702a293b40205139aeeff8ca5209 100644
--- a/fio.h
+++ b/fio.h
@@ -489,6 +489,7 @@ struct thread_data {
         * read/write mixed workload state
         */
        os_random_state_t rwmix_state;
+       unsigned long long rwmix_bytes;
        struct timeval rwmix_switch;
        enum fio_ddir rwmix_ddir;
 
diff --git a/io_u.c b/io_u.c
index 5331f8efee4d052e0aa555f80839fd9f6e66c191..9cb68af4691025ff83f50c9d562292d94bfefccc 100644
--- a/io_u.c
+++ b/io_u.c
@@ -169,6 +169,24 @@ static unsigned int get_next_buflen(struct thread_data *td, struct io_u *io_u)
        return buflen;
 }
 
+static void set_rwmix_bytes(struct thread_data *td)
+{
+       unsigned long long rbytes;
+       unsigned int diff;
+
+       /*
+        * we do time or byte based switch. this is needed because
+        * buffered writes may issue a lot quicker than they complete,
+        * whereas reads do not.
+        */
+       rbytes = td->io_bytes[td->rwmix_ddir] - td->rwmix_bytes;
+       diff = td->o.rwmixread;
+       if (td->rwmix_ddir == DDIR_WRITE)
+               diff = 100 - diff;
+
+       td->rwmix_bytes = td->io_bytes[td->rwmix_ddir] + (rbytes * (100 - diff)) / diff;
+}
+
 /*
  * Return the data direction for the next io_u. If the job is a
  * mixed read/write workload, check the rwmix cycle and switch if
@@ -179,23 +197,34 @@ static enum fio_ddir get_rw_ddir(struct thread_data *td)
        if (td_rw(td)) {
                struct timeval now;
                unsigned long elapsed;
+               unsigned int cycle;
 
                fio_gettime(&now, NULL);
                elapsed = mtime_since_now(&td->rwmix_switch);
 
+               cycle = td->o.rwmixcycle;
+               if (!td->rwmix_bytes)
+                       cycle /= 10;
+
                /*
                 * Check if it's time to seed a new data direction.
                 */
-               if (elapsed >= td->o.rwmixcycle) {
+               if (elapsed >= cycle &&
+                   td->io_bytes[td->rwmix_ddir] >= td->rwmix_bytes) {
                        unsigned int v;
                        long r;
 
                        r = os_random_long(&td->rwmix_state);
                        v = 1 + (int) (100.0 * (r / (RAND_MAX + 1.0)));
-                       if (v < td->o.rwmixread)
+                       if (v < td->o.rwmixread) {
+                               if (td->rwmix_ddir != DDIR_READ)
+                                       set_rwmix_bytes(td);
                                td->rwmix_ddir = DDIR_READ;
-                       else
+                       } else {
+                               if (td->rwmix_ddir != DDIR_WRITE)
+                                       set_rwmix_bytes(td);
                                td->rwmix_ddir = DDIR_WRITE;
+                       }
                        memcpy(&td->rwmix_switch, &now, sizeof(now));
                }
                return td->rwmix_ddir;
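
For reference, a worked example of the set_rwmix_bytes() arithmetic in
the hunk above (numbers are hypothetical): with rwmixread=70, leaving a
write phase makes diff the write share (30), so the next byte target
grows by rbytes * 70 / 30, i.e. roughly 2.3x whatever the write phase
moved past its previous target. A standalone C sketch of that
calculation for the write-to-read case (hypothetical names, not part of
the patch):

        /* illustration only: compute the next byte target when leaving
         * a write phase, with rwmixread as the read percentage of the
         * mix (assumes 0 < rwmixread < 100) */
        static unsigned long long next_rwmix_bytes(unsigned long long io_bytes_write,
                                                   unsigned long long old_target,
                                                   unsigned int rwmixread)
        {
                unsigned long long rbytes = io_bytes_write - old_target; /* bytes past the old target */
                unsigned int wr_pct = 100 - rwmixread;                   /* write share of the mix */

                /* e.g. rwmixread=70: the target is bumped by rbytes * 70 / 30 */
                return io_bytes_write + (rbytes * rwmixread) / wr_pct;
        }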