Add sample zipf distribution randomizer
[fio.git] / options.c
index f62ab6d573cd3369cbeced32edec4a4fe4c666d4..05a6a5081fa576810c9071506e634077ec27a401 100644 (file)
--- a/options.c
+++ b/options.c
@@ -166,7 +166,7 @@ static int bssplit_ddir(struct thread_data *td, int ddir, char *str)
 static int str_bssplit_cb(void *data, const char *input)
 {
        struct thread_data *td = data;
-       char *str, *p, *odir;
+       char *str, *p, *odir, *ddir;
        int ret = 0;
 
        p = str = strdup(input);
@@ -176,7 +176,21 @@ static int str_bssplit_cb(void *data, const char *input)
 
        odir = strchr(str, ',');
        if (odir) {
-               ret = bssplit_ddir(td, DDIR_WRITE, odir + 1);
+               ddir = strchr(odir + 1, ',');
+               if (ddir) {
+                       ret = bssplit_ddir(td, DDIR_TRIM, ddir + 1);
+                       if (!ret)
+                               *ddir = '\0';
+               } else {
+                       char *op;
+
+                       op = strdup(odir + 1);
+                       ret = bssplit_ddir(td, DDIR_TRIM, op);
+
+                       free(op);
+               }
+               if (!ret) 
+                       ret = bssplit_ddir(td, DDIR_WRITE, odir + 1);
                if (!ret) {
                        *odir = '\0';
                        ret = bssplit_ddir(td, DDIR_READ, str);
@@ -185,18 +199,116 @@ static int str_bssplit_cb(void *data, const char *input)
                char *op;
 
                op = strdup(str);
+               ret = bssplit_ddir(td, DDIR_WRITE, op);
+               free(op);
 
+               if (!ret) {
+                       op = strdup(str);
+                       ret = bssplit_ddir(td, DDIR_TRIM, op);
+                       free(op);
+               }
                ret = bssplit_ddir(td, DDIR_READ, str);
-               if (!ret)
-                       ret = bssplit_ddir(td, DDIR_WRITE, op);
-
-               free(op);
        }
 
        free(p);
        return ret;
 }
 
+/*
+ * Map a symbolic error name such as "EIO" to a number.
+ *
+ * The value returned is the 1-based position of the name in the table
+ * below, so the order of the table entries is significant. Returns 0 if
+ * 'str' does not match any entry.
+ */
+static int str2error(char *str)
+{
+       const char * names[] = {
+               "EPERM", "ENOENT", "ESRCH", "EINTR", "EIO",
+               "ENXIO", "E2BIG", "ENOEXEC", "EBADF",
+               "ECHILD", "EAGAIN", "ENOMEM", "EACCES",
+               "EFAULT", "ENOTBLK", "EBUSY", "EEXIST",
+               "EXDEV", "ENODEV", "ENOTDIR", "EISDIR",
+               "EINVAL", "ENFILE", "EMFILE", "ENOTTY",
+               "ETXTBSY", "EFBIG", "ENOSPC", "ESPIPE",
+               "EROFS", "EMLINK", "EPIPE", "EDOM", "ERANGE"
+       };
+       int nr_names = sizeof(names) / sizeof(names[0]);
+       int idx;
+
+       for (idx = 0; idx < nr_names; idx++) {
+               if (strcmp(names[idx], str) == 0)
+                       return idx + 1;
+       }
+
+       return 0;
+}
+
+/*
+ * Parse one ':'-separated list of errors and install it as the ignore
+ * list for error type 'etype'.
+ *
+ * Each entry is either a symbolic name starting with 'E' (looked up with
+ * str2error()) or a number (parsed with atoi(); a negative number is
+ * stored as its positive counterpart). Parsing stops at the first empty
+ * entry. 'str' is modified in place by strsep().
+ *
+ * On success with at least one entry, the array is stored in
+ * td->o.ignore_error[etype], its length in td->o.ignore_error_nr[etype],
+ * and the bit for 'etype' is set in td->o.continue_on_error. An empty
+ * list sets the count to 0 and stores nothing.
+ *
+ * Returns 0 on success, 1 on an illegal type, an unknown error or an
+ * allocation failure. The thread options are not touched on failure.
+ */
+static int ignore_error_type(struct thread_data *td, int etype, char *str)
+{
+       unsigned int i, nr_alloc = 4;
+       int *error, *tmp;
+       char *fname;
+
+       if (etype < 0 || etype >= ERROR_TYPE_CNT) {
+               log_err("Illegal error type\n");
+               return 1;
+       }
+
+       error = malloc(nr_alloc * sizeof(*error));
+       if (!error) {
+               log_err("fio: failed to allocate ignore_error list\n");
+               return 1;
+       }
+
+       i = 0;
+       while ((fname = strsep(&str, ":")) != NULL) {
+
+               if (!strlen(fname))
+                       break;
+
+               /*
+                * grow the array, if needed. Go through a temporary so
+                * the old buffer can still be freed if realloc() fails.
+                */
+               if (i == nr_alloc) {
+                       nr_alloc <<= 1;
+                       tmp = realloc(error, nr_alloc * sizeof(*error));
+                       if (!tmp) {
+                               log_err("fio: failed to allocate ignore_error list\n");
+                               free(error);
+                               return 1;
+                       }
+                       error = tmp;
+               }
+               if (fname[0] == 'E') {
+                       error[i] = str2error(fname);
+               } else {
+                       error[i] = atoi(fname);
+                       /* accept "-5" as well as "5" */
+                       if (error[i] < 0)
+                               error[i] = -error[i];
+               }
+               if (!error[i]) {
+                       log_err("Unknown error %s, please use number value \n",
+                                 fname);
+                       free(error);
+                       return 1;
+               }
+               i++;
+       }
+       if (i) {
+               td->o.continue_on_error |= 1 << etype;
+               td->o.ignore_error_nr[etype] = i;
+               td->o.ignore_error[etype] = error;
+       } else {
+               /* nothing to ignore for this type */
+               td->o.ignore_error_nr[etype] = 0;
+               free(error);
+       }
+       return 0;
+
+}
+
+/*
+ * Option callback for "ignore_error".
+ *
+ * 'input' is a ','-separated sequence of error lists. The first list is
+ * handed to ignore_error_type() as error type 0, the second as type 1,
+ * and so on; the format of each list is defined by ignore_error_type().
+ *
+ * Returns 0 if every list was accepted, non-zero otherwise.
+ */
+static int str_ignore_error_cb(void *data, const char *input)
+{
+       struct thread_data *td = data;
+       char *str, *p, *cur, *n;
+       int type = 0, ret = 1;
+
+       /* 'p' keeps the address returned by strdup() for the final free() */
+       p = str = strdup(input);
+       if (!p)
+               return 1;
+
+       /*
+        * strip_blank_front() receives 'str' by address, so it may move
+        * it; parse from the stripped position and free through 'p'.
+        */
+       strip_blank_front(&str);
+       strip_blank_end(str);
+
+       cur = str;
+       while (cur) {
+               n = strchr(cur, ',');
+               if (n)
+                       *n++ = '\0';
+               ret = ignore_error_type(td, type, cur);
+               if (ret)
+                       break;
+               cur = n;
+               type++;
+       }
+       free(p);
+       return ret;
+}
+
 static int str_rw_cb(void *data, const char *str)
 {
        struct thread_data *td = data;
@@ -245,12 +357,9 @@ static int str_verify_cb(void *data, const char *mem)
 {
        struct thread_data *td = data;
 
-       if (td->o.verify != VERIFY_CRC32C_INTEL)
-               return 0;
-
-       if (!crc32c_intel_works()) {
-               log_info("fio: System does not support hw accelerated crc32c. Falling back to sw crc32c.\n");
-               td->o.verify = VERIFY_CRC32C;
+       if (td->o.verify == VERIFY_CRC32C_INTEL ||
+           td->o.verify == VERIFY_CRC32C) {
+               crc32c_intel_probe();
        }
 
        return 0;
@@ -455,6 +564,130 @@ static int str_verify_cpus_allowed_cb(void *data, const char *input)
 }
 #endif
 
+#ifdef FIO_HAVE_LIBNUMA
+/*
+ * Option callback for "numa_cpu_nodes": turn the node list in 'input'
+ * into a node mask stored in td->o.numa_cpunodesmask.
+ *
+ * Returns 0 on success and 1 if the node string could not be parsed.
+ */
+static int str_numa_cpunodes_cb(void *data, char *input)
+{
+       struct thread_data *td = data;
+
+       /*
+        * numa_parse_nodestring() parses a character string list of
+        * nodes into a bit mask. The bit mask is allocated by
+        * numa_allocate_nodemask(), so it should be freed by
+        * numa_free_nodemask().
+        */
+       td->o.numa_cpunodesmask = numa_parse_nodestring(input);
+       if (td->o.numa_cpunodesmask != NULL) {
+               td->o.numa_cpumask_set = 1;
+               return 0;
+       }
+
+       log_err("fio: numa_parse_nodestring failed\n");
+       td_verror(td, 1, "str_numa_cpunodes_cb");
+       return 1;
+}
+
+/*
+ * Option callback for "numa_mem_policy".
+ *
+ * 'input' has the form "<mode>[:<nodelist>]" where <mode> is one of
+ * default, prefer, bind, interleave or local. 'input' is modified in
+ * place: the ':' separator is overwritten with a NUL.
+ *
+ * The mode is stored in td->o.numa_mem_mode. For 'prefer' the single
+ * node number goes to td->o.numa_mem_prefer_node; for 'bind' and
+ * 'interleave' the parsed node mask goes to td->o.numa_memnodesmask.
+ *
+ * Returns 0 on success and 1 on any parse or validation error.
+ */
+static int str_numa_mpol_cb(void *data, char *input)
+{
+       struct thread_data *td = data;
+       const char * const policy_types[] =
+               { "default", "prefer", "bind", "interleave", "local" };
+       char *nodelist, *def_nodelist = NULL;
+       int i, ret = 1;
+
+       nodelist = strchr(input, ':');
+       if (nodelist) {
+               /* NUL-terminate mode */
+               *nodelist++ = '\0';
+       }
+
+       /*
+        * The index into policy_types[] is used directly as the mode.
+        * NOTE(review): this assumes MPOL_DEFAULT..MPOL_LOCAL are 0..4 in
+        * the order of the table above - confirm against numaif.h.
+        */
+       for (i = 0; i <= MPOL_LOCAL; i++) {
+               if (!strcmp(input, policy_types[i])) {
+                       td->o.numa_mem_mode = i;
+                       break;
+               }
+       }
+       if (i > MPOL_LOCAL) {
+               log_err("fio: memory policy should be: default, prefer, bind, interleave, local\n");
+               goto out;
+       }
+
+       switch (td->o.numa_mem_mode) {
+       case MPOL_PREFERRED:
+               /*
+                * Insist on a nodelist of one node only
+                */
+               if (nodelist) {
+                       char *rest = nodelist;
+
+                       /* <ctype.h> functions need an unsigned char value */
+                       while (isdigit((unsigned char) *rest))
+                               rest++;
+                       if (*rest) {
+                               log_err("fio: one node only for \'prefer\'\n");
+                               goto out;
+                       }
+                       /* "prefer:" with nothing after the colon */
+                       if (rest == nodelist) {
+                               log_err("fio: one node is needed for \'prefer\'\n");
+                               goto out;
+                       }
+               } else {
+                       log_err("fio: one node is needed for \'prefer\'\n");
+                       goto out;
+               }
+               break;
+       case MPOL_INTERLEAVE:
+               /*
+                * Default to online nodes with memory if no nodelist.
+                * The copy is tracked separately so it can be freed on
+                * every exit path.
+                */
+               if (!nodelist) {
+                       def_nodelist = strdup("all");
+                       if (!def_nodelist) {
+                               log_err("fio: failed to allocate default nodelist\n");
+                               goto out;
+                       }
+                       nodelist = def_nodelist;
+               }
+               break;
+       case MPOL_LOCAL:
+       case MPOL_DEFAULT:
+               /*
+                * Don't allow a nodelist
+                */
+               if (nodelist) {
+                       log_err("fio: NO nodelist for \'local\'\n");
+                       goto out;
+               }
+               break;
+       case MPOL_BIND:
+               /*
+                * Insist on a nodelist
+                */
+               if (!nodelist) {
+                       log_err("fio: a nodelist is needed for \'bind\'\n");
+                       goto out;
+               }
+               break;
+       }
+
+
+       /* numa_parse_nodestring() parses a character string list
+        * of nodes into a bit mask. The bit mask is allocated by
+        * numa_allocate_nodemask(), so it should be freed by
+        * numa_free_nodemask().
+        */
+       switch (td->o.numa_mem_mode) {
+       case MPOL_PREFERRED:
+               td->o.numa_mem_prefer_node = atoi(nodelist);
+               break;
+       case MPOL_INTERLEAVE:
+       case MPOL_BIND:
+               td->o.numa_memnodesmask = numa_parse_nodestring(nodelist);
+               if (td->o.numa_memnodesmask == NULL) {
+                       log_err("fio: numa_parse_nodestring failed\n");
+                       td_verror(td, 1, "str_numa_memnodes_cb");
+                       goto out;
+               }
+               break;
+       case MPOL_LOCAL:
+       case MPOL_DEFAULT:
+       default:
+               break;
+       }
+
+       td->o.numa_memmask_set = 1;
+       ret = 0;
+
+out:
+       /* free(NULL) is a no-op when no default nodelist was created */
+       free(def_nodelist);
+       return ret;
+}
+#endif
+
 #ifdef FIO_HAVE_TRIM
 static int str_verify_trim_cb(void *data, unsigned long long *val)
 {
@@ -495,6 +728,29 @@ static int str_sfr_cb(void *data, const char *str)
 }
 #endif
 
+/*
+ * Option callback for "random_distribution".
+ *
+ * Nothing is done for the plain random distribution. Otherwise the
+ * option postfix obtained from get_opt_postfix() is parsed as a float
+ * and stored in td->o.zipf_theta; 0.6 is used when there is no postfix.
+ *
+ * Returns 0 on success and 1 if the postfix could not be parsed.
+ */
+static int str_random_distribution_cb(void *data, const char *str)
+{
+       struct thread_data *td = data;
+       double theta = 0.6;
+       char *postfix;
+
+       if (td->o.random_distribution == FIO_RAND_DIST_RANDOM)
+               return 0;
+
+       postfix = get_opt_postfix(str);
+       if (postfix && !str_to_float(postfix, &theta)) {
+               log_err("fio: random postfix parsing failed\n");
+               free(postfix);
+               return 1;
+       }
+
+       free(postfix);
+       td->o.zipf_theta = theta;
+       return 0;
+}
+
 static int check_dir(struct thread_data *td, char *fname)
 {
 #if 0
@@ -689,12 +945,32 @@ static int str_verify_pattern_cb(void *data, const char *input)
                        }
                }
        }
+
+       /*
+        * Fill the pattern all the way to the end. This greatly reduces
+        * the number of memcpy's we have to do when verifying the IO.
+        */
+       while (i > 1 && i * 2 <= MAX_PATTERN_SIZE) {
+               memcpy(&td->o.verify_pattern[i], &td->o.verify_pattern[0], i);
+               i *= 2;
+       }
+       if (i == 1) {
+               /*
+                * The code in verify_io_u_pattern assumes a single byte pattern
+                * fills the whole verify pattern buffer.
+                */
+               memset(td->o.verify_pattern, td->o.verify_pattern[0],
+                      MAX_PATTERN_SIZE);
+       }
+
        td->o.verify_pattern_bytes = i;
+
        /*
         * VERIFY_META could already be set
         */
        if (td->o.verify == VERIFY_NONE)
                td->o.verify = VERIFY_PATTERN;
+
        return 0;
 }
 
@@ -915,6 +1191,10 @@ static struct fio_option options[FIO_MAX_OPTS] = {
                            .oval = TD_DDIR_WRITE,
                            .help = "Sequential write",
                          },
+                         { .ival = "trim",
+                           .oval = TD_DDIR_TRIM,
+                           .help = "Sequential trim",
+                         },
                          { .ival = "randread",
                            .oval = TD_DDIR_RANDREAD,
                            .help = "Random read",
@@ -923,10 +1203,18 @@ static struct fio_option options[FIO_MAX_OPTS] = {
                            .oval = TD_DDIR_RANDWRITE,
                            .help = "Random write",
                          },
+                         { .ival = "randtrim",
+                           .oval = TD_DDIR_RANDTRIM,
+                           .help = "Random trim",
+                         },
                          { .ival = "rw",
                            .oval = TD_DDIR_RW,
                            .help = "Sequential read and write mix",
                          },
+                         { .ival = "readwrite",
+                           .oval = TD_DDIR_RW,
+                           .help = "Sequential read and write mix",
+                         },
                          { .ival = "randrw",
                            .oval = TD_DDIR_RANDRW,
                            .help = "Random read and write mix"
@@ -1031,6 +1319,21 @@ static struct fio_option options[FIO_MAX_OPTS] = {
                          { .ival = "rdma",
                            .help = "RDMA IO engine",
                          },
+#endif
+#ifdef FIO_HAVE_FUSION_AW
+                         { .ival = "fusion-aw-sync",
+                           .help = "Fusion-io atomic write engine",
+                         },
+#endif
+#ifdef FIO_HAVE_E4_ENG
+                         { .ival = "e4defrag",
+                           .help = "ext4 defrag engine",
+                         },
+#endif
+#ifdef FIO_HAVE_FALLOC_ENG
+                         { .ival = "falloc",
+                           .help = "fallocate() file based engine",
+                         },
 #endif
                          { .ival = "external",
                            .help = "Load external engine (append name)",
@@ -1101,12 +1404,21 @@ static struct fio_option options[FIO_MAX_OPTS] = {
                .help   = "Start IO from this offset",
                .def    = "0",
        },
+       {
+               .name   = "offset_increment",
+               .type   = FIO_OPT_STR_VAL,
+               .off1   = td_var_offset(offset_increment),
+               .help   = "What is the increment from one offset to the next",
+               .parent = "offset",
+               .def    = "0",
+       },
        {
                .name   = "bs",
                .alias  = "blocksize",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(bs[DDIR_READ]),
                .off2   = td_var_offset(bs[DDIR_WRITE]),
+               .off3   = td_var_offset(bs[DDIR_TRIM]),
                .minval = 1,
                .help   = "Block size unit",
                .def    = "4k",
@@ -1118,6 +1430,7 @@ static struct fio_option options[FIO_MAX_OPTS] = {
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(ba[DDIR_READ]),
                .off2   = td_var_offset(ba[DDIR_WRITE]),
+               .off3   = td_var_offset(ba[DDIR_TRIM]),
                .minval = 1,
                .help   = "IO block offset alignment",
                .parent = "rw",
@@ -1130,6 +1443,8 @@ static struct fio_option options[FIO_MAX_OPTS] = {
                .off2   = td_var_offset(max_bs[DDIR_READ]),
                .off3   = td_var_offset(min_bs[DDIR_WRITE]),
                .off4   = td_var_offset(max_bs[DDIR_WRITE]),
+               .off5   = td_var_offset(min_bs[DDIR_TRIM]),
+               .off6   = td_var_offset(max_bs[DDIR_TRIM]),
                .minval = 1,
                .help   = "Set block size range (in more detail than bs)",
                .parent = "rw",
@@ -1180,6 +1495,24 @@ static struct fio_option options[FIO_MAX_OPTS] = {
                .parent = "norandommap",
                .def    = "0",
        },
+       {
+               .name   = "random_distribution",
+               .type   = FIO_OPT_STR,
+               .off1   = td_var_offset(random_distribution),
+               .cb     = str_random_distribution_cb,
+               .help   = "Random offset distribution generator",
+               .def    = "random",
+               .posval = {
+                         { .ival = "random",
+                           .oval = FIO_RAND_DIST_RANDOM,
+                           .help = "Completely random",
+                         },
+                         { .ival = "zipf",
+                           .oval = FIO_RAND_DIST_ZIPF,
+                           .help = "Zipf distribution",
+                         },
+               },
+       },
        {
                .name   = "nrfiles",
                .alias  = "nr_files",
@@ -1462,12 +1795,12 @@ static struct fio_option options[FIO_MAX_OPTS] = {
                            .help = "Use crc32 checksums for verification",
                          },
                          { .ival = "crc32c-intel",
-                           .oval = VERIFY_CRC32C_INTEL,
-                           .help = "Use hw crc32c checksums for verification",
+                           .oval = VERIFY_CRC32C,
+                           .help = "Use crc32c checksums for verification (hw assisted, if available)",
                          },
                          { .ival = "crc32c",
                            .oval = VERIFY_CRC32C,
-                           .help = "Use crc32c checksums for verification",
+                           .help = "Use crc32c checksums for verification (hw assisted, if available)",
                          },
                          { .ival = "crc16",
                            .oval = VERIFY_CRC16,
@@ -1767,30 +2100,34 @@ static struct fio_option options[FIO_MAX_OPTS] = {
        {
                .name   = "rate",
                .type   = FIO_OPT_INT,
-               .off1   = td_var_offset(rate[0]),
-               .off2   = td_var_offset(rate[1]),
+               .off1   = td_var_offset(rate[DDIR_READ]),
+               .off2   = td_var_offset(rate[DDIR_WRITE]),
+               .off3   = td_var_offset(rate[DDIR_TRIM]),
                .help   = "Set bandwidth rate",
        },
        {
                .name   = "ratemin",
                .type   = FIO_OPT_INT,
-               .off1   = td_var_offset(ratemin[0]),
-               .off2   = td_var_offset(ratemin[1]),
+               .off1   = td_var_offset(ratemin[DDIR_READ]),
+               .off2   = td_var_offset(ratemin[DDIR_WRITE]),
+               .off3   = td_var_offset(ratemin[DDIR_TRIM]),
                .help   = "Job must meet this rate or it will be shutdown",
                .parent = "rate",
        },
        {
                .name   = "rate_iops",
                .type   = FIO_OPT_INT,
-               .off1   = td_var_offset(rate_iops[0]),
-               .off2   = td_var_offset(rate_iops[1]),
+               .off1   = td_var_offset(rate_iops[DDIR_READ]),
+               .off2   = td_var_offset(rate_iops[DDIR_WRITE]),
+               .off3   = td_var_offset(rate_iops[DDIR_TRIM]),
                .help   = "Limit IO used to this number of IO operations/sec",
        },
        {
                .name   = "rate_iops_min",
                .type   = FIO_OPT_INT,
-               .off1   = td_var_offset(rate_iops_min[0]),
-               .off2   = td_var_offset(rate_iops_min[1]),
+               .off1   = td_var_offset(rate_iops_min[DDIR_READ]),
+               .off2   = td_var_offset(rate_iops_min[DDIR_WRITE]),
+               .off3   = td_var_offset(rate_iops_min[DDIR_TRIM]),
                .help   = "Job must meet this rate or it will be shut down",
                .parent = "rate_iops",
        },
@@ -1802,6 +2139,12 @@ static struct fio_option options[FIO_MAX_OPTS] = {
                .def    = "1000",
                .parent = "rate",
        },
+       {
+               .name   = "max_latency",
+               .type   = FIO_OPT_INT,
+               .off1   = td_var_offset(max_latency),
+               .help   = "Maximum tolerated IO latency (usec)",
+       },
        {
                .name   = "invalidate",
                .type   = FIO_OPT_BOOL,
@@ -1855,6 +2198,13 @@ static struct fio_option options[FIO_MAX_OPTS] = {
                .help   = "Create files when they are opened for IO",
                .def    = "0",
        },
+       {
+               .name   = "create_only",
+               .type   = FIO_OPT_BOOL,
+               .off1   = td_var_offset(create_only),
+               .help   = "Only perform file creation phase",
+               .def    = "0",
+       },
        {
                .name   = "pre_read",
                .type   = FIO_OPT_BOOL,
@@ -1889,6 +2239,20 @@ static struct fio_option options[FIO_MAX_OPTS] = {
                .cb     = str_cpus_allowed_cb,
                .help   = "Set CPUs allowed",
        },
+#endif
+#ifdef FIO_HAVE_LIBNUMA
+       {
+               .name   = "numa_cpu_nodes",
+               .type   = FIO_OPT_STR,
+               .cb     = str_numa_cpunodes_cb,
+               .help   = "NUMA CPU nodes bind",
+       },
+       {
+               .name   = "numa_mem_policy",
+               .type   = FIO_OPT_STR,
+               .cb     = str_numa_mpol_cb,
+               .help   = "NUMA memory policy setup",
+       },
 #endif
        {
                .name   = "end_fsync",
@@ -1996,6 +2360,21 @@ static struct fio_option options[FIO_MAX_OPTS] = {
                .help   = "Slightly scramble buffers on every IO submit",
                .def    = "1",
        },
+       {
+               .name   = "buffer_compress_percentage",
+               .type   = FIO_OPT_INT,
+               .off1   = td_var_offset(compress_percentage),
+               .maxval = 100,
+               .minval = 1,
+               .help   = "How compressible the buffer is (approximately)",
+       },
+       {
+               .name   = "buffer_compress_chunk",
+               .type   = FIO_OPT_INT,
+               .off1   = td_var_offset(compress_chunk),
+               .parent = "buffer_compress_percentage",
+               .help   = "Size of compressible region in buffer",
+       },
        {
                .name   = "clat_percentiles",
                .type   = FIO_OPT_BOOL,
@@ -2110,6 +2489,21 @@ static struct fio_option options[FIO_MAX_OPTS] = {
                          },
                },
        },
+       {
+               .name   = "ignore_error",
+               .type   = FIO_OPT_STR,
+               .cb     = str_ignore_error_cb,
+               .help   = "Set a specific list of errors to ignore",
+               .parent = "rw",
+       },
+       {
+               .name   = "error_dump",
+               .type   = FIO_OPT_BOOL,
+               .off1   = td_var_offset(error_dump),
+               .def    = "0",
+               .help   = "Dump info on each error",
+       },
+
        {
                .name   = "profile",
                .type   = FIO_OPT_STR_STORE,
@@ -2149,6 +2543,40 @@ static struct fio_option options[FIO_MAX_OPTS] = {
                .off1   = td_var_offset(gid),
                .help   = "Run job with this group ID",
        },
+       {
+               .name   = "flow_id",
+               .type   = FIO_OPT_INT,
+               .off1   = td_var_offset(flow_id),
+               .help   = "The flow index ID to use",
+               .def    = "0",
+       },
+       {
+               .name   = "flow",
+               .type   = FIO_OPT_INT,
+               .off1   = td_var_offset(flow),
+               .help   = "Weight for flow control of this job",
+               .parent = "flow_id",
+               .def    = "0",
+       },
+       {
+               .name   = "flow_watermark",
+               .type   = FIO_OPT_INT,
+               .off1   = td_var_offset(flow_watermark),
+               .help   = "High watermark for flow control. This option"
+                       " should be set to the same value for all threads"
+                       " with non-zero flow.",
+               .parent = "flow_id",
+               .def    = "1024",
+       },
+       {
+               .name   = "flow_sleep",
+               .type   = FIO_OPT_INT,
+               .off1   = td_var_offset(flow_sleep),
+               .help   = "How many microseconds to sleep after being held"
+                       " back by the flow control mechanism",
+               .parent = "flow_id",
+               .def    = "0",
+       },
        {
                .name = NULL,
        },
@@ -2250,7 +2678,7 @@ void fio_keywords_init(void)
        char buf[128];
        long l;
 
-       sprintf(buf, "%lu", page_size);
+       sprintf(buf, "%lu", (unsigned long) page_size);
        fio_keywords[0].replace = strdup(buf);
 
        mb_memory = os_phys_mem() / (1024 * 1024);