fdp: support scheme placement id (index) selection

author Hyunwoo Park <dshw.park@samsung.com>

Tue, 30 Apr 2024 06:58:12 +0000 (06:58 +0000)

committer Hyunwoo Park <dshw.park@samsung.com>

Tue, 21 May 2024 12:12:35 +0000 (12:12 +0000)
author Hyunwoo Park <dshw.park@samsung.com>
Tue, 30 Apr 2024 06:58:12 +0000 (06:58 +0000)
committer Hyunwoo Park <dshw.park@samsung.com>
Tue, 21 May 2024 12:12:35 +0000 (12:12 +0000)
diff --git a/HOWTO.rst b/HOWTO.rst

index 2f8ef6d42cbfa05972c5e87373aa5dd18b60af86..3b262faeae7a7d340c816aaef21b97e129241cc0 100644 (file)
--- a/HOWTO.rst
+++ b/HOWTO.rst
@@ -2529,8 +2529,12 @@ with the caveat that when used on the command line, they must come after the
                         Round robin over available placement IDs. This is the
                         default.
  
-       The available placement ID (indices) are defined by the option
-       :option:`plids`.
+               **scheme**
+                       Choose a placement ID (index) based on the scheme file defined by
+                       the option :option:`dp_scheme`.
+
+       The available placement ID (indices) are defined by the option :option:`fdp_pli`
+       or :option:`plids` except for the case of **scheme**.
  
  .. option:: plids=str, fdp_pli=str : [io_uring_cmd] [xnvme]
  
@@ -2541,6 +2545,26 @@ with the caveat that when used on the command line, they must come after the
          identifiers only at indices 0, 2 and 5 specify ``plids=0,2,5``. For
          streams this should be a comma-separated list of Stream IDs.
  
+.. option:: dp_scheme=str : [io_uring_cmd] [xnvme]
+
+       Defines which placement ID (index) is selected based on the offset (LBA) range.
+       The file should contain one or more scheme entries in the following format:
+
+               0, 10737418240, 0
+               10737418240, 21474836480, 1
+               21474836480, 32212254720, 2
+               ...
+
+       Each line, a scheme entry, contains a start offset, an end offset, and a placement ID
+       (index) separated by commas (,). If the write offset is within the range of a
+       scheme entry (start offset ≤ offset < end offset), the corresponding placement ID
+       (index) will be selected. If the write offset matches multiple scheme entries,
+       the first matching scheme entry will be applied. If the offset is not within the range
+       of any scheme entry, the dspec field will be set to 0, the default RUH. (Caution: In case of
+       multiple devices in a job, all devices of the job will be affected by the scheme. If
+       this option is specified, the option :option:`plids` or :option:`fdp_pli` will be
+       ignored.)
+
  .. option:: md_per_io_size=int : [io_uring_cmd] [xnvme]
  
         Size in bytes for separate metadata buffer per IO. Default: 0.
diff --git a/cconv.c b/cconv.c

index 16112248a6b71016c2542a5097ca35750f9a470b..9b344940cb73f0676c71560f82b074ff3c26f8a1 100644 (file)
--- a/cconv.c
+++ b/cconv.c
@@ -94,6 +94,7 @@ int convert_thread_options_to_cpu(struct thread_options *o,
         string_to_cpu(&o->ioscheduler, top->ioscheduler);
         string_to_cpu(&o->profile, top->profile);
         string_to_cpu(&o->cgroup, top->cgroup);
+       string_to_cpu(&o->dp_scheme_file, top->dp_scheme_file);
  
         o->allow_create = le32_to_cpu(top->allow_create);
         o->allow_mounted_write = le32_to_cpu(top->allow_mounted_write);
@@ -398,6 +399,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
         string_to_net(top->ioscheduler, o->ioscheduler);
         string_to_net(top->profile, o->profile);
         string_to_net(top->cgroup, o->cgroup);
+       string_to_net(top->dp_scheme_file, o->dp_scheme_file);
  
         top->allow_create = cpu_to_le32(o->allow_create);
         top->allow_mounted_write = cpu_to_le32(o->allow_mounted_write);
diff --git a/dataplacement.c b/dataplacement.c

index 1d5b21edfd96bd204166609fccd262edf9809c20..8a4c8e64419c33e0912b6e921fb6ec6561abbca5 100644 (file)
--- a/dataplacement.c
+++ b/dataplacement.c
@@ -100,6 +100,56 @@ out:
         return ret;
  }
  
+/*
+ * Load the placement scheme for @f from the file named by
+ * td->o.dp_scheme_file.
+ *
+ * Does nothing unless td->o.dp_id_select is FIO_DP_SCHEME. Each entry is
+ * read as "start,end,pli"; at most DP_MAX_SCHEME_ENTRIES entries are kept
+ * and reading stops at the first input that does not match that format.
+ * On success the parsed table is stored in f->ruhs_scheme.
+ *
+ * Returns 0 on success, or a negative errno value if no scheme file was
+ * given, the file cannot be opened, or the table cannot be allocated.
+ */
+static int init_ruh_scheme(struct thread_data *td, struct fio_file *f)
+{
+       struct fio_ruhs_scheme *ruh_scheme;
+       FILE *scheme_fp;
+       unsigned long long start, end;
+       /* %hu requires an unsigned short; it is narrowed to uint16_t below */
+       unsigned short pli;
+       int ret = 0;
+
+       if (td->o.dp_id_select != FIO_DP_SCHEME)
+               return 0;
+
+       /* Scheme selection without a scheme file cannot work */
+       if (!td->o.dp_scheme_file) {
+               log_err("fio: ruh scheme requires the dp_scheme option to be set\n");
+               return -EINVAL;
+       }
+
+       /* Get the scheme from the file */
+       scheme_fp = fopen(td->o.dp_scheme_file, "r");
+
+       if (!scheme_fp) {
+               /* Capture errno before any other call can overwrite it */
+               ret = -errno;
+               log_err("fio: ruh scheme failed to open scheme file %s\n",
+                        td->o.dp_scheme_file);
+               goto out;
+       }
+
+       ruh_scheme = scalloc(1, sizeof(*ruh_scheme));
+       if (!ruh_scheme) {
+               ret = -ENOMEM;
+               goto out_with_close_fp;
+       }
+
+       for (int i = 0;
+               i < DP_MAX_SCHEME_ENTRIES && fscanf(scheme_fp, "%llu,%llu,%hu\n", &start, &end, &pli) == 3;
+               i++) {
+
+               ruh_scheme->scheme_entries[i].start_offset = start;
+               ruh_scheme->scheme_entries[i].end_offset = end;
+               ruh_scheme->scheme_entries[i].pli = pli;
+               ruh_scheme->nr_schemes++;
+       }
+
+       /* A further well-formed entry means the file exceeds the table size */
+       if (fscanf(scheme_fp, "%llu,%llu,%hu\n", &start, &end, &pli) == 3)
+               log_info("fio: too many scheme entries in %s. Only the first %d scheme entries are applied\n",
+                        td->o.dp_scheme_file,
+                        DP_MAX_SCHEME_ENTRIES);
+
+       f->ruhs_scheme = ruh_scheme;
+
+out_with_close_fp:
+       fclose(scheme_fp);
+out:
+       return ret;
+}
+
  int dp_init(struct thread_data *td)
  {
         struct fio_file *f;
@@ -109,6 +159,10 @@ int dp_init(struct thread_data *td)
                 ret = init_ruh_info(td, f);
                 if (ret)
                         break;
+
+               ret = init_ruh_scheme(td, f);
+               if (ret)
+                       break;
         }
         return ret;
  }
@@ -119,6 +173,11 @@ void fdp_free_ruhs_info(struct fio_file *f)
                 return;
         sfree(f->ruhs_info);
         f->ruhs_info = NULL;
+
+       if (!f->ruhs_scheme)
+               return;
+       sfree(f->ruhs_scheme);
+       f->ruhs_scheme = NULL;
  }
  
  void dp_fill_dspec_data(struct thread_data *td, struct io_u *io_u)
@@ -138,6 +197,25 @@ void dp_fill_dspec_data(struct thread_data *td, struct io_u *io_u)
                         ruhs->pli_loc = 0;
  
                 dspec = ruhs->plis[ruhs->pli_loc++];
+       } else if (td->o.dp_id_select == FIO_DP_SCHEME) {
+               struct fio_ruhs_scheme *ruhs_scheme = f->ruhs_scheme;
+               unsigned long long offset = io_u->offset;
+               int i;
+
+               for (i = 0; i < ruhs_scheme->nr_schemes; i++) {
+                       if (offset >= ruhs_scheme->scheme_entries[i].start_offset &&
+                           offset < ruhs_scheme->scheme_entries[i].end_offset) {
+                               dspec = ruhs_scheme->scheme_entries[i].pli;
+                               break;
+                       }
+               }
+
+               /*
+                * If the write offset is not affected by any scheme entry,
+                * 0(default RUH) will be assigned to dspec
+                */
+               if (i == ruhs_scheme->nr_schemes)
+                       dspec = 0;
         } else {
                 ruhs->pli_loc = rand_between(&td->fdp_state, 0, ruhs->nr_ruhs - 1);
                 dspec = ruhs->plis[ruhs->pli_loc];
diff --git a/dataplacement.h b/dataplacement.h

index b5718c869e7dbadd2ab68e90b331f65914f0fb92..71d19d69653c9e50eabc87cb3effa705c31f7e5d 100644 (file)
--- a/dataplacement.h
+++ b/dataplacement.h
@@ -7,6 +7,7 @@
  #define FDP_DIR_DTYPE          2
  #define FDP_MAX_RUHS           128
  #define FIO_MAX_DP_IDS                 16
+#define DP_MAX_SCHEME_ENTRIES  32
  
  /*
   * How fio chooses what placement identifier to use next. Choice of
@@ -15,9 +16,9 @@
  enum {
         FIO_DP_RANDOM   = 0x1,
         FIO_DP_RR       = 0x2,
+       FIO_DP_SCHEME   = 0x3,
  };
  
-
  enum {
         FIO_DP_NONE     = 0x0,
         FIO_DP_FDP      = 0x1,
@@ -30,6 +31,17 @@ struct fio_ruhs_info {
         uint16_t plis[];
  };
  
+/*
+ * One entry of a placement scheme: an offset in the half-open range
+ * [start_offset, end_offset) selects placement ID (index) pli.
+ */
+struct fio_ruhs_scheme_entry {
+       unsigned long long start_offset;
+       unsigned long long end_offset;
+       uint16_t pli;
+};
+
+/*
+ * Table of scheme entries; nr_schemes counts the entries in use at the
+ * start of scheme_entries (at most DP_MAX_SCHEME_ENTRIES).
+ */
+struct fio_ruhs_scheme {
+       uint16_t nr_schemes;
+       struct fio_ruhs_scheme_entry scheme_entries[DP_MAX_SCHEME_ENTRIES];
+};
+
  int dp_init(struct thread_data *td);
  void fdp_free_ruhs_info(struct fio_file *f);
  void dp_fill_dspec_data(struct thread_data *td, struct io_u *io_u);
diff --git a/file.h b/file.h

index deb36e0291369ee78058a4a06e0072b50d8fb2dc..e38ed2f12337d809a3fca54cf03c1e62a5a44e2b 100644 (file)
--- a/file.h
+++ b/file.h
@@ -103,6 +103,7 @@ struct fio_file {
         uint64_t io_size;
  
         struct fio_ruhs_info *ruhs_info;
+       struct fio_ruhs_scheme *ruhs_scheme;
  
         /*
          * Zoned block device information. See also zonemode=zbd.
diff --git a/fio.1 b/fio.1

index ee8124946a81a087375fb17293e7a8db5cfc81c4..1c8e3a56707e3e3700e3267bd1747786cd2540d0 100644 (file)
--- a/fio.1
+++ b/fio.1
@@ -2294,9 +2294,14 @@ Choose a placement ID at random (uniform).
  .TP
  .B roundrobin
  Round robin over available placement IDs. This is the default.
+.TP
+.B scheme
+Choose a placement ID (index) based on the scheme file defined by
+the option \fBdp_scheme\fP.
  .RE
  .P
-The available placement ID (indices) are defined by the \fBplids\fR option.
+The available placement ID (indices) are defined by \fBplids\fR or
+\fBfdp_pli\fR option except for the case of \fBscheme\fP.
  .RE
  .TP
  .BI (io_uring_cmd,xnvme)plids=str, fdp_pli \fR=\fPstr
@@ -2307,6 +2312,31 @@ jobs. If you want fio to use placement identifier only at indices 0, 2 and 5
  specify, you would set `plids=0,2,5`. For streams this should be a
  comma-separated list of Stream IDs.
  .TP
+.BI (io_uring_cmd,xnvme)\fR\fBdp_scheme\fP=str
+Defines which placement ID (index) is selected based on the offset (LBA) range.
+The file should contain one or more scheme entries in the following format:
+.sp
+.RS
+.RS
+0, 10737418240, 0
+.br
+10737418240, 21474836480, 1
+.br
+21474836480, 32212254720, 2
+.br
+\&...
+.RE
+.sp
+Each line, a scheme entry, contains a start offset, an end offset, and a placement ID
+(index) separated by commas (,). If the write offset is within the range of a
+scheme entry (start offset ≤ offset < end offset), the corresponding placement ID
+(index) will be selected. If the write offset matches multiple scheme entries,
+the first matching scheme entry will be applied. If the offset is not within the range
+of any scheme entry, the dspec field will be set to 0, the default RUH. (Caution: In case of
+multiple devices in a job, all devices of the job will be affected by the scheme. If
+this option is specified, the option \fBplids\fP or \fBfdp_pli\fP will be ignored.)
+.RE
+.TP
  .BI (io_uring_cmd,xnvme)md_per_io_size \fR=\fPint
  Size in bytes for separate metadata buffer per IO. Default: 0.
  .TP
diff --git a/options.c b/options.c

index 61ea41cc4e0fc673893c3b9ecc9ec647c4216e3a..f5d221c776317fe50e2e0229a8e9564d244b8db4 100644 (file)
--- a/options.c
+++ b/options.c
@@ -287,6 +287,43 @@ static int str_fdp_pli_cb(void *data, const char *input)
         return 0;
  }
  
+/*
+ * str_dp_scheme_cb() is the callback for parsing the dp_scheme option.
+ * It strips surrounding blanks from the stored scheme filename and checks
+ * that the name refers to a regular file.
+ *
+ * Returns 0 on success (or during a parse dry run), otherwise a positive
+ * errno value.
+ */
+static int str_dp_scheme_cb(void *data, const char *input)
+{
+       struct thread_data *td = cb_data_to_td(data);
+       struct stat sb;
+       char *buf, *filename;
+       int ret = 0;
+
+       if (parse_dryrun())
+               return 0;
+
+       buf = strdup(td->o.dp_scheme_file);
+       if (!buf) {
+               td_verror(td, ENOMEM, "strdup");
+               return ENOMEM;
+       }
+
+       /*
+        * strip_blank_front() takes the pointer by address and may move it
+        * forward, so keep the start of the allocation in buf for free().
+        */
+       filename = buf;
+       strip_blank_front(&filename);
+       strip_blank_end(filename);
+
+       /* Stripping only removes characters, so the result fits in place */
+       strcpy(td->o.dp_scheme_file, filename);
+
+       if (lstat(filename, &sb) < 0) {
+               ret = errno;
+               log_err("fio: lstat() error related to %s\n", filename);
+               td_verror(td, ret, "lstat");
+               goto out;
+       }
+
+       if (!S_ISREG(sb.st_mode)) {
+               /* No call failed here, so errno is stale; report EINVAL */
+               ret = EINVAL;
+               log_err("fio: %s is not a file\n", filename);
+               td_verror(td, ret, "S_ISREG");
+               goto out;
+       }
+
+out:
+       free(buf);
+       return ret;
+}
+
  static int str_bssplit_cb(void *data, const char *input)
  {
         struct thread_data *td = cb_data_to_td(data);
@@ -3760,6 +3797,10 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
                             .oval = FIO_DP_RR,
                             .help = "Round robin select Placement IDs",
                           },
+                         { .ival = "scheme",
+                           .oval = FIO_DP_SCHEME,
+                           .help = "Use a scheme(based on LBA) to select Placement IDs",
+                         },
                 },
         },
         {
@@ -3774,6 +3815,17 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
                 .category = FIO_OPT_C_IO,
                 .group  = FIO_OPT_G_INVALID,
         },
+       {
+               .name   = "dp_scheme",
+               .lname  = "Data Placement Scheme",
+               .type   = FIO_OPT_STR_STORE,
+               .cb     = str_dp_scheme_cb,
+               .off1   = offsetof(struct thread_options, dp_scheme_file),
+               .maxlen = PATH_MAX,
+               .help   = "scheme file that specifies offset-RUH mapping",
+               .category = FIO_OPT_C_IO,
+               .group  = FIO_OPT_G_INVALID,
+       },
         {
                 .name   = "lockmem",
                 .lname  = "Lock memory",
diff --git a/server.h b/server.h

index 83ce449ba0e77034de3bf2f0ef9897ec81ff5f86..e8659f79209a46c9696f9d4c4fcdf7d99f6e2fe2 100644 (file)
--- a/server.h
+++ b/server.h
@@ -51,7 +51,7 @@ struct fio_net_cmd_reply {
  };
  
  enum {
-       FIO_SERVER_VER                  = 104,
+       FIO_SERVER_VER                  = 105,
  
         FIO_SERVER_MAX_FRAGMENT_PDU     = 1024,
         FIO_SERVER_MAX_CMD_MB           = 2048,
diff --git a/thread_options.h b/thread_options.h

index a36b79094f26a77afdfe327b85182f601cd09c31..ccd0c064b928e081b392c03a3c11bee334902745 100644 (file)
--- a/thread_options.h
+++ b/thread_options.h
@@ -396,6 +396,7 @@ struct thread_options {
         unsigned int dp_id_select;
         unsigned int dp_ids[FIO_MAX_DP_IDS];
         unsigned int dp_nr_ids;
+       char *dp_scheme_file;
  
         unsigned int log_entries;
         unsigned int log_prio;
@@ -713,6 +714,7 @@ struct thread_options_pack {
         uint32_t dp_id_select;
         uint32_t dp_ids[FIO_MAX_DP_IDS];
         uint32_t dp_nr_ids;
+       uint8_t dp_scheme_file[FIO_TOP_STR_MAX];
  
         uint32_t num_range;
         /*
author	Hyunwoo Park <dshw.park@samsung.com>
	Tue, 30 Apr 2024 06:58:12 +0000 (06:58 +0000)
committer	Hyunwoo Park <dshw.park@samsung.com>
	Tue, 21 May 2024 12:12:35 +0000 (12:12 +0000)
HOWTO.rst		patch \| blob \| blame \| history
cconv.c		patch \| blob \| blame \| history
dataplacement.c		patch \| blob \| blame \| history
dataplacement.h		patch \| blob \| blame \| history
file.h		patch \| blob \| blame \| history
fio.1		patch \| blob \| blame \| history
options.c		patch \| blob \| blame \| history
server.h		patch \| blob \| blame \| history
thread_options.h		patch \| blob \| blame \| history