For direct I/O, requests will only succeed if cache invalidation isn't required,
file blocks are fully allocated and the disk request could be issued immediately.
+.. option:: fdp=bool : [io_uring_cmd]
+
+ Enable Flexible Data Placement mode for write commands.
+
+.. option:: fdp_pli=str : [io_uring_cmd]
+
+ Select which Placement ID Index/Indices this job is allowed to use for
+ writes. By default, the job will cycle through all available Placement
+ IDs, so use this to isolate these identifiers to specific jobs. If you
+ want fio to use placement identifiers only at indices 0, 2 and 5,
+ specify ``fdp_pli=0,2,5``.
+
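A minimal job-file sketch tying the two options together may be useful here; the
device path and the chosen indices are illustrative placeholders and assume an
FDP-enabled namespace, like the example job further down in this patch::

    [global]
    filename=/dev/ng0n1
    ioengine=io_uring_cmd
    cmd_type=nvme
    fdp=1

    [write]
    rw=randwrite
    bs=4k
    fdp_pli=0,2,5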
.. option:: cpuload=int : [cpuio]
Attempt to use the specified percentage of CPU cycles. This is a mandatory
gettime-thread.c helpers.c json.c idletime.c td_error.c \
profiles/tiobench.c profiles/act.c io_u_queue.c filelock.c \
workqueue.c rate-submit.c optgroup.c helper_thread.c \
- steadystate.c zone-dist.c zbd.c dedupe.c
+ steadystate.c zone-dist.c zbd.c dedupe.c fdp.c
ifdef CONFIG_LIBHDFS
HDFSFLAGS= -I $(JAVA_HOME)/include -I $(JAVA_HOME)/include/linux -I $(FIO_LIBHDFS_INCLUDE)
for (i = 0; i < FIO_IO_U_LIST_MAX_LEN; i++)
o->merge_blktrace_iters[i].u.f = fio_uint64_to_double(le64_to_cpu(top->merge_blktrace_iters[i].u.i));
+
+ o->fdp = le32_to_cpu(top->fdp);
+ o->fdp_nrpli = le32_to_cpu(top->fdp_nrpli);
+ for (i = 0; i < o->fdp_nrpli; i++)
+ o->fdp_plis[i] = le32_to_cpu(top->fdp_plis[i]);
#if 0
uint8_t cpumask[FIO_TOP_STR_MAX];
uint8_t verify_cpumask[FIO_TOP_STR_MAX];
for (i = 0; i < FIO_IO_U_LIST_MAX_LEN; i++)
top->merge_blktrace_iters[i].u.i = __cpu_to_le64(fio_double_to_uint64(o->merge_blktrace_iters[i].u.f));
+
+ top->fdp = cpu_to_le32(o->fdp);
+ top->fdp_nrpli = cpu_to_le32(o->fdp_nrpli);
+ for (i = 0; i < o->fdp_nrpli; i++)
+ top->fdp_plis[i] = cpu_to_le32(o->fdp_plis[i]);
#if 0
uint8_t cpumask[FIO_TOP_STR_MAX];
uint8_t verify_cpumask[FIO_TOP_STR_MAX];
return fio_nvme_get_max_open_zones(td, f, max_open_zones);
}
+static int fio_ioring_cmd_fetch_ruhs(struct thread_data *td, struct fio_file *f,
+ struct fio_ruhs_info *fruhs_info)
+{
+ struct nvme_fdp_ruh_status *ruhs;
+ int bytes, ret, i;
+
+ /* room for the header plus up to 128 reclaim unit handle descriptors */
+ bytes = sizeof(*ruhs) + 128 * sizeof(struct nvme_fdp_ruh_status_desc);
+ ruhs = scalloc(1, bytes);
+ if (!ruhs)
+ return -ENOMEM;
+
+ ret = fio_nvme_iomgmt_ruhs(td, f, ruhs, bytes);
+ if (ret)
+ goto free;
+
+ fruhs_info->nr_ruhs = le16_to_cpu(ruhs->nruhsd);
+ for (i = 0; i < fruhs_info->nr_ruhs; i++)
+ fruhs_info->plis[i] = le16_to_cpu(ruhs->ruhss[i].pid);
+free:
+ sfree(ruhs);
+ return ret;
+}
+
static struct ioengine_ops ioengine_uring = {
.name = "io_uring",
.version = FIO_IOOPS_VERSION,
.get_max_open_zones = fio_ioring_cmd_get_max_open_zones,
.options = options,
.option_struct_size = sizeof(struct ioring_options),
+ .fdp_fetch_ruhs = fio_ioring_cmd_fetch_ruhs,
};
static void fio_init fio_ioring_register(void)
cmd->cdw10 = slba & 0xffffffff;
cmd->cdw11 = slba >> 32;
/* cdw12 represent number of lba's for read/write */
- cmd->cdw12 = nlb;
+ /* dtype goes into the directive type field, cdw12 bits 23:20 */
+ cmd->cdw12 = nlb | (io_u->dtype << 20);
+ /* dspec goes into the directive specific field, cdw13 bits 31:16 */
+ cmd->cdw13 = io_u->dspec << 16;
if (iov) {
iov->iov_base = io_u->xfer_buf;
iov->iov_len = io_u->xfer_buflen;
close(fd);
return ret;
}
+
+static inline int nvme_fdp_reclaim_unit_handle_status(int fd, __u32 nsid,
+ __u32 data_len, void *data)
+{
+ struct nvme_passthru_cmd cmd = {
+ .opcode = nvme_cmd_io_mgmt_recv,
+ .nsid = nsid,
+ .addr = (__u64)(uintptr_t)data,
+ .data_len = data_len,
+ .cdw10 = 1, /* management operation: reclaim unit handle status */
+ .cdw11 = (data_len >> 2) - 1, /* response buffer size in dwords, 0-based */
+ };
+
+ return ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
+}
+
+int fio_nvme_iomgmt_ruhs(struct thread_data *td, struct fio_file *f,
+ struct nvme_fdp_ruh_status *ruhs, __u32 bytes)
+{
+ struct nvme_data *data = FILE_ENG_DATA(f);
+ int fd, ret;
+
+ fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
+ if (fd < 0)
+ return -errno;
+
+ ret = nvme_fdp_reclaim_unit_handle_status(fd, data->nsid, bytes, ruhs);
+ if (ret) {
+ log_err("%s: nvme_fdp_reclaim_unit_handle_status failed, err=%d\n",
+ f->file_name, ret);
+ errno = ENOTSUP;
+ } else
+ errno = 0;
+
+ close(fd);
+ return -errno;
+}
enum nvme_io_opcode {
nvme_cmd_write = 0x01,
nvme_cmd_read = 0x02,
+ nvme_cmd_io_mgmt_recv = 0x12,
nvme_zns_cmd_mgmt_send = 0x79,
nvme_zns_cmd_mgmt_recv = 0x7a,
};
struct nvme_zns_desc entries[];
};
+struct nvme_fdp_ruh_status_desc {
+ __u16 pid;
+ __u16 ruhid;
+ __u32 earutr;
+ __u64 ruamw;
+ __u8 rsvd16[16];
+};
+
+struct nvme_fdp_ruh_status {
+ __u8 rsvd0[14];
+ __le16 nruhsd;
+ struct nvme_fdp_ruh_status_desc ruhss[];
+};
+
+int fio_nvme_iomgmt_ruhs(struct thread_data *td, struct fio_file *f,
+ struct nvme_fdp_ruh_status *ruhs, __u32 bytes);
+
int fio_nvme_get_info(struct fio_file *f, __u32 *nsid, __u32 *lba_sz,
__u64 *nlba);
--- /dev/null
+# io_uring_cmd I/O engine for nvme-ns generic character device with FDP enabled
+# This assumes the namespace is already configured with FDP support and has at
+# least 8 available reclaim units.
+#
+# Each job targets a different range of LBAs, uses different placement
+# identifiers, and has a different write intensity.
+
+[global]
+filename=/dev/ng0n1
+ioengine=io_uring_cmd
+cmd_type=nvme
+iodepth=32
+bs=4K
+fdp=1
+time_based=1
+runtime=1000
+
+[write-heavy]
+rw=randrw
+rwmixwrite=90
+fdp_pli=0,1,2,3
+offset=0%
+size=30%
+
+[write-mid]
+rw=randrw
+rwmixwrite=30
+fdp_pli=4,5
+offset=30%
+size=30%
+
+[write-light]
+rw=randrw
+rwmixwrite=10
+fdp_pli=6
+offset=60%
+size=30%
--- /dev/null
+/*
+ * Note: the flow here is similar to a very basic ZBD device setup.
+ *
+ * Enabled with fdp=1 (used with NVMe character devices such as /dev/ng0n1).
+ */
+
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include "file.h"
+#include "fio.h"
+
+#include "pshared.h"
+#include "fdp.h"
+
+static int fdp_ruh_info(struct thread_data *td, struct fio_file *f,
+ struct fio_ruhs_info *ruhs)
+{
+ int ret = -EINVAL;
+
+ if (td->io_ops && td->io_ops->fdp_fetch_ruhs) {
+ ret = td->io_ops->fdp_fetch_ruhs(td, f, ruhs);
+ if (ret < 0) {
+ td_verror(td, errno, "fdp fetch ruhs failed");
+ log_err("%s: fdp fetch ruhs failed (%d)\n",
+ f->file_name, errno);
+ }
+ } else
+ log_err("%s: engine (%s) lacks fetch ruhs\n",
+ f->file_name, td->io_ops->name);
+
+ return ret;
+}
+
+static int init_ruh_info(struct thread_data *td, struct fio_file *f)
+{
+ struct fio_ruhs_info *ruhs, *tmp;
+ int i, ret;
+
+ ruhs = scalloc(1, sizeof(*ruhs) + 128 * sizeof(*ruhs->plis));
+ if (!ruhs)
+ return -ENOMEM;
+
+ ret = fdp_ruh_info(td, f, ruhs);
+ if (ret) {
+ log_info("fio: ruh info failed for %s (%d)\n",
+ f->file_name, -ret);
+ goto out;
+ }
+
+ if (ruhs->nr_ruhs > 128)
+ ruhs->nr_ruhs = 128;
+
+ if (td->o.fdp_nrpli == 0) {
+ f->ruhs_info = ruhs;
+ return 0;
+ }
+
+ for (i = 0; i < td->o.fdp_nrpli; i++) {
+ if (td->o.fdp_plis[i] >= ruhs->nr_ruhs) {
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ tmp = scalloc(1, sizeof(*tmp) + ruhs->nr_ruhs * sizeof(*tmp->plis));
+ if (!tmp) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ tmp->nr_ruhs = td->o.fdp_nrpli;
+ for (i = 0; i < td->o.fdp_nrpli; i++)
+ tmp->plis[i] = ruhs->plis[td->o.fdp_plis[i]];
+ f->ruhs_info = tmp;
+out:
+ sfree(ruhs);
+ return ret;
+}
+
+int fdp_init(struct thread_data *td)
+{
+ struct fio_file *f;
+ int i, ret = 0;
+
+ for_each_file(td, f, i) {
+ ret = init_ruh_info(td, f);
+ if (ret)
+ break;
+ }
+ return ret;
+}
+
+void fdp_free_ruhs_info(struct fio_file *f)
+{
+ if (!f->ruhs_info)
+ return;
+ sfree(f->ruhs_info);
+ f->ruhs_info = NULL;
+}
+
+void fdp_fill_dspec_data(struct thread_data *td, struct io_u *io_u)
+{
+ struct fio_file *f = io_u->file;
+ struct fio_ruhs_info *ruhs = f->ruhs_info;
+ int dspec;
+
+ if (!ruhs || io_u->ddir != DDIR_WRITE) {
+ io_u->dtype = 0;
+ io_u->dspec = 0;
+ return;
+ }
+
+ /* round-robin across the placement IDs this job is allowed to use */
+ dspec = ruhs->plis[ruhs->pli_loc++ % ruhs->nr_ruhs];
+ io_u->dtype = 2; /* directive type 2: data placement */
+ io_u->dspec = dspec;
+}
--- /dev/null
+#ifndef FIO_FDP_H
+#define FIO_FDP_H
+
+#include "io_u.h"
+
+struct fio_ruhs_info {
+ uint32_t nr_ruhs;
+ uint32_t pli_loc;
+ uint16_t plis[];
+};
+
+int fdp_init(struct thread_data *td);
+void fdp_free_ruhs_info(struct fio_file *f);
+void fdp_fill_dspec_data(struct thread_data *td, struct io_u *io_u);
+
+#endif /* FIO_FDP_H */
/* Forward declarations */
struct zoned_block_device_info;
+struct fio_ruhs_info;
/*
* The type of object we are working on
uint64_t file_offset;
uint64_t io_size;
+ struct fio_ruhs_info *ruhs_info;
+
/*
* Zoned block device information. See also zonemode=zbd.
*/
td_restore_runstate(td, old_state);
+ if (td->o.fdp) {
+ err = fdp_init(td);
+ if (err)
+ goto err_out;
+ }
+
return 0;
err_offset:
{
if (fio_file_axmap(f))
axmap_free(f->io_axmap);
+ if (f->ruhs_info)
+ sfree(f->ruhs_info);
if (!fio_file_smalloc(f)) {
free(f->file_name);
free(f);
}
zbd_close_file(f);
+ fdp_free_ruhs_info(f);
fio_file_free(f);
}
For direct I/O, requests will only succeed if cache invalidation isn't required,
file blocks are fully allocated and the disk request could be issued immediately.
.TP
+.BI (io_uring_cmd)fdp \fR=\fPbool
+Enable Flexible Data Placement mode for write commands.
+.TP
+.BI (io_uring_cmd)fdp_pli \fR=\fPstr
+Select which Placement ID Index/Indices this job is allowed to use for writes.
+By default, the job will cycle through all available Placement IDs, so use this
+to isolate these identifiers to specific jobs. If you want fio to use placement
+identifiers only at indices 0, 2 and 5, set `fdp_pli=0,2,5`.
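The same job options can also be passed on the fio command line; a rough sketch
(the device path, queue depth and runtime are placeholders and assume an
FDP-capable namespace exposed as an NVMe character device):

  fio --name=fdp-write --ioengine=io_uring_cmd --cmd_type=nvme \
      --filename=/dev/ng0n1 --rw=randwrite --bs=4k --iodepth=32 \
      --fdp=1 --fdp_pli=0,2,5 --time_based --runtime=60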
+.TP
.BI (cpuio)cpuload \fR=\fPint
Attempt to use the specified percentage of CPU cycles. This is a mandatory
option when using cpuio I/O engine.
}
}
+ if (td->o.fdp)
+ fdp_fill_dspec_data(td, io_u);
+
if (io_u->offset + io_u->buflen > io_u->file->real_file_size) {
dprint(FD_IO, "io_u %p, off=0x%llx + len=0x%llx exceeds file size=0x%llx\n",
io_u,
*/
int (*end_io)(struct thread_data *, struct io_u **);
+ uint32_t dtype;
+ uint32_t dspec;
+
union {
#ifdef CONFIG_LIBAIO
struct iocb iocb;
#include "flist.h"
#include "io_u.h"
#include "zbd_types.h"
+#include "fdp.h"
-#define FIO_IOOPS_VERSION 31
+#define FIO_IOOPS_VERSION 32
#ifndef CONFIG_DYNAMIC_ENGINES
#define FIO_STATIC static
unsigned int *);
int (*finish_zone)(struct thread_data *, struct fio_file *,
uint64_t, uint64_t);
+ int (*fdp_fetch_ruhs)(struct thread_data *, struct fio_file *,
+ struct fio_ruhs_info *);
int option_struct_size;
struct fio_option *options;
};
return ret;
}
+static int fio_fdp_cmp(const void *p1, const void *p2)
+{
+ /* elements are the unsigned int entries of o->fdp_plis */
+ const unsigned int *t1 = p1;
+ const unsigned int *t2 = p2;
+
+ return (*t1 > *t2) - (*t1 < *t2);
+}
+
+static int str_fdp_pli_cb(void *data, const char *input)
+{
+ struct thread_data *td = cb_data_to_td(data);
+ char *str, *p, *v;
+ int i = 0;
+
+ p = str = strdup(input);
+ strip_blank_front(&str);
+ strip_blank_end(str);
+
+ while ((v = strsep(&str, ",")) != NULL && i < FIO_MAX_PLIS)
+ td->o.fdp_plis[i++] = strtoll(v, NULL, 0);
+ free(p);
+
+ qsort(td->o.fdp_plis, i, sizeof(*td->o.fdp_plis), fio_fdp_cmp);
+ td->o.fdp_nrpli = i;
+
+ return 0;
+}
+
static int str_bssplit_cb(void *data, const char *input)
{
struct thread_data *td = cb_data_to_td(data);
.category = FIO_OPT_C_IO,
.group = FIO_OPT_G_ZONE,
},
+ {
+ .name = "fdp",
+ .lname = "Flexible data placement",
+ .type = FIO_OPT_BOOL,
+ .off1 = offsetof(struct thread_options, fdp),
+ .help = "Use Data placement directive (FDP)",
+ .def = "0",
+ .category = FIO_OPT_C_IO,
+ .group = FIO_OPT_G_INVALID,
+ },
+ {
+ .name = "fdp_pli",
+ .lname = "FDP Placement ID indicies",
+ .type = FIO_OPT_STR,
+ .cb = str_fdp_pli_cb,
+ .off1 = offsetof(struct thread_options, fdp_plis),
+ .help = "Sets which placement ids to use (defaults to all)",
+ .hide = 1,
+ .category = FIO_OPT_C_IO,
+ .group = FIO_OPT_G_INVALID,
+ },
{
.name = "lockmem",
.lname = "Lock memory",
};
enum {
- FIO_SERVER_VER = 98,
+ FIO_SERVER_VER = 99,
FIO_SERVER_MAX_FRAGMENT_PDU = 1024,
FIO_SERVER_MAX_CMD_MB = 2048,
fio_fp64_t zrt;
fio_fp64_t zrf;
+#define FIO_MAX_PLIS 16
+ unsigned int fdp;
+ unsigned int fdp_plis[FIO_MAX_PLIS];
+ unsigned int fdp_nrpli;
+
unsigned int log_entries;
unsigned int log_prio;
};
uint32_t log_entries;
uint32_t log_prio;
+ uint32_t fdp;
+ uint32_t fdp_plis[FIO_MAX_PLIS];
+ uint32_t fdp_nrpli;
+
/*
* verify_pattern followed by buffer_pattern from the unpacked struct
*/