4 * IO engine that reads/writes from files by doing memcpy to/from
5 * a memory mapped region of a DAX enabled device.
7 * Copyright (C) 2016 Intel Corp
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License,
11 * version 2 as published by the Free Software Foundation.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
22 * IO engine that accesses a DAX device directly to read and write data
27 * Other relevant settings:
30 * filename=/dev/daxN.N
33 * direct should be left at 0. Using dev-dax implies that memory access
34 * is direct. However, dev-dax does not support O_DIRECT flag by design
35 * since it is not necessary.
37 * bs should, at a minimum, adhere to the device dax alignment.
40 * By default, the dev-dax engine will let the system find the libpmem.so
41 * that it uses. You can use an alternative libpmem by setting the
42 * FIO_PMEM_LIB environment variable to the full path to the desired
53 #include <sys/sysmacros.h>
58 #include "../verify.h"
61 * Limits us to 1GiB of mapped files in total to model after
62 * mmap engine behavior
64 #define MMAP_TOTAL_SZ (1 * 1024 * 1024 * 1024UL)
66 struct fio_devdax_data {
72 static int fio_devdax_file(struct thread_data *td, struct fio_file *f,
73 size_t length, off_t off)
75 struct fio_devdax_data *fdd = FILE_ENG_DATA(f);
79 flags = PROT_READ | PROT_WRITE;
80 else if (td_write(td)) {
83 if (td->o.verify != VERIFY_NONE)
88 fdd->devdax_ptr = mmap(NULL, length, flags, MAP_SHARED, f->fd, off);
89 if (fdd->devdax_ptr == MAP_FAILED) {
90 fdd->devdax_ptr = NULL;
91 td_verror(td, errno, "mmap");
94 if (td->error && fdd->devdax_ptr)
95 munmap(fdd->devdax_ptr, length);
101 * Just mmap an appropriate portion, we cannot mmap the full extent
103 static int fio_devdax_prep_limited(struct thread_data *td, struct io_u *io_u)
105 struct fio_file *f = io_u->file;
106 struct fio_devdax_data *fdd = FILE_ENG_DATA(f);
108 if (io_u->buflen > f->real_file_size) {
109 log_err("dev-dax: bs too big for dev-dax engine\n");
113 fdd->devdax_sz = min(MMAP_TOTAL_SZ, f->real_file_size);
114 if (fdd->devdax_sz > f->io_size)
115 fdd->devdax_sz = f->io_size;
117 fdd->devdax_off = io_u->offset;
119 return fio_devdax_file(td, f, fdd->devdax_sz, fdd->devdax_off);
123 * Attempt to mmap the entire file
125 static int fio_devdax_prep_full(struct thread_data *td, struct io_u *io_u)
127 struct fio_file *f = io_u->file;
128 struct fio_devdax_data *fdd = FILE_ENG_DATA(f);
131 if (fio_file_partial_mmap(f))
134 if (io_u->offset != (size_t) io_u->offset ||
135 f->io_size != (size_t) f->io_size) {
136 fio_file_set_partial_mmap(f);
140 fdd->devdax_sz = f->io_size;
143 ret = fio_devdax_file(td, f, fdd->devdax_sz, fdd->devdax_off);
145 fio_file_set_partial_mmap(f);
150 static int fio_devdax_prep(struct thread_data *td, struct io_u *io_u)
152 struct fio_file *f = io_u->file;
153 struct fio_devdax_data *fdd = FILE_ENG_DATA(f);
157 * It fits within existing mapping, use it
159 if (io_u->offset >= fdd->devdax_off &&
160 io_u->offset + io_u->buflen <= fdd->devdax_off + fdd->devdax_sz)
164 * unmap any existing mapping
166 if (fdd->devdax_ptr) {
167 if (munmap(fdd->devdax_ptr, fdd->devdax_sz) < 0)
169 fdd->devdax_ptr = NULL;
172 if (fio_devdax_prep_full(td, io_u)) {
174 ret = fio_devdax_prep_limited(td, io_u);
180 io_u->mmap_data = fdd->devdax_ptr + io_u->offset - fdd->devdax_off -
185 static enum fio_q_status fio_devdax_queue(struct thread_data *td,
188 fio_ro_check(td, io_u);
191 switch (io_u->ddir) {
193 memcpy(io_u->xfer_buf, io_u->mmap_data, io_u->xfer_buflen);
196 pmem_memcpy_persist(io_u->mmap_data, io_u->xfer_buf,
201 case DDIR_SYNC_FILE_RANGE:
204 io_u->error = EINVAL;
208 return FIO_Q_COMPLETED;
211 static int fio_devdax_init(struct thread_data *td)
213 struct thread_options *o = &td->o;
215 if ((o->rw_min_bs & page_mask) &&
216 (o->fsync_blocks || o->fdatasync_blocks)) {
217 log_err("dev-dax: mmap options dictate a minimum block size of %llu bytes\n",
218 (unsigned long long) page_size);
225 static int fio_devdax_open_file(struct thread_data *td, struct fio_file *f)
227 struct fio_devdax_data *fdd;
230 ret = generic_open_file(td, f);
234 fdd = calloc(1, sizeof(*fdd));
236 int fio_unused __ret;
237 __ret = generic_close_file(td, f);
241 FILE_SET_ENG_DATA(f, fdd);
246 static int fio_devdax_close_file(struct thread_data *td, struct fio_file *f)
248 struct fio_devdax_data *fdd = FILE_ENG_DATA(f);
250 FILE_SET_ENG_DATA(f, NULL);
252 fio_file_clear_partial_mmap(f);
254 return generic_close_file(td, f);
258 fio_devdax_get_file_size(struct thread_data *td, struct fio_file *f)
260 char spath[PATH_MAX];
261 char npath[PATH_MAX];
262 char *rpath, *basename;
268 if (fio_file_size_known(f))
271 if (f->filetype != FIO_TYPE_CHAR)
274 rc = stat(f->file_name, &st);
276 log_err("%s: failed to stat file %s (%s)\n",
277 td->o.name, f->file_name, strerror(errno));
281 snprintf(spath, PATH_MAX, "/sys/dev/char/%d:%d/subsystem",
282 major(st.st_rdev), minor(st.st_rdev));
284 rpath = realpath(spath, npath);
286 log_err("%s: realpath on %s failed (%s)\n",
287 td->o.name, spath, strerror(errno));
291 /* check if DAX device */
292 basename = strrchr(rpath, '/');
293 if (!basename || strcmp("dax", basename+1)) {
294 log_err("%s: %s not a DAX device!\n",
295 td->o.name, f->file_name);
298 snprintf(spath, PATH_MAX, "/sys/dev/char/%d:%d/size",
299 major(st.st_rdev), minor(st.st_rdev));
301 sfile = fopen(spath, "r");
303 log_err("%s: fopen on %s failed (%s)\n",
304 td->o.name, spath, strerror(errno));
308 rc = fscanf(sfile, "%lu", &size);
310 log_err("%s: fscanf on %s failed (%s)\n",
311 td->o.name, spath, strerror(errno));
316 f->real_file_size = size;
320 if (f->file_offset > f->real_file_size) {
321 log_err("%s: offset extends end (%llu > %llu)\n", td->o.name,
322 (unsigned long long) f->file_offset,
323 (unsigned long long) f->real_file_size);
327 fio_file_set_size_known(f);
331 static struct ioengine_ops ioengine = {
333 .version = FIO_IOOPS_VERSION,
334 .init = fio_devdax_init,
335 .prep = fio_devdax_prep,
336 .queue = fio_devdax_queue,
337 .open_file = fio_devdax_open_file,
338 .close_file = fio_devdax_close_file,
339 .get_file_size = fio_devdax_get_file_size,
340 .flags = FIO_SYNCIO | FIO_DISKLESSIO | FIO_NOEXTEND | FIO_NODISKUTIL,
343 static void fio_init fio_devdax_register(void)
345 register_ioengine(&ioengine);
348 static void fio_exit fio_devdax_unregister(void)
350 unregister_ioengine(&ioengine);