| 1 | /* |
| 2 | * libpmem: IO engine that uses PMDK libpmem to read and write data |
| 3 | * |
| 4 | * Copyright (C) 2017 Nippon Telegraph and Telephone Corporation. |
| 5 | * Copyright 2018-2020, Intel Corporation |
| 6 | * |
| 7 | * This program is free software; you can redistribute it and/or |
| 8 | * modify it under the terms of the GNU General Public License, |
| 9 | * version 2 as published by the Free Software Foundation. |
| 10 | * |
| 11 | * This program is distributed in the hope that it will be useful, |
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 | * GNU General Public License for more details. |
| 15 | * |
| 16 | */ |
| 17 | |
| 18 | /* |
| 19 | * libpmem engine |
| 20 | * |
| 21 | * IO engine that uses libpmem to write data (and memcpy to read) |
| 22 | * |
| 23 | * To use: |
| 24 | * ioengine=libpmem |
| 25 | * |
| 26 | * Other relevant settings: |
| 27 | * iodepth=1 |
| 28 | * direct=1 |
| 29 | * sync=1 |
| 30 | * directory=/mnt/pmem0/ |
| 31 | * bs=4k |
| 32 | * |
| 33 | * sync=1 means that pmem_drain() is executed for each write operation. |
| 34 | * Otherwise pmem_drain() is not executed automatically and should be called on demand. |
| 35 | * |
| 36 | * direct=1 means PMEM_F_MEM_NONTEMPORAL flag is set in pmem_memcpy(). |
| 37 | * |
| 38 | * The pmem device must have a DAX-capable filesystem and be mounted |
| 39 | * with DAX enabled. Directory must point to a mount point of DAX FS. |
| 40 | * |
| 41 | * Example: |
| 42 | * mkfs.xfs /dev/pmem0 |
| 43 | * mkdir /mnt/pmem0 |
| 44 | * mount -o dax /dev/pmem0 /mnt/pmem0 |
| 45 | * |
| 46 | * See examples/libpmem.fio for more. |
| 47 | * |
| 48 | * |
| 49 | * libpmem.so |
| 50 | * By default, the libpmem engine will let the system find the libpmem.so |
| 51 | * that it uses. You can use an alternative libpmem by setting the |
| 52 | * FIO_PMEM_LIB environment variable to the full path to the desired |
| 53 | * libpmem.so. This engine requires PMDK >= 1.5. |
| 54 | */ |
| 55 | |
| 56 | #include <stdio.h> |
| 57 | #include <limits.h> |
| 58 | #include <stdlib.h> |
| 59 | #include <unistd.h> |
| 60 | #include <errno.h> |
| 61 | #include <sys/mman.h> |
| 62 | #include <sys/stat.h> |
| 63 | #include <sys/sysmacros.h> |
| 64 | #include <libgen.h> |
| 65 | #include <libpmem.h> |
| 66 | |
| 67 | #include "../fio.h" |
| 68 | #include "../verify.h" |
| 69 | |
/*
 * Per-file engine state, attached to a fio_file with FILE_SET_ENG_DATA()
 * and retrieved with FILE_ENG_DATA().
 */
struct fio_libpmem_data {
	void *libpmem_ptr;	/* base address returned by pmem_map_file(), NULL if unmapped */
	size_t libpmem_sz;	/* mapping length; initialised from f->io_size on open */
	off_t libpmem_off;	/* offset subtracted when computing I/O addresses; set to 0 on open */
};
| 75 | |
| 76 | static int fio_libpmem_init(struct thread_data *td) |
| 77 | { |
| 78 | struct thread_options *o = &td->o; |
| 79 | |
| 80 | dprint(FD_IO,"o->rw_min_bs %llu \n o->fsync_blocks %u \n o->fdatasync_blocks %u \n", |
| 81 | o->rw_min_bs,o->fsync_blocks,o->fdatasync_blocks); |
| 82 | dprint(FD_IO, "DEBUG fio_libpmem_init\n"); |
| 83 | |
| 84 | if ((o->rw_min_bs & page_mask) && |
| 85 | (o->fsync_blocks || o->fdatasync_blocks)) { |
| 86 | log_err("libpmem: mmap options dictate a minimum block size of " |
| 87 | "%llu bytes\n", (unsigned long long) page_size); |
| 88 | return 1; |
| 89 | } |
| 90 | return 0; |
| 91 | } |
| 92 | |
| 93 | /* |
| 94 | * This is the pmem_map_file execution function |
| 95 | */ |
| 96 | static int fio_libpmem_file(struct thread_data *td, struct fio_file *f, |
| 97 | size_t length, off_t off) |
| 98 | { |
| 99 | struct fio_libpmem_data *fdd = FILE_ENG_DATA(f); |
| 100 | mode_t mode = 0; |
| 101 | size_t mapped_len; |
| 102 | int is_pmem; |
| 103 | |
| 104 | if(td_rw(td)) |
| 105 | mode = S_IWUSR | S_IRUSR; |
| 106 | else if (td_write(td)) |
| 107 | mode = S_IWUSR; |
| 108 | else |
| 109 | mode = S_IRUSR; |
| 110 | |
| 111 | dprint(FD_IO, "DEBUG fio_libpmem_file\n"); |
| 112 | dprint(FD_IO, "f->file_name = %s td->o.verify = %d \n", f->file_name, |
| 113 | td->o.verify); |
| 114 | dprint(FD_IO, "length = %ld f->fd = %d off = %ld file mode = %d \n", |
| 115 | length, f->fd, off, mode); |
| 116 | |
| 117 | /* unmap any existing mapping */ |
| 118 | if (fdd->libpmem_ptr) { |
| 119 | dprint(FD_IO,"pmem_unmap \n"); |
| 120 | if (pmem_unmap(fdd->libpmem_ptr, fdd->libpmem_sz) < 0) |
| 121 | return errno; |
| 122 | fdd->libpmem_ptr = NULL; |
| 123 | } |
| 124 | |
| 125 | if((fdd->libpmem_ptr = pmem_map_file(f->file_name, length, PMEM_FILE_CREATE, mode, &mapped_len, &is_pmem)) == NULL) { |
| 126 | td_verror(td, errno, pmem_errormsg()); |
| 127 | goto err; |
| 128 | } |
| 129 | |
| 130 | if (!is_pmem) { |
| 131 | td_verror(td, errno, "file_name does not point to persistent memory"); |
| 132 | } |
| 133 | |
| 134 | err: |
| 135 | if (td->error && fdd->libpmem_ptr) |
| 136 | pmem_unmap(fdd->libpmem_ptr, length); |
| 137 | |
| 138 | return td->error; |
| 139 | } |
| 140 | |
| 141 | static int fio_libpmem_open_file(struct thread_data *td, struct fio_file *f) |
| 142 | { |
| 143 | struct fio_libpmem_data *fdd; |
| 144 | |
| 145 | dprint(FD_IO,"DEBUG fio_libpmem_open_file\n"); |
| 146 | dprint(FD_IO,"f->io_size=%ld \n",f->io_size); |
| 147 | dprint(FD_IO,"td->o.size=%lld \n",td->o.size); |
| 148 | dprint(FD_IO,"td->o.iodepth=%d\n",td->o.iodepth); |
| 149 | dprint(FD_IO,"td->o.iodepth_batch=%d \n",td->o.iodepth_batch); |
| 150 | |
| 151 | if (fio_file_open(f)) |
| 152 | td_io_close_file(td, f); |
| 153 | |
| 154 | fdd = calloc(1, sizeof(*fdd)); |
| 155 | if (!fdd) { |
| 156 | return 1; |
| 157 | } |
| 158 | FILE_SET_ENG_DATA(f, fdd); |
| 159 | fdd->libpmem_sz = f->io_size; |
| 160 | fdd->libpmem_off = 0; |
| 161 | |
| 162 | return fio_libpmem_file(td, f, fdd->libpmem_sz, fdd->libpmem_off); |
| 163 | } |
| 164 | |
| 165 | static int fio_libpmem_prep(struct thread_data *td, struct io_u *io_u) |
| 166 | { |
| 167 | struct fio_file *f = io_u->file; |
| 168 | struct fio_libpmem_data *fdd = FILE_ENG_DATA(f); |
| 169 | |
| 170 | dprint(FD_IO, "DEBUG fio_libpmem_prep\n" ); |
| 171 | dprint(FD_IO," io_u->offset %llu : fdd->libpmem_off %ld : " |
| 172 | "io_u->buflen %llu : fdd->libpmem_sz %ld\n", |
| 173 | io_u->offset, fdd->libpmem_off, |
| 174 | io_u->buflen, fdd->libpmem_sz); |
| 175 | |
| 176 | if (io_u->buflen > f->real_file_size) { |
| 177 | log_err("libpmem: bs bigger than the file size\n"); |
| 178 | return EIO; |
| 179 | } |
| 180 | |
| 181 | io_u->mmap_data = fdd->libpmem_ptr + io_u->offset - fdd->libpmem_off |
| 182 | - f->file_offset; |
| 183 | return 0; |
| 184 | } |
| 185 | |
| 186 | static enum fio_q_status fio_libpmem_queue(struct thread_data *td, |
| 187 | struct io_u *io_u) |
| 188 | { |
| 189 | unsigned flags = 0; |
| 190 | |
| 191 | fio_ro_check(td, io_u); |
| 192 | io_u->error = 0; |
| 193 | |
| 194 | dprint(FD_IO, "DEBUG fio_libpmem_queue\n"); |
| 195 | dprint(FD_IO,"td->o.odirect %d td->o.sync_io %d \n",td->o.odirect, td->o.sync_io); |
| 196 | /* map both O_SYNC / DSYNC to not using NODRAIN */ |
| 197 | flags = td->o.sync_io ? 0 : PMEM_F_MEM_NODRAIN; |
| 198 | flags |= td->o.odirect ? PMEM_F_MEM_NONTEMPORAL : PMEM_F_MEM_TEMPORAL; |
| 199 | |
| 200 | switch (io_u->ddir) { |
| 201 | case DDIR_READ: |
| 202 | memcpy(io_u->xfer_buf, io_u->mmap_data, io_u->xfer_buflen); |
| 203 | break; |
| 204 | case DDIR_WRITE: |
| 205 | dprint(FD_IO, "DEBUG mmap_data=%p, xfer_buf=%p\n", |
| 206 | io_u->mmap_data, io_u->xfer_buf ); |
| 207 | pmem_memcpy(io_u->mmap_data, |
| 208 | io_u->xfer_buf, |
| 209 | io_u->xfer_buflen, |
| 210 | flags); |
| 211 | break; |
| 212 | case DDIR_SYNC: |
| 213 | case DDIR_DATASYNC: |
| 214 | case DDIR_SYNC_FILE_RANGE: |
| 215 | pmem_drain(); |
| 216 | break; |
| 217 | default: |
| 218 | io_u->error = EINVAL; |
| 219 | break; |
| 220 | } |
| 221 | |
| 222 | return FIO_Q_COMPLETED; |
| 223 | } |
| 224 | |
| 225 | static int fio_libpmem_close_file(struct thread_data *td, struct fio_file *f) |
| 226 | { |
| 227 | struct fio_libpmem_data *fdd = FILE_ENG_DATA(f); |
| 228 | int ret = 0; |
| 229 | |
| 230 | dprint(FD_IO,"DEBUG fio_libpmem_close_file\n"); |
| 231 | dprint(FD_IO,"td->o.odirect %d \n",td->o.odirect); |
| 232 | |
| 233 | if (!td->o.odirect) { |
| 234 | dprint(FD_IO,"pmem_drain\n"); |
| 235 | pmem_drain(); |
| 236 | } |
| 237 | |
| 238 | if (fdd->libpmem_ptr) |
| 239 | ret = pmem_unmap(fdd->libpmem_ptr, fdd->libpmem_sz); |
| 240 | if (fio_file_open(f)) |
| 241 | ret &= generic_close_file(td, f); |
| 242 | |
| 243 | FILE_SET_ENG_DATA(f, NULL); |
| 244 | free(fdd); |
| 245 | |
| 246 | return ret; |
| 247 | } |
| 248 | |
| 249 | FIO_STATIC struct ioengine_ops ioengine = { |
| 250 | .name = "libpmem", |
| 251 | .version = FIO_IOOPS_VERSION, |
| 252 | .init = fio_libpmem_init, |
| 253 | .prep = fio_libpmem_prep, |
| 254 | .queue = fio_libpmem_queue, |
| 255 | .open_file = fio_libpmem_open_file, |
| 256 | .close_file = fio_libpmem_close_file, |
| 257 | .get_file_size = generic_get_file_size, |
| 258 | .prepopulate_file = generic_prepopulate_file, |
| 259 | .flags = FIO_SYNCIO | FIO_RAWIO | FIO_DISKLESSIO | FIO_NOEXTEND | |
| 260 | FIO_NODISKUTIL | FIO_BARRIER | FIO_MEMALIGN, |
| 261 | }; |
| 262 | |
/*
 * Registers the libpmem engine descriptor with register_ioengine().
 * Marked with fio_init.
 */
static void fio_init fio_libpmem_register(void)
{
	register_ioengine(&ioengine);
}
| 267 | |
/*
 * Removes the libpmem engine descriptor again with unregister_ioengine().
 * Marked with fio_exit.
 */
static void fio_exit fio_libpmem_unregister(void)
{
	unregister_ioengine(&ioengine);
}