| 1 | /* |
| 2 | * libpmem: IO engine that uses PMDK libpmem to read and write data |
| 3 | * |
| 4 | * Copyright (C) 2017 Nippon Telegraph and Telephone Corporation. |
| 5 | * |
| 6 | * This program is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU General Public License, |
| 8 | * version 2 as published by the Free Software Foundation. |
| 9 | * |
| 10 | * This program is distributed in the hope that it will be useful, |
| 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 13 | * GNU General Public License for more details. |
| 14 | * |
| 15 | */ |
| 16 | |
| 17 | /* |
| 18 | * libpmem engine |
| 19 | * |
| 20 | * IO engine that uses libpmem to read and write data |
| 21 | * |
| 22 | * To use: |
| 23 | * ioengine=libpmem |
| 24 | * |
| 25 | * Other relevant settings: |
| 26 | * iodepth=1 |
| 27 | * direct=1 |
| 28 | * directory=/mnt/pmem0/ |
| 29 | * bs=4k |
| 30 | * |
| 31 | * direct=1 means that pmem_drain() is executed for each write operation. |
| 32 | * In contrast, direct=0 means that pmem_drain() is not executed. |
| 33 | * |
| 34 | * The pmem device must have a DAX-capable filesystem and be mounted |
| 35 | * with DAX enabled. directory must point to a mount point of DAX FS. |
| 36 | * |
| 37 | * Example: |
| 38 | * mkfs.xfs /dev/pmem0 |
| 39 | * mkdir /mnt/pmem0 |
| 40 | * mount -o dax /dev/pmem0 /mnt/pmem0 |
| 41 | * |
| 42 | * |
| 43 | * See examples/libpmem.fio for more. |
| 44 | * |
| 45 | * |
| 46 | * libpmem.so |
| 47 | * By default, the libpmem engine will let the system find the libpmem.so |
| 48 | * that it uses. You can use an alternative libpmem by setting the |
| 49 | * FIO_PMEM_LIB environment variable to the full path to the desired |
| 50 | * libpmem.so. |
| 51 | */ |
| 52 | |
| 53 | #include <stdio.h> |
| 54 | #include <limits.h> |
| 55 | #include <stdlib.h> |
| 56 | #include <unistd.h> |
| 57 | #include <errno.h> |
| 58 | #include <sys/mman.h> |
| 59 | #include <sys/stat.h> |
| 60 | #include <sys/sysmacros.h> |
| 61 | #include <libgen.h> |
| 62 | #include <libpmem.h> |
| 63 | |
| 64 | #include "../fio.h" |
| 65 | #include "../verify.h" |
| 66 | |
/*
 * Upper bound, in bytes (1GiB), on the size of the partial mapping that
 * fio_libpmem_prep_limited() sets up when the whole file cannot be mapped.
 */
#define MMAP_TOTAL_SZ	(1024UL * 1024UL * 1024UL)
| 72 | |
/*
 * Per-file engine state. It is allocated zeroed in fio_libpmem_open_file()
 * and attached to the fio_file with FILE_SET_ENG_DATA().
 */
struct fio_libpmem_data {
	void *libpmem_ptr;	/* base address of the current mapping, NULL if none */
	size_t libpmem_sz;	/* length in bytes used for the current mapping */
	off_t libpmem_off;	/* file offset that was handed to mmap() */
};
| 78 | |
/* Size units, typed as uintptr_t so they mix with address arithmetic. */
#define MEGABYTE	((uintptr_t)1024 * 1024)
#define GIGABYTE	((uintptr_t)1024 * 1024 * 1024)

/* maximum expected line length in /proc files */
#define PROCMAXLEN	2048

/*
 * Round x up to the next multiple of y. y is expanded three times, so it
 * must not have side effects, and it must be non-zero (it is a divisor).
 */
#define roundup(x, y)	((((x) + ((y) - 1)) / (y)) * (y))
| 83 | |
/* Default mapping alignment; assigned in fio_libpmem_register(). */
static unsigned long long Mmap_align;

/* Base address parsed from PMEM_MMAP_HINT by util_map_hint(). */
static void *Mmap_hint;

/* Becomes true once a valid PMEM_MMAP_HINT has been parsed. */
static bool Mmap_no_random;
| 87 | |
| 88 | /* |
| 89 | * util_map_hint_align -- choose the desired mapping alignment |
| 90 | * |
| 91 | * Use 2MB/1GB page alignment only if the mapping length is at least |
| 92 | * twice as big as the page size. |
| 93 | */ |
| 94 | static inline size_t util_map_hint_align(size_t len, size_t req_align) |
| 95 | { |
| 96 | size_t align = Mmap_align; |
| 97 | |
| 98 | dprint(FD_IO, "DEBUG util_map_hint_align\n" ); |
| 99 | |
| 100 | if (req_align) |
| 101 | align = req_align; |
| 102 | else if (len >= 2 * GIGABYTE) |
| 103 | align = GIGABYTE; |
| 104 | else if (len >= 4 * MEGABYTE) |
| 105 | align = 2 * MEGABYTE; |
| 106 | |
| 107 | dprint(FD_IO, "align=%d\n", (int)align); |
| 108 | return align; |
| 109 | } |
| 110 | |
/*
 * Platform specifics for reading the process memory map:
 *  - sscanf_os:  scanf format that extracts the start and end address
 *                from one line of the map file,
 *  - OS_MAPFILE: path of the map file itself.
 * On FreeBSD MAP_NORESERVE is additionally defined as 0.
 */
#if defined(__FreeBSD__)
#define OS_MAPFILE "/proc/curproc/map"
#define MAP_NORESERVE 0
static const char *sscanf_os = "%p %p";
#else
#define OS_MAPFILE "/proc/self/maps"
static const char *sscanf_os = "%p-%p";
#endif
| 119 | |
| 120 | /* |
| 121 | * util_map_hint_unused -- use /proc to determine a hint address for mmap() |
| 122 | * |
| 123 | * This is a helper function for util_map_hint(). |
| 124 | * It opens up /proc/self/maps and looks for the first unused address |
| 125 | * in the process address space that is: |
| 126 | * - greater or equal 'minaddr' argument, |
| 127 | * - large enough to hold range of given length, |
| 128 | * - aligned to the specified unit. |
| 129 | * |
| 130 | * Asking for aligned address like this will allow the DAX code to use large |
| 131 | * mappings. It is not an error if mmap() ignores the hint and chooses |
| 132 | * different address. |
| 133 | */ |
| 134 | static char *util_map_hint_unused(void *minaddr, size_t len, size_t align) |
| 135 | { |
| 136 | char *lo = NULL; /* beginning of current range in maps file */ |
| 137 | char *hi = NULL; /* end of current range in maps file */ |
| 138 | char *raddr = minaddr; /* ignore regions below 'minaddr' */ |
| 139 | |
| 140 | #ifdef WIN32 |
| 141 | MEMORY_BASIC_INFORMATION mi; |
| 142 | #else |
| 143 | FILE *fp; |
| 144 | char line[PROCMAXLEN]; /* for fgets() */ |
| 145 | #endif |
| 146 | |
| 147 | dprint(FD_IO, "DEBUG util_map_hint_unused\n"); |
| 148 | assert(align > 0); |
| 149 | |
| 150 | if (raddr == NULL) |
| 151 | raddr += page_size; |
| 152 | |
| 153 | raddr = (char *)roundup((uintptr_t)raddr, align); |
| 154 | |
| 155 | #ifdef WIN32 |
| 156 | while ((uintptr_t)raddr < UINTPTR_MAX - len) { |
| 157 | size_t ret = VirtualQuery(raddr, &mi, sizeof(mi)); |
| 158 | if (ret == 0) { |
| 159 | ERR("VirtualQuery %p", raddr); |
| 160 | return MAP_FAILED; |
| 161 | } |
| 162 | dprint(FD_IO, "addr %p len %zu state %d", |
| 163 | mi.BaseAddress, mi.RegionSize, mi.State); |
| 164 | |
| 165 | if ((mi.State != MEM_FREE) || (mi.RegionSize < len)) { |
| 166 | raddr = (char *)mi.BaseAddress + mi.RegionSize; |
| 167 | raddr = (char *)roundup((uintptr_t)raddr, align); |
| 168 | dprint(FD_IO, "nearest aligned addr %p", raddr); |
| 169 | } else { |
| 170 | dprint(FD_IO, "unused region of size %zu found at %p", |
| 171 | mi.RegionSize, mi.BaseAddress); |
| 172 | return mi.BaseAddress; |
| 173 | } |
| 174 | } |
| 175 | |
| 176 | dprint(FD_IO, "end of address space reached"); |
| 177 | return MAP_FAILED; |
| 178 | #else |
| 179 | fp = fopen(OS_MAPFILE, "r"); |
| 180 | if (!fp) { |
| 181 | log_err("!%s\n", OS_MAPFILE); |
| 182 | return MAP_FAILED; |
| 183 | } |
| 184 | |
| 185 | while (fgets(line, PROCMAXLEN, fp) != NULL) { |
| 186 | /* check for range line */ |
| 187 | if (sscanf(line, sscanf_os, &lo, &hi) == 2) { |
| 188 | dprint(FD_IO, "%p-%p\n", lo, hi); |
| 189 | if (lo > raddr) { |
| 190 | if ((uintptr_t)(lo - raddr) >= len) { |
| 191 | dprint(FD_IO, "unused region of size " |
| 192 | "%zu found at %p\n", |
| 193 | lo - raddr, raddr); |
| 194 | break; |
| 195 | } else { |
| 196 | dprint(FD_IO, "region is too small: " |
| 197 | "%zu < %zu\n", |
| 198 | lo - raddr, len); |
| 199 | } |
| 200 | } |
| 201 | |
| 202 | if (hi > raddr) { |
| 203 | raddr = (char *)roundup((uintptr_t)hi, align); |
| 204 | dprint(FD_IO, "nearest aligned addr %p\n", |
| 205 | raddr); |
| 206 | } |
| 207 | |
| 208 | if (raddr == 0) { |
| 209 | dprint(FD_IO, "end of address space reached\n"); |
| 210 | break; |
| 211 | } |
| 212 | } |
| 213 | } |
| 214 | |
| 215 | /* |
| 216 | * Check for a case when this is the last unused range in the address |
| 217 | * space, but is not large enough. (very unlikely) |
| 218 | */ |
| 219 | if ((raddr != NULL) && (UINTPTR_MAX - (uintptr_t)raddr < len)) { |
| 220 | dprint(FD_IO, "end of address space reached"); |
| 221 | raddr = MAP_FAILED; |
| 222 | } |
| 223 | |
| 224 | fclose(fp); |
| 225 | |
| 226 | dprint(FD_IO, "returning %p", raddr); |
| 227 | return raddr; |
| 228 | #endif |
| 229 | } |
| 230 | |
| 231 | /* |
| 232 | * util_map_hint -- determine hint address for mmap() |
| 233 | * |
| 234 | * If PMEM_MMAP_HINT environment variable is not set, we let the system to pick |
| 235 | * the randomized mapping address. Otherwise, a user-defined hint address |
| 236 | * is used. |
| 237 | * |
| 238 | * Windows Environment: |
| 239 | * XXX - Windows doesn't support large DAX pages yet, so there is |
| 240 | * no point in aligning for the same. |
| 241 | * |
| 242 | * Except for Windows Environment: |
| 243 | * ALSR in 64-bit Linux kernel uses 28-bit of randomness for mmap |
| 244 | * (bit positions 12-39), which means the base mapping address is randomized |
| 245 | * within [0..1024GB] range, with 4KB granularity. Assuming additional |
| 246 | * 1GB alignment, it results in 1024 possible locations. |
| 247 | * |
| 248 | * Configuring the hint address via PMEM_MMAP_HINT environment variable |
| 249 | * disables address randomization. In such case, the function will search for |
| 250 | * the first unused, properly aligned region of given size, above the |
| 251 | * specified address. |
| 252 | */ |
| 253 | static char *util_map_hint(size_t len, size_t req_align) |
| 254 | { |
| 255 | char *addr; |
| 256 | size_t align = 0; |
| 257 | char *e = NULL; |
| 258 | |
| 259 | dprint(FD_IO, "DEBUG util_map_hint\n"); |
| 260 | dprint(FD_IO, "len %zu req_align %zu\n", len, req_align); |
| 261 | |
| 262 | /* choose the desired alignment based on the requested length */ |
| 263 | align = util_map_hint_align(len, req_align); |
| 264 | |
| 265 | e = getenv("PMEM_MMAP_HINT"); |
| 266 | if (e) { |
| 267 | char *endp; |
| 268 | unsigned long long val = 0; |
| 269 | |
| 270 | errno = 0; |
| 271 | |
| 272 | val = strtoull(e, &endp, 16); |
| 273 | if (errno || endp == e) { |
| 274 | dprint(FD_IO, "Invalid PMEM_MMAP_HINT\n"); |
| 275 | } else { |
| 276 | Mmap_hint = (void *)val; |
| 277 | Mmap_no_random = true; |
| 278 | dprint(FD_IO, "PMEM_MMAP_HINT set to %p\n", Mmap_hint); |
| 279 | } |
| 280 | } |
| 281 | |
| 282 | if (Mmap_no_random) { |
| 283 | dprint(FD_IO, "user-defined hint %p\n", (void *)Mmap_hint); |
| 284 | addr = util_map_hint_unused((void *)Mmap_hint, len, align); |
| 285 | } else { |
| 286 | /* |
| 287 | * Create dummy mapping to find an unused region of given size. |
| 288 | * * Request for increased size for later address alignment. |
| 289 | * |
| 290 | * Windows Environment: |
| 291 | * Use MAP_NORESERVE flag to only reserve the range of pages |
| 292 | * rather than commit. We don't want the pages to be actually |
| 293 | * backed by the operating system paging file, as the swap |
| 294 | * file is usually too small to handle terabyte pools. |
| 295 | * |
| 296 | * Except for Windows Environment: |
| 297 | * Use MAP_PRIVATE with read-only access to simulate |
| 298 | * zero cost for overcommit accounting. Note: MAP_NORESERVE |
| 299 | * flag is ignored if overcommit is disabled (mode 2). |
| 300 | */ |
| 301 | #ifndef WIN32 |
| 302 | addr = mmap(NULL, len + align, PROT_READ, |
| 303 | MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); |
| 304 | #else |
| 305 | addr = mmap(NULL, len + align, PROT_READ, |
| 306 | MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); |
| 307 | #endif |
| 308 | if (addr != MAP_FAILED) { |
| 309 | dprint(FD_IO, "system choice %p\n", addr); |
| 310 | munmap(addr, len + align); |
| 311 | addr = (char *)roundup((uintptr_t)addr, align); |
| 312 | } |
| 313 | } |
| 314 | |
| 315 | dprint(FD_IO, "hint %p\n", addr); |
| 316 | |
| 317 | return addr; |
| 318 | } |
| 319 | |
| 320 | /* |
| 321 | * This is the mmap execution function |
| 322 | */ |
| 323 | static int fio_libpmem_file(struct thread_data *td, struct fio_file *f, |
| 324 | size_t length, off_t off) |
| 325 | { |
| 326 | struct fio_libpmem_data *fdd = FILE_ENG_DATA(f); |
| 327 | int flags = 0; |
| 328 | void *addr = NULL; |
| 329 | |
| 330 | dprint(FD_IO, "DEBUG fio_libpmem_file\n"); |
| 331 | |
| 332 | if (td_rw(td)) |
| 333 | flags = PROT_READ | PROT_WRITE; |
| 334 | else if (td_write(td)) { |
| 335 | flags = PROT_WRITE; |
| 336 | |
| 337 | if (td->o.verify != VERIFY_NONE) |
| 338 | flags |= PROT_READ; |
| 339 | } else |
| 340 | flags = PROT_READ; |
| 341 | |
| 342 | dprint(FD_IO, "f->file_name = %s td->o.verify = %d \n", f->file_name, |
| 343 | td->o.verify); |
| 344 | dprint(FD_IO, "length = %ld flags = %d f->fd = %d off = %ld \n", |
| 345 | length, flags, f->fd,off); |
| 346 | |
| 347 | addr = util_map_hint(length, 0); |
| 348 | |
| 349 | fdd->libpmem_ptr = mmap(addr, length, flags, MAP_SHARED, f->fd, off); |
| 350 | if (fdd->libpmem_ptr == MAP_FAILED) { |
| 351 | fdd->libpmem_ptr = NULL; |
| 352 | td_verror(td, errno, "mmap"); |
| 353 | } |
| 354 | |
| 355 | if (td->error && fdd->libpmem_ptr) |
| 356 | munmap(fdd->libpmem_ptr, length); |
| 357 | |
| 358 | return td->error; |
| 359 | } |
| 360 | |
| 361 | /* |
| 362 | * XXX Just mmap an appropriate portion, we cannot mmap the full extent |
| 363 | */ |
| 364 | static int fio_libpmem_prep_limited(struct thread_data *td, struct io_u *io_u) |
| 365 | { |
| 366 | struct fio_file *f = io_u->file; |
| 367 | struct fio_libpmem_data *fdd = FILE_ENG_DATA(f); |
| 368 | |
| 369 | dprint(FD_IO, "DEBUG fio_libpmem_prep_limited\n" ); |
| 370 | |
| 371 | if (io_u->buflen > f->real_file_size) { |
| 372 | log_err("libpmem: bs too big for libpmem engine\n"); |
| 373 | return EIO; |
| 374 | } |
| 375 | |
| 376 | fdd->libpmem_sz = min(MMAP_TOTAL_SZ, f->real_file_size); |
| 377 | if (fdd->libpmem_sz > f->io_size) |
| 378 | fdd->libpmem_sz = f->io_size; |
| 379 | |
| 380 | fdd->libpmem_off = io_u->offset; |
| 381 | |
| 382 | return fio_libpmem_file(td, f, fdd->libpmem_sz, fdd->libpmem_off); |
| 383 | } |
| 384 | |
| 385 | /* |
| 386 | * Attempt to mmap the entire file |
| 387 | */ |
| 388 | static int fio_libpmem_prep_full(struct thread_data *td, struct io_u *io_u) |
| 389 | { |
| 390 | struct fio_file *f = io_u->file; |
| 391 | struct fio_libpmem_data *fdd = FILE_ENG_DATA(f); |
| 392 | int ret; |
| 393 | |
| 394 | dprint(FD_IO, "DEBUG fio_libpmem_prep_full\n" ); |
| 395 | |
| 396 | if (fio_file_partial_mmap(f)) |
| 397 | return EINVAL; |
| 398 | |
| 399 | dprint(FD_IO," f->io_size %ld : io_u->offset %lld \n", |
| 400 | f->io_size, io_u->offset); |
| 401 | |
| 402 | if (io_u->offset != (size_t) io_u->offset || |
| 403 | f->io_size != (size_t) f->io_size) { |
| 404 | fio_file_set_partial_mmap(f); |
| 405 | return EINVAL; |
| 406 | } |
| 407 | fdd->libpmem_sz = f->io_size; |
| 408 | fdd->libpmem_off = 0; |
| 409 | |
| 410 | ret = fio_libpmem_file(td, f, fdd->libpmem_sz, fdd->libpmem_off); |
| 411 | if (ret) |
| 412 | fio_file_set_partial_mmap(f); |
| 413 | |
| 414 | return ret; |
| 415 | } |
| 416 | |
| 417 | static int fio_libpmem_prep(struct thread_data *td, struct io_u *io_u) |
| 418 | { |
| 419 | struct fio_file *f = io_u->file; |
| 420 | struct fio_libpmem_data *fdd = FILE_ENG_DATA(f); |
| 421 | int ret; |
| 422 | |
| 423 | dprint(FD_IO, "DEBUG fio_libpmem_prep\n" ); |
| 424 | /* |
| 425 | * It fits within existing mapping, use it |
| 426 | */ |
| 427 | dprint(FD_IO," io_u->offset %llu : fdd->libpmem_off %llu : " |
| 428 | "io_u->buflen %llu : fdd->libpmem_sz %llu\n", |
| 429 | io_u->offset, (unsigned long long) fdd->libpmem_off, |
| 430 | io_u->buflen, (unsigned long long) fdd->libpmem_sz); |
| 431 | |
| 432 | if (io_u->offset >= fdd->libpmem_off && |
| 433 | (io_u->offset + io_u->buflen <= |
| 434 | fdd->libpmem_off + fdd->libpmem_sz)) |
| 435 | goto done; |
| 436 | |
| 437 | /* |
| 438 | * unmap any existing mapping |
| 439 | */ |
| 440 | if (fdd->libpmem_ptr) { |
| 441 | dprint(FD_IO,"munmap \n"); |
| 442 | if (munmap(fdd->libpmem_ptr, fdd->libpmem_sz) < 0) |
| 443 | return errno; |
| 444 | fdd->libpmem_ptr = NULL; |
| 445 | } |
| 446 | |
| 447 | if (fio_libpmem_prep_full(td, io_u)) { |
| 448 | td_clear_error(td); |
| 449 | ret = fio_libpmem_prep_limited(td, io_u); |
| 450 | if (ret) |
| 451 | return ret; |
| 452 | } |
| 453 | |
| 454 | done: |
| 455 | io_u->mmap_data = fdd->libpmem_ptr + io_u->offset - fdd->libpmem_off |
| 456 | - f->file_offset; |
| 457 | return 0; |
| 458 | } |
| 459 | |
| 460 | static enum fio_q_status fio_libpmem_queue(struct thread_data *td, |
| 461 | struct io_u *io_u) |
| 462 | { |
| 463 | fio_ro_check(td, io_u); |
| 464 | io_u->error = 0; |
| 465 | |
| 466 | dprint(FD_IO, "DEBUG fio_libpmem_queue\n"); |
| 467 | |
| 468 | switch (io_u->ddir) { |
| 469 | case DDIR_READ: |
| 470 | memcpy(io_u->xfer_buf, io_u->mmap_data, io_u->xfer_buflen); |
| 471 | break; |
| 472 | case DDIR_WRITE: |
| 473 | dprint(FD_IO, "DEBUG mmap_data=%p, xfer_buf=%p\n", |
| 474 | io_u->mmap_data, io_u->xfer_buf ); |
| 475 | dprint(FD_IO,"td->o.odirect %d \n",td->o.odirect); |
| 476 | if (td->o.odirect) { |
| 477 | pmem_memcpy_persist(io_u->mmap_data, |
| 478 | io_u->xfer_buf, |
| 479 | io_u->xfer_buflen); |
| 480 | } else { |
| 481 | pmem_memcpy_nodrain(io_u->mmap_data, |
| 482 | io_u->xfer_buf, |
| 483 | io_u->xfer_buflen); |
| 484 | } |
| 485 | break; |
| 486 | case DDIR_SYNC: |
| 487 | case DDIR_DATASYNC: |
| 488 | case DDIR_SYNC_FILE_RANGE: |
| 489 | break; |
| 490 | default: |
| 491 | io_u->error = EINVAL; |
| 492 | break; |
| 493 | } |
| 494 | |
| 495 | return FIO_Q_COMPLETED; |
| 496 | } |
| 497 | |
| 498 | static int fio_libpmem_init(struct thread_data *td) |
| 499 | { |
| 500 | struct thread_options *o = &td->o; |
| 501 | |
| 502 | dprint(FD_IO,"o->rw_min_bs %llu \n o->fsync_blocks %d \n o->fdatasync_blocks %d \n", |
| 503 | o->rw_min_bs,o->fsync_blocks,o->fdatasync_blocks); |
| 504 | dprint(FD_IO, "DEBUG fio_libpmem_init\n"); |
| 505 | |
| 506 | if ((o->rw_min_bs & page_mask) && |
| 507 | (o->fsync_blocks || o->fdatasync_blocks)) { |
| 508 | log_err("libpmem: mmap options dictate a minimum block size of " |
| 509 | "%llu bytes\n", (unsigned long long) page_size); |
| 510 | return 1; |
| 511 | } |
| 512 | return 0; |
| 513 | } |
| 514 | |
| 515 | static int fio_libpmem_open_file(struct thread_data *td, struct fio_file *f) |
| 516 | { |
| 517 | struct fio_libpmem_data *fdd; |
| 518 | int ret; |
| 519 | |
| 520 | dprint(FD_IO,"DEBUG fio_libpmem_open_file\n"); |
| 521 | dprint(FD_IO,"f->io_size=%ld \n",f->io_size); |
| 522 | dprint(FD_IO,"td->o.size=%lld \n",td->o.size); |
| 523 | dprint(FD_IO,"td->o.iodepth=%d\n",td->o.iodepth); |
| 524 | dprint(FD_IO,"td->o.iodepth_batch=%d \n",td->o.iodepth_batch); |
| 525 | |
| 526 | ret = generic_open_file(td, f); |
| 527 | if (ret) |
| 528 | return ret; |
| 529 | |
| 530 | fdd = calloc(1, sizeof(*fdd)); |
| 531 | if (!fdd) { |
| 532 | int fio_unused __ret; |
| 533 | __ret = generic_close_file(td, f); |
| 534 | return 1; |
| 535 | } |
| 536 | |
| 537 | FILE_SET_ENG_DATA(f, fdd); |
| 538 | |
| 539 | return 0; |
| 540 | } |
| 541 | |
| 542 | static int fio_libpmem_close_file(struct thread_data *td, struct fio_file *f) |
| 543 | { |
| 544 | struct fio_libpmem_data *fdd = FILE_ENG_DATA(f); |
| 545 | |
| 546 | dprint(FD_IO,"DEBUG fio_libpmem_close_file\n"); |
| 547 | dprint(FD_IO,"td->o.odirect %d \n",td->o.odirect); |
| 548 | |
| 549 | if (!td->o.odirect) { |
| 550 | dprint(FD_IO,"pmem_drain\n"); |
| 551 | pmem_drain(); |
| 552 | } |
| 553 | |
| 554 | FILE_SET_ENG_DATA(f, NULL); |
| 555 | free(fdd); |
| 556 | fio_file_clear_partial_mmap(f); |
| 557 | |
| 558 | return generic_close_file(td, f); |
| 559 | } |
| 560 | |
| 561 | static struct ioengine_ops ioengine = { |
| 562 | .name = "libpmem", |
| 563 | .version = FIO_IOOPS_VERSION, |
| 564 | .init = fio_libpmem_init, |
| 565 | .prep = fio_libpmem_prep, |
| 566 | .queue = fio_libpmem_queue, |
| 567 | .open_file = fio_libpmem_open_file, |
| 568 | .close_file = fio_libpmem_close_file, |
| 569 | .get_file_size = generic_get_file_size, |
| 570 | .flags = FIO_SYNCIO |FIO_NOEXTEND, |
| 571 | }; |
| 572 | |
| 573 | static void fio_init fio_libpmem_register(void) |
| 574 | { |
| 575 | #ifndef WIN32 |
| 576 | Mmap_align = page_size; |
| 577 | #else |
| 578 | if (Mmap_align == 0) { |
| 579 | SYSTEM_INFO si; |
| 580 | |
| 581 | GetSystemInfo(&si); |
| 582 | Mmap_align = si.dwAllocationGranularity; |
| 583 | } |
| 584 | #endif |
| 585 | |
| 586 | register_ioengine(&ioengine); |
| 587 | } |
| 588 | |
| 589 | static void fio_exit fio_libpmem_unregister(void) |
| 590 | { |
| 591 | unregister_ioengine(&ioengine); |
| 592 | } |