Commit | Line | Data |
---|---|---|
ae0db592 | 1 | /* |
363a5f65 | 2 | * libpmem: IO engine that uses PMDK libpmem to read and write data |
ae0db592 TI |
3 | * |
4 | * Copyright (C) 2017 Nippon Telegraph and Telephone Corporation. | |
ebcdccde | 5 | * Copyright 2018-2021, Intel Corporation |
ae0db592 TI |
6 | * |
7 | * This program is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License, | |
9 | * version 2 as published by the Free Software Foundation. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | */ | |
17 | ||
18 | /* | |
19 | * libpmem engine | |
20 | * | |
67719e13 | 21 | * IO engine that uses libpmem to write data (and memcpy to read) |
ae0db592 TI |
22 | * |
23 | * To use: | |
24 | * ioengine=libpmem | |
25 | * | |
26 | * Other relevant settings: | |
27 | * iodepth=1 | |
28 | * direct=1 | |
67719e13 | 29 | * sync=1 |
ae0db592 TI |
30 | * directory=/mnt/pmem0/ |
31 | * bs=4k | |
32 | * | |
67719e13 ŁS |
33 | * sync=1 means that pmem_drain() is executed for each write operation. |
34 | * Otherwise it is not executed and must be requested on demand. | |
35 | * | |
36 | * direct=1 means the PMEM_F_MEM_NONTEMPORAL flag is passed to pmem_memcpy(). | |
ae0db592 TI |
37 | * |
38 | * The pmem device must have a DAX-capable filesystem and be mounted | |
67719e13 | 39 | * with DAX enabled. Directory must point to a mount point of DAX FS. |
ae0db592 TI |
40 | * |
41 | * Example: | |
42 | * mkfs.xfs /dev/pmem0 | |
43 | * mkdir /mnt/pmem0 | |
44 | * mount -o dax /dev/pmem0 /mnt/pmem0 | |
45 | * | |
ae0db592 TI |
46 | * See examples/libpmem.fio for more. |
47 | * | |
48 | * | |
49 | * libpmem.so | |
50 | * By default, the libpmem engine will let the system find the libpmem.so | |
51 | * that it uses. You can use an alternative libpmem by setting the | |
52 | * FIO_PMEM_LIB environment variable to the full path to the desired | |
67719e13 | 53 | * libpmem.so. This engine requires PMDK >= 1.5. |
ae0db592 TI |
54 | */ |
55 | ||
56 | #include <stdio.h> | |
57 | #include <limits.h> | |
58 | #include <stdlib.h> | |
59 | #include <unistd.h> | |
60 | #include <errno.h> | |
61 | #include <sys/mman.h> | |
62 | #include <sys/stat.h> | |
63 | #include <sys/sysmacros.h> | |
64 | #include <libgen.h> | |
65 | #include <libpmem.h> | |
66 | ||
67 | #include "../fio.h" | |
68 | #include "../verify.h" | |
69 | ||
ae0db592 TI |
/*
 * Per-file engine state, attached to a fio_file with FILE_SET_ENG_DATA()
 * and read back with FILE_ENG_DATA().
 */
struct fio_libpmem_data {
	/* base address returned by pmem_map_file(), NULL when nothing is mapped */
	void *libpmem_ptr;
	/* length requested for the mapping and later handed to pmem_unmap() */
	size_t libpmem_sz;
	/* offset subtracted from io_u->offset when an I/O address is computed */
	off_t libpmem_off;
};
75 | ||
67719e13 | 76 | static int fio_libpmem_init(struct thread_data *td) |
ae0db592 | 77 | { |
67719e13 | 78 | struct thread_options *o = &td->o; |
ae0db592 | 79 | |
67719e13 ŁS |
80 | dprint(FD_IO,"o->rw_min_bs %llu \n o->fsync_blocks %u \n o->fdatasync_blocks %u \n", |
81 | o->rw_min_bs,o->fsync_blocks,o->fdatasync_blocks); | |
82 | dprint(FD_IO, "DEBUG fio_libpmem_init\n"); | |
ae0db592 | 83 | |
67719e13 ŁS |
84 | if ((o->rw_min_bs & page_mask) && |
85 | (o->fsync_blocks || o->fdatasync_blocks)) { | |
86 | log_err("libpmem: mmap options dictate a minimum block size of " | |
87 | "%llu bytes\n", (unsigned long long) page_size); | |
88 | return 1; | |
ae0db592 | 89 | } |
67719e13 | 90 | return 0; |
ae0db592 TI |
91 | } |
92 | ||
93 | /* | |
67719e13 | 94 | * This is the pmem_map_file execution function |
ae0db592 TI |
95 | */ |
96 | static int fio_libpmem_file(struct thread_data *td, struct fio_file *f, | |
97 | size_t length, off_t off) | |
98 | { | |
99 | struct fio_libpmem_data *fdd = FILE_ENG_DATA(f); | |
ebcdccde | 100 | mode_t mode = S_IWUSR | S_IRUSR; |
67719e13 ŁS |
101 | size_t mapped_len; |
102 | int is_pmem; | |
ae0db592 | 103 | |
67719e13 ŁS |
104 | dprint(FD_IO, "DEBUG fio_libpmem_file\n"); |
105 | dprint(FD_IO, "f->file_name = %s td->o.verify = %d \n", f->file_name, | |
ae0db592 | 106 | td->o.verify); |
67719e13 ŁS |
107 | dprint(FD_IO, "length = %ld f->fd = %d off = %ld file mode = %d \n", |
108 | length, f->fd, off, mode); | |
ae0db592 | 109 | |
67719e13 ŁS |
110 | /* unmap any existing mapping */ |
111 | if (fdd->libpmem_ptr) { | |
112 | dprint(FD_IO,"pmem_unmap \n"); | |
113 | if (pmem_unmap(fdd->libpmem_ptr, fdd->libpmem_sz) < 0) | |
114 | return errno; | |
ae0db592 | 115 | fdd->libpmem_ptr = NULL; |
ae0db592 TI |
116 | } |
117 | ||
67719e13 ŁS |
118 | if((fdd->libpmem_ptr = pmem_map_file(f->file_name, length, PMEM_FILE_CREATE, mode, &mapped_len, &is_pmem)) == NULL) { |
119 | td_verror(td, errno, pmem_errormsg()); | |
120 | goto err; | |
ae0db592 TI |
121 | } |
122 | ||
67719e13 ŁS |
123 | if (!is_pmem) { |
124 | td_verror(td, errno, "file_name does not point to persistent memory"); | |
125 | } | |
ae0db592 | 126 | |
67719e13 ŁS |
127 | err: |
128 | if (td->error && fdd->libpmem_ptr) | |
129 | pmem_unmap(fdd->libpmem_ptr, length); | |
ae0db592 | 130 | |
67719e13 | 131 | return td->error; |
ae0db592 TI |
132 | } |
133 | ||
67719e13 | 134 | static int fio_libpmem_open_file(struct thread_data *td, struct fio_file *f) |
ae0db592 | 135 | { |
67719e13 | 136 | struct fio_libpmem_data *fdd; |
ae0db592 | 137 | |
67719e13 ŁS |
138 | dprint(FD_IO,"DEBUG fio_libpmem_open_file\n"); |
139 | dprint(FD_IO,"f->io_size=%ld \n",f->io_size); | |
140 | dprint(FD_IO,"td->o.size=%lld \n",td->o.size); | |
141 | dprint(FD_IO,"td->o.iodepth=%d\n",td->o.iodepth); | |
142 | dprint(FD_IO,"td->o.iodepth_batch=%d \n",td->o.iodepth_batch); | |
ae0db592 | 143 | |
67719e13 ŁS |
144 | if (fio_file_open(f)) |
145 | td_io_close_file(td, f); | |
ae0db592 | 146 | |
67719e13 ŁS |
147 | fdd = calloc(1, sizeof(*fdd)); |
148 | if (!fdd) { | |
149 | return 1; | |
ae0db592 | 150 | } |
67719e13 | 151 | FILE_SET_ENG_DATA(f, fdd); |
ae0db592 TI |
152 | fdd->libpmem_sz = f->io_size; |
153 | fdd->libpmem_off = 0; | |
154 | ||
67719e13 | 155 | return fio_libpmem_file(td, f, fdd->libpmem_sz, fdd->libpmem_off); |
ae0db592 TI |
156 | } |
157 | ||
158 | static int fio_libpmem_prep(struct thread_data *td, struct io_u *io_u) | |
159 | { | |
160 | struct fio_file *f = io_u->file; | |
161 | struct fio_libpmem_data *fdd = FILE_ENG_DATA(f); | |
ae0db592 TI |
162 | |
163 | dprint(FD_IO, "DEBUG fio_libpmem_prep\n" ); | |
67719e13 ŁS |
164 | dprint(FD_IO," io_u->offset %llu : fdd->libpmem_off %ld : " |
165 | "io_u->buflen %llu : fdd->libpmem_sz %ld\n", | |
166 | io_u->offset, fdd->libpmem_off, | |
167 | io_u->buflen, fdd->libpmem_sz); | |
ae0db592 | 168 | |
67719e13 ŁS |
169 | if (io_u->buflen > f->real_file_size) { |
170 | log_err("libpmem: bs bigger than the file size\n"); | |
171 | return EIO; | |
ae0db592 TI |
172 | } |
173 | ||
ae0db592 | 174 | io_u->mmap_data = fdd->libpmem_ptr + io_u->offset - fdd->libpmem_off |
597a6533 | 175 | - f->file_offset; |
ae0db592 TI |
176 | return 0; |
177 | } | |
178 | ||
2e4ef4fb JA |
179 | static enum fio_q_status fio_libpmem_queue(struct thread_data *td, |
180 | struct io_u *io_u) | |
ae0db592 | 181 | { |
67719e13 ŁS |
182 | unsigned flags = 0; |
183 | ||
ae0db592 TI |
184 | fio_ro_check(td, io_u); |
185 | io_u->error = 0; | |
186 | ||
187 | dprint(FD_IO, "DEBUG fio_libpmem_queue\n"); | |
67719e13 | 188 | dprint(FD_IO,"td->o.odirect %d td->o.sync_io %d \n",td->o.odirect, td->o.sync_io); |
eb9f8d7f | 189 | /* map both O_SYNC / DSYNC to not using NODRAIN */ |
67719e13 ŁS |
190 | flags = td->o.sync_io ? 0 : PMEM_F_MEM_NODRAIN; |
191 | flags |= td->o.odirect ? PMEM_F_MEM_NONTEMPORAL : PMEM_F_MEM_TEMPORAL; | |
ae0db592 TI |
192 | |
193 | switch (io_u->ddir) { | |
597a6533 JA |
194 | case DDIR_READ: |
195 | memcpy(io_u->xfer_buf, io_u->mmap_data, io_u->xfer_buflen); | |
196 | break; | |
197 | case DDIR_WRITE: | |
198 | dprint(FD_IO, "DEBUG mmap_data=%p, xfer_buf=%p\n", | |
199 | io_u->mmap_data, io_u->xfer_buf ); | |
67719e13 ŁS |
200 | pmem_memcpy(io_u->mmap_data, |
201 | io_u->xfer_buf, | |
202 | io_u->xfer_buflen, | |
203 | flags); | |
597a6533 JA |
204 | break; |
205 | case DDIR_SYNC: | |
206 | case DDIR_DATASYNC: | |
207 | case DDIR_SYNC_FILE_RANGE: | |
67719e13 | 208 | pmem_drain(); |
597a6533 JA |
209 | break; |
210 | default: | |
211 | io_u->error = EINVAL; | |
212 | break; | |
ae0db592 TI |
213 | } |
214 | ||
215 | return FIO_Q_COMPLETED; | |
216 | } | |
217 | ||
ae0db592 TI |
218 | static int fio_libpmem_close_file(struct thread_data *td, struct fio_file *f) |
219 | { | |
220 | struct fio_libpmem_data *fdd = FILE_ENG_DATA(f); | |
67719e13 | 221 | int ret = 0; |
ae0db592 TI |
222 | |
223 | dprint(FD_IO,"DEBUG fio_libpmem_close_file\n"); | |
224 | dprint(FD_IO,"td->o.odirect %d \n",td->o.odirect); | |
225 | ||
597a6533 | 226 | if (!td->o.odirect) { |
ae0db592 TI |
227 | dprint(FD_IO,"pmem_drain\n"); |
228 | pmem_drain(); | |
229 | } | |
230 | ||
67719e13 ŁS |
231 | if (fdd->libpmem_ptr) |
232 | ret = pmem_unmap(fdd->libpmem_ptr, fdd->libpmem_sz); | |
233 | if (fio_file_open(f)) | |
234 | ret &= generic_close_file(td, f); | |
235 | ||
ae0db592 TI |
236 | FILE_SET_ENG_DATA(f, NULL); |
237 | free(fdd); | |
ae0db592 | 238 | |
67719e13 | 239 | return ret; |
ae0db592 TI |
240 | } |
241 | ||
5a8a6a03 | 242 | FIO_STATIC struct ioengine_ops ioengine = { |
597a6533 JA |
243 | .name = "libpmem", |
244 | .version = FIO_IOOPS_VERSION, | |
245 | .init = fio_libpmem_init, | |
246 | .prep = fio_libpmem_prep, | |
247 | .queue = fio_libpmem_queue, | |
248 | .open_file = fio_libpmem_open_file, | |
249 | .close_file = fio_libpmem_close_file, | |
250 | .get_file_size = generic_get_file_size, | |
8c47cc76 | 251 | .prepopulate_file = generic_prepopulate_file, |
67719e13 ŁS |
252 | .flags = FIO_SYNCIO | FIO_RAWIO | FIO_DISKLESSIO | FIO_NOEXTEND | |
253 | FIO_NODISKUTIL | FIO_BARRIER | FIO_MEMALIGN, | |
ae0db592 TI |
254 | }; |
255 | ||
256 | static void fio_init fio_libpmem_register(void) | |
257 | { | |
258 | register_ioengine(&ioengine); | |
259 | } | |
260 | ||
261 | static void fio_exit fio_libpmem_unregister(void) | |
262 | { | |
263 | unregister_ioengine(&ioengine); | |
264 | } |