Commit | Line | Data |
---|---|---|
cbb15e42 DJ |
1 | /* |
2 | * device DAX engine | |
3 | * | |
4 | * IO engine that reads/writes from files by doing memcpy to/from | |
5 | * a memory mapped region of DAX enabled device. | |
6 | * | |
7 | * Copyright (C) 2016 Intel Corp | |
8 | * | |
9 | * This program is free software; you can redistribute it and/or | |
10 | * modify it under the terms of the GNU General Public License, | |
11 | * version 2 as published by the Free Software Foundation. | |
12 | * | |
13 | * This program is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | * GNU General Public License for more details. | |
17 | * | |
18 | */ | |
19 | ||
20 | /* | |
21 | * device dax engine | |
22 | * IO engine that accesses a DAX device directly to read and write data
23 | * | |
24 | * To use: | |
25 | * ioengine=dev-dax | |
26 | * | |
27 | * Other relevant settings: | |
28 | * iodepth=1 | |
29 | * direct=0 REQUIRED | |
30 | * filename=/dev/daxN.N | |
31 | * bs=2m | |
32 | * | |
33 | * direct should be left at 0. Using dev-dax implies that memory access
34 | * is direct. However, dev-dax does not support O_DIRECT flag by design | |
35 | * since it is not necessary. | |
36 | * | |
37 | * bs should, at a minimum, adhere to the device dax alignment.
38 | * | |
39 | * libpmem.so | |
40 | * By default, the dev-dax engine will let the system find the libpmem.so | |
41 | * that it uses. You can use an alternative libpmem by setting the | |
42 | * FIO_PMEM_LIB environment variable to the full path to the desired | |
43 | * libpmem.so. | |
44 | */ | |
45 | ||
46 | #include <stdio.h> | |
47 | #include <limits.h> | |
48 | #include <stdlib.h> | |
49 | #include <unistd.h> | |
50 | #include <errno.h> | |
51 | #include <sys/mman.h> | |
52 | #include <sys/stat.h> | |
53 | #include <sys/sysmacros.h> | |
54 | #include <dlfcn.h> | |
55 | #include <libgen.h> | |
56 | ||
57 | #include "../fio.h" | |
58 | #include "../verify.h" | |
59 | ||
/*
 * Upper bound (1GB) on how much of a device is mapped at once when the
 * full extent cannot be mapped; chosen to mirror the mmap engine.
 */
#define MMAP_TOTAL_SZ	(1024UL * 1024UL * 1024UL)
65 | ||
/*
 * Per-file engine state describing the currently active mapping.
 */
struct fio_devdax_data {
	void *devdax_ptr;	/* address returned by mmap, NULL if unmapped */
	size_t devdax_sz;	/* length that was passed to mmap */
	off_t devdax_off;	/* device offset that was passed to mmap */
};
71 | ||
/*
 * Entry point resolved from libpmem with dlsym() in fio_devdax_init();
 * used by the write path to copy a buffer into the mapping.
 */
static void *(*pmem_memcpy_persist)(void *pmemdest, const void *source,
				    size_t nbytes);
73 | ||
74 | static int fio_devdax_file(struct thread_data *td, struct fio_file *f, | |
75 | size_t length, off_t off) | |
76 | { | |
77 | struct fio_devdax_data *fdd = FILE_ENG_DATA(f); | |
78 | int flags = 0; | |
79 | ||
80 | if (td_rw(td)) | |
81 | flags = PROT_READ | PROT_WRITE; | |
82 | else if (td_write(td)) { | |
83 | flags = PROT_WRITE; | |
84 | ||
85 | if (td->o.verify != VERIFY_NONE) | |
86 | flags |= PROT_READ; | |
87 | } else | |
88 | flags = PROT_READ; | |
89 | ||
90 | fdd->devdax_ptr = mmap(NULL, length, flags, MAP_SHARED, f->fd, off); | |
91 | if (fdd->devdax_ptr == MAP_FAILED) { | |
92 | fdd->devdax_ptr = NULL; | |
93 | td_verror(td, errno, "mmap"); | |
94 | } | |
95 | ||
96 | if (td->error && fdd->devdax_ptr) | |
97 | munmap(fdd->devdax_ptr, length); | |
98 | ||
99 | return td->error; | |
100 | } | |
101 | ||
102 | /* | |
103 | * Just mmap an appropriate portion, we cannot mmap the full extent | |
104 | */ | |
105 | static int fio_devdax_prep_limited(struct thread_data *td, struct io_u *io_u) | |
106 | { | |
107 | struct fio_file *f = io_u->file; | |
108 | struct fio_devdax_data *fdd = FILE_ENG_DATA(f); | |
109 | ||
110 | if (io_u->buflen > f->real_file_size) { | |
111 | log_err("fio: bs too big for dev-dax engine\n"); | |
112 | return EIO; | |
113 | } | |
114 | ||
115 | fdd->devdax_sz = min(MMAP_TOTAL_SZ, f->real_file_size); | |
116 | if (fdd->devdax_sz > f->io_size) | |
117 | fdd->devdax_sz = f->io_size; | |
118 | ||
119 | fdd->devdax_off = io_u->offset; | |
120 | ||
121 | return fio_devdax_file(td, f, fdd->devdax_sz, fdd->devdax_off); | |
122 | } | |
123 | ||
124 | /* | |
125 | * Attempt to mmap the entire file | |
126 | */ | |
127 | static int fio_devdax_prep_full(struct thread_data *td, struct io_u *io_u) | |
128 | { | |
129 | struct fio_file *f = io_u->file; | |
130 | struct fio_devdax_data *fdd = FILE_ENG_DATA(f); | |
131 | int ret; | |
132 | ||
133 | if (fio_file_partial_mmap(f)) | |
134 | return EINVAL; | |
135 | ||
136 | if (io_u->offset != (size_t) io_u->offset || | |
137 | f->io_size != (size_t) f->io_size) { | |
138 | fio_file_set_partial_mmap(f); | |
139 | return EINVAL; | |
140 | } | |
141 | ||
142 | fdd->devdax_sz = f->io_size; | |
143 | fdd->devdax_off = 0; | |
144 | ||
145 | ret = fio_devdax_file(td, f, fdd->devdax_sz, fdd->devdax_off); | |
146 | if (ret) | |
147 | fio_file_set_partial_mmap(f); | |
148 | ||
149 | return ret; | |
150 | } | |
151 | ||
152 | static int fio_devdax_prep(struct thread_data *td, struct io_u *io_u) | |
153 | { | |
154 | struct fio_file *f = io_u->file; | |
155 | struct fio_devdax_data *fdd = FILE_ENG_DATA(f); | |
156 | int ret; | |
157 | ||
158 | /* | |
159 | * It fits within existing mapping, use it | |
160 | */ | |
161 | if (io_u->offset >= fdd->devdax_off && | |
162 | io_u->offset + io_u->buflen < fdd->devdax_off + fdd->devdax_sz) | |
163 | goto done; | |
164 | ||
165 | /* | |
166 | * unmap any existing mapping | |
167 | */ | |
168 | if (fdd->devdax_ptr) { | |
169 | if (munmap(fdd->devdax_ptr, fdd->devdax_sz) < 0) | |
170 | return errno; | |
171 | fdd->devdax_ptr = NULL; | |
172 | } | |
173 | ||
174 | if (fio_devdax_prep_full(td, io_u)) { | |
175 | td_clear_error(td); | |
176 | ret = fio_devdax_prep_limited(td, io_u); | |
177 | if (ret) | |
178 | return ret; | |
179 | } | |
180 | ||
181 | done: | |
182 | io_u->mmap_data = fdd->devdax_ptr + io_u->offset - fdd->devdax_off - | |
183 | f->file_offset; | |
184 | return 0; | |
185 | } | |
186 | ||
187 | static int fio_devdax_queue(struct thread_data *td, struct io_u *io_u) | |
188 | { | |
189 | fio_ro_check(td, io_u); | |
190 | io_u->error = 0; | |
191 | ||
192 | switch (io_u->ddir) { | |
193 | case DDIR_READ: | |
194 | memcpy(io_u->xfer_buf, io_u->mmap_data, io_u->xfer_buflen); | |
195 | break; | |
196 | case DDIR_WRITE: | |
197 | pmem_memcpy_persist(io_u->mmap_data, io_u->xfer_buf, | |
198 | io_u->xfer_buflen); | |
199 | break; | |
200 | case DDIR_SYNC: | |
201 | case DDIR_DATASYNC: | |
202 | case DDIR_SYNC_FILE_RANGE: | |
203 | break; | |
204 | default: | |
205 | io_u->error = EINVAL; | |
206 | break; | |
207 | } | |
208 | ||
209 | return FIO_Q_COMPLETED; | |
210 | } | |
211 | ||
212 | static int fio_devdax_init(struct thread_data *td) | |
213 | { | |
214 | struct thread_options *o = &td->o; | |
215 | const char *path; | |
216 | void *dl; | |
217 | ||
218 | if ((o->rw_min_bs & page_mask) && | |
219 | (o->fsync_blocks || o->fdatasync_blocks)) { | |
220 | log_err("fio: mmap options dictate a minimum block size of " | |
221 | "%llu bytes\n", (unsigned long long) page_size); | |
222 | return 1; | |
223 | } | |
224 | ||
225 | path = getenv("FIO_PMEM_LIB"); | |
226 | if (!path) | |
227 | path = "libpmem.so"; | |
228 | ||
229 | dl = dlopen(path, RTLD_NOW | RTLD_NODELETE); | |
230 | if (!dl) { | |
231 | log_err("fio: unable to open libpmem: %s\n", dlerror()); | |
232 | return 1; | |
233 | } | |
234 | ||
235 | pmem_memcpy_persist = dlsym(dl, "pmem_memcpy_persist"); | |
236 | if (!pmem_memcpy_persist) { | |
237 | log_err("fio: unable to load libpmem: %s\n", dlerror()); | |
238 | return 1; | |
239 | } | |
240 | ||
241 | return 0; | |
242 | } | |
243 | ||
244 | static int fio_devdax_open_file(struct thread_data *td, struct fio_file *f) | |
245 | { | |
246 | struct fio_devdax_data *fdd; | |
247 | int ret; | |
248 | ||
249 | ret = generic_open_file(td, f); | |
250 | if (ret) | |
251 | return ret; | |
252 | ||
253 | fdd = calloc(1, sizeof(*fdd)); | |
254 | if (!fdd) { | |
255 | int fio_unused __ret; | |
256 | __ret = generic_close_file(td, f); | |
257 | return 1; | |
258 | } | |
259 | ||
260 | FILE_SET_ENG_DATA(f, fdd); | |
261 | ||
262 | return 0; | |
263 | } | |
264 | ||
265 | static int fio_devdax_close_file(struct thread_data *td, struct fio_file *f) | |
266 | { | |
267 | struct fio_devdax_data *fdd = FILE_ENG_DATA(f); | |
268 | ||
269 | FILE_SET_ENG_DATA(f, NULL); | |
270 | free(fdd); | |
271 | fio_file_clear_partial_mmap(f); | |
272 | ||
273 | return generic_close_file(td, f); | |
274 | } | |
275 | ||
276 | static int | |
277 | fio_devdax_get_file_size(struct thread_data *td, struct fio_file *f) | |
278 | { | |
279 | char spath[PATH_MAX]; | |
280 | char npath[PATH_MAX]; | |
281 | char *rpath; | |
282 | FILE *sfile; | |
283 | uint64_t size; | |
284 | struct stat st; | |
285 | int rc; | |
286 | ||
287 | if (fio_file_size_known(f)) | |
288 | return 0; | |
289 | ||
290 | if (f->filetype != FIO_TYPE_CHAR) | |
291 | return -EINVAL; | |
292 | ||
293 | rc = stat(f->file_name, &st); | |
294 | if (rc < 0) { | |
295 | log_err("%s: failed to stat file %s: %d\n", | |
296 | td->o.name, f->file_name, errno); | |
297 | return -errno; | |
298 | } | |
299 | ||
300 | snprintf(spath, PATH_MAX, "/sys/dev/char/%d:%d/subsystem", | |
301 | major(st.st_rdev), minor(st.st_rdev)); | |
302 | ||
303 | rpath = realpath(spath, npath); | |
304 | if (!rpath) { | |
305 | log_err("%s: realpath on %s failed: %d\n", | |
306 | td->o.name, spath, errno); | |
307 | return -errno; | |
308 | } | |
309 | ||
310 | /* check if DAX device */ | |
311 | if (strcmp("/sys/class/dax", rpath)) { | |
312 | log_err("%s: %s not a DAX device!\n", | |
313 | td->o.name, f->file_name); | |
314 | } | |
315 | ||
316 | snprintf(spath, PATH_MAX, "/sys/dev/char/%d:%d/size", | |
317 | major(st.st_rdev), minor(st.st_rdev)); | |
318 | ||
319 | sfile = fopen(spath, "r"); | |
320 | if (!sfile) { | |
321 | log_err("%s: fopen on %s failed: %d\n", | |
322 | td->o.name, spath, errno); | |
323 | return 1; | |
324 | } | |
325 | ||
326 | rc = fscanf(sfile, "%lu", &size); | |
327 | if (rc < 0) { | |
328 | log_err("%s: fscanf on %s failed: %d\n", | |
329 | td->o.name, spath, errno); | |
330 | return 1; | |
331 | } | |
332 | ||
333 | f->real_file_size = size; | |
334 | ||
335 | fclose(sfile); | |
336 | ||
337 | if (f->file_offset > f->real_file_size) { | |
338 | log_err("%s: offset extends end (%llu > %llu)\n", td->o.name, | |
339 | (unsigned long long) f->file_offset, | |
340 | (unsigned long long) f->real_file_size); | |
341 | return 1; | |
342 | } | |
343 | ||
344 | fio_file_set_size_known(f); | |
345 | return 0; | |
346 | } | |
347 | ||
348 | static struct ioengine_ops ioengine = { | |
349 | .name = "dev-dax", | |
350 | .version = FIO_IOOPS_VERSION, | |
351 | .init = fio_devdax_init, | |
352 | .prep = fio_devdax_prep, | |
353 | .queue = fio_devdax_queue, | |
354 | .open_file = fio_devdax_open_file, | |
355 | .close_file = fio_devdax_close_file, | |
356 | .get_file_size = fio_devdax_get_file_size, | |
357 | .flags = FIO_SYNCIO | FIO_DISKLESSIO | FIO_NOEXTEND | FIO_NODISKUTIL, | |
358 | }; | |
359 | ||
360 | static void fio_init fio_devdax_register(void) | |
361 | { | |
362 | register_ioengine(&ioengine); | |
363 | } | |
364 | ||
365 | static void fio_exit fio_devdax_unregister(void) | |
366 | { | |
367 | unregister_ioengine(&ioengine); | |
368 | } |