Commit | Line | Data |
---|---|---|
cbb15e42 DJ |
1 | /* |
2 | * device DAX engine | |
3 | * | |
4 | * IO engine that reads/writes from files by doing memcpy to/from | |
5 | * a memory mapped region of DAX enabled device. | |
6 | * | |
7 | * Copyright (C) 2016 Intel Corp | |
8 | * | |
9 | * This program is free software; you can redistribute it and/or | |
10 | * modify it under the terms of the GNU General Public License, | |
11 | * version 2 as published by the Free Software Foundation. | |
12 | * | |
13 | * This program is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | * GNU General Public License for more details. | |
17 | * | |
18 | */ | |
19 | ||
20 | /* | |
21 | * device dax engine | |
22 | * IO engine that accesses a DAX device directly to read and write data | |
23 | * | |
24 | * To use: | |
25 | * ioengine=dev-dax | |
26 | * | |
27 | * Other relevant settings: | |
28 | * iodepth=1 | |
29 | * direct=0 REQUIRED | |
30 | * filename=/dev/daxN.N | |
31 | * bs=2m | |
32 | * | |
33 | * direct should be left to 0. Using dev-dax implies that memory access | |
34 | * is direct. However, dev-dax does not support O_DIRECT flag by design | |
35 | * since it is not necessary. | |
36 | * | |
37 | * bs should, at a minimum, adhere to the device dax alignment. | |
38 | * | |
39 | * libpmem.so | |
40 | * By default, the dev-dax engine will let the system find the libpmem.so | |
41 | * that it uses. You can use an alternative libpmem by setting the | |
42 | * FIO_PMEM_LIB environment variable to the full path to the desired | |
43 | * libpmem.so. | |
44 | */ | |
45 | ||
46 | #include <stdio.h> | |
47 | #include <limits.h> | |
48 | #include <stdlib.h> | |
49 | #include <unistd.h> | |
50 | #include <errno.h> | |
51 | #include <sys/mman.h> | |
52 | #include <sys/stat.h> | |
53 | #include <sys/sysmacros.h> | |
cbb15e42 | 54 | #include <libgen.h> |
cf8775b8 | 55 | #include <libpmem.h> |
cbb15e42 DJ |
56 | |
57 | #include "../fio.h" | |
58 | #include "../verify.h" | |
59 | ||
/*
 * Upper bound of 1GiB on the amount of file data mapped at once,
 * mirroring the behavior of the mmap engine.
 */
#define MMAP_TOTAL_SZ	(1024UL * 1024UL * 1024UL)
65 | ||
/*
 * Per-file engine state describing the currently active mapping.
 */
struct fio_devdax_data {
	void *devdax_ptr;	/* address returned by mmap(), NULL when unmapped */
	size_t devdax_sz;	/* length passed to mmap()/munmap() */
	off_t devdax_off;	/* file offset the mapping starts at */
};
71 | ||
cbb15e42 DJ |
72 | static int fio_devdax_file(struct thread_data *td, struct fio_file *f, |
73 | size_t length, off_t off) | |
74 | { | |
75 | struct fio_devdax_data *fdd = FILE_ENG_DATA(f); | |
76 | int flags = 0; | |
77 | ||
78 | if (td_rw(td)) | |
79 | flags = PROT_READ | PROT_WRITE; | |
80 | else if (td_write(td)) { | |
81 | flags = PROT_WRITE; | |
82 | ||
83 | if (td->o.verify != VERIFY_NONE) | |
84 | flags |= PROT_READ; | |
85 | } else | |
86 | flags = PROT_READ; | |
87 | ||
88 | fdd->devdax_ptr = mmap(NULL, length, flags, MAP_SHARED, f->fd, off); | |
89 | if (fdd->devdax_ptr == MAP_FAILED) { | |
90 | fdd->devdax_ptr = NULL; | |
91 | td_verror(td, errno, "mmap"); | |
92 | } | |
93 | ||
94 | if (td->error && fdd->devdax_ptr) | |
95 | munmap(fdd->devdax_ptr, length); | |
96 | ||
97 | return td->error; | |
98 | } | |
99 | ||
100 | /* | |
101 | * Just mmap an appropriate portion, we cannot mmap the full extent | |
102 | */ | |
103 | static int fio_devdax_prep_limited(struct thread_data *td, struct io_u *io_u) | |
104 | { | |
105 | struct fio_file *f = io_u->file; | |
106 | struct fio_devdax_data *fdd = FILE_ENG_DATA(f); | |
107 | ||
108 | if (io_u->buflen > f->real_file_size) { | |
a0998600 | 109 | log_err("dev-dax: bs too big for dev-dax engine\n"); |
cbb15e42 DJ |
110 | return EIO; |
111 | } | |
112 | ||
113 | fdd->devdax_sz = min(MMAP_TOTAL_SZ, f->real_file_size); | |
114 | if (fdd->devdax_sz > f->io_size) | |
115 | fdd->devdax_sz = f->io_size; | |
116 | ||
117 | fdd->devdax_off = io_u->offset; | |
118 | ||
119 | return fio_devdax_file(td, f, fdd->devdax_sz, fdd->devdax_off); | |
120 | } | |
121 | ||
122 | /* | |
123 | * Attempt to mmap the entire file | |
124 | */ | |
125 | static int fio_devdax_prep_full(struct thread_data *td, struct io_u *io_u) | |
126 | { | |
127 | struct fio_file *f = io_u->file; | |
128 | struct fio_devdax_data *fdd = FILE_ENG_DATA(f); | |
129 | int ret; | |
130 | ||
131 | if (fio_file_partial_mmap(f)) | |
132 | return EINVAL; | |
133 | ||
134 | if (io_u->offset != (size_t) io_u->offset || | |
135 | f->io_size != (size_t) f->io_size) { | |
136 | fio_file_set_partial_mmap(f); | |
137 | return EINVAL; | |
138 | } | |
139 | ||
140 | fdd->devdax_sz = f->io_size; | |
141 | fdd->devdax_off = 0; | |
142 | ||
143 | ret = fio_devdax_file(td, f, fdd->devdax_sz, fdd->devdax_off); | |
144 | if (ret) | |
145 | fio_file_set_partial_mmap(f); | |
146 | ||
147 | return ret; | |
148 | } | |
149 | ||
150 | static int fio_devdax_prep(struct thread_data *td, struct io_u *io_u) | |
151 | { | |
152 | struct fio_file *f = io_u->file; | |
153 | struct fio_devdax_data *fdd = FILE_ENG_DATA(f); | |
154 | int ret; | |
155 | ||
156 | /* | |
157 | * It fits within existing mapping, use it | |
158 | */ | |
159 | if (io_u->offset >= fdd->devdax_off && | |
f23ff35e | 160 | io_u->offset + io_u->buflen <= fdd->devdax_off + fdd->devdax_sz) |
cbb15e42 DJ |
161 | goto done; |
162 | ||
163 | /* | |
164 | * unmap any existing mapping | |
165 | */ | |
166 | if (fdd->devdax_ptr) { | |
167 | if (munmap(fdd->devdax_ptr, fdd->devdax_sz) < 0) | |
168 | return errno; | |
169 | fdd->devdax_ptr = NULL; | |
170 | } | |
171 | ||
172 | if (fio_devdax_prep_full(td, io_u)) { | |
173 | td_clear_error(td); | |
174 | ret = fio_devdax_prep_limited(td, io_u); | |
175 | if (ret) | |
176 | return ret; | |
177 | } | |
178 | ||
179 | done: | |
180 | io_u->mmap_data = fdd->devdax_ptr + io_u->offset - fdd->devdax_off - | |
181 | f->file_offset; | |
182 | return 0; | |
183 | } | |
184 | ||
2e4ef4fb JA |
185 | static enum fio_q_status fio_devdax_queue(struct thread_data *td, |
186 | struct io_u *io_u) | |
cbb15e42 DJ |
187 | { |
188 | fio_ro_check(td, io_u); | |
189 | io_u->error = 0; | |
190 | ||
191 | switch (io_u->ddir) { | |
192 | case DDIR_READ: | |
193 | memcpy(io_u->xfer_buf, io_u->mmap_data, io_u->xfer_buflen); | |
194 | break; | |
195 | case DDIR_WRITE: | |
196 | pmem_memcpy_persist(io_u->mmap_data, io_u->xfer_buf, | |
197 | io_u->xfer_buflen); | |
198 | break; | |
199 | case DDIR_SYNC: | |
200 | case DDIR_DATASYNC: | |
201 | case DDIR_SYNC_FILE_RANGE: | |
202 | break; | |
203 | default: | |
204 | io_u->error = EINVAL; | |
205 | break; | |
206 | } | |
207 | ||
208 | return FIO_Q_COMPLETED; | |
209 | } | |
210 | ||
211 | static int fio_devdax_init(struct thread_data *td) | |
212 | { | |
213 | struct thread_options *o = &td->o; | |
cbb15e42 DJ |
214 | |
215 | if ((o->rw_min_bs & page_mask) && | |
216 | (o->fsync_blocks || o->fdatasync_blocks)) { | |
a0998600 RE |
217 | log_err("dev-dax: mmap options dictate a minimum block size of %llu bytes\n", |
218 | (unsigned long long) page_size); | |
cbb15e42 DJ |
219 | return 1; |
220 | } | |
221 | ||
cbb15e42 DJ |
222 | return 0; |
223 | } | |
224 | ||
225 | static int fio_devdax_open_file(struct thread_data *td, struct fio_file *f) | |
226 | { | |
227 | struct fio_devdax_data *fdd; | |
228 | int ret; | |
229 | ||
230 | ret = generic_open_file(td, f); | |
231 | if (ret) | |
232 | return ret; | |
233 | ||
234 | fdd = calloc(1, sizeof(*fdd)); | |
235 | if (!fdd) { | |
236 | int fio_unused __ret; | |
237 | __ret = generic_close_file(td, f); | |
238 | return 1; | |
239 | } | |
240 | ||
241 | FILE_SET_ENG_DATA(f, fdd); | |
242 | ||
243 | return 0; | |
244 | } | |
245 | ||
246 | static int fio_devdax_close_file(struct thread_data *td, struct fio_file *f) | |
247 | { | |
248 | struct fio_devdax_data *fdd = FILE_ENG_DATA(f); | |
249 | ||
250 | FILE_SET_ENG_DATA(f, NULL); | |
251 | free(fdd); | |
252 | fio_file_clear_partial_mmap(f); | |
253 | ||
254 | return generic_close_file(td, f); | |
255 | } | |
256 | ||
257 | static int | |
258 | fio_devdax_get_file_size(struct thread_data *td, struct fio_file *f) | |
259 | { | |
260 | char spath[PATH_MAX]; | |
261 | char npath[PATH_MAX]; | |
262 | char *rpath; | |
263 | FILE *sfile; | |
264 | uint64_t size; | |
265 | struct stat st; | |
266 | int rc; | |
267 | ||
268 | if (fio_file_size_known(f)) | |
269 | return 0; | |
270 | ||
271 | if (f->filetype != FIO_TYPE_CHAR) | |
272 | return -EINVAL; | |
273 | ||
274 | rc = stat(f->file_name, &st); | |
275 | if (rc < 0) { | |
a0998600 RE |
276 | log_err("%s: failed to stat file %s (%s)\n", |
277 | td->o.name, f->file_name, strerror(errno)); | |
cbb15e42 DJ |
278 | return -errno; |
279 | } | |
280 | ||
281 | snprintf(spath, PATH_MAX, "/sys/dev/char/%d:%d/subsystem", | |
282 | major(st.st_rdev), minor(st.st_rdev)); | |
283 | ||
284 | rpath = realpath(spath, npath); | |
285 | if (!rpath) { | |
a0998600 RE |
286 | log_err("%s: realpath on %s failed (%s)\n", |
287 | td->o.name, spath, strerror(errno)); | |
cbb15e42 DJ |
288 | return -errno; |
289 | } | |
290 | ||
291 | /* check if DAX device */ | |
292 | if (strcmp("/sys/class/dax", rpath)) { | |
293 | log_err("%s: %s not a DAX device!\n", | |
294 | td->o.name, f->file_name); | |
295 | } | |
296 | ||
297 | snprintf(spath, PATH_MAX, "/sys/dev/char/%d:%d/size", | |
298 | major(st.st_rdev), minor(st.st_rdev)); | |
299 | ||
300 | sfile = fopen(spath, "r"); | |
301 | if (!sfile) { | |
a0998600 RE |
302 | log_err("%s: fopen on %s failed (%s)\n", |
303 | td->o.name, spath, strerror(errno)); | |
cbb15e42 DJ |
304 | return 1; |
305 | } | |
306 | ||
307 | rc = fscanf(sfile, "%lu", &size); | |
308 | if (rc < 0) { | |
a0998600 RE |
309 | log_err("%s: fscanf on %s failed (%s)\n", |
310 | td->o.name, spath, strerror(errno)); | |
65f655bc | 311 | fclose(sfile); |
cbb15e42 DJ |
312 | return 1; |
313 | } | |
314 | ||
315 | f->real_file_size = size; | |
316 | ||
317 | fclose(sfile); | |
318 | ||
319 | if (f->file_offset > f->real_file_size) { | |
320 | log_err("%s: offset extends end (%llu > %llu)\n", td->o.name, | |
321 | (unsigned long long) f->file_offset, | |
322 | (unsigned long long) f->real_file_size); | |
323 | return 1; | |
324 | } | |
325 | ||
326 | fio_file_set_size_known(f); | |
327 | return 0; | |
328 | } | |
329 | ||
330 | static struct ioengine_ops ioengine = { | |
331 | .name = "dev-dax", | |
332 | .version = FIO_IOOPS_VERSION, | |
333 | .init = fio_devdax_init, | |
334 | .prep = fio_devdax_prep, | |
335 | .queue = fio_devdax_queue, | |
336 | .open_file = fio_devdax_open_file, | |
337 | .close_file = fio_devdax_close_file, | |
338 | .get_file_size = fio_devdax_get_file_size, | |
339 | .flags = FIO_SYNCIO | FIO_DISKLESSIO | FIO_NOEXTEND | FIO_NODISKUTIL, | |
340 | }; | |
341 | ||
342 | static void fio_init fio_devdax_register(void) | |
343 | { | |
344 | register_ioengine(&ioengine); | |
345 | } | |
346 | ||
347 | static void fio_exit fio_devdax_unregister(void) | |
348 | { | |
349 | unregister_ioengine(&ioengine); | |
350 | } |