Commit | Line | Data |
---|---|---|
ae0db592 | 1 | /* |
363a5f65 | 2 | * libpmem: IO engine that uses PMDK libpmem to read and write data |
ae0db592 TI |
3 | * |
4 | * Copyright (C) 2017 Nippon Telegraph and Telephone Corporation. | |
ebcdccde | 5 | * Copyright 2018-2021, Intel Corporation |
ae0db592 TI |
6 | * |
7 | * This program is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License, | |
9 | * version 2 as published by the Free Software Foundation. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | */ | |
17 | ||
18 | /* | |
19 | * libpmem engine | |
20 | * | |
0e684e9d ŁS |
21 | * IO engine that uses libpmem (part of PMDK collection) to write data |
22 | * and libc's memcpy to read. It requires PMDK >= 1.5. | |
ae0db592 TI |
23 | * |
24 | * To use: | |
25 | * ioengine=libpmem | |
26 | * | |
27 | * Other relevant settings: | |
28 | * iodepth=1 | |
29 | * direct=1 | |
67719e13 | 30 | * sync=1 |
ae0db592 TI |
31 | * directory=/mnt/pmem0/ |
32 | * bs=4k | |
33 | * | |
67719e13 ŁS |
34 | * sync=1 means that pmem_drain() is executed for each write operation. |
35 | * Otherwise it is not, and a drain has to be requested on demand. | |
36 | * | |
37 | * direct=1 means PMEM_F_MEM_NONTEMPORAL flag is set in pmem_memcpy(). | |
ae0db592 TI |
38 | * |
39 | * The pmem device must have a DAX-capable filesystem and be mounted | |
67719e13 | 40 | * with DAX enabled. Directory must point to a mount point of DAX FS. |
ae0db592 TI |
41 | * |
42 | * Example: | |
43 | * mkfs.xfs /dev/pmem0 | |
44 | * mkdir /mnt/pmem0 | |
45 | * mount -o dax /dev/pmem0 /mnt/pmem0 | |
46 | * | |
0e684e9d | 47 | * See examples/libpmem.fio for complete usage example. |
ae0db592 TI |
48 | */ |
49 | ||
50 | #include <stdio.h> | |
ae0db592 TI |
51 | #include <stdlib.h> |
52 | #include <unistd.h> | |
53 | #include <errno.h> | |
ae0db592 TI |
54 | #include <libpmem.h> |
55 | ||
56 | #include "../fio.h" | |
57 | #include "../verify.h" | |
58 | ||
ae0db592 TI |
/*
 * Per-file state of the libpmem engine, attached to a file through
 * FILE_SET_ENG_DATA() and read back with FILE_ENG_DATA().
 */
struct fio_libpmem_data {
	/* address returned by pmem_map_file() for this file */
	void *libpmem_ptr;
	/* length passed to pmem_unmap(); set from f->io_size on open */
	size_t libpmem_sz;
	/* offset subtracted when locating an io_u; set to 0 on open */
	off_t libpmem_off;
};
64 | ||
67719e13 | 65 | static int fio_libpmem_init(struct thread_data *td) |
ae0db592 | 66 | { |
67719e13 | 67 | struct thread_options *o = &td->o; |
ae0db592 | 68 | |
0e684e9d ŁS |
69 | dprint(FD_IO, "o->rw_min_bs %llu\n o->fsync_blocks %u\n o->fdatasync_blocks %u\n", |
70 | o->rw_min_bs, o->fsync_blocks, o->fdatasync_blocks); | |
67719e13 | 71 | dprint(FD_IO, "DEBUG fio_libpmem_init\n"); |
ae0db592 | 72 | |
67719e13 ŁS |
73 | if ((o->rw_min_bs & page_mask) && |
74 | (o->fsync_blocks || o->fdatasync_blocks)) { | |
75 | log_err("libpmem: mmap options dictate a minimum block size of " | |
76 | "%llu bytes\n", (unsigned long long) page_size); | |
77 | return 1; | |
ae0db592 | 78 | } |
67719e13 | 79 | return 0; |
ae0db592 TI |
80 | } |
81 | ||
82 | /* | |
0e684e9d ŁS |
83 | * This is the pmem_map_file execution function, a helper to |
84 | * fio_libpmem_open_file function. | |
ae0db592 TI |
85 | */ |
86 | static int fio_libpmem_file(struct thread_data *td, struct fio_file *f, | |
87 | size_t length, off_t off) | |
88 | { | |
89 | struct fio_libpmem_data *fdd = FILE_ENG_DATA(f); | |
ebcdccde | 90 | mode_t mode = S_IWUSR | S_IRUSR; |
67719e13 ŁS |
91 | size_t mapped_len; |
92 | int is_pmem; | |
ae0db592 | 93 | |
67719e13 ŁS |
94 | dprint(FD_IO, "DEBUG fio_libpmem_file\n"); |
95 | dprint(FD_IO, "f->file_name = %s td->o.verify = %d \n", f->file_name, | |
ae0db592 | 96 | td->o.verify); |
67719e13 ŁS |
97 | dprint(FD_IO, "length = %ld f->fd = %d off = %ld file mode = %d \n", |
98 | length, f->fd, off, mode); | |
ae0db592 | 99 | |
67719e13 ŁS |
100 | /* unmap any existing mapping */ |
101 | if (fdd->libpmem_ptr) { | |
102 | dprint(FD_IO,"pmem_unmap \n"); | |
103 | if (pmem_unmap(fdd->libpmem_ptr, fdd->libpmem_sz) < 0) | |
104 | return errno; | |
ae0db592 | 105 | fdd->libpmem_ptr = NULL; |
ae0db592 TI |
106 | } |
107 | ||
67719e13 ŁS |
108 | if((fdd->libpmem_ptr = pmem_map_file(f->file_name, length, PMEM_FILE_CREATE, mode, &mapped_len, &is_pmem)) == NULL) { |
109 | td_verror(td, errno, pmem_errormsg()); | |
110 | goto err; | |
ae0db592 TI |
111 | } |
112 | ||
67719e13 ŁS |
113 | if (!is_pmem) { |
114 | td_verror(td, errno, "file_name does not point to persistent memory"); | |
115 | } | |
ae0db592 | 116 | |
67719e13 ŁS |
117 | err: |
118 | if (td->error && fdd->libpmem_ptr) | |
119 | pmem_unmap(fdd->libpmem_ptr, length); | |
ae0db592 | 120 | |
67719e13 | 121 | return td->error; |
ae0db592 TI |
122 | } |
123 | ||
67719e13 | 124 | static int fio_libpmem_open_file(struct thread_data *td, struct fio_file *f) |
ae0db592 | 125 | { |
67719e13 | 126 | struct fio_libpmem_data *fdd; |
ae0db592 | 127 | |
0e684e9d ŁS |
128 | dprint(FD_IO, "DEBUG fio_libpmem_open_file\n"); |
129 | dprint(FD_IO, "f->io_size=%ld\n", f->io_size); | |
130 | dprint(FD_IO, "td->o.size=%lld\n", td->o.size); | |
131 | dprint(FD_IO, "td->o.iodepth=%d\n", td->o.iodepth); | |
132 | dprint(FD_IO, "td->o.iodepth_batch=%d\n", td->o.iodepth_batch); | |
ae0db592 | 133 | |
67719e13 ŁS |
134 | if (fio_file_open(f)) |
135 | td_io_close_file(td, f); | |
ae0db592 | 136 | |
67719e13 ŁS |
137 | fdd = calloc(1, sizeof(*fdd)); |
138 | if (!fdd) { | |
139 | return 1; | |
ae0db592 | 140 | } |
67719e13 | 141 | FILE_SET_ENG_DATA(f, fdd); |
ae0db592 TI |
142 | fdd->libpmem_sz = f->io_size; |
143 | fdd->libpmem_off = 0; | |
144 | ||
67719e13 | 145 | return fio_libpmem_file(td, f, fdd->libpmem_sz, fdd->libpmem_off); |
ae0db592 TI |
146 | } |
147 | ||
148 | static int fio_libpmem_prep(struct thread_data *td, struct io_u *io_u) | |
149 | { | |
150 | struct fio_file *f = io_u->file; | |
151 | struct fio_libpmem_data *fdd = FILE_ENG_DATA(f); | |
ae0db592 | 152 | |
0e684e9d ŁS |
153 | dprint(FD_IO, "DEBUG fio_libpmem_prep\n"); |
154 | dprint(FD_IO, "io_u->offset %llu : fdd->libpmem_off %ld : " | |
67719e13 ŁS |
155 | "io_u->buflen %llu : fdd->libpmem_sz %ld\n", |
156 | io_u->offset, fdd->libpmem_off, | |
157 | io_u->buflen, fdd->libpmem_sz); | |
ae0db592 | 158 | |
67719e13 ŁS |
159 | if (io_u->buflen > f->real_file_size) { |
160 | log_err("libpmem: bs bigger than the file size\n"); | |
161 | return EIO; | |
ae0db592 TI |
162 | } |
163 | ||
ae0db592 | 164 | io_u->mmap_data = fdd->libpmem_ptr + io_u->offset - fdd->libpmem_off |
597a6533 | 165 | - f->file_offset; |
ae0db592 TI |
166 | return 0; |
167 | } | |
168 | ||
2e4ef4fb JA |
169 | static enum fio_q_status fio_libpmem_queue(struct thread_data *td, |
170 | struct io_u *io_u) | |
ae0db592 | 171 | { |
67719e13 ŁS |
172 | unsigned flags = 0; |
173 | ||
ae0db592 TI |
174 | fio_ro_check(td, io_u); |
175 | io_u->error = 0; | |
176 | ||
177 | dprint(FD_IO, "DEBUG fio_libpmem_queue\n"); | |
0e684e9d ŁS |
178 | dprint(FD_IO, "td->o.odirect %d td->o.sync_io %d\n", |
179 | td->o.odirect, td->o.sync_io); | |
180 | /* map both O_SYNC / DSYNC to not use NODRAIN */ | |
67719e13 ŁS |
181 | flags = td->o.sync_io ? 0 : PMEM_F_MEM_NODRAIN; |
182 | flags |= td->o.odirect ? PMEM_F_MEM_NONTEMPORAL : PMEM_F_MEM_TEMPORAL; | |
ae0db592 TI |
183 | |
184 | switch (io_u->ddir) { | |
597a6533 JA |
185 | case DDIR_READ: |
186 | memcpy(io_u->xfer_buf, io_u->mmap_data, io_u->xfer_buflen); | |
187 | break; | |
188 | case DDIR_WRITE: | |
189 | dprint(FD_IO, "DEBUG mmap_data=%p, xfer_buf=%p\n", | |
0e684e9d | 190 | io_u->mmap_data, io_u->xfer_buf); |
67719e13 ŁS |
191 | pmem_memcpy(io_u->mmap_data, |
192 | io_u->xfer_buf, | |
193 | io_u->xfer_buflen, | |
194 | flags); | |
597a6533 JA |
195 | break; |
196 | case DDIR_SYNC: | |
197 | case DDIR_DATASYNC: | |
198 | case DDIR_SYNC_FILE_RANGE: | |
67719e13 | 199 | pmem_drain(); |
597a6533 JA |
200 | break; |
201 | default: | |
202 | io_u->error = EINVAL; | |
203 | break; | |
ae0db592 TI |
204 | } |
205 | ||
206 | return FIO_Q_COMPLETED; | |
207 | } | |
208 | ||
ae0db592 TI |
209 | static int fio_libpmem_close_file(struct thread_data *td, struct fio_file *f) |
210 | { | |
211 | struct fio_libpmem_data *fdd = FILE_ENG_DATA(f); | |
67719e13 | 212 | int ret = 0; |
ae0db592 | 213 | |
0e684e9d | 214 | dprint(FD_IO, "DEBUG fio_libpmem_close_file\n"); |
ae0db592 | 215 | |
67719e13 ŁS |
216 | if (fdd->libpmem_ptr) |
217 | ret = pmem_unmap(fdd->libpmem_ptr, fdd->libpmem_sz); | |
218 | if (fio_file_open(f)) | |
219 | ret &= generic_close_file(td, f); | |
220 | ||
ae0db592 TI |
221 | FILE_SET_ENG_DATA(f, NULL); |
222 | free(fdd); | |
ae0db592 | 223 | |
67719e13 | 224 | return ret; |
ae0db592 TI |
225 | } |
226 | ||
5a8a6a03 | 227 | FIO_STATIC struct ioengine_ops ioengine = { |
597a6533 JA |
228 | .name = "libpmem", |
229 | .version = FIO_IOOPS_VERSION, | |
230 | .init = fio_libpmem_init, | |
231 | .prep = fio_libpmem_prep, | |
232 | .queue = fio_libpmem_queue, | |
233 | .open_file = fio_libpmem_open_file, | |
234 | .close_file = fio_libpmem_close_file, | |
235 | .get_file_size = generic_get_file_size, | |
8c47cc76 | 236 | .prepopulate_file = generic_prepopulate_file, |
67719e13 ŁS |
237 | .flags = FIO_SYNCIO | FIO_RAWIO | FIO_DISKLESSIO | FIO_NOEXTEND | |
238 | FIO_NODISKUTIL | FIO_BARRIER | FIO_MEMALIGN, | |
ae0db592 TI |
239 | }; |
240 | ||
241 | static void fio_init fio_libpmem_register(void) | |
242 | { | |
243 | register_ioengine(&ioengine); | |
244 | } | |
245 | ||
246 | static void fio_exit fio_libpmem_unregister(void) | |
247 | { | |
248 | unregister_ioengine(&ioengine); | |
249 | } |