Commit | Line | Data |
---|---|---|
ae0db592 | 1 | /* |
363a5f65 | 2 | * libpmem: IO engine that uses PMDK libpmem to read and write data |
ae0db592 TI |
3 | * |
4 | * Copyright (C) 2017 Nippon Telegraph and Telephone Corporation. | |
67719e13 | 5 | * Copyright 2018-2020, Intel Corporation |
ae0db592 TI |
6 | * |
7 | * This program is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License, | |
9 | * version 2 as published by the Free Software Foundation. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | */ | |
17 | ||
18 | /* | |
19 | * libpmem engine | |
20 | * | |
67719e13 | 21 | * IO engine that uses libpmem to write data (and memcpy to read) |
ae0db592 TI |
22 | * |
23 | * To use: | |
24 | * ioengine=libpmem | |
25 | * | |
26 | * Other relevant settings: | |
27 | * iodepth=1 | |
28 | * direct=1 | |
67719e13 | 29 | * sync=1 |
ae0db592 TI |
30 | * directory=/mnt/pmem0/ |
31 | * bs=4k | |
32 | * | |
67719e13 ŁS |
33 | * sync=1 means that pmem_drain() is executed for each write operation. |
34 | * Otherwise it is not executed and a drain must be requested on demand. | |
35 | * | |
36 | * direct=1 means PMEM_F_MEM_NONTEMPORAL flag is set in pmem_memcpy(). | |
ae0db592 TI |
37 | * |
38 | * The pmem device must have a DAX-capable filesystem and be mounted | |
67719e13 | 39 | * with DAX enabled. Directory must point to a mount point of DAX FS. |
ae0db592 TI |
40 | * |
41 | * Example: | |
42 | * mkfs.xfs /dev/pmem0 | |
43 | * mkdir /mnt/pmem0 | |
44 | * mount -o dax /dev/pmem0 /mnt/pmem0 | |
45 | * | |
ae0db592 TI |
46 | * See examples/libpmem.fio for more. |
47 | * | |
48 | * | |
49 | * libpmem.so | |
50 | * By default, the libpmem engine will let the system find the libpmem.so | |
51 | * that it uses. You can use an alternative libpmem by setting the | |
52 | * FIO_PMEM_LIB environment variable to the full path to the desired | |
67719e13 | 53 | * libpmem.so. This engine requires PMDK >= 1.5. |
ae0db592 TI |
54 | */ |
55 | ||
56 | #include <stdio.h> | |
57 | #include <limits.h> | |
58 | #include <stdlib.h> | |
59 | #include <unistd.h> | |
60 | #include <errno.h> | |
61 | #include <sys/mman.h> | |
62 | #include <sys/stat.h> | |
63 | #include <sys/sysmacros.h> | |
64 | #include <libgen.h> | |
65 | #include <libpmem.h> | |
66 | ||
67 | #include "../fio.h" | |
68 | #include "../verify.h" | |
69 | ||
ae0db592 TI |
/*
 * Per-file engine state, attached to a fio_file with FILE_SET_ENG_DATA().
 */
struct fio_libpmem_data {
	/* Address returned by pmem_map_file(); NULL while nothing is mapped. */
	void *libpmem_ptr;
	/* Length handed to pmem_map_file() and later to pmem_unmap(). */
	size_t libpmem_sz;
	/* Offset subtracted from io_u->offset in prep; set to 0 on open. */
	off_t libpmem_off;
};
75 | ||
67719e13 | 76 | static int fio_libpmem_init(struct thread_data *td) |
ae0db592 | 77 | { |
67719e13 | 78 | struct thread_options *o = &td->o; |
ae0db592 | 79 | |
67719e13 ŁS |
80 | dprint(FD_IO,"o->rw_min_bs %llu \n o->fsync_blocks %u \n o->fdatasync_blocks %u \n", |
81 | o->rw_min_bs,o->fsync_blocks,o->fdatasync_blocks); | |
82 | dprint(FD_IO, "DEBUG fio_libpmem_init\n"); | |
ae0db592 | 83 | |
67719e13 ŁS |
84 | if ((o->rw_min_bs & page_mask) && |
85 | (o->fsync_blocks || o->fdatasync_blocks)) { | |
86 | log_err("libpmem: mmap options dictate a minimum block size of " | |
87 | "%llu bytes\n", (unsigned long long) page_size); | |
88 | return 1; | |
ae0db592 | 89 | } |
67719e13 | 90 | return 0; |
ae0db592 TI |
91 | } |
92 | ||
93 | /* | |
67719e13 | 94 | * This is the pmem_map_file execution function |
ae0db592 TI |
95 | */ |
96 | static int fio_libpmem_file(struct thread_data *td, struct fio_file *f, | |
97 | size_t length, off_t off) | |
98 | { | |
99 | struct fio_libpmem_data *fdd = FILE_ENG_DATA(f); | |
67719e13 ŁS |
100 | mode_t mode = 0; |
101 | size_t mapped_len; | |
102 | int is_pmem; | |
ae0db592 | 103 | |
67719e13 ŁS |
104 | if(td_rw(td)) |
105 | mode = S_IWUSR | S_IRUSR; | |
106 | else if (td_write(td)) | |
107 | mode = S_IWUSR; | |
108 | else | |
109 | mode = S_IRUSR; | |
ae0db592 | 110 | |
67719e13 ŁS |
111 | dprint(FD_IO, "DEBUG fio_libpmem_file\n"); |
112 | dprint(FD_IO, "f->file_name = %s td->o.verify = %d \n", f->file_name, | |
ae0db592 | 113 | td->o.verify); |
67719e13 ŁS |
114 | dprint(FD_IO, "length = %ld f->fd = %d off = %ld file mode = %d \n", |
115 | length, f->fd, off, mode); | |
ae0db592 | 116 | |
67719e13 ŁS |
117 | /* unmap any existing mapping */ |
118 | if (fdd->libpmem_ptr) { | |
119 | dprint(FD_IO,"pmem_unmap \n"); | |
120 | if (pmem_unmap(fdd->libpmem_ptr, fdd->libpmem_sz) < 0) | |
121 | return errno; | |
ae0db592 | 122 | fdd->libpmem_ptr = NULL; |
ae0db592 TI |
123 | } |
124 | ||
67719e13 ŁS |
125 | if((fdd->libpmem_ptr = pmem_map_file(f->file_name, length, PMEM_FILE_CREATE, mode, &mapped_len, &is_pmem)) == NULL) { |
126 | td_verror(td, errno, pmem_errormsg()); | |
127 | goto err; | |
ae0db592 TI |
128 | } |
129 | ||
67719e13 ŁS |
130 | if (!is_pmem) { |
131 | td_verror(td, errno, "file_name does not point to persistent memory"); | |
132 | } | |
ae0db592 | 133 | |
67719e13 ŁS |
134 | err: |
135 | if (td->error && fdd->libpmem_ptr) | |
136 | pmem_unmap(fdd->libpmem_ptr, length); | |
ae0db592 | 137 | |
67719e13 | 138 | return td->error; |
ae0db592 TI |
139 | } |
140 | ||
67719e13 | 141 | static int fio_libpmem_open_file(struct thread_data *td, struct fio_file *f) |
ae0db592 | 142 | { |
67719e13 | 143 | struct fio_libpmem_data *fdd; |
ae0db592 | 144 | |
67719e13 ŁS |
145 | dprint(FD_IO,"DEBUG fio_libpmem_open_file\n"); |
146 | dprint(FD_IO,"f->io_size=%ld \n",f->io_size); | |
147 | dprint(FD_IO,"td->o.size=%lld \n",td->o.size); | |
148 | dprint(FD_IO,"td->o.iodepth=%d\n",td->o.iodepth); | |
149 | dprint(FD_IO,"td->o.iodepth_batch=%d \n",td->o.iodepth_batch); | |
ae0db592 | 150 | |
67719e13 ŁS |
151 | if (fio_file_open(f)) |
152 | td_io_close_file(td, f); | |
ae0db592 | 153 | |
67719e13 ŁS |
154 | fdd = calloc(1, sizeof(*fdd)); |
155 | if (!fdd) { | |
156 | return 1; | |
ae0db592 | 157 | } |
67719e13 | 158 | FILE_SET_ENG_DATA(f, fdd); |
ae0db592 TI |
159 | fdd->libpmem_sz = f->io_size; |
160 | fdd->libpmem_off = 0; | |
161 | ||
67719e13 | 162 | return fio_libpmem_file(td, f, fdd->libpmem_sz, fdd->libpmem_off); |
ae0db592 TI |
163 | } |
164 | ||
165 | static int fio_libpmem_prep(struct thread_data *td, struct io_u *io_u) | |
166 | { | |
167 | struct fio_file *f = io_u->file; | |
168 | struct fio_libpmem_data *fdd = FILE_ENG_DATA(f); | |
ae0db592 TI |
169 | |
170 | dprint(FD_IO, "DEBUG fio_libpmem_prep\n" ); | |
67719e13 ŁS |
171 | dprint(FD_IO," io_u->offset %llu : fdd->libpmem_off %ld : " |
172 | "io_u->buflen %llu : fdd->libpmem_sz %ld\n", | |
173 | io_u->offset, fdd->libpmem_off, | |
174 | io_u->buflen, fdd->libpmem_sz); | |
ae0db592 | 175 | |
67719e13 ŁS |
176 | if (io_u->buflen > f->real_file_size) { |
177 | log_err("libpmem: bs bigger than the file size\n"); | |
178 | return EIO; | |
ae0db592 TI |
179 | } |
180 | ||
ae0db592 | 181 | io_u->mmap_data = fdd->libpmem_ptr + io_u->offset - fdd->libpmem_off |
597a6533 | 182 | - f->file_offset; |
ae0db592 TI |
183 | return 0; |
184 | } | |
185 | ||
2e4ef4fb JA |
186 | static enum fio_q_status fio_libpmem_queue(struct thread_data *td, |
187 | struct io_u *io_u) | |
ae0db592 | 188 | { |
67719e13 ŁS |
189 | unsigned flags = 0; |
190 | ||
ae0db592 TI |
191 | fio_ro_check(td, io_u); |
192 | io_u->error = 0; | |
193 | ||
194 | dprint(FD_IO, "DEBUG fio_libpmem_queue\n"); | |
67719e13 ŁS |
195 | dprint(FD_IO,"td->o.odirect %d td->o.sync_io %d \n",td->o.odirect, td->o.sync_io); |
196 | flags = td->o.sync_io ? 0 : PMEM_F_MEM_NODRAIN; | |
197 | flags |= td->o.odirect ? PMEM_F_MEM_NONTEMPORAL : PMEM_F_MEM_TEMPORAL; | |
ae0db592 TI |
198 | |
199 | switch (io_u->ddir) { | |
597a6533 JA |
200 | case DDIR_READ: |
201 | memcpy(io_u->xfer_buf, io_u->mmap_data, io_u->xfer_buflen); | |
202 | break; | |
203 | case DDIR_WRITE: | |
204 | dprint(FD_IO, "DEBUG mmap_data=%p, xfer_buf=%p\n", | |
205 | io_u->mmap_data, io_u->xfer_buf ); | |
67719e13 ŁS |
206 | pmem_memcpy(io_u->mmap_data, |
207 | io_u->xfer_buf, | |
208 | io_u->xfer_buflen, | |
209 | flags); | |
597a6533 JA |
210 | break; |
211 | case DDIR_SYNC: | |
212 | case DDIR_DATASYNC: | |
213 | case DDIR_SYNC_FILE_RANGE: | |
67719e13 | 214 | pmem_drain(); |
597a6533 JA |
215 | break; |
216 | default: | |
217 | io_u->error = EINVAL; | |
218 | break; | |
ae0db592 TI |
219 | } |
220 | ||
221 | return FIO_Q_COMPLETED; | |
222 | } | |
223 | ||
ae0db592 TI |
224 | static int fio_libpmem_close_file(struct thread_data *td, struct fio_file *f) |
225 | { | |
226 | struct fio_libpmem_data *fdd = FILE_ENG_DATA(f); | |
67719e13 | 227 | int ret = 0; |
ae0db592 TI |
228 | |
229 | dprint(FD_IO,"DEBUG fio_libpmem_close_file\n"); | |
230 | dprint(FD_IO,"td->o.odirect %d \n",td->o.odirect); | |
231 | ||
597a6533 | 232 | if (!td->o.odirect) { |
ae0db592 TI |
233 | dprint(FD_IO,"pmem_drain\n"); |
234 | pmem_drain(); | |
235 | } | |
236 | ||
67719e13 ŁS |
237 | if (fdd->libpmem_ptr) |
238 | ret = pmem_unmap(fdd->libpmem_ptr, fdd->libpmem_sz); | |
239 | if (fio_file_open(f)) | |
240 | ret &= generic_close_file(td, f); | |
241 | ||
ae0db592 TI |
242 | FILE_SET_ENG_DATA(f, NULL); |
243 | free(fdd); | |
ae0db592 | 244 | |
67719e13 | 245 | return ret; |
ae0db592 TI |
246 | } |
247 | ||
5a8a6a03 | 248 | FIO_STATIC struct ioengine_ops ioengine = { |
597a6533 JA |
249 | .name = "libpmem", |
250 | .version = FIO_IOOPS_VERSION, | |
251 | .init = fio_libpmem_init, | |
252 | .prep = fio_libpmem_prep, | |
253 | .queue = fio_libpmem_queue, | |
254 | .open_file = fio_libpmem_open_file, | |
255 | .close_file = fio_libpmem_close_file, | |
256 | .get_file_size = generic_get_file_size, | |
67719e13 ŁS |
257 | .flags = FIO_SYNCIO | FIO_RAWIO | FIO_DISKLESSIO | FIO_NOEXTEND | |
258 | FIO_NODISKUTIL | FIO_BARRIER | FIO_MEMALIGN, | |
ae0db592 TI |
259 | }; |
260 | ||
261 | static void fio_init fio_libpmem_register(void) | |
262 | { | |
263 | register_ioengine(&ioengine); | |
264 | } | |
265 | ||
266 | static void fio_exit fio_libpmem_unregister(void) | |
267 | { | |
268 | unregister_ioengine(&ioengine); | |
269 | } |