engines:io_uring: update arguments to fetch nvme data
[fio.git] / engines / nvme.c
/*
 * nvme structure declarations and helper functions for the
 * io_uring_cmd engine.
 */

#include "nvme.h"

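/*
 * Convert byte offsets/lengths from an io_u into LBA units. For extended
 * LBA formats (data plus interleaved metadata), lba_ext is the full
 * per-block size and is not necessarily a power of two, so division is
 * used; otherwise the block size is a power of two and lba_shift applies.
 */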
static inline __u64 get_slba(struct nvme_data *data, struct io_u *io_u)
{
	if (data->lba_ext)
		return io_u->offset / data->lba_ext;
	else
		return io_u->offset >> data->lba_shift;
}

static inline __u32 get_nlb(struct nvme_data *data, struct io_u *io_u)
{
	if (data->lba_ext)
		return io_u->xfer_buflen / data->lba_ext - 1;
	else
		return (io_u->xfer_buflen >> data->lba_shift) - 1;
}

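/*
 * Prepare a Dataset Management (deallocate) command for a trim: the io_u
 * range is described by a single caller-provided nvme_dsm_range entry.
 */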
void fio_nvme_uring_cmd_trim_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
				  struct nvme_dsm_range *dsm)
{
	struct nvme_data *data = FILE_ENG_DATA(io_u->file);

	cmd->opcode = nvme_cmd_dsm;
	cmd->nsid = data->nsid;
	cmd->cdw10 = 0;
	cmd->cdw11 = NVME_ATTRIBUTE_DEALLOCATE;
	cmd->addr = (__u64) (uintptr_t) dsm;
	cmd->data_len = sizeof(*dsm);

	dsm->slba = get_slba(data, io_u);
	/* nlb is a 1-based value for deallocate */
	dsm->nlb = get_nlb(data, io_u) + 1;
}

int fio_nvme_uring_cmd_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
			    struct iovec *iov, struct nvme_dsm_range *dsm)
{
	struct nvme_data *data = FILE_ENG_DATA(io_u->file);
	__u64 slba;
	__u32 nlb;

	memset(cmd, 0, sizeof(struct nvme_uring_cmd));

	switch (io_u->ddir) {
	case DDIR_READ:
		cmd->opcode = nvme_cmd_read;
		break;
	case DDIR_WRITE:
		cmd->opcode = nvme_cmd_write;
		break;
	case DDIR_TRIM:
		fio_nvme_uring_cmd_trim_prep(cmd, io_u, dsm);
		return 0;
	default:
		return -ENOTSUP;
	}

	slba = get_slba(data, io_u);
	nlb = get_nlb(data, io_u);

	/* cdw10 and cdw11 hold the starting LBA */
	cmd->cdw10 = slba & 0xffffffff;
	cmd->cdw11 = slba >> 32;
	/* cdw12 holds the number of LBAs (0's based) for read/write */
	cmd->cdw12 = nlb | (io_u->dtype << 20);
	cmd->cdw13 = io_u->dspec << 16;
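	/*
	 * Vectored vs. non-vectored passthrough: with an iovec supplied,
	 * addr points at the iovec array and data_len carries the iovec
	 * count (a single iovec here); otherwise addr/data_len describe
	 * the data buffer directly.
	 */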
	if (iov) {
		iov->iov_base = io_u->xfer_buf;
		iov->iov_len = io_u->xfer_buflen;
		cmd->addr = (__u64)(uintptr_t)iov;
		cmd->data_len = 1;
	} else {
		cmd->addr = (__u64)(uintptr_t)io_u->xfer_buf;
		cmd->data_len = io_u->xfer_buflen;
	}
	cmd->nsid = data->nsid;
	return 0;
}

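/*
 * Issue an Identify admin command through the NVMe passthrough ioctl;
 * cdw10 selects the CNS value and cdw11 the command set identifier (CSI).
 */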
static int nvme_identify(int fd, __u32 nsid, enum nvme_identify_cns cns,
			 enum nvme_csi csi, void *data)
{
	struct nvme_passthru_cmd cmd = {
		.opcode = nvme_admin_identify,
		.nsid = nsid,
		.addr = (__u64)(uintptr_t)data,
		.data_len = NVME_IDENTIFY_DATA_SIZE,
		.cdw10 = cns,
		.cdw11 = csi << NVME_IDENTIFY_CSI_SHIFT,
		.timeout_ms = NVME_DEFAULT_IOCTL_TIMEOUT,
	};

	return ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd);
}

int fio_nvme_get_info(struct fio_file *f, __u64 *nlba, struct nvme_data *data)
{
	struct nvme_id_ns ns;
	int namespace_id;
	int fd, err;
	__u32 format_idx;

	if (f->filetype != FIO_TYPE_CHAR) {
		log_err("ioengine io_uring_cmd only works with nvme ns "
			"generic char devices (/dev/ngXnY)\n");
		return 1;
	}

	fd = open(f->file_name, O_RDONLY);
	if (fd < 0)
		return -errno;

	namespace_id = ioctl(fd, NVME_IOCTL_ID);
	if (namespace_id < 0) {
		err = -errno;
		log_err("%s: failed to fetch namespace-id\n", f->file_name);
		goto out;
	}

	/*
	 * Identify the namespace to get the namespace size in LBAs and the
	 * LBA data size.
	 */
	err = nvme_identify(fd, namespace_id, NVME_IDENTIFY_CNS_NS,
			    NVME_CSI_NVM, &ns);
	if (err) {
		log_err("%s: failed to fetch identify namespace\n",
			f->file_name);
		close(fd);
		return err;
	}

	data->nsid = namespace_id;

	/*
	 * A namespace supports a maximum of 16 or 64 LBA formats.
	 * Bits 0-3 of flbas hold the lsb and bits 5-6 hold the msb of the
	 * index of the format used to format the namespace.
	 */
	if (ns.nlbaf < 16)
		format_idx = ns.flbas & 0xf;
	else
		format_idx = (ns.flbas & 0xf) + (((ns.flbas >> 5) & 0x3) << 4);

	data->lba_size = 1 << ns.lbaf[format_idx].ds;

	/*
	 * Only extended LBA formats are supported when metadata is present.
	 * Bit 4 of flbas indicates whether metadata is transferred at the
	 * end of the logical block, creating an extended LBA.
	 */
	data->ms = le16_to_cpu(ns.lbaf[format_idx].ms);
	if (data->ms && !((ns.flbas >> 4) & 0x1)) {
		log_err("%s: only extended logical block can be supported\n",
			f->file_name);
		err = -ENOTSUP;
		goto out;
	}

	if (data->ms)
		data->lba_ext = data->lba_size + data->ms;
	else
		data->lba_shift = ilog2(data->lba_size);

	/* Bail out if end-to-end data protection is enabled */
	if (ns.dps & 0x3) {
		log_err("%s: end to end data protection not supported\n",
			f->file_name);
		err = -ENOTSUP;
		goto out;
	}
	*nlba = ns.nsze;

out:
	close(fd);
	return err;
}

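/*
 * Report the device as host-managed if the ZNS identify data can be
 * fetched; any identify failure below is treated as "not zoned"
 * (ZBD_NONE) rather than an error.
 */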
int fio_nvme_get_zoned_model(struct thread_data *td, struct fio_file *f,
			     enum zbd_zoned_model *model)
{
	struct nvme_data *data = FILE_ENG_DATA(f);
	struct nvme_id_ns ns;
	struct nvme_passthru_cmd cmd;
	int fd, ret = 0;

	if (f->filetype != FIO_TYPE_CHAR)
		return -EINVAL;

	/* File is not yet opened */
	fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
	if (fd < 0)
		return -errno;

	/* Use nvme_id_ns as the buffer since the identify data sizes match */
	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_CTRL,
			    NVME_CSI_ZNS, &ns);
	if (ret) {
		*model = ZBD_NONE;
		goto out;
	}

	memset(&cmd, 0, sizeof(struct nvme_passthru_cmd));

	/* Use nvme_id_ns as the buffer since the identify data sizes match */
	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
			    NVME_CSI_ZNS, &ns);
	if (ret) {
		*model = ZBD_NONE;
		goto out;
	}

	*model = ZBD_HOST_MANAGED;
out:
	close(fd);
	return 0;
}

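/*
 * Zone Management Receive (report zones): cdw10/cdw11 carry the starting
 * LBA, cdw12 the number of dwords to transfer minus one, and cdw13 the
 * receive action together with the reporting options.
 */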
static int nvme_report_zones(int fd, __u32 nsid, __u64 slba, __u32 zras_feat,
			     __u32 data_len, void *data)
{
	struct nvme_passthru_cmd cmd = {
		.opcode = nvme_zns_cmd_mgmt_recv,
		.nsid = nsid,
		.addr = (__u64)(uintptr_t)data,
		.data_len = data_len,
		.cdw10 = slba & 0xffffffff,
		.cdw11 = slba >> 32,
		.cdw12 = (data_len >> 2) - 1,
		.cdw13 = NVME_ZNS_ZRA_REPORT_ZONES | zras_feat,
		.timeout_ms = NVME_DEFAULT_IOCTL_TIMEOUT,
	};

	return ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
}

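/*
 * Fetch the zone report in chunks of up to 1024 zone descriptors and
 * translate each NVMe zone descriptor into fio's zbd_zone representation.
 */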
int fio_nvme_report_zones(struct thread_data *td, struct fio_file *f,
			  uint64_t offset, struct zbd_zone *zbdz,
			  unsigned int nr_zones)
{
	struct nvme_data *data = FILE_ENG_DATA(f);
	struct nvme_zone_report *zr;
	struct nvme_zns_id_ns zns_ns;
	struct nvme_id_ns ns;
	unsigned int i = 0, j, zones_fetched = 0;
	unsigned int max_zones, zones_chunks = 1024;
	int fd, ret = 0;
	__u32 zr_len;
	__u64 zlen;

	/* File is not yet opened */
	fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
	if (fd < 0)
		return -errno;

	zones_fetched = 0;
	zr_len = sizeof(*zr) + (zones_chunks * sizeof(struct nvme_zns_desc));
	zr = calloc(1, zr_len);
	if (!zr) {
		close(fd);
		return -ENOMEM;
	}

	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_NS,
			    NVME_CSI_NVM, &ns);
	if (ret) {
		log_err("%s: nvme_identify_ns failed, err=%d\n", f->file_name,
			ret);
		goto out;
	}

	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
			    NVME_CSI_ZNS, &zns_ns);
	if (ret) {
		log_err("%s: nvme_zns_identify_ns failed, err=%d\n",
			f->file_name, ret);
		goto out;
	}
	zlen = zns_ns.lbafe[ns.flbas & 0x0f].zsze << data->lba_shift;

	max_zones = (f->real_file_size - offset) / zlen;
	if (max_zones < nr_zones)
		nr_zones = max_zones;

	if (nr_zones < zones_chunks)
		zones_chunks = nr_zones;

	while (zones_fetched < nr_zones) {
		if (zones_fetched + zones_chunks >= nr_zones) {
			zones_chunks = nr_zones - zones_fetched;
			zr_len = sizeof(*zr) + (zones_chunks * sizeof(struct nvme_zns_desc));
		}
		ret = nvme_report_zones(fd, data->nsid, offset >> data->lba_shift,
					NVME_ZNS_ZRAS_FEAT_ERZ, zr_len, (void *)zr);
		if (ret) {
			log_err("%s: nvme_zns_report_zones failed, err=%d\n",
				f->file_name, ret);
			goto out;
		}

		/* Transform the zone-report */
		for (j = 0; j < zr->nr_zones; j++, i++) {
			struct nvme_zns_desc *desc = (struct nvme_zns_desc *)&(zr->entries[j]);

			zbdz[i].start = desc->zslba << data->lba_shift;
			zbdz[i].len = zlen;
			zbdz[i].wp = desc->wp << data->lba_shift;
			zbdz[i].capacity = desc->zcap << data->lba_shift;

			/* Zone type is stored in the lower 4 bits. */
			switch (desc->zt & 0x0f) {
			case NVME_ZONE_TYPE_SEQWRITE_REQ:
				zbdz[i].type = ZBD_ZONE_TYPE_SWR;
				break;
			default:
				log_err("%s: invalid type for zone at offset %llu.\n",
					f->file_name, (unsigned long long) desc->zslba);
				ret = -EIO;
				goto out;
			}

			/* Zone state is stored in the upper 4 bits. */
			switch (desc->zs >> 4) {
			case NVME_ZNS_ZS_EMPTY:
				zbdz[i].cond = ZBD_ZONE_COND_EMPTY;
				break;
			case NVME_ZNS_ZS_IMPL_OPEN:
				zbdz[i].cond = ZBD_ZONE_COND_IMP_OPEN;
				break;
			case NVME_ZNS_ZS_EXPL_OPEN:
				zbdz[i].cond = ZBD_ZONE_COND_EXP_OPEN;
				break;
			case NVME_ZNS_ZS_CLOSED:
				zbdz[i].cond = ZBD_ZONE_COND_CLOSED;
				break;
			case NVME_ZNS_ZS_FULL:
				zbdz[i].cond = ZBD_ZONE_COND_FULL;
				break;
			case NVME_ZNS_ZS_READ_ONLY:
			case NVME_ZNS_ZS_OFFLINE:
			default:
				/* Treat all these conditions as offline (don't use!) */
				zbdz[i].cond = ZBD_ZONE_COND_OFFLINE;
				zbdz[i].wp = zbdz[i].start;
			}
		}
		zones_fetched += zr->nr_zones;
		offset += zr->nr_zones * zlen;
	}

	ret = zones_fetched;
out:
	free(zr);
	close(fd);

	return ret;
}

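/*
 * Reset the write pointer of each zone in [offset, offset + length) by
 * issuing a Zone Management Send with the reset action per zone start LBA.
 */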
int fio_nvme_reset_wp(struct thread_data *td, struct fio_file *f,
		      uint64_t offset, uint64_t length)
{
	struct nvme_data *data = FILE_ENG_DATA(f);
	unsigned int nr_zones;
	unsigned long long zslba;
	int i, fd, ret = 0;

	/* If the file is not yet opened, open it for this function. */
	fd = f->fd;
	if (fd < 0) {
		fd = open(f->file_name, O_RDWR | O_LARGEFILE);
		if (fd < 0)
			return -errno;
	}

	zslba = offset >> data->lba_shift;
	nr_zones = (length + td->o.zone_size - 1) / td->o.zone_size;

	for (i = 0; i < nr_zones; i++, zslba += (td->o.zone_size >> data->lba_shift)) {
		struct nvme_passthru_cmd cmd = {
			.opcode = nvme_zns_cmd_mgmt_send,
			.nsid = data->nsid,
			.cdw10 = zslba & 0xffffffff,
			.cdw11 = zslba >> 32,
			.cdw13 = NVME_ZNS_ZSA_RESET,
			.addr = (__u64)(uintptr_t)NULL,
			.data_len = 0,
			.timeout_ms = NVME_DEFAULT_IOCTL_TIMEOUT,
		};

		ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
	}

	if (f->fd < 0)
		close(fd);
	return -ret;
}

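/*
 * MOR (Maximum Open Resources) in the ZNS identify namespace data is a
 * 0's based value, hence the +1 when reporting max_open_zones.
 */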
int fio_nvme_get_max_open_zones(struct thread_data *td, struct fio_file *f,
				unsigned int *max_open_zones)
{
	struct nvme_data *data = FILE_ENG_DATA(f);
	struct nvme_zns_id_ns zns_ns;
	int fd, ret = 0;

	fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
	if (fd < 0)
		return -errno;

	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
			    NVME_CSI_ZNS, &zns_ns);
	if (ret) {
		log_err("%s: nvme_zns_identify_ns failed, err=%d\n",
			f->file_name, ret);
		goto out;
	}

	*max_open_zones = zns_ns.mor + 1;
out:
	close(fd);
	return ret;
}

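/*
 * I/O Management Receive for FDP: cdw10 selects the management operation
 * (1, i.e. reclaim unit handle status) and cdw11 holds the number of
 * dwords to transfer minus one.
 */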
static inline int nvme_fdp_reclaim_unit_handle_status(int fd, __u32 nsid,
						      __u32 data_len, void *data)
{
	struct nvme_passthru_cmd cmd = {
		.opcode = nvme_cmd_io_mgmt_recv,
		.nsid = nsid,
		.addr = (__u64)(uintptr_t)data,
		.data_len = data_len,
		.cdw10 = 1,
		.cdw11 = (data_len >> 2) - 1,
	};

	return ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
}

int fio_nvme_iomgmt_ruhs(struct thread_data *td, struct fio_file *f,
			 struct nvme_fdp_ruh_status *ruhs, __u32 bytes)
{
	struct nvme_data *data = FILE_ENG_DATA(f);
	int fd, ret;

	fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
	if (fd < 0)
		return -errno;

	ret = nvme_fdp_reclaim_unit_handle_status(fd, data->nsid, bytes, ruhs);
	if (ret) {
		log_err("%s: nvme_fdp_reclaim_unit_handle_status failed, err=%d\n",
			f->file_name, ret);
		errno = ENOTSUP;
	} else
		errno = 0;

	ret = -errno;
	close(fd);
	return ret;
}