engines:io_uring: uring_cmd add support for protection info
[fio.git] / engines / nvme.c
CommitLineData
b3d5e3fd
AK
1/*
2 * nvme structure declarations and helper functions for the
3 * io_uring_cmd engine.
4 */
5
6#include "nvme.h"
7
4885a6eb
VF
8static inline __u64 get_slba(struct nvme_data *data, struct io_u *io_u)
9{
10 if (data->lba_ext)
11 return io_u->offset / data->lba_ext;
12 else
13 return io_u->offset >> data->lba_shift;
14}
15
16static inline __u32 get_nlb(struct nvme_data *data, struct io_u *io_u)
17{
18 if (data->lba_ext)
19 return io_u->xfer_buflen / data->lba_ext - 1;
20 else
21 return (io_u->xfer_buflen >> data->lba_shift) - 1;
22}
23
24void fio_nvme_uring_cmd_trim_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
25 struct nvme_dsm_range *dsm)
26{
27 struct nvme_data *data = FILE_ENG_DATA(io_u->file);
28
29 cmd->opcode = nvme_cmd_dsm;
30 cmd->nsid = data->nsid;
31 cmd->cdw10 = 0;
32 cmd->cdw11 = NVME_ATTRIBUTE_DEALLOCATE;
33 cmd->addr = (__u64) (uintptr_t) dsm;
34 cmd->data_len = sizeof(*dsm);
35
36 dsm->slba = get_slba(data, io_u);
37 /* nlb is a 1-based value for deallocate */
38 dsm->nlb = get_nlb(data, io_u) + 1;
39}
40
/*
 * Prepare an NVMe uring passthrough command for @io_u.
 *
 * Read/write commands are filled in directly; trim is delegated to
 * fio_nvme_uring_cmd_trim_prep(). Returns 0 on success or -ENOTSUP for
 * unsupported data directions.
 */
int fio_nvme_uring_cmd_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
			    struct iovec *iov, struct nvme_dsm_range *dsm)
{
	struct nvme_data *data = FILE_ENG_DATA(io_u->file);
	__u64 slba;
	__u32 nlb;

	memset(cmd, 0, sizeof(struct nvme_uring_cmd));

	switch (io_u->ddir) {
	case DDIR_READ:
		cmd->opcode = nvme_cmd_read;
		break;
	case DDIR_WRITE:
		cmd->opcode = nvme_cmd_write;
		break;
	case DDIR_TRIM:
		fio_nvme_uring_cmd_trim_prep(cmd, io_u, dsm);
		return 0;
	default:
		return -ENOTSUP;
	}

	slba = get_slba(data, io_u);
	nlb = get_nlb(data, io_u);

	/* cdw10 and cdw11 represent starting lba */
	cmd->cdw10 = slba & 0xffffffff;
	cmd->cdw11 = slba >> 32;
	/* cdw12 represent number of lba's for read/write */
	cmd->cdw12 = nlb | (io_u->dtype << 20);
	/* cdw13 bits 31:16 carry the directive specific value (FDP placement) */
	cmd->cdw13 = io_u->dspec << 16;
	if (iov) {
		/*
		 * Vectored path: addr points at the iovec and data_len is 1 —
		 * presumably the iovec count for the uring_cmd vectored
		 * passthrough; NOTE(review): confirm against the kernel's
		 * io_uring_cmd handling.
		 */
		iov->iov_base = io_u->xfer_buf;
		iov->iov_len = io_u->xfer_buflen;
		cmd->addr = (__u64)(uintptr_t)iov;
		cmd->data_len = 1;
	} else {
		cmd->addr = (__u64)(uintptr_t)io_u->xfer_buf;
		cmd->data_len = io_u->xfer_buflen;
	}
	/*
	 * Separate metadata buffer only applies to non-extended-LBA formats
	 * (lba_shift set) that actually carry metadata (ms != 0).
	 */
	if (data->lba_shift && data->ms) {
		cmd->metadata = (__u64)(uintptr_t)io_u->mmap_data;
		cmd->metadata_len = (nlb + 1) * data->ms;
	}
	cmd->nsid = data->nsid;
	return 0;
}
89
3ee8311a
AK
/*
 * Fill end-to-end protection information fields of an already-prepared
 * read/write command, based on the namespace's PI type and guard size.
 */
void fio_nvme_pi_fill(struct nvme_uring_cmd *cmd, struct io_u *io_u,
		      struct nvme_cmd_ext_io_opts *opts)
{
	struct nvme_data *data = FILE_ENG_DATA(io_u->file);
	__u64 slba;

	slba = get_slba(data, io_u);
	/* OR in PRINFO/PRACT flags (cdw12 bits above the nlb field) */
	cmd->cdw12 |= opts->io_flags;

	switch (data->pi_type) {
	case NVME_NS_DPS_PI_TYPE1:
	case NVME_NS_DPS_PI_TYPE2:
		/* Types 1/2 check the reference tag, seeded from the slba */
		switch (data->guard_type) {
		case NVME_NVM_NS_16B_GUARD:
			/* 32-bit initial reference tag in cdw14 */
			cmd->cdw14 = (__u32)slba;
			break;
		case NVME_NVM_NS_64B_GUARD:
			/* 48-bit reference tag: low 32 in cdw14, high 16 in cdw3 */
			cmd->cdw14 = (__u32)slba;
			cmd->cdw3 = ((slba >> 32) & 0xffff);
			break;
		default:
			break;
		}
		/* cdw15: application tag mask (31:16) and application tag (15:0) */
		cmd->cdw15 = (opts->apptag_mask << 16 | opts->apptag);
		break;
	case NVME_NS_DPS_PI_TYPE3:
		/* Type 3 does not check reference tags; only apptag applies */
		cmd->cdw15 = (opts->apptag_mask << 16 | opts->apptag);
		break;
	case NVME_NS_DPS_PI_NONE:
		break;
	}
}
122
b3d5e3fd
AK
123static int nvme_identify(int fd, __u32 nsid, enum nvme_identify_cns cns,
124 enum nvme_csi csi, void *data)
125{
126 struct nvme_passthru_cmd cmd = {
127 .opcode = nvme_admin_identify,
128 .nsid = nsid,
129 .addr = (__u64)(uintptr_t)data,
130 .data_len = NVME_IDENTIFY_DATA_SIZE,
131 .cdw10 = cns,
132 .cdw11 = csi << NVME_IDENTIFY_CSI_SHIFT,
133 .timeout_ms = NVME_DEFAULT_IOCTL_TIMEOUT,
134 };
135
136 return ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd);
137}
138
3ee8311a
AK
/*
 * Probe an NVMe generic char device (/dev/ngXnY) and fill @data with the
 * namespace geometry: nsid, LBA data size, metadata size, protection
 * information type/guard/size, and whether the format is extended-LBA.
 * Stores the namespace size (in logical blocks) in *nlba.
 *
 * @pi_act: when set and metadata consists solely of PI, the controller
 * inserts/strips PI itself, so the host treats the namespace as having
 * no separate metadata.
 *
 * Returns 0 on success, 1 for a non-char-device file, or a negative
 * errno / identify failure code.
 */
int fio_nvme_get_info(struct fio_file *f, __u64 *nlba, __u32 pi_act,
		      struct nvme_data *data)
{
	struct nvme_id_ns ns;
	struct nvme_id_ctrl ctrl;
	struct nvme_nvm_id_ns nvm_ns;
	int namespace_id;
	int fd, err;
	__u32 format_idx, elbaf;

	if (f->filetype != FIO_TYPE_CHAR) {
		log_err("ioengine io_uring_cmd only works with nvme ns "
			"generic char devices (/dev/ngXnY)\n");
		return 1;
	}

	fd = open(f->file_name, O_RDONLY);
	if (fd < 0)
		return -errno;

	namespace_id = ioctl(fd, NVME_IOCTL_ID);
	if (namespace_id < 0) {
		/* save errno before log_err can clobber it */
		err = -errno;
		log_err("%s: failed to fetch namespace-id\n", f->file_name);
		goto out;
	}

	/* Controller identify is needed later for the ctratt ELBAS bit */
	err = nvme_identify(fd, 0, NVME_IDENTIFY_CNS_CTRL, NVME_CSI_NVM, &ctrl);
	if (err) {
		log_err("%s: failed to fetch identify ctrl\n", f->file_name);
		goto out;
	}

	/*
	 * Identify namespace to get namespace-id, namespace size in LBA's
	 * and LBA data size.
	 */
	err = nvme_identify(fd, namespace_id, NVME_IDENTIFY_CNS_NS,
				NVME_CSI_NVM, &ns);
	if (err) {
		log_err("%s: failed to fetch identify namespace\n",
			f->file_name);
		goto out;
	}

	data->nsid = namespace_id;

	/*
	 * 16 or 64 as maximum number of supported LBA formats.
	 * From flbas bit 0-3 indicates lsb and bit 5-6 indicates msb
	 * of the format index used to format the namespace.
	 */
	if (ns.nlbaf < 16)
		format_idx = ns.flbas & 0xf;
	else
		format_idx = (ns.flbas & 0xf) + (((ns.flbas >> 5) & 0x3) << 4);

	data->lba_size = 1 << ns.lbaf[format_idx].ds;
	data->ms = le16_to_cpu(ns.lbaf[format_idx].ms);

	/* Check for end to end data protection support */
	if (data->ms && (ns.dps & NVME_NS_DPS_PI_MASK))
		data->pi_type = (ns.dps & NVME_NS_DPS_PI_MASK);

	/* Without PI, skip straight to the extended-LBA check */
	if (!data->pi_type)
		goto check_elba;

	if (ctrl.ctratt & NVME_CTRL_CTRATT_ELBAS) {
		/* Extended LBA formats supported: query the NVM CSI identify */
		err = nvme_identify(fd, namespace_id, NVME_IDENTIFY_CNS_CSI_NS,
					NVME_CSI_NVM, &nvm_ns);
		if (err) {
			log_err("%s: failed to fetch identify nvm namespace\n",
				f->file_name);
			goto out;
		}

		elbaf = le32_to_cpu(nvm_ns.elbaf[format_idx]);

		/* Currently we don't support storage tags */
		if (elbaf & NVME_ID_NS_NVM_STS_MASK) {
			log_err("%s: Storage tag not supported\n",
				f->file_name);
			err = -ENOTSUP;
			goto out;
		}

		data->guard_type = (elbaf >> NVME_ID_NS_NVM_GUARD_SHIFT) &
				NVME_ID_NS_NVM_GUARD_MASK;

		/* No 32 bit guard, as storage tag is mandatory for it */
		switch (data->guard_type) {
		case NVME_NVM_NS_16B_GUARD:
			data->pi_size = sizeof(struct nvme_16b_guard_pif);
			break;
		case NVME_NVM_NS_64B_GUARD:
			data->pi_size = sizeof(struct nvme_64b_guard_pif);
			break;
		default:
			break;
		}
	} else {
		/* Controllers without ELBAS only support the 16b guard */
		data->guard_type = NVME_NVM_NS_16B_GUARD;
		data->pi_size = sizeof(struct nvme_16b_guard_pif);
	}

	/*
	 * when PRACT bit is set to 1, and metadata size is equal to protection
	 * information size, controller inserts and removes PI for write and
	 * read commands respectively.
	 */
	if (pi_act && data->ms == data->pi_size)
		data->ms = 0;

	data->pi_loc = (ns.dps & NVME_NS_DPS_PI_FIRST);

check_elba:
	/*
	 * Bit 4 for flbas indicates if metadata is transferred at the end of
	 * logical block creating an extended LBA.
	 */
	if (data->ms && ((ns.flbas >> 4) & 0x1))
		data->lba_ext = data->lba_size + data->ms;
	else
		data->lba_shift = ilog2(data->lba_size);

	*nlba = ns.nsze;

out:
	close(fd);
	return err;
}
3d05e0ff
AK
270
271int fio_nvme_get_zoned_model(struct thread_data *td, struct fio_file *f,
272 enum zbd_zoned_model *model)
273{
274 struct nvme_data *data = FILE_ENG_DATA(f);
275 struct nvme_id_ns ns;
276 struct nvme_passthru_cmd cmd;
277 int fd, ret = 0;
278
279 if (f->filetype != FIO_TYPE_CHAR)
280 return -EINVAL;
281
282 /* File is not yet opened */
283 fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
284 if (fd < 0)
285 return -errno;
286
287 /* Using nvme_id_ns for data as sizes are same */
288 ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_CTRL,
289 NVME_CSI_ZNS, &ns);
290 if (ret) {
291 *model = ZBD_NONE;
292 goto out;
293 }
294
295 memset(&cmd, 0, sizeof(struct nvme_passthru_cmd));
296
297 /* Using nvme_id_ns for data as sizes are same */
298 ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
299 NVME_CSI_ZNS, &ns);
300 if (ret) {
301 *model = ZBD_NONE;
302 goto out;
303 }
304
305 *model = ZBD_HOST_MANAGED;
306out:
307 close(fd);
308 return 0;
309}
310
311static int nvme_report_zones(int fd, __u32 nsid, __u64 slba, __u32 zras_feat,
312 __u32 data_len, void *data)
313{
314 struct nvme_passthru_cmd cmd = {
315 .opcode = nvme_zns_cmd_mgmt_recv,
316 .nsid = nsid,
317 .addr = (__u64)(uintptr_t)data,
318 .data_len = data_len,
319 .cdw10 = slba & 0xffffffff,
320 .cdw11 = slba >> 32,
321 .cdw12 = (data_len >> 2) - 1,
322 .cdw13 = NVME_ZNS_ZRA_REPORT_ZONES | zras_feat,
323 .timeout_ms = NVME_DEFAULT_IOCTL_TIMEOUT,
324 };
325
326 return ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
327}
328
/*
 * Fetch up to @nr_zones zone descriptors starting at byte @offset and
 * translate them into fio's struct zbd_zone entries in @zbdz.
 *
 * Zones are fetched in chunks of at most 1024 descriptors per passthrough
 * command. Returns the number of zones reported, or a negative error.
 *
 * NOTE(review): all LBA<->byte conversions use data->lba_shift; this
 * assumes non-extended-LBA formats (lba_shift is 0 for lba_ext) — confirm
 * ZNS with extended LBAs is out of scope here.
 */
int fio_nvme_report_zones(struct thread_data *td, struct fio_file *f,
			  uint64_t offset, struct zbd_zone *zbdz,
			  unsigned int nr_zones)
{
	struct nvme_data *data = FILE_ENG_DATA(f);
	struct nvme_zone_report *zr;
	struct nvme_zns_id_ns zns_ns;
	struct nvme_id_ns ns;
	unsigned int i = 0, j, zones_fetched = 0;
	unsigned int max_zones, zones_chunks = 1024;
	int fd, ret = 0;
	__u32 zr_len;
	__u64 zlen;

	/* File is not yet opened */
	fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
	if (fd < 0)
		return -errno;

	zones_fetched = 0;
	/* Report buffer: header plus one descriptor per zone in a chunk */
	zr_len = sizeof(*zr) + (zones_chunks * sizeof(struct nvme_zns_desc));
	zr = calloc(1, zr_len);
	if (!zr) {
		close(fd);
		return -ENOMEM;
	}

	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_NS,
				NVME_CSI_NVM, &ns);
	if (ret) {
		log_err("%s: nvme_identify_ns failed, err=%d\n", f->file_name,
			ret);
		goto out;
	}

	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
				NVME_CSI_ZNS, &zns_ns);
	if (ret) {
		log_err("%s: nvme_zns_identify_ns failed, err=%d\n",
			f->file_name, ret);
		goto out;
	}
	/* Zone size: zsze is in logical blocks, convert to bytes */
	zlen = zns_ns.lbafe[ns.flbas & 0x0f].zsze << data->lba_shift;

	/* Clamp the request to the zones that actually fit past @offset */
	max_zones = (f->real_file_size - offset) / zlen;
	if (max_zones < nr_zones)
		nr_zones = max_zones;

	if (nr_zones < zones_chunks)
		zones_chunks = nr_zones;

	while (zones_fetched < nr_zones) {
		/* Shrink the last chunk so we never over-report */
		if (zones_fetched + zones_chunks >= nr_zones) {
			zones_chunks = nr_zones - zones_fetched;
			zr_len = sizeof(*zr) + (zones_chunks * sizeof(struct nvme_zns_desc));
		}
		ret = nvme_report_zones(fd, data->nsid, offset >> data->lba_shift,
					NVME_ZNS_ZRAS_FEAT_ERZ, zr_len, (void *)zr);
		if (ret) {
			log_err("%s: nvme_zns_report_zones failed, err=%d\n",
				f->file_name, ret);
			goto out;
		}

		/* Transform the zone-report */
		for (j = 0; j < zr->nr_zones; j++, i++) {
			struct nvme_zns_desc *desc = (struct nvme_zns_desc *)&(zr->entries[j]);

			zbdz[i].start = desc->zslba << data->lba_shift;
			zbdz[i].len = zlen;
			zbdz[i].wp = desc->wp << data->lba_shift;
			zbdz[i].capacity = desc->zcap << data->lba_shift;

			/* Zone Type is stored in first 4 bits. */
			switch (desc->zt & 0x0f) {
			case NVME_ZONE_TYPE_SEQWRITE_REQ:
				zbdz[i].type = ZBD_ZONE_TYPE_SWR;
				break;
			default:
				log_err("%s: invalid type for zone at offset %llu.\n",
					f->file_name, (unsigned long long) desc->zslba);
				ret = -EIO;
				goto out;
			}

			/* Zone State is stored in last 4 bits. */
			switch (desc->zs >> 4) {
			case NVME_ZNS_ZS_EMPTY:
				zbdz[i].cond = ZBD_ZONE_COND_EMPTY;
				break;
			case NVME_ZNS_ZS_IMPL_OPEN:
				zbdz[i].cond = ZBD_ZONE_COND_IMP_OPEN;
				break;
			case NVME_ZNS_ZS_EXPL_OPEN:
				zbdz[i].cond = ZBD_ZONE_COND_EXP_OPEN;
				break;
			case NVME_ZNS_ZS_CLOSED:
				zbdz[i].cond = ZBD_ZONE_COND_CLOSED;
				break;
			case NVME_ZNS_ZS_FULL:
				zbdz[i].cond = ZBD_ZONE_COND_FULL;
				break;
			case NVME_ZNS_ZS_READ_ONLY:
			case NVME_ZNS_ZS_OFFLINE:
			default:
				/* Treat all these conditions as offline (don't use!) */
				zbdz[i].cond = ZBD_ZONE_COND_OFFLINE;
				zbdz[i].wp = zbdz[i].start;
			}
		}
		zones_fetched += zr->nr_zones;
		offset += zr->nr_zones * zlen;
	}

	ret = zones_fetched;
out:
	free(zr);
	close(fd);

	return ret;
}
450
451int fio_nvme_reset_wp(struct thread_data *td, struct fio_file *f,
452 uint64_t offset, uint64_t length)
453{
454 struct nvme_data *data = FILE_ENG_DATA(f);
455 unsigned int nr_zones;
456 unsigned long long zslba;
457 int i, fd, ret = 0;
458
459 /* If the file is not yet opened, open it for this function. */
460 fd = f->fd;
461 if (fd < 0) {
462 fd = open(f->file_name, O_RDWR | O_LARGEFILE);
463 if (fd < 0)
464 return -errno;
465 }
466
467 zslba = offset >> data->lba_shift;
468 nr_zones = (length + td->o.zone_size - 1) / td->o.zone_size;
469
470 for (i = 0; i < nr_zones; i++, zslba += (td->o.zone_size >> data->lba_shift)) {
471 struct nvme_passthru_cmd cmd = {
472 .opcode = nvme_zns_cmd_mgmt_send,
473 .nsid = data->nsid,
474 .cdw10 = zslba & 0xffffffff,
475 .cdw11 = zslba >> 32,
476 .cdw13 = NVME_ZNS_ZSA_RESET,
477 .addr = (__u64)(uintptr_t)NULL,
478 .data_len = 0,
479 .timeout_ms = NVME_DEFAULT_IOCTL_TIMEOUT,
480 };
481
482 ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
483 }
484
485 if (f->fd < 0)
486 close(fd);
487 return -ret;
488}
489
490int fio_nvme_get_max_open_zones(struct thread_data *td, struct fio_file *f,
491 unsigned int *max_open_zones)
492{
493 struct nvme_data *data = FILE_ENG_DATA(f);
494 struct nvme_zns_id_ns zns_ns;
495 int fd, ret = 0;
496
497 fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
498 if (fd < 0)
499 return -errno;
500
501 ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
502 NVME_CSI_ZNS, &zns_ns);
503 if (ret) {
504 log_err("%s: nvme_zns_identify_ns failed, err=%d\n",
505 f->file_name, ret);
506 goto out;
507 }
508
509 *max_open_zones = zns_ns.mor + 1;
510out:
511 close(fd);
512 return ret;
513}
a7e8aae0
KB
514
515static inline int nvme_fdp_reclaim_unit_handle_status(int fd, __u32 nsid,
516 __u32 data_len, void *data)
517{
518 struct nvme_passthru_cmd cmd = {
519 .opcode = nvme_cmd_io_mgmt_recv,
520 .nsid = nsid,
521 .addr = (__u64)(uintptr_t)data,
522 .data_len = data_len,
523 .cdw10 = 1,
524 .cdw11 = (data_len >> 2) - 1,
525 };
526
527 return ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
528}
529
530int fio_nvme_iomgmt_ruhs(struct thread_data *td, struct fio_file *f,
531 struct nvme_fdp_ruh_status *ruhs, __u32 bytes)
532{
533 struct nvme_data *data = FILE_ENG_DATA(f);
534 int fd, ret;
535
536 fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
537 if (fd < 0)
538 return -errno;
539
540 ret = nvme_fdp_reclaim_unit_handle_status(fd, data->nsid, bytes, ruhs);
541 if (ret) {
542 log_err("%s: nvme_fdp_reclaim_unit_handle_status failed, err=%d\n",
543 f->file_name, ret);
544 errno = ENOTSUP;
545 } else
546 errno = 0;
547
af10f514 548 ret = -errno;
a7e8aae0 549 close(fd);
af10f514 550 return ret;
a7e8aae0 551}