engines/io_uring_cmd: skip pi verify checks for error cases
[fio.git] / engines / nvme.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * nvme structure declarations and helper functions for the
4  * io_uring_cmd engine.
5  */
6
7 #include "nvme.h"
8 #include "../crc/crc-t10dif.h"
9 #include "../crc/crc64.h"
10
11 static inline __u64 get_slba(struct nvme_data *data, struct io_u *io_u)
12 {
13         if (data->lba_ext)
14                 return io_u->offset / data->lba_ext;
15         else
16                 return io_u->offset >> data->lba_shift;
17 }
18
19 static inline __u32 get_nlb(struct nvme_data *data, struct io_u *io_u)
20 {
21         if (data->lba_ext)
22                 return io_u->xfer_buflen / data->lba_ext - 1;
23         else
24                 return (io_u->xfer_buflen >> data->lba_shift) - 1;
25 }
26
27 static void fio_nvme_generate_pi_16b_guard(struct nvme_data *data,
28                                            struct io_u *io_u,
29                                            struct nvme_cmd_ext_io_opts *opts)
30 {
31         struct nvme_pi_data *pi_data = io_u->engine_data;
32         struct nvme_16b_guard_pif *pi;
33         unsigned char *buf = io_u->xfer_buf;
34         unsigned char *md_buf = io_u->mmap_data;
35         __u64 slba = get_slba(data, io_u);
36         __u32 nlb = get_nlb(data, io_u) + 1;
37         __u32 lba_num = 0;
38         __u16 guard = 0;
39
40         if (data->pi_loc) {
41                 if (data->lba_ext)
42                         pi_data->interval = data->lba_ext - data->ms;
43                 else
44                         pi_data->interval = 0;
45         } else {
46                 if (data->lba_ext)
47                         pi_data->interval = data->lba_ext - sizeof(struct nvme_16b_guard_pif);
48                 else
49                         pi_data->interval = data->ms - sizeof(struct nvme_16b_guard_pif);
50         }
51
52         if (io_u->ddir != DDIR_WRITE)
53                 return;
54
55         while (lba_num < nlb) {
56                 if (data->lba_ext)
57                         pi = (struct nvme_16b_guard_pif *)(buf + pi_data->interval);
58                 else
59                         pi = (struct nvme_16b_guard_pif *)(md_buf + pi_data->interval);
60
61                 if (opts->io_flags & NVME_IO_PRINFO_PRCHK_GUARD) {
62                         if (data->lba_ext) {
63                                 guard = fio_crc_t10dif(0, buf, pi_data->interval);
64                         } else {
65                                 guard = fio_crc_t10dif(0, buf, data->lba_size);
66                                 guard = fio_crc_t10dif(guard, md_buf, pi_data->interval);
67                         }
68                         pi->guard = cpu_to_be16(guard);
69                 }
70
71                 if (opts->io_flags & NVME_IO_PRINFO_PRCHK_APP)
72                         pi->apptag = cpu_to_be16(pi_data->apptag);
73
74                 if (opts->io_flags & NVME_IO_PRINFO_PRCHK_REF) {
75                         switch (data->pi_type) {
76                         case NVME_NS_DPS_PI_TYPE1:
77                         case NVME_NS_DPS_PI_TYPE2:
78                                 pi->srtag = cpu_to_be32((__u32)slba + lba_num);
79                                 break;
80                         case NVME_NS_DPS_PI_TYPE3:
81                                 break;
82                         }
83                 }
84                 if (data->lba_ext) {
85                         buf += data->lba_ext;
86                 } else {
87                         buf += data->lba_size;
88                         md_buf += data->ms;
89                 }
90                 lba_num++;
91         }
92 }
93
94 static int fio_nvme_verify_pi_16b_guard(struct nvme_data *data,
95                                         struct io_u *io_u)
96 {
97         struct nvme_pi_data *pi_data = io_u->engine_data;
98         struct nvme_16b_guard_pif *pi;
99         struct fio_file *f = io_u->file;
100         unsigned char *buf = io_u->xfer_buf;
101         unsigned char *md_buf = io_u->mmap_data;
102         __u64 slba = get_slba(data, io_u);
103         __u32 nlb = get_nlb(data, io_u) + 1;
104         __u32 lba_num = 0;
105         __u16 unmask_app, unmask_app_exp, guard = 0;
106
107         while (lba_num < nlb) {
108                 if (data->lba_ext)
109                         pi = (struct nvme_16b_guard_pif *)(buf + pi_data->interval);
110                 else
111                         pi = (struct nvme_16b_guard_pif *)(md_buf + pi_data->interval);
112
113                 if (data->pi_type == NVME_NS_DPS_PI_TYPE3) {
114                         if (pi->apptag == NVME_PI_APP_DISABLE &&
115                             pi->srtag == NVME_PI_REF_DISABLE)
116                                 goto next;
117                 } else if (data->pi_type == NVME_NS_DPS_PI_TYPE1 ||
118                            data->pi_type == NVME_NS_DPS_PI_TYPE2) {
119                         if (pi->apptag == NVME_PI_APP_DISABLE)
120                                 goto next;
121                 }
122
123                 if (pi_data->io_flags & NVME_IO_PRINFO_PRCHK_GUARD) {
124                         if (data->lba_ext) {
125                                 guard = fio_crc_t10dif(0, buf, pi_data->interval);
126                         } else {
127                                 guard = fio_crc_t10dif(0, buf, data->lba_size);
128                                 guard = fio_crc_t10dif(guard, md_buf, pi_data->interval);
129                         }
130                         if (be16_to_cpu(pi->guard) != guard) {
131                                 log_err("%s: Guard compare error: LBA: %llu Expected=%x, Actual=%x\n",
132                                         f->file_name, (unsigned long long)slba,
133                                         guard, be16_to_cpu(pi->guard));
134                                 return -EIO;
135                         }
136                 }
137
138                 if (pi_data->io_flags & NVME_IO_PRINFO_PRCHK_APP) {
139                         unmask_app = be16_to_cpu(pi->apptag) & pi_data->apptag_mask;
140                         unmask_app_exp = pi_data->apptag & pi_data->apptag_mask;
141                         if (unmask_app != unmask_app_exp) {
142                                 log_err("%s: APPTAG compare error: LBA: %llu Expected=%x, Actual=%x\n",
143                                         f->file_name, (unsigned long long)slba,
144                                         unmask_app_exp, unmask_app);
145                                 return -EIO;
146                         }
147                 }
148
149                 if (pi_data->io_flags & NVME_IO_PRINFO_PRCHK_REF) {
150                         switch (data->pi_type) {
151                         case NVME_NS_DPS_PI_TYPE1:
152                         case NVME_NS_DPS_PI_TYPE2:
153                                 if (be32_to_cpu(pi->srtag) !=
154                                     ((__u32)slba + lba_num)) {
155                                         log_err("%s: REFTAG compare error: LBA: %llu Expected=%x, Actual=%x\n",
156                                                 f->file_name, (unsigned long long)slba,
157                                                 (__u32)slba + lba_num,
158                                                 be32_to_cpu(pi->srtag));
159                                         return -EIO;
160                                 }
161                                 break;
162                         case NVME_NS_DPS_PI_TYPE3:
163                                 break;
164                         }
165                 }
166 next:
167                 if (data->lba_ext) {
168                         buf += data->lba_ext;
169                 } else {
170                         buf += data->lba_size;
171                         md_buf += data->ms;
172                 }
173                 lba_num++;
174         }
175
176         return 0;
177 }
178
179 static void fio_nvme_generate_pi_64b_guard(struct nvme_data *data,
180                                            struct io_u *io_u,
181                                            struct nvme_cmd_ext_io_opts *opts)
182 {
183         struct nvme_pi_data *pi_data = io_u->engine_data;
184         struct nvme_64b_guard_pif *pi;
185         unsigned char *buf = io_u->xfer_buf;
186         unsigned char *md_buf = io_u->mmap_data;
187         uint64_t guard = 0;
188         __u64 slba = get_slba(data, io_u);
189         __u32 nlb = get_nlb(data, io_u) + 1;
190         __u32 lba_num = 0;
191
192         if (data->pi_loc) {
193                 if (data->lba_ext)
194                         pi_data->interval = data->lba_ext - data->ms;
195                 else
196                         pi_data->interval = 0;
197         } else {
198                 if (data->lba_ext)
199                         pi_data->interval = data->lba_ext - sizeof(struct nvme_64b_guard_pif);
200                 else
201                         pi_data->interval = data->ms - sizeof(struct nvme_64b_guard_pif);
202         }
203
204         if (io_u->ddir != DDIR_WRITE)
205                 return;
206
207         while (lba_num < nlb) {
208                 if (data->lba_ext)
209                         pi = (struct nvme_64b_guard_pif *)(buf + pi_data->interval);
210                 else
211                         pi = (struct nvme_64b_guard_pif *)(md_buf + pi_data->interval);
212
213                 if (opts->io_flags & NVME_IO_PRINFO_PRCHK_GUARD) {
214                         if (data->lba_ext) {
215                                 guard = fio_crc64_nvme(0, buf, pi_data->interval);
216                         } else {
217                                 guard = fio_crc64_nvme(0, buf, data->lba_size);
218                                 guard = fio_crc64_nvme(guard, md_buf, pi_data->interval);
219                         }
220                         pi->guard = cpu_to_be64(guard);
221                 }
222
223                 if (opts->io_flags & NVME_IO_PRINFO_PRCHK_APP)
224                         pi->apptag = cpu_to_be16(pi_data->apptag);
225
226                 if (opts->io_flags & NVME_IO_PRINFO_PRCHK_REF) {
227                         switch (data->pi_type) {
228                         case NVME_NS_DPS_PI_TYPE1:
229                         case NVME_NS_DPS_PI_TYPE2:
230                                 put_unaligned_be48(slba + lba_num, pi->srtag);
231                                 break;
232                         case NVME_NS_DPS_PI_TYPE3:
233                                 break;
234                         }
235                 }
236                 if (data->lba_ext) {
237                         buf += data->lba_ext;
238                 } else {
239                         buf += data->lba_size;
240                         md_buf += data->ms;
241                 }
242                 lba_num++;
243         }
244 }
245
246 static int fio_nvme_verify_pi_64b_guard(struct nvme_data *data,
247                                         struct io_u *io_u)
248 {
249         struct nvme_pi_data *pi_data = io_u->engine_data;
250         struct nvme_64b_guard_pif *pi;
251         struct fio_file *f = io_u->file;
252         unsigned char *buf = io_u->xfer_buf;
253         unsigned char *md_buf = io_u->mmap_data;
254         __u64 slba = get_slba(data, io_u);
255         __u64 ref, ref_exp, guard = 0;
256         __u32 nlb = get_nlb(data, io_u) + 1;
257         __u32 lba_num = 0;
258         __u16 unmask_app, unmask_app_exp;
259
260         while (lba_num < nlb) {
261                 if (data->lba_ext)
262                         pi = (struct nvme_64b_guard_pif *)(buf + pi_data->interval);
263                 else
264                         pi = (struct nvme_64b_guard_pif *)(md_buf + pi_data->interval);
265
266                 if (data->pi_type == NVME_NS_DPS_PI_TYPE3) {
267                         if (pi->apptag == NVME_PI_APP_DISABLE &&
268                             fio_nvme_pi_ref_escape(pi->srtag))
269                                 goto next;
270                 } else if (data->pi_type == NVME_NS_DPS_PI_TYPE1 ||
271                            data->pi_type == NVME_NS_DPS_PI_TYPE2) {
272                         if (pi->apptag == NVME_PI_APP_DISABLE)
273                                 goto next;
274                 }
275
276                 if (pi_data->io_flags & NVME_IO_PRINFO_PRCHK_GUARD) {
277                         if (data->lba_ext) {
278                                 guard = fio_crc64_nvme(0, buf, pi_data->interval);
279                         } else {
280                                 guard = fio_crc64_nvme(0, buf, data->lba_size);
281                                 guard = fio_crc64_nvme(guard, md_buf, pi_data->interval);
282                         }
283                         if (be64_to_cpu((uint64_t)pi->guard) != guard) {
284                                 log_err("%s: Guard compare error: LBA: %llu Expected=%llx, Actual=%llx\n",
285                                         f->file_name, (unsigned long long)slba,
286                                         guard, be64_to_cpu((uint64_t)pi->guard));
287                                 return -EIO;
288                         }
289                 }
290
291                 if (pi_data->io_flags & NVME_IO_PRINFO_PRCHK_APP) {
292                         unmask_app = be16_to_cpu(pi->apptag) & pi_data->apptag_mask;
293                         unmask_app_exp = pi_data->apptag & pi_data->apptag_mask;
294                         if (unmask_app != unmask_app_exp) {
295                                 log_err("%s: APPTAG compare error: LBA: %llu Expected=%x, Actual=%x\n",
296                                         f->file_name, (unsigned long long)slba,
297                                         unmask_app_exp, unmask_app);
298                                 return -EIO;
299                         }
300                 }
301
302                 if (pi_data->io_flags & NVME_IO_PRINFO_PRCHK_REF) {
303                         switch (data->pi_type) {
304                         case NVME_NS_DPS_PI_TYPE1:
305                         case NVME_NS_DPS_PI_TYPE2:
306                                 ref = get_unaligned_be48(pi->srtag);
307                                 ref_exp = (slba + lba_num) & ((1ULL << 48) - 1);
308                                 if (ref != ref_exp) {
309                                         log_err("%s: REFTAG compare error: LBA: %llu Expected=%llx, Actual=%llx\n",
310                                                 f->file_name, (unsigned long long)slba,
311                                                 ref_exp, ref);
312                                         return -EIO;
313                                 }
314                                 break;
315                         case NVME_NS_DPS_PI_TYPE3:
316                                 break;
317                         }
318                 }
319 next:
320                 if (data->lba_ext) {
321                         buf += data->lba_ext;
322                 } else {
323                         buf += data->lba_size;
324                         md_buf += data->ms;
325                 }
326                 lba_num++;
327         }
328
329         return 0;
330 }
331 void fio_nvme_uring_cmd_trim_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
332                                   struct nvme_dsm_range *dsm)
333 {
334         struct nvme_data *data = FILE_ENG_DATA(io_u->file);
335
336         cmd->opcode = nvme_cmd_dsm;
337         cmd->nsid = data->nsid;
338         cmd->cdw10 = 0;
339         cmd->cdw11 = NVME_ATTRIBUTE_DEALLOCATE;
340         cmd->addr = (__u64) (uintptr_t) dsm;
341         cmd->data_len = sizeof(*dsm);
342
343         dsm->slba = get_slba(data, io_u);
344         /* nlb is a 1-based value for deallocate */
345         dsm->nlb = get_nlb(data, io_u) + 1;
346 }
347
348 int fio_nvme_uring_cmd_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
349                             struct iovec *iov, struct nvme_dsm_range *dsm)
350 {
351         struct nvme_data *data = FILE_ENG_DATA(io_u->file);
352         __u64 slba;
353         __u32 nlb;
354
355         memset(cmd, 0, sizeof(struct nvme_uring_cmd));
356
357         switch (io_u->ddir) {
358         case DDIR_READ:
359                 cmd->opcode = nvme_cmd_read;
360                 break;
361         case DDIR_WRITE:
362                 cmd->opcode = nvme_cmd_write;
363                 break;
364         case DDIR_TRIM:
365                 fio_nvme_uring_cmd_trim_prep(cmd, io_u, dsm);
366                 return 0;
367         default:
368                 return -ENOTSUP;
369         }
370
371         slba = get_slba(data, io_u);
372         nlb = get_nlb(data, io_u);
373
374         /* cdw10 and cdw11 represent starting lba */
375         cmd->cdw10 = slba & 0xffffffff;
376         cmd->cdw11 = slba >> 32;
377         /* cdw12 represent number of lba's for read/write */
378         cmd->cdw12 = nlb | (io_u->dtype << 20);
379         cmd->cdw13 = io_u->dspec << 16;
380         if (iov) {
381                 iov->iov_base = io_u->xfer_buf;
382                 iov->iov_len = io_u->xfer_buflen;
383                 cmd->addr = (__u64)(uintptr_t)iov;
384                 cmd->data_len = 1;
385         } else {
386                 cmd->addr = (__u64)(uintptr_t)io_u->xfer_buf;
387                 cmd->data_len = io_u->xfer_buflen;
388         }
389         if (data->lba_shift && data->ms) {
390                 cmd->metadata = (__u64)(uintptr_t)io_u->mmap_data;
391                 cmd->metadata_len = (nlb + 1) * data->ms;
392         }
393         cmd->nsid = data->nsid;
394         return 0;
395 }
396
397 void fio_nvme_pi_fill(struct nvme_uring_cmd *cmd, struct io_u *io_u,
398                       struct nvme_cmd_ext_io_opts *opts)
399 {
400         struct nvme_data *data = FILE_ENG_DATA(io_u->file);
401         __u64 slba;
402
403         slba = get_slba(data, io_u);
404         cmd->cdw12 |= opts->io_flags;
405
406         if (data->pi_type && !(opts->io_flags & NVME_IO_PRINFO_PRACT)) {
407                 if (data->guard_type == NVME_NVM_NS_16B_GUARD)
408                         fio_nvme_generate_pi_16b_guard(data, io_u, opts);
409                 else if (data->guard_type == NVME_NVM_NS_64B_GUARD)
410                         fio_nvme_generate_pi_64b_guard(data, io_u, opts);
411         }
412
413         switch (data->pi_type) {
414         case NVME_NS_DPS_PI_TYPE1:
415         case NVME_NS_DPS_PI_TYPE2:
416                 switch (data->guard_type) {
417                 case NVME_NVM_NS_16B_GUARD:
418                         if (opts->io_flags & NVME_IO_PRINFO_PRCHK_REF)
419                                 cmd->cdw14 = (__u32)slba;
420                         break;
421                 case NVME_NVM_NS_64B_GUARD:
422                         if (opts->io_flags & NVME_IO_PRINFO_PRCHK_REF) {
423                                 cmd->cdw14 = (__u32)slba;
424                                 cmd->cdw3 = ((slba >> 32) & 0xffff);
425                         }
426                         break;
427                 default:
428                         break;
429                 }
430                 if (opts->io_flags & NVME_IO_PRINFO_PRCHK_APP)
431                         cmd->cdw15 = (opts->apptag_mask << 16 | opts->apptag);
432                 break;
433         case NVME_NS_DPS_PI_TYPE3:
434                 if (opts->io_flags & NVME_IO_PRINFO_PRCHK_APP)
435                         cmd->cdw15 = (opts->apptag_mask << 16 | opts->apptag);
436                 break;
437         case NVME_NS_DPS_PI_NONE:
438                 break;
439         }
440 }
441
442 int fio_nvme_pi_verify(struct nvme_data *data, struct io_u *io_u)
443 {
444         int ret = 0;
445
446         switch (data->guard_type) {
447         case NVME_NVM_NS_16B_GUARD:
448                 ret = fio_nvme_verify_pi_16b_guard(data, io_u);
449                 break;
450         case NVME_NVM_NS_64B_GUARD:
451                 ret = fio_nvme_verify_pi_64b_guard(data, io_u);
452                 break;
453         default:
454                 break;
455         }
456
457         return ret;
458 }
459
460 static int nvme_identify(int fd, __u32 nsid, enum nvme_identify_cns cns,
461                          enum nvme_csi csi, void *data)
462 {
463         struct nvme_passthru_cmd cmd = {
464                 .opcode         = nvme_admin_identify,
465                 .nsid           = nsid,
466                 .addr           = (__u64)(uintptr_t)data,
467                 .data_len       = NVME_IDENTIFY_DATA_SIZE,
468                 .cdw10          = cns,
469                 .cdw11          = csi << NVME_IDENTIFY_CSI_SHIFT,
470                 .timeout_ms     = NVME_DEFAULT_IOCTL_TIMEOUT,
471         };
472
473         return ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd);
474 }
475
476 int fio_nvme_get_info(struct fio_file *f, __u64 *nlba, __u32 pi_act,
477                       struct nvme_data *data)
478 {
479         struct nvme_id_ns ns;
480         struct nvme_id_ctrl ctrl;
481         struct nvme_nvm_id_ns nvm_ns;
482         int namespace_id;
483         int fd, err;
484         __u32 format_idx, elbaf;
485
486         if (f->filetype != FIO_TYPE_CHAR) {
487                 log_err("ioengine io_uring_cmd only works with nvme ns "
488                         "generic char devices (/dev/ngXnY)\n");
489                 return 1;
490         }
491
492         fd = open(f->file_name, O_RDONLY);
493         if (fd < 0)
494                 return -errno;
495
496         namespace_id = ioctl(fd, NVME_IOCTL_ID);
497         if (namespace_id < 0) {
498                 err = -errno;
499                 log_err("%s: failed to fetch namespace-id\n", f->file_name);
500                 goto out;
501         }
502
503         err = nvme_identify(fd, 0, NVME_IDENTIFY_CNS_CTRL, NVME_CSI_NVM, &ctrl);
504         if (err) {
505                 log_err("%s: failed to fetch identify ctrl\n", f->file_name);
506                 goto out;
507         }
508
509         /*
510          * Identify namespace to get namespace-id, namespace size in LBA's
511          * and LBA data size.
512          */
513         err = nvme_identify(fd, namespace_id, NVME_IDENTIFY_CNS_NS,
514                                 NVME_CSI_NVM, &ns);
515         if (err) {
516                 log_err("%s: failed to fetch identify namespace\n",
517                         f->file_name);
518                 goto out;
519         }
520
521         data->nsid = namespace_id;
522
523         /*
524          * 16 or 64 as maximum number of supported LBA formats.
525          * From flbas bit 0-3 indicates lsb and bit 5-6 indicates msb
526          * of the format index used to format the namespace.
527          */
528         if (ns.nlbaf < 16)
529                 format_idx = ns.flbas & 0xf;
530         else
531                 format_idx = (ns.flbas & 0xf) + (((ns.flbas >> 5) & 0x3) << 4);
532
533         data->lba_size = 1 << ns.lbaf[format_idx].ds;
534         data->ms = le16_to_cpu(ns.lbaf[format_idx].ms);
535
536         /* Check for end to end data protection support */
537         if (data->ms && (ns.dps & NVME_NS_DPS_PI_MASK))
538                 data->pi_type = (ns.dps & NVME_NS_DPS_PI_MASK);
539
540         if (!data->pi_type)
541                 goto check_elba;
542
543         if (ctrl.ctratt & NVME_CTRL_CTRATT_ELBAS) {
544                 err = nvme_identify(fd, namespace_id, NVME_IDENTIFY_CNS_CSI_NS,
545                                         NVME_CSI_NVM, &nvm_ns);
546                 if (err) {
547                         log_err("%s: failed to fetch identify nvm namespace\n",
548                                 f->file_name);
549                         goto out;
550                 }
551
552                 elbaf = le32_to_cpu(nvm_ns.elbaf[format_idx]);
553
554                 /* Currently we don't support storage tags */
555                 if (elbaf & NVME_ID_NS_NVM_STS_MASK) {
556                         log_err("%s: Storage tag not supported\n",
557                                 f->file_name);
558                         err = -ENOTSUP;
559                         goto out;
560                 }
561
562                 data->guard_type = (elbaf >> NVME_ID_NS_NVM_GUARD_SHIFT) &
563                                 NVME_ID_NS_NVM_GUARD_MASK;
564
565                 /* No 32 bit guard, as storage tag is mandatory for it */
566                 switch (data->guard_type) {
567                 case NVME_NVM_NS_16B_GUARD:
568                         data->pi_size = sizeof(struct nvme_16b_guard_pif);
569                         break;
570                 case NVME_NVM_NS_64B_GUARD:
571                         data->pi_size = sizeof(struct nvme_64b_guard_pif);
572                         break;
573                 default:
574                         break;
575                 }
576         } else {
577                 data->guard_type = NVME_NVM_NS_16B_GUARD;
578                 data->pi_size = sizeof(struct nvme_16b_guard_pif);
579         }
580
581         /*
582          * when PRACT bit is set to 1, and metadata size is equal to protection
583          * information size, controller inserts and removes PI for write and
584          * read commands respectively.
585          */
586         if (pi_act && data->ms == data->pi_size)
587                 data->ms = 0;
588
589         data->pi_loc = (ns.dps & NVME_NS_DPS_PI_FIRST);
590
591 check_elba:
592         /*
593          * Bit 4 for flbas indicates if metadata is transferred at the end of
594          * logical block creating an extended LBA.
595          */
596         if (data->ms && ((ns.flbas >> 4) & 0x1))
597                 data->lba_ext = data->lba_size + data->ms;
598         else
599                 data->lba_shift = ilog2(data->lba_size);
600
601         *nlba = ns.nsze;
602
603 out:
604         close(fd);
605         return err;
606 }
607
608 int fio_nvme_get_zoned_model(struct thread_data *td, struct fio_file *f,
609                              enum zbd_zoned_model *model)
610 {
611         struct nvme_data *data = FILE_ENG_DATA(f);
612         struct nvme_id_ns ns;
613         struct nvme_passthru_cmd cmd;
614         int fd, ret = 0;
615
616         if (f->filetype != FIO_TYPE_CHAR)
617                 return -EINVAL;
618
619         /* File is not yet opened */
620         fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
621         if (fd < 0)
622                 return -errno;
623
624         /* Using nvme_id_ns for data as sizes are same */
625         ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_CTRL,
626                                 NVME_CSI_ZNS, &ns);
627         if (ret) {
628                 *model = ZBD_NONE;
629                 goto out;
630         }
631
632         memset(&cmd, 0, sizeof(struct nvme_passthru_cmd));
633
634         /* Using nvme_id_ns for data as sizes are same */
635         ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
636                                 NVME_CSI_ZNS, &ns);
637         if (ret) {
638                 *model = ZBD_NONE;
639                 goto out;
640         }
641
642         *model = ZBD_HOST_MANAGED;
643 out:
644         close(fd);
645         return 0;
646 }
647
648 static int nvme_report_zones(int fd, __u32 nsid, __u64 slba, __u32 zras_feat,
649                              __u32 data_len, void *data)
650 {
651         struct nvme_passthru_cmd cmd = {
652                 .opcode         = nvme_zns_cmd_mgmt_recv,
653                 .nsid           = nsid,
654                 .addr           = (__u64)(uintptr_t)data,
655                 .data_len       = data_len,
656                 .cdw10          = slba & 0xffffffff,
657                 .cdw11          = slba >> 32,
658                 .cdw12          = (data_len >> 2) - 1,
659                 .cdw13          = NVME_ZNS_ZRA_REPORT_ZONES | zras_feat,
660                 .timeout_ms     = NVME_DEFAULT_IOCTL_TIMEOUT,
661         };
662
663         return ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
664 }
665
666 int fio_nvme_report_zones(struct thread_data *td, struct fio_file *f,
667                           uint64_t offset, struct zbd_zone *zbdz,
668                           unsigned int nr_zones)
669 {
670         struct nvme_data *data = FILE_ENG_DATA(f);
671         struct nvme_zone_report *zr;
672         struct nvme_zns_id_ns zns_ns;
673         struct nvme_id_ns ns;
674         unsigned int i = 0, j, zones_fetched = 0;
675         unsigned int max_zones, zones_chunks = 1024;
676         int fd, ret = 0;
677         __u32 zr_len;
678         __u64 zlen;
679
680         /* File is not yet opened */
681         fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
682         if (fd < 0)
683                 return -errno;
684
685         zones_fetched = 0;
686         zr_len = sizeof(*zr) + (zones_chunks * sizeof(struct nvme_zns_desc));
687         zr = calloc(1, zr_len);
688         if (!zr) {
689                 close(fd);
690                 return -ENOMEM;
691         }
692
693         ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_NS,
694                                 NVME_CSI_NVM, &ns);
695         if (ret) {
696                 log_err("%s: nvme_identify_ns failed, err=%d\n", f->file_name,
697                         ret);
698                 goto out;
699         }
700
701         ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
702                                 NVME_CSI_ZNS, &zns_ns);
703         if (ret) {
704                 log_err("%s: nvme_zns_identify_ns failed, err=%d\n",
705                         f->file_name, ret);
706                 goto out;
707         }
708         zlen = zns_ns.lbafe[ns.flbas & 0x0f].zsze << data->lba_shift;
709
710         max_zones = (f->real_file_size - offset) / zlen;
711         if (max_zones < nr_zones)
712                 nr_zones = max_zones;
713
714         if (nr_zones < zones_chunks)
715                 zones_chunks = nr_zones;
716
717         while (zones_fetched < nr_zones) {
718                 if (zones_fetched + zones_chunks >= nr_zones) {
719                         zones_chunks = nr_zones - zones_fetched;
720                         zr_len = sizeof(*zr) + (zones_chunks * sizeof(struct nvme_zns_desc));
721                 }
722                 ret = nvme_report_zones(fd, data->nsid, offset >> data->lba_shift,
723                                         NVME_ZNS_ZRAS_FEAT_ERZ, zr_len, (void *)zr);
724                 if (ret) {
725                         log_err("%s: nvme_zns_report_zones failed, err=%d\n",
726                                 f->file_name, ret);
727                         goto out;
728                 }
729
730                 /* Transform the zone-report */
731                 for (j = 0; j < zr->nr_zones; j++, i++) {
732                         struct nvme_zns_desc *desc = (struct nvme_zns_desc *)&(zr->entries[j]);
733
734                         zbdz[i].start = desc->zslba << data->lba_shift;
735                         zbdz[i].len = zlen;
736                         zbdz[i].wp = desc->wp << data->lba_shift;
737                         zbdz[i].capacity = desc->zcap << data->lba_shift;
738
739                         /* Zone Type is stored in first 4 bits. */
740                         switch (desc->zt & 0x0f) {
741                         case NVME_ZONE_TYPE_SEQWRITE_REQ:
742                                 zbdz[i].type = ZBD_ZONE_TYPE_SWR;
743                                 break;
744                         default:
745                                 log_err("%s: invalid type for zone at offset %llu.\n",
746                                         f->file_name, (unsigned long long) desc->zslba);
747                                 ret = -EIO;
748                                 goto out;
749                         }
750
751                         /* Zone State is stored in last 4 bits. */
752                         switch (desc->zs >> 4) {
753                         case NVME_ZNS_ZS_EMPTY:
754                                 zbdz[i].cond = ZBD_ZONE_COND_EMPTY;
755                                 break;
756                         case NVME_ZNS_ZS_IMPL_OPEN:
757                                 zbdz[i].cond = ZBD_ZONE_COND_IMP_OPEN;
758                                 break;
759                         case NVME_ZNS_ZS_EXPL_OPEN:
760                                 zbdz[i].cond = ZBD_ZONE_COND_EXP_OPEN;
761                                 break;
762                         case NVME_ZNS_ZS_CLOSED:
763                                 zbdz[i].cond = ZBD_ZONE_COND_CLOSED;
764                                 break;
765                         case NVME_ZNS_ZS_FULL:
766                                 zbdz[i].cond = ZBD_ZONE_COND_FULL;
767                                 break;
768                         case NVME_ZNS_ZS_READ_ONLY:
769                         case NVME_ZNS_ZS_OFFLINE:
770                         default:
771                                 /* Treat all these conditions as offline (don't use!) */
772                                 zbdz[i].cond = ZBD_ZONE_COND_OFFLINE;
773                                 zbdz[i].wp = zbdz[i].start;
774                         }
775                 }
776                 zones_fetched += zr->nr_zones;
777                 offset += zr->nr_zones * zlen;
778         }
779
780         ret = zones_fetched;
781 out:
782         free(zr);
783         close(fd);
784
785         return ret;
786 }
787
788 int fio_nvme_reset_wp(struct thread_data *td, struct fio_file *f,
789                       uint64_t offset, uint64_t length)
790 {
791         struct nvme_data *data = FILE_ENG_DATA(f);
792         unsigned int nr_zones;
793         unsigned long long zslba;
794         int i, fd, ret = 0;
795
796         /* If the file is not yet opened, open it for this function. */
797         fd = f->fd;
798         if (fd < 0) {
799                 fd = open(f->file_name, O_RDWR | O_LARGEFILE);
800                 if (fd < 0)
801                         return -errno;
802         }
803
804         zslba = offset >> data->lba_shift;
805         nr_zones = (length + td->o.zone_size - 1) / td->o.zone_size;
806
807         for (i = 0; i < nr_zones; i++, zslba += (td->o.zone_size >> data->lba_shift)) {
808                 struct nvme_passthru_cmd cmd = {
809                         .opcode         = nvme_zns_cmd_mgmt_send,
810                         .nsid           = data->nsid,
811                         .cdw10          = zslba & 0xffffffff,
812                         .cdw11          = zslba >> 32,
813                         .cdw13          = NVME_ZNS_ZSA_RESET,
814                         .addr           = (__u64)(uintptr_t)NULL,
815                         .data_len       = 0,
816                         .timeout_ms     = NVME_DEFAULT_IOCTL_TIMEOUT,
817                 };
818
819                 ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
820         }
821
822         if (f->fd < 0)
823                 close(fd);
824         return -ret;
825 }
826
827 int fio_nvme_get_max_open_zones(struct thread_data *td, struct fio_file *f,
828                                 unsigned int *max_open_zones)
829 {
830         struct nvme_data *data = FILE_ENG_DATA(f);
831         struct nvme_zns_id_ns zns_ns;
832         int fd, ret = 0;
833
834         fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
835         if (fd < 0)
836                 return -errno;
837
838         ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
839                                 NVME_CSI_ZNS, &zns_ns);
840         if (ret) {
841                 log_err("%s: nvme_zns_identify_ns failed, err=%d\n",
842                         f->file_name, ret);
843                 goto out;
844         }
845
846         *max_open_zones = zns_ns.mor + 1;
847 out:
848         close(fd);
849         return ret;
850 }
851
852 static inline int nvme_fdp_reclaim_unit_handle_status(int fd, __u32 nsid,
853                                                       __u32 data_len, void *data)
854 {
855         struct nvme_passthru_cmd cmd = {
856                 .opcode         = nvme_cmd_io_mgmt_recv,
857                 .nsid           = nsid,
858                 .addr           = (__u64)(uintptr_t)data,
859                 .data_len       = data_len,
860                 .cdw10          = 1,
861                 .cdw11          = (data_len >> 2) - 1,
862         };
863
864         return ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
865 }
866
867 int fio_nvme_iomgmt_ruhs(struct thread_data *td, struct fio_file *f,
868                          struct nvme_fdp_ruh_status *ruhs, __u32 bytes)
869 {
870         struct nvme_data *data = FILE_ENG_DATA(f);
871         int fd, ret;
872
873         fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
874         if (fd < 0)
875                 return -errno;
876
877         ret = nvme_fdp_reclaim_unit_handle_status(fd, data->nsid, bytes, ruhs);
878         if (ret) {
879                 log_err("%s: nvme_fdp_reclaim_unit_handle_status failed, err=%d\n",
880                         f->file_name, ret);
881                 errno = ENOTSUP;
882         } else
883                 errno = 0;
884
885         ret = -errno;
886         close(fd);
887         return ret;
888 }