// SPDX-License-Identifier: GPL-2.0
/*
 * nvme structure declarations and helper functions for the
 * io_uring_cmd engine.
 */

#include "nvme.h"
#include "../crc/crc-t10dif.h"
#include "../crc/crc64.h"
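
/*
 * Fill host-generated protection information into a write buffer using the
 * 16-bit guard format: a CRC16 (T10 DIF) guard over each data interval, the
 * job's application tag, and, for PI types 1 and 2, a reference tag equal to
 * the lower 32 bits of the block's LBA. The PI interval depends on whether
 * the namespace places PI first or last in the metadata, and on whether
 * metadata is interleaved (extended LBA) or held in a separate buffer.
 */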
static void fio_nvme_generate_pi_16b_guard(struct nvme_data *data,
					   struct io_u *io_u,
					   struct nvme_cmd_ext_io_opts *opts)
{
	struct nvme_pi_data *pi_data = io_u->engine_data;
	struct nvme_16b_guard_pif *pi;
	unsigned char *buf = io_u->xfer_buf;
	unsigned char *md_buf = io_u->mmap_data;
	__u64 slba = get_slba(data, io_u->offset);
	__u32 nlb = get_nlb(data, io_u->xfer_buflen) + 1;
	__u32 lba_num = 0;
	__u16 guard = 0;

	if (data->pi_loc) {
		if (data->lba_ext)
			pi_data->interval = data->lba_ext - data->ms;
		else
			pi_data->interval = 0;
	} else {
		if (data->lba_ext)
			pi_data->interval = data->lba_ext - sizeof(struct nvme_16b_guard_pif);
		else
			pi_data->interval = data->ms - sizeof(struct nvme_16b_guard_pif);
	}

	if (io_u->ddir != DDIR_WRITE)
		return;

	while (lba_num < nlb) {
		if (data->lba_ext)
			pi = (struct nvme_16b_guard_pif *)(buf + pi_data->interval);
		else
			pi = (struct nvme_16b_guard_pif *)(md_buf + pi_data->interval);

		if (opts->io_flags & NVME_IO_PRINFO_PRCHK_GUARD) {
			if (data->lba_ext) {
				guard = fio_crc_t10dif(0, buf, pi_data->interval);
			} else {
				guard = fio_crc_t10dif(0, buf, data->lba_size);
				guard = fio_crc_t10dif(guard, md_buf, pi_data->interval);
			}
			pi->guard = cpu_to_be16(guard);
		}

		if (opts->io_flags & NVME_IO_PRINFO_PRCHK_APP)
			pi->apptag = cpu_to_be16(pi_data->apptag);

		if (opts->io_flags & NVME_IO_PRINFO_PRCHK_REF) {
			switch (data->pi_type) {
			case NVME_NS_DPS_PI_TYPE1:
			case NVME_NS_DPS_PI_TYPE2:
				pi->srtag = cpu_to_be32((__u32)slba + lba_num);
				break;
			case NVME_NS_DPS_PI_TYPE3:
				break;
			}
		}
		if (data->lba_ext) {
			buf += data->lba_ext;
		} else {
			buf += data->lba_size;
			md_buf += data->ms;
		}
		lba_num++;
	}
}
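
/*
 * Verify 16-bit guard protection information on a completed read. Blocks
 * whose tags carry the escape values (apptag of all ones, plus reftag of
 * all ones for type 3) are skipped, mirroring the controller's
 * check-disable behaviour. Returns 0 on success and -EIO on any guard,
 * apptag or reftag mismatch.
 */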
static int fio_nvme_verify_pi_16b_guard(struct nvme_data *data,
					struct io_u *io_u)
{
	struct nvme_pi_data *pi_data = io_u->engine_data;
	struct nvme_16b_guard_pif *pi;
	struct fio_file *f = io_u->file;
	unsigned char *buf = io_u->xfer_buf;
	unsigned char *md_buf = io_u->mmap_data;
	__u64 slba = get_slba(data, io_u->offset);
	__u32 nlb = get_nlb(data, io_u->xfer_buflen) + 1;
	__u32 lba_num = 0;
	__u16 unmask_app, unmask_app_exp, guard = 0;

	while (lba_num < nlb) {
		if (data->lba_ext)
			pi = (struct nvme_16b_guard_pif *)(buf + pi_data->interval);
		else
			pi = (struct nvme_16b_guard_pif *)(md_buf + pi_data->interval);

		if (data->pi_type == NVME_NS_DPS_PI_TYPE3) {
			if (pi->apptag == NVME_PI_APP_DISABLE &&
			    pi->srtag == NVME_PI_REF_DISABLE)
				goto next;
		} else if (data->pi_type == NVME_NS_DPS_PI_TYPE1 ||
			   data->pi_type == NVME_NS_DPS_PI_TYPE2) {
			if (pi->apptag == NVME_PI_APP_DISABLE)
				goto next;
		}

		if (pi_data->io_flags & NVME_IO_PRINFO_PRCHK_GUARD) {
			if (data->lba_ext) {
				guard = fio_crc_t10dif(0, buf, pi_data->interval);
			} else {
				guard = fio_crc_t10dif(0, buf, data->lba_size);
				guard = fio_crc_t10dif(guard, md_buf, pi_data->interval);
			}
			if (be16_to_cpu(pi->guard) != guard) {
				log_err("%s: Guard compare error: LBA: %llu Expected=%x, Actual=%x\n",
					f->file_name, (unsigned long long)slba,
					guard, be16_to_cpu(pi->guard));
				return -EIO;
			}
		}

		if (pi_data->io_flags & NVME_IO_PRINFO_PRCHK_APP) {
			unmask_app = be16_to_cpu(pi->apptag) & pi_data->apptag_mask;
			unmask_app_exp = pi_data->apptag & pi_data->apptag_mask;
			if (unmask_app != unmask_app_exp) {
				log_err("%s: APPTAG compare error: LBA: %llu Expected=%x, Actual=%x\n",
					f->file_name, (unsigned long long)slba,
					unmask_app_exp, unmask_app);
				return -EIO;
			}
		}

		if (pi_data->io_flags & NVME_IO_PRINFO_PRCHK_REF) {
			switch (data->pi_type) {
			case NVME_NS_DPS_PI_TYPE1:
			case NVME_NS_DPS_PI_TYPE2:
				if (be32_to_cpu(pi->srtag) !=
				    ((__u32)slba + lba_num)) {
					log_err("%s: REFTAG compare error: LBA: %llu Expected=%x, Actual=%x\n",
						f->file_name, (unsigned long long)slba,
						(__u32)slba + lba_num,
						be32_to_cpu(pi->srtag));
					return -EIO;
				}
				break;
			case NVME_NS_DPS_PI_TYPE3:
				break;
			}
		}
next:
		if (data->lba_ext) {
			buf += data->lba_ext;
		} else {
			buf += data->lba_size;
			md_buf += data->ms;
		}
		lba_num++;
	}

	return 0;
}
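
/*
 * 64-bit guard variant of PI generation: the guard is the NVMe CRC64
 * (Rocksoft parameters) over the interval, and the reference tag is the
 * low 48 bits of the LBA, stored big-endian in the 6-byte srtag field.
 */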
static void fio_nvme_generate_pi_64b_guard(struct nvme_data *data,
					   struct io_u *io_u,
					   struct nvme_cmd_ext_io_opts *opts)
{
	struct nvme_pi_data *pi_data = io_u->engine_data;
	struct nvme_64b_guard_pif *pi;
	unsigned char *buf = io_u->xfer_buf;
	unsigned char *md_buf = io_u->mmap_data;
	__u64 guard = 0;
	__u64 slba = get_slba(data, io_u->offset);
	__u32 nlb = get_nlb(data, io_u->xfer_buflen) + 1;
	__u32 lba_num = 0;

	if (data->pi_loc) {
		if (data->lba_ext)
			pi_data->interval = data->lba_ext - data->ms;
		else
			pi_data->interval = 0;
	} else {
		if (data->lba_ext)
			pi_data->interval = data->lba_ext - sizeof(struct nvme_64b_guard_pif);
		else
			pi_data->interval = data->ms - sizeof(struct nvme_64b_guard_pif);
	}

	if (io_u->ddir != DDIR_WRITE)
		return;

	while (lba_num < nlb) {
		if (data->lba_ext)
			pi = (struct nvme_64b_guard_pif *)(buf + pi_data->interval);
		else
			pi = (struct nvme_64b_guard_pif *)(md_buf + pi_data->interval);

		if (opts->io_flags & NVME_IO_PRINFO_PRCHK_GUARD) {
			if (data->lba_ext) {
				guard = fio_crc64_nvme(0, buf, pi_data->interval);
			} else {
				guard = fio_crc64_nvme(0, buf, data->lba_size);
				guard = fio_crc64_nvme(guard, md_buf, pi_data->interval);
			}
			pi->guard = cpu_to_be64(guard);
		}

		if (opts->io_flags & NVME_IO_PRINFO_PRCHK_APP)
			pi->apptag = cpu_to_be16(pi_data->apptag);

		if (opts->io_flags & NVME_IO_PRINFO_PRCHK_REF) {
			switch (data->pi_type) {
			case NVME_NS_DPS_PI_TYPE1:
			case NVME_NS_DPS_PI_TYPE2:
				put_unaligned_be48(slba + lba_num, pi->srtag);
				break;
			case NVME_NS_DPS_PI_TYPE3:
				break;
			}
		}
		if (data->lba_ext) {
			buf += data->lba_ext;
		} else {
			buf += data->lba_size;
			md_buf += data->ms;
		}
		lba_num++;
	}
}
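
/*
 * Verify 64-bit guard protection information: same structure as the 16-bit
 * path, but with a CRC64 guard and a 48-bit reference tag compare.
 */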
static int fio_nvme_verify_pi_64b_guard(struct nvme_data *data,
					struct io_u *io_u)
{
	struct nvme_pi_data *pi_data = io_u->engine_data;
	struct nvme_64b_guard_pif *pi;
	struct fio_file *f = io_u->file;
	unsigned char *buf = io_u->xfer_buf;
	unsigned char *md_buf = io_u->mmap_data;
	__u64 slba = get_slba(data, io_u->offset);
	__u64 ref, ref_exp, guard = 0;
	__u32 nlb = get_nlb(data, io_u->xfer_buflen) + 1;
	__u32 lba_num = 0;
	__u16 unmask_app, unmask_app_exp;

	while (lba_num < nlb) {
		if (data->lba_ext)
			pi = (struct nvme_64b_guard_pif *)(buf + pi_data->interval);
		else
			pi = (struct nvme_64b_guard_pif *)(md_buf + pi_data->interval);

		if (data->pi_type == NVME_NS_DPS_PI_TYPE3) {
			if (pi->apptag == NVME_PI_APP_DISABLE &&
			    fio_nvme_pi_ref_escape(pi->srtag))
				goto next;
		} else if (data->pi_type == NVME_NS_DPS_PI_TYPE1 ||
			   data->pi_type == NVME_NS_DPS_PI_TYPE2) {
			if (pi->apptag == NVME_PI_APP_DISABLE)
				goto next;
		}

		if (pi_data->io_flags & NVME_IO_PRINFO_PRCHK_GUARD) {
			if (data->lba_ext) {
				guard = fio_crc64_nvme(0, buf, pi_data->interval);
			} else {
				guard = fio_crc64_nvme(0, buf, data->lba_size);
				guard = fio_crc64_nvme(guard, md_buf, pi_data->interval);
			}
			if (be64_to_cpu((uint64_t)pi->guard) != guard) {
				log_err("%s: Guard compare error: LBA: %llu Expected=%llx, Actual=%llx\n",
					f->file_name, (unsigned long long)slba,
					guard, be64_to_cpu((uint64_t)pi->guard));
				return -EIO;
			}
		}

		if (pi_data->io_flags & NVME_IO_PRINFO_PRCHK_APP) {
			unmask_app = be16_to_cpu(pi->apptag) & pi_data->apptag_mask;
			unmask_app_exp = pi_data->apptag & pi_data->apptag_mask;
			if (unmask_app != unmask_app_exp) {
				log_err("%s: APPTAG compare error: LBA: %llu Expected=%x, Actual=%x\n",
					f->file_name, (unsigned long long)slba,
					unmask_app_exp, unmask_app);
				return -EIO;
			}
		}

		if (pi_data->io_flags & NVME_IO_PRINFO_PRCHK_REF) {
			switch (data->pi_type) {
			case NVME_NS_DPS_PI_TYPE1:
			case NVME_NS_DPS_PI_TYPE2:
				ref = get_unaligned_be48(pi->srtag);
				ref_exp = (slba + lba_num) & ((1ULL << 48) - 1);
				if (ref != ref_exp) {
					log_err("%s: REFTAG compare error: LBA: %llu Expected=%llx, Actual=%llx\n",
						f->file_name, (unsigned long long)slba,
						ref_exp, ref);
					return -EIO;
				}
				break;
			case NVME_NS_DPS_PI_TYPE3:
				break;
			}
		}
next:
		if (data->lba_ext) {
			buf += data->lba_ext;
		} else {
			buf += data->lba_size;
			md_buf += data->ms;
		}
		lba_num++;
	}

	return 0;
}
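
/*
 * Build a Dataset Management (deallocate) command. cdw10 holds the 0-based
 * number of ranges and cdw11 the deallocate attribute; the range
 * descriptors themselves are passed through the data buffer.
 */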
static void fio_nvme_uring_cmd_trim_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
					 struct nvme_dsm *dsm)
{
	struct nvme_data *data = FILE_ENG_DATA(io_u->file);
	struct trim_range *range;
	uint8_t *buf_point;
	unsigned int i;

	cmd->opcode = nvme_cmd_dsm;
	cmd->nsid = data->nsid;
	cmd->cdw11 = NVME_ATTRIBUTE_DEALLOCATE;
	cmd->addr = (__u64) (uintptr_t) (&dsm->range[0]);

	if (dsm->nr_ranges == 1) {
		dsm->range[0].slba = get_slba(data, io_u->offset);
		/* nlb is a 1-based value for deallocate */
		dsm->range[0].nlb = get_nlb(data, io_u->xfer_buflen) + 1;
		cmd->cdw10 = 0;
		cmd->data_len = sizeof(struct nvme_dsm_range);
	} else {
		buf_point = io_u->xfer_buf;
		for (i = 0; i < io_u->number_trim; i++) {
			range = (struct trim_range *)buf_point;
			dsm->range[i].slba = get_slba(data, range->start);
			/* nlb is a 1-based value for deallocate */
			dsm->range[i].nlb = get_nlb(data, range->len) + 1;
			buf_point += sizeof(struct trim_range);
		}
		cmd->cdw10 = io_u->number_trim - 1;
		cmd->data_len = io_u->number_trim * sizeof(struct nvme_dsm_range);
	}
}
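
/*
 * Translate an fio io_u into an NVMe I/O command. For reads and writes,
 * cdw10/cdw11 carry the starting LBA, cdw12 the 0-based LBA count plus
 * directive type and caller-supplied flags, and cdw13 the directive
 * specific value; a separate metadata buffer is attached when the
 * namespace carries metadata that is not interleaved with the data.
 */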
int fio_nvme_uring_cmd_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
			    struct iovec *iov, struct nvme_dsm *dsm,
			    uint8_t read_opcode, uint8_t write_opcode,
			    unsigned int cdw12_flags)
{
	struct nvme_data *data = FILE_ENG_DATA(io_u->file);
	__u64 slba;
	__u32 nlb;

	memset(cmd, 0, sizeof(struct nvme_uring_cmd));

	switch (io_u->ddir) {
	case DDIR_READ:
		cmd->opcode = read_opcode;
		break;
	case DDIR_WRITE:
		cmd->opcode = write_opcode;
		break;
	case DDIR_TRIM:
		fio_nvme_uring_cmd_trim_prep(cmd, io_u, dsm);
		return 0;
	case DDIR_SYNC:
	case DDIR_DATASYNC:
		cmd->opcode = nvme_cmd_flush;
		cmd->nsid = data->nsid;
		return 0;
	default:
		return -ENOTSUP;
	}

	slba = get_slba(data, io_u->offset);
	nlb = get_nlb(data, io_u->xfer_buflen);

	/* cdw10 and cdw11 hold the starting LBA */
	cmd->cdw10 = slba & 0xffffffff;
	cmd->cdw11 = slba >> 32;
	/* cdw12 holds the 0-based number of LBAs for read/write */
	cmd->cdw12 = nlb | (io_u->dtype << 20) | cdw12_flags;
	cmd->cdw13 = io_u->dspec << 16;
	if (iov) {
		iov->iov_base = io_u->xfer_buf;
		iov->iov_len = io_u->xfer_buflen;
		cmd->addr = (__u64)(uintptr_t)iov;
		cmd->data_len = 1;
	} else {
		/* no buffer for write zeroes */
		if (cmd->opcode != nvme_cmd_write_zeroes)
			cmd->addr = (__u64)(uintptr_t)io_u->xfer_buf;
		else
			cmd->addr = (__u64)(uintptr_t)NULL;
		cmd->data_len = io_u->xfer_buflen;
	}
	if (data->lba_shift && data->ms) {
		cmd->metadata = (__u64)(uintptr_t)io_u->mmap_data;
		cmd->metadata_len = (nlb + 1) * data->ms;
	}
	cmd->nsid = data->nsid;

	return 0;
}
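
/*
 * Generate protection information in the host buffer, but only when the
 * namespace is formatted with PI and the host (not the controller via
 * PRACT) is responsible for producing it.
 */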
void fio_nvme_generate_guard(struct io_u *io_u, struct nvme_cmd_ext_io_opts *opts)
{
	struct nvme_data *data = FILE_ENG_DATA(io_u->file);

	if (data->pi_type && !(opts->io_flags & NVME_IO_PRINFO_PRACT)) {
		if (data->guard_type == NVME_NVM_NS_16B_GUARD)
			fio_nvme_generate_pi_16b_guard(data, io_u, opts);
		else if (data->guard_type == NVME_NVM_NS_64B_GUARD)
			fio_nvme_generate_pi_64b_guard(data, io_u, opts);
	}
}
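
/*
 * Fill the PI related command dwords: PRINFO flags into cdw12, the
 * expected initial reference tag into cdw14 (plus cdw3 for the 48-bit
 * reference tag of the 64-bit guard format), and the application tag and
 * mask into cdw15. Type 3 namespaces check only the application tag.
 */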
void fio_nvme_pi_fill(struct nvme_uring_cmd *cmd, struct io_u *io_u,
		      struct nvme_cmd_ext_io_opts *opts)
{
	struct nvme_data *data = FILE_ENG_DATA(io_u->file);
	__u64 slba;

	slba = get_slba(data, io_u->offset);
	cmd->cdw12 |= opts->io_flags;

	fio_nvme_generate_guard(io_u, opts);

	switch (data->pi_type) {
	case NVME_NS_DPS_PI_TYPE1:
	case NVME_NS_DPS_PI_TYPE2:
		switch (data->guard_type) {
		case NVME_NVM_NS_16B_GUARD:
			if (opts->io_flags & NVME_IO_PRINFO_PRCHK_REF)
				cmd->cdw14 = (__u32)slba;
			break;
		case NVME_NVM_NS_64B_GUARD:
			if (opts->io_flags & NVME_IO_PRINFO_PRCHK_REF) {
				cmd->cdw14 = (__u32)slba;
				cmd->cdw3 = ((slba >> 32) & 0xffff);
			}
			break;
		default:
			break;
		}
		if (opts->io_flags & NVME_IO_PRINFO_PRCHK_APP)
			cmd->cdw15 = (opts->apptag_mask << 16 | opts->apptag);
		break;
	case NVME_NS_DPS_PI_TYPE3:
		if (opts->io_flags & NVME_IO_PRINFO_PRCHK_APP)
			cmd->cdw15 = (opts->apptag_mask << 16 | opts->apptag);
		break;
	case NVME_NS_DPS_PI_NONE:
		break;
	}
}
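
/*
 * Dispatch PI verification of a completed read according to the
 * namespace's guard type.
 */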
int fio_nvme_pi_verify(struct nvme_data *data, struct io_u *io_u)
{
	int ret = 0;

	switch (data->guard_type) {
	case NVME_NVM_NS_16B_GUARD:
		ret = fio_nvme_verify_pi_16b_guard(data, io_u);
		break;
	case NVME_NVM_NS_64B_GUARD:
		ret = fio_nvme_verify_pi_64b_guard(data, io_u);
		break;
	default:
		break;
	}

	return ret;
}
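
/*
 * Issue an Identify admin command: the CNS value goes in cdw10 and the
 * command set identifier in cdw11, with a 4KB data buffer.
 */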
static int nvme_identify(int fd, __u32 nsid, enum nvme_identify_cns cns,
			 enum nvme_csi csi, void *data)
{
	struct nvme_passthru_cmd cmd = {
		.opcode		= nvme_admin_identify,
		.nsid		= nsid,
		.addr		= (__u64)(uintptr_t)data,
		.data_len	= NVME_IDENTIFY_DATA_SIZE,
		.cdw10		= cns,
		.cdw11		= csi << NVME_IDENTIFY_CSI_SHIFT,
		.timeout_ms	= NVME_DEFAULT_IOCTL_TIMEOUT,
	};

	return ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd);
}
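
/*
 * Probe an NVMe generic character device: fetch the namespace ID, identify
 * controller and namespace, decode the active LBA format, and record the
 * end-to-end protection configuration in *data. Returns the namespace size
 * in LBAs through *nlba.
 */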
int fio_nvme_get_info(struct fio_file *f, __u64 *nlba, __u32 pi_act,
		      struct nvme_data *data)
{
	struct nvme_id_ns ns;
	struct nvme_id_ctrl ctrl;
	struct nvme_nvm_id_ns nvm_ns;
	int namespace_id;
	int fd, err;
	__u32 format_idx, elbaf;

	if (f->filetype != FIO_TYPE_CHAR) {
		log_err("ioengine io_uring_cmd only works with nvme ns "
			"generic char devices (/dev/ngXnY)\n");
		return 1;
	}

	fd = open(f->file_name, O_RDONLY);
	if (fd < 0)
		return -errno;

	namespace_id = ioctl(fd, NVME_IOCTL_ID);
	if (namespace_id < 0) {
		err = -errno;
		log_err("%s: failed to fetch namespace-id\n", f->file_name);
		goto out;
	}

	err = nvme_identify(fd, 0, NVME_IDENTIFY_CNS_CTRL, NVME_CSI_NVM, &ctrl);
	if (err) {
		log_err("%s: failed to fetch identify ctrl\n", f->file_name);
		goto out;
	}

	/*
	 * Identify namespace to get the namespace size in LBAs and the
	 * LBA data size.
	 */
	err = nvme_identify(fd, namespace_id, NVME_IDENTIFY_CNS_NS,
				NVME_CSI_NVM, &ns);
	if (err) {
		log_err("%s: failed to fetch identify namespace\n",
			f->file_name);
		goto out;
	}

	data->nsid = namespace_id;

	/*
	 * 16 or 64 is the maximum number of supported LBA formats.
	 * In flbas, bits 0-3 hold the least significant bits and bits 5-6
	 * the most significant bits of the format index used to format
	 * the namespace.
	 */
	if (ns.nlbaf < 16)
		format_idx = ns.flbas & 0xf;
	else
		format_idx = (ns.flbas & 0xf) + (((ns.flbas >> 5) & 0x3) << 4);

	data->lba_size = 1 << ns.lbaf[format_idx].ds;
	data->ms = le16_to_cpu(ns.lbaf[format_idx].ms);

	/* Check for end to end data protection support */
	if (data->ms && (ns.dps & NVME_NS_DPS_PI_MASK))
		data->pi_type = (ns.dps & NVME_NS_DPS_PI_MASK);

	if (!data->pi_type)
		goto skip_pi;

	if (ctrl.ctratt & NVME_CTRL_CTRATT_ELBAS) {
		err = nvme_identify(fd, namespace_id, NVME_IDENTIFY_CNS_CSI_NS,
					NVME_CSI_NVM, &nvm_ns);
		if (err) {
			log_err("%s: failed to fetch identify nvm namespace\n",
				f->file_name);
			goto out;
		}

		elbaf = le32_to_cpu(nvm_ns.elbaf[format_idx]);

		/* Currently we don't support storage tags */
		if (elbaf & NVME_ID_NS_NVM_STS_MASK) {
			log_err("%s: Storage tag not supported\n",
				f->file_name);
			err = -ENOTSUP;
			goto out;
		}

		data->guard_type = (elbaf >> NVME_ID_NS_NVM_GUARD_SHIFT) &
				NVME_ID_NS_NVM_GUARD_MASK;

		/* No 32 bit guard, as storage tag is mandatory for it */
		switch (data->guard_type) {
		case NVME_NVM_NS_16B_GUARD:
			data->pi_size = sizeof(struct nvme_16b_guard_pif);
			break;
		case NVME_NVM_NS_64B_GUARD:
			data->pi_size = sizeof(struct nvme_64b_guard_pif);
			break;
		default:
			break;
		}
	} else {
		data->guard_type = NVME_NVM_NS_16B_GUARD;
		data->pi_size = sizeof(struct nvme_16b_guard_pif);
	}

	/*
	 * When the PRACT bit is set and the metadata size equals the
	 * protection information size, the controller inserts PI on writes
	 * and strips it on reads, so the host transfers no metadata.
	 */
	if (pi_act && data->ms == data->pi_size)
		data->ms = 0;

	data->pi_loc = (ns.dps & NVME_NS_DPS_PI_FIRST);

skip_pi:
	/*
	 * Bit 4 of flbas indicates whether metadata is transferred at the
	 * end of each logical block, creating an extended LBA.
	 */
	if (data->ms && ((ns.flbas >> 4) & 0x1))
		data->lba_ext = data->lba_size + data->ms;
	else
		data->lba_shift = ilog2(data->lba_size);

	*nlba = ns.nsze;

out:
	close(fd);
	return err;
}
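
/*
 * Report the zoned model of the device. The namespace is treated as
 * host-managed only if both the ZNS command-set identify controller and
 * identify namespace calls succeed.
 */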
int fio_nvme_get_zoned_model(struct thread_data *td, struct fio_file *f,
			     enum zbd_zoned_model *model)
{
	struct nvme_data *data = FILE_ENG_DATA(f);
	struct nvme_id_ns ns;
	struct nvme_passthru_cmd cmd;
	int fd, ret = 0;

	if (f->filetype != FIO_TYPE_CHAR)
		return -EINVAL;

	/* File is not yet opened */
	fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
	if (fd < 0)
		return -errno;

	/* Using nvme_id_ns for data as sizes are same */
	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_CTRL,
				NVME_CSI_ZNS, &ns);
	if (ret) {
		*model = ZBD_NONE;
		goto out;
	}

	memset(&cmd, 0, sizeof(struct nvme_passthru_cmd));

	/* Using nvme_id_ns for data as sizes are same */
	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
				NVME_CSI_ZNS, &ns);
	if (ret) {
		*model = ZBD_NONE;
		goto out;
	}

	*model = ZBD_HOST_MANAGED;
out:
	close(fd);
	return 0;
}
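
/*
 * Issue a Zone Management Receive with the Report Zones action. cdw10/11
 * carry the starting LBA, cdw12 the 0-based length of the response buffer
 * in dwords, and cdw13 the receive action plus reporting options.
 */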
static int nvme_report_zones(int fd, __u32 nsid, __u64 slba, __u32 zras_feat,
			     __u32 data_len, void *data)
{
	struct nvme_passthru_cmd cmd = {
		.opcode		= nvme_zns_cmd_mgmt_recv,
		.nsid		= nsid,
		.addr		= (__u64)(uintptr_t)data,
		.data_len	= data_len,
		.cdw10		= slba & 0xffffffff,
		.cdw11		= slba >> 32,
		.cdw12		= (data_len >> 2) - 1,
		.cdw13		= NVME_ZNS_ZRA_REPORT_ZONES | zras_feat,
		.timeout_ms	= NVME_DEFAULT_IOCTL_TIMEOUT,
	};

	return ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
}
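
/*
 * Fetch the zone report in chunks of up to 1024 descriptors and translate
 * each NVMe zone descriptor into fio's struct zbd_zone. Returns the number
 * of zones reported, or a negative error.
 */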
int fio_nvme_report_zones(struct thread_data *td, struct fio_file *f,
			  uint64_t offset, struct zbd_zone *zbdz,
			  unsigned int nr_zones)
{
	struct nvme_data *data = FILE_ENG_DATA(f);
	struct nvme_zone_report *zr;
	struct nvme_zns_id_ns zns_ns;
	struct nvme_id_ns ns;
	unsigned int i = 0, j, zones_fetched = 0;
	unsigned int max_zones, zones_chunks = 1024;
	int fd, ret = 0;
	__u32 zr_len;
	__u64 zlen;

	/* File is not yet opened */
	fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
	if (fd < 0)
		return -errno;

	zr_len = sizeof(*zr) + (zones_chunks * sizeof(struct nvme_zns_desc));
	zr = calloc(1, zr_len);
	if (!zr) {
		close(fd);
		return -ENOMEM;
	}

	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_NS,
				NVME_CSI_NVM, &ns);
	if (ret) {
		log_err("%s: nvme_identify_ns failed, err=%d\n", f->file_name,
			ret);
		goto out;
	}

	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
				NVME_CSI_ZNS, &zns_ns);
	if (ret) {
		log_err("%s: nvme_zns_identify_ns failed, err=%d\n",
			f->file_name, ret);
		goto out;
	}
	zlen = zns_ns.lbafe[ns.flbas & 0x0f].zsze << data->lba_shift;

	max_zones = (f->real_file_size - offset) / zlen;
	if (max_zones < nr_zones)
		nr_zones = max_zones;

	if (nr_zones < zones_chunks)
		zones_chunks = nr_zones;

	while (zones_fetched < nr_zones) {
		if (zones_fetched + zones_chunks >= nr_zones) {
			zones_chunks = nr_zones - zones_fetched;
			zr_len = sizeof(*zr) + (zones_chunks * sizeof(struct nvme_zns_desc));
		}
		ret = nvme_report_zones(fd, data->nsid, offset >> data->lba_shift,
					NVME_ZNS_ZRAS_FEAT_ERZ, zr_len, (void *)zr);
		if (ret) {
			log_err("%s: nvme_zns_report_zones failed, err=%d\n",
				f->file_name, ret);
			goto out;
		}

		/* Transform the zone-report */
		for (j = 0; j < zr->nr_zones; j++, i++) {
			struct nvme_zns_desc *desc = (struct nvme_zns_desc *)&(zr->entries[j]);

			zbdz[i].start = desc->zslba << data->lba_shift;
			zbdz[i].len = zlen;
			zbdz[i].wp = desc->wp << data->lba_shift;
			zbdz[i].capacity = desc->zcap << data->lba_shift;

			/* Zone Type is stored in the lower 4 bits. */
			switch (desc->zt & 0x0f) {
			case NVME_ZONE_TYPE_SEQWRITE_REQ:
				zbdz[i].type = ZBD_ZONE_TYPE_SWR;
				break;
			default:
				log_err("%s: invalid type for zone at offset %llu.\n",
					f->file_name, (unsigned long long) desc->zslba);
				ret = -EIO;
				goto out;
			}

			/* Zone State is stored in the upper 4 bits. */
			switch (desc->zs >> 4) {
			case NVME_ZNS_ZS_EMPTY:
				zbdz[i].cond = ZBD_ZONE_COND_EMPTY;
				break;
			case NVME_ZNS_ZS_IMPL_OPEN:
				zbdz[i].cond = ZBD_ZONE_COND_IMP_OPEN;
				break;
			case NVME_ZNS_ZS_EXPL_OPEN:
				zbdz[i].cond = ZBD_ZONE_COND_EXP_OPEN;
				break;
			case NVME_ZNS_ZS_CLOSED:
				zbdz[i].cond = ZBD_ZONE_COND_CLOSED;
				break;
			case NVME_ZNS_ZS_FULL:
				zbdz[i].cond = ZBD_ZONE_COND_FULL;
				break;
			case NVME_ZNS_ZS_READ_ONLY:
			case NVME_ZNS_ZS_OFFLINE:
			default:
				/* Treat all these conditions as offline (don't use!) */
				zbdz[i].cond = ZBD_ZONE_COND_OFFLINE;
				zbdz[i].wp = zbdz[i].start;
			}
		}

		zones_fetched += zr->nr_zones;
		offset += zr->nr_zones * zlen;
	}

	ret = zones_fetched;
out:
	free(zr);
	close(fd);

	return ret;
}
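
/*
 * Reset the write pointer of every zone covered by [offset, offset+length)
 * by issuing one Zone Management Send (Reset) per zone.
 */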
int fio_nvme_reset_wp(struct thread_data *td, struct fio_file *f,
		      uint64_t offset, uint64_t length)
{
	struct nvme_data *data = FILE_ENG_DATA(f);
	unsigned int nr_zones;
	unsigned long long zslba;
	int i, fd, ret = 0;

	/* If the file is not yet opened, open it for this function. */
	fd = f->fd;
	if (fd < 0) {
		fd = open(f->file_name, O_RDWR | O_LARGEFILE);
		if (fd < 0)
			return -errno;
	}

	zslba = offset >> data->lba_shift;
	nr_zones = (length + td->o.zone_size - 1) / td->o.zone_size;

	for (i = 0; i < nr_zones; i++, zslba += (td->o.zone_size >> data->lba_shift)) {
		struct nvme_passthru_cmd cmd = {
			.opcode		= nvme_zns_cmd_mgmt_send,
			.nsid		= data->nsid,
			.cdw10		= zslba & 0xffffffff,
			.cdw11		= zslba >> 32,
			.cdw13		= NVME_ZNS_ZSA_RESET,
			.addr		= (__u64)(uintptr_t)NULL,
			.data_len	= 0,
			.timeout_ms	= NVME_DEFAULT_IOCTL_TIMEOUT,
		};

		ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
		if (ret)
			break;
	}

	if (f->fd < 0)
		close(fd);
	return -ret;
}
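
/*
 * Read the Maximum Open Resources field from the ZNS identify namespace
 * data; MOR is a 0-based value, hence the +1.
 */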
int fio_nvme_get_max_open_zones(struct thread_data *td, struct fio_file *f,
				unsigned int *max_open_zones)
{
	struct nvme_data *data = FILE_ENG_DATA(f);
	struct nvme_zns_id_ns zns_ns;
	int fd, ret = 0;

	fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
	if (fd < 0)
		return -errno;

	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
				NVME_CSI_ZNS, &zns_ns);
	if (ret) {
		log_err("%s: nvme_zns_identify_ns failed, err=%d\n",
			f->file_name, ret);
		goto out;
	}

	*max_open_zones = zns_ns.mor + 1;
out:
	close(fd);
	return ret;
}
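
/*
 * I/O Management Receive with management operation 1 (reclaim unit handle
 * status) in cdw10; cdw11 holds the 0-based response length in dwords.
 */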
static inline int nvme_fdp_reclaim_unit_handle_status(int fd, __u32 nsid,
						      __u32 data_len, void *data)
{
	struct nvme_passthru_cmd cmd = {
		.opcode		= nvme_cmd_io_mgmt_recv,
		.nsid		= nsid,
		.addr		= (__u64)(uintptr_t)data,
		.data_len	= data_len,
		.cdw10		= 1,
		.cdw11		= (data_len >> 2) - 1,
	};

	return ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
}
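
/*
 * Fetch the FDP reclaim unit handle status for the namespace, mapping a
 * failure to ENOTSUP since it most likely means FDP is not enabled.
 */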
int fio_nvme_iomgmt_ruhs(struct thread_data *td, struct fio_file *f,
			 struct nvme_fdp_ruh_status *ruhs, __u32 bytes)
{
	struct nvme_data *data = FILE_ENG_DATA(f);
	int fd, ret;

	fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
	if (fd < 0)
		return -errno;

	ret = nvme_fdp_reclaim_unit_handle_status(fd, data->nsid, bytes, ruhs);
	if (ret) {
		log_err("%s: nvme_fdp_reclaim_unit_handle_status failed, err=%d\n",
			f->file_name, ret);
		errno = ENOTSUP;
	} else
		errno = 0;

	ret = -errno;
	close(fd);
	return ret;
}