2 * nvme structure declarations and helper functions for the
/*
 * get_slba - translate the byte offset of an io_u into a starting LBA.
 *
 * Two conversions are visible: io_u->offset divided by data->lba_ext, and
 * io_u->offset shifted right by data->lba_shift.
 *
 * NOTE(review): the condition that selects between the two returns is not
 * visible in this listing; presumably the divide is used only when
 * data->lba_ext is non-zero. Confirm against the full file.
 */
8 static inline __u64 get_slba(struct nvme_data *data, struct io_u *io_u)
11 return io_u->offset / data->lba_ext;
13 return io_u->offset >> data->lba_shift;
/*
 * get_nlb - translate the transfer length of an io_u into a block count.
 *
 * Both visible returns subtract 1 from the computed count, so the result is
 * zero-based (a one block transfer yields 0). The count is derived from
 * io_u->xfer_buflen either by dividing by data->lba_ext or by shifting right
 * by data->lba_shift.
 *
 * NOTE(review): the condition that selects between the two returns is not
 * visible in this listing; presumably it tests data->lba_ext. Confirm.
 */
16 static inline __u32 get_nlb(struct nvme_data *data, struct io_u *io_u)
19 return io_u->xfer_buflen / data->lba_ext - 1;
21 return (io_u->xfer_buflen >> data->lba_shift) - 1;
/*
 * fio_nvme_uring_cmd_trim_prep - fill a passthrough command and a single
 * range descriptor for a dataset management (deallocate) request.
 *
 * cmd:  command to fill in; opcode, nsid, cdw11, addr and data_len are set.
 * io_u: supplies the file (for the per-file nvme_data) plus the offset and
 *       length that are converted through get_slba() and get_nlb().
 * dsm:  range descriptor that is filled in and that cmd->addr points at.
 *
 * NOTE(review): the statement numbered 31 (between the nsid and cdw11
 * assignments) is not visible in this listing; presumably it sets cdw10.
 */
24 void fio_nvme_uring_cmd_trim_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
25 struct nvme_dsm_range *dsm)
27 struct nvme_data *data = FILE_ENG_DATA(io_u->file);
29 cmd->opcode = nvme_cmd_dsm;
30 cmd->nsid = data->nsid;
32 cmd->cdw11 = NVME_ATTRIBUTE_DEALLOCATE;
/* The payload of the command is the one range descriptor in dsm. */
33 cmd->addr = (__u64) (uintptr_t) dsm;
34 cmd->data_len = sizeof(*dsm);
36 dsm->slba = get_slba(data, io_u);
37 /* nlb is a 1-based value for deallocate */
38 dsm->nlb = get_nlb(data, io_u) + 1;
/*
 * fio_nvme_uring_cmd_prep - fill an nvme_uring_cmd passthrough command for
 * the I/O described by io_u.
 *
 * cmd:  command to fill in; it is zeroed first.
 * io_u: request to translate (buffer, length, offset, dtype, dspec, file).
 * iov:  when the vectored form is used, filled in to describe the transfer
 *       buffer and passed to the device through cmd->addr.
 * dsm:  range descriptor forwarded to fio_nvme_uring_cmd_trim_prep().
 *
 * NOTE(review): the conditions choosing the opcode and the buffer form, and
 * all return statements, are not visible in this listing, so the meaning of
 * the int return value cannot be stated from here.
 */
41 int fio_nvme_uring_cmd_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
42 struct iovec *iov, struct nvme_dsm_range *dsm)
44 struct nvme_data *data = FILE_ENG_DATA(io_u->file);
/* Start from an all-zero command so fields not assigned below stay 0. */
48 memset(cmd, 0, sizeof(struct nvme_uring_cmd));
/* Opcode selection; trims are handed to fio_nvme_uring_cmd_trim_prep(). */
52 cmd->opcode = nvme_cmd_read;
55 cmd->opcode = nvme_cmd_write;
58 fio_nvme_uring_cmd_trim_prep(cmd, io_u, dsm);
64 slba = get_slba(data, io_u);
65 nlb = get_nlb(data, io_u);
67 /* cdw10 and cdw11 represent starting lba */
68 cmd->cdw10 = slba & 0xffffffff;
69 cmd->cdw11 = slba >> 32;
70 /* cdw12 represent number of lba's for read/write */
/* dtype is OR-ed in from bit 20 of cdw12; dspec is shifted left by 16 in cdw13. */
71 cmd->cdw12 = nlb | (io_u->dtype << 20);
72 cmd->cdw13 = io_u->dspec << 16;
/* Vectored form: describe the buffer in iov and pass the iovec address. */
74 iov->iov_base = io_u->xfer_buf;
75 iov->iov_len = io_u->xfer_buflen;
76 cmd->addr = (__u64)(uintptr_t)iov;
/* Plain form: pass the transfer buffer address and its byte length. */
79 cmd->addr = (__u64)(uintptr_t)io_u->xfer_buf;
80 cmd->data_len = io_u->xfer_buflen;
82 cmd->nsid = data->nsid;
/*
 * nvme_trim - issue a dataset management (deallocate) command through the
 * NVME_IOCTL_IO_CMD ioctl on fd.
 *
 * nr_range is the number of range descriptors; it is written to cdw10 minus
 * one, i.e. as a zero-based count. The buffer cast into .addr holds the
 * range descriptors. Returns whatever ioctl() returns.
 *
 * NOTE(review): the rest of the parameter list and the initializers that
 * consume nsid and data_len are not visible in this listing.
 */
86 static int nvme_trim(int fd, __u32 nsid, __u32 nr_range, __u32 data_len,
89 struct nvme_passthru_cmd cmd = {
90 .opcode = nvme_cmd_dsm,
92 .addr = (__u64)(uintptr_t)data,
94 .cdw10 = nr_range - 1,
95 .cdw11 = NVME_ATTRIBUTE_DEALLOCATE,
98 return ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
/*
 * fio_nvme_trim - deallocate the byte range [offset, offset + len) of file f
 * using one range descriptor.
 *
 * The byte offset and length are converted to blocks either by dividing by
 * data->lba_ext or by shifting right by data->lba_shift, and the single
 * descriptor is submitted through nvme_trim() on f->fd. A failure is
 * reported with log_err().
 *
 * NOTE(review): the condition choosing between the two conversions, the
 * test on ret and the return statement are not visible in this listing.
 */
101 int fio_nvme_trim(const struct thread_data *td, struct fio_file *f,
102 unsigned long long offset, unsigned long long len)
104 struct nvme_data *data = FILE_ENG_DATA(f);
105 struct nvme_dsm_range dsm;
/* Block count and start computed by division by lba_ext. */
109 dsm.nlb = len / data->lba_ext;
110 dsm.slba = offset / data->lba_ext;
/* Block count and start computed by shifting by lba_shift. */
112 dsm.nlb = len >> data->lba_shift;
113 dsm.slba = offset >> data->lba_shift;
/* Exactly one range descriptor is passed. */
116 ret = nvme_trim(f->fd, data->nsid, 1, sizeof(struct nvme_dsm_range),
119 log_err("%s: nvme_trim failed for offset %llu and len %llu, err=%d\n",
120 f->file_name, offset, len, ret);
/*
 * nvme_identify - issue an identify admin command through the
 * NVME_IOCTL_ADMIN_CMD ioctl on fd.
 *
 * data receives the result; the transfer length is fixed at
 * NVME_IDENTIFY_DATA_SIZE, so the caller's buffer must be at least that big.
 * csi is placed in cdw11 after shifting by NVME_IDENTIFY_CSI_SHIFT.
 * Returns whatever ioctl() returns.
 *
 * NOTE(review): the initializers that consume nsid and cns are not visible
 * in this listing; presumably cns goes into cdw10.
 */
125 static int nvme_identify(int fd, __u32 nsid, enum nvme_identify_cns cns,
126 enum nvme_csi csi, void *data)
128 struct nvme_passthru_cmd cmd = {
129 .opcode = nvme_admin_identify,
131 .addr = (__u64)(uintptr_t)data,
132 .data_len = NVME_IDENTIFY_DATA_SIZE,
134 .cdw11 = csi << NVME_IDENTIFY_CSI_SHIFT,
135 .timeout_ms = NVME_DEFAULT_IOCTL_TIMEOUT,
138 return ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd);
/*
 * fio_nvme_get_info - query basic namespace geometry for file f.
 *
 * Outputs visible in this listing:
 *   *nsid   - namespace id obtained with the NVME_IOCTL_ID ioctl.
 *   *lba_sz - 1 << ds of the LBA format selected by ns.flbas.
 *   *ms     - ms field of the same LBA format, converted from little endian.
 *
 * The function logs an error for files that are not character devices, for
 * a failed namespace-id or identify request, for a non-zero *ms when flbas
 * bit 4 is clear, and for end to end data protection.
 *
 * NOTE(review): the assignment to *nlba, the condition choosing between the
 * two format_idx computations, the protection check itself and every return
 * statement are not visible in this listing.
 */
141 int fio_nvme_get_info(struct fio_file *f, __u32 *nsid, __u32 *lba_sz,
142 __u32 *ms, __u64 *nlba)
144 struct nvme_id_ns ns;
/* Anything that is not a character device is reported as an error. */
149 if (f->filetype != FIO_TYPE_CHAR) {
150 log_err("ioengine io_uring_cmd only works with nvme ns "
151 "generic char devices (/dev/ngXnY)\n");
/* The device is opened read-only here just for the queries below. */
155 fd = open(f->file_name, O_RDONLY);
/* A negative ioctl result is treated as a failure to get the namespace id. */
159 namespace_id = ioctl(fd, NVME_IOCTL_ID);
160 if (namespace_id < 0) {
162 log_err("%s: failed to fetch namespace-id\n", f->file_name);
167 * Identify namespace to get namespace-id, namespace size in LBA's
170 err = nvme_identify(fd, namespace_id, NVME_IDENTIFY_CNS_NS,
173 log_err("%s: failed to fetch identify namespace\n",
179 *nsid = namespace_id;
182 * 16 or 64 as maximum number of supported LBA formats.
183 * From flbas bit 0-3 indicates lsb and bit 5-6 indicates msb
184 * of the format index used to format the namespace.
187 format_idx = ns.flbas & 0xf;
189 format_idx = (ns.flbas & 0xf) + (((ns.flbas >> 5) & 0x3) << 4);
191 *lba_sz = 1 << ns.lbaf[format_idx].ds;
194 * Only extended LBA can be supported.
195 * Bit 4 for flbas indicates if metadata is transferred at the end of
196 * logical block creating an extended LBA.
198 *ms = le16_to_cpu(ns.lbaf[format_idx].ms);
199 if (*ms && !((ns.flbas >> 4) & 0x1)) {
200 log_err("%s: only extended logical block can be supported\n",
206 /* Check for end to end data protection support */
208 log_err("%s: end to end data protection not supported\n",
/*
 * fio_nvme_get_zoned_model - report the zoned model of file f through *model.
 *
 * The device is opened read-only and two identify requests are issued for
 * data->nsid (NVME_IDENTIFY_CNS_CSI_CTRL, then NVME_IDENTIFY_CNS_CSI_NS),
 * both using a struct nvme_id_ns as the result buffer. The only model
 * assignment visible in this listing is ZBD_HOST_MANAGED.
 *
 * NOTE(review): the action taken for non character devices, the handling of
 * the identify results, and every return statement are not visible here.
 * NOTE(review): cmd is declared and zeroed, but no use of it is visible in
 * this listing; check whether it is dead code.
 */
220 int fio_nvme_get_zoned_model(struct thread_data *td, struct fio_file *f,
221 enum zbd_zoned_model *model)
223 struct nvme_data *data = FILE_ENG_DATA(f);
224 struct nvme_id_ns ns;
225 struct nvme_passthru_cmd cmd;
228 if (f->filetype != FIO_TYPE_CHAR)
231 /* File is not yet opened */
232 fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
236 /* Using nvme_id_ns for data as sizes are same */
237 ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_CTRL,
244 memset(&cmd, 0, sizeof(struct nvme_passthru_cmd));
246 /* Using nvme_id_ns for data as sizes are same */
247 ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
254 *model = ZBD_HOST_MANAGED;
/*
 * nvme_report_zones - issue a zone management receive command (report zones
 * action) through the NVME_IOCTL_IO_CMD ioctl on fd.
 *
 * slba:      LBA to start reporting from; its low 32 bits go into cdw10.
 * zras_feat: extra bits OR-ed into cdw13 next to NVME_ZNS_ZRA_REPORT_ZONES.
 * data_len:  size in bytes of the buffer at data.
 * Returns whatever ioctl() returns.
 *
 * NOTE(review): the initializers that consume nsid and the upper half of
 * slba are not visible in this listing.
 */
260 static int nvme_report_zones(int fd, __u32 nsid, __u64 slba, __u32 zras_feat,
261 __u32 data_len, void *data)
263 struct nvme_passthru_cmd cmd = {
264 .opcode = nvme_zns_cmd_mgmt_recv,
266 .addr = (__u64)(uintptr_t)data,
267 .data_len = data_len,
268 .cdw10 = slba & 0xffffffff,
/* Buffer length expressed in 4-byte units, minus one. */
270 .cdw12 = (data_len >> 2) - 1,
271 .cdw13 = NVME_ZNS_ZRA_REPORT_ZONES | zras_feat,
272 .timeout_ms = NVME_DEFAULT_IOCTL_TIMEOUT,
275 return ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
/*
 * fio_nvme_report_zones - fetch zone descriptors from the device and convert
 * them into fio's struct zbd_zone entries.
 *
 * td:       thread data (no use is visible in this listing).
 * f:        file whose zones are reported; opened read-only here.
 * offset:   byte offset to start from; converted to an LBA with lba_shift.
 * zbdz:     output array, filled from index 0 upwards.
 * nr_zones: maximum number of entries to fill; clamped to the number of
 *           zones between offset and f->real_file_size.
 *
 * Zones are fetched in chunks of at most 1024 descriptors into a buffer
 * allocated with calloc().
 *
 * NOTE(review): error paths, cleanup of fd and zr, and the return value are
 * not visible in this listing.
 */
278 int fio_nvme_report_zones(struct thread_data *td, struct fio_file *f,
279 uint64_t offset, struct zbd_zone *zbdz,
280 unsigned int nr_zones)
282 struct nvme_data *data = FILE_ENG_DATA(f);
283 struct nvme_zone_report *zr;
284 struct nvme_zns_id_ns zns_ns;
285 struct nvme_id_ns ns;
286 unsigned int i = 0, j, zones_fetched = 0;
287 unsigned int max_zones, zones_chunks = 1024;
292 /* File is not yet opened */
293 fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
/* Report header followed by room for zones_chunks descriptors. */
298 zr_len = sizeof(*zr) + (zones_chunks * sizeof(struct nvme_zns_desc));
299 zr = calloc(1, zr_len);
305 ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_NS,
308 log_err("%s: nvme_identify_ns failed, err=%d\n", f->file_name,
313 ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
314 NVME_CSI_ZNS, &zns_ns);
316 log_err("%s: nvme_zns_identify_ns failed, err=%d\n",
/*
 * Zone length in bytes: zone size in blocks shifted by lba_shift.
 * NOTE(review): only the low 4 bits of flbas index lbafe here, whereas
 * fio_nvme_get_info() also folds in bits 5-6 for larger format counts;
 * confirm this is intended.
 */
320 zlen = zns_ns.lbafe[ns.flbas & 0x0f].zsze << data->lba_shift;
/* NOTE(review): no guard against zlen == 0 is visible before this division. */
322 max_zones = (f->real_file_size - offset) / zlen;
323 if (max_zones < nr_zones)
324 nr_zones = max_zones;
326 if (nr_zones < zones_chunks)
327 zones_chunks = nr_zones;
329 while (zones_fetched < nr_zones) {
/* Final chunk: shrink the request to the zones that are still missing. */
330 if (zones_fetched + zones_chunks >= nr_zones) {
331 zones_chunks = nr_zones - zones_fetched;
332 zr_len = sizeof(*zr) + (zones_chunks * sizeof(struct nvme_zns_desc));
334 ret = nvme_report_zones(fd, data->nsid, offset >> data->lba_shift,
335 NVME_ZNS_ZRAS_FEAT_ERZ, zr_len, (void *)zr);
337 log_err("%s: nvme_zns_report_zones failed, err=%d\n",
342 /* Transform the zone-report */
/* j walks the current report; i keeps counting across reports into zbdz. */
343 for (j = 0; j < zr->nr_zones; j++, i++) {
344 struct nvme_zns_desc *desc = (struct nvme_zns_desc *)&(zr->entries[j]);
/* Descriptor fields are in blocks; the output fields are in bytes. */
346 zbdz[i].start = desc->zslba << data->lba_shift;
348 zbdz[i].wp = desc->wp << data->lba_shift;
349 zbdz[i].capacity = desc->zcap << data->lba_shift;
351 /* Zone Type is stored in first 4 bits. */
352 switch (desc->zt & 0x0f) {
353 case NVME_ZONE_TYPE_SEQWRITE_REQ:
354 zbdz[i].type = ZBD_ZONE_TYPE_SWR;
357 log_err("%s: invalid type for zone at offset %llu.\n",
358 f->file_name, (unsigned long long) desc->zslba);
363 /* Zone State is stored in last 4 bits. */
/* That is, the upper nibble of zs, as extracted by the shift below. */
364 switch (desc->zs >> 4) {
365 case NVME_ZNS_ZS_EMPTY:
366 zbdz[i].cond = ZBD_ZONE_COND_EMPTY;
368 case NVME_ZNS_ZS_IMPL_OPEN:
369 zbdz[i].cond = ZBD_ZONE_COND_IMP_OPEN;
371 case NVME_ZNS_ZS_EXPL_OPEN:
372 zbdz[i].cond = ZBD_ZONE_COND_EXP_OPEN;
374 case NVME_ZNS_ZS_CLOSED:
375 zbdz[i].cond = ZBD_ZONE_COND_CLOSED;
377 case NVME_ZNS_ZS_FULL:
378 zbdz[i].cond = ZBD_ZONE_COND_FULL;
380 case NVME_ZNS_ZS_READ_ONLY:
381 case NVME_ZNS_ZS_OFFLINE:
383 /* Treat all these conditions as offline (don't use!) */
384 zbdz[i].cond = ZBD_ZONE_COND_OFFLINE;
/* For these zones the write pointer is reported at the zone start. */
385 zbdz[i].wp = zbdz[i].start;
/* Advance by the number of zones the device actually returned. */
388 zones_fetched += zr->nr_zones;
389 offset += zr->nr_zones * zlen;
/*
 * fio_nvme_reset_wp - reset the zones covering the byte range that starts at
 * offset and spans length bytes.
 *
 * One zone management send command with the NVME_ZNS_ZSA_RESET action is
 * issued per zone through the NVME_IOCTL_IO_CMD ioctl. The zone size comes
 * from td->o.zone_size, and the start LBA advances by one zone size (in
 * blocks) per iteration.
 *
 * NOTE(review): the condition under which the file is opened here, the nsid
 * initializer, the handling of ret and the return statement are not visible
 * in this listing.
 */
400 int fio_nvme_reset_wp(struct thread_data *td, struct fio_file *f,
401 uint64_t offset, uint64_t length)
403 struct nvme_data *data = FILE_ENG_DATA(f);
404 unsigned int nr_zones;
405 unsigned long long zslba;
408 /* If the file is not yet opened, open it for this function. */
411 fd = open(f->file_name, O_RDWR | O_LARGEFILE);
416 zslba = offset >> data->lba_shift;
/* Round up so that a partial trailing zone is still counted. */
417 nr_zones = (length + td->o.zone_size - 1) / td->o.zone_size;
419 for (i = 0; i < nr_zones; i++, zslba += (td->o.zone_size >> data->lba_shift)) {
420 struct nvme_passthru_cmd cmd = {
421 .opcode = nvme_zns_cmd_mgmt_send,
/* The zone start LBA is split across cdw10 (low half) and cdw11 (high half). */
423 .cdw10 = zslba & 0xffffffff,
424 .cdw11 = zslba >> 32,
425 .cdw13 = NVME_ZNS_ZSA_RESET,
/* No data buffer is attached to the reset command. */
426 .addr = (__u64)(uintptr_t)NULL,
428 .timeout_ms = NVME_DEFAULT_IOCTL_TIMEOUT,
431 ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
/*
 * fio_nvme_get_max_open_zones - report the open zone limit of file f through
 * *max_open_zones.
 *
 * The device is opened read-only, the zoned namespace identify data is
 * fetched with nvme_identify(), and the result is the mor field plus one.
 *
 * NOTE(review): the +1 suggests that mor is reported zero-based; confirm
 * against the specification, including what happens when mor holds the
 * maximum 32-bit value and the addition wraps.
 * NOTE(review): error paths, closing of fd and the return value are not
 * visible in this listing.
 */
439 int fio_nvme_get_max_open_zones(struct thread_data *td, struct fio_file *f,
440 unsigned int *max_open_zones)
442 struct nvme_data *data = FILE_ENG_DATA(f);
443 struct nvme_zns_id_ns zns_ns;
446 fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
450 ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
451 NVME_CSI_ZNS, &zns_ns);
453 log_err("%s: nvme_zns_identify_ns failed, err=%d\n",
458 *max_open_zones = zns_ns.mor + 1;
/*
 * nvme_fdp_reclaim_unit_handle_status - issue an I/O management receive
 * command through the NVME_IOCTL_IO_CMD ioctl on fd.
 *
 * data_len: size in bytes of the buffer at data; also written to cdw11 in
 *           4-byte units, minus one.
 * data:     buffer that receives the result.
 * Returns whatever ioctl() returns.
 *
 * NOTE(review): the initializers that consume nsid and set cdw10 are not
 * visible in this listing.
 */
464 static inline int nvme_fdp_reclaim_unit_handle_status(int fd, __u32 nsid,
465 __u32 data_len, void *data)
467 struct nvme_passthru_cmd cmd = {
468 .opcode = nvme_cmd_io_mgmt_recv,
470 .addr = (__u64)(uintptr_t)data,
471 .data_len = data_len,
473 .cdw11 = (data_len >> 2) - 1,
476 return ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
/*
 * fio_nvme_iomgmt_ruhs - fetch the reclaim unit handle status of file f into
 * the caller's buffer.
 *
 * ruhs:  buffer that receives the status.
 * bytes: size of that buffer, passed on as the transfer length.
 *
 * The device is opened read-only and the request is issued through
 * nvme_fdp_reclaim_unit_handle_status(); a failure is reported with
 * log_err().
 *
 * NOTE(review): the test on ret, closing of fd and the return value are not
 * visible in this listing.
 */
479 int fio_nvme_iomgmt_ruhs(struct thread_data *td, struct fio_file *f,
480 struct nvme_fdp_ruh_status *ruhs, __u32 bytes)
482 struct nvme_data *data = FILE_ENG_DATA(f);
485 fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
489 ret = nvme_fdp_reclaim_unit_handle_status(fd, data->nsid, bytes, ruhs);
491 log_err("%s: nvme_fdp_reclaim_unit_handle_status failed, err=%d\n",