2 * nvme structure declarations and helper functions for the
/*
 * get_slba() - convert io_u->offset (bytes) into a starting LBA.
 *
 * For extended-LBA namespaces (metadata interleaved with data,
 * data->lba_ext = full extended block size in bytes) a plain divide is
 * required; otherwise the block size is a power of two and a right shift
 * by data->lba_shift is used.
 *
 * NOTE(review): the conditional choosing between the two returns is
 * elided in this view — presumably "if (data->lba_ext)"; confirm against
 * the full file.
 */
8 static inline __u64 get_slba(struct nvme_data *data, struct io_u *io_u)
11 return io_u->offset / data->lba_ext;
13 return io_u->offset >> data->lba_shift;
/*
 * get_nlb() - convert io_u->xfer_buflen (bytes) into a 0-based LBA count
 * as expected by the NVMe NLB command field (value 0 means one block),
 * hence the "- 1" on both paths. Extended-LBA namespaces divide by the
 * extended block size; others shift by data->lba_shift.
 *
 * NOTE(review): the branch selecting between the two returns is elided
 * in this view; confirm against the full file.
 */
16 static inline __u32 get_nlb(struct nvme_data *data, struct io_u *io_u)
19 return io_u->xfer_buflen / data->lba_ext - 1;
21 return (io_u->xfer_buflen >> data->lba_shift) - 1;
/*
 * fio_nvme_uring_cmd_trim_prep() - populate an NVMe uring passthru
 * command with a Dataset Management (deallocate) request covering the
 * byte range described by @io_u.
 *
 * @cmd: command to fill in (opcode, nsid, attributes, data pointer).
 * @io_u: fio I/O unit supplying offset and transfer length.
 * @dsm: caller-owned single-range descriptor; its address/size become the
 *       command's data buffer, so it must outlive command submission.
 */
24 void fio_nvme_uring_cmd_trim_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
25 struct nvme_dsm_range *dsm)
27 struct nvme_data *data = FILE_ENG_DATA(io_u->file);
29 cmd->opcode = cmd_dsm_opcode_comment: /* see below */ nvme_cmd_dsm;
30 cmd->nsid = data->nsid;
/* cdw11: DSM attributes — request deallocation of the range (AD bit). */
32 cmd->cdw11 = NVME_ATTRIBUTE_DEALLOCATE;
33 cmd->addr = (__u64) (uintptr_t) dsm;
34 cmd->data_len = sizeof(*dsm);
36 dsm->slba = get_slba(data, io_u);
37 /* nlb is a 1-based value for deallocate */
38 dsm->nlb = get_nlb(data, io_u) + 1;
/*
 * fio_nvme_uring_cmd_prep() - build the NVMe uring passthru command for
 * an io_u: read, write, or trim (the latter delegated to
 * fio_nvme_uring_cmd_trim_prep()).
 *
 * @cmd: zeroed then filled in here.
 * @iov: scratch iovec used for the vectored-I/O path; caller-owned.
 * @dsm: DSM range descriptor forwarded to the trim path.
 *
 * NOTE(review): the ddir dispatch (read/write/trim selection) and the
 * condition choosing the iovec path vs. the plain-buffer path are elided
 * in this view; the assignments below show both halves of each branch.
 */
41 int fio_nvme_uring_cmd_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
42 struct iovec *iov, struct nvme_dsm_range *dsm)
44 struct nvme_data *data = FILE_ENG_DATA(io_u->file);
48 memset(cmd, 0, sizeof(struct nvme_uring_cmd));
52 cmd->opcode = nvme_cmd_read;
55 cmd->opcode = nvme_cmd_write;
58 fio_nvme_uring_cmd_trim_prep(cmd, io_u, dsm);
64 slba = get_slba(data, io_u);
65 nlb = get_nlb(data, io_u);
67 /* cdw10 and cdw11 represent starting lba */
68 cmd->cdw10 = slba & 0xffffffff;
69 cmd->cdw11 = slba >> 32;
70 /* cdw12 represent number of lba's for read/write */
/* Directive type (dtype) shares cdw12 bits 20+ with the 0-based NLB. */
71 cmd->cdw12 = nlb | (io_u->dtype << 20);
/* Directive specific (dspec, e.g. FDP placement id) lives in cdw13[31:16]. */
72 cmd->cdw13 = io_u->dspec << 16;
/* Vectored path: point the command at the iovec describing the buffer. */
74 iov->iov_base = io_u->xfer_buf;
75 iov->iov_len = io_u->xfer_buflen;
76 cmd->addr = (__u64)(uintptr_t)iov;
/* Non-vectored path: pass the transfer buffer directly. */
79 cmd->addr = (__u64)(uintptr_t)io_u->xfer_buf;
80 cmd->data_len = io_u->xfer_buflen;
82 cmd->nsid = data->nsid;
/*
 * nvme_identify() - issue an Identify admin command via the kernel
 * passthrough ioctl.
 *
 * @fd:   open fd on the nvme char device.
 * @nsid: target namespace id (placed in cmd.nsid; assignment elided here).
 * @cns:  Controller or Namespace Structure selector (goes in cdw10;
 *        assignment elided in this view).
 * @csi:  command set identifier, shifted into cdw11.
 * @data: caller buffer of NVME_IDENTIFY_DATA_SIZE bytes for the result.
 *
 * Returns the ioctl() result: 0 on success, negative/positive on error
 * per NVME_IOCTL_ADMIN_CMD semantics.
 */
86 static int nvme_identify(int fd, __u32 nsid, enum nvme_identify_cns cns,
87 enum nvme_csi csi, void *data)
89 struct nvme_passthru_cmd cmd = {
90 .opcode = nvme_admin_identify,
92 .addr = (__u64)(uintptr_t)data,
93 .data_len = NVME_IDENTIFY_DATA_SIZE,
95 .cdw11 = csi << NVME_IDENTIFY_CSI_SHIFT,
96 .timeout_ms = NVME_DEFAULT_IOCTL_TIMEOUT,
99 return ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd);
/*
 * fio_nvme_get_info() - probe an nvme generic char device (/dev/ngXnY)
 * and report its namespace id, LBA data size, metadata size and capacity
 * in LBAs through the out-parameters.
 *
 * Only character devices are accepted; the namespace id comes from
 * NVME_IOCTL_ID and the geometry from an Identify Namespace command.
 * Namespaces with separate (non-extended) metadata are rejected, as is
 * end-to-end data protection (checked near the bottom).
 *
 * NOTE(review): error-cleanup paths (close(fd), return statements) are
 * elided in this view.
 */
102 int fio_nvme_get_info(struct fio_file *f, __u32 *nsid, __u32 *lba_sz,
103 __u32 *ms, __u64 *nlba)
105 struct nvme_id_ns ns;
110 if (f->filetype != FIO_TYPE_CHAR) {
111 log_err("ioengine io_uring_cmd only works with nvme ns "
112 "generic char devices (/dev/ngXnY)\n");
116 fd = open(f->file_name, O_RDONLY);
120 namespace_id = ioctl(fd, NVME_IOCTL_ID);
121 if (namespace_id < 0) {
123 log_err("%s: failed to fetch namespace-id\n", f->file_name);
128 * Identify namespace to get namespace-id, namespace size in LBA's
131 err = nvme_identify(fd, namespace_id, NVME_IDENTIFY_CNS_NS,
134 log_err("%s: failed to fetch identify namespace\n",
140 *nsid = namespace_id;
143 * 16 or 64 as maximum number of supported LBA formats.
144 * From flbas bit 0-3 indicates lsb and bit 5-6 indicates msb
145 * of the format index used to format the namespace.
/* Narrow-format controllers use only the low nibble of flbas... */
148 format_idx = ns.flbas & 0xf;
/* ...wider ones add flbas bits 5-6 as the high bits of the index. */
150 format_idx = (ns.flbas & 0xf) + (((ns.flbas >> 5) & 0x3) << 4);
/* LBA data size is stored as a power-of-two exponent (lbaf[].ds). */
152 *lba_sz = 1 << ns.lbaf[format_idx].ds;
155 * Only extended LBA can be supported.
156 * Bit 4 for flbas indicates if metadata is transferred at the end of
157 * logical block creating an extended LBA.
159 *ms = le16_to_cpu(ns.lbaf[format_idx].ms);
160 if (*ms && !((ns.flbas >> 4) & 0x1)) {
161 log_err("%s: only extended logical block can be supported\n",
167 /* Check for end to end data protection support */
169 log_err("%s: end to end data protection not supported\n",
/*
 * fio_nvme_get_zoned_model() - report the zoned-block-device model for
 * @f. Issues Identify (CSI controller, then CSI namespace, both with the
 * ZNS command set — the NVME_CSI_ZNS argument lines are elided here) and,
 * if those succeed, reports ZBD_HOST_MANAGED.
 *
 * The file may not be opened yet, so a private fd is opened (and
 * presumably closed on the elided exit paths — confirm in full file).
 */
181 int fio_nvme_get_zoned_model(struct thread_data *td, struct fio_file *f,
182 enum zbd_zoned_model *model)
184 struct nvme_data *data = FILE_ENG_DATA(f);
185 struct nvme_id_ns ns;
186 struct nvme_passthru_cmd cmd;
/* Non-char devices are not nvme generic nodes; bail out early. */
189 if (f->filetype != FIO_TYPE_CHAR)
192 /* File is not yet opened */
193 fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
197 /* Using nvme_id_ns for data as sizes are same */
198 ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_CTRL,
205 memset(&cmd, 0, sizeof(struct nvme_passthru_cmd));
207 /* Using nvme_id_ns for data as sizes are same */
208 ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
/* ZNS namespaces are host-managed by definition. */
215 *model = ZBD_HOST_MANAGED;
/*
 * nvme_report_zones() - issue a ZNS Zone Management Receive (Report
 * Zones) command through the I/O passthrough ioctl.
 *
 * @slba is split across cdw10/cdw11 (cdw11 assignment elided here);
 * cdw12 carries the transfer length as a 0-based dword count; cdw13
 * selects the Report Zones action plus caller-supplied ZRAS feature bits
 * (e.g. partial-report).
 */
221 static int nvme_report_zones(int fd, __u32 nsid, __u64 slba, __u32 zras_feat,
222 __u32 data_len, void *data)
224 struct nvme_passthru_cmd cmd = {
225 .opcode = nvme_zns_cmd_mgmt_recv,
227 .addr = (__u64)(uintptr_t)data,
228 .data_len = data_len,
229 .cdw10 = slba & 0xffffffff,
/* Number of dwords to transfer, 0-based (hence >> 2 then - 1). */
231 .cdw12 = (data_len >> 2) - 1,
232 .cdw13 = NVME_ZNS_ZRA_REPORT_ZONES | zras_feat,
233 .timeout_ms = NVME_DEFAULT_IOCTL_TIMEOUT,
236 return ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
/*
 * fio_nvme_report_zones() - fill @zbdz with up to @nr_zones zone
 * descriptors starting at byte @offset, translating NVMe ZNS report
 * entries into fio's generic zbd_zone format.
 *
 * Zones are fetched in chunks of up to 1024 descriptors per Zone
 * Management Receive call; the zone size (zlen, bytes) comes from the
 * ZNS identify data for the active LBA format.
 *
 * Returns (on the elided exit paths) presumably the number of zones
 * filled or a negative error — confirm in the full file. Cleanup
 * (free(zr), close(fd)) is also elided from this view.
 */
239 int fio_nvme_report_zones(struct thread_data *td, struct fio_file *f,
240 uint64_t offset, struct zbd_zone *zbdz,
241 unsigned int nr_zones)
243 struct nvme_data *data = FILE_ENG_DATA(f);
244 struct nvme_zone_report *zr;
245 struct nvme_zns_id_ns zns_ns;
246 struct nvme_id_ns ns;
247 unsigned int i = 0, j, zones_fetched = 0;
248 unsigned int max_zones, zones_chunks = 1024;
253 /* File is not yet opened */
254 fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
/* Report buffer: header plus one descriptor per zone in a chunk. */
259 zr_len = sizeof(*zr) + (zones_chunks * sizeof(struct nvme_zns_desc));
260 zr = calloc(1, zr_len);
/* Regular identify gives flbas (needed to index lbafe below). */
266 ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_NS,
269 log_err("%s: nvme_identify_ns failed, err=%d\n", f->file_name,
/* ZNS identify gives the per-format zone size (zsze, in LBAs). */
274 ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
275 NVME_CSI_ZNS, &zns_ns);
277 log_err("%s: nvme_zns_identify_ns failed, err=%d\n",
/* Zone length in bytes = zsze (LBAs) << lba_shift. */
281 zlen = zns_ns.lbafe[ns.flbas & 0x0f].zsze << data->lba_shift;
/* Clamp the request to the zones actually present past @offset. */
283 max_zones = (f->real_file_size - offset) / zlen;
284 if (max_zones < nr_zones)
285 nr_zones = max_zones;
287 if (nr_zones < zones_chunks)
288 zones_chunks = nr_zones;
290 while (zones_fetched < nr_zones) {
/* Last chunk: shrink the request to exactly what remains. */
291 if (zones_fetched + zones_chunks >= nr_zones) {
292 zones_chunks = nr_zones - zones_fetched;
293 zr_len = sizeof(*zr) + (zones_chunks * sizeof(struct nvme_zns_desc));
295 ret = nvme_report_zones(fd, data->nsid, offset >> data->lba_shift,
296 NVME_ZNS_ZRAS_FEAT_ERZ, zr_len, (void *)zr);
298 log_err("%s: nvme_zns_report_zones failed, err=%d\n",
303 /* Transform the zone-report */
304 for (j = 0; j < zr->nr_zones; j++, i++) {
305 struct nvme_zns_desc *desc = (struct nvme_zns_desc *)&(zr->entries[j]);
/* LBA-based fields are converted to byte offsets via lba_shift. */
307 zbdz[i].start = desc->zslba << data->lba_shift;
309 zbdz[i].wp = desc->wp << data->lba_shift;
310 zbdz[i].capacity = desc->zcap << data->lba_shift;
312 /* Zone Type is stored in first 4 bits. */
313 switch (desc->zt & 0x0f) {
314 case NVME_ZONE_TYPE_SEQWRITE_REQ:
315 zbdz[i].type = ZBD_ZONE_TYPE_SWR;
/* default case: unknown zone type is a hard error. */
318 log_err("%s: invalid type for zone at offset %llu.\n",
319 f->file_name, (unsigned long long) desc->zslba);
324 /* Zone State is stored in last 4 bits. */
325 switch (desc->zs >> 4) {
326 case NVME_ZNS_ZS_EMPTY:
327 zbdz[i].cond = ZBD_ZONE_COND_EMPTY;
329 case NVME_ZNS_ZS_IMPL_OPEN:
330 zbdz[i].cond = ZBD_ZONE_COND_IMP_OPEN;
332 case NVME_ZNS_ZS_EXPL_OPEN:
333 zbdz[i].cond = ZBD_ZONE_COND_EXP_OPEN;
335 case NVME_ZNS_ZS_CLOSED:
336 zbdz[i].cond = ZBD_ZONE_COND_CLOSED;
338 case NVME_ZNS_ZS_FULL:
339 zbdz[i].cond = ZBD_ZONE_COND_FULL;
341 case NVME_ZNS_ZS_READ_ONLY:
342 case NVME_ZNS_ZS_OFFLINE:
344 /* Treat all these conditions as offline (don't use!) */
345 zbdz[i].cond = ZBD_ZONE_COND_OFFLINE;
/* Park the write pointer at the zone start for unusable zones. */
346 zbdz[i].wp = zbdz[i].start;
/* Advance both the tally and the byte offset for the next chunk. */
349 zones_fetched += zr->nr_zones;
350 offset += zr->nr_zones * zlen;
/*
 * fio_nvme_reset_wp() - reset the write pointer of every zone covered by
 * [offset, offset + length), one Zone Management Send (Reset) command
 * per zone.
 *
 * Zone size comes from td->o.zone_size (bytes); the count is rounded up
 * so a partial trailing zone is still reset. The file may not be open,
 * so a private read-write fd is used (close presumably on the elided
 * exit path — confirm in full file).
 */
361 int fio_nvme_reset_wp(struct thread_data *td, struct fio_file *f,
362 uint64_t offset, uint64_t length)
364 struct nvme_data *data = FILE_ENG_DATA(f);
365 unsigned int nr_zones;
366 unsigned long long zslba;
369 /* If the file is not yet opened, open it for this function. */
372 fd = open(f->file_name, O_RDWR | O_LARGEFILE);
/* First zone's starting LBA; advanced by one zone per iteration. */
377 zslba = offset >> data->lba_shift;
/* Round up: a partial final zone still gets its WP reset. */
378 nr_zones = (length + td->o.zone_size - 1) / td->o.zone_size;
380 for (i = 0; i < nr_zones; i++, zslba += (td->o.zone_size >> data->lba_shift)) {
381 struct nvme_passthru_cmd cmd = {
382 .opcode = nvme_zns_cmd_mgmt_send,
/* Target zone's starting LBA split across cdw10/cdw11. */
384 .cdw10 = zslba & 0xffffffff,
385 .cdw11 = zslba >> 32,
/* Zone Send Action: reset write pointer. */
386 .cdw13 = NVME_ZNS_ZSA_RESET,
387 .addr = (__u64)(uintptr_t)NULL,
389 .timeout_ms = NVME_DEFAULT_IOCTL_TIMEOUT,
392 ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
/*
 * fio_nvme_get_max_open_zones() - read the Maximum Open Resources (mor)
 * field from the ZNS identify-namespace data and report it through
 * @max_open_zones. mor is 0-based in the spec's identify data, hence the
 * "+ 1". Opens a private fd (close presumably on the elided exit paths).
 */
400 int fio_nvme_get_max_open_zones(struct thread_data *td, struct fio_file *f,
401 unsigned int *max_open_zones)
403 struct nvme_data *data = FILE_ENG_DATA(f);
404 struct nvme_zns_id_ns zns_ns;
407 fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
411 ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
412 NVME_CSI_ZNS, &zns_ns);
414 log_err("%s: nvme_zns_identify_ns failed, err=%d\n",
/* mor is 0-based: a value of 0 means one open zone is allowed. */
419 *max_open_zones = zns_ns.mor + 1;
/*
 * nvme_fdp_reclaim_unit_handle_status() - issue an I/O Management
 * Receive command to fetch FDP reclaim-unit-handle status into @data.
 * cdw11 carries the transfer length as a 0-based dword count; the
 * management-operation selector in cdw10 is elided in this view.
 */
425 static inline int nvme_fdp_reclaim_unit_handle_status(int fd, __u32 nsid,
426 __u32 data_len, void *data)
428 struct nvme_passthru_cmd cmd = {
429 .opcode = nvme_cmd_io_mgmt_recv,
431 .addr = (__u64)(uintptr_t)data,
432 .data_len = data_len,
/* Number of dwords to transfer, 0-based (hence >> 2 then - 1). */
434 .cdw11 = (data_len >> 2) - 1,
437 return ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
440 int fio_nvme_iomgmt_ruhs(struct thread_data *td, struct fio_file *f,
441 struct nvme_fdp_ruh_status *ruhs, __u32 bytes)
443 struct nvme_data *data = FILE_ENG_DATA(f);
446 fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
450 ret = nvme_fdp_reclaim_unit_handle_status(fd, data->nsid, bytes, ruhs);
452 log_err("%s: nvme_fdp_reclaim_unit_handle_status failed, err=%d\n",