fix: io_uring sqpoll issue_time empty when the kernel has not yet read the sq
[fio.git] / engines / nvme.c
/*
 * nvme structure declarations and helper functions for the
 * io_uring_cmd engine.
 */

#include "nvme.h"

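/*
 * Helpers to translate an io_u byte offset/length into an NVMe starting
 * LBA and a 0-based block count. With an extended-LBA format the metadata
 * is carried inline, so the byte values are divided by the full extended
 * block size; otherwise a plain shift by the LBA data size shift is enough.
 * Example: for a 4096-byte LBA format (lba_shift = 12), a 1 MiB request at
 * offset 0 yields slba = 0 and nlb = 255 (0-based).
 */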
static inline __u64 get_slba(struct nvme_data *data, struct io_u *io_u)
{
	if (data->lba_ext)
		return io_u->offset / data->lba_ext;
	else
		return io_u->offset >> data->lba_shift;
}

static inline __u32 get_nlb(struct nvme_data *data, struct io_u *io_u)
{
	if (data->lba_ext)
		return io_u->xfer_buflen / data->lba_ext - 1;
	else
		return (io_u->xfer_buflen >> data->lba_shift) - 1;
}

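/*
 * Build a Dataset Management (deallocate) command covering the io_u range.
 * A single range descriptor is used, so the 0-based number of ranges in
 * cdw10 stays 0.
 */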
void fio_nvme_uring_cmd_trim_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
				  struct nvme_dsm_range *dsm)
{
	struct nvme_data *data = FILE_ENG_DATA(io_u->file);

	cmd->opcode = nvme_cmd_dsm;
	cmd->nsid = data->nsid;
	cmd->cdw10 = 0;
	cmd->cdw11 = NVME_ATTRIBUTE_DEALLOCATE;
	cmd->addr = (__u64) (uintptr_t) dsm;
	cmd->data_len = sizeof(*dsm);

	dsm->slba = get_slba(data, io_u);
	/* nlb is a 1-based value for deallocate */
	dsm->nlb = get_nlb(data, io_u) + 1;
}

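/*
 * Fill an nvme_uring_cmd for the io_u: reads and writes become NVMe I/O
 * commands with the starting LBA in cdw10/cdw11 and the 0-based block count
 * (plus any directive hints) in cdw12/cdw13; trims are handed off to the
 * deallocate helper above. When an iovec is supplied, addr points at the
 * single iovec and data_len is set to 1, the iovec count used by the
 * vectored command path; otherwise addr/data_len describe the buffer
 * directly.
 */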
int fio_nvme_uring_cmd_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
			    struct iovec *iov, struct nvme_dsm_range *dsm)
{
	struct nvme_data *data = FILE_ENG_DATA(io_u->file);
	__u64 slba;
	__u32 nlb;

	memset(cmd, 0, sizeof(struct nvme_uring_cmd));

	switch (io_u->ddir) {
	case DDIR_READ:
		cmd->opcode = nvme_cmd_read;
		break;
	case DDIR_WRITE:
		cmd->opcode = nvme_cmd_write;
		break;
	case DDIR_TRIM:
		fio_nvme_uring_cmd_trim_prep(cmd, io_u, dsm);
		return 0;
	default:
		return -ENOTSUP;
	}

	slba = get_slba(data, io_u);
	nlb = get_nlb(data, io_u);

	/* cdw10 and cdw11 hold the starting LBA */
	cmd->cdw10 = slba & 0xffffffff;
	cmd->cdw11 = slba >> 32;
	/* cdw12 holds the 0-based number of LBAs for read/write */
	cmd->cdw12 = nlb | (io_u->dtype << 20);
	cmd->cdw13 = io_u->dspec << 16;
	if (iov) {
		iov->iov_base = io_u->xfer_buf;
		iov->iov_len = io_u->xfer_buflen;
		cmd->addr = (__u64)(uintptr_t)iov;
		cmd->data_len = 1;
	} else {
		cmd->addr = (__u64)(uintptr_t)io_u->xfer_buf;
		cmd->data_len = io_u->xfer_buflen;
	}
	cmd->nsid = data->nsid;
	return 0;
}

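/*
 * Issue the Identify admin command through the NVMe admin ioctl: cdw10
 * selects the CNS value and cdw11 carries the command set identifier (CSI).
 */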
static int nvme_identify(int fd, __u32 nsid, enum nvme_identify_cns cns,
			 enum nvme_csi csi, void *data)
{
	struct nvme_passthru_cmd cmd = {
		.opcode = nvme_admin_identify,
		.nsid = nsid,
		.addr = (__u64)(uintptr_t)data,
		.data_len = NVME_IDENTIFY_DATA_SIZE,
		.cdw10 = cns,
		.cdw11 = csi << NVME_IDENTIFY_CSI_SHIFT,
		.timeout_ms = NVME_DEFAULT_IOCTL_TIMEOUT,
	};

	return ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd);
}

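/*
 * Query the namespace backing an ng char device: fetch the namespace id,
 * identify the namespace, and report the LBA data size, metadata size and
 * capacity in logical blocks back to the ioengine.
 */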
int fio_nvme_get_info(struct fio_file *f, __u32 *nsid, __u32 *lba_sz,
		      __u32 *ms, __u64 *nlba)
{
	struct nvme_id_ns ns;
	int namespace_id;
	int fd, err;
	__u32 format_idx;

	if (f->filetype != FIO_TYPE_CHAR) {
		log_err("ioengine io_uring_cmd only works with nvme ns "
			"generic char devices (/dev/ngXnY)\n");
		return 1;
	}

	fd = open(f->file_name, O_RDONLY);
	if (fd < 0)
		return -errno;

	namespace_id = ioctl(fd, NVME_IOCTL_ID);
	if (namespace_id < 0) {
		err = -errno;
		log_err("%s: failed to fetch namespace-id\n", f->file_name);
		goto out;
	}

	/*
	 * Identify the namespace to get the namespace size in LBAs and
	 * the LBA data size.
	 */
	err = nvme_identify(fd, namespace_id, NVME_IDENTIFY_CNS_NS,
			    NVME_CSI_NVM, &ns);
	if (err) {
		log_err("%s: failed to fetch identify namespace\n",
			f->file_name);
		close(fd);
		return err;
	}

	*nsid = namespace_id;

	/*
	 * A namespace supports a maximum of 16 or 64 LBA formats.
	 * Bits 0-3 of flbas hold the lsb of the format index used to format
	 * the namespace, and bits 5-6 hold the msb (used only when more than
	 * 16 formats are supported).
	 */
	if (ns.nlbaf < 16)
		format_idx = ns.flbas & 0xf;
	else
		format_idx = (ns.flbas & 0xf) + (((ns.flbas >> 5) & 0x3) << 4);

	*lba_sz = 1 << ns.lbaf[format_idx].ds;

	/*
	 * Only extended LBA formats are supported.
	 * Bit 4 of flbas indicates whether metadata is transferred at the
	 * end of the logical block, creating an extended LBA.
	 */
	*ms = le16_to_cpu(ns.lbaf[format_idx].ms);
	if (*ms && !((ns.flbas >> 4) & 0x1)) {
		log_err("%s: only extended logical block can be supported\n",
			f->file_name);
		err = -ENOTSUP;
		goto out;
	}

	/* Check for end-to-end data protection support */
	if (ns.dps & 0x3) {
		log_err("%s: end to end data protection not supported\n",
			f->file_name);
		err = -ENOTSUP;
		goto out;
	}
	*nlba = ns.nsze;

out:
	close(fd);
	return err;
}

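/*
 * Report the zoned model of the namespace: if the ZNS command-set identify
 * commands succeed the device is treated as host managed, otherwise
 * ZBD_NONE is reported and the caller falls back to a non-zoned model.
 */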
int fio_nvme_get_zoned_model(struct thread_data *td, struct fio_file *f,
			     enum zbd_zoned_model *model)
{
	struct nvme_data *data = FILE_ENG_DATA(f);
	struct nvme_id_ns ns;
	struct nvme_passthru_cmd cmd;
	int fd, ret = 0;

	if (f->filetype != FIO_TYPE_CHAR)
		return -EINVAL;

	/* File is not yet opened */
	fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
	if (fd < 0)
		return -errno;

	/* Using nvme_id_ns for data as the structure sizes are the same */
	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_CTRL,
			    NVME_CSI_ZNS, &ns);
	if (ret) {
		*model = ZBD_NONE;
		goto out;
	}

	memset(&cmd, 0, sizeof(struct nvme_passthru_cmd));

	/* Using nvme_id_ns for data as the structure sizes are the same */
	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
			    NVME_CSI_ZNS, &ns);
	if (ret) {
		*model = ZBD_NONE;
		goto out;
	}

	*model = ZBD_HOST_MANAGED;
out:
	close(fd);
	return 0;
}

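/*
 * Issue a ZNS Zone Management Receive command to fetch a zone report
 * starting at slba: cdw12 carries the buffer size in dwords minus one,
 * and cdw13 selects the report-zones action plus any reporting options.
 */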
static int nvme_report_zones(int fd, __u32 nsid, __u64 slba, __u32 zras_feat,
			     __u32 data_len, void *data)
{
	struct nvme_passthru_cmd cmd = {
		.opcode = nvme_zns_cmd_mgmt_recv,
		.nsid = nsid,
		.addr = (__u64)(uintptr_t)data,
		.data_len = data_len,
		.cdw10 = slba & 0xffffffff,
		.cdw11 = slba >> 32,
		.cdw12 = (data_len >> 2) - 1,
		.cdw13 = NVME_ZNS_ZRA_REPORT_ZONES | zras_feat,
		.timeout_ms = NVME_DEFAULT_IOCTL_TIMEOUT,
	};

	return ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
}

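/*
 * Walk the namespace in chunks of up to 1024 zones, converting each NVMe
 * zone descriptor into the generic zbd_zone layout fio expects (start,
 * length, write pointer, capacity, type and condition, in bytes).
 */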
int fio_nvme_report_zones(struct thread_data *td, struct fio_file *f,
			  uint64_t offset, struct zbd_zone *zbdz,
			  unsigned int nr_zones)
{
	struct nvme_data *data = FILE_ENG_DATA(f);
	struct nvme_zone_report *zr;
	struct nvme_zns_id_ns zns_ns;
	struct nvme_id_ns ns;
	unsigned int i = 0, j, zones_fetched = 0;
	unsigned int max_zones, zones_chunks = 1024;
	int fd, ret = 0;
	__u32 zr_len;
	__u64 zlen;

	/* File is not yet opened */
	fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
	if (fd < 0)
		return -errno;

	zones_fetched = 0;
	zr_len = sizeof(*zr) + (zones_chunks * sizeof(struct nvme_zns_desc));
	zr = calloc(1, zr_len);
	if (!zr) {
		close(fd);
		return -ENOMEM;
	}

	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_NS,
			    NVME_CSI_NVM, &ns);
	if (ret) {
		log_err("%s: nvme_identify_ns failed, err=%d\n", f->file_name,
			ret);
		goto out;
	}

	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
			    NVME_CSI_ZNS, &zns_ns);
	if (ret) {
		log_err("%s: nvme_zns_identify_ns failed, err=%d\n",
			f->file_name, ret);
		goto out;
	}
	zlen = zns_ns.lbafe[ns.flbas & 0x0f].zsze << data->lba_shift;

	max_zones = (f->real_file_size - offset) / zlen;
	if (max_zones < nr_zones)
		nr_zones = max_zones;

	if (nr_zones < zones_chunks)
		zones_chunks = nr_zones;

	while (zones_fetched < nr_zones) {
		if (zones_fetched + zones_chunks >= nr_zones) {
			zones_chunks = nr_zones - zones_fetched;
			zr_len = sizeof(*zr) + (zones_chunks * sizeof(struct nvme_zns_desc));
		}
		ret = nvme_report_zones(fd, data->nsid, offset >> data->lba_shift,
					NVME_ZNS_ZRAS_FEAT_ERZ, zr_len, (void *)zr);
		if (ret) {
			log_err("%s: nvme_zns_report_zones failed, err=%d\n",
				f->file_name, ret);
			goto out;
		}

		/* Transform the zone-report */
		for (j = 0; j < zr->nr_zones; j++, i++) {
			struct nvme_zns_desc *desc = (struct nvme_zns_desc *)&(zr->entries[j]);

			zbdz[i].start = desc->zslba << data->lba_shift;
			zbdz[i].len = zlen;
			zbdz[i].wp = desc->wp << data->lba_shift;
			zbdz[i].capacity = desc->zcap << data->lba_shift;

			/* Zone Type is stored in the first 4 bits. */
			switch (desc->zt & 0x0f) {
			case NVME_ZONE_TYPE_SEQWRITE_REQ:
				zbdz[i].type = ZBD_ZONE_TYPE_SWR;
				break;
			default:
				log_err("%s: invalid type for zone at offset %llu.\n",
					f->file_name, (unsigned long long) desc->zslba);
				ret = -EIO;
				goto out;
			}

			/* Zone State is stored in the last 4 bits. */
			switch (desc->zs >> 4) {
			case NVME_ZNS_ZS_EMPTY:
				zbdz[i].cond = ZBD_ZONE_COND_EMPTY;
				break;
			case NVME_ZNS_ZS_IMPL_OPEN:
				zbdz[i].cond = ZBD_ZONE_COND_IMP_OPEN;
				break;
			case NVME_ZNS_ZS_EXPL_OPEN:
				zbdz[i].cond = ZBD_ZONE_COND_EXP_OPEN;
				break;
			case NVME_ZNS_ZS_CLOSED:
				zbdz[i].cond = ZBD_ZONE_COND_CLOSED;
				break;
			case NVME_ZNS_ZS_FULL:
				zbdz[i].cond = ZBD_ZONE_COND_FULL;
				break;
			case NVME_ZNS_ZS_READ_ONLY:
			case NVME_ZNS_ZS_OFFLINE:
			default:
				/* Treat all these conditions as offline (don't use!) */
				zbdz[i].cond = ZBD_ZONE_COND_OFFLINE;
				zbdz[i].wp = zbdz[i].start;
			}
		}
		zones_fetched += zr->nr_zones;
		offset += zr->nr_zones * zlen;
	}

	ret = zones_fetched;
out:
	free(zr);
	close(fd);

	return ret;
}

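/*
 * Reset the write pointer of every zone covered by [offset, offset+length)
 * by issuing one Zone Management Send (reset action) per zone.
 */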
int fio_nvme_reset_wp(struct thread_data *td, struct fio_file *f,
		      uint64_t offset, uint64_t length)
{
	struct nvme_data *data = FILE_ENG_DATA(f);
	unsigned int nr_zones;
	unsigned long long zslba;
	int i, fd, ret = 0;

	/* If the file is not yet opened, open it for this function. */
	fd = f->fd;
	if (fd < 0) {
		fd = open(f->file_name, O_RDWR | O_LARGEFILE);
		if (fd < 0)
			return -errno;
	}

	zslba = offset >> data->lba_shift;
	nr_zones = (length + td->o.zone_size - 1) / td->o.zone_size;

	for (i = 0; i < nr_zones; i++, zslba += (td->o.zone_size >> data->lba_shift)) {
		struct nvme_passthru_cmd cmd = {
			.opcode = nvme_zns_cmd_mgmt_send,
			.nsid = data->nsid,
			.cdw10 = zslba & 0xffffffff,
			.cdw11 = zslba >> 32,
			.cdw13 = NVME_ZNS_ZSA_RESET,
			.addr = (__u64)(uintptr_t)NULL,
			.data_len = 0,
			.timeout_ms = NVME_DEFAULT_IOCTL_TIMEOUT,
		};

		ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
	}

	if (f->fd < 0)
		close(fd);
	return -ret;
}

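/*
 * Read the maximum open resources (mor) field from the ZNS identify
 * namespace data; the field is 0-based, hence the +1.
 */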
int fio_nvme_get_max_open_zones(struct thread_data *td, struct fio_file *f,
				unsigned int *max_open_zones)
{
	struct nvme_data *data = FILE_ENG_DATA(f);
	struct nvme_zns_id_ns zns_ns;
	int fd, ret = 0;

	fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
	if (fd < 0)
		return -errno;

	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
			    NVME_CSI_ZNS, &zns_ns);
	if (ret) {
		log_err("%s: nvme_zns_identify_ns failed, err=%d\n",
			f->file_name, ret);
		goto out;
	}

	*max_open_zones = zns_ns.mor + 1;
out:
	close(fd);
	return ret;
}

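/*
 * Fetch the FDP reclaim unit handle status via the I/O Management Receive
 * command: cdw10 selects the management operation (1 here, the reclaim
 * unit handle status operation) and cdw11 carries the buffer size in
 * dwords minus one.
 */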
static inline int nvme_fdp_reclaim_unit_handle_status(int fd, __u32 nsid,
						      __u32 data_len, void *data)
{
	struct nvme_passthru_cmd cmd = {
		.opcode = nvme_cmd_io_mgmt_recv,
		.nsid = nsid,
		.addr = (__u64)(uintptr_t)data,
		.data_len = data_len,
		.cdw10 = 1,
		.cdw11 = (data_len >> 2) - 1,
	};

	return ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
}

int fio_nvme_iomgmt_ruhs(struct thread_data *td, struct fio_file *f,
			 struct nvme_fdp_ruh_status *ruhs, __u32 bytes)
{
	struct nvme_data *data = FILE_ENG_DATA(f);
	int fd, ret;

	fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
	if (fd < 0)
		return -errno;

	ret = nvme_fdp_reclaim_unit_handle_status(fd, data->nsid, bytes, ruhs);
	if (ret) {
		log_err("%s: nvme_fdp_reclaim_unit_handle_status failed, err=%d\n",
			f->file_name, ret);
		errno = ENOTSUP;
	} else
		errno = 0;

	ret = -errno;
	close(fd);
	return ret;
}