engines/io_uring_cmd: add extended LBA support
[fio.git] / engines / nvme.c
CommitLineData
b3d5e3fd
AK
/*
 * nvme structure declarations and helper functions for the
 * io_uring_cmd engine.
 */
5
6#include "nvme.h"
7
8int fio_nvme_uring_cmd_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
9 struct iovec *iov)
10{
11 struct nvme_data *data = FILE_ENG_DATA(io_u->file);
12 __u64 slba;
13 __u32 nlb;
14
15 memset(cmd, 0, sizeof(struct nvme_uring_cmd));
16
17 if (io_u->ddir == DDIR_READ)
18 cmd->opcode = nvme_cmd_read;
19 else if (io_u->ddir == DDIR_WRITE)
20 cmd->opcode = nvme_cmd_write;
21 else
22 return -ENOTSUP;
23
345fa8fd
AK
24 if (data->lba_ext) {
25 slba = io_u->offset / data->lba_ext;
26 nlb = (io_u->xfer_buflen / data->lba_ext) - 1;
27 } else {
28 slba = io_u->offset >> data->lba_shift;
29 nlb = (io_u->xfer_buflen >> data->lba_shift) - 1;
30 }
b3d5e3fd
AK
31
32 /* cdw10 and cdw11 represent starting lba */
33 cmd->cdw10 = slba & 0xffffffff;
34 cmd->cdw11 = slba >> 32;
35 /* cdw12 represent number of lba's for read/write */
a7e8aae0
KB
36 cmd->cdw12 = nlb | (io_u->dtype << 20);
37 cmd->cdw13 = io_u->dspec << 16;
b3d5e3fd
AK
38 if (iov) {
39 iov->iov_base = io_u->xfer_buf;
40 iov->iov_len = io_u->xfer_buflen;
41 cmd->addr = (__u64)(uintptr_t)iov;
42 cmd->data_len = 1;
43 } else {
44 cmd->addr = (__u64)(uintptr_t)io_u->xfer_buf;
45 cmd->data_len = io_u->xfer_buflen;
46 }
47 cmd->nsid = data->nsid;
48 return 0;
49}
50
16be6037
AK
51static int nvme_trim(int fd, __u32 nsid, __u32 nr_range, __u32 data_len,
52 void *data)
53{
54 struct nvme_passthru_cmd cmd = {
55 .opcode = nvme_cmd_dsm,
56 .nsid = nsid,
57 .addr = (__u64)(uintptr_t)data,
58 .data_len = data_len,
59 .cdw10 = nr_range - 1,
60 .cdw11 = NVME_ATTRIBUTE_DEALLOCATE,
61 };
62
63 return ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
64}
65
66int fio_nvme_trim(const struct thread_data *td, struct fio_file *f,
67 unsigned long long offset, unsigned long long len)
68{
69 struct nvme_data *data = FILE_ENG_DATA(f);
70 struct nvme_dsm_range dsm;
71 int ret;
72
345fa8fd
AK
73 if (data->lba_ext) {
74 dsm.nlb = len / data->lba_ext;
75 dsm.slba = offset / data->lba_ext;
76 } else {
77 dsm.nlb = len >> data->lba_shift;
78 dsm.slba = offset >> data->lba_shift;
79 }
16be6037
AK
80
81 ret = nvme_trim(f->fd, data->nsid, 1, sizeof(struct nvme_dsm_range),
82 &dsm);
83 if (ret)
84 log_err("%s: nvme_trim failed for offset %llu and len %llu, err=%d\n",
85 f->file_name, offset, len, ret);
86
87 return ret;
88}
89
b3d5e3fd
AK
90static int nvme_identify(int fd, __u32 nsid, enum nvme_identify_cns cns,
91 enum nvme_csi csi, void *data)
92{
93 struct nvme_passthru_cmd cmd = {
94 .opcode = nvme_admin_identify,
95 .nsid = nsid,
96 .addr = (__u64)(uintptr_t)data,
97 .data_len = NVME_IDENTIFY_DATA_SIZE,
98 .cdw10 = cns,
99 .cdw11 = csi << NVME_IDENTIFY_CSI_SHIFT,
100 .timeout_ms = NVME_DEFAULT_IOCTL_TIMEOUT,
101 };
102
103 return ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd);
104}
105
106int fio_nvme_get_info(struct fio_file *f, __u32 *nsid, __u32 *lba_sz,
345fa8fd 107 __u32 *ms, __u64 *nlba)
b3d5e3fd
AK
108{
109 struct nvme_id_ns ns;
37a0881f 110 int namespace_id;
b3d5e3fd 111 int fd, err;
01a7d384 112 __u32 format_idx;
b3d5e3fd
AK
113
114 if (f->filetype != FIO_TYPE_CHAR) {
115 log_err("ioengine io_uring_cmd only works with nvme ns "
116 "generic char devices (/dev/ngXnY)\n");
117 return 1;
118 }
119
120 fd = open(f->file_name, O_RDONLY);
121 if (fd < 0)
122 return -errno;
123
124 namespace_id = ioctl(fd, NVME_IOCTL_ID);
125 if (namespace_id < 0) {
af10f514 126 err = -errno;
345fa8fd
AK
127 log_err("%s: failed to fetch namespace-id\n", f->file_name);
128 goto out;
b3d5e3fd
AK
129 }
130
131 /*
132 * Identify namespace to get namespace-id, namespace size in LBA's
133 * and LBA data size.
134 */
135 err = nvme_identify(fd, namespace_id, NVME_IDENTIFY_CNS_NS,
136 NVME_CSI_NVM, &ns);
137 if (err) {
345fa8fd
AK
138 log_err("%s: failed to fetch identify namespace\n",
139 f->file_name);
b3d5e3fd
AK
140 close(fd);
141 return err;
142 }
143
144 *nsid = namespace_id;
01a7d384
AK
145
146 /*
147 * 16 or 64 as maximum number of supported LBA formats.
148 * From flbas bit 0-3 indicates lsb and bit 5-6 indicates msb
149 * of the format index used to format the namespace.
150 */
151 if (ns.nlbaf < 16)
152 format_idx = ns.flbas & 0xf;
153 else
154 format_idx = (ns.flbas & 0xf) + (((ns.flbas >> 5) & 0x3) << 4);
155
156 *lba_sz = 1 << ns.lbaf[format_idx].ds;
345fa8fd
AK
157
158 /*
159 * Only extended LBA can be supported.
160 * Bit 4 for flbas indicates if metadata is transferred at the end of
161 * logical block creating an extended LBA.
162 */
163 *ms = le16_to_cpu(ns.lbaf[format_idx].ms);
164 if (*ms && !((ns.flbas >> 4) & 0x1)) {
165 log_err("%s: only extended logical block can be supported\n",
166 f->file_name);
167 err = -ENOTSUP;
168 goto out;
169 }
170
171 /* Check for end to end data protection support */
172 if (ns.dps & 0x3) {
173 log_err("%s: end to end data protection not supported\n",
174 f->file_name);
175 err = -ENOTSUP;
176 goto out;
177 }
b3d5e3fd
AK
178 *nlba = ns.nsze;
179
345fa8fd 180out:
b3d5e3fd 181 close(fd);
345fa8fd 182 return err;
b3d5e3fd 183}
3d05e0ff
AK
184
185int fio_nvme_get_zoned_model(struct thread_data *td, struct fio_file *f,
186 enum zbd_zoned_model *model)
187{
188 struct nvme_data *data = FILE_ENG_DATA(f);
189 struct nvme_id_ns ns;
190 struct nvme_passthru_cmd cmd;
191 int fd, ret = 0;
192
193 if (f->filetype != FIO_TYPE_CHAR)
194 return -EINVAL;
195
196 /* File is not yet opened */
197 fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
198 if (fd < 0)
199 return -errno;
200
201 /* Using nvme_id_ns for data as sizes are same */
202 ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_CTRL,
203 NVME_CSI_ZNS, &ns);
204 if (ret) {
205 *model = ZBD_NONE;
206 goto out;
207 }
208
209 memset(&cmd, 0, sizeof(struct nvme_passthru_cmd));
210
211 /* Using nvme_id_ns for data as sizes are same */
212 ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
213 NVME_CSI_ZNS, &ns);
214 if (ret) {
215 *model = ZBD_NONE;
216 goto out;
217 }
218
219 *model = ZBD_HOST_MANAGED;
220out:
221 close(fd);
222 return 0;
223}
224
225static int nvme_report_zones(int fd, __u32 nsid, __u64 slba, __u32 zras_feat,
226 __u32 data_len, void *data)
227{
228 struct nvme_passthru_cmd cmd = {
229 .opcode = nvme_zns_cmd_mgmt_recv,
230 .nsid = nsid,
231 .addr = (__u64)(uintptr_t)data,
232 .data_len = data_len,
233 .cdw10 = slba & 0xffffffff,
234 .cdw11 = slba >> 32,
235 .cdw12 = (data_len >> 2) - 1,
236 .cdw13 = NVME_ZNS_ZRA_REPORT_ZONES | zras_feat,
237 .timeout_ms = NVME_DEFAULT_IOCTL_TIMEOUT,
238 };
239
240 return ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
241}
242
243int fio_nvme_report_zones(struct thread_data *td, struct fio_file *f,
244 uint64_t offset, struct zbd_zone *zbdz,
245 unsigned int nr_zones)
246{
247 struct nvme_data *data = FILE_ENG_DATA(f);
248 struct nvme_zone_report *zr;
249 struct nvme_zns_id_ns zns_ns;
250 struct nvme_id_ns ns;
251 unsigned int i = 0, j, zones_fetched = 0;
252 unsigned int max_zones, zones_chunks = 1024;
253 int fd, ret = 0;
254 __u32 zr_len;
255 __u64 zlen;
256
257 /* File is not yet opened */
258 fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
259 if (fd < 0)
260 return -errno;
261
262 zones_fetched = 0;
263 zr_len = sizeof(*zr) + (zones_chunks * sizeof(struct nvme_zns_desc));
264 zr = calloc(1, zr_len);
3efcb23f
JA
265 if (!zr) {
266 close(fd);
3d05e0ff 267 return -ENOMEM;
3efcb23f 268 }
3d05e0ff
AK
269
270 ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_NS,
271 NVME_CSI_NVM, &ns);
272 if (ret) {
273 log_err("%s: nvme_identify_ns failed, err=%d\n", f->file_name,
274 ret);
275 goto out;
276 }
277
278 ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
279 NVME_CSI_ZNS, &zns_ns);
280 if (ret) {
281 log_err("%s: nvme_zns_identify_ns failed, err=%d\n",
282 f->file_name, ret);
283 goto out;
284 }
285 zlen = zns_ns.lbafe[ns.flbas & 0x0f].zsze << data->lba_shift;
286
287 max_zones = (f->real_file_size - offset) / zlen;
288 if (max_zones < nr_zones)
289 nr_zones = max_zones;
290
291 if (nr_zones < zones_chunks)
292 zones_chunks = nr_zones;
293
294 while (zones_fetched < nr_zones) {
295 if (zones_fetched + zones_chunks >= nr_zones) {
296 zones_chunks = nr_zones - zones_fetched;
297 zr_len = sizeof(*zr) + (zones_chunks * sizeof(struct nvme_zns_desc));
298 }
299 ret = nvme_report_zones(fd, data->nsid, offset >> data->lba_shift,
300 NVME_ZNS_ZRAS_FEAT_ERZ, zr_len, (void *)zr);
301 if (ret) {
302 log_err("%s: nvme_zns_report_zones failed, err=%d\n",
303 f->file_name, ret);
304 goto out;
305 }
306
307 /* Transform the zone-report */
308 for (j = 0; j < zr->nr_zones; j++, i++) {
309 struct nvme_zns_desc *desc = (struct nvme_zns_desc *)&(zr->entries[j]);
310
311 zbdz[i].start = desc->zslba << data->lba_shift;
312 zbdz[i].len = zlen;
313 zbdz[i].wp = desc->wp << data->lba_shift;
314 zbdz[i].capacity = desc->zcap << data->lba_shift;
315
316 /* Zone Type is stored in first 4 bits. */
317 switch (desc->zt & 0x0f) {
318 case NVME_ZONE_TYPE_SEQWRITE_REQ:
319 zbdz[i].type = ZBD_ZONE_TYPE_SWR;
320 break;
321 default:
322 log_err("%s: invalid type for zone at offset %llu.\n",
2fa0ab21 323 f->file_name, (unsigned long long) desc->zslba);
3d05e0ff
AK
324 ret = -EIO;
325 goto out;
326 }
327
328 /* Zone State is stored in last 4 bits. */
329 switch (desc->zs >> 4) {
330 case NVME_ZNS_ZS_EMPTY:
331 zbdz[i].cond = ZBD_ZONE_COND_EMPTY;
332 break;
333 case NVME_ZNS_ZS_IMPL_OPEN:
334 zbdz[i].cond = ZBD_ZONE_COND_IMP_OPEN;
335 break;
336 case NVME_ZNS_ZS_EXPL_OPEN:
337 zbdz[i].cond = ZBD_ZONE_COND_EXP_OPEN;
338 break;
339 case NVME_ZNS_ZS_CLOSED:
340 zbdz[i].cond = ZBD_ZONE_COND_CLOSED;
341 break;
342 case NVME_ZNS_ZS_FULL:
343 zbdz[i].cond = ZBD_ZONE_COND_FULL;
344 break;
345 case NVME_ZNS_ZS_READ_ONLY:
346 case NVME_ZNS_ZS_OFFLINE:
347 default:
348 /* Treat all these conditions as offline (don't use!) */
349 zbdz[i].cond = ZBD_ZONE_COND_OFFLINE;
350 zbdz[i].wp = zbdz[i].start;
351 }
352 }
353 zones_fetched += zr->nr_zones;
354 offset += zr->nr_zones * zlen;
355 }
356
357 ret = zones_fetched;
358out:
359 free(zr);
360 close(fd);
361
362 return ret;
363}
364
365int fio_nvme_reset_wp(struct thread_data *td, struct fio_file *f,
366 uint64_t offset, uint64_t length)
367{
368 struct nvme_data *data = FILE_ENG_DATA(f);
369 unsigned int nr_zones;
370 unsigned long long zslba;
371 int i, fd, ret = 0;
372
373 /* If the file is not yet opened, open it for this function. */
374 fd = f->fd;
375 if (fd < 0) {
376 fd = open(f->file_name, O_RDWR | O_LARGEFILE);
377 if (fd < 0)
378 return -errno;
379 }
380
381 zslba = offset >> data->lba_shift;
382 nr_zones = (length + td->o.zone_size - 1) / td->o.zone_size;
383
384 for (i = 0; i < nr_zones; i++, zslba += (td->o.zone_size >> data->lba_shift)) {
385 struct nvme_passthru_cmd cmd = {
386 .opcode = nvme_zns_cmd_mgmt_send,
387 .nsid = data->nsid,
388 .cdw10 = zslba & 0xffffffff,
389 .cdw11 = zslba >> 32,
390 .cdw13 = NVME_ZNS_ZSA_RESET,
391 .addr = (__u64)(uintptr_t)NULL,
392 .data_len = 0,
393 .timeout_ms = NVME_DEFAULT_IOCTL_TIMEOUT,
394 };
395
396 ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
397 }
398
399 if (f->fd < 0)
400 close(fd);
401 return -ret;
402}
403
404int fio_nvme_get_max_open_zones(struct thread_data *td, struct fio_file *f,
405 unsigned int *max_open_zones)
406{
407 struct nvme_data *data = FILE_ENG_DATA(f);
408 struct nvme_zns_id_ns zns_ns;
409 int fd, ret = 0;
410
411 fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
412 if (fd < 0)
413 return -errno;
414
415 ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
416 NVME_CSI_ZNS, &zns_ns);
417 if (ret) {
418 log_err("%s: nvme_zns_identify_ns failed, err=%d\n",
419 f->file_name, ret);
420 goto out;
421 }
422
423 *max_open_zones = zns_ns.mor + 1;
424out:
425 close(fd);
426 return ret;
427}
a7e8aae0
KB
428
429static inline int nvme_fdp_reclaim_unit_handle_status(int fd, __u32 nsid,
430 __u32 data_len, void *data)
431{
432 struct nvme_passthru_cmd cmd = {
433 .opcode = nvme_cmd_io_mgmt_recv,
434 .nsid = nsid,
435 .addr = (__u64)(uintptr_t)data,
436 .data_len = data_len,
437 .cdw10 = 1,
438 .cdw11 = (data_len >> 2) - 1,
439 };
440
441 return ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
442}
443
444int fio_nvme_iomgmt_ruhs(struct thread_data *td, struct fio_file *f,
445 struct nvme_fdp_ruh_status *ruhs, __u32 bytes)
446{
447 struct nvme_data *data = FILE_ENG_DATA(f);
448 int fd, ret;
449
450 fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
451 if (fd < 0)
452 return -errno;
453
454 ret = nvme_fdp_reclaim_unit_handle_status(fd, data->nsid, bytes, ruhs);
455 if (ret) {
456 log_err("%s: nvme_fdp_reclaim_unit_handle_status failed, err=%d\n",
457 f->file_name, ret);
458 errno = ENOTSUP;
459 } else
460 errno = 0;
461
af10f514 462 ret = -errno;
a7e8aae0 463 close(fd);
af10f514 464 return ret;
a7e8aae0 465}