// SPDX-License-Identifier: GPL-2.0
/*
 * nvme structure declarations and helper functions for the
 * io_uring_cmd engine.
 */

#include "nvme.h"
#include "../crc/crc-t10dif.h"
#include "../crc/crc64.h"

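/*
 * Compute the offset of the protection information within each block (or its
 * metadata) and, for writes, fill in the 16-bit guard, application and
 * reference tags before the command is submitted.
 */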
static void fio_nvme_generate_pi_16b_guard(struct nvme_data *data,
					   struct io_u *io_u,
					   struct nvme_cmd_ext_io_opts *opts)
{
	struct nvme_pi_data *pi_data = io_u->engine_data;
	struct nvme_16b_guard_pif *pi;
	unsigned char *buf = io_u->xfer_buf;
	unsigned char *md_buf = io_u->mmap_data;
	__u64 slba = get_slba(data, io_u->offset);
	__u32 nlb = get_nlb(data, io_u->xfer_buflen) + 1;
	__u32 lba_num = 0;
	__u16 guard = 0;

	if (data->pi_loc) {
		if (data->lba_ext)
			pi_data->interval = data->lba_ext - data->ms;
		else
			pi_data->interval = 0;
	} else {
		if (data->lba_ext)
			pi_data->interval = data->lba_ext - sizeof(struct nvme_16b_guard_pif);
		else
			pi_data->interval = data->ms - sizeof(struct nvme_16b_guard_pif);
	}

	if (io_u->ddir != DDIR_WRITE)
		return;

	while (lba_num < nlb) {
		if (data->lba_ext)
			pi = (struct nvme_16b_guard_pif *)(buf + pi_data->interval);
		else
			pi = (struct nvme_16b_guard_pif *)(md_buf + pi_data->interval);

		if (opts->io_flags & NVME_IO_PRINFO_PRCHK_GUARD) {
			if (data->lba_ext) {
				guard = fio_crc_t10dif(0, buf, pi_data->interval);
			} else {
				guard = fio_crc_t10dif(0, buf, data->lba_size);
				guard = fio_crc_t10dif(guard, md_buf, pi_data->interval);
			}
			pi->guard = cpu_to_be16(guard);
		}

		if (opts->io_flags & NVME_IO_PRINFO_PRCHK_APP)
			pi->apptag = cpu_to_be16(pi_data->apptag);

		if (opts->io_flags & NVME_IO_PRINFO_PRCHK_REF) {
			switch (data->pi_type) {
			case NVME_NS_DPS_PI_TYPE1:
			case NVME_NS_DPS_PI_TYPE2:
				pi->srtag = cpu_to_be32((__u32)slba + lba_num);
				break;
			case NVME_NS_DPS_PI_TYPE3:
				break;
			}
		}
		if (data->lba_ext) {
			buf += data->lba_ext;
		} else {
			buf += data->lba_size;
			md_buf += data->ms;
		}
		lba_num++;
	}
}

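/*
 * Check the 16-bit guard, application and reference tags of every block in
 * the io_u buffer, skipping blocks that carry the PI escape values. Returns
 * -EIO on the first mismatch.
 */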
static int fio_nvme_verify_pi_16b_guard(struct nvme_data *data,
					struct io_u *io_u)
{
	struct nvme_pi_data *pi_data = io_u->engine_data;
	struct nvme_16b_guard_pif *pi;
	struct fio_file *f = io_u->file;
	unsigned char *buf = io_u->xfer_buf;
	unsigned char *md_buf = io_u->mmap_data;
	__u64 slba = get_slba(data, io_u->offset);
	__u32 nlb = get_nlb(data, io_u->xfer_buflen) + 1;
	__u32 lba_num = 0;
	__u16 unmask_app, unmask_app_exp, guard = 0;

	while (lba_num < nlb) {
		if (data->lba_ext)
			pi = (struct nvme_16b_guard_pif *)(buf + pi_data->interval);
		else
			pi = (struct nvme_16b_guard_pif *)(md_buf + pi_data->interval);

		if (data->pi_type == NVME_NS_DPS_PI_TYPE3) {
			if (pi->apptag == NVME_PI_APP_DISABLE &&
			    pi->srtag == NVME_PI_REF_DISABLE)
				goto next;
		} else if (data->pi_type == NVME_NS_DPS_PI_TYPE1 ||
			   data->pi_type == NVME_NS_DPS_PI_TYPE2) {
			if (pi->apptag == NVME_PI_APP_DISABLE)
				goto next;
		}

		if (pi_data->io_flags & NVME_IO_PRINFO_PRCHK_GUARD) {
			if (data->lba_ext) {
				guard = fio_crc_t10dif(0, buf, pi_data->interval);
			} else {
				guard = fio_crc_t10dif(0, buf, data->lba_size);
				guard = fio_crc_t10dif(guard, md_buf, pi_data->interval);
			}
			if (be16_to_cpu(pi->guard) != guard) {
				log_err("%s: Guard compare error: LBA: %llu Expected=%x, Actual=%x\n",
					f->file_name, (unsigned long long)slba,
					guard, be16_to_cpu(pi->guard));
				return -EIO;
			}
		}

		if (pi_data->io_flags & NVME_IO_PRINFO_PRCHK_APP) {
			unmask_app = be16_to_cpu(pi->apptag) & pi_data->apptag_mask;
			unmask_app_exp = pi_data->apptag & pi_data->apptag_mask;
			if (unmask_app != unmask_app_exp) {
				log_err("%s: APPTAG compare error: LBA: %llu Expected=%x, Actual=%x\n",
					f->file_name, (unsigned long long)slba,
					unmask_app_exp, unmask_app);
				return -EIO;
			}
		}

		if (pi_data->io_flags & NVME_IO_PRINFO_PRCHK_REF) {
			switch (data->pi_type) {
			case NVME_NS_DPS_PI_TYPE1:
			case NVME_NS_DPS_PI_TYPE2:
				if (be32_to_cpu(pi->srtag) !=
				    ((__u32)slba + lba_num)) {
					log_err("%s: REFTAG compare error: LBA: %llu Expected=%x, Actual=%x\n",
						f->file_name, (unsigned long long)slba,
						(__u32)slba + lba_num,
						be32_to_cpu(pi->srtag));
					return -EIO;
				}
				break;
			case NVME_NS_DPS_PI_TYPE3:
				break;
			}
		}
next:
		if (data->lba_ext) {
			buf += data->lba_ext;
		} else {
			buf += data->lba_size;
			md_buf += data->ms;
		}
		lba_num++;
	}

	return 0;
}

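/*
 * Same as fio_nvme_generate_pi_16b_guard(), but using the CRC64-NVMe guard
 * and the 48-bit reference tag of the 64-bit guard PI format.
 */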
static void fio_nvme_generate_pi_64b_guard(struct nvme_data *data,
					   struct io_u *io_u,
					   struct nvme_cmd_ext_io_opts *opts)
{
	struct nvme_pi_data *pi_data = io_u->engine_data;
	struct nvme_64b_guard_pif *pi;
	unsigned char *buf = io_u->xfer_buf;
	unsigned char *md_buf = io_u->mmap_data;
	uint64_t guard = 0;
	__u64 slba = get_slba(data, io_u->offset);
	__u32 nlb = get_nlb(data, io_u->xfer_buflen) + 1;
	__u32 lba_num = 0;

	if (data->pi_loc) {
		if (data->lba_ext)
			pi_data->interval = data->lba_ext - data->ms;
		else
			pi_data->interval = 0;
	} else {
		if (data->lba_ext)
			pi_data->interval = data->lba_ext - sizeof(struct nvme_64b_guard_pif);
		else
			pi_data->interval = data->ms - sizeof(struct nvme_64b_guard_pif);
	}

	if (io_u->ddir != DDIR_WRITE)
		return;

	while (lba_num < nlb) {
		if (data->lba_ext)
			pi = (struct nvme_64b_guard_pif *)(buf + pi_data->interval);
		else
			pi = (struct nvme_64b_guard_pif *)(md_buf + pi_data->interval);

		if (opts->io_flags & NVME_IO_PRINFO_PRCHK_GUARD) {
			if (data->lba_ext) {
				guard = fio_crc64_nvme(0, buf, pi_data->interval);
			} else {
				guard = fio_crc64_nvme(0, buf, data->lba_size);
				guard = fio_crc64_nvme(guard, md_buf, pi_data->interval);
			}
			pi->guard = cpu_to_be64(guard);
		}

		if (opts->io_flags & NVME_IO_PRINFO_PRCHK_APP)
			pi->apptag = cpu_to_be16(pi_data->apptag);

		if (opts->io_flags & NVME_IO_PRINFO_PRCHK_REF) {
			switch (data->pi_type) {
			case NVME_NS_DPS_PI_TYPE1:
			case NVME_NS_DPS_PI_TYPE2:
				put_unaligned_be48(slba + lba_num, pi->srtag);
				break;
			case NVME_NS_DPS_PI_TYPE3:
				break;
			}
		}
		if (data->lba_ext) {
			buf += data->lba_ext;
		} else {
			buf += data->lba_size;
			md_buf += data->ms;
		}
		lba_num++;
	}
}

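/*
 * 64-bit guard counterpart of fio_nvme_verify_pi_16b_guard(): recompute the
 * CRC64-NVMe guard and compare guard, application and reference tags.
 */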
static int fio_nvme_verify_pi_64b_guard(struct nvme_data *data,
					struct io_u *io_u)
{
	struct nvme_pi_data *pi_data = io_u->engine_data;
	struct nvme_64b_guard_pif *pi;
	struct fio_file *f = io_u->file;
	unsigned char *buf = io_u->xfer_buf;
	unsigned char *md_buf = io_u->mmap_data;
	__u64 slba = get_slba(data, io_u->offset);
	__u64 ref, ref_exp, guard = 0;
	__u32 nlb = get_nlb(data, io_u->xfer_buflen) + 1;
	__u32 lba_num = 0;
	__u16 unmask_app, unmask_app_exp;

	while (lba_num < nlb) {
		if (data->lba_ext)
			pi = (struct nvme_64b_guard_pif *)(buf + pi_data->interval);
		else
			pi = (struct nvme_64b_guard_pif *)(md_buf + pi_data->interval);

		if (data->pi_type == NVME_NS_DPS_PI_TYPE3) {
			if (pi->apptag == NVME_PI_APP_DISABLE &&
			    fio_nvme_pi_ref_escape(pi->srtag))
				goto next;
		} else if (data->pi_type == NVME_NS_DPS_PI_TYPE1 ||
			   data->pi_type == NVME_NS_DPS_PI_TYPE2) {
			if (pi->apptag == NVME_PI_APP_DISABLE)
				goto next;
		}

		if (pi_data->io_flags & NVME_IO_PRINFO_PRCHK_GUARD) {
			if (data->lba_ext) {
				guard = fio_crc64_nvme(0, buf, pi_data->interval);
			} else {
				guard = fio_crc64_nvme(0, buf, data->lba_size);
				guard = fio_crc64_nvme(guard, md_buf, pi_data->interval);
			}
			if (be64_to_cpu((uint64_t)pi->guard) != guard) {
				log_err("%s: Guard compare error: LBA: %llu Expected=%llx, Actual=%llx\n",
					f->file_name, (unsigned long long)slba,
					guard, be64_to_cpu((uint64_t)pi->guard));
				return -EIO;
			}
		}

		if (pi_data->io_flags & NVME_IO_PRINFO_PRCHK_APP) {
			unmask_app = be16_to_cpu(pi->apptag) & pi_data->apptag_mask;
			unmask_app_exp = pi_data->apptag & pi_data->apptag_mask;
			if (unmask_app != unmask_app_exp) {
				log_err("%s: APPTAG compare error: LBA: %llu Expected=%x, Actual=%x\n",
					f->file_name, (unsigned long long)slba,
					unmask_app_exp, unmask_app);
				return -EIO;
			}
		}

		if (pi_data->io_flags & NVME_IO_PRINFO_PRCHK_REF) {
			switch (data->pi_type) {
			case NVME_NS_DPS_PI_TYPE1:
			case NVME_NS_DPS_PI_TYPE2:
				ref = get_unaligned_be48(pi->srtag);
				ref_exp = (slba + lba_num) & ((1ULL << 48) - 1);
				if (ref != ref_exp) {
					log_err("%s: REFTAG compare error: LBA: %llu Expected=%llx, Actual=%llx\n",
						f->file_name, (unsigned long long)slba,
						ref_exp, ref);
					return -EIO;
				}
				break;
			case NVME_NS_DPS_PI_TYPE3:
				break;
			}
		}
next:
		if (data->lba_ext) {
			buf += data->lba_ext;
		} else {
			buf += data->lba_size;
			md_buf += data->ms;
		}
		lba_num++;
	}

	return 0;
}

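/*
 * Prepare a Dataset Management (deallocate) command, covering either the
 * single range described by the io_u or the multi-range trim payload in its
 * buffer.
 */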
static void fio_nvme_uring_cmd_trim_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
					 struct nvme_dsm *dsm)
{
	struct nvme_data *data = FILE_ENG_DATA(io_u->file);
	struct trim_range *range;
	uint8_t *buf_point;
	int i;

	cmd->opcode = nvme_cmd_dsm;
	cmd->nsid = data->nsid;
	cmd->cdw11 = NVME_ATTRIBUTE_DEALLOCATE;
	cmd->addr = (__u64) (uintptr_t) (&dsm->range[0]);

	if (dsm->nr_ranges == 1) {
		dsm->range[0].slba = get_slba(data, io_u->offset);
		/* nlb is a 1-based value for deallocate */
		dsm->range[0].nlb = get_nlb(data, io_u->xfer_buflen) + 1;
		cmd->cdw10 = 0;
		cmd->data_len = sizeof(struct nvme_dsm_range);
	} else {
		buf_point = io_u->xfer_buf;
		for (i = 0; i < io_u->number_trim; i++) {
			range = (struct trim_range *)buf_point;
			dsm->range[i].slba = get_slba(data, range->start);
			/* nlb is a 1-based value for deallocate */
			dsm->range[i].nlb = get_nlb(data, range->len) + 1;
			buf_point += sizeof(struct trim_range);
		}
		cmd->cdw10 = io_u->number_trim - 1;
		cmd->data_len = io_u->number_trim * sizeof(struct nvme_dsm_range);
	}
}

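/*
 * Fill an nvme_uring_cmd for the given io_u: read/write (optionally vectored
 * through a single iovec), trim, or flush. Returns 0 on success, -ENOTSUP for
 * unsupported data directions.
 */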
int fio_nvme_uring_cmd_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
			    struct iovec *iov, struct nvme_dsm *dsm,
			    uint8_t read_opcode, uint8_t write_opcode,
			    unsigned int cdw12_flags)
{
	struct nvme_data *data = FILE_ENG_DATA(io_u->file);
	__u64 slba;
	__u32 nlb;

	memset(cmd, 0, sizeof(struct nvme_uring_cmd));

	switch (io_u->ddir) {
	case DDIR_READ:
		cmd->opcode = read_opcode;
		break;
	case DDIR_WRITE:
		cmd->opcode = write_opcode;
		break;
	case DDIR_TRIM:
		fio_nvme_uring_cmd_trim_prep(cmd, io_u, dsm);
		return 0;
	case DDIR_SYNC:
	case DDIR_DATASYNC:
		cmd->opcode = nvme_cmd_flush;
		cmd->nsid = data->nsid;
		return 0;
	default:
		return -ENOTSUP;
	}

	slba = get_slba(data, io_u->offset);
	nlb = get_nlb(data, io_u->xfer_buflen);

	/* cdw10 and cdw11 represent the starting LBA */
	cmd->cdw10 = slba & 0xffffffff;
	cmd->cdw11 = slba >> 32;
	/* cdw12 represents the number of LBAs for read/write */
	cmd->cdw12 = nlb | (io_u->dtype << 20) | cdw12_flags;
	cmd->cdw13 = io_u->dspec << 16;
	if (iov) {
		iov->iov_base = io_u->xfer_buf;
		iov->iov_len = io_u->xfer_buflen;
		cmd->addr = (__u64)(uintptr_t)iov;
		cmd->data_len = 1;
	} else {
		/* no buffer for write zeroes */
		if (cmd->opcode != nvme_cmd_write_zeroes)
			cmd->addr = (__u64)(uintptr_t)io_u->xfer_buf;
		else
			cmd->addr = (__u64)(uintptr_t)NULL;
		cmd->data_len = io_u->xfer_buflen;
	}
	if (data->lba_shift && data->ms) {
		cmd->metadata = (__u64)(uintptr_t)io_u->mmap_data;
		cmd->metadata_len = (nlb + 1) * data->ms;
	}
	cmd->nsid = data->nsid;
	return 0;
}

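/*
 * Generate PI only for host-computed protection, i.e. when the namespace has
 * PI enabled and the PRACT bit is not set (otherwise the controller inserts
 * the PI itself).
 */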
void fio_nvme_generate_guard(struct io_u *io_u, struct nvme_cmd_ext_io_opts *opts)
{
	struct nvme_data *data = FILE_ENG_DATA(io_u->file);

	if (data->pi_type && !(opts->io_flags & NVME_IO_PRINFO_PRACT)) {
		if (data->guard_type == NVME_NVM_NS_16B_GUARD)
			fio_nvme_generate_pi_16b_guard(data, io_u, opts);
		else if (data->guard_type == NVME_NVM_NS_64B_GUARD)
			fio_nvme_generate_pi_64b_guard(data, io_u, opts);
	}
}

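/*
 * Set the PI-related command dwords (PRINFO flags, initial reference tag in
 * cdw14/cdw3, apptag and mask in cdw15) and generate the PI buffers.
 */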
void fio_nvme_pi_fill(struct nvme_uring_cmd *cmd, struct io_u *io_u,
		      struct nvme_cmd_ext_io_opts *opts)
{
	struct nvme_data *data = FILE_ENG_DATA(io_u->file);
	__u64 slba;

	slba = get_slba(data, io_u->offset);
	cmd->cdw12 |= opts->io_flags;

	fio_nvme_generate_guard(io_u, opts);

	switch (data->pi_type) {
	case NVME_NS_DPS_PI_TYPE1:
	case NVME_NS_DPS_PI_TYPE2:
		switch (data->guard_type) {
		case NVME_NVM_NS_16B_GUARD:
			if (opts->io_flags & NVME_IO_PRINFO_PRCHK_REF)
				cmd->cdw14 = (__u32)slba;
			break;
		case NVME_NVM_NS_64B_GUARD:
			if (opts->io_flags & NVME_IO_PRINFO_PRCHK_REF) {
				cmd->cdw14 = (__u32)slba;
				cmd->cdw3 = ((slba >> 32) & 0xffff);
			}
			break;
		default:
			break;
		}
		if (opts->io_flags & NVME_IO_PRINFO_PRCHK_APP)
			cmd->cdw15 = (opts->apptag_mask << 16 | opts->apptag);
		break;
	case NVME_NS_DPS_PI_TYPE3:
		if (opts->io_flags & NVME_IO_PRINFO_PRCHK_APP)
			cmd->cdw15 = (opts->apptag_mask << 16 | opts->apptag);
		break;
	case NVME_NS_DPS_PI_NONE:
		break;
	}
}

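/*
 * Verify PI on a completed I/O, dispatching on the namespace guard type.
 */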
int fio_nvme_pi_verify(struct nvme_data *data, struct io_u *io_u)
{
	int ret = 0;

	switch (data->guard_type) {
	case NVME_NVM_NS_16B_GUARD:
		ret = fio_nvme_verify_pi_16b_guard(data, io_u);
		break;
	case NVME_NVM_NS_64B_GUARD:
		ret = fio_nvme_verify_pi_64b_guard(data, io_u);
		break;
	default:
		break;
	}

	return ret;
}

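/*
 * Thin wrapper around the Identify admin command, issued through the
 * NVME_IOCTL_ADMIN_CMD ioctl on the character device.
 */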
static int nvme_identify(int fd, __u32 nsid, enum nvme_identify_cns cns,
			 enum nvme_csi csi, void *data)
{
	struct nvme_passthru_cmd cmd = {
		.opcode = nvme_admin_identify,
		.nsid = nsid,
		.addr = (__u64)(uintptr_t)data,
		.data_len = NVME_IDENTIFY_DATA_SIZE,
		.cdw10 = cns,
		.cdw11 = csi << NVME_IDENTIFY_CSI_SHIFT,
		.timeout_ms = NVME_DEFAULT_IOCTL_TIMEOUT,
	};

	return ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd);
}

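/*
 * Query controller and namespace identify data to fill in the nvme_data
 * describing the file: nsid, LBA size, metadata size, PI type/location,
 * guard type and (extended) LBA layout. *nlba receives the namespace size.
 */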
int fio_nvme_get_info(struct fio_file *f, __u64 *nlba, __u32 pi_act,
		      struct nvme_data *data)
{
	struct nvme_id_ns ns;
	struct nvme_id_ctrl ctrl;
	struct nvme_nvm_id_ns nvm_ns;
	int namespace_id;
	int fd, err;
	__u32 format_idx, elbaf;

	if (f->filetype != FIO_TYPE_CHAR) {
		log_err("ioengine io_uring_cmd only works with nvme ns "
			"generic char devices (/dev/ngXnY)\n");
		return 1;
	}

	fd = open(f->file_name, O_RDONLY);
	if (fd < 0)
		return -errno;

	namespace_id = ioctl(fd, NVME_IOCTL_ID);
	if (namespace_id < 0) {
		err = -errno;
		log_err("%s: failed to fetch namespace-id\n", f->file_name);
		goto out;
	}

	err = nvme_identify(fd, 0, NVME_IDENTIFY_CNS_CTRL, NVME_CSI_NVM, &ctrl);
	if (err) {
		log_err("%s: failed to fetch identify ctrl\n", f->file_name);
		goto out;
	}

	/*
	 * Identify namespace to get the namespace size in LBAs and the
	 * LBA data size.
	 */
	err = nvme_identify(fd, namespace_id, NVME_IDENTIFY_CNS_NS,
			    NVME_CSI_NVM, &ns);
	if (err) {
		log_err("%s: failed to fetch identify namespace\n",
			f->file_name);
		goto out;
	}

	data->nsid = namespace_id;

	/*
	 * A namespace supports at most 16 or 64 LBA formats. In flbas,
	 * bits 0-3 hold the least significant bits and bits 5-6 hold the
	 * most significant bits of the format index used to format the
	 * namespace.
	 */
	if (ns.nlbaf < 16)
		format_idx = ns.flbas & 0xf;
	else
		format_idx = (ns.flbas & 0xf) + (((ns.flbas >> 5) & 0x3) << 4);

	data->lba_size = 1 << ns.lbaf[format_idx].ds;
	data->ms = le16_to_cpu(ns.lbaf[format_idx].ms);

	/* Check for end-to-end data protection support */
	if (data->ms && (ns.dps & NVME_NS_DPS_PI_MASK))
		data->pi_type = (ns.dps & NVME_NS_DPS_PI_MASK);

	if (!data->pi_type)
		goto check_elba;

	if (ctrl.ctratt & NVME_CTRL_CTRATT_ELBAS) {
		err = nvme_identify(fd, namespace_id, NVME_IDENTIFY_CNS_CSI_NS,
				    NVME_CSI_NVM, &nvm_ns);
		if (err) {
			log_err("%s: failed to fetch identify nvm namespace\n",
				f->file_name);
			goto out;
		}

		elbaf = le32_to_cpu(nvm_ns.elbaf[format_idx]);

		/* Currently we don't support storage tags */
		if (elbaf & NVME_ID_NS_NVM_STS_MASK) {
			log_err("%s: Storage tag not supported\n",
				f->file_name);
			err = -ENOTSUP;
			goto out;
		}

		data->guard_type = (elbaf >> NVME_ID_NS_NVM_GUARD_SHIFT) &
				   NVME_ID_NS_NVM_GUARD_MASK;

		/* No 32 bit guard, as storage tag is mandatory for it */
		switch (data->guard_type) {
		case NVME_NVM_NS_16B_GUARD:
			data->pi_size = sizeof(struct nvme_16b_guard_pif);
			break;
		case NVME_NVM_NS_64B_GUARD:
			data->pi_size = sizeof(struct nvme_64b_guard_pif);
			break;
		default:
			break;
		}
	} else {
		data->guard_type = NVME_NVM_NS_16B_GUARD;
		data->pi_size = sizeof(struct nvme_16b_guard_pif);
	}

	/*
	 * When the PRACT bit is set to 1 and the metadata size equals the
	 * protection information size, the controller inserts PI on writes
	 * and strips it on reads, so no separate metadata buffer is needed.
	 */
	if (pi_act && data->ms == data->pi_size)
		data->ms = 0;

	data->pi_loc = (ns.dps & NVME_NS_DPS_PI_FIRST);

check_elba:
	/*
	 * Bit 4 of flbas indicates whether metadata is transferred at the end
	 * of the logical block, creating an extended LBA.
	 */
	if (data->ms && ((ns.flbas >> 4) & 0x1))
		data->lba_ext = data->lba_size + data->ms;
	else
		data->lba_shift = ilog2(data->lba_size);

	*nlba = ns.nsze;

out:
	close(fd);
	return err;
}

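/*
 * Report ZBD_HOST_MANAGED if the namespace answers the ZNS identify commands,
 * ZBD_NONE otherwise.
 */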
int fio_nvme_get_zoned_model(struct thread_data *td, struct fio_file *f,
			     enum zbd_zoned_model *model)
{
	struct nvme_data *data = FILE_ENG_DATA(f);
	struct nvme_id_ns ns;
	struct nvme_passthru_cmd cmd;
	int fd, ret = 0;

	if (f->filetype != FIO_TYPE_CHAR)
		return -EINVAL;

	/* File is not yet opened */
	fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
	if (fd < 0)
		return -errno;

	/* Using nvme_id_ns for data as sizes are same */
	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_CTRL,
			    NVME_CSI_ZNS, &ns);
	if (ret) {
		*model = ZBD_NONE;
		goto out;
	}

	memset(&cmd, 0, sizeof(struct nvme_passthru_cmd));

	/* Using nvme_id_ns for data as sizes are same */
	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
			    NVME_CSI_ZNS, &ns);
	if (ret) {
		*model = ZBD_NONE;
		goto out;
	}

	*model = ZBD_HOST_MANAGED;
out:
	close(fd);
	return 0;
}

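/*
 * Issue a Zone Management Receive (report zones) command for data_len bytes
 * of zone descriptors starting at slba.
 */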
static int nvme_report_zones(int fd, __u32 nsid, __u64 slba, __u32 zras_feat,
			     __u32 data_len, void *data)
{
	struct nvme_passthru_cmd cmd = {
		.opcode = nvme_zns_cmd_mgmt_recv,
		.nsid = nsid,
		.addr = (__u64)(uintptr_t)data,
		.data_len = data_len,
		.cdw10 = slba & 0xffffffff,
		.cdw11 = slba >> 32,
		.cdw12 = (data_len >> 2) - 1,
		.cdw13 = NVME_ZNS_ZRA_REPORT_ZONES | zras_feat,
		.timeout_ms = NVME_DEFAULT_IOCTL_TIMEOUT,
	};

	return ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
}

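/*
 * Fetch zone descriptors in chunks and translate them into fio's zbd_zone
 * representation. Returns the number of zones filled in, or a negative error.
 */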
int fio_nvme_report_zones(struct thread_data *td, struct fio_file *f,
			  uint64_t offset, struct zbd_zone *zbdz,
			  unsigned int nr_zones)
{
	struct nvme_data *data = FILE_ENG_DATA(f);
	struct nvme_zone_report *zr;
	struct nvme_zns_id_ns zns_ns;
	struct nvme_id_ns ns;
	unsigned int i = 0, j, zones_fetched = 0;
	unsigned int max_zones, zones_chunks = 1024;
	int fd, ret = 0;
	__u32 zr_len;
	__u64 zlen;

	/* File is not yet opened */
	fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
	if (fd < 0)
		return -errno;

	zones_fetched = 0;
	zr_len = sizeof(*zr) + (zones_chunks * sizeof(struct nvme_zns_desc));
	zr = calloc(1, zr_len);
	if (!zr) {
		close(fd);
		return -ENOMEM;
	}

	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_NS,
			    NVME_CSI_NVM, &ns);
	if (ret) {
		log_err("%s: nvme_identify_ns failed, err=%d\n", f->file_name,
			ret);
		goto out;
	}

	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
			    NVME_CSI_ZNS, &zns_ns);
	if (ret) {
		log_err("%s: nvme_zns_identify_ns failed, err=%d\n",
			f->file_name, ret);
		goto out;
	}
	zlen = zns_ns.lbafe[ns.flbas & 0x0f].zsze << data->lba_shift;

	max_zones = (f->real_file_size - offset) / zlen;
	if (max_zones < nr_zones)
		nr_zones = max_zones;

	if (nr_zones < zones_chunks)
		zones_chunks = nr_zones;

	while (zones_fetched < nr_zones) {
		if (zones_fetched + zones_chunks >= nr_zones) {
			zones_chunks = nr_zones - zones_fetched;
			zr_len = sizeof(*zr) + (zones_chunks * sizeof(struct nvme_zns_desc));
		}
		ret = nvme_report_zones(fd, data->nsid, offset >> data->lba_shift,
					NVME_ZNS_ZRAS_FEAT_ERZ, zr_len, (void *)zr);
		if (ret) {
			log_err("%s: nvme_zns_report_zones failed, err=%d\n",
				f->file_name, ret);
			goto out;
		}

		/* Transform the zone-report */
		for (j = 0; j < zr->nr_zones; j++, i++) {
			struct nvme_zns_desc *desc = (struct nvme_zns_desc *)&(zr->entries[j]);

			zbdz[i].start = desc->zslba << data->lba_shift;
			zbdz[i].len = zlen;
			zbdz[i].wp = desc->wp << data->lba_shift;
			zbdz[i].capacity = desc->zcap << data->lba_shift;

			/* Zone type is stored in the lower 4 bits */
			switch (desc->zt & 0x0f) {
			case NVME_ZONE_TYPE_SEQWRITE_REQ:
				zbdz[i].type = ZBD_ZONE_TYPE_SWR;
				break;
			default:
				log_err("%s: invalid type for zone at offset %llu.\n",
					f->file_name, (unsigned long long) desc->zslba);
				ret = -EIO;
				goto out;
			}

			/* Zone state is stored in the upper 4 bits */
			switch (desc->zs >> 4) {
			case NVME_ZNS_ZS_EMPTY:
				zbdz[i].cond = ZBD_ZONE_COND_EMPTY;
				break;
			case NVME_ZNS_ZS_IMPL_OPEN:
				zbdz[i].cond = ZBD_ZONE_COND_IMP_OPEN;
				break;
			case NVME_ZNS_ZS_EXPL_OPEN:
				zbdz[i].cond = ZBD_ZONE_COND_EXP_OPEN;
				break;
			case NVME_ZNS_ZS_CLOSED:
				zbdz[i].cond = ZBD_ZONE_COND_CLOSED;
				break;
			case NVME_ZNS_ZS_FULL:
				zbdz[i].cond = ZBD_ZONE_COND_FULL;
				break;
			case NVME_ZNS_ZS_READ_ONLY:
			case NVME_ZNS_ZS_OFFLINE:
			default:
				/* Treat all these conditions as offline (don't use!) */
				zbdz[i].cond = ZBD_ZONE_COND_OFFLINE;
				zbdz[i].wp = zbdz[i].start;
			}
		}
		zones_fetched += zr->nr_zones;
		offset += zr->nr_zones * zlen;
	}

	ret = zones_fetched;
out:
	free(zr);
	close(fd);

	return ret;
}

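/*
 * Reset the write pointer of every zone covered by [offset, offset + length)
 * using Zone Management Send with the reset action.
 */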
int fio_nvme_reset_wp(struct thread_data *td, struct fio_file *f,
		      uint64_t offset, uint64_t length)
{
	struct nvme_data *data = FILE_ENG_DATA(f);
	unsigned int nr_zones;
	unsigned long long zslba;
	int i, fd, ret = 0;

	/* If the file is not yet opened, open it for this function. */
	fd = f->fd;
	if (fd < 0) {
		fd = open(f->file_name, O_RDWR | O_LARGEFILE);
		if (fd < 0)
			return -errno;
	}

	zslba = offset >> data->lba_shift;
	nr_zones = (length + td->o.zone_size - 1) / td->o.zone_size;

	for (i = 0; i < nr_zones; i++, zslba += (td->o.zone_size >> data->lba_shift)) {
		struct nvme_passthru_cmd cmd = {
			.opcode = nvme_zns_cmd_mgmt_send,
			.nsid = data->nsid,
			.cdw10 = zslba & 0xffffffff,
			.cdw11 = zslba >> 32,
			.cdw13 = NVME_ZNS_ZSA_RESET,
			.addr = (__u64)(uintptr_t)NULL,
			.data_len = 0,
			.timeout_ms = NVME_DEFAULT_IOCTL_TIMEOUT,
		};

		ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
	}

	if (f->fd < 0)
		close(fd);
	return -ret;
}

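/*
 * Derive the maximum number of open zones from the ZNS identify namespace
 * MOR field (a 0's based value).
 */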
int fio_nvme_get_max_open_zones(struct thread_data *td, struct fio_file *f,
				unsigned int *max_open_zones)
{
	struct nvme_data *data = FILE_ENG_DATA(f);
	struct nvme_zns_id_ns zns_ns;
	int fd, ret = 0;

	fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
	if (fd < 0)
		return -errno;

	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
			    NVME_CSI_ZNS, &zns_ns);
	if (ret) {
		log_err("%s: nvme_zns_identify_ns failed, err=%d\n",
			f->file_name, ret);
		goto out;
	}

	*max_open_zones = zns_ns.mor + 1;
out:
	close(fd);
	return ret;
}

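/*
 * Fetch the FDP reclaim unit handle status via an I/O Management Receive
 * command.
 */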
static inline int nvme_fdp_reclaim_unit_handle_status(int fd, __u32 nsid,
						      __u32 data_len, void *data)
{
	struct nvme_passthru_cmd cmd = {
		.opcode = nvme_cmd_io_mgmt_recv,
		.nsid = nsid,
		.addr = (__u64)(uintptr_t)data,
		.data_len = data_len,
		.cdw10 = 1,
		.cdw11 = (data_len >> 2) - 1,
	};

	return ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
}

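/*
 * Read 'bytes' worth of reclaim unit handle status into ruhs; returns 0 on
 * success or -ENOTSUP if the command fails.
 */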
int fio_nvme_iomgmt_ruhs(struct thread_data *td, struct fio_file *f,
			 struct nvme_fdp_ruh_status *ruhs, __u32 bytes)
{
	struct nvme_data *data = FILE_ENG_DATA(f);
	int fd, ret;

	fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
	if (fd < 0)
		return -errno;

	ret = nvme_fdp_reclaim_unit_handle_status(fd, data->nsid, bytes, ruhs);
	if (ret) {
		log_err("%s: nvme_fdp_reclaim_unit_handle_status failed, err=%d\n",
			f->file_name, ret);
		errno = ENOTSUP;
	} else
		errno = 0;

	ret = -errno;
	close(fd);
	return ret;
}