Merge branch 'fix-randtrimwrite' of https://github.com/minwooim/fio
[fio.git] / engines / nvme.c
CommitLineData
ca59c41c 1// SPDX-License-Identifier: GPL-2.0
b3d5e3fd
AK
2/*
3 * nvme structure declarations and helper functions for the
4 * io_uring_cmd engine.
5 */
6
7#include "nvme.h"
5163f35e 8#include "../crc/crc-t10dif.h"
08371767 9#include "../crc/crc64.h"
b3d5e3fd 10
0d610785 11static inline __u64 get_slba(struct nvme_data *data, __u64 offset)
4885a6eb
VF
12{
13 if (data->lba_ext)
0d610785
AK
14 return offset / data->lba_ext;
15
16 return offset >> data->lba_shift;
4885a6eb
VF
17}
18
0d610785 19static inline __u32 get_nlb(struct nvme_data *data, __u64 len)
4885a6eb
VF
20{
21 if (data->lba_ext)
0d610785
AK
22 return len / data->lba_ext - 1;
23
24 return (len >> data->lba_shift) - 1;
4885a6eb
VF
25}
26
5163f35e
AK
27static void fio_nvme_generate_pi_16b_guard(struct nvme_data *data,
28 struct io_u *io_u,
29 struct nvme_cmd_ext_io_opts *opts)
30{
31 struct nvme_pi_data *pi_data = io_u->engine_data;
32 struct nvme_16b_guard_pif *pi;
33 unsigned char *buf = io_u->xfer_buf;
34 unsigned char *md_buf = io_u->mmap_data;
0d610785
AK
35 __u64 slba = get_slba(data, io_u->offset);
36 __u32 nlb = get_nlb(data, io_u->xfer_buflen) + 1;
5163f35e
AK
37 __u32 lba_num = 0;
38 __u16 guard = 0;
39
40 if (data->pi_loc) {
41 if (data->lba_ext)
42 pi_data->interval = data->lba_ext - data->ms;
43 else
44 pi_data->interval = 0;
45 } else {
46 if (data->lba_ext)
47 pi_data->interval = data->lba_ext - sizeof(struct nvme_16b_guard_pif);
48 else
49 pi_data->interval = data->ms - sizeof(struct nvme_16b_guard_pif);
50 }
51
52 if (io_u->ddir != DDIR_WRITE)
53 return;
54
55 while (lba_num < nlb) {
56 if (data->lba_ext)
57 pi = (struct nvme_16b_guard_pif *)(buf + pi_data->interval);
58 else
59 pi = (struct nvme_16b_guard_pif *)(md_buf + pi_data->interval);
60
61 if (opts->io_flags & NVME_IO_PRINFO_PRCHK_GUARD) {
62 if (data->lba_ext) {
63 guard = fio_crc_t10dif(0, buf, pi_data->interval);
64 } else {
65 guard = fio_crc_t10dif(0, buf, data->lba_size);
66 guard = fio_crc_t10dif(guard, md_buf, pi_data->interval);
67 }
68 pi->guard = cpu_to_be16(guard);
69 }
70
71 if (opts->io_flags & NVME_IO_PRINFO_PRCHK_APP)
72 pi->apptag = cpu_to_be16(pi_data->apptag);
73
74 if (opts->io_flags & NVME_IO_PRINFO_PRCHK_REF) {
75 switch (data->pi_type) {
76 case NVME_NS_DPS_PI_TYPE1:
77 case NVME_NS_DPS_PI_TYPE2:
78 pi->srtag = cpu_to_be32((__u32)slba + lba_num);
79 break;
80 case NVME_NS_DPS_PI_TYPE3:
81 break;
82 }
83 }
84 if (data->lba_ext) {
85 buf += data->lba_ext;
86 } else {
87 buf += data->lba_size;
88 md_buf += data->ms;
89 }
90 lba_num++;
91 }
92}
93
94static int fio_nvme_verify_pi_16b_guard(struct nvme_data *data,
95 struct io_u *io_u)
96{
97 struct nvme_pi_data *pi_data = io_u->engine_data;
98 struct nvme_16b_guard_pif *pi;
99 struct fio_file *f = io_u->file;
100 unsigned char *buf = io_u->xfer_buf;
101 unsigned char *md_buf = io_u->mmap_data;
0d610785
AK
102 __u64 slba = get_slba(data, io_u->offset);
103 __u32 nlb = get_nlb(data, io_u->xfer_buflen) + 1;
5163f35e
AK
104 __u32 lba_num = 0;
105 __u16 unmask_app, unmask_app_exp, guard = 0;
106
107 while (lba_num < nlb) {
108 if (data->lba_ext)
109 pi = (struct nvme_16b_guard_pif *)(buf + pi_data->interval);
110 else
111 pi = (struct nvme_16b_guard_pif *)(md_buf + pi_data->interval);
112
113 if (data->pi_type == NVME_NS_DPS_PI_TYPE3) {
114 if (pi->apptag == NVME_PI_APP_DISABLE &&
115 pi->srtag == NVME_PI_REF_DISABLE)
116 goto next;
117 } else if (data->pi_type == NVME_NS_DPS_PI_TYPE1 ||
118 data->pi_type == NVME_NS_DPS_PI_TYPE2) {
119 if (pi->apptag == NVME_PI_APP_DISABLE)
120 goto next;
121 }
122
123 if (pi_data->io_flags & NVME_IO_PRINFO_PRCHK_GUARD) {
124 if (data->lba_ext) {
125 guard = fio_crc_t10dif(0, buf, pi_data->interval);
126 } else {
127 guard = fio_crc_t10dif(0, buf, data->lba_size);
128 guard = fio_crc_t10dif(guard, md_buf, pi_data->interval);
129 }
130 if (be16_to_cpu(pi->guard) != guard) {
131 log_err("%s: Guard compare error: LBA: %llu Expected=%x, Actual=%x\n",
132 f->file_name, (unsigned long long)slba,
133 guard, be16_to_cpu(pi->guard));
134 return -EIO;
135 }
136 }
137
138 if (pi_data->io_flags & NVME_IO_PRINFO_PRCHK_APP) {
139 unmask_app = be16_to_cpu(pi->apptag) & pi_data->apptag_mask;
140 unmask_app_exp = pi_data->apptag & pi_data->apptag_mask;
141 if (unmask_app != unmask_app_exp) {
142 log_err("%s: APPTAG compare error: LBA: %llu Expected=%x, Actual=%x\n",
143 f->file_name, (unsigned long long)slba,
144 unmask_app_exp, unmask_app);
145 return -EIO;
146 }
147 }
148
149 if (pi_data->io_flags & NVME_IO_PRINFO_PRCHK_REF) {
150 switch (data->pi_type) {
151 case NVME_NS_DPS_PI_TYPE1:
152 case NVME_NS_DPS_PI_TYPE2:
153 if (be32_to_cpu(pi->srtag) !=
154 ((__u32)slba + lba_num)) {
155 log_err("%s: REFTAG compare error: LBA: %llu Expected=%x, Actual=%x\n",
156 f->file_name, (unsigned long long)slba,
157 (__u32)slba + lba_num,
158 be32_to_cpu(pi->srtag));
159 return -EIO;
160 }
161 break;
162 case NVME_NS_DPS_PI_TYPE3:
163 break;
164 }
165 }
166next:
167 if (data->lba_ext) {
168 buf += data->lba_ext;
169 } else {
170 buf += data->lba_size;
171 md_buf += data->ms;
172 }
173 lba_num++;
174 }
175
176 return 0;
177}
178
08371767
AK
179static void fio_nvme_generate_pi_64b_guard(struct nvme_data *data,
180 struct io_u *io_u,
181 struct nvme_cmd_ext_io_opts *opts)
182{
183 struct nvme_pi_data *pi_data = io_u->engine_data;
184 struct nvme_64b_guard_pif *pi;
185 unsigned char *buf = io_u->xfer_buf;
186 unsigned char *md_buf = io_u->mmap_data;
187 uint64_t guard = 0;
0d610785
AK
188 __u64 slba = get_slba(data, io_u->offset);
189 __u32 nlb = get_nlb(data, io_u->xfer_buflen) + 1;
08371767
AK
190 __u32 lba_num = 0;
191
192 if (data->pi_loc) {
193 if (data->lba_ext)
194 pi_data->interval = data->lba_ext - data->ms;
195 else
196 pi_data->interval = 0;
197 } else {
198 if (data->lba_ext)
199 pi_data->interval = data->lba_ext - sizeof(struct nvme_64b_guard_pif);
200 else
201 pi_data->interval = data->ms - sizeof(struct nvme_64b_guard_pif);
202 }
203
204 if (io_u->ddir != DDIR_WRITE)
205 return;
206
207 while (lba_num < nlb) {
208 if (data->lba_ext)
209 pi = (struct nvme_64b_guard_pif *)(buf + pi_data->interval);
210 else
211 pi = (struct nvme_64b_guard_pif *)(md_buf + pi_data->interval);
212
213 if (opts->io_flags & NVME_IO_PRINFO_PRCHK_GUARD) {
214 if (data->lba_ext) {
215 guard = fio_crc64_nvme(0, buf, pi_data->interval);
216 } else {
217 guard = fio_crc64_nvme(0, buf, data->lba_size);
218 guard = fio_crc64_nvme(guard, md_buf, pi_data->interval);
219 }
220 pi->guard = cpu_to_be64(guard);
221 }
222
223 if (opts->io_flags & NVME_IO_PRINFO_PRCHK_APP)
224 pi->apptag = cpu_to_be16(pi_data->apptag);
225
226 if (opts->io_flags & NVME_IO_PRINFO_PRCHK_REF) {
227 switch (data->pi_type) {
228 case NVME_NS_DPS_PI_TYPE1:
229 case NVME_NS_DPS_PI_TYPE2:
230 put_unaligned_be48(slba + lba_num, pi->srtag);
231 break;
232 case NVME_NS_DPS_PI_TYPE3:
233 break;
234 }
235 }
236 if (data->lba_ext) {
237 buf += data->lba_ext;
238 } else {
239 buf += data->lba_size;
240 md_buf += data->ms;
241 }
242 lba_num++;
243 }
244}
245
246static int fio_nvme_verify_pi_64b_guard(struct nvme_data *data,
247 struct io_u *io_u)
248{
249 struct nvme_pi_data *pi_data = io_u->engine_data;
250 struct nvme_64b_guard_pif *pi;
251 struct fio_file *f = io_u->file;
252 unsigned char *buf = io_u->xfer_buf;
253 unsigned char *md_buf = io_u->mmap_data;
0d610785 254 __u64 slba = get_slba(data, io_u->offset);
08371767 255 __u64 ref, ref_exp, guard = 0;
0d610785 256 __u32 nlb = get_nlb(data, io_u->xfer_buflen) + 1;
08371767
AK
257 __u32 lba_num = 0;
258 __u16 unmask_app, unmask_app_exp;
259
260 while (lba_num < nlb) {
261 if (data->lba_ext)
262 pi = (struct nvme_64b_guard_pif *)(buf + pi_data->interval);
263 else
264 pi = (struct nvme_64b_guard_pif *)(md_buf + pi_data->interval);
265
266 if (data->pi_type == NVME_NS_DPS_PI_TYPE3) {
267 if (pi->apptag == NVME_PI_APP_DISABLE &&
268 fio_nvme_pi_ref_escape(pi->srtag))
269 goto next;
270 } else if (data->pi_type == NVME_NS_DPS_PI_TYPE1 ||
271 data->pi_type == NVME_NS_DPS_PI_TYPE2) {
272 if (pi->apptag == NVME_PI_APP_DISABLE)
273 goto next;
274 }
275
276 if (pi_data->io_flags & NVME_IO_PRINFO_PRCHK_GUARD) {
277 if (data->lba_ext) {
278 guard = fio_crc64_nvme(0, buf, pi_data->interval);
279 } else {
280 guard = fio_crc64_nvme(0, buf, data->lba_size);
281 guard = fio_crc64_nvme(guard, md_buf, pi_data->interval);
282 }
283 if (be64_to_cpu((uint64_t)pi->guard) != guard) {
284 log_err("%s: Guard compare error: LBA: %llu Expected=%llx, Actual=%llx\n",
285 f->file_name, (unsigned long long)slba,
286 guard, be64_to_cpu((uint64_t)pi->guard));
287 return -EIO;
288 }
289 }
290
291 if (pi_data->io_flags & NVME_IO_PRINFO_PRCHK_APP) {
292 unmask_app = be16_to_cpu(pi->apptag) & pi_data->apptag_mask;
293 unmask_app_exp = pi_data->apptag & pi_data->apptag_mask;
294 if (unmask_app != unmask_app_exp) {
295 log_err("%s: APPTAG compare error: LBA: %llu Expected=%x, Actual=%x\n",
296 f->file_name, (unsigned long long)slba,
297 unmask_app_exp, unmask_app);
298 return -EIO;
299 }
300 }
301
302 if (pi_data->io_flags & NVME_IO_PRINFO_PRCHK_REF) {
303 switch (data->pi_type) {
304 case NVME_NS_DPS_PI_TYPE1:
305 case NVME_NS_DPS_PI_TYPE2:
306 ref = get_unaligned_be48(pi->srtag);
307 ref_exp = (slba + lba_num) & ((1ULL << 48) - 1);
308 if (ref != ref_exp) {
309 log_err("%s: REFTAG compare error: LBA: %llu Expected=%llx, Actual=%llx\n",
310 f->file_name, (unsigned long long)slba,
311 ref_exp, ref);
312 return -EIO;
313 }
314 break;
315 case NVME_NS_DPS_PI_TYPE3:
316 break;
317 }
318 }
319next:
320 if (data->lba_ext) {
321 buf += data->lba_ext;
322 } else {
323 buf += data->lba_size;
324 md_buf += data->ms;
325 }
326 lba_num++;
327 }
328
329 return 0;
330}
4885a6eb 331void fio_nvme_uring_cmd_trim_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
5d4ee0de 332 struct nvme_dsm *dsm)
4885a6eb
VF
333{
334 struct nvme_data *data = FILE_ENG_DATA(io_u->file);
5d4ee0de
AK
335 struct trim_range *range;
336 uint8_t *buf_point;
337 int i;
4885a6eb
VF
338
339 cmd->opcode = nvme_cmd_dsm;
340 cmd->nsid = data->nsid;
4885a6eb 341 cmd->cdw11 = NVME_ATTRIBUTE_DEALLOCATE;
5d4ee0de
AK
342 cmd->addr = (__u64) (uintptr_t) (&dsm->range[0]);
343
344 if (dsm->nr_ranges == 1) {
345 dsm->range[0].slba = get_slba(data, io_u->offset);
346 /* nlb is a 1-based value for deallocate */
347 dsm->range[0].nlb = get_nlb(data, io_u->xfer_buflen) + 1;
348 cmd->cdw10 = 0;
349 cmd->data_len = sizeof(struct nvme_dsm_range);
350 } else {
351 buf_point = io_u->xfer_buf;
352 for (i = 0; i < io_u->number_trim; i++) {
353 range = (struct trim_range *)buf_point;
354 dsm->range[i].slba = get_slba(data, range->start);
355 /* nlb is a 1-based value for deallocate */
356 dsm->range[i].nlb = get_nlb(data, range->len) + 1;
357 buf_point += sizeof(struct trim_range);
358 }
359 cmd->cdw10 = io_u->number_trim - 1;
360 cmd->data_len = io_u->number_trim * sizeof(struct nvme_dsm_range);
361 }
4885a6eb
VF
362}
363
b3d5e3fd 364int fio_nvme_uring_cmd_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
55e14d73 365 struct iovec *iov, struct nvme_dsm *dsm,
6170d92a
MI
366 uint8_t read_opcode, uint8_t write_opcode,
367 unsigned int cdw12_flags)
b3d5e3fd
AK
368{
369 struct nvme_data *data = FILE_ENG_DATA(io_u->file);
370 __u64 slba;
371 __u32 nlb;
372
373 memset(cmd, 0, sizeof(struct nvme_uring_cmd));
374
4885a6eb
VF
375 switch (io_u->ddir) {
376 case DDIR_READ:
6170d92a 377 cmd->opcode = read_opcode;
4885a6eb
VF
378 break;
379 case DDIR_WRITE:
87a4903f 380 cmd->opcode = write_opcode;
4885a6eb
VF
381 break;
382 case DDIR_TRIM:
383 fio_nvme_uring_cmd_trim_prep(cmd, io_u, dsm);
384 return 0;
7116c1f4
MI
385 case DDIR_SYNC:
386 case DDIR_DATASYNC:
387 cmd->opcode = nvme_cmd_flush;
388 cmd->nsid = data->nsid;
389 return 0;
4885a6eb 390 default:
b3d5e3fd 391 return -ENOTSUP;
345fa8fd 392 }
b3d5e3fd 393
0d610785
AK
394 slba = get_slba(data, io_u->offset);
395 nlb = get_nlb(data, io_u->xfer_buflen);
4885a6eb 396
b3d5e3fd
AK
397 /* cdw10 and cdw11 represent starting lba */
398 cmd->cdw10 = slba & 0xffffffff;
399 cmd->cdw11 = slba >> 32;
400 /* cdw12 represent number of lba's for read/write */
55e14d73 401 cmd->cdw12 = nlb | (io_u->dtype << 20) | cdw12_flags;
a7e8aae0 402 cmd->cdw13 = io_u->dspec << 16;
b3d5e3fd
AK
403 if (iov) {
404 iov->iov_base = io_u->xfer_buf;
405 iov->iov_len = io_u->xfer_buflen;
406 cmd->addr = (__u64)(uintptr_t)iov;
407 cmd->data_len = 1;
408 } else {
a191635a
VF
409 /* no buffer for write zeroes */
410 if (cmd->opcode != nvme_cmd_write_zeroes)
411 cmd->addr = (__u64)(uintptr_t)io_u->xfer_buf;
412 else
413 cmd->addr = (__u64)(uintptr_t)NULL;
b3d5e3fd
AK
414 cmd->data_len = io_u->xfer_buflen;
415 }
2d6451c9
AK
416 if (data->lba_shift && data->ms) {
417 cmd->metadata = (__u64)(uintptr_t)io_u->mmap_data;
418 cmd->metadata_len = (nlb + 1) * data->ms;
419 }
b3d5e3fd
AK
420 cmd->nsid = data->nsid;
421 return 0;
422}
423
3ee8311a
AK
424void fio_nvme_pi_fill(struct nvme_uring_cmd *cmd, struct io_u *io_u,
425 struct nvme_cmd_ext_io_opts *opts)
426{
427 struct nvme_data *data = FILE_ENG_DATA(io_u->file);
428 __u64 slba;
429
0d610785 430 slba = get_slba(data, io_u->offset);
3ee8311a
AK
431 cmd->cdw12 |= opts->io_flags;
432
5163f35e
AK
433 if (data->pi_type && !(opts->io_flags & NVME_IO_PRINFO_PRACT)) {
434 if (data->guard_type == NVME_NVM_NS_16B_GUARD)
435 fio_nvme_generate_pi_16b_guard(data, io_u, opts);
08371767
AK
436 else if (data->guard_type == NVME_NVM_NS_64B_GUARD)
437 fio_nvme_generate_pi_64b_guard(data, io_u, opts);
5163f35e
AK
438 }
439
3ee8311a
AK
440 switch (data->pi_type) {
441 case NVME_NS_DPS_PI_TYPE1:
442 case NVME_NS_DPS_PI_TYPE2:
443 switch (data->guard_type) {
444 case NVME_NVM_NS_16B_GUARD:
e4a9812d
AK
445 if (opts->io_flags & NVME_IO_PRINFO_PRCHK_REF)
446 cmd->cdw14 = (__u32)slba;
3ee8311a
AK
447 break;
448 case NVME_NVM_NS_64B_GUARD:
e4a9812d
AK
449 if (opts->io_flags & NVME_IO_PRINFO_PRCHK_REF) {
450 cmd->cdw14 = (__u32)slba;
451 cmd->cdw3 = ((slba >> 32) & 0xffff);
452 }
3ee8311a
AK
453 break;
454 default:
455 break;
456 }
e4a9812d
AK
457 if (opts->io_flags & NVME_IO_PRINFO_PRCHK_APP)
458 cmd->cdw15 = (opts->apptag_mask << 16 | opts->apptag);
3ee8311a
AK
459 break;
460 case NVME_NS_DPS_PI_TYPE3:
e4a9812d
AK
461 if (opts->io_flags & NVME_IO_PRINFO_PRCHK_APP)
462 cmd->cdw15 = (opts->apptag_mask << 16 | opts->apptag);
3ee8311a
AK
463 break;
464 case NVME_NS_DPS_PI_NONE:
465 break;
466 }
467}
468
5163f35e
AK
469int fio_nvme_pi_verify(struct nvme_data *data, struct io_u *io_u)
470{
471 int ret = 0;
472
473 switch (data->guard_type) {
474 case NVME_NVM_NS_16B_GUARD:
475 ret = fio_nvme_verify_pi_16b_guard(data, io_u);
476 break;
08371767
AK
477 case NVME_NVM_NS_64B_GUARD:
478 ret = fio_nvme_verify_pi_64b_guard(data, io_u);
479 break;
5163f35e
AK
480 default:
481 break;
482 }
483
484 return ret;
485}
486
b3d5e3fd
AK
487static int nvme_identify(int fd, __u32 nsid, enum nvme_identify_cns cns,
488 enum nvme_csi csi, void *data)
489{
490 struct nvme_passthru_cmd cmd = {
491 .opcode = nvme_admin_identify,
492 .nsid = nsid,
493 .addr = (__u64)(uintptr_t)data,
494 .data_len = NVME_IDENTIFY_DATA_SIZE,
495 .cdw10 = cns,
496 .cdw11 = csi << NVME_IDENTIFY_CSI_SHIFT,
497 .timeout_ms = NVME_DEFAULT_IOCTL_TIMEOUT,
498 };
499
500 return ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd);
501}
502
3ee8311a
AK
503int fio_nvme_get_info(struct fio_file *f, __u64 *nlba, __u32 pi_act,
504 struct nvme_data *data)
b3d5e3fd
AK
505{
506 struct nvme_id_ns ns;
3ee8311a
AK
507 struct nvme_id_ctrl ctrl;
508 struct nvme_nvm_id_ns nvm_ns;
37a0881f 509 int namespace_id;
b3d5e3fd 510 int fd, err;
3ee8311a 511 __u32 format_idx, elbaf;
b3d5e3fd
AK
512
513 if (f->filetype != FIO_TYPE_CHAR) {
514 log_err("ioengine io_uring_cmd only works with nvme ns "
515 "generic char devices (/dev/ngXnY)\n");
516 return 1;
517 }
518
519 fd = open(f->file_name, O_RDONLY);
520 if (fd < 0)
521 return -errno;
522
523 namespace_id = ioctl(fd, NVME_IOCTL_ID);
524 if (namespace_id < 0) {
af10f514 525 err = -errno;
345fa8fd
AK
526 log_err("%s: failed to fetch namespace-id\n", f->file_name);
527 goto out;
b3d5e3fd
AK
528 }
529
3ee8311a
AK
530 err = nvme_identify(fd, 0, NVME_IDENTIFY_CNS_CTRL, NVME_CSI_NVM, &ctrl);
531 if (err) {
532 log_err("%s: failed to fetch identify ctrl\n", f->file_name);
533 goto out;
534 }
535
b3d5e3fd
AK
536 /*
537 * Identify namespace to get namespace-id, namespace size in LBA's
538 * and LBA data size.
539 */
540 err = nvme_identify(fd, namespace_id, NVME_IDENTIFY_CNS_NS,
541 NVME_CSI_NVM, &ns);
542 if (err) {
345fa8fd
AK
543 log_err("%s: failed to fetch identify namespace\n",
544 f->file_name);
3ee8311a 545 goto out;
b3d5e3fd
AK
546 }
547
e7e5023b 548 data->nsid = namespace_id;
01a7d384
AK
549
550 /*
551 * 16 or 64 as maximum number of supported LBA formats.
552 * From flbas bit 0-3 indicates lsb and bit 5-6 indicates msb
553 * of the format index used to format the namespace.
554 */
555 if (ns.nlbaf < 16)
556 format_idx = ns.flbas & 0xf;
557 else
558 format_idx = (ns.flbas & 0xf) + (((ns.flbas >> 5) & 0x3) << 4);
559
e7e5023b 560 data->lba_size = 1 << ns.lbaf[format_idx].ds;
2d6451c9 561 data->ms = le16_to_cpu(ns.lbaf[format_idx].ms);
345fa8fd 562
3ee8311a
AK
563 /* Check for end to end data protection support */
564 if (data->ms && (ns.dps & NVME_NS_DPS_PI_MASK))
565 data->pi_type = (ns.dps & NVME_NS_DPS_PI_MASK);
566
567 if (!data->pi_type)
568 goto check_elba;
569
570 if (ctrl.ctratt & NVME_CTRL_CTRATT_ELBAS) {
571 err = nvme_identify(fd, namespace_id, NVME_IDENTIFY_CNS_CSI_NS,
572 NVME_CSI_NVM, &nvm_ns);
573 if (err) {
574 log_err("%s: failed to fetch identify nvm namespace\n",
575 f->file_name);
576 goto out;
577 }
578
579 elbaf = le32_to_cpu(nvm_ns.elbaf[format_idx]);
580
581 /* Currently we don't support storage tags */
582 if (elbaf & NVME_ID_NS_NVM_STS_MASK) {
583 log_err("%s: Storage tag not supported\n",
584 f->file_name);
585 err = -ENOTSUP;
586 goto out;
587 }
588
589 data->guard_type = (elbaf >> NVME_ID_NS_NVM_GUARD_SHIFT) &
590 NVME_ID_NS_NVM_GUARD_MASK;
591
592 /* No 32 bit guard, as storage tag is mandatory for it */
593 switch (data->guard_type) {
594 case NVME_NVM_NS_16B_GUARD:
595 data->pi_size = sizeof(struct nvme_16b_guard_pif);
596 break;
597 case NVME_NVM_NS_64B_GUARD:
598 data->pi_size = sizeof(struct nvme_64b_guard_pif);
599 break;
600 default:
601 break;
602 }
603 } else {
604 data->guard_type = NVME_NVM_NS_16B_GUARD;
605 data->pi_size = sizeof(struct nvme_16b_guard_pif);
606 }
607
608 /*
609 * when PRACT bit is set to 1, and metadata size is equal to protection
610 * information size, controller inserts and removes PI for write and
611 * read commands respectively.
612 */
613 if (pi_act && data->ms == data->pi_size)
614 data->ms = 0;
615
616 data->pi_loc = (ns.dps & NVME_NS_DPS_PI_FIRST);
617
618check_elba:
345fa8fd 619 /*
345fa8fd
AK
620 * Bit 4 for flbas indicates if metadata is transferred at the end of
621 * logical block creating an extended LBA.
622 */
2d6451c9 623 if (data->ms && ((ns.flbas >> 4) & 0x1))
e7e5023b
AK
624 data->lba_ext = data->lba_size + data->ms;
625 else
626 data->lba_shift = ilog2(data->lba_size);
627
b3d5e3fd
AK
628 *nlba = ns.nsze;
629
345fa8fd 630out:
b3d5e3fd 631 close(fd);
345fa8fd 632 return err;
b3d5e3fd 633}
3d05e0ff
AK
634
635int fio_nvme_get_zoned_model(struct thread_data *td, struct fio_file *f,
636 enum zbd_zoned_model *model)
637{
638 struct nvme_data *data = FILE_ENG_DATA(f);
639 struct nvme_id_ns ns;
640 struct nvme_passthru_cmd cmd;
641 int fd, ret = 0;
642
643 if (f->filetype != FIO_TYPE_CHAR)
644 return -EINVAL;
645
646 /* File is not yet opened */
647 fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
648 if (fd < 0)
649 return -errno;
650
651 /* Using nvme_id_ns for data as sizes are same */
652 ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_CTRL,
653 NVME_CSI_ZNS, &ns);
654 if (ret) {
655 *model = ZBD_NONE;
656 goto out;
657 }
658
659 memset(&cmd, 0, sizeof(struct nvme_passthru_cmd));
660
661 /* Using nvme_id_ns for data as sizes are same */
662 ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
663 NVME_CSI_ZNS, &ns);
664 if (ret) {
665 *model = ZBD_NONE;
666 goto out;
667 }
668
669 *model = ZBD_HOST_MANAGED;
670out:
671 close(fd);
672 return 0;
673}
674
675static int nvme_report_zones(int fd, __u32 nsid, __u64 slba, __u32 zras_feat,
676 __u32 data_len, void *data)
677{
678 struct nvme_passthru_cmd cmd = {
679 .opcode = nvme_zns_cmd_mgmt_recv,
680 .nsid = nsid,
681 .addr = (__u64)(uintptr_t)data,
682 .data_len = data_len,
683 .cdw10 = slba & 0xffffffff,
684 .cdw11 = slba >> 32,
685 .cdw12 = (data_len >> 2) - 1,
686 .cdw13 = NVME_ZNS_ZRA_REPORT_ZONES | zras_feat,
687 .timeout_ms = NVME_DEFAULT_IOCTL_TIMEOUT,
688 };
689
690 return ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
691}
692
693int fio_nvme_report_zones(struct thread_data *td, struct fio_file *f,
694 uint64_t offset, struct zbd_zone *zbdz,
695 unsigned int nr_zones)
696{
697 struct nvme_data *data = FILE_ENG_DATA(f);
698 struct nvme_zone_report *zr;
699 struct nvme_zns_id_ns zns_ns;
700 struct nvme_id_ns ns;
701 unsigned int i = 0, j, zones_fetched = 0;
702 unsigned int max_zones, zones_chunks = 1024;
703 int fd, ret = 0;
704 __u32 zr_len;
705 __u64 zlen;
706
707 /* File is not yet opened */
708 fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
709 if (fd < 0)
710 return -errno;
711
712 zones_fetched = 0;
713 zr_len = sizeof(*zr) + (zones_chunks * sizeof(struct nvme_zns_desc));
714 zr = calloc(1, zr_len);
3efcb23f
JA
715 if (!zr) {
716 close(fd);
3d05e0ff 717 return -ENOMEM;
3efcb23f 718 }
3d05e0ff
AK
719
720 ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_NS,
721 NVME_CSI_NVM, &ns);
722 if (ret) {
723 log_err("%s: nvme_identify_ns failed, err=%d\n", f->file_name,
724 ret);
725 goto out;
726 }
727
728 ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
729 NVME_CSI_ZNS, &zns_ns);
730 if (ret) {
731 log_err("%s: nvme_zns_identify_ns failed, err=%d\n",
732 f->file_name, ret);
733 goto out;
734 }
735 zlen = zns_ns.lbafe[ns.flbas & 0x0f].zsze << data->lba_shift;
736
737 max_zones = (f->real_file_size - offset) / zlen;
738 if (max_zones < nr_zones)
739 nr_zones = max_zones;
740
741 if (nr_zones < zones_chunks)
742 zones_chunks = nr_zones;
743
744 while (zones_fetched < nr_zones) {
745 if (zones_fetched + zones_chunks >= nr_zones) {
746 zones_chunks = nr_zones - zones_fetched;
747 zr_len = sizeof(*zr) + (zones_chunks * sizeof(struct nvme_zns_desc));
748 }
749 ret = nvme_report_zones(fd, data->nsid, offset >> data->lba_shift,
750 NVME_ZNS_ZRAS_FEAT_ERZ, zr_len, (void *)zr);
751 if (ret) {
752 log_err("%s: nvme_zns_report_zones failed, err=%d\n",
753 f->file_name, ret);
754 goto out;
755 }
756
757 /* Transform the zone-report */
758 for (j = 0; j < zr->nr_zones; j++, i++) {
759 struct nvme_zns_desc *desc = (struct nvme_zns_desc *)&(zr->entries[j]);
760
761 zbdz[i].start = desc->zslba << data->lba_shift;
762 zbdz[i].len = zlen;
763 zbdz[i].wp = desc->wp << data->lba_shift;
764 zbdz[i].capacity = desc->zcap << data->lba_shift;
765
766 /* Zone Type is stored in first 4 bits. */
767 switch (desc->zt & 0x0f) {
768 case NVME_ZONE_TYPE_SEQWRITE_REQ:
769 zbdz[i].type = ZBD_ZONE_TYPE_SWR;
770 break;
771 default:
772 log_err("%s: invalid type for zone at offset %llu.\n",
2fa0ab21 773 f->file_name, (unsigned long long) desc->zslba);
3d05e0ff
AK
774 ret = -EIO;
775 goto out;
776 }
777
778 /* Zone State is stored in last 4 bits. */
779 switch (desc->zs >> 4) {
780 case NVME_ZNS_ZS_EMPTY:
781 zbdz[i].cond = ZBD_ZONE_COND_EMPTY;
782 break;
783 case NVME_ZNS_ZS_IMPL_OPEN:
784 zbdz[i].cond = ZBD_ZONE_COND_IMP_OPEN;
785 break;
786 case NVME_ZNS_ZS_EXPL_OPEN:
787 zbdz[i].cond = ZBD_ZONE_COND_EXP_OPEN;
788 break;
789 case NVME_ZNS_ZS_CLOSED:
790 zbdz[i].cond = ZBD_ZONE_COND_CLOSED;
791 break;
792 case NVME_ZNS_ZS_FULL:
793 zbdz[i].cond = ZBD_ZONE_COND_FULL;
794 break;
795 case NVME_ZNS_ZS_READ_ONLY:
796 case NVME_ZNS_ZS_OFFLINE:
797 default:
798 /* Treat all these conditions as offline (don't use!) */
799 zbdz[i].cond = ZBD_ZONE_COND_OFFLINE;
800 zbdz[i].wp = zbdz[i].start;
801 }
802 }
803 zones_fetched += zr->nr_zones;
804 offset += zr->nr_zones * zlen;
805 }
806
807 ret = zones_fetched;
808out:
809 free(zr);
810 close(fd);
811
812 return ret;
813}
814
815int fio_nvme_reset_wp(struct thread_data *td, struct fio_file *f,
816 uint64_t offset, uint64_t length)
817{
818 struct nvme_data *data = FILE_ENG_DATA(f);
819 unsigned int nr_zones;
820 unsigned long long zslba;
821 int i, fd, ret = 0;
822
823 /* If the file is not yet opened, open it for this function. */
824 fd = f->fd;
825 if (fd < 0) {
826 fd = open(f->file_name, O_RDWR | O_LARGEFILE);
827 if (fd < 0)
828 return -errno;
829 }
830
831 zslba = offset >> data->lba_shift;
832 nr_zones = (length + td->o.zone_size - 1) / td->o.zone_size;
833
834 for (i = 0; i < nr_zones; i++, zslba += (td->o.zone_size >> data->lba_shift)) {
835 struct nvme_passthru_cmd cmd = {
836 .opcode = nvme_zns_cmd_mgmt_send,
837 .nsid = data->nsid,
838 .cdw10 = zslba & 0xffffffff,
839 .cdw11 = zslba >> 32,
840 .cdw13 = NVME_ZNS_ZSA_RESET,
841 .addr = (__u64)(uintptr_t)NULL,
842 .data_len = 0,
843 .timeout_ms = NVME_DEFAULT_IOCTL_TIMEOUT,
844 };
845
846 ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
847 }
848
849 if (f->fd < 0)
850 close(fd);
851 return -ret;
852}
853
854int fio_nvme_get_max_open_zones(struct thread_data *td, struct fio_file *f,
855 unsigned int *max_open_zones)
856{
857 struct nvme_data *data = FILE_ENG_DATA(f);
858 struct nvme_zns_id_ns zns_ns;
859 int fd, ret = 0;
860
861 fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
862 if (fd < 0)
863 return -errno;
864
865 ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
866 NVME_CSI_ZNS, &zns_ns);
867 if (ret) {
868 log_err("%s: nvme_zns_identify_ns failed, err=%d\n",
869 f->file_name, ret);
870 goto out;
871 }
872
873 *max_open_zones = zns_ns.mor + 1;
874out:
875 close(fd);
876 return ret;
877}
a7e8aae0
KB
878
879static inline int nvme_fdp_reclaim_unit_handle_status(int fd, __u32 nsid,
880 __u32 data_len, void *data)
881{
882 struct nvme_passthru_cmd cmd = {
883 .opcode = nvme_cmd_io_mgmt_recv,
884 .nsid = nsid,
885 .addr = (__u64)(uintptr_t)data,
886 .data_len = data_len,
887 .cdw10 = 1,
888 .cdw11 = (data_len >> 2) - 1,
889 };
890
891 return ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
892}
893
894int fio_nvme_iomgmt_ruhs(struct thread_data *td, struct fio_file *f,
895 struct nvme_fdp_ruh_status *ruhs, __u32 bytes)
896{
897 struct nvme_data *data = FILE_ENG_DATA(f);
898 int fd, ret;
899
900 fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
901 if (fd < 0)
902 return -errno;
903
904 ret = nvme_fdp_reclaim_unit_handle_status(fd, data->nsid, bytes, ruhs);
905 if (ret) {
906 log_err("%s: nvme_fdp_reclaim_unit_handle_status failed, err=%d\n",
907 f->file_name, ret);
908 errno = ENOTSUP;
909 } else
910 errno = 0;
911
af10f514 912 ret = -errno;
a7e8aae0 913 close(fd);
af10f514 914 return ret;
a7e8aae0 915}