// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2011-2014, Intel Corporation.
 * Copyright (c) 2017-2021 Christoph Hellwig.
 */
#include <linux/bio-integrity.h>
#include <linux/blk-integrity.h>
#include <linux/ptrace.h>	/* for force_successful_syscall_return */
#include <linux/nvme_ioctl.h>
#include <linux/io_uring/cmd.h>
#include "nvme.h"

enum {
	NVME_IOCTL_VEC		= (1 << 0),
	NVME_IOCTL_PARTITION	= (1 << 1),
};

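/*
 * Decide whether an unprivileged caller may issue this passthrough command.
 * Anything that is not explicitly allowed below falls through to the admin
 * label and requires CAP_SYS_ADMIN.
 */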
static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
		unsigned int flags, bool open_for_write)
{
	u32 effects;

	/*
	 * Do not allow unprivileged passthrough on partitions, as that allows an
	 * escape from the containment of the partition.
	 */
	if (flags & NVME_IOCTL_PARTITION)
		goto admin;

	/*
	 * Do not allow unprivileged processes to send vendor specific or fabrics
	 * commands as we can't be sure about their effects.
	 */
	if (c->common.opcode >= nvme_cmd_vendor_start ||
			c->common.opcode == nvme_fabrics_command)
		goto admin;

	/*
	 * Do not allow unprivileged passthrough of admin commands except
	 * for a subset of identify commands that contain information required
	 * to form proper I/O commands in userspace and do not expose any
	 * potentially sensitive information.
	 */
	if (!ns) {
		if (c->common.opcode == nvme_admin_identify) {
			switch (c->identify.cns) {
			case NVME_ID_CNS_NS:
			case NVME_ID_CNS_CS_NS:
			case NVME_ID_CNS_NS_CS_INDEP:
			case NVME_ID_CNS_CS_CTRL:
			case NVME_ID_CNS_CTRL:
				return true;
			}
		}
		goto admin;
	}

	/*
	 * Check if the controller provides a Commands Supported and Effects log
	 * and marks this command as supported. If not, reject unprivileged
	 * passthrough.
	 */
	effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode);
	if (!(effects & NVME_CMD_EFFECTS_CSUPP))
		goto admin;

	/*
	 * Don't allow passthrough for commands that have intrusive (or unknown)
	 * effects.
	 */
	if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC |
			NVME_CMD_EFFECTS_UUID_SEL |
			NVME_CMD_EFFECTS_SCOPE_MASK))
		goto admin;

	/*
	 * Only allow I/O commands that transfer data to the controller or that
	 * change the logical block contents if the file descriptor is open for
	 * writing.
	 */
	if ((nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC)) &&
	    !open_for_write)
		goto admin;

	return true;
admin:
	return capable(CAP_SYS_ADMIN);
}

/*
 * Convert integer values from ioctl structures to user pointers, silently
 * ignoring the upper bits in the compat case to match behaviour of 32-bit
 * kernels.
 */
static void __user *nvme_to_user_ptr(uintptr_t ptrval)
{
	if (in_compat_syscall())
		ptrval = (compat_uptr_t)ptrval;
	return (void __user *)ptrval;
}

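/*
 * Allocate a request for a user passthrough command and initialize it from
 * the NVMe command, marking it NVME_REQ_USERCMD so the core treats it as a
 * user-submitted request.
 */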
static struct request *nvme_alloc_user_request(struct request_queue *q,
		struct nvme_command *cmd, blk_opf_t rq_flags,
		blk_mq_req_flags_t blk_flags)
{
	struct request *req;

	req = blk_mq_alloc_request(q, nvme_req_op(cmd) | rq_flags, blk_flags);
	if (IS_ERR(req))
		return req;
	nvme_init_request(req, cmd);
	nvme_req(req)->flags |= NVME_REQ_USERCMD;
	return req;
}

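/*
 * Map the user data buffer (either a registered io_uring fixed buffer or
 * plain user memory, optionally vectored) and any metadata buffer onto the
 * request.
 */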
static int nvme_map_user_request(struct request *req, u64 ubuffer,
		unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
		u32 meta_seed, struct io_uring_cmd *ioucmd, unsigned int flags)
{
	struct request_queue *q = req->q;
	struct nvme_ns *ns = q->queuedata;
	struct block_device *bdev = ns ? ns->disk->part0 : NULL;
	bool supports_metadata = bdev && blk_get_integrity(bdev->bd_disk);
	bool has_metadata = meta_buffer && meta_len;
	struct bio *bio = NULL;
	int ret;

	if (has_metadata && !supports_metadata)
		return -EINVAL;

	if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) {
		struct iov_iter iter;

		/* fixedbufs is only for non-vectored io */
		if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC))
			return -EINVAL;
		ret = io_uring_cmd_import_fixed(ubuffer, bufflen,
				rq_data_dir(req), &iter, ioucmd);
		if (ret < 0)
			goto out;
		ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL);
	} else {
		ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer),
				bufflen, GFP_KERNEL, flags & NVME_IOCTL_VEC, 0,
				0, rq_data_dir(req));
	}

	if (ret)
		goto out;

	bio = req->bio;
	if (bdev)
		bio_set_dev(bio, bdev);

	if (has_metadata) {
		ret = bio_integrity_map_user(bio, meta_buffer, meta_len,
					     meta_seed);
		if (ret)
			goto out_unmap;
		req->cmd_flags |= REQ_INTEGRITY;
	}

	return ret;

out_unmap:
	if (bio)
		blk_rq_unmap_user(bio);
out:
	blk_mq_free_request(req);
	return ret;
}

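/*
 * Allocate, map and synchronously execute a user passthrough command,
 * copying the completion result back to the caller and unwinding the
 * mapping when the command finishes.
 */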
static int nvme_submit_user_cmd(struct request_queue *q,
		struct nvme_command *cmd, u64 ubuffer, unsigned bufflen,
		void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
		u64 *result, unsigned timeout, unsigned int flags)
{
	struct nvme_ns *ns = q->queuedata;
	struct nvme_ctrl *ctrl;
	struct request *req;
	struct bio *bio;
	u32 effects;
	int ret;

	req = nvme_alloc_user_request(q, cmd, 0, 0);
	if (IS_ERR(req))
		return PTR_ERR(req);

	req->timeout = timeout;
	if (ubuffer && bufflen) {
		ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer,
				meta_len, meta_seed, NULL, flags);
		if (ret)
			return ret;
	}

	bio = req->bio;
	ctrl = nvme_req(req)->ctrl;

	effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode);
	ret = nvme_execute_rq(req, false);
	if (result)
		*result = le64_to_cpu(nvme_req(req)->result.u64);
	if (bio)
		blk_rq_unmap_user(bio);
	blk_mq_free_request(req);

	if (effects)
		nvme_passthru_end(ctrl, ns, effects, cmd, ret);

	return ret;
}

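/*
 * Handler for the legacy NVME_IOCTL_SUBMIT_IO ioctl: only read, write and
 * compare commands are accepted, and the transfer length is derived from
 * the namespace block size.
 */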
static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
{
	struct nvme_user_io io;
	struct nvme_command c;
	unsigned length, meta_len;
	void __user *metadata;

	if (copy_from_user(&io, uio, sizeof(io)))
		return -EFAULT;
	if (io.flags)
		return -EINVAL;

	switch (io.opcode) {
	case nvme_cmd_write:
	case nvme_cmd_read:
	case nvme_cmd_compare:
		break;
	default:
		return -EINVAL;
	}

	length = (io.nblocks + 1) << ns->head->lba_shift;

	if ((io.control & NVME_RW_PRINFO_PRACT) &&
	    (ns->head->ms == ns->head->pi_size)) {
		/*
		 * Protection information is stripped/inserted by the
		 * controller.
		 */
		if (nvme_to_user_ptr(io.metadata))
			return -EINVAL;
		meta_len = 0;
		metadata = NULL;
	} else {
		meta_len = (io.nblocks + 1) * ns->head->ms;
		metadata = nvme_to_user_ptr(io.metadata);
	}

	if (ns->head->features & NVME_NS_EXT_LBAS) {
		length += meta_len;
		meta_len = 0;
	} else if (meta_len) {
		if ((io.metadata & 3) || !io.metadata)
			return -EINVAL;
	}

	memset(&c, 0, sizeof(c));
	c.rw.opcode = io.opcode;
	c.rw.flags = io.flags;
	c.rw.nsid = cpu_to_le32(ns->head->ns_id);
	c.rw.slba = cpu_to_le64(io.slba);
	c.rw.length = cpu_to_le16(io.nblocks);
	c.rw.control = cpu_to_le16(io.control);
	c.rw.dsmgmt = cpu_to_le32(io.dsmgmt);
	c.rw.reftag = cpu_to_le32(io.reftag);
	c.rw.lbat = cpu_to_le16(io.apptag);
	c.rw.lbatm = cpu_to_le16(io.appmask);

	return nvme_submit_user_cmd(ns->queue, &c, io.addr, length, metadata,
			meta_len, lower_32_bits(io.slba), NULL, 0, 0);
}

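/*
 * Reject passthrough commands whose NSID does not match the namespace the
 * ioctl was issued against.
 */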
static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl,
		struct nvme_ns *ns, __u32 nsid)
{
	if (ns && nsid != ns->head->ns_id) {
		dev_err(ctrl->device,
			"%s: nsid (%u) in cmd does not match nsid (%u) "
			"of namespace\n",
			current->comm, nsid, ns->head->ns_id);
		return false;
	}

	return true;
}

static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
		struct nvme_passthru_cmd __user *ucmd, unsigned int flags,
		bool open_for_write)
{
	struct nvme_passthru_cmd cmd;
	struct nvme_command c;
	unsigned timeout = 0;
	u64 result;
	int status;

	if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
		return -EFAULT;
	if (cmd.flags)
		return -EINVAL;
	if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid))
		return -EINVAL;

	memset(&c, 0, sizeof(c));
	c.common.opcode = cmd.opcode;
	c.common.flags = cmd.flags;
	c.common.nsid = cpu_to_le32(cmd.nsid);
	c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
	c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
	c.common.cdw10 = cpu_to_le32(cmd.cdw10);
	c.common.cdw11 = cpu_to_le32(cmd.cdw11);
	c.common.cdw12 = cpu_to_le32(cmd.cdw12);
	c.common.cdw13 = cpu_to_le32(cmd.cdw13);
	c.common.cdw14 = cpu_to_le32(cmd.cdw14);
	c.common.cdw15 = cpu_to_le32(cmd.cdw15);

	if (!nvme_cmd_allowed(ns, &c, 0, open_for_write))
		return -EACCES;

	if (cmd.timeout_ms)
		timeout = msecs_to_jiffies(cmd.timeout_ms);

	status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
			cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata),
			cmd.metadata_len, 0, &result, timeout, 0);

	if (status >= 0) {
		if (put_user(result, &ucmd->result))
			return -EFAULT;
	}

	return status;
}

static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
		struct nvme_passthru_cmd64 __user *ucmd, unsigned int flags,
		bool open_for_write)
{
	struct nvme_passthru_cmd64 cmd;
	struct nvme_command c;
	unsigned timeout = 0;
	int status;

	if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
		return -EFAULT;
	if (cmd.flags)
		return -EINVAL;
	if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid))
		return -EINVAL;

	memset(&c, 0, sizeof(c));
	c.common.opcode = cmd.opcode;
	c.common.flags = cmd.flags;
	c.common.nsid = cpu_to_le32(cmd.nsid);
	c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
	c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
	c.common.cdw10 = cpu_to_le32(cmd.cdw10);
	c.common.cdw11 = cpu_to_le32(cmd.cdw11);
	c.common.cdw12 = cpu_to_le32(cmd.cdw12);
	c.common.cdw13 = cpu_to_le32(cmd.cdw13);
	c.common.cdw14 = cpu_to_le32(cmd.cdw14);
	c.common.cdw15 = cpu_to_le32(cmd.cdw15);

	if (!nvme_cmd_allowed(ns, &c, flags, open_for_write))
		return -EACCES;

	if (cmd.timeout_ms)
		timeout = msecs_to_jiffies(cmd.timeout_ms);

	status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
			cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata),
			cmd.metadata_len, 0, &cmd.result, timeout, flags);

	if (status >= 0) {
		if (put_user(cmd.result, &ucmd->result))
			return -EFAULT;
	}

	return status;
}

struct nvme_uring_data {
	__u64	metadata;
	__u64	addr;
	__u32	data_len;
	__u32	metadata_len;
	__u32	timeout_ms;
};

/*
 * This overlays struct io_uring_cmd pdu.
 * Expect build errors if this grows larger than that.
 */
struct nvme_uring_cmd_pdu {
	struct request *req;
	struct bio *bio;
	u64 result;
	int status;
};

static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu(
		struct io_uring_cmd *ioucmd)
{
	return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu;
}

static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd,
			       unsigned issue_flags)
{
	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);

	if (pdu->bio)
		blk_rq_unmap_user(pdu->bio);
	io_uring_cmd_done(ioucmd, pdu->status, pdu->result, issue_flags);
}

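/*
 * Completion handler for uring passthrough requests: stash status and
 * result in the pdu, then complete either inline for polled requests or
 * via task work otherwise.
 */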
static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
						blk_status_t err)
{
	struct io_uring_cmd *ioucmd = req->end_io_data;
	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);

	if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
		pdu->status = -EINTR;
	else
		pdu->status = nvme_req(req)->status;
	pdu->result = le64_to_cpu(nvme_req(req)->result.u64);

	/*
	 * For iopoll, complete it directly. Note that using the uring_cmd
	 * helper for this is safe only because we check blk_rq_is_poll().
	 * As that returns false if we're NOT on a polled queue, then it's
	 * safe to use the polled completion helper.
	 *
	 * Otherwise, move the completion to task work.
	 */
	if (blk_rq_is_poll(req)) {
		if (pdu->bio)
			blk_rq_unmap_user(pdu->bio);
		io_uring_cmd_iopoll_done(ioucmd, pdu->result, pdu->status);
	} else {
		io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb);
	}

	return RQ_END_IO_FREE;
}

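/*
 * Build an NVMe command from the io_uring SQE and submit it without
 * blocking; completion is reported through nvme_uring_cmd_end_io().
 */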
static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
		struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec)
{
	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
	const struct nvme_uring_cmd *cmd = io_uring_sqe_cmd(ioucmd->sqe);
	struct request_queue *q = ns ? ns->queue : ctrl->admin_q;
	struct nvme_uring_data d;
	struct nvme_command c;
	struct request *req;
	blk_opf_t rq_flags = REQ_ALLOC_CACHE;
	blk_mq_req_flags_t blk_flags = 0;
	int ret;

	c.common.opcode = READ_ONCE(cmd->opcode);
	c.common.flags = READ_ONCE(cmd->flags);
	if (c.common.flags)
		return -EINVAL;

	c.common.command_id = 0;
	c.common.nsid = cpu_to_le32(cmd->nsid);
	if (!nvme_validate_passthru_nsid(ctrl, ns, le32_to_cpu(c.common.nsid)))
		return -EINVAL;

	c.common.cdw2[0] = cpu_to_le32(READ_ONCE(cmd->cdw2));
	c.common.cdw2[1] = cpu_to_le32(READ_ONCE(cmd->cdw3));
	c.common.metadata = 0;
	c.common.dptr.prp1 = c.common.dptr.prp2 = 0;
	c.common.cdw10 = cpu_to_le32(READ_ONCE(cmd->cdw10));
	c.common.cdw11 = cpu_to_le32(READ_ONCE(cmd->cdw11));
	c.common.cdw12 = cpu_to_le32(READ_ONCE(cmd->cdw12));
	c.common.cdw13 = cpu_to_le32(READ_ONCE(cmd->cdw13));
	c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14));
	c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15));

	if (!nvme_cmd_allowed(ns, &c, 0, ioucmd->file->f_mode & FMODE_WRITE))
		return -EACCES;

	d.metadata = READ_ONCE(cmd->metadata);
	d.addr = READ_ONCE(cmd->addr);
	d.data_len = READ_ONCE(cmd->data_len);
	d.metadata_len = READ_ONCE(cmd->metadata_len);
	d.timeout_ms = READ_ONCE(cmd->timeout_ms);

	if (issue_flags & IO_URING_F_NONBLOCK) {
		rq_flags |= REQ_NOWAIT;
		blk_flags = BLK_MQ_REQ_NOWAIT;
	}
	if (issue_flags & IO_URING_F_IOPOLL)
		rq_flags |= REQ_POLLED;

	req = nvme_alloc_user_request(q, &c, rq_flags, blk_flags);
	if (IS_ERR(req))
		return PTR_ERR(req);
	req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0;

	if (d.addr && d.data_len) {
		ret = nvme_map_user_request(req, d.addr,
			d.data_len, nvme_to_user_ptr(d.metadata),
			d.metadata_len, 0, ioucmd, vec);
		if (ret)
			return ret;
	}

	/* to free bio on completion, as req->bio will be null at that time */
	pdu->bio = req->bio;
	pdu->req = req;
	req->end_io_data = ioucmd;
	req->end_io = nvme_uring_cmd_end_io;
	blk_execute_rq_nowait(req, false);
	return -EIOCBQUEUED;
}

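/*
 * Controller-level ioctls (admin passthrough and SED/OPAL) are dispatched
 * against the controller rather than an individual namespace.
 */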
static bool is_ctrl_ioctl(unsigned int cmd)
{
	if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD)
		return true;
	if (is_sed_ioctl(cmd))
		return true;
	return false;
}

static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd,
		void __user *argp, bool open_for_write)
{
	switch (cmd) {
	case NVME_IOCTL_ADMIN_CMD:
		return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write);
	case NVME_IOCTL_ADMIN64_CMD:
		return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write);
	default:
		return sed_ioctl(ctrl->opal_dev, cmd, argp);
	}
}

#ifdef COMPAT_FOR_U64_ALIGNMENT
struct nvme_user_io32 {
	__u8	opcode;
	__u8	flags;
	__u16	control;
	__u16	nblocks;
	__u16	rsvd;
	__u64	metadata;
	__u64	addr;
	__u64	slba;
	__u32	dsmgmt;
	__u32	reftag;
	__u16	apptag;
	__u16	appmask;
} __attribute__((__packed__));
#define NVME_IOCTL_SUBMIT_IO32	_IOW('N', 0x42, struct nvme_user_io32)
#endif /* COMPAT_FOR_U64_ALIGNMENT */

static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
		void __user *argp, unsigned int flags, bool open_for_write)
{
	switch (cmd) {
	case NVME_IOCTL_ID:
		force_successful_syscall_return();
		return ns->head->ns_id;
	case NVME_IOCTL_IO_CMD:
		return nvme_user_cmd(ns->ctrl, ns, argp, flags, open_for_write);
	/*
	 * struct nvme_user_io can have different padding on some 32-bit ABIs.
	 * Just accept the compat version as all fields that are used are the
	 * same size and at the same offset.
	 */
#ifdef COMPAT_FOR_U64_ALIGNMENT
	case NVME_IOCTL_SUBMIT_IO32:
#endif
	case NVME_IOCTL_SUBMIT_IO:
		return nvme_submit_io(ns, argp);
	case NVME_IOCTL_IO64_CMD_VEC:
		flags |= NVME_IOCTL_VEC;
		fallthrough;
	case NVME_IOCTL_IO64_CMD:
		return nvme_user_cmd64(ns->ctrl, ns, argp, flags,
				       open_for_write);
	default:
		return -ENOTTY;
	}
}

int nvme_ioctl(struct block_device *bdev, blk_mode_t mode,
		unsigned int cmd, unsigned long arg)
{
	struct nvme_ns *ns = bdev->bd_disk->private_data;
	bool open_for_write = mode & BLK_OPEN_WRITE;
	void __user *argp = (void __user *)arg;
	unsigned int flags = 0;

	if (bdev_is_partition(bdev))
		flags |= NVME_IOCTL_PARTITION;

	if (is_ctrl_ioctl(cmd))
		return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write);
	return nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write);
}

long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct nvme_ns *ns =
		container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev);
	bool open_for_write = file->f_mode & FMODE_WRITE;
	void __user *argp = (void __user *)arg;

	if (is_ctrl_ioctl(cmd))
		return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write);
	return nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write);
}

static int nvme_uring_cmd_checks(unsigned int issue_flags)
{
	/* NVMe passthrough requires big SQE/CQE support */
	if ((issue_flags & (IO_URING_F_SQE128|IO_URING_F_CQE32)) !=
	    (IO_URING_F_SQE128|IO_URING_F_CQE32))
		return -EOPNOTSUPP;
	return 0;
}

static int nvme_ns_uring_cmd(struct nvme_ns *ns, struct io_uring_cmd *ioucmd,
		unsigned int issue_flags)
{
	struct nvme_ctrl *ctrl = ns->ctrl;
	int ret;

	BUILD_BUG_ON(sizeof(struct nvme_uring_cmd_pdu) > sizeof(ioucmd->pdu));

	ret = nvme_uring_cmd_checks(issue_flags);
	if (ret)
		return ret;

	switch (ioucmd->cmd_op) {
	case NVME_URING_CMD_IO:
		ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, false);
		break;
	case NVME_URING_CMD_IO_VEC:
		ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, true);
		break;
	default:
		ret = -ENOTTY;
	}

	return ret;
}

int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
{
	struct nvme_ns *ns = container_of(file_inode(ioucmd->file)->i_cdev,
			struct nvme_ns, cdev);

	return nvme_ns_uring_cmd(ns, ioucmd, issue_flags);
}

int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
				 struct io_comp_batch *iob,
				 unsigned int poll_flags)
{
	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
	struct request *req = pdu->req;

	if (req && blk_rq_is_poll(req))
		return blk_rq_poll(req, iob, poll_flags);
	return 0;
}
#ifdef CONFIG_NVME_MULTIPATH
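/*
 * For multipath nodes, controller ioctls take a controller reference and
 * drop the head's SRCU lock before dispatching, hence the __releases
 * annotation below.
 */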
static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
		void __user *argp, struct nvme_ns_head *head, int srcu_idx,
		bool open_for_write)
	__releases(&head->srcu)
{
	struct nvme_ctrl *ctrl = ns->ctrl;
	int ret;

	nvme_get_ctrl(ns->ctrl);
	srcu_read_unlock(&head->srcu, srcu_idx);
	ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write);

	nvme_put_ctrl(ctrl);
	return ret;
}

int nvme_ns_head_ioctl(struct block_device *bdev, blk_mode_t mode,
		unsigned int cmd, unsigned long arg)
{
	struct nvme_ns_head *head = bdev->bd_disk->private_data;
	bool open_for_write = mode & BLK_OPEN_WRITE;
	void __user *argp = (void __user *)arg;
	struct nvme_ns *ns;
	int srcu_idx, ret = -EWOULDBLOCK;
	unsigned int flags = 0;

	if (bdev_is_partition(bdev))
		flags |= NVME_IOCTL_PARTITION;

	srcu_idx = srcu_read_lock(&head->srcu);
	ns = nvme_find_path(head);
	if (!ns)
		goto out_unlock;

	/*
	 * Handle ioctls that apply to the controller instead of the namespace
	 * separately and drop the ns SRCU reference early. This avoids a
	 * deadlock when deleting namespaces using the passthrough interface.
	 */
	if (is_ctrl_ioctl(cmd))
		return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx,
					       open_for_write);

	ret = nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write);
out_unlock:
	srcu_read_unlock(&head->srcu, srcu_idx);
	return ret;
}

long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
		unsigned long arg)
{
	bool open_for_write = file->f_mode & FMODE_WRITE;
	struct cdev *cdev = file_inode(file)->i_cdev;
	struct nvme_ns_head *head =
		container_of(cdev, struct nvme_ns_head, cdev);
	void __user *argp = (void __user *)arg;
	struct nvme_ns *ns;
	int srcu_idx, ret = -EWOULDBLOCK;

	srcu_idx = srcu_read_lock(&head->srcu);
	ns = nvme_find_path(head);
	if (!ns)
		goto out_unlock;

	if (is_ctrl_ioctl(cmd))
		return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx,
					       open_for_write);

	ret = nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write);
out_unlock:
	srcu_read_unlock(&head->srcu, srcu_idx);
	return ret;
}

int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
		unsigned int issue_flags)
{
	struct cdev *cdev = file_inode(ioucmd->file)->i_cdev;
	struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev);
	int srcu_idx = srcu_read_lock(&head->srcu);
	struct nvme_ns *ns = nvme_find_path(head);
	int ret = -EINVAL;

	if (ns)
		ret = nvme_ns_uring_cmd(ns, ioucmd, issue_flags);
	srcu_read_unlock(&head->srcu, srcu_idx);
	return ret;
}
#endif /* CONFIG_NVME_MULTIPATH */

int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
{
	struct nvme_ctrl *ctrl = ioucmd->file->private_data;
	int ret;

	/* IOPOLL not supported yet */
	if (issue_flags & IO_URING_F_IOPOLL)
		return -EOPNOTSUPP;

	ret = nvme_uring_cmd_checks(issue_flags);
	if (ret)
		return ret;

	switch (ioucmd->cmd_op) {
	case NVME_URING_CMD_ADMIN:
		ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, false);
		break;
	case NVME_URING_CMD_ADMIN_VEC:
		ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, true);
		break;
	default:
		ret = -ENOTTY;
	}

	return ret;
}

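/*
 * Legacy NVME_IOCTL_IO_CMD on the controller character device: only
 * supported when the controller exposes exactly one namespace.
 */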
static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp,
		bool open_for_write)
{
	struct nvme_ns *ns;
	int ret, srcu_idx;

	srcu_idx = srcu_read_lock(&ctrl->srcu);
	if (list_empty(&ctrl->namespaces)) {
		ret = -ENOTTY;
		goto out_unlock;
	}

	ns = list_first_or_null_rcu(&ctrl->namespaces, struct nvme_ns, list);
	if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) {
		dev_warn(ctrl->device,
			"NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
		ret = -EINVAL;
		goto out_unlock;
	}

	dev_warn(ctrl->device,
		"using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
	if (!nvme_get_ns(ns)) {
		ret = -ENXIO;
		goto out_unlock;
	}
	srcu_read_unlock(&ctrl->srcu, srcu_idx);

	ret = nvme_user_cmd(ctrl, ns, argp, 0, open_for_write);
	nvme_put_ns(ns);
	return ret;

out_unlock:
	srcu_read_unlock(&ctrl->srcu, srcu_idx);
	return ret;
}

long nvme_dev_ioctl(struct file *file, unsigned int cmd,
		unsigned long arg)
{
	bool open_for_write = file->f_mode & FMODE_WRITE;
	struct nvme_ctrl *ctrl = file->private_data;
	void __user *argp = (void __user *)arg;

	switch (cmd) {
	case NVME_IOCTL_ADMIN_CMD:
		return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write);
	case NVME_IOCTL_ADMIN64_CMD:
		return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write);
	case NVME_IOCTL_IO_CMD:
		return nvme_dev_user_cmd(ctrl, argp, open_for_write);
	case NVME_IOCTL_RESET:
		if (!capable(CAP_SYS_ADMIN))
			return -EACCES;
		dev_warn(ctrl->device, "resetting controller\n");
		return nvme_reset_ctrl_sync(ctrl);
	case NVME_IOCTL_SUBSYS_RESET:
		if (!capable(CAP_SYS_ADMIN))
			return -EACCES;
		return nvme_reset_subsystem(ctrl);
	case NVME_IOCTL_RESCAN:
		if (!capable(CAP_SYS_ADMIN))
			return -EACCES;
		nvme_queue_scan(ctrl);
		return 0;
	default:
		return -ENOTTY;
	}
}