// SPDX-License-Identifier: GPL-2.0-only
/*
 * Helpers for the host side of a virtio ring.
 *
 * Since these may be in userspace, we use (inline) accessors.
 */
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/vringh.h>
#include <linux/virtio_ring.h>
#include <linux/kernel.h>
#include <linux/ratelimit.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <uapi/linux/virtio_config.h>

static __printf(1,2) __cold void vringh_bad(const char *fmt, ...)
{
	static DEFINE_RATELIMIT_STATE(vringh_rs,
				      DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);
	if (__ratelimit(&vringh_rs)) {
		va_list ap;
		va_start(ap, fmt);
		printk(KERN_NOTICE "vringh:");
		vprintk(fmt, ap);
		va_end(ap);
	}
}

/* Returns vring->num if empty, -ve on error. */
static inline int __vringh_get_head(const struct vringh *vrh,
				    int (*getu16)(const struct vringh *vrh,
						  u16 *val, const __virtio16 *p),
				    u16 *last_avail_idx)
{
	u16 avail_idx, i, head;
	int err;

	err = getu16(vrh, &avail_idx, &vrh->vring.avail->idx);
	if (err) {
		vringh_bad("Failed to access avail idx at %p",
			   &vrh->vring.avail->idx);
		return err;
	}

	if (*last_avail_idx == avail_idx)
		return vrh->vring.num;

	/* Only get avail ring entries after they have been exposed by guest. */
	virtio_rmb(vrh->weak_barriers);

	i = *last_avail_idx & (vrh->vring.num - 1);

	err = getu16(vrh, &head, &vrh->vring.avail->ring[i]);
	if (err) {
		vringh_bad("Failed to read head: idx %d address %p",
			   *last_avail_idx, &vrh->vring.avail->ring[i]);
		return err;
	}

	if (head >= vrh->vring.num) {
		vringh_bad("Guest says index %u > %u is available",
			   head, vrh->vring.num);
		return -EINVAL;
	}

	(*last_avail_idx)++;
	return head;
}

/* Copy some bytes to/from the iovec.  Returns num copied. */
static inline ssize_t vringh_iov_xfer(struct vringh_kiov *iov,
				      void *ptr, size_t len,
				      int (*xfer)(void *addr, void *ptr,
						  size_t len))
{
	int err, done = 0;

	while (len && iov->i < iov->used) {
		size_t partlen;

		partlen = min(iov->iov[iov->i].iov_len, len);
		err = xfer(iov->iov[iov->i].iov_base, ptr, partlen);
		if (err)
			return err;
		done += partlen;
		len -= partlen;
		ptr += partlen;
		iov->consumed += partlen;
		iov->iov[iov->i].iov_len -= partlen;
		iov->iov[iov->i].iov_base += partlen;

		if (!iov->iov[iov->i].iov_len) {
			/* Fix up old iov element then increment. */
			iov->iov[iov->i].iov_len = iov->consumed;
			iov->iov[iov->i].iov_base -= iov->consumed;

			iov->consumed = 0;
			iov->i++;
		}
	}
	return done;
}

/* May reduce *len if range is shorter. */
static inline bool range_check(struct vringh *vrh, u64 addr, size_t *len,
			       struct vringh_range *range,
			       bool (*getrange)(struct vringh *,
						u64, struct vringh_range *))
{
	if (addr < range->start || addr > range->end_incl) {
		if (!getrange(vrh, addr, range))
			return false;
	}
	BUG_ON(addr < range->start || addr > range->end_incl);

	/* To end of memory? */
	if (unlikely(addr + *len == 0)) {
		if (range->end_incl == -1ULL)
			return true;
		goto truncate;
	}

	/* Otherwise, don't wrap. */
	if (addr + *len < addr) {
		vringh_bad("Wrapping descriptor %zu@0x%llx",
			   *len, (unsigned long long)addr);
		return false;
	}

	if (unlikely(addr + *len - 1 > range->end_incl))
		goto truncate;
	return true;

truncate:
	*len = range->end_incl + 1 - addr;
	return true;
}

static inline bool no_range_check(struct vringh *vrh, u64 addr, size_t *len,
				  struct vringh_range *range,
				  bool (*getrange)(struct vringh *,
						   u64, struct vringh_range *))
{
	return true;
}
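
/*
 * A minimal sketch of a getrange callback for the user API below,
 * assuming a single region of guest memory mapped 1:1 into our address
 * space; EXAMPLE_GUEST_BASE/EXAMPLE_GUEST_SIZE are hypothetical.
 * range_check() above caches the last hit, so this only runs when an
 * address misses the cached range.  ->offset is what gets added to a
 * guest address to produce the pointer we actually dereference.
 */
#define EXAMPLE_GUEST_BASE	0x100000ULL
#define EXAMPLE_GUEST_SIZE	0x200000ULL

static bool __maybe_unused example_getrange(struct vringh *vrh, u64 addr,
					    struct vringh_range *r)
{
	if (addr < EXAMPLE_GUEST_BASE ||
	    addr >= EXAMPLE_GUEST_BASE + EXAMPLE_GUEST_SIZE)
		return false;

	r->start = EXAMPLE_GUEST_BASE;
	r->end_incl = EXAMPLE_GUEST_BASE + EXAMPLE_GUEST_SIZE - 1;
	r->offset = 0;	/* 1:1 mapping: guest address == our pointer */
	return true;
}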

/* No reason for this code to be inline. */
static int move_to_indirect(const struct vringh *vrh,
			    int *up_next, u16 *i, void *addr,
			    const struct vring_desc *desc,
			    struct vring_desc **descs, int *desc_max)
{
	u32 len;

	/* Indirect tables can't have indirect. */
	if (*up_next != -1) {
		vringh_bad("Multilevel indirect %u->%u", *up_next, *i);
		return -EINVAL;
	}

	len = vringh32_to_cpu(vrh, desc->len);
	if (unlikely(len % sizeof(struct vring_desc))) {
		vringh_bad("Strange indirect len %u", len);
		return -EINVAL;
	}

	/* We will check this when we follow it! */
	if (desc->flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT))
		*up_next = vringh16_to_cpu(vrh, desc->next);
	else
		*up_next = -2;
	*descs = addr;
	*desc_max = len / sizeof(struct vring_desc);

	/* Now, start at the first indirect. */
	*i = 0;
	return 0;
}

static int resize_iovec(struct vringh_kiov *iov, gfp_t gfp)
{
	struct kvec *new;
	unsigned int flag, new_num = (iov->max_num & ~VRINGH_IOV_ALLOCATED) * 2;

	if (new_num < 8)
		new_num = 8;

	flag = (iov->max_num & VRINGH_IOV_ALLOCATED);
	if (flag)
		new = krealloc(iov->iov, new_num * sizeof(struct iovec), gfp);
	else {
		new = kmalloc_array(new_num, sizeof(struct iovec), gfp);
		if (new) {
			memcpy(new, iov->iov,
			       iov->max_num * sizeof(struct iovec));
			flag = VRINGH_IOV_ALLOCATED;
		}
	}
	if (!new)
		return -ENOMEM;
	iov->iov = new;
	iov->max_num = (new_num | flag);
	return 0;
}

static u16 __cold return_from_indirect(const struct vringh *vrh, int *up_next,
				       struct vring_desc **descs, int *desc_max)
{
	u16 i = *up_next;

	*up_next = -1;
	*descs = vrh->vring.desc;
	*desc_max = vrh->vring.num;
	return i;
}

static int slow_copy(struct vringh *vrh, void *dst, const void *src,
		     bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
				    struct vringh_range *range,
				    bool (*getrange)(struct vringh *vrh,
						     u64,
						     struct vringh_range *)),
		     bool (*getrange)(struct vringh *vrh,
				      u64 addr,
				      struct vringh_range *r),
		     struct vringh_range *range,
		     int (*copy)(void *dst, const void *src, size_t len))
{
	size_t part, len = sizeof(struct vring_desc);

	do {
		u64 addr;
		int err;

		part = len;
		addr = (u64)(unsigned long)src - range->offset;

		if (!rcheck(vrh, addr, &part, range, getrange))
			return -EINVAL;

		err = copy(dst, src, part);
		if (err)
			return err;

		dst += part;
		src += part;
		len -= part;
	} while (len);
	return 0;
}

static inline int
__vringh_iov(struct vringh *vrh, u16 i,
	     struct vringh_kiov *riov,
	     struct vringh_kiov *wiov,
	     bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
			    struct vringh_range *range,
			    bool (*getrange)(struct vringh *, u64,
					     struct vringh_range *)),
	     bool (*getrange)(struct vringh *, u64, struct vringh_range *),
	     gfp_t gfp,
	     int (*copy)(void *dst, const void *src, size_t len))
{
	int err, count = 0, up_next, desc_max;
	struct vring_desc desc, *descs;
	struct vringh_range range = { -1ULL, 0 }, slowrange;
	bool slow = false;

	/* We start traversing vring's descriptor table. */
	descs = vrh->vring.desc;
	desc_max = vrh->vring.num;
	up_next = -1;

	if (riov)
		riov->i = riov->used = 0;
	else if (wiov)
		wiov->i = wiov->used = 0;
	else
		/* You must want something! */
		BUG();

	for (;;) {
		void *addr;
		struct vringh_kiov *iov;
		size_t len;

		if (unlikely(slow))
			err = slow_copy(vrh, &desc, &descs[i], rcheck, getrange,
					&slowrange, copy);
		else
			err = copy(&desc, &descs[i], sizeof(desc));
		if (unlikely(err))
			goto fail;

		if (unlikely(desc.flags &
			     cpu_to_vringh16(vrh, VRING_DESC_F_INDIRECT))) {
			u64 a = vringh64_to_cpu(vrh, desc.addr);

			/* Make sure it's OK, and get offset. */
			len = vringh32_to_cpu(vrh, desc.len);
			if (!rcheck(vrh, a, &len, &range, getrange)) {
				err = -EINVAL;
				goto fail;
			}

			if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
				slow = true;
				/* We need to save this range to use offset */
				slowrange = range;
			}

			addr = (void *)(long)(a + range.offset);
			err = move_to_indirect(vrh, &up_next, &i, addr, &desc,
					       &descs, &desc_max);
			if (err)
				goto fail;
			continue;
		}

		if (count++ == vrh->vring.num) {
			vringh_bad("Descriptor loop in %p", descs);
			err = -ELOOP;
			goto fail;
		}

		if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_WRITE))
			iov = wiov;
		else {
			iov = riov;
			if (unlikely(wiov && wiov->i)) {
				vringh_bad("Readable desc %p after writable",
					   &descs[i]);
				err = -EINVAL;
				goto fail;
			}
		}

		if (!iov) {
			vringh_bad("Unexpected %s desc",
				   !wiov ? "writable" : "readable");
			err = -EPROTO;
			goto fail;
		}

	again:
		/* Make sure it's OK, and get offset. */
		len = vringh32_to_cpu(vrh, desc.len);
		if (!rcheck(vrh, vringh64_to_cpu(vrh, desc.addr), &len, &range,
			    getrange)) {
			err = -EINVAL;
			goto fail;
		}
		addr = (void *)(unsigned long)(vringh64_to_cpu(vrh, desc.addr) +
					       range.offset);

		if (unlikely(iov->used == (iov->max_num & ~VRINGH_IOV_ALLOCATED))) {
			err = resize_iovec(iov, gfp);
			if (err)
				goto fail;
		}

		iov->iov[iov->used].iov_base = addr;
		iov->iov[iov->used].iov_len = len;
		iov->used++;

		if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
			desc.len = cpu_to_vringh32(vrh,
				   vringh32_to_cpu(vrh, desc.len) - len);
			desc.addr = cpu_to_vringh64(vrh,
				    vringh64_to_cpu(vrh, desc.addr) + len);
			goto again;
		}

		if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT)) {
			i = vringh16_to_cpu(vrh, desc.next);
		} else {
			/* Just in case we need to finish traversing above. */
			if (unlikely(up_next > 0)) {
				i = return_from_indirect(vrh, &up_next,
							 &descs, &desc_max);
				slow = false;
			} else
				break;
		}

		if (i >= desc_max) {
			vringh_bad("Chained index %u > %u", i, desc_max);
			err = -EINVAL;
			goto fail;
		}
	}

	return 0;

fail:
	return err;
}

static inline int __vringh_complete(struct vringh *vrh,
				    const struct vring_used_elem *used,
				    unsigned int num_used,
				    int (*putu16)(const struct vringh *vrh,
						  __virtio16 *p, u16 val),
				    int (*putused)(struct vring_used_elem *dst,
						   const struct vring_used_elem
						   *src, unsigned num))
{
	struct vring_used *used_ring;
	int err;
	u16 used_idx, off;

	used_ring = vrh->vring.used;
	used_idx = vrh->last_used_idx + vrh->completed;

	off = used_idx % vrh->vring.num;

	/* Compiler knows num_used == 1 sometimes, hence extra check */
	if (num_used > 1 && unlikely(off + num_used >= vrh->vring.num)) {
		u16 part = vrh->vring.num - off;
		err = putused(&used_ring->ring[off], used, part);
		if (!err)
			err = putused(&used_ring->ring[0], used + part,
				      num_used - part);
	} else
		err = putused(&used_ring->ring[off], used, num_used);

	if (err) {
		vringh_bad("Failed to write %u used entries %u at %p",
			   num_used, off, &used_ring->ring[off]);
		return err;
	}

	/* Make sure buffer is written before we update index. */
	virtio_wmb(vrh->weak_barriers);

	err = putu16(vrh, &vrh->vring.used->idx, used_idx + num_used);
	if (err) {
		vringh_bad("Failed to update used index at %p",
			   &vrh->vring.used->idx);
		return err;
	}

	vrh->completed += num_used;
	return 0;
}

static inline int __vringh_need_notify(struct vringh *vrh,
				       int (*getu16)(const struct vringh *vrh,
						     u16 *val,
						     const __virtio16 *p))
{
	bool notify;
	u16 used_event;
	int err;

	/* Flush out used index update. This is paired with the
	 * barrier that the Guest executes when enabling
	 * interrupts. */
	virtio_mb(vrh->weak_barriers);

	/* Old-style, without event indices. */
	if (!vrh->event_indices) {
		u16 flags;
		err = getu16(vrh, &flags, &vrh->vring.avail->flags);
		if (err) {
			vringh_bad("Failed to get flags at %p",
				   &vrh->vring.avail->flags);
			return err;
		}
		return (!(flags & VRING_AVAIL_F_NO_INTERRUPT));
	}

	/* Modern: we know when other side wants to know. */
	err = getu16(vrh, &used_event, &vring_used_event(&vrh->vring));
	if (err) {
		vringh_bad("Failed to get used event idx at %p",
			   &vring_used_event(&vrh->vring));
		return err;
	}

	/* Just in case we added so many that we wrap. */
	if (unlikely(vrh->completed > 0xffff))
		notify = true;
	else
		notify = vring_need_event(used_event,
					  vrh->last_used_idx + vrh->completed,
					  vrh->last_used_idx);

	vrh->last_used_idx += vrh->completed;
	vrh->completed = 0;
	return notify;
}

static inline bool __vringh_notify_enable(struct vringh *vrh,
					  int (*getu16)(const struct vringh *vrh,
							u16 *val, const __virtio16 *p),
					  int (*putu16)(const struct vringh *vrh,
							__virtio16 *p, u16 val))
{
	u16 avail;

	if (!vrh->event_indices) {
		/* Old-school; update flags. */
		if (putu16(vrh, &vrh->vring.used->flags, 0) != 0) {
			vringh_bad("Clearing used flags %p",
				   &vrh->vring.used->flags);
			return true;
		}
	} else {
		if (putu16(vrh, &vring_avail_event(&vrh->vring),
			   vrh->last_avail_idx) != 0) {
			vringh_bad("Updating avail event index %p",
				   &vring_avail_event(&vrh->vring));
			return true;
		}
	}

	/* They could have slipped one in as we were doing that: make
	 * sure it's written, then check again. */
	virtio_mb(vrh->weak_barriers);

	if (getu16(vrh, &avail, &vrh->vring.avail->idx) != 0) {
		vringh_bad("Failed to check avail idx at %p",
			   &vrh->vring.avail->idx);
		return true;
	}

	/* This is unlikely, so we just leave notifications enabled
	 * (if we're using event_indices, we'll only get one
	 * notification anyway). */
	return avail == vrh->last_avail_idx;
}

static inline void __vringh_notify_disable(struct vringh *vrh,
					   int (*putu16)(const struct vringh *vrh,
							 __virtio16 *p, u16 val))
{
	if (!vrh->event_indices) {
		/* Old-school; update flags. */
		if (putu16(vrh, &vrh->vring.used->flags,
			   VRING_USED_F_NO_NOTIFY)) {
			vringh_bad("Setting used flags %p",
				   &vrh->vring.used->flags);
		}
	}
}

/* Userspace access helpers: in this case, addresses are really userspace. */
static inline int getu16_user(const struct vringh *vrh, u16 *val, const __virtio16 *p)
{
	__virtio16 v = 0;
	int rc = get_user(v, (__force __virtio16 __user *)p);
	*val = vringh16_to_cpu(vrh, v);
	return rc;
}

static inline int putu16_user(const struct vringh *vrh, __virtio16 *p, u16 val)
{
	__virtio16 v = cpu_to_vringh16(vrh, val);
	return put_user(v, (__force __virtio16 __user *)p);
}

static inline int copydesc_user(void *dst, const void *src, size_t len)
{
	return copy_from_user(dst, (__force void __user *)src, len) ?
		-EFAULT : 0;
}

static inline int putused_user(struct vring_used_elem *dst,
			       const struct vring_used_elem *src,
			       unsigned int num)
{
	return copy_to_user((__force void __user *)dst, src,
			    sizeof(*dst) * num) ? -EFAULT : 0;
}

static inline int xfer_from_user(void *src, void *dst, size_t len)
{
	return copy_from_user(dst, (__force void __user *)src, len) ?
		-EFAULT : 0;
}

static inline int xfer_to_user(void *dst, void *src, size_t len)
{
	return copy_to_user((__force void __user *)dst, src, len) ?
		-EFAULT : 0;
}

/**
 * vringh_init_user - initialize a vringh for a userspace vring.
 * @vrh: the vringh to initialize.
 * @features: the feature bits for this ring.
 * @num: the number of elements.
 * @weak_barriers: true if we only need memory barriers, not I/O.
 * @desc: the userspace descriptor pointer.
 * @avail: the userspace avail pointer.
 * @used: the userspace used pointer.
 *
 * Returns an error if num is invalid: you should check pointers
 * yourself!
 */
int vringh_init_user(struct vringh *vrh, u64 features,
		     unsigned int num, bool weak_barriers,
		     struct vring_desc __user *desc,
		     struct vring_avail __user *avail,
		     struct vring_used __user *used)
{
	/* Sane power of 2 please! */
	if (!num || num > 0xffff || (num & (num - 1))) {
		vringh_bad("Bad ring size %u", num);
		return -EINVAL;
	}

	vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
	vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
	vrh->weak_barriers = weak_barriers;
	vrh->completed = 0;
	vrh->last_avail_idx = 0;
	vrh->last_used_idx = 0;
	vrh->vring.num = num;
	/* vring expects kernel addresses, but only used via accessors. */
	vrh->vring.desc = (__force struct vring_desc *)desc;
	vrh->vring.avail = (__force struct vring_avail *)avail;
	vrh->vring.used = (__force struct vring_used *)used;
	return 0;
}
EXPORT_SYMBOL(vringh_init_user);
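
/*
 * Example: a minimal setup sketch.  The ring pointers are assumed to
 * come from a device-specific channel (e.g. an ioctl); the 256-entry
 * size and the feature choice are illustrative only.
 */
static int __maybe_unused example_init(struct vringh *vrh,
				       struct vring_desc __user *desc,
				       struct vring_avail __user *avail,
				       struct vring_used __user *used)
{
	u64 features = (1ULL << VIRTIO_F_VERSION_1) |
		       (1ULL << VIRTIO_RING_F_EVENT_IDX);

	/* num must be a power of 2 and at most 0xffff. */
	return vringh_init_user(vrh, features, 256, true, desc, avail, used);
}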

/**
 * vringh_getdesc_user - get next available descriptor from userspace ring.
 * @vrh: the userspace vring.
 * @riov: where to put the readable descriptors (or NULL)
 * @wiov: where to put the writable descriptors (or NULL)
 * @getrange: function to call to check ranges.
 * @head: head index we received, for passing to vringh_complete_user().
 *
 * Returns 0 if there was no descriptor, 1 if there was, or -errno.
 *
 * Note that on error return, you can tell the difference between an
 * invalid ring and a single invalid descriptor: in the former case,
 * *head will be vrh->vring.num.  You may be able to ignore an invalid
 * descriptor, but there's not much you can do with an invalid ring.
 *
 * Note that you may need to clean up riov and wiov, even on error!
 */
int vringh_getdesc_user(struct vringh *vrh,
			struct vringh_iov *riov,
			struct vringh_iov *wiov,
			bool (*getrange)(struct vringh *vrh,
					 u64 addr, struct vringh_range *r),
			u16 *head)
{
	int err;

	*head = vrh->vring.num;
	err = __vringh_get_head(vrh, getu16_user, &vrh->last_avail_idx);
	if (err < 0)
		return err;

	/* Empty... */
	if (err == vrh->vring.num)
		return 0;

	/* We need the layouts to be identical for this to work */
	BUILD_BUG_ON(sizeof(struct vringh_kiov) != sizeof(struct vringh_iov));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, iov) !=
		     offsetof(struct vringh_iov, iov));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, i) !=
		     offsetof(struct vringh_iov, i));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, used) !=
		     offsetof(struct vringh_iov, used));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, max_num) !=
		     offsetof(struct vringh_iov, max_num));
	BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec));
	BUILD_BUG_ON(offsetof(struct iovec, iov_base) !=
		     offsetof(struct kvec, iov_base));
	BUILD_BUG_ON(offsetof(struct iovec, iov_len) !=
		     offsetof(struct kvec, iov_len));
	BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_base)
		     != sizeof(((struct kvec *)NULL)->iov_base));
	BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_len)
		     != sizeof(((struct kvec *)NULL)->iov_len));

	*head = err;
	err = __vringh_iov(vrh, *head, (struct vringh_kiov *)riov,
			   (struct vringh_kiov *)wiov,
			   range_check, getrange, GFP_KERNEL, copydesc_user);
	if (err)
		return err;

	return 1;
}
EXPORT_SYMBOL(vringh_getdesc_user);
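
/*
 * Sketch of a request/reply cycle built on the calls above.  The echo
 * semantics, the 64-byte buffer and example_getrange() (defined earlier
 * as an illustration) are assumptions, not part of the API.
 */
static int __maybe_unused example_service_user(struct vringh *vrh)
{
	struct iovec r_stack[8], w_stack[8];
	struct vringh_iov riov, wiov;
	char buf[64];
	ssize_t got;
	u16 head;
	int err;

	vringh_iov_init(&riov, r_stack, ARRAY_SIZE(r_stack));
	vringh_iov_init(&wiov, w_stack, ARRAY_SIZE(w_stack));

	err = vringh_getdesc_user(vrh, &riov, &wiov, example_getrange, &head);
	if (err <= 0)	/* 0: ring empty; < 0: bad ring or descriptor */
		goto out;

	/* Pull the readable part, push the reply, then publish the head. */
	got = vringh_iov_pull_user(&riov, buf, sizeof(buf));
	if (got < 0) {
		err = got;
		goto out;
	}
	got = vringh_iov_push_user(&wiov, buf, got);
	if (got < 0) {
		err = got;
		goto out;
	}
	err = vringh_complete_user(vrh, head, got);
out:
	vringh_iov_cleanup(&riov);
	vringh_iov_cleanup(&wiov);
	return err;
}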

/**
 * vringh_iov_pull_user - copy bytes from vring_iov.
 * @riov: the riov as passed to vringh_getdesc_user() (updated as we consume)
 * @dst: the place to copy.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_pull_user(struct vringh_iov *riov, void *dst, size_t len)
{
	return vringh_iov_xfer((struct vringh_kiov *)riov,
			       dst, len, xfer_from_user);
}
EXPORT_SYMBOL(vringh_iov_pull_user);

/**
 * vringh_iov_push_user - copy bytes into vring_iov.
 * @wiov: the wiov as passed to vringh_getdesc_user() (updated as we consume)
 * @src: the place to copy from.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_push_user(struct vringh_iov *wiov,
			     const void *src, size_t len)
{
	return vringh_iov_xfer((struct vringh_kiov *)wiov,
			       (void *)src, len, xfer_to_user);
}
EXPORT_SYMBOL(vringh_iov_push_user);

/**
 * vringh_abandon_user - we've decided not to handle the descriptor(s).
 * @vrh: the vring.
 * @num: the number of descriptors to put back (i.e. num
 *	 vringh_get_user() to undo).
 *
 * The next vringh_get_user() will return the old descriptor(s) again.
 */
void vringh_abandon_user(struct vringh *vrh, unsigned int num)
{
	/* We only update vring_avail_event(vr) when we want to be notified,
	 * so we haven't changed that yet. */
	vrh->last_avail_idx -= num;
}
EXPORT_SYMBOL(vringh_abandon_user);

/**
 * vringh_complete_user - we've finished with a descriptor, publish it.
 * @vrh: the vring.
 * @head: the head as filled in by vringh_getdesc_user.
 * @len: the length of data we have written.
 *
 * You should check vringh_need_notify_user() after one or more calls
 * to this function.
 */
int vringh_complete_user(struct vringh *vrh, u16 head, u32 len)
{
	struct vring_used_elem used;

	used.id = cpu_to_vringh32(vrh, head);
	used.len = cpu_to_vringh32(vrh, len);
	return __vringh_complete(vrh, &used, 1, putu16_user, putused_user);
}
EXPORT_SYMBOL(vringh_complete_user);

/**
 * vringh_complete_multi_user - we've finished with many descriptors.
 * @vrh: the vring.
 * @used: the head, length pairs.
 * @num_used: the number of used elements.
 *
 * You should check vringh_need_notify_user() after one or more calls
 * to this function.
 */
int vringh_complete_multi_user(struct vringh *vrh,
			       const struct vring_used_elem used[],
			       unsigned num_used)
{
	return __vringh_complete(vrh, used, num_used,
				 putu16_user, putused_user);
}
EXPORT_SYMBOL(vringh_complete_multi_user);

/**
 * vringh_notify_enable_user - we want to know if something changes.
 * @vrh: the vring.
 *
 * This always enables notifications, but returns false if there are
 * now more buffers available in the vring.
 */
bool vringh_notify_enable_user(struct vringh *vrh)
{
	return __vringh_notify_enable(vrh, getu16_user, putu16_user);
}
EXPORT_SYMBOL(vringh_notify_enable_user);

/**
 * vringh_notify_disable_user - don't tell us if something changes.
 * @vrh: the vring.
 *
 * This is our normal running state: we disable and then only enable when
 * we're going to sleep.
 */
void vringh_notify_disable_user(struct vringh *vrh)
{
	__vringh_notify_disable(vrh, putu16_user);
}
EXPORT_SYMBOL(vringh_notify_disable_user);

/**
 * vringh_need_notify_user - must we tell the other side about used buffers?
 * @vrh: the vring we've called vringh_complete_user() on.
 *
 * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
 */
int vringh_need_notify_user(struct vringh *vrh)
{
	return __vringh_need_notify(vrh, getu16_user);
}
EXPORT_SYMBOL(vringh_need_notify_user);
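
/*
 * Sketch of the notify/suppress cycle around sleeping, following the
 * write-then-recheck pattern in __vringh_notify_enable(); the kick via
 * the hypothetical my_kick() callback is an assumption (e.g. an eventfd
 * in a real device).
 */
static void __maybe_unused example_notify_cycle(struct vringh *vrh,
						void (*my_kick)(void))
{
	/* After completing buffers, kick only if the other side asked. */
	if (vringh_need_notify_user(vrh) > 0)
		my_kick();

	/* Out of work?  Re-enable notifications before sleeping; false
	 * means buffers slipped in meanwhile, so poll again instead. */
	if (!vringh_notify_enable_user(vrh))
		return;

	/* ...sleep until kicked; then run polled with them disabled. */
	vringh_notify_disable_user(vrh);
}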

/* Kernelspace access helpers. */
static inline int getu16_kern(const struct vringh *vrh,
			      u16 *val, const __virtio16 *p)
{
	*val = vringh16_to_cpu(vrh, READ_ONCE(*p));
	return 0;
}

static inline int putu16_kern(const struct vringh *vrh, __virtio16 *p, u16 val)
{
	WRITE_ONCE(*p, cpu_to_vringh16(vrh, val));
	return 0;
}

static inline int copydesc_kern(void *dst, const void *src, size_t len)
{
	memcpy(dst, src, len);
	return 0;
}

static inline int putused_kern(struct vring_used_elem *dst,
			       const struct vring_used_elem *src,
			       unsigned int num)
{
	memcpy(dst, src, num * sizeof(*dst));
	return 0;
}

static inline int xfer_kern(void *src, void *dst, size_t len)
{
	memcpy(dst, src, len);
	return 0;
}

/**
 * vringh_init_kern - initialize a vringh for a kernelspace vring.
 * @vrh: the vringh to initialize.
 * @features: the feature bits for this ring.
 * @num: the number of elements.
 * @weak_barriers: true if we only need memory barriers, not I/O.
 * @desc: the kernelspace descriptor pointer.
 * @avail: the kernelspace avail pointer.
 * @used: the kernelspace used pointer.
 *
 * Returns an error if num is invalid.
 */
int vringh_init_kern(struct vringh *vrh, u64 features,
		     unsigned int num, bool weak_barriers,
		     struct vring_desc *desc,
		     struct vring_avail *avail,
		     struct vring_used *used)
{
	/* Sane power of 2 please! */
	if (!num || num > 0xffff || (num & (num - 1))) {
		vringh_bad("Bad ring size %u", num);
		return -EINVAL;
	}

	vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
	vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
	vrh->weak_barriers = weak_barriers;
	vrh->completed = 0;
	vrh->last_avail_idx = 0;
	vrh->last_used_idx = 0;
	vrh->vring.num = num;
	vrh->vring.desc = desc;
	vrh->vring.avail = avail;
	vrh->vring.used = used;
	return 0;
}
EXPORT_SYMBOL(vringh_init_kern);

/**
 * vringh_getdesc_kern - get next available descriptor from kernelspace ring.
 * @vrh: the kernelspace vring.
 * @riov: where to put the readable descriptors (or NULL)
 * @wiov: where to put the writable descriptors (or NULL)
 * @head: head index we received, for passing to vringh_complete_kern().
 * @gfp: flags for allocating larger riov/wiov.
 *
 * Returns 0 if there was no descriptor, 1 if there was, or -errno.
 *
 * Note that on error return, you can tell the difference between an
 * invalid ring and a single invalid descriptor: in the former case,
 * *head will be vrh->vring.num.  You may be able to ignore an invalid
 * descriptor, but there's not much you can do with an invalid ring.
 *
 * Note that you may need to clean up riov and wiov, even on error!
 */
int vringh_getdesc_kern(struct vringh *vrh,
			struct vringh_kiov *riov,
			struct vringh_kiov *wiov,
			u16 *head,
			gfp_t gfp)
{
	int err;

	err = __vringh_get_head(vrh, getu16_kern, &vrh->last_avail_idx);
	if (err < 0)
		return err;

	/* Empty... */
	if (err == vrh->vring.num)
		return 0;

	*head = err;
	err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL,
			   gfp, copydesc_kern);
	if (err)
		return err;

	return 1;
}
EXPORT_SYMBOL(vringh_getdesc_kern);
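
/*
 * Kernel-side counterpart of the userspace sketch above: one echoed
 * descriptor chain, with the same assumed 64-byte payload; real users
 * size their kvec arrays (or rely on gfp growth) to fit their rings.
 */
static int __maybe_unused example_service_kern(struct vringh *vrh)
{
	struct kvec r_stack[8], w_stack[8];
	struct vringh_kiov riov, wiov;
	char buf[64];
	ssize_t got;
	u16 head;
	int err;

	vringh_kiov_init(&riov, r_stack, ARRAY_SIZE(r_stack));
	vringh_kiov_init(&wiov, w_stack, ARRAY_SIZE(w_stack));

	err = vringh_getdesc_kern(vrh, &riov, &wiov, &head, GFP_KERNEL);
	if (err <= 0)
		goto out;

	got = vringh_iov_pull_kern(&riov, buf, sizeof(buf));
	if (got < 0) {
		err = got;
		goto out;
	}
	got = vringh_iov_push_kern(&wiov, buf, got);
	if (got < 0) {
		err = got;
		goto out;
	}
	err = vringh_complete_kern(vrh, head, got);
out:
	vringh_kiov_cleanup(&riov);
	vringh_kiov_cleanup(&wiov);
	return err;
}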

/**
 * vringh_iov_pull_kern - copy bytes from vring_iov.
 * @riov: the riov as passed to vringh_getdesc_kern() (updated as we consume)
 * @dst: the place to copy.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_pull_kern(struct vringh_kiov *riov, void *dst, size_t len)
{
	return vringh_iov_xfer(riov, dst, len, xfer_kern);
}
EXPORT_SYMBOL(vringh_iov_pull_kern);

/**
 * vringh_iov_push_kern - copy bytes into vring_iov.
 * @wiov: the wiov as passed to vringh_getdesc_kern() (updated as we consume)
 * @src: the place to copy from.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov,
			     const void *src, size_t len)
{
	return vringh_iov_xfer(wiov, (void *)src, len, xfer_kern);
}
EXPORT_SYMBOL(vringh_iov_push_kern);

/**
 * vringh_abandon_kern - we've decided not to handle the descriptor(s).
 * @vrh: the vring.
 * @num: the number of descriptors to put back (i.e. num
 *	 vringh_get_kern() to undo).
 *
 * The next vringh_get_kern() will return the old descriptor(s) again.
 */
void vringh_abandon_kern(struct vringh *vrh, unsigned int num)
{
	/* We only update vring_avail_event(vr) when we want to be notified,
	 * so we haven't changed that yet. */
	vrh->last_avail_idx -= num;
}
EXPORT_SYMBOL(vringh_abandon_kern);

/**
 * vringh_complete_kern - we've finished with a descriptor, publish it.
 * @vrh: the vring.
 * @head: the head as filled in by vringh_getdesc_kern.
 * @len: the length of data we have written.
 *
 * You should check vringh_need_notify_kern() after one or more calls
 * to this function.
 */
int vringh_complete_kern(struct vringh *vrh, u16 head, u32 len)
{
	struct vring_used_elem used;

	used.id = cpu_to_vringh32(vrh, head);
	used.len = cpu_to_vringh32(vrh, len);

	return __vringh_complete(vrh, &used, 1, putu16_kern, putused_kern);
}
EXPORT_SYMBOL(vringh_complete_kern);

/**
 * vringh_notify_enable_kern - we want to know if something changes.
 * @vrh: the vring.
 *
 * This always enables notifications, but returns false if there are
 * now more buffers available in the vring.
 */
bool vringh_notify_enable_kern(struct vringh *vrh)
{
	return __vringh_notify_enable(vrh, getu16_kern, putu16_kern);
}
EXPORT_SYMBOL(vringh_notify_enable_kern);

/**
 * vringh_notify_disable_kern - don't tell us if something changes.
 * @vrh: the vring.
 *
 * This is our normal running state: we disable and then only enable when
 * we're going to sleep.
 */
void vringh_notify_disable_kern(struct vringh *vrh)
{
	__vringh_notify_disable(vrh, putu16_kern);
}
EXPORT_SYMBOL(vringh_notify_disable_kern);

/**
 * vringh_need_notify_kern - must we tell the other side about used buffers?
 * @vrh: the vring we've called vringh_complete_kern() on.
 *
 * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
 */
int vringh_need_notify_kern(struct vringh *vrh)
{
	return __vringh_need_notify(vrh, getu16_kern);
}
EXPORT_SYMBOL(vringh_need_notify_kern);

MODULE_LICENSE("GPL");