// SPDX-License-Identifier: GPL-2.0-only
/*
 * Helpers for the host side of a virtio ring.
 *
 * Since these may be in userspace, we use (inline) accessors.
 */
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/vringh.h>
#include <linux/virtio_ring.h>
#include <linux/kernel.h>
#include <linux/ratelimit.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/export.h>
#if IS_REACHABLE(CONFIG_VHOST_IOTLB)
#include <linux/bvec.h>
#include <linux/highmem.h>
#include <linux/vhost_iotlb.h>
#endif
#include <uapi/linux/virtio_config.h>

static __printf(1,2) __cold void vringh_bad(const char *fmt, ...)
{
	static DEFINE_RATELIMIT_STATE(vringh_rs,
				      DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);
	if (__ratelimit(&vringh_rs)) {
		va_list ap;
		va_start(ap, fmt);
		printk(KERN_NOTICE "vringh:");
		vprintk(fmt, ap);
		va_end(ap);
	}
}

/* Returns vring->num if empty, -ve on error. */
static inline int __vringh_get_head(const struct vringh *vrh,
				    int (*getu16)(const struct vringh *vrh,
						  u16 *val, const __virtio16 *p),
				    u16 *last_avail_idx)
{
	u16 avail_idx, i, head;
	int err;

	err = getu16(vrh, &avail_idx, &vrh->vring.avail->idx);
	if (err) {
		vringh_bad("Failed to access avail idx at %p",
			   &vrh->vring.avail->idx);
		return err;
	}

	if (*last_avail_idx == avail_idx)
		return vrh->vring.num;

	/* Only get avail ring entries after they have been exposed by guest. */
	virtio_rmb(vrh->weak_barriers);

	i = *last_avail_idx & (vrh->vring.num - 1);

	err = getu16(vrh, &head, &vrh->vring.avail->ring[i]);
	if (err) {
		vringh_bad("Failed to read head: idx %d address %p",
			   *last_avail_idx, &vrh->vring.avail->ring[i]);
		return err;
	}

	if (head >= vrh->vring.num) {
		vringh_bad("Guest says index %u > %u is available",
			   head, vrh->vring.num);
		return -EINVAL;
	}

	(*last_avail_idx)++;
	return head;
}
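/*
 * Worked example: with num = 8, *last_avail_idx = 5 and avail->idx = 7,
 * two buffers are pending and the first head is read from
 * avail->ring[5 & 7].  Only the low bits index the ring; the free-running
 * 16-bit indices are compared directly to detect emptiness.
 */
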
/**
 * vringh_kiov_advance - skip bytes from vring_kiov
 * @iov: an iov passed to vringh_getdesc_*() (updated as we consume)
 * @len: the maximum length to advance
 */
void vringh_kiov_advance(struct vringh_kiov *iov, size_t len)
{
	while (len && iov->i < iov->used) {
		size_t partlen = min(iov->iov[iov->i].iov_len, len);

		iov->consumed += partlen;
		iov->iov[iov->i].iov_len -= partlen;
		iov->iov[iov->i].iov_base += partlen;

		if (!iov->iov[iov->i].iov_len) {
			/* Fix up old iov element then increment. */
			iov->iov[iov->i].iov_len = iov->consumed;
			iov->iov[iov->i].iov_base -= iov->consumed;

			iov->consumed = 0;
			iov->i++;
		}

		len -= partlen;
	}
}
EXPORT_SYMBOL(vringh_kiov_advance);
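/*
 * Usage sketch: a caller that wants to skip a fixed-size header instead
 * of copying it out can advance the riov in place (struct my_hdr is a
 * hypothetical header type):
 *
 *	vringh_kiov_advance(&riov, sizeof(struct my_hdr));
 *
 * Subsequent vringh_iov_pull_*() calls then start at the payload.
 */
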
/* Copy some bytes to/from the iovec.  Returns num copied. */
static inline ssize_t vringh_iov_xfer(struct vringh *vrh,
				      struct vringh_kiov *iov,
				      void *ptr, size_t len,
				      int (*xfer)(const struct vringh *vrh,
						  void *addr, void *ptr,
						  size_t len))
{
	int err, done = 0;

	while (len && iov->i < iov->used) {
		size_t partlen;

		partlen = min(iov->iov[iov->i].iov_len, len);
		err = xfer(vrh, iov->iov[iov->i].iov_base, ptr, partlen);
		if (err)
			return err;
		done += partlen;
		len -= partlen;
		ptr += partlen;

		vringh_kiov_advance(iov, partlen);
	}
	return done;
}

/* May reduce *len if range is shorter. */
static inline bool range_check(struct vringh *vrh, u64 addr, size_t *len,
			       struct vringh_range *range,
			       bool (*getrange)(struct vringh *,
						u64, struct vringh_range *))
{
	if (addr < range->start || addr > range->end_incl) {
		if (!getrange(vrh, addr, range))
			return false;
	}
	BUG_ON(addr < range->start || addr > range->end_incl);

	/* To end of memory? */
	if (unlikely(addr + *len == 0)) {
		if (range->end_incl == -1ULL)
			return true;
		goto truncate;
	}

	/* Otherwise, don't wrap. */
	if (addr + *len < addr) {
		vringh_bad("Wrapping descriptor %zu@0x%llx",
			   *len, (unsigned long long)addr);
		return false;
	}

	if (unlikely(addr + *len - 1 > range->end_incl))
		goto truncate;
	return true;

truncate:
	*len = range->end_incl + 1 - addr;
	return true;
}

static inline bool no_range_check(struct vringh *vrh, u64 addr, size_t *len,
				  struct vringh_range *range,
				  bool (*getrange)(struct vringh *,
						   u64, struct vringh_range *))
{
	return true;
}
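/*
 * A minimal getrange callback for range_check(), assuming a single
 * identity-mapped region (the name getrange_all is hypothetical; real
 * callers validate addr against their memory map and set r->offset to
 * translate it):
 *
 *	static bool getrange_all(struct vringh *vrh, u64 addr,
 *				 struct vringh_range *r)
 *	{
 *		r->start = 0;
 *		r->end_incl = -1ULL;
 *		r->offset = 0;
 *		return true;
 *	}
 */
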
/* No reason for this code to be inline. */
static int move_to_indirect(const struct vringh *vrh,
			    int *up_next, u16 *i, void *addr,
			    const struct vring_desc *desc,
			    struct vring_desc **descs, int *desc_max)
{
	u32 len;

	/* Indirect tables can't have indirect. */
	if (*up_next != -1) {
		vringh_bad("Multilevel indirect %u->%u", *up_next, *i);
		return -EINVAL;
	}

	len = vringh32_to_cpu(vrh, desc->len);
	if (unlikely(len % sizeof(struct vring_desc))) {
		vringh_bad("Strange indirect len %u", desc->len);
		return -EINVAL;
	}

	/* We will check this when we follow it! */
	if (desc->flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT))
		*up_next = vringh16_to_cpu(vrh, desc->next);
	else
		*up_next = -2;
	*descs = addr;
	*desc_max = len / sizeof(struct vring_desc);

	/* Now, start at the first indirect. */
	*i = 0;
	return 0;
}

static int resize_iovec(struct vringh_kiov *iov, gfp_t gfp)
{
	struct kvec *new;
	unsigned int flag, new_num = (iov->max_num & ~VRINGH_IOV_ALLOCATED) * 2;

	if (new_num < 8)
		new_num = 8;

	flag = (iov->max_num & VRINGH_IOV_ALLOCATED);
	if (flag)
		new = krealloc_array(iov->iov, new_num,
				     sizeof(struct iovec), gfp);
	else {
		new = kmalloc_array(new_num, sizeof(struct iovec), gfp);
		if (new) {
			memcpy(new, iov->iov,
			       iov->max_num * sizeof(struct iovec));
			flag = VRINGH_IOV_ALLOCATED;
		}
	}
	if (!new)
		return -ENOMEM;
	iov->iov = new;
	iov->max_num = (new_num | flag);
	return 0;
}

static u16 __cold return_from_indirect(const struct vringh *vrh, int *up_next,
				       struct vring_desc **descs, int *desc_max)
{
	u16 i = *up_next;

	*up_next = -1;
	*descs = vrh->vring.desc;
	*desc_max = vrh->vring.num;
	return i;
}

static int slow_copy(struct vringh *vrh, void *dst, const void *src,
		     bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
				    struct vringh_range *range,
				    bool (*getrange)(struct vringh *vrh,
						     u64,
						     struct vringh_range *)),
		     bool (*getrange)(struct vringh *vrh,
				      u64 addr,
				      struct vringh_range *r),
		     struct vringh_range *range,
		     int (*copy)(const struct vringh *vrh,
				 void *dst, const void *src, size_t len))
{
	size_t part, len = sizeof(struct vring_desc);

	do {
		u64 addr;
		int err;

		part = len;
		addr = (u64)(unsigned long)src - range->offset;

		if (!rcheck(vrh, addr, &part, range, getrange))
			return -EINVAL;

		err = copy(vrh, dst, src, part);
		if (err)
			return err;

		dst += part;
		src += part;
		len -= part;
	} while (len);
	return 0;
}

static inline int
__vringh_iov(struct vringh *vrh, u16 i,
	     struct vringh_kiov *riov,
	     struct vringh_kiov *wiov,
	     bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
			    struct vringh_range *range,
			    bool (*getrange)(struct vringh *, u64,
					     struct vringh_range *)),
	     bool (*getrange)(struct vringh *, u64, struct vringh_range *),
	     gfp_t gfp,
	     int (*copy)(const struct vringh *vrh,
			 void *dst, const void *src, size_t len))
{
	int err, count = 0, indirect_count = 0, up_next, desc_max;
	struct vring_desc desc, *descs;
	struct vringh_range range = { -1ULL, 0 }, slowrange;
	bool slow = false;

	/* We start traversing vring's descriptor table. */
	descs = vrh->vring.desc;
	desc_max = vrh->vring.num;
	up_next = -1;

	/* You must want something! */
	if (WARN_ON(!riov && !wiov))
		return -EINVAL;

	if (riov)
		riov->i = riov->used = riov->consumed = 0;
	if (wiov)
		wiov->i = wiov->used = wiov->consumed = 0;

	for (;;) {
		void *addr;
		struct vringh_kiov *iov;
		size_t len;

		if (unlikely(slow))
			err = slow_copy(vrh, &desc, &descs[i], rcheck, getrange,
					&slowrange, copy);
		else
			err = copy(vrh, &desc, &descs[i], sizeof(desc));
		if (unlikely(err))
			goto fail;

		if (unlikely(desc.flags &
			     cpu_to_vringh16(vrh, VRING_DESC_F_INDIRECT))) {
			u64 a = vringh64_to_cpu(vrh, desc.addr);

			/* Make sure it's OK, and get offset. */
			len = vringh32_to_cpu(vrh, desc.len);
			if (!rcheck(vrh, a, &len, &range, getrange)) {
				err = -EINVAL;
				goto fail;
			}

			if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
				slow = true;
				/* We need to save this range to use offset */
				slowrange = range;
			}

			addr = (void *)(long)(a + range.offset);
			err = move_to_indirect(vrh, &up_next, &i, addr, &desc,
					       &descs, &desc_max);
			if (err)
				goto fail;
			continue;
		}

		if (up_next == -1)
			count++;
		else
			indirect_count++;

		if (count > vrh->vring.num || indirect_count > desc_max) {
			vringh_bad("Descriptor loop in %p", descs);
			err = -ELOOP;
			goto fail;
		}

		if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_WRITE))
			iov = wiov;
		else {
			iov = riov;
			if (unlikely(wiov && wiov->used)) {
				vringh_bad("Readable desc %p after writable",
					   &descs[i]);
				err = -EINVAL;
				goto fail;
			}
		}

		if (!iov) {
			vringh_bad("Unexpected %s desc",
				   !wiov ? "writable" : "readable");
			err = -EPROTO;
			goto fail;
		}

	again:
		/* Make sure it's OK, and get offset. */
		len = vringh32_to_cpu(vrh, desc.len);
		if (!rcheck(vrh, vringh64_to_cpu(vrh, desc.addr), &len, &range,
			    getrange)) {
			err = -EINVAL;
			goto fail;
		}
		addr = (void *)(unsigned long)(vringh64_to_cpu(vrh, desc.addr) +
					       range.offset);

		if (unlikely(iov->used == (iov->max_num & ~VRINGH_IOV_ALLOCATED))) {
			err = resize_iovec(iov, gfp);
			if (err)
				goto fail;
		}

		iov->iov[iov->used].iov_base = addr;
		iov->iov[iov->used].iov_len = len;
		iov->used++;

		if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
			desc.len = cpu_to_vringh32(vrh,
				   vringh32_to_cpu(vrh, desc.len) - len);
			desc.addr = cpu_to_vringh64(vrh,
				    vringh64_to_cpu(vrh, desc.addr) + len);
			goto again;
		}

		if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT)) {
			i = vringh16_to_cpu(vrh, desc.next);
		} else {
			/* Just in case we need to finish traversing above. */
			if (unlikely(up_next > 0)) {
				i = return_from_indirect(vrh, &up_next,
							 &descs, &desc_max);
				slow = false;
				indirect_count = 0;
			} else
				break;
		}

		if (i >= desc_max) {
			vringh_bad("Chained index %u > %u", i, desc_max);
			err = -EINVAL;
			goto fail;
		}
	}

	return 0;

fail:
	return err;
}
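/*
 * Worked example: for a chain of two readable descriptors followed by
 * one marked VRING_DESC_F_WRITE, __vringh_iov() leaves riov with two
 * entries and wiov with one, each holding the translated address and
 * length of its descriptor.  A readable descriptor after a writable one
 * is rejected with -EINVAL, matching the virtio requirement that
 * device-readable buffers precede device-writable ones.
 */
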
static inline int __vringh_complete(struct vringh *vrh,
				    const struct vring_used_elem *used,
				    unsigned int num_used,
				    int (*putu16)(const struct vringh *vrh,
						  __virtio16 *p, u16 val),
				    int (*putused)(const struct vringh *vrh,
						   struct vring_used_elem *dst,
						   const struct vring_used_elem
						   *src, unsigned num))
{
	struct vring_used *used_ring;
	int err;
	u16 used_idx, off;

	used_ring = vrh->vring.used;
	used_idx = vrh->last_used_idx + vrh->completed;

	off = used_idx % vrh->vring.num;

	/* Compiler knows num_used == 1 sometimes, hence extra check */
	if (num_used > 1 && unlikely(off + num_used >= vrh->vring.num)) {
		u16 part = vrh->vring.num - off;
		err = putused(vrh, &used_ring->ring[off], used, part);
		if (!err)
			err = putused(vrh, &used_ring->ring[0], used + part,
				      num_used - part);
	} else
		err = putused(vrh, &used_ring->ring[off], used, num_used);

	if (err) {
		vringh_bad("Failed to write %u used entries %u at %p",
			   num_used, off, &used_ring->ring[off]);
		return err;
	}

	/* Make sure buffer is written before we update index. */
	virtio_wmb(vrh->weak_barriers);

	err = putu16(vrh, &vrh->vring.used->idx, used_idx + num_used);
	if (err) {
		vringh_bad("Failed to update used index at %p",
			   &vrh->vring.used->idx);
		return err;
	}

	vrh->completed += num_used;
	return 0;
}


static inline int __vringh_need_notify(struct vringh *vrh,
				       int (*getu16)(const struct vringh *vrh,
						     u16 *val,
						     const __virtio16 *p))
{
	bool notify;
	u16 used_event;
	int err;

	/* Flush out used index update. This is paired with the
	 * barrier that the Guest executes when enabling
	 * interrupts. */
	virtio_mb(vrh->weak_barriers);

	/* Old-style, without event indices. */
	if (!vrh->event_indices) {
		u16 flags;
		err = getu16(vrh, &flags, &vrh->vring.avail->flags);
		if (err) {
			vringh_bad("Failed to get flags at %p",
				   &vrh->vring.avail->flags);
			return err;
		}
		return (!(flags & VRING_AVAIL_F_NO_INTERRUPT));
	}

	/* Modern: we know when other side wants to know. */
	err = getu16(vrh, &used_event, &vring_used_event(&vrh->vring));
	if (err) {
		vringh_bad("Failed to get used event idx at %p",
			   &vring_used_event(&vrh->vring));
		return err;
	}

	/* Just in case we added so many that we wrap. */
	if (unlikely(vrh->completed > 0xffff))
		notify = true;
	else
		notify = vring_need_event(used_event,
					  vrh->last_used_idx + vrh->completed,
					  vrh->last_used_idx);

	vrh->last_used_idx += vrh->completed;
	vrh->completed = 0;
	return notify;
}

static inline bool __vringh_notify_enable(struct vringh *vrh,
					  int (*getu16)(const struct vringh *vrh,
							u16 *val, const __virtio16 *p),
					  int (*putu16)(const struct vringh *vrh,
							__virtio16 *p, u16 val))
{
	u16 avail;

	if (!vrh->event_indices) {
		/* Old-school; update flags. */
		if (putu16(vrh, &vrh->vring.used->flags, 0) != 0) {
			vringh_bad("Clearing used flags %p",
				   &vrh->vring.used->flags);
			return true;
		}
	} else {
		if (putu16(vrh, &vring_avail_event(&vrh->vring),
			   vrh->last_avail_idx) != 0) {
			vringh_bad("Updating avail event index %p",
				   &vring_avail_event(&vrh->vring));
			return true;
		}
	}

	/* They could have slipped one in as we were doing that: make
	 * sure it's written, then check again. */
	virtio_mb(vrh->weak_barriers);

	if (getu16(vrh, &avail, &vrh->vring.avail->idx) != 0) {
		vringh_bad("Failed to check avail idx at %p",
			   &vrh->vring.avail->idx);
		return true;
	}

	/* This is unlikely, so we just leave notifications enabled
	 * (if we're using event_indices, we'll only get one
	 * notification anyway). */
	return avail == vrh->last_avail_idx;
}

static inline void __vringh_notify_disable(struct vringh *vrh,
					   int (*putu16)(const struct vringh *vrh,
							 __virtio16 *p, u16 val))
{
	if (!vrh->event_indices) {
		/* Old-school; update flags. */
		if (putu16(vrh, &vrh->vring.used->flags,
			   VRING_USED_F_NO_NOTIFY)) {
			vringh_bad("Setting used flags %p",
				   &vrh->vring.used->flags);
		}
	}
}
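/*
 * Worked example: vring_need_event() (from include/uapi/linux/virtio_ring.h)
 * reduces to
 *
 *	(u16)(new_idx - event_idx - 1) < (u16)(new_idx - old_idx)
 *
 * i.e. notify iff event_idx lies within the batch just published.  With
 * old = 10 and new = 12: used_event = 11 notifies (0 < 2), while
 * used_event = 12 does not (65535 < 2 is false).
 */
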
/* Userspace access helpers: in this case, addresses are really userspace. */
static inline int getu16_user(const struct vringh *vrh, u16 *val, const __virtio16 *p)
{
	__virtio16 v = 0;
	int rc = get_user(v, (__force __virtio16 __user *)p);
	*val = vringh16_to_cpu(vrh, v);
	return rc;
}

static inline int putu16_user(const struct vringh *vrh, __virtio16 *p, u16 val)
{
	__virtio16 v = cpu_to_vringh16(vrh, val);
	return put_user(v, (__force __virtio16 __user *)p);
}

static inline int copydesc_user(const struct vringh *vrh,
				void *dst, const void *src, size_t len)
{
	return copy_from_user(dst, (__force void __user *)src, len) ?
		-EFAULT : 0;
}

static inline int putused_user(const struct vringh *vrh,
			       struct vring_used_elem *dst,
			       const struct vring_used_elem *src,
			       unsigned int num)
{
	return copy_to_user((__force void __user *)dst, src,
			    sizeof(*dst) * num) ? -EFAULT : 0;
}

static inline int xfer_from_user(const struct vringh *vrh, void *src,
				 void *dst, size_t len)
{
	return copy_from_user(dst, (__force void __user *)src, len) ?
		-EFAULT : 0;
}

static inline int xfer_to_user(const struct vringh *vrh,
			       void *dst, void *src, size_t len)
{
	return copy_to_user((__force void __user *)dst, src, len) ?
		-EFAULT : 0;
}
/**
 * vringh_init_user - initialize a vringh for a userspace vring.
 * @vrh: the vringh to initialize.
 * @features: the feature bits for this ring.
 * @num: the number of elements.
 * @weak_barriers: true if we only need memory barriers, not I/O.
 * @desc: the userspace descriptor pointer.
 * @avail: the userspace avail pointer.
 * @used: the userspace used pointer.
 *
 * Returns an error if num is invalid: you should check pointers
 * yourself!
 */
int vringh_init_user(struct vringh *vrh, u64 features,
		     unsigned int num, bool weak_barriers,
		     vring_desc_t __user *desc,
		     vring_avail_t __user *avail,
		     vring_used_t __user *used)
{
	/* Sane power of 2 please! */
	if (!num || num > 0xffff || (num & (num - 1))) {
		vringh_bad("Bad ring size %u", num);
		return -EINVAL;
	}

	vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
	vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
	vrh->weak_barriers = weak_barriers;
	vrh->completed = 0;
	vrh->last_avail_idx = 0;
	vrh->last_used_idx = 0;
	vrh->vring.num = num;
	/* vring expects kernel addresses, but only used via accessors. */
	vrh->vring.desc = (__force struct vring_desc *)desc;
	vrh->vring.avail = (__force struct vring_avail *)avail;
	vrh->vring.used = (__force struct vring_used *)used;
	return 0;
}
EXPORT_SYMBOL(vringh_init_user);
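/*
 * Usage sketch, assuming desc/avail/used come from an already-validated
 * guest mapping (vrh and num are the caller's own):
 *
 *	err = vringh_init_user(&vrh,
 *			       (1ULL << VIRTIO_F_VERSION_1) |
 *			       (1ULL << VIRTIO_RING_F_EVENT_IDX),
 *			       num, true, desc, avail, used);
 *	if (err)
 *		return err;
 */
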
/**
 * vringh_getdesc_user - get next available descriptor from userspace ring.
 * @vrh: the userspace vring.
 * @riov: where to put the readable descriptors (or NULL)
 * @wiov: where to put the writable descriptors (or NULL)
 * @getrange: function to call to check ranges.
 * @head: head index we received, for passing to vringh_complete_user().
 *
 * Returns 0 if there was no descriptor, 1 if there was, or -errno.
 *
 * Note that on error return, you can tell the difference between an
 * invalid ring and a single invalid descriptor: in the former case,
 * *head will be vrh->vring.num.  You may be able to ignore an invalid
 * descriptor, but there's not much you can do with an invalid ring.
 *
 * Note that you can reuse riov and wiov with subsequent calls. Content is
 * overwritten and memory reallocated if more space is needed.
 * When you no longer need riov and wiov, you should clean them up by
 * calling vringh_iov_cleanup() to release the memory, even on error!
 */
int vringh_getdesc_user(struct vringh *vrh,
			struct vringh_iov *riov,
			struct vringh_iov *wiov,
			bool (*getrange)(struct vringh *vrh,
					 u64 addr, struct vringh_range *r),
			u16 *head)
{
	int err;

	*head = vrh->vring.num;
	err = __vringh_get_head(vrh, getu16_user, &vrh->last_avail_idx);
	if (err < 0)
		return err;

	/* Empty... */
	if (err == vrh->vring.num)
		return 0;

	/* We need the layouts to be identical for this to work */
	BUILD_BUG_ON(sizeof(struct vringh_kiov) != sizeof(struct vringh_iov));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, iov) !=
		     offsetof(struct vringh_iov, iov));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, i) !=
		     offsetof(struct vringh_iov, i));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, used) !=
		     offsetof(struct vringh_iov, used));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, max_num) !=
		     offsetof(struct vringh_iov, max_num));
	BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec));
	BUILD_BUG_ON(offsetof(struct iovec, iov_base) !=
		     offsetof(struct kvec, iov_base));
	BUILD_BUG_ON(offsetof(struct iovec, iov_len) !=
		     offsetof(struct kvec, iov_len));
	BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_base)
		     != sizeof(((struct kvec *)NULL)->iov_base));
	BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_len)
		     != sizeof(((struct kvec *)NULL)->iov_len));

	*head = err;
	err = __vringh_iov(vrh, *head, (struct vringh_kiov *)riov,
			   (struct vringh_kiov *)wiov,
			   range_check, getrange, GFP_KERNEL, copydesc_user);
	if (err)
		return err;

	return 1;
}
EXPORT_SYMBOL(vringh_getdesc_user);
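/*
 * Usage sketch of a service loop (my_getrange, req and reply are
 * hypothetical; vringh_iov_init()/vringh_iov_cleanup() come from
 * linux/vringh.h):
 *
 *	struct vringh_iov riov, wiov;
 *	u16 head;
 *
 *	vringh_iov_init(&riov, NULL, 0);
 *	vringh_iov_init(&wiov, NULL, 0);
 *
 *	while (vringh_getdesc_user(&vrh, &riov, &wiov,
 *				   my_getrange, &head) == 1) {
 *		vringh_iov_pull_user(&riov, &req, sizeof(req));
 *		... handle req, build reply ...
 *		len = vringh_iov_push_user(&wiov, &reply, sizeof(reply));
 *		vringh_complete_user(&vrh, head, len);
 *	}
 *	vringh_iov_cleanup(&riov);
 *	vringh_iov_cleanup(&wiov);
 */
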
/**
 * vringh_iov_pull_user - copy bytes from vring_iov.
 * @riov: the riov as passed to vringh_getdesc_user() (updated as we consume)
 * @dst: the place to copy.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_pull_user(struct vringh_iov *riov, void *dst, size_t len)
{
	return vringh_iov_xfer(NULL, (struct vringh_kiov *)riov,
			       dst, len, xfer_from_user);
}
EXPORT_SYMBOL(vringh_iov_pull_user);

/**
 * vringh_iov_push_user - copy bytes into vring_iov.
 * @wiov: the wiov as passed to vringh_getdesc_user() (updated as we consume)
 * @src: the place to copy from.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_push_user(struct vringh_iov *wiov,
			     const void *src, size_t len)
{
	return vringh_iov_xfer(NULL, (struct vringh_kiov *)wiov,
			       (void *)src, len, xfer_to_user);
}
EXPORT_SYMBOL(vringh_iov_push_user);

/**
 * vringh_abandon_user - we've decided not to handle the descriptor(s).
 * @vrh: the vring.
 * @num: the number of descriptors to put back (ie. num
 *	 vringh_get_user() to undo).
 *
 * The next vringh_get_user() will return the old descriptor(s) again.
 */
void vringh_abandon_user(struct vringh *vrh, unsigned int num)
{
	/* We only update vring_avail_event(vr) when we want to be notified,
	 * so we haven't changed that yet. */
	vrh->last_avail_idx -= num;
}
EXPORT_SYMBOL(vringh_abandon_user);

/**
 * vringh_complete_user - we've finished with descriptor, publish it.
 * @vrh: the vring.
 * @head: the head as filled in by vringh_getdesc_user.
 * @len: the length of data we have written.
 *
 * You should check vringh_need_notify_user() after one or more calls
 * to this function.
 */
int vringh_complete_user(struct vringh *vrh, u16 head, u32 len)
{
	struct vring_used_elem used;

	used.id = cpu_to_vringh32(vrh, head);
	used.len = cpu_to_vringh32(vrh, len);
	return __vringh_complete(vrh, &used, 1, putu16_user, putused_user);
}
EXPORT_SYMBOL(vringh_complete_user);
/**
 * vringh_complete_multi_user - we've finished with many descriptors.
 * @vrh: the vring.
 * @used: the head, length pairs.
 * @num_used: the number of used elements.
 *
 * You should check vringh_need_notify_user() after one or more calls
 * to this function.
 */
int vringh_complete_multi_user(struct vringh *vrh,
			       const struct vring_used_elem used[],
			       unsigned num_used)
{
	return __vringh_complete(vrh, used, num_used,
				 putu16_user, putused_user);
}
EXPORT_SYMBOL(vringh_complete_multi_user);

/**
 * vringh_notify_enable_user - we want to know if something changes.
 * @vrh: the vring.
 *
 * This always enables notifications, but returns false if there are
 * now more buffers available in the vring.
 */
bool vringh_notify_enable_user(struct vringh *vrh)
{
	return __vringh_notify_enable(vrh, getu16_user, putu16_user);
}
EXPORT_SYMBOL(vringh_notify_enable_user);

/**
 * vringh_notify_disable_user - don't tell us if something changes.
 * @vrh: the vring.
 *
 * This is our normal running state: we disable and then only enable when
 * we're going to sleep.
 */
void vringh_notify_disable_user(struct vringh *vrh)
{
	__vringh_notify_disable(vrh, putu16_user);
}
EXPORT_SYMBOL(vringh_notify_disable_user);

/**
 * vringh_need_notify_user - must we tell the other side about used buffers?
 * @vrh: the vring we've called vringh_complete_user() on.
 *
 * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
 */
int vringh_need_notify_user(struct vringh *vrh)
{
	return __vringh_need_notify(vrh, getu16_user);
}
EXPORT_SYMBOL(vringh_need_notify_user);
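/*
 * Usage sketch: after a batch of completions, only kick the other side
 * if it asked for it (notify_guest() is a hypothetical notification,
 * e.g. signalling an eventfd):
 *
 *	vringh_complete_multi_user(&vrh, used, num_used);
 *	if (vringh_need_notify_user(&vrh) > 0)
 *		notify_guest();
 */
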
/* Kernelspace access helpers. */
static inline int getu16_kern(const struct vringh *vrh,
			      u16 *val, const __virtio16 *p)
{
	*val = vringh16_to_cpu(vrh, READ_ONCE(*p));
	return 0;
}

static inline int putu16_kern(const struct vringh *vrh, __virtio16 *p, u16 val)
{
	WRITE_ONCE(*p, cpu_to_vringh16(vrh, val));
	return 0;
}

static inline int copydesc_kern(const struct vringh *vrh,
				void *dst, const void *src, size_t len)
{
	memcpy(dst, src, len);
	return 0;
}

static inline int putused_kern(const struct vringh *vrh,
			       struct vring_used_elem *dst,
			       const struct vring_used_elem *src,
			       unsigned int num)
{
	memcpy(dst, src, num * sizeof(*dst));
	return 0;
}

static inline int xfer_kern(const struct vringh *vrh, void *src,
			    void *dst, size_t len)
{
	memcpy(dst, src, len);
	return 0;
}

static inline int kern_xfer(const struct vringh *vrh, void *dst,
			    void *src, size_t len)
{
	memcpy(dst, src, len);
	return 0;
}
/**
 * vringh_init_kern - initialize a vringh for a kernelspace vring.
 * @vrh: the vringh to initialize.
 * @features: the feature bits for this ring.
 * @num: the number of elements.
 * @weak_barriers: true if we only need memory barriers, not I/O.
 * @desc: the kernelspace descriptor pointer.
 * @avail: the kernelspace avail pointer.
 * @used: the kernelspace used pointer.
 *
 * Returns an error if num is invalid.
 */
int vringh_init_kern(struct vringh *vrh, u64 features,
		     unsigned int num, bool weak_barriers,
		     struct vring_desc *desc,
		     struct vring_avail *avail,
		     struct vring_used *used)
{
	/* Sane power of 2 please! */
	if (!num || num > 0xffff || (num & (num - 1))) {
		vringh_bad("Bad ring size %u", num);
		return -EINVAL;
	}

	vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
	vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
	vrh->weak_barriers = weak_barriers;
	vrh->completed = 0;
	vrh->last_avail_idx = 0;
	vrh->last_used_idx = 0;
	vrh->vring.num = num;
	vrh->vring.desc = desc;
	vrh->vring.avail = avail;
	vrh->vring.used = used;
	return 0;
}
EXPORT_SYMBOL(vringh_init_kern);
/**
 * vringh_getdesc_kern - get next available descriptor from kernelspace ring.
 * @vrh: the kernelspace vring.
 * @riov: where to put the readable descriptors (or NULL)
 * @wiov: where to put the writable descriptors (or NULL)
 * @head: head index we received, for passing to vringh_complete_kern().
 * @gfp: flags for allocating larger riov/wiov.
 *
 * Returns 0 if there was no descriptor, 1 if there was, or -errno.
 *
 * Note that on error return, you can tell the difference between an
 * invalid ring and a single invalid descriptor: in the former case,
 * *head will be vrh->vring.num.  You may be able to ignore an invalid
 * descriptor, but there's not much you can do with an invalid ring.
 *
 * Note that you can reuse riov and wiov with subsequent calls. Content is
 * overwritten and memory reallocated if more space is needed.
 * When you no longer need riov and wiov, you should clean them up by
 * calling vringh_kiov_cleanup() to release the memory, even on error!
 */
int vringh_getdesc_kern(struct vringh *vrh,
			struct vringh_kiov *riov,
			struct vringh_kiov *wiov,
			u16 *head,
			gfp_t gfp)
{
	int err;

	err = __vringh_get_head(vrh, getu16_kern, &vrh->last_avail_idx);
	if (err < 0)
		return err;

	/* Empty... */
	if (err == vrh->vring.num)
		return 0;

	*head = err;
	err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL,
			   gfp, copydesc_kern);
	if (err)
		return err;

	return 1;
}
EXPORT_SYMBOL(vringh_getdesc_kern);
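/*
 * Usage sketch of the classic kick-handling pattern: drain the ring with
 * notifications disabled, then re-check after re-enabling to close the
 * race with a concurrent producer (buffer processing is elided):
 *
 *	for (;;) {
 *		vringh_notify_disable_kern(&vrh);
 *		while (vringh_getdesc_kern(&vrh, &riov, &wiov, &head,
 *					   GFP_KERNEL) == 1) {
 *			... process riov/wiov ...
 *			vringh_complete_kern(&vrh, head, len);
 *		}
 *		if (vringh_notify_enable_kern(&vrh))
 *			break;
 *	}
 *
 * vringh_notify_enable_kern() returning true means the ring is still
 * empty with notifications on, so it is safe to wait for the next kick.
 */
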
/**
 * vringh_iov_pull_kern - copy bytes from vring_iov.
 * @riov: the riov as passed to vringh_getdesc_kern() (updated as we consume)
 * @dst: the place to copy.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_pull_kern(struct vringh_kiov *riov, void *dst, size_t len)
{
	return vringh_iov_xfer(NULL, riov, dst, len, xfer_kern);
}
EXPORT_SYMBOL(vringh_iov_pull_kern);

/**
 * vringh_iov_push_kern - copy bytes into vring_iov.
 * @wiov: the wiov as passed to vringh_getdesc_kern() (updated as we consume)
 * @src: the place to copy from.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov,
			     const void *src, size_t len)
{
	return vringh_iov_xfer(NULL, wiov, (void *)src, len, kern_xfer);
}
EXPORT_SYMBOL(vringh_iov_push_kern);

/**
 * vringh_abandon_kern - we've decided not to handle the descriptor(s).
 * @vrh: the vring.
 * @num: the number of descriptors to put back (ie. num
 *	 vringh_get_kern() to undo).
 *
 * The next vringh_get_kern() will return the old descriptor(s) again.
 */
void vringh_abandon_kern(struct vringh *vrh, unsigned int num)
{
	/* We only update vring_avail_event(vr) when we want to be notified,
	 * so we haven't changed that yet. */
	vrh->last_avail_idx -= num;
}
EXPORT_SYMBOL(vringh_abandon_kern);

/**
 * vringh_complete_kern - we've finished with descriptor, publish it.
 * @vrh: the vring.
 * @head: the head as filled in by vringh_getdesc_kern.
 * @len: the length of data we have written.
 *
 * You should check vringh_need_notify_kern() after one or more calls
 * to this function.
 */
int vringh_complete_kern(struct vringh *vrh, u16 head, u32 len)
{
	struct vring_used_elem used;

	used.id = cpu_to_vringh32(vrh, head);
	used.len = cpu_to_vringh32(vrh, len);

	return __vringh_complete(vrh, &used, 1, putu16_kern, putused_kern);
}
EXPORT_SYMBOL(vringh_complete_kern);

/**
 * vringh_notify_enable_kern - we want to know if something changes.
 * @vrh: the vring.
 *
 * This always enables notifications, but returns false if there are
 * now more buffers available in the vring.
 */
bool vringh_notify_enable_kern(struct vringh *vrh)
{
	return __vringh_notify_enable(vrh, getu16_kern, putu16_kern);
}
EXPORT_SYMBOL(vringh_notify_enable_kern);

/**
 * vringh_notify_disable_kern - don't tell us if something changes.
 * @vrh: the vring.
 *
 * This is our normal running state: we disable and then only enable when
 * we're going to sleep.
 */
void vringh_notify_disable_kern(struct vringh *vrh)
{
	__vringh_notify_disable(vrh, putu16_kern);
}
EXPORT_SYMBOL(vringh_notify_disable_kern);

/**
 * vringh_need_notify_kern - must we tell the other side about used buffers?
 * @vrh: the vring we've called vringh_complete_kern() on.
 *
 * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
 */
int vringh_need_notify_kern(struct vringh *vrh)
{
	return __vringh_need_notify(vrh, getu16_kern);
}
EXPORT_SYMBOL(vringh_need_notify_kern);

#if IS_REACHABLE(CONFIG_VHOST_IOTLB)

static int iotlb_translate(const struct vringh *vrh,
			   u64 addr, u64 len, u64 *translated,
			   struct bio_vec iov[],
			   int iov_size, u32 perm)
{
	struct vhost_iotlb_map *map;
	struct vhost_iotlb *iotlb = vrh->iotlb;
	int ret = 0;
	u64 s = 0, last = addr + len - 1;

	spin_lock(vrh->iotlb_lock);

	while (len > s) {
		u64 size, pa, pfn;

		if (unlikely(ret >= iov_size)) {
			ret = -ENOBUFS;
			break;
		}

		map = vhost_iotlb_itree_first(iotlb, addr, last);
		if (!map || map->start > addr) {
			ret = -EINVAL;
			break;
		} else if (!(map->perm & perm)) {
			ret = -EPERM;
			break;
		}

		size = map->size - addr + map->start;
		pa = map->addr + addr - map->start;
		pfn = pa >> PAGE_SHIFT;
		bvec_set_page(&iov[ret], pfn_to_page(pfn), min(len - s, size),
			      pa & (PAGE_SIZE - 1));
		s += size;
		addr += size;
		++ret;
	}

	spin_unlock(vrh->iotlb_lock);

	if (translated)
		*translated = min(len, s);

	return ret;
}

static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
				  void *src, size_t len)
{
	u64 total_translated = 0;

	while (total_translated < len) {
		struct bio_vec iov[16];
		struct iov_iter iter;
		u64 translated;
		int ret;

		ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
				      len - total_translated, &translated,
				      iov, ARRAY_SIZE(iov), VHOST_MAP_RO);
		if (ret == -ENOBUFS)
			ret = ARRAY_SIZE(iov);
		else if (ret < 0)
			return ret;

		iov_iter_bvec(&iter, ITER_SOURCE, iov, ret, translated);

		ret = copy_from_iter(dst, translated, &iter);
		if (ret < 0)
			return ret;

		src += translated;
		dst += translated;
		total_translated += translated;
	}

	return total_translated;
}

static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
				void *src, size_t len)
{
	u64 total_translated = 0;

	while (total_translated < len) {
		struct bio_vec iov[16];
		struct iov_iter iter;
		u64 translated;
		int ret;

		ret = iotlb_translate(vrh, (u64)(uintptr_t)dst,
				      len - total_translated, &translated,
				      iov, ARRAY_SIZE(iov), VHOST_MAP_WO);
		if (ret == -ENOBUFS)
			ret = ARRAY_SIZE(iov);
		else if (ret < 0)
			return ret;

		iov_iter_bvec(&iter, ITER_DEST, iov, ret, translated);

		ret = copy_to_iter(src, translated, &iter);
		if (ret < 0)
			return ret;

		src += translated;
		dst += translated;
		total_translated += translated;
	}

	return total_translated;
}

static inline int getu16_iotlb(const struct vringh *vrh,
			       u16 *val, const __virtio16 *p)
{
	struct bio_vec iov;
	void *kaddr, *from;
	int ret;

	/* Atomic read is needed for getu16 */
	ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
			      &iov, 1, VHOST_MAP_RO);
	if (ret < 0)
		return ret;

	kaddr = kmap_atomic(iov.bv_page);
	from = kaddr + iov.bv_offset;
	*val = vringh16_to_cpu(vrh, READ_ONCE(*(__virtio16 *)from));
	kunmap_atomic(kaddr);

	return 0;
}

static inline int putu16_iotlb(const struct vringh *vrh,
			       __virtio16 *p, u16 val)
{
	struct bio_vec iov;
	void *kaddr, *to;
	int ret;

	/* Atomic write is needed for putu16 */
	ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
			      &iov, 1, VHOST_MAP_WO);
	if (ret < 0)
		return ret;

	kaddr = kmap_atomic(iov.bv_page);
	to = kaddr + iov.bv_offset;
	WRITE_ONCE(*(__virtio16 *)to, cpu_to_vringh16(vrh, val));
	kunmap_atomic(kaddr);

	return 0;
}

static inline int copydesc_iotlb(const struct vringh *vrh,
				 void *dst, const void *src, size_t len)
{
	int ret;

	ret = copy_from_iotlb(vrh, dst, (void *)src, len);
	if (ret != len)
		return -EFAULT;

	return 0;
}

static inline int xfer_from_iotlb(const struct vringh *vrh, void *src,
				  void *dst, size_t len)
{
	int ret;

	ret = copy_from_iotlb(vrh, dst, src, len);
	if (ret != len)
		return -EFAULT;

	return 0;
}

static inline int xfer_to_iotlb(const struct vringh *vrh,
				void *dst, void *src, size_t len)
{
	int ret;

	ret = copy_to_iotlb(vrh, dst, src, len);
	if (ret != len)
		return -EFAULT;

	return 0;
}

static inline int putused_iotlb(const struct vringh *vrh,
				struct vring_used_elem *dst,
				const struct vring_used_elem *src,
				unsigned int num)
{
	int size = num * sizeof(*dst);
	int ret;

	ret = copy_to_iotlb(vrh, dst, (void *)src, num * sizeof(*dst));
	if (ret != size)
		return -EFAULT;

	return 0;
}
/**
 * vringh_init_iotlb - initialize a vringh for a ring with IOTLB.
 * @vrh: the vringh to initialize.
 * @features: the feature bits for this ring.
 * @num: the number of elements.
 * @weak_barriers: true if we only need memory barriers, not I/O.
 * @desc: the descriptor pointer (accessed through the IOTLB).
 * @avail: the avail pointer (accessed through the IOTLB).
 * @used: the used pointer (accessed through the IOTLB).
 *
 * Returns an error if num is invalid.
 */
int vringh_init_iotlb(struct vringh *vrh, u64 features,
		      unsigned int num, bool weak_barriers,
		      struct vring_desc *desc,
		      struct vring_avail *avail,
		      struct vring_used *used)
{
	return vringh_init_kern(vrh, features, num, weak_barriers,
				desc, avail, used);
}
EXPORT_SYMBOL(vringh_init_iotlb);
/**
 * vringh_set_iotlb - initialize a vringh for a ring with IOTLB.
 * @vrh: the vring
 * @iotlb: iotlb associated with this vring
 * @iotlb_lock: spinlock to synchronize the iotlb accesses
 */
void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
		      spinlock_t *iotlb_lock)
{
	vrh->iotlb = iotlb;
	vrh->iotlb_lock = iotlb_lock;
}
EXPORT_SYMBOL(vringh_set_iotlb);
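/*
 * Setup sketch, assuming the vhost_iotlb API from linux/vhost_iotlb.h
 * (iova, size, pa and my_iotlb_lock are hypothetical, and the spinlock
 * must be initialized by the caller):
 *
 *	struct vhost_iotlb *iotlb = vhost_iotlb_alloc(2048, 0);
 *
 *	vhost_iotlb_add_range(iotlb, iova, iova + size - 1,
 *			      pa, VHOST_MAP_RW);
 *	vringh_set_iotlb(&vrh, iotlb, &my_iotlb_lock);
 */
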
/**
 * vringh_getdesc_iotlb - get next available descriptor from ring with
 * IOTLB.
 * @vrh: the kernelspace vring.
 * @riov: where to put the readable descriptors (or NULL)
 * @wiov: where to put the writable descriptors (or NULL)
 * @head: head index we received, for passing to vringh_complete_iotlb().
 * @gfp: flags for allocating larger riov/wiov.
 *
 * Returns 0 if there was no descriptor, 1 if there was, or -errno.
 *
 * Note that on error return, you can tell the difference between an
 * invalid ring and a single invalid descriptor: in the former case,
 * *head will be vrh->vring.num.  You may be able to ignore an invalid
 * descriptor, but there's not much you can do with an invalid ring.
 *
 * Note that you can reuse riov and wiov with subsequent calls. Content is
 * overwritten and memory reallocated if more space is needed.
 * When you no longer need riov and wiov, you should clean them up by
 * calling vringh_kiov_cleanup() to release the memory, even on error!
 */
int vringh_getdesc_iotlb(struct vringh *vrh,
			 struct vringh_kiov *riov,
			 struct vringh_kiov *wiov,
			 u16 *head,
			 gfp_t gfp)
{
	int err;

	err = __vringh_get_head(vrh, getu16_iotlb, &vrh->last_avail_idx);
	if (err < 0)
		return err;

	/* Empty... */
	if (err == vrh->vring.num)
		return 0;

	*head = err;
	err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL,
			   gfp, copydesc_iotlb);
	if (err)
		return err;

	return 1;
}
EXPORT_SYMBOL(vringh_getdesc_iotlb);
/**
 * vringh_iov_pull_iotlb - copy bytes from vring_iov.
 * @vrh: the vring.
 * @riov: the riov as passed to vringh_getdesc_iotlb() (updated as we consume)
 * @dst: the place to copy.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_pull_iotlb(struct vringh *vrh,
			      struct vringh_kiov *riov,
			      void *dst, size_t len)
{
	return vringh_iov_xfer(vrh, riov, dst, len, xfer_from_iotlb);
}
EXPORT_SYMBOL(vringh_iov_pull_iotlb);

/**
 * vringh_iov_push_iotlb - copy bytes into vring_iov.
 * @vrh: the vring.
 * @wiov: the wiov as passed to vringh_getdesc_iotlb() (updated as we consume)
 * @src: the place to copy from.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_push_iotlb(struct vringh *vrh,
			      struct vringh_kiov *wiov,
			      const void *src, size_t len)
{
	return vringh_iov_xfer(vrh, wiov, (void *)src, len, xfer_to_iotlb);
}
EXPORT_SYMBOL(vringh_iov_push_iotlb);

/**
 * vringh_abandon_iotlb - we've decided not to handle the descriptor(s).
 * @vrh: the vring.
 * @num: the number of descriptors to put back (ie. num
 *	 vringh_get_iotlb() to undo).
 *
 * The next vringh_get_iotlb() will return the old descriptor(s) again.
 */
void vringh_abandon_iotlb(struct vringh *vrh, unsigned int num)
{
	/* We only update vring_avail_event(vr) when we want to be notified,
	 * so we haven't changed that yet.
	 */
	vrh->last_avail_idx -= num;
}
EXPORT_SYMBOL(vringh_abandon_iotlb);

/**
 * vringh_complete_iotlb - we've finished with descriptor, publish it.
 * @vrh: the vring.
 * @head: the head as filled in by vringh_getdesc_iotlb.
 * @len: the length of data we have written.
 *
 * You should check vringh_need_notify_iotlb() after one or more calls
 * to this function.
 */
int vringh_complete_iotlb(struct vringh *vrh, u16 head, u32 len)
{
	struct vring_used_elem used;

	used.id = cpu_to_vringh32(vrh, head);
	used.len = cpu_to_vringh32(vrh, len);

	return __vringh_complete(vrh, &used, 1, putu16_iotlb, putused_iotlb);
}
EXPORT_SYMBOL(vringh_complete_iotlb);

/**
 * vringh_notify_enable_iotlb - we want to know if something changes.
 * @vrh: the vring.
 *
 * This always enables notifications, but returns false if there are
 * now more buffers available in the vring.
 */
bool vringh_notify_enable_iotlb(struct vringh *vrh)
{
	return __vringh_notify_enable(vrh, getu16_iotlb, putu16_iotlb);
}
EXPORT_SYMBOL(vringh_notify_enable_iotlb);

/**
 * vringh_notify_disable_iotlb - don't tell us if something changes.
 * @vrh: the vring.
 *
 * This is our normal running state: we disable and then only enable when
 * we're going to sleep.
 */
void vringh_notify_disable_iotlb(struct vringh *vrh)
{
	__vringh_notify_disable(vrh, putu16_iotlb);
}
EXPORT_SYMBOL(vringh_notify_disable_iotlb);

/**
 * vringh_need_notify_iotlb - must we tell the other side about used buffers?
 * @vrh: the vring we've called vringh_complete_iotlb() on.
 *
 * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
 */
int vringh_need_notify_iotlb(struct vringh *vrh)
{
	return __vringh_need_notify(vrh, getu16_iotlb);
}
EXPORT_SYMBOL(vringh_need_notify_iotlb);

#endif

MODULE_LICENSE("GPL");