virtio_ring: split: stop exporting __vring_new_virtqueue
[linux-2.6-block.git] / drivers / virtio / virtio_ring.c
fd534e9b 1// SPDX-License-Identifier: GPL-2.0-or-later
0a8a69dd
RR
2/* Virtio ring implementation.
3 *
4 * Copyright 2007 Rusty Russell IBM Corporation
0a8a69dd
RR
5 */
6#include <linux/virtio.h>
7#include <linux/virtio_ring.h>
e34f8725 8#include <linux/virtio_config.h>
0a8a69dd 9#include <linux/device.h>
5a0e3ad6 10#include <linux/slab.h>
b5a2c4f1 11#include <linux/module.h>
e93300b1 12#include <linux/hrtimer.h>
780bc790 13#include <linux/dma-mapping.h>
f8ce7263 14#include <linux/spinlock.h>
78fe3987 15#include <xen/xen.h>
0a8a69dd
RR
16
17#ifdef DEBUG
18/* For development, we want to crash whenever the ring is screwed. */
9499f5e7
RR
19#define BAD_RING(_vq, fmt, args...) \
20 do { \
21 dev_err(&(_vq)->vq.vdev->dev, \
22 "%s:"fmt, (_vq)->vq.name, ##args); \
23 BUG(); \
24 } while (0)
c5f841f1
RR
25/* Caller is supposed to guarantee no reentry. */
26#define START_USE(_vq) \
27 do { \
28 if ((_vq)->in_use) \
9499f5e7
RR
29 panic("%s:in_use = %i\n", \
30 (_vq)->vq.name, (_vq)->in_use); \
c5f841f1 31 (_vq)->in_use = __LINE__; \
9499f5e7 32 } while (0)
3a35ce7d 33#define END_USE(_vq) \
97a545ab 34 do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
4d6a105e
TB
35#define LAST_ADD_TIME_UPDATE(_vq) \
36 do { \
37 ktime_t now = ktime_get(); \
38 \
39 /* No kick or get, with .1 second between? Warn. */ \
40 if ((_vq)->last_add_time_valid) \
41 WARN_ON(ktime_to_ms(ktime_sub(now, \
42 (_vq)->last_add_time)) > 100); \
43 (_vq)->last_add_time = now; \
44 (_vq)->last_add_time_valid = true; \
45 } while (0)
46#define LAST_ADD_TIME_CHECK(_vq) \
47 do { \
48 if ((_vq)->last_add_time_valid) { \
49 WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
50 (_vq)->last_add_time)) > 100); \
51 } \
52 } while (0)
53#define LAST_ADD_TIME_INVALID(_vq) \
54 ((_vq)->last_add_time_valid = false)
0a8a69dd 55#else
9499f5e7
RR
56#define BAD_RING(_vq, fmt, args...) \
57 do { \
58 dev_err(&_vq->vq.vdev->dev, \
59 "%s:"fmt, (_vq)->vq.name, ##args); \
60 (_vq)->broken = true; \
61 } while (0)
0a8a69dd
RR
62#define START_USE(vq)
63#define END_USE(vq)
4d6a105e
TB
64#define LAST_ADD_TIME_UPDATE(vq)
65#define LAST_ADD_TIME_CHECK(vq)
66#define LAST_ADD_TIME_INVALID(vq)
0a8a69dd
RR
67#endif
68
cbeedb72 69struct vring_desc_state_split {
780bc790
AL
70 void *data; /* Data for callback. */
71 struct vring_desc *indir_desc; /* Indirect descriptor, if any. */
72};
73
1ce9e605
TB
74struct vring_desc_state_packed {
75 void *data; /* Data for callback. */
76 struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
77 u16 num; /* Descriptor list length. */
1ce9e605
TB
78 u16 last; /* The last desc state in a list. */
79};
80
1f28750f 81struct vring_desc_extra {
ef5c366f
JW
82 dma_addr_t addr; /* Descriptor DMA addr. */
83 u32 len; /* Descriptor length. */
1ce9e605 84 u16 flags; /* Descriptor flags. */
aeef9b47 85 u16 next; /* The next desc state in a list. */
1ce9e605
TB
86};
87
d76136e4
XZ
88struct vring_virtqueue_split {
89 /* Actual memory layout for this queue. */
90 struct vring vring;
91
92 /* Last written value to avail->flags */
93 u16 avail_flags_shadow;
94
95 /*
96 * Last written value to avail->idx in
97 * guest byte order.
98 */
99 u16 avail_idx_shadow;
100
101 /* Per-descriptor state. */
102 struct vring_desc_state_split *desc_state;
103 struct vring_desc_extra *desc_extra;
104
105 /* DMA address and size information */
106 dma_addr_t queue_dma_addr;
107 size_t queue_size_in_bytes;
108};
109
110struct vring_virtqueue_packed {
111 /* Actual memory layout for this queue. */
112 struct {
113 unsigned int num;
114 struct vring_packed_desc *desc;
115 struct vring_packed_desc_event *driver;
116 struct vring_packed_desc_event *device;
117 } vring;
118
119 /* Driver ring wrap counter. */
120 bool avail_wrap_counter;
121
122 /* Avail used flags. */
123 u16 avail_used_flags;
124
125 /* Index of the next avail descriptor. */
126 u16 next_avail_idx;
127
128 /*
129 * Last written value to driver->flags in
130 * guest byte order.
131 */
132 u16 event_flags_shadow;
133
134 /* Per-descriptor state. */
135 struct vring_desc_state_packed *desc_state;
136 struct vring_desc_extra *desc_extra;
137
138 /* DMA address and size information */
139 dma_addr_t ring_dma_addr;
140 dma_addr_t driver_event_dma_addr;
141 dma_addr_t device_event_dma_addr;
142 size_t ring_size_in_bytes;
143 size_t event_size_in_bytes;
144};
145
43b4f721 146struct vring_virtqueue {
0a8a69dd
RR
147 struct virtqueue vq;
148
1ce9e605
TB
149 /* Is this a packed ring? */
150 bool packed_ring;
151
fb3fba6b
TB
152 /* Is DMA API used? */
153 bool use_dma_api;
154
7b21e34f
RR
155 /* Can we use weak barriers? */
156 bool weak_barriers;
157
0a8a69dd
RR
158 /* Other side has made a mess, don't try any more. */
159 bool broken;
160
9fa29b9d
MM
161 /* Host supports indirect buffers */
162 bool indirect;
163
a5c262c5
MT
164 /* Host publishes avail event idx */
165 bool event;
166
0a8a69dd
RR
167 /* Head of free buffer list. */
168 unsigned int free_head;
169 /* Number we've added since last sync. */
170 unsigned int num_added;
171
a7722890 172 /* Last used index we've seen.
173 * For the split ring, it simply holds the last used index.
174 * For the packed ring:
175 * bits below VRING_PACKED_EVENT_F_WRAP_CTR hold the last used index;
176 * bits from VRING_PACKED_EVENT_F_WRAP_CTR upwards hold the used wrap counter.
177 */
1bc4953e 178 u16 last_used_idx;
0a8a69dd 179
8d622d21
MT
180 /* Hint for event idx: already triggered no need to disable. */
181 bool event_triggered;
182
1ce9e605
TB
183 union {
184 /* Available for split ring */
d76136e4 185 struct vring_virtqueue_split split;
e593bf97 186
1ce9e605 187 /* Available for packed ring */
d76136e4 188 struct vring_virtqueue_packed packed;
1ce9e605 189 };
f277ec42 190
0a8a69dd 191 /* How to notify other side. FIXME: commonalize hcalls! */
46f9c2b9 192 bool (*notify)(struct virtqueue *vq);
0a8a69dd 193
2a2d1382
AL
194 /* DMA, allocation, and size information */
195 bool we_own_ring;
2a2d1382 196
0a8a69dd
RR
197#ifdef DEBUG
198 /* They're supposed to lock for us. */
199 unsigned int in_use;
e93300b1
RR
200
201 /* Figure out if their kicks are too delayed. */
202 bool last_add_time_valid;
203 ktime_t last_add_time;
0a8a69dd 204#endif
0a8a69dd
RR
205};
206
07d9629d
XZ
207static struct virtqueue *__vring_new_virtqueue(unsigned int index,
208 struct vring vring,
209 struct virtio_device *vdev,
210 bool weak_barriers,
211 bool context,
212 bool (*notify)(struct virtqueue *),
213 void (*callback)(struct virtqueue *),
214 const char *name);
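/*
 * Note: per this change, __vring_new_virtqueue() is a file-local helper
 * rather than an exported symbol; transports are expected to create split
 * rings through the exported vring_create_virtqueue() path instead.
 */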
e6f633e5
TB
215
216/*
217 * Helpers.
218 */
219
0a8a69dd
RR
220#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
221
35c51e09 222static inline bool virtqueue_use_indirect(struct vring_virtqueue *vq,
2f18c2d1
TB
223 unsigned int total_sg)
224{
2f18c2d1
TB
225 /*
226 * If the host supports indirect descriptor tables, and we have multiple
227 * buffers, then go indirect. FIXME: tune this threshold
228 */
229 return (vq->indirect && total_sg > 1 && vq->vq.num_free);
230}
231
d26c96c8 232/*
1a937693
MT
233 * Modern virtio devices have feature bits to specify whether they need a
234 * quirk and bypass the IOMMU. If not there, just use the DMA API.
235 *
236 * If there, the interaction between virtio and DMA API is messy.
d26c96c8
AL
237 *
238 * On most systems with virtio, physical addresses match bus addresses,
239 * and it doesn't particularly matter whether we use the DMA API.
240 *
241 * On some systems, including Xen and any system with a physical device
242 * that speaks virtio behind a physical IOMMU, we must use the DMA API
243 * for virtio DMA to work at all.
244 *
245 * On other systems, including SPARC and PPC64, virtio-pci devices are
246 * enumerated as though they are behind an IOMMU, but the virtio host
247 * ignores the IOMMU, so we must either pretend that the IOMMU isn't
248 * there or somehow map everything as the identity.
249 *
250 * For the time being, we preserve historic behavior and bypass the DMA
251 * API.
1a937693
MT
252 *
253 * TODO: install a per-device DMA ops structure that does the right thing
254 * taking into account all the above quirks, and use the DMA API
255 * unconditionally on data path.
d26c96c8
AL
256 */
257
258static bool vring_use_dma_api(struct virtio_device *vdev)
259{
24b6842a 260 if (!virtio_has_dma_quirk(vdev))
1a937693
MT
261 return true;
262
263 /* Otherwise, we are left to guess. */
78fe3987
AL
264 /*
 265 * In theory, it's possible to have a buggy QEMU-supplied
266 * emulated Q35 IOMMU and Xen enabled at the same time. On
267 * such a configuration, virtio has never worked and will
268 * not work without an even larger kludge. Instead, enable
269 * the DMA API if we're a Xen guest, which at least allows
270 * all of the sensible Xen configurations to work correctly.
271 */
272 if (xen_domain())
273 return true;
274
d26c96c8
AL
275 return false;
276}
277
e6d6dd6c
JR
278size_t virtio_max_dma_size(struct virtio_device *vdev)
279{
280 size_t max_segment_size = SIZE_MAX;
281
282 if (vring_use_dma_api(vdev))
817fc978 283 max_segment_size = dma_max_mapping_size(vdev->dev.parent);
e6d6dd6c
JR
284
285 return max_segment_size;
286}
287EXPORT_SYMBOL_GPL(virtio_max_dma_size);
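/*
 * Illustrative use (a sketch, not code from this file): a block-style driver
 * can cap its per-segment size with this limit before setting up its queue.
 * blk_queue_max_segment_size() is the block-layer helper assumed here and
 * "q" is the driver's request queue:
 *
 *	u32 max_seg = min_t(u64, virtio_max_dma_size(vdev), U32_MAX);
 *
 *	blk_queue_max_segment_size(q, max_seg);
 */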
288
d79dca75
TB
289static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
290 dma_addr_t *dma_handle, gfp_t flag)
291{
292 if (vring_use_dma_api(vdev)) {
293 return dma_alloc_coherent(vdev->dev.parent, size,
294 dma_handle, flag);
295 } else {
296 void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
297
298 if (queue) {
299 phys_addr_t phys_addr = virt_to_phys(queue);
300 *dma_handle = (dma_addr_t)phys_addr;
301
302 /*
 303 * Sanity check: make sure we didn't truncate
304 * the address. The only arches I can find that
305 * have 64-bit phys_addr_t but 32-bit dma_addr_t
306 * are certain non-highmem MIPS and x86
307 * configurations, but these configurations
308 * should never allocate physical pages above 32
309 * bits, so this is fine. Just in case, throw a
310 * warning and abort if we end up with an
311 * unrepresentable address.
312 */
313 if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
314 free_pages_exact(queue, PAGE_ALIGN(size));
315 return NULL;
316 }
317 }
318 return queue;
319 }
320}
321
322static void vring_free_queue(struct virtio_device *vdev, size_t size,
323 void *queue, dma_addr_t dma_handle)
324{
325 if (vring_use_dma_api(vdev))
326 dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
327 else
328 free_pages_exact(queue, PAGE_ALIGN(size));
329}
330
780bc790
AL
331/*
332 * The DMA ops on various arches are rather gnarly right now, and
333 * making all of the arch DMA ops work on the vring device itself
334 * is a mess. For now, we use the parent device for DMA ops.
335 */
75bfa81b 336static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
780bc790
AL
337{
338 return vq->vq.vdev->dev.parent;
339}
340
341/* Map one sg entry. */
342static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
343 struct scatterlist *sg,
344 enum dma_data_direction direction)
345{
fb3fba6b 346 if (!vq->use_dma_api)
780bc790
AL
347 return (dma_addr_t)sg_phys(sg);
348
349 /*
350 * We can't use dma_map_sg, because we don't use scatterlists in
351 * the way it expects (we don't guarantee that the scatterlist
352 * will exist for the lifetime of the mapping).
353 */
354 return dma_map_page(vring_dma_dev(vq),
355 sg_page(sg), sg->offset, sg->length,
356 direction);
357}
358
359static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
360 void *cpu_addr, size_t size,
361 enum dma_data_direction direction)
362{
fb3fba6b 363 if (!vq->use_dma_api)
780bc790
AL
364 return (dma_addr_t)virt_to_phys(cpu_addr);
365
366 return dma_map_single(vring_dma_dev(vq),
367 cpu_addr, size, direction);
368}
369
e6f633e5
TB
370static int vring_mapping_error(const struct vring_virtqueue *vq,
371 dma_addr_t addr)
372{
fb3fba6b 373 if (!vq->use_dma_api)
e6f633e5
TB
374 return 0;
375
376 return dma_mapping_error(vring_dma_dev(vq), addr);
377}
378
3a897128
XZ
379static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
380{
381 vq->vq.num_free = num;
382
383 if (vq->packed_ring)
384 vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR);
385 else
386 vq->last_used_idx = 0;
387
388 vq->event_triggered = false;
389 vq->num_added = 0;
390
391#ifdef DEBUG
392 vq->in_use = false;
393 vq->last_add_time_valid = false;
394#endif
395}
396
e6f633e5
TB
397
398/*
399 * Split ring specific functions - *_split().
400 */
401
72b5e895
JW
402static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
403 struct vring_desc *desc)
780bc790
AL
404{
405 u16 flags;
406
fb3fba6b 407 if (!vq->use_dma_api)
780bc790
AL
408 return;
409
410 flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
411
b4282ebc
XZ
412 dma_unmap_page(vring_dma_dev(vq),
413 virtio64_to_cpu(vq->vq.vdev, desc->addr),
414 virtio32_to_cpu(vq->vq.vdev, desc->len),
415 (flags & VRING_DESC_F_WRITE) ?
416 DMA_FROM_DEVICE : DMA_TO_DEVICE);
780bc790
AL
417}
418
72b5e895
JW
419static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
420 unsigned int i)
421{
422 struct vring_desc_extra *extra = vq->split.desc_extra;
423 u16 flags;
424
425 if (!vq->use_dma_api)
426 goto out;
427
428 flags = extra[i].flags;
429
430 if (flags & VRING_DESC_F_INDIRECT) {
431 dma_unmap_single(vring_dma_dev(vq),
432 extra[i].addr,
433 extra[i].len,
434 (flags & VRING_DESC_F_WRITE) ?
435 DMA_FROM_DEVICE : DMA_TO_DEVICE);
436 } else {
437 dma_unmap_page(vring_dma_dev(vq),
438 extra[i].addr,
439 extra[i].len,
440 (flags & VRING_DESC_F_WRITE) ?
441 DMA_FROM_DEVICE : DMA_TO_DEVICE);
442 }
443
444out:
445 return extra[i].next;
446}
447
138fd251
TB
448static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
449 unsigned int total_sg,
450 gfp_t gfp)
9fa29b9d
MM
451{
452 struct vring_desc *desc;
b25bd251 453 unsigned int i;
9fa29b9d 454
b92b1b89
WD
455 /*
456 * We require lowmem mappings for the descriptors because
457 * otherwise virt_to_phys will give us bogus addresses in the
458 * virtqueue.
459 */
82107539 460 gfp &= ~__GFP_HIGHMEM;
b92b1b89 461
6da2ec56 462 desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp);
9fa29b9d 463 if (!desc)
b25bd251 464 return NULL;
9fa29b9d 465
b25bd251 466 for (i = 0; i < total_sg; i++)
00e6f3d9 467 desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
b25bd251 468 return desc;
9fa29b9d
MM
469}
470
fe4c3862
JW
471static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
472 struct vring_desc *desc,
473 unsigned int i,
474 dma_addr_t addr,
475 unsigned int len,
72b5e895
JW
476 u16 flags,
477 bool indirect)
fe4c3862 478{
72b5e895
JW
479 struct vring_virtqueue *vring = to_vvq(vq);
480 struct vring_desc_extra *extra = vring->split.desc_extra;
481 u16 next;
482
fe4c3862
JW
483 desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
484 desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
485 desc[i].len = cpu_to_virtio32(vq->vdev, len);
486
72b5e895
JW
487 if (!indirect) {
488 next = extra[i].next;
489 desc[i].next = cpu_to_virtio16(vq->vdev, next);
490
491 extra[i].addr = addr;
492 extra[i].len = len;
493 extra[i].flags = flags;
494 } else
495 next = virtio16_to_cpu(vq->vdev, desc[i].next);
496
497 return next;
fe4c3862
JW
498}
499
138fd251
TB
500static inline int virtqueue_add_split(struct virtqueue *_vq,
501 struct scatterlist *sgs[],
502 unsigned int total_sg,
503 unsigned int out_sgs,
504 unsigned int in_sgs,
505 void *data,
506 void *ctx,
507 gfp_t gfp)
0a8a69dd
RR
508{
509 struct vring_virtqueue *vq = to_vvq(_vq);
13816c76 510 struct scatterlist *sg;
b25bd251 511 struct vring_desc *desc;
3f649ab7 512 unsigned int i, n, avail, descs_used, prev, err_idx;
1fe9b6fe 513 int head;
b25bd251 514 bool indirect;
0a8a69dd 515
9fa29b9d
MM
516 START_USE(vq);
517
0a8a69dd 518 BUG_ON(data == NULL);
5a08b04f 519 BUG_ON(ctx && vq->indirect);
9fa29b9d 520
70670444
RR
521 if (unlikely(vq->broken)) {
522 END_USE(vq);
523 return -EIO;
524 }
525
4d6a105e 526 LAST_ADD_TIME_UPDATE(vq);
e93300b1 527
b25bd251
RR
528 BUG_ON(total_sg == 0);
529
530 head = vq->free_head;
531
35c51e09 532 if (virtqueue_use_indirect(vq, total_sg))
138fd251 533 desc = alloc_indirect_split(_vq, total_sg, gfp);
44ed8089 534 else {
b25bd251 535 desc = NULL;
e593bf97 536 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
44ed8089 537 }
b25bd251
RR
538
539 if (desc) {
540 /* Use a single buffer which doesn't continue */
780bc790 541 indirect = true;
b25bd251
RR
542 /* Set up rest to use this indirect table. */
543 i = 0;
544 descs_used = 1;
b25bd251 545 } else {
780bc790 546 indirect = false;
e593bf97 547 desc = vq->split.vring.desc;
b25bd251
RR
548 i = head;
549 descs_used = total_sg;
9fa29b9d
MM
550 }
551
b4b4ff73 552 if (unlikely(vq->vq.num_free < descs_used)) {
0a8a69dd 553 pr_debug("Can't add buf len %i - avail = %i\n",
b25bd251 554 descs_used, vq->vq.num_free);
44653eae
RR
555 /* FIXME: for historical reasons, we force a notify here if
556 * there are outgoing parts to the buffer. Presumably the
557 * host should service the ring ASAP. */
13816c76 558 if (out_sgs)
44653eae 559 vq->notify(&vq->vq);
58625edf
WY
560 if (indirect)
561 kfree(desc);
0a8a69dd
RR
562 END_USE(vq);
563 return -ENOSPC;
564 }
565
13816c76 566 for (n = 0; n < out_sgs; n++) {
eeebf9b1 567 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
780bc790
AL
568 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
569 if (vring_mapping_error(vq, addr))
570 goto unmap_release;
571
13816c76 572 prev = i;
72b5e895
JW
573 /* Note that we trust indirect descriptor
 574 * table since it uses stream DMA mapping.
575 */
fe4c3862 576 i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
72b5e895
JW
577 VRING_DESC_F_NEXT,
578 indirect);
13816c76 579 }
0a8a69dd 580 }
13816c76 581 for (; n < (out_sgs + in_sgs); n++) {
eeebf9b1 582 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
780bc790
AL
583 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
584 if (vring_mapping_error(vq, addr))
585 goto unmap_release;
586
13816c76 587 prev = i;
72b5e895
JW
588 /* Note that we trust indirect descriptor
 589 * table since it uses stream DMA mapping.
590 */
fe4c3862
JW
591 i = virtqueue_add_desc_split(_vq, desc, i, addr,
592 sg->length,
593 VRING_DESC_F_NEXT |
72b5e895
JW
594 VRING_DESC_F_WRITE,
595 indirect);
13816c76 596 }
0a8a69dd
RR
597 }
598 /* Last one doesn't continue. */
00e6f3d9 599 desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
72b5e895 600 if (!indirect && vq->use_dma_api)
890d3356 601 vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &=
72b5e895 602 ~VRING_DESC_F_NEXT;
0a8a69dd 603
780bc790
AL
604 if (indirect) {
605 /* Now that the indirect table is filled in, map it. */
606 dma_addr_t addr = vring_map_single(
607 vq, desc, total_sg * sizeof(struct vring_desc),
608 DMA_TO_DEVICE);
609 if (vring_mapping_error(vq, addr))
610 goto unmap_release;
611
fe4c3862
JW
612 virtqueue_add_desc_split(_vq, vq->split.vring.desc,
613 head, addr,
614 total_sg * sizeof(struct vring_desc),
72b5e895
JW
615 VRING_DESC_F_INDIRECT,
616 false);
780bc790
AL
617 }
618
619 /* We're using some buffers from the free list. */
620 vq->vq.num_free -= descs_used;
621
0a8a69dd 622 /* Update free pointer */
b25bd251 623 if (indirect)
72b5e895 624 vq->free_head = vq->split.desc_extra[head].next;
b25bd251
RR
625 else
626 vq->free_head = i;
0a8a69dd 627
780bc790 628 /* Store token and indirect buffer state. */
cbeedb72 629 vq->split.desc_state[head].data = data;
780bc790 630 if (indirect)
cbeedb72 631 vq->split.desc_state[head].indir_desc = desc;
87646a34 632 else
cbeedb72 633 vq->split.desc_state[head].indir_desc = ctx;
0a8a69dd
RR
634
635 /* Put entry in available array (but don't update avail->idx until they
3b720b8c 636 * do sync). */
e593bf97
TB
637 avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
638 vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
0a8a69dd 639
ee7cd898
RR
640 /* Descriptors and available array need to be set before we expose the
641 * new available array entries. */
a9a0fef7 642 virtio_wmb(vq->weak_barriers);
e593bf97
TB
643 vq->split.avail_idx_shadow++;
644 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
645 vq->split.avail_idx_shadow);
ee7cd898
RR
646 vq->num_added++;
647
5e05bf58
TH
648 pr_debug("Added buffer head %i to %p\n", head, vq);
649 END_USE(vq);
650
ee7cd898
RR
651 /* This is very unlikely, but theoretically possible. Kick
652 * just in case. */
653 if (unlikely(vq->num_added == (1 << 16) - 1))
654 virtqueue_kick(_vq);
655
98e8c6bc 656 return 0;
780bc790
AL
657
658unmap_release:
659 err_idx = i;
cf8f1696
ML
660
661 if (indirect)
662 i = 0;
663 else
664 i = head;
780bc790
AL
665
666 for (n = 0; n < total_sg; n++) {
667 if (i == err_idx)
668 break;
72b5e895
JW
669 if (indirect) {
670 vring_unmap_one_split_indirect(vq, &desc[i]);
671 i = virtio16_to_cpu(_vq->vdev, desc[i].next);
672 } else
673 i = vring_unmap_one_split(vq, i);
780bc790
AL
674 }
675
780bc790
AL
676 if (indirect)
677 kfree(desc);
678
3cc36f6e 679 END_USE(vq);
f7728002 680 return -ENOMEM;
0a8a69dd 681}
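/*
 * Layout sketch (illustrative): adding a buffer with two device-readable and
 * one device-writable scatterlist entries in direct (non-indirect) mode
 * consumes three descriptors chained through their next fields, e.g. with
 * free_head == 3:
 *
 *	desc[3]: addr=out0, flags=NEXT,        next=4
 *	desc[4]: addr=out1, flags=NEXT,        next=5
 *	desc[5]: addr=in0,  flags=WRITE        (NEXT cleared, chain ends)
 *
 * and avail->ring[avail_idx_shadow & (num - 1)] is then set to head index 3.
 */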
13816c76 682
138fd251 683static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
0a8a69dd
RR
684{
685 struct vring_virtqueue *vq = to_vvq(_vq);
a5c262c5 686 u16 new, old;
41f0377f
RR
687 bool needs_kick;
688
0a8a69dd 689 START_USE(vq);
a72caae2
JW
690 /* We need to expose available array entries before checking avail
691 * event. */
a9a0fef7 692 virtio_mb(vq->weak_barriers);
0a8a69dd 693
e593bf97
TB
694 old = vq->split.avail_idx_shadow - vq->num_added;
695 new = vq->split.avail_idx_shadow;
0a8a69dd
RR
696 vq->num_added = 0;
697
4d6a105e
TB
698 LAST_ADD_TIME_CHECK(vq);
699 LAST_ADD_TIME_INVALID(vq);
e93300b1 700
41f0377f 701 if (vq->event) {
e593bf97
TB
702 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
703 vring_avail_event(&vq->split.vring)),
41f0377f
RR
704 new, old);
705 } else {
e593bf97
TB
706 needs_kick = !(vq->split.vring.used->flags &
707 cpu_to_virtio16(_vq->vdev,
708 VRING_USED_F_NO_NOTIFY));
41f0377f 709 }
0a8a69dd 710 END_USE(vq);
41f0377f
RR
711 return needs_kick;
712}
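/*
 * Worked example of the event-idx branch above (illustrative): with
 * old == 10 and new == 14 (four buffers added since the last kick),
 * vring_need_event(event_idx, new, old) computes
 * (u16)(new - event_idx - 1) < (u16)(new - old). That is true exactly for
 * event_idx 10..13, i.e. when the driver has just crossed the index at
 * which the device asked to be notified, and false for event_idx == 14 or
 * for indices already covered by an earlier kick.
 */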
138fd251 713
138fd251
TB
714static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
715 void **ctx)
0a8a69dd 716{
780bc790 717 unsigned int i, j;
c60923cb 718 __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
0a8a69dd
RR
719
720 /* Clear data ptr. */
cbeedb72 721 vq->split.desc_state[head].data = NULL;
0a8a69dd 722
780bc790 723 /* Put back on free list: unmap first-level descriptors and find end */
0a8a69dd 724 i = head;
9fa29b9d 725
e593bf97 726 while (vq->split.vring.desc[i].flags & nextflag) {
72b5e895
JW
727 vring_unmap_one_split(vq, i);
728 i = vq->split.desc_extra[i].next;
06ca287d 729 vq->vq.num_free++;
0a8a69dd
RR
730 }
731
72b5e895
JW
732 vring_unmap_one_split(vq, i);
733 vq->split.desc_extra[i].next = vq->free_head;
0a8a69dd 734 vq->free_head = head;
780bc790 735
0a8a69dd 736 /* Plus final descriptor */
06ca287d 737 vq->vq.num_free++;
780bc790 738
5a08b04f 739 if (vq->indirect) {
cbeedb72
TB
740 struct vring_desc *indir_desc =
741 vq->split.desc_state[head].indir_desc;
5a08b04f
MT
742 u32 len;
743
744 /* Free the indirect table, if any, now that it's unmapped. */
745 if (!indir_desc)
746 return;
747
72b5e895 748 len = vq->split.desc_extra[head].len;
780bc790 749
72b5e895
JW
750 BUG_ON(!(vq->split.desc_extra[head].flags &
751 VRING_DESC_F_INDIRECT));
780bc790
AL
752 BUG_ON(len == 0 || len % sizeof(struct vring_desc));
753
754 for (j = 0; j < len / sizeof(struct vring_desc); j++)
72b5e895 755 vring_unmap_one_split_indirect(vq, &indir_desc[j]);
780bc790 756
5a08b04f 757 kfree(indir_desc);
cbeedb72 758 vq->split.desc_state[head].indir_desc = NULL;
5a08b04f 759 } else if (ctx) {
cbeedb72 760 *ctx = vq->split.desc_state[head].indir_desc;
780bc790 761 }
0a8a69dd
RR
762}
763
138fd251 764static inline bool more_used_split(const struct vring_virtqueue *vq)
0a8a69dd 765{
e593bf97
TB
766 return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
767 vq->split.vring.used->idx);
0a8a69dd
RR
768}
769
138fd251
TB
770static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
771 unsigned int *len,
772 void **ctx)
0a8a69dd
RR
773{
774 struct vring_virtqueue *vq = to_vvq(_vq);
775 void *ret;
776 unsigned int i;
3b720b8c 777 u16 last_used;
0a8a69dd
RR
778
779 START_USE(vq);
780
5ef82752
RR
781 if (unlikely(vq->broken)) {
782 END_USE(vq);
783 return NULL;
784 }
785
138fd251 786 if (!more_used_split(vq)) {
0a8a69dd
RR
787 pr_debug("No more buffers in queue\n");
788 END_USE(vq);
789 return NULL;
790 }
791
2d61ba95 792 /* Only get used array entries after they have been exposed by host. */
a9a0fef7 793 virtio_rmb(vq->weak_barriers);
2d61ba95 794
e593bf97
TB
795 last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
796 i = virtio32_to_cpu(_vq->vdev,
797 vq->split.vring.used->ring[last_used].id);
798 *len = virtio32_to_cpu(_vq->vdev,
799 vq->split.vring.used->ring[last_used].len);
0a8a69dd 800
e593bf97 801 if (unlikely(i >= vq->split.vring.num)) {
0a8a69dd
RR
802 BAD_RING(vq, "id %u out of range\n", i);
803 return NULL;
804 }
cbeedb72 805 if (unlikely(!vq->split.desc_state[i].data)) {
0a8a69dd
RR
806 BAD_RING(vq, "id %u is not a head!\n", i);
807 return NULL;
808 }
809
138fd251 810 /* detach_buf_split clears data, so grab it now. */
cbeedb72 811 ret = vq->split.desc_state[i].data;
138fd251 812 detach_buf_split(vq, i, ctx);
0a8a69dd 813 vq->last_used_idx++;
a5c262c5
MT
814 /* If we expect an interrupt for the next entry, tell host
815 * by writing event index and flush out the write before
816 * the read in the next get_buf call. */
e593bf97 817 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
788e5b3a 818 virtio_store_mb(vq->weak_barriers,
e593bf97 819 &vring_used_event(&vq->split.vring),
788e5b3a 820 cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
a5c262c5 821
4d6a105e 822 LAST_ADD_TIME_INVALID(vq);
e93300b1 823
0a8a69dd
RR
824 END_USE(vq);
825 return ret;
826}
138fd251 827
138fd251 828static void virtqueue_disable_cb_split(struct virtqueue *_vq)
18445c4d
RR
829{
830 struct vring_virtqueue *vq = to_vvq(_vq);
831
e593bf97
TB
832 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
833 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
8d622d21
MT
834 if (vq->event)
835 /* TODO: this is a hack. Figure out a cleaner value to write. */
836 vring_used_event(&vq->split.vring) = 0x0;
837 else
e593bf97
TB
838 vq->split.vring.avail->flags =
839 cpu_to_virtio16(_vq->vdev,
840 vq->split.avail_flags_shadow);
f277ec42 841 }
18445c4d
RR
842}
843
31532340 844static unsigned int virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
0a8a69dd
RR
845{
846 struct vring_virtqueue *vq = to_vvq(_vq);
cc229884 847 u16 last_used_idx;
0a8a69dd
RR
848
849 START_USE(vq);
0a8a69dd
RR
850
851 /* We optimistically turn back on interrupts, then check if there was
852 * more to do. */
a5c262c5
MT
853 /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
854 * either clear the flags bit or point the event index at the next
855 * entry. Always do both to keep code simple. */
e593bf97
TB
856 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
857 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
0ea1e4a6 858 if (!vq->event)
e593bf97
TB
859 vq->split.vring.avail->flags =
860 cpu_to_virtio16(_vq->vdev,
861 vq->split.avail_flags_shadow);
f277ec42 862 }
e593bf97
TB
863 vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
864 last_used_idx = vq->last_used_idx);
cc229884
MT
865 END_USE(vq);
866 return last_used_idx;
867}
138fd251 868
31532340 869static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_idx)
138fd251
TB
870{
871 struct vring_virtqueue *vq = to_vvq(_vq);
872
873 return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
e593bf97 874 vq->split.vring.used->idx);
138fd251
TB
875}
876
138fd251 877static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
7ab358c2
MT
878{
879 struct vring_virtqueue *vq = to_vvq(_vq);
880 u16 bufs;
881
882 START_USE(vq);
883
884 /* We optimistically turn back on interrupts, then check if there was
885 * more to do. */
 886 /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
887 * either clear the flags bit or point the event index at the next
0ea1e4a6 888 * entry. Always update the event index to keep code simple. */
e593bf97
TB
889 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
890 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
0ea1e4a6 891 if (!vq->event)
e593bf97
TB
892 vq->split.vring.avail->flags =
893 cpu_to_virtio16(_vq->vdev,
894 vq->split.avail_flags_shadow);
f277ec42 895 }
7ab358c2 896 /* TODO: tune this threshold */
e593bf97 897 bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;
788e5b3a
MT
898
899 virtio_store_mb(vq->weak_barriers,
e593bf97 900 &vring_used_event(&vq->split.vring),
788e5b3a
MT
901 cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
902
e593bf97
TB
903 if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
904 - vq->last_used_idx) > bufs)) {
7ab358c2
MT
905 END_USE(vq);
906 return false;
907 }
908
909 END_USE(vq);
910 return true;
911}
7ab358c2 912
138fd251 913static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
c021eac4
SM
914{
915 struct vring_virtqueue *vq = to_vvq(_vq);
916 unsigned int i;
917 void *buf;
918
919 START_USE(vq);
920
e593bf97 921 for (i = 0; i < vq->split.vring.num; i++) {
cbeedb72 922 if (!vq->split.desc_state[i].data)
c021eac4 923 continue;
138fd251 924 /* detach_buf_split clears data, so grab it now. */
cbeedb72 925 buf = vq->split.desc_state[i].data;
138fd251 926 detach_buf_split(vq, i, NULL);
e593bf97
TB
927 vq->split.avail_idx_shadow--;
928 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
929 vq->split.avail_idx_shadow);
c021eac4
SM
930 END_USE(vq);
931 return buf;
932 }
933 /* That should have freed everything. */
e593bf97 934 BUG_ON(vq->vq.num_free != vq->split.vring.num);
c021eac4
SM
935
936 END_USE(vq);
937 return NULL;
938}
138fd251 939
d79dca75
TB
940static struct virtqueue *vring_create_virtqueue_split(
941 unsigned int index,
942 unsigned int num,
943 unsigned int vring_align,
944 struct virtio_device *vdev,
945 bool weak_barriers,
946 bool may_reduce_num,
947 bool context,
948 bool (*notify)(struct virtqueue *),
949 void (*callback)(struct virtqueue *),
950 const char *name)
951{
952 struct virtqueue *vq;
953 void *queue = NULL;
954 dma_addr_t dma_addr;
955 size_t queue_size_in_bytes;
956 struct vring vring;
957
958 /* We assume num is a power of 2. */
959 if (num & (num - 1)) {
960 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
961 return NULL;
962 }
963
964 /* TODO: allocate each queue chunk individually */
965 for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
966 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
967 &dma_addr,
c7cc29aa 968 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
d79dca75
TB
969 if (queue)
970 break;
cf94db21
CH
971 if (!may_reduce_num)
972 return NULL;
d79dca75
TB
973 }
974
975 if (!num)
976 return NULL;
977
978 if (!queue) {
979 /* Try to get a single page. You are my only hope! */
980 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
981 &dma_addr, GFP_KERNEL|__GFP_ZERO);
982 }
983 if (!queue)
984 return NULL;
985
986 queue_size_in_bytes = vring_size(num, vring_align);
987 vring_init(&vring, num, queue, vring_align);
988
989 vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
990 notify, callback, name);
991 if (!vq) {
992 vring_free_queue(vdev, queue_size_in_bytes, queue,
993 dma_addr);
994 return NULL;
995 }
996
997 to_vvq(vq)->split.queue_dma_addr = dma_addr;
998 to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes;
999 to_vvq(vq)->we_own_ring = true;
1000
1001 return vq;
1002}
1003
e6f633e5 1004
1ce9e605
TB
1005/*
1006 * Packed ring specific functions - *_packed().
1007 */
a7722890 1008static inline bool packed_used_wrap_counter(u16 last_used_idx)
1009{
1010 return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1011}
1012
1013static inline u16 packed_last_used(u16 last_used_idx)
1014{
1015 return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1016}
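/*
 * Bit-layout example (illustrative): with VRING_PACKED_EVENT_F_WRAP_CTR == 15,
 * a last_used_idx of 0x8003 decodes as packed_last_used() == 3 and
 * packed_used_wrap_counter() == true, i.e. used index 3 with the used wrap
 * counter set.
 */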
1ce9e605 1017
d80dc15b
XZ
1018static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
1019 struct vring_desc_extra *extra)
1ce9e605
TB
1020{
1021 u16 flags;
1022
1023 if (!vq->use_dma_api)
1024 return;
1025
d80dc15b 1026 flags = extra->flags;
1ce9e605
TB
1027
1028 if (flags & VRING_DESC_F_INDIRECT) {
1029 dma_unmap_single(vring_dma_dev(vq),
d80dc15b 1030 extra->addr, extra->len,
1ce9e605
TB
1031 (flags & VRING_DESC_F_WRITE) ?
1032 DMA_FROM_DEVICE : DMA_TO_DEVICE);
1033 } else {
1034 dma_unmap_page(vring_dma_dev(vq),
d80dc15b 1035 extra->addr, extra->len,
1ce9e605
TB
1036 (flags & VRING_DESC_F_WRITE) ?
1037 DMA_FROM_DEVICE : DMA_TO_DEVICE);
1038 }
1039}
1040
1041static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
1042 struct vring_packed_desc *desc)
1043{
1044 u16 flags;
1045
1046 if (!vq->use_dma_api)
1047 return;
1048
1049 flags = le16_to_cpu(desc->flags);
1050
920379a4
XZ
1051 dma_unmap_page(vring_dma_dev(vq),
1052 le64_to_cpu(desc->addr),
1053 le32_to_cpu(desc->len),
1054 (flags & VRING_DESC_F_WRITE) ?
1055 DMA_FROM_DEVICE : DMA_TO_DEVICE);
1ce9e605
TB
1056}
1057
1058static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
1059 gfp_t gfp)
1060{
1061 struct vring_packed_desc *desc;
1062
1063 /*
1064 * We require lowmem mappings for the descriptors because
1065 * otherwise virt_to_phys will give us bogus addresses in the
1066 * virtqueue.
1067 */
1068 gfp &= ~__GFP_HIGHMEM;
1069
1070 desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp);
1071
1072 return desc;
1073}
1074
1075static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
8d7670f3
XZ
1076 struct scatterlist *sgs[],
1077 unsigned int total_sg,
1078 unsigned int out_sgs,
1079 unsigned int in_sgs,
1080 void *data,
1081 gfp_t gfp)
1ce9e605
TB
1082{
1083 struct vring_packed_desc *desc;
1084 struct scatterlist *sg;
1085 unsigned int i, n, err_idx;
1086 u16 head, id;
1087 dma_addr_t addr;
1088
1089 head = vq->packed.next_avail_idx;
1090 desc = alloc_indirect_packed(total_sg, gfp);
fc6d70f4
XZ
1091 if (!desc)
1092 return -ENOMEM;
1ce9e605
TB
1093
1094 if (unlikely(vq->vq.num_free < 1)) {
1095 pr_debug("Can't add buf len 1 - avail = 0\n");
df0bfe75 1096 kfree(desc);
1ce9e605
TB
1097 END_USE(vq);
1098 return -ENOSPC;
1099 }
1100
1101 i = 0;
1102 id = vq->free_head;
1103 BUG_ON(id == vq->packed.vring.num);
1104
1105 for (n = 0; n < out_sgs + in_sgs; n++) {
1106 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1107 addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1108 DMA_TO_DEVICE : DMA_FROM_DEVICE);
1109 if (vring_mapping_error(vq, addr))
1110 goto unmap_release;
1111
1112 desc[i].flags = cpu_to_le16(n < out_sgs ?
1113 0 : VRING_DESC_F_WRITE);
1114 desc[i].addr = cpu_to_le64(addr);
1115 desc[i].len = cpu_to_le32(sg->length);
1116 i++;
1117 }
1118 }
1119
1120 /* Now that the indirect table is filled in, map it. */
1121 addr = vring_map_single(vq, desc,
1122 total_sg * sizeof(struct vring_packed_desc),
1123 DMA_TO_DEVICE);
1124 if (vring_mapping_error(vq, addr))
1125 goto unmap_release;
1126
1127 vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
1128 vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
1129 sizeof(struct vring_packed_desc));
1130 vq->packed.vring.desc[head].id = cpu_to_le16(id);
1131
1132 if (vq->use_dma_api) {
1133 vq->packed.desc_extra[id].addr = addr;
1134 vq->packed.desc_extra[id].len = total_sg *
1135 sizeof(struct vring_packed_desc);
1136 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
1137 vq->packed.avail_used_flags;
1138 }
1139
1140 /*
1141 * A driver MUST NOT make the first descriptor in the list
1142 * available before all subsequent descriptors comprising
1143 * the list are made available.
1144 */
1145 virtio_wmb(vq->weak_barriers);
1146 vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
1147 vq->packed.avail_used_flags);
1148
1149 /* We're using some buffers from the free list. */
1150 vq->vq.num_free -= 1;
1151
1152 /* Update free pointer */
1153 n = head + 1;
1154 if (n >= vq->packed.vring.num) {
1155 n = 0;
1156 vq->packed.avail_wrap_counter ^= 1;
1157 vq->packed.avail_used_flags ^=
1158 1 << VRING_PACKED_DESC_F_AVAIL |
1159 1 << VRING_PACKED_DESC_F_USED;
1160 }
1161 vq->packed.next_avail_idx = n;
aeef9b47 1162 vq->free_head = vq->packed.desc_extra[id].next;
1ce9e605
TB
1163
1164 /* Store token and indirect buffer state. */
1165 vq->packed.desc_state[id].num = 1;
1166 vq->packed.desc_state[id].data = data;
1167 vq->packed.desc_state[id].indir_desc = desc;
1168 vq->packed.desc_state[id].last = id;
1169
1170 vq->num_added += 1;
1171
1172 pr_debug("Added buffer head %i to %p\n", head, vq);
1173 END_USE(vq);
1174
1175 return 0;
1176
1177unmap_release:
1178 err_idx = i;
1179
1180 for (i = 0; i < err_idx; i++)
1181 vring_unmap_desc_packed(vq, &desc[i]);
1182
1183 kfree(desc);
1184
1185 END_USE(vq);
f7728002 1186 return -ENOMEM;
1ce9e605
TB
1187}
1188
1189static inline int virtqueue_add_packed(struct virtqueue *_vq,
1190 struct scatterlist *sgs[],
1191 unsigned int total_sg,
1192 unsigned int out_sgs,
1193 unsigned int in_sgs,
1194 void *data,
1195 void *ctx,
1196 gfp_t gfp)
1197{
1198 struct vring_virtqueue *vq = to_vvq(_vq);
1199 struct vring_packed_desc *desc;
1200 struct scatterlist *sg;
1201 unsigned int i, n, c, descs_used, err_idx;
3f649ab7
KC
1202 __le16 head_flags, flags;
1203 u16 head, id, prev, curr, avail_used_flags;
fc6d70f4 1204 int err;
1ce9e605
TB
1205
1206 START_USE(vq);
1207
1208 BUG_ON(data == NULL);
1209 BUG_ON(ctx && vq->indirect);
1210
1211 if (unlikely(vq->broken)) {
1212 END_USE(vq);
1213 return -EIO;
1214 }
1215
1216 LAST_ADD_TIME_UPDATE(vq);
1217
1218 BUG_ON(total_sg == 0);
1219
35c51e09 1220 if (virtqueue_use_indirect(vq, total_sg)) {
fc6d70f4
XZ
1221 err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
1222 in_sgs, data, gfp);
1861ba62
MT
1223 if (err != -ENOMEM) {
1224 END_USE(vq);
fc6d70f4 1225 return err;
1861ba62 1226 }
fc6d70f4
XZ
1227
1228 /* fall back on direct */
1229 }
1ce9e605
TB
1230
1231 head = vq->packed.next_avail_idx;
1232 avail_used_flags = vq->packed.avail_used_flags;
1233
1234 WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
1235
1236 desc = vq->packed.vring.desc;
1237 i = head;
1238 descs_used = total_sg;
1239
1240 if (unlikely(vq->vq.num_free < descs_used)) {
1241 pr_debug("Can't add buf len %i - avail = %i\n",
1242 descs_used, vq->vq.num_free);
1243 END_USE(vq);
1244 return -ENOSPC;
1245 }
1246
1247 id = vq->free_head;
1248 BUG_ON(id == vq->packed.vring.num);
1249
1250 curr = id;
1251 c = 0;
1252 for (n = 0; n < out_sgs + in_sgs; n++) {
1253 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1254 dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1255 DMA_TO_DEVICE : DMA_FROM_DEVICE);
1256 if (vring_mapping_error(vq, addr))
1257 goto unmap_release;
1258
1259 flags = cpu_to_le16(vq->packed.avail_used_flags |
1260 (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
1261 (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
1262 if (i == head)
1263 head_flags = flags;
1264 else
1265 desc[i].flags = flags;
1266
1267 desc[i].addr = cpu_to_le64(addr);
1268 desc[i].len = cpu_to_le32(sg->length);
1269 desc[i].id = cpu_to_le16(id);
1270
1271 if (unlikely(vq->use_dma_api)) {
1272 vq->packed.desc_extra[curr].addr = addr;
1273 vq->packed.desc_extra[curr].len = sg->length;
1274 vq->packed.desc_extra[curr].flags =
1275 le16_to_cpu(flags);
1276 }
1277 prev = curr;
aeef9b47 1278 curr = vq->packed.desc_extra[curr].next;
1ce9e605
TB
1279
1280 if ((unlikely(++i >= vq->packed.vring.num))) {
1281 i = 0;
1282 vq->packed.avail_used_flags ^=
1283 1 << VRING_PACKED_DESC_F_AVAIL |
1284 1 << VRING_PACKED_DESC_F_USED;
1285 }
1286 }
1287 }
1288
1289 if (i < head)
1290 vq->packed.avail_wrap_counter ^= 1;
1291
1292 /* We're using some buffers from the free list. */
1293 vq->vq.num_free -= descs_used;
1294
1295 /* Update free pointer */
1296 vq->packed.next_avail_idx = i;
1297 vq->free_head = curr;
1298
1299 /* Store token. */
1300 vq->packed.desc_state[id].num = descs_used;
1301 vq->packed.desc_state[id].data = data;
1302 vq->packed.desc_state[id].indir_desc = ctx;
1303 vq->packed.desc_state[id].last = prev;
1304
1305 /*
1306 * A driver MUST NOT make the first descriptor in the list
1307 * available before all subsequent descriptors comprising
1308 * the list are made available.
1309 */
1310 virtio_wmb(vq->weak_barriers);
1311 vq->packed.vring.desc[head].flags = head_flags;
1312 vq->num_added += descs_used;
1313
1314 pr_debug("Added buffer head %i to %p\n", head, vq);
1315 END_USE(vq);
1316
1317 return 0;
1318
1319unmap_release:
1320 err_idx = i;
1321 i = head;
44593865 1322 curr = vq->free_head;
1ce9e605
TB
1323
1324 vq->packed.avail_used_flags = avail_used_flags;
1325
1326 for (n = 0; n < total_sg; n++) {
1327 if (i == err_idx)
1328 break;
d80dc15b 1329 vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
44593865 1330 curr = vq->packed.desc_extra[curr].next;
1ce9e605
TB
1331 i++;
1332 if (i >= vq->packed.vring.num)
1333 i = 0;
1334 }
1335
1336 END_USE(vq);
1337 return -EIO;
1338}
1339
1340static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
1341{
1342 struct vring_virtqueue *vq = to_vvq(_vq);
f51f9826 1343 u16 new, old, off_wrap, flags, wrap_counter, event_idx;
1ce9e605
TB
1344 bool needs_kick;
1345 union {
1346 struct {
1347 __le16 off_wrap;
1348 __le16 flags;
1349 };
1350 u32 u32;
1351 } snapshot;
1352
1353 START_USE(vq);
1354
1355 /*
1356 * We need to expose the new flags value before checking notification
1357 * suppressions.
1358 */
1359 virtio_mb(vq->weak_barriers);
1360
f51f9826
TB
1361 old = vq->packed.next_avail_idx - vq->num_added;
1362 new = vq->packed.next_avail_idx;
1ce9e605
TB
1363 vq->num_added = 0;
1364
1365 snapshot.u32 = *(u32 *)vq->packed.vring.device;
1366 flags = le16_to_cpu(snapshot.flags);
1367
1368 LAST_ADD_TIME_CHECK(vq);
1369 LAST_ADD_TIME_INVALID(vq);
1370
f51f9826
TB
1371 if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
1372 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
1373 goto out;
1374 }
1375
1376 off_wrap = le16_to_cpu(snapshot.off_wrap);
1377
1378 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1379 event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1380 if (wrap_counter != vq->packed.avail_wrap_counter)
1381 event_idx -= vq->packed.vring.num;
1382
1383 needs_kick = vring_need_event(event_idx, new, old);
1384out:
1ce9e605
TB
1385 END_USE(vq);
1386 return needs_kick;
1387}
1388
1389static void detach_buf_packed(struct vring_virtqueue *vq,
1390 unsigned int id, void **ctx)
1391{
1392 struct vring_desc_state_packed *state = NULL;
1393 struct vring_packed_desc *desc;
1394 unsigned int i, curr;
1395
1396 state = &vq->packed.desc_state[id];
1397
1398 /* Clear data ptr. */
1399 state->data = NULL;
1400
aeef9b47 1401 vq->packed.desc_extra[state->last].next = vq->free_head;
1ce9e605
TB
1402 vq->free_head = id;
1403 vq->vq.num_free += state->num;
1404
1405 if (unlikely(vq->use_dma_api)) {
1406 curr = id;
1407 for (i = 0; i < state->num; i++) {
d80dc15b
XZ
1408 vring_unmap_extra_packed(vq,
1409 &vq->packed.desc_extra[curr]);
aeef9b47 1410 curr = vq->packed.desc_extra[curr].next;
1ce9e605
TB
1411 }
1412 }
1413
1414 if (vq->indirect) {
1415 u32 len;
1416
1417 /* Free the indirect table, if any, now that it's unmapped. */
1418 desc = state->indir_desc;
1419 if (!desc)
1420 return;
1421
1422 if (vq->use_dma_api) {
1423 len = vq->packed.desc_extra[id].len;
1424 for (i = 0; i < len / sizeof(struct vring_packed_desc);
1425 i++)
1426 vring_unmap_desc_packed(vq, &desc[i]);
1427 }
1428 kfree(desc);
1429 state->indir_desc = NULL;
1430 } else if (ctx) {
1431 *ctx = state->indir_desc;
1432 }
1433}
1434
1435static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
1436 u16 idx, bool used_wrap_counter)
1437{
1438 bool avail, used;
1439 u16 flags;
1440
1441 flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
1442 avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
1443 used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
1444
1445 return avail == used && used == used_wrap_counter;
1446}
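/*
 * Example (illustrative): a descriptor made available during the first pass
 * over the ring carries AVAIL=1/USED=0. When the device consumes it, it
 * writes the descriptor back with AVAIL=1/USED=1, so avail == used and both
 * match the driver's used wrap counter (still 1), which is exactly what the
 * check above tests. After the used wrap counter flips to 0, a consumed
 * descriptor is instead signalled with AVAIL=0/USED=0.
 */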
1447
1448static inline bool more_used_packed(const struct vring_virtqueue *vq)
1449{
a7722890 1450 u16 last_used;
1451 u16 last_used_idx;
1452 bool used_wrap_counter;
1453
1454 last_used_idx = READ_ONCE(vq->last_used_idx);
1455 last_used = packed_last_used(last_used_idx);
1456 used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1457 return is_used_desc_packed(vq, last_used, used_wrap_counter);
1ce9e605
TB
1458}
1459
1460static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
1461 unsigned int *len,
1462 void **ctx)
1463{
1464 struct vring_virtqueue *vq = to_vvq(_vq);
a7722890 1465 u16 last_used, id, last_used_idx;
1466 bool used_wrap_counter;
1ce9e605
TB
1467 void *ret;
1468
1469 START_USE(vq);
1470
1471 if (unlikely(vq->broken)) {
1472 END_USE(vq);
1473 return NULL;
1474 }
1475
1476 if (!more_used_packed(vq)) {
1477 pr_debug("No more buffers in queue\n");
1478 END_USE(vq);
1479 return NULL;
1480 }
1481
1482 /* Only get used elements after they have been exposed by host. */
1483 virtio_rmb(vq->weak_barriers);
1484
a7722890 1485 last_used_idx = READ_ONCE(vq->last_used_idx);
1486 used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1487 last_used = packed_last_used(last_used_idx);
1ce9e605
TB
1488 id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
1489 *len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
1490
1491 if (unlikely(id >= vq->packed.vring.num)) {
1492 BAD_RING(vq, "id %u out of range\n", id);
1493 return NULL;
1494 }
1495 if (unlikely(!vq->packed.desc_state[id].data)) {
1496 BAD_RING(vq, "id %u is not a head!\n", id);
1497 return NULL;
1498 }
1499
1500 /* detach_buf_packed clears data, so grab it now. */
1501 ret = vq->packed.desc_state[id].data;
1502 detach_buf_packed(vq, id, ctx);
1503
a7722890 1504 last_used += vq->packed.desc_state[id].num;
1505 if (unlikely(last_used >= vq->packed.vring.num)) {
1506 last_used -= vq->packed.vring.num;
1507 used_wrap_counter ^= 1;
1ce9e605
TB
1508 }
1509
a7722890 1510 last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1511 WRITE_ONCE(vq->last_used_idx, last_used);
1512
f51f9826
TB
1513 /*
1514 * If we expect an interrupt for the next entry, tell host
1515 * by writing event index and flush out the write before
1516 * the read in the next get_buf call.
1517 */
1518 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
1519 virtio_store_mb(vq->weak_barriers,
1520 &vq->packed.vring.driver->off_wrap,
a7722890 1521 cpu_to_le16(vq->last_used_idx));
f51f9826 1522
1ce9e605
TB
1523 LAST_ADD_TIME_INVALID(vq);
1524
1525 END_USE(vq);
1526 return ret;
1527}
1528
1529static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
1530{
1531 struct vring_virtqueue *vq = to_vvq(_vq);
1532
1533 if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
1534 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1535 vq->packed.vring.driver->flags =
1536 cpu_to_le16(vq->packed.event_flags_shadow);
1537 }
1538}
1539
31532340 1540static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
1ce9e605
TB
1541{
1542 struct vring_virtqueue *vq = to_vvq(_vq);
1543
1544 START_USE(vq);
1545
1546 /*
1547 * We optimistically turn back on interrupts, then check if there was
1548 * more to do.
1549 */
1550
f51f9826
TB
1551 if (vq->event) {
1552 vq->packed.vring.driver->off_wrap =
a7722890 1553 cpu_to_le16(vq->last_used_idx);
f51f9826
TB
1554 /*
1555 * We need to update event offset and event wrap
1556 * counter first before updating event flags.
1557 */
1558 virtio_wmb(vq->weak_barriers);
1559 }
1560
1ce9e605 1561 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
f51f9826
TB
1562 vq->packed.event_flags_shadow = vq->event ?
1563 VRING_PACKED_EVENT_FLAG_DESC :
1564 VRING_PACKED_EVENT_FLAG_ENABLE;
1ce9e605
TB
1565 vq->packed.vring.driver->flags =
1566 cpu_to_le16(vq->packed.event_flags_shadow);
1567 }
1568
1569 END_USE(vq);
a7722890 1570 return vq->last_used_idx;
1ce9e605
TB
1571}
1572
1573static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
1574{
1575 struct vring_virtqueue *vq = to_vvq(_vq);
1576 bool wrap_counter;
1577 u16 used_idx;
1578
1579 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1580 used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1581
1582 return is_used_desc_packed(vq, used_idx, wrap_counter);
1583}
1584
1585static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
1586{
1587 struct vring_virtqueue *vq = to_vvq(_vq);
a7722890 1588 u16 used_idx, wrap_counter, last_used_idx;
f51f9826 1589 u16 bufs;
1ce9e605
TB
1590
1591 START_USE(vq);
1592
1593 /*
1594 * We optimistically turn back on interrupts, then check if there was
1595 * more to do.
1596 */
1597
f51f9826
TB
1598 if (vq->event) {
1599 /* TODO: tune this threshold */
1600 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
a7722890 1601 last_used_idx = READ_ONCE(vq->last_used_idx);
1602 wrap_counter = packed_used_wrap_counter(last_used_idx);
f51f9826 1603
a7722890 1604 used_idx = packed_last_used(last_used_idx) + bufs;
f51f9826
TB
1605 if (used_idx >= vq->packed.vring.num) {
1606 used_idx -= vq->packed.vring.num;
1607 wrap_counter ^= 1;
1608 }
1609
1610 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
1611 (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1612
1613 /*
1614 * We need to update event offset and event wrap
1615 * counter first before updating event flags.
1616 */
1617 virtio_wmb(vq->weak_barriers);
f51f9826 1618 }
1ce9e605
TB
1619
1620 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
f51f9826
TB
1621 vq->packed.event_flags_shadow = vq->event ?
1622 VRING_PACKED_EVENT_FLAG_DESC :
1623 VRING_PACKED_EVENT_FLAG_ENABLE;
1ce9e605
TB
1624 vq->packed.vring.driver->flags =
1625 cpu_to_le16(vq->packed.event_flags_shadow);
1626 }
1627
1628 /*
1629 * We need to update event suppression structure first
1630 * before re-checking for more used buffers.
1631 */
1632 virtio_mb(vq->weak_barriers);
1633
a7722890 1634 last_used_idx = READ_ONCE(vq->last_used_idx);
1635 wrap_counter = packed_used_wrap_counter(last_used_idx);
1636 used_idx = packed_last_used(last_used_idx);
1637 if (is_used_desc_packed(vq, used_idx, wrap_counter)) {
1ce9e605
TB
1638 END_USE(vq);
1639 return false;
1640 }
1641
1642 END_USE(vq);
1643 return true;
1644}
1645
1646static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
1647{
1648 struct vring_virtqueue *vq = to_vvq(_vq);
1649 unsigned int i;
1650 void *buf;
1651
1652 START_USE(vq);
1653
1654 for (i = 0; i < vq->packed.vring.num; i++) {
1655 if (!vq->packed.desc_state[i].data)
1656 continue;
1657 /* detach_buf clears data, so grab it now. */
1658 buf = vq->packed.desc_state[i].data;
1659 detach_buf_packed(vq, i, NULL);
1660 END_USE(vq);
1661 return buf;
1662 }
1663 /* That should have freed everything. */
1664 BUG_ON(vq->vq.num_free != vq->packed.vring.num);
1665
1666 END_USE(vq);
1667 return NULL;
1668}
1669
96ef18a2 1670static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num)
5a222421
JW
1671{
1672 struct vring_desc_extra *desc_extra;
1673 unsigned int i;
1674
1675 desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra),
1676 GFP_KERNEL);
1677 if (!desc_extra)
1678 return NULL;
1679
1680 memset(desc_extra, 0, num * sizeof(struct vring_desc_extra));
1681
1682 for (i = 0; i < num - 1; i++)
1683 desc_extra[i].next = i + 1;
1684
1685 return desc_extra;
1686}
1687
1ce9e605
TB
1688static struct virtqueue *vring_create_virtqueue_packed(
1689 unsigned int index,
1690 unsigned int num,
1691 unsigned int vring_align,
1692 struct virtio_device *vdev,
1693 bool weak_barriers,
1694 bool may_reduce_num,
1695 bool context,
1696 bool (*notify)(struct virtqueue *),
1697 void (*callback)(struct virtqueue *),
1698 const char *name)
1699{
1700 struct vring_virtqueue *vq;
1701 struct vring_packed_desc *ring;
1702 struct vring_packed_desc_event *driver, *device;
1703 dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
1704 size_t ring_size_in_bytes, event_size_in_bytes;
1ce9e605
TB
1705
1706 ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
1707
1708 ring = vring_alloc_queue(vdev, ring_size_in_bytes,
1709 &ring_dma_addr,
1710 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1711 if (!ring)
1712 goto err_ring;
1713
1714 event_size_in_bytes = sizeof(struct vring_packed_desc_event);
1715
1716 driver = vring_alloc_queue(vdev, event_size_in_bytes,
1717 &driver_event_dma_addr,
1718 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1719 if (!driver)
1720 goto err_driver;
1721
1722 device = vring_alloc_queue(vdev, event_size_in_bytes,
1723 &device_event_dma_addr,
1724 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1725 if (!device)
1726 goto err_device;
1727
1728 vq = kmalloc(sizeof(*vq), GFP_KERNEL);
1729 if (!vq)
1730 goto err_vq;
1731
1732 vq->vq.callback = callback;
1733 vq->vq.vdev = vdev;
1734 vq->vq.name = name;
1ce9e605
TB
1735 vq->vq.index = index;
1736 vq->we_own_ring = true;
1737 vq->notify = notify;
1738 vq->weak_barriers = weak_barriers;
c346dae4 1739#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
8b4ec69d 1740 vq->broken = true;
c346dae4
JW
1741#else
1742 vq->broken = false;
1743#endif
1ce9e605
TB
1744 vq->packed_ring = true;
1745 vq->use_dma_api = vring_use_dma_api(vdev);
1ce9e605
TB
1746
1747 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
1748 !context;
1749 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1750
45383fb0
TB
1751 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
1752 vq->weak_barriers = false;
1753
1ce9e605
TB
1754 vq->packed.ring_dma_addr = ring_dma_addr;
1755 vq->packed.driver_event_dma_addr = driver_event_dma_addr;
1756 vq->packed.device_event_dma_addr = device_event_dma_addr;
1757
1758 vq->packed.ring_size_in_bytes = ring_size_in_bytes;
1759 vq->packed.event_size_in_bytes = event_size_in_bytes;
1760
1761 vq->packed.vring.num = num;
1762 vq->packed.vring.desc = ring;
1763 vq->packed.vring.driver = driver;
1764 vq->packed.vring.device = device;
1765
1766 vq->packed.next_avail_idx = 0;
1767 vq->packed.avail_wrap_counter = 1;
1ce9e605
TB
1768 vq->packed.event_flags_shadow = 0;
1769 vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
1770
1771 vq->packed.desc_state = kmalloc_array(num,
1772 sizeof(struct vring_desc_state_packed),
1773 GFP_KERNEL);
1774 if (!vq->packed.desc_state)
1775 goto err_desc_state;
1776
1777 memset(vq->packed.desc_state, 0,
1778 num * sizeof(struct vring_desc_state_packed));
1779
1780 /* Put everything in free lists. */
1781 vq->free_head = 0;
1ce9e605 1782
96ef18a2 1783 vq->packed.desc_extra = vring_alloc_desc_extra(num);
1ce9e605
TB
1784 if (!vq->packed.desc_extra)
1785 goto err_desc_extra;
1786
1ce9e605
TB
1787 /* No callback? Tell other side not to bother us. */
1788 if (!callback) {
1789 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1790 vq->packed.vring.driver->flags =
1791 cpu_to_le16(vq->packed.event_flags_shadow);
1792 }
1793
3a897128
XZ
1794 virtqueue_init(vq, num);
1795
0e566c8f 1796 spin_lock(&vdev->vqs_list_lock);
e152d8af 1797 list_add_tail(&vq->vq.list, &vdev->vqs);
0e566c8f 1798 spin_unlock(&vdev->vqs_list_lock);
1ce9e605
TB
1799 return &vq->vq;
1800
1801err_desc_extra:
1802 kfree(vq->packed.desc_state);
1803err_desc_state:
1804 kfree(vq);
1805err_vq:
ae93d8ea 1806 vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr);
1ce9e605 1807err_device:
ae93d8ea 1808 vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr);
1ce9e605
TB
1809err_driver:
1810 vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr);
1811err_ring:
1812 return NULL;
1813}
1814
1815
e6f633e5
TB
1816/*
1817 * Generic functions and exported symbols.
1818 */
1819
1820static inline int virtqueue_add(struct virtqueue *_vq,
1821 struct scatterlist *sgs[],
1822 unsigned int total_sg,
1823 unsigned int out_sgs,
1824 unsigned int in_sgs,
1825 void *data,
1826 void *ctx,
1827 gfp_t gfp)
1828{
1ce9e605
TB
1829 struct vring_virtqueue *vq = to_vvq(_vq);
1830
1831 return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
1832 out_sgs, in_sgs, data, ctx, gfp) :
1833 virtqueue_add_split(_vq, sgs, total_sg,
1834 out_sgs, in_sgs, data, ctx, gfp);
e6f633e5
TB
1835}
1836
1837/**
1838 * virtqueue_add_sgs - expose buffers to other end
a5581206 1839 * @_vq: the struct virtqueue we're talking about.
e6f633e5 1840 * @sgs: array of terminated scatterlists.
a5581206
JB
1841 * @out_sgs: the number of scatterlists readable by other side
1842 * @in_sgs: the number of scatterlists which are writable (after readable ones)
e6f633e5
TB
1843 * @data: the token identifying the buffer.
1844 * @gfp: how to do memory allocations (if necessary).
1845 *
1846 * Caller must ensure we don't call this with other virtqueue operations
1847 * at the same time (except where noted).
1848 *
1849 * Returns zero or a negative error (i.e. -ENOSPC, -ENOMEM, -EIO).
1850 */
1851int virtqueue_add_sgs(struct virtqueue *_vq,
1852 struct scatterlist *sgs[],
1853 unsigned int out_sgs,
1854 unsigned int in_sgs,
1855 void *data,
1856 gfp_t gfp)
1857{
1858 unsigned int i, total_sg = 0;
1859
1860 /* Count them first. */
1861 for (i = 0; i < out_sgs + in_sgs; i++) {
1862 struct scatterlist *sg;
1863
1864 for (sg = sgs[i]; sg; sg = sg_next(sg))
1865 total_sg++;
1866 }
1867 return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
1868 data, NULL, gfp);
1869}
1870EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
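
As a concrete illustration of the calling convention above, a driver that sends a request header plus payload and expects a single status byte back could build the sgs[] array like this. The helper, buffer names and request layout are hypothetical; only the virtqueue_add_sgs() call itself comes from this file:

/* Hypothetical helper; relies on the scatterlist API already used here. */
static int queue_request(struct virtqueue *vq, void *hdr, unsigned int hdr_len,
			 void *payload, unsigned int payload_len, u8 *status)
{
	struct scatterlist hdr_sg, payload_sg, status_sg;
	struct scatterlist *sgs[3];

	sg_init_one(&hdr_sg, hdr, hdr_len);
	sg_init_one(&payload_sg, payload, payload_len);
	sg_init_one(&status_sg, status, sizeof(*status));

	sgs[0] = &hdr_sg;		/* device-readable */
	sgs[1] = &payload_sg;		/* device-readable */
	sgs[2] = &status_sg;		/* device-writable */

	/* out_sgs = 2, in_sgs = 1; the token handed back later is @hdr. */
	return virtqueue_add_sgs(vq, sgs, 2, 1, hdr, GFP_ATOMIC);
}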
1871
1872/**
1873 * virtqueue_add_outbuf - expose output buffers to other end
1874 * @vq: the struct virtqueue we're talking about.
1875 * @sg: scatterlist (must be well-formed and terminated!)
1876 * @num: the number of entries in @sg readable by other side
1877 * @data: the token identifying the buffer.
1878 * @gfp: how to do memory allocations (if necessary).
1879 *
1880 * Caller must ensure we don't call this with other virtqueue operations
1881 * at the same time (except where noted).
1882 *
1883 * Returns zero or a negative error (i.e. -ENOSPC, -ENOMEM, -EIO).
1884 */
1885int virtqueue_add_outbuf(struct virtqueue *vq,
1886 struct scatterlist *sg, unsigned int num,
1887 void *data,
1888 gfp_t gfp)
1889{
1890 return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
1891}
1892EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
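
A minimal sketch of the outbuf path, assuming a hypothetical driver that owns a kmalloc()ed buffer and uses the buffer pointer itself as the token; virtqueue_kick() is documented further below:

static int send_buf(struct virtqueue *vq, void *buf, unsigned int len)
{
	struct scatterlist sg;
	int err;

	sg_init_one(&sg, buf, len);

	/* One device-readable entry; @buf doubles as the completion token. */
	err = virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC);
	if (err)
		return err;	/* typically -ENOSPC when the ring is full */

	virtqueue_kick(vq);
	return 0;
}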
1893
1894/**
1895 * virtqueue_add_inbuf - expose input buffers to other end
1896 * @vq: the struct virtqueue we're talking about.
1897 * @sg: scatterlist (must be well-formed and terminated!)
1898 * @num: the number of entries in @sg writable by other side
1899 * @data: the token identifying the buffer.
1900 * @gfp: how to do memory allocations (if necessary).
1901 *
1902 * Caller must ensure we don't call this with other virtqueue operations
1903 * at the same time (except where noted).
1904 *
1905 * Returns zero or a negative error (i.e. -ENOSPC, -ENOMEM, -EIO).
1906 */
1907int virtqueue_add_inbuf(struct virtqueue *vq,
1908 struct scatterlist *sg, unsigned int num,
1909 void *data,
1910 gfp_t gfp)
1911{
1912 return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
1913}
1914EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
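
For receive-style queues the usual pattern is to keep the ring topped up with device-writable buffers until virtqueue_add_inbuf() reports that it is full. A rough sketch, with the buffer size and allocation policy purely illustrative:

static void fill_rx_ring(struct virtqueue *vq, gfp_t gfp)
{
	struct scatterlist sg;
	void *buf;

	for (;;) {
		buf = kmalloc(PAGE_SIZE, gfp);
		if (!buf)
			break;

		sg_init_one(&sg, buf, PAGE_SIZE);
		if (virtqueue_add_inbuf(vq, &sg, 1, buf, gfp)) {
			/* Ring is full (-ENOSPC) or broken; stop refilling. */
			kfree(buf);
			break;
		}
	}

	virtqueue_kick(vq);
}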
1915
1916/**
1917 * virtqueue_add_inbuf_ctx - expose input buffers to other end
1918 * @vq: the struct virtqueue we're talking about.
1919 * @sg: scatterlist (must be well-formed and terminated!)
1920 * @num: the number of entries in @sg writable by other side
1921 * @data: the token identifying the buffer.
1922 * @ctx: extra context for the token
1923 * @gfp: how to do memory allocations (if necessary).
1924 *
1925 * Caller must ensure we don't call this with other virtqueue operations
1926 * at the same time (except where noted).
1927 *
1928 * Returns zero or a negative error (i.e. -ENOSPC, -ENOMEM, -EIO).
1929 */
1930int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
1931 struct scatterlist *sg, unsigned int num,
1932 void *data,
1933 void *ctx,
1934 gfp_t gfp)
1935{
1936 return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
1937}
1938EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
1939
1940/**
1941 * virtqueue_kick_prepare - first half of a split virtqueue_kick call.
a5581206 1942 * @_vq: the struct virtqueue
e6f633e5
TB
1943 *
1944 * Instead of virtqueue_kick(), you can do:
1945 * if (virtqueue_kick_prepare(vq))
1946 * virtqueue_notify(vq);
1947 *
1948 * This is sometimes useful because virtqueue_kick_prepare() needs
1949 * to be serialized, but the actual virtqueue_notify() call does not.
1950 */
1951bool virtqueue_kick_prepare(struct virtqueue *_vq)
1952{
1ce9e605
TB
1953 struct vring_virtqueue *vq = to_vvq(_vq);
1954
1955 return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
1956 virtqueue_kick_prepare_split(_vq);
e6f633e5
TB
1957}
1958EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
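
The split kick is typically used so that only the bookkeeping runs under the driver's queue lock while the (potentially slow) notification happens outside it. A sketch, assuming a hypothetical driver lock and an already initialised scatterlist:

static int add_and_kick(struct virtqueue *vq, spinlock_t *lock,
			struct scatterlist *sg, void *token)
{
	bool needs_kick;
	int err;

	/* Additions and the kick decision must be serialized... */
	spin_lock(lock);
	err = virtqueue_add_outbuf(vq, sg, 1, token, GFP_ATOMIC);
	needs_kick = virtqueue_kick_prepare(vq);
	spin_unlock(lock);

	/* ...but the doorbell write itself need not be. */
	if (needs_kick)
		virtqueue_notify(vq);

	return err;
}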
1959
1960/**
1961 * virtqueue_notify - second half of a split virtqueue_kick call.
a5581206 1962 * @_vq: the struct virtqueue
e6f633e5
TB
1963 *
1964 * This does not need to be serialized.
1965 *
1966 * Returns false if host notify failed or queue is broken, otherwise true.
1967 */
1968bool virtqueue_notify(struct virtqueue *_vq)
1969{
1970 struct vring_virtqueue *vq = to_vvq(_vq);
1971
1972 if (unlikely(vq->broken))
1973 return false;
1974
1975 /* Prod other side to tell it about changes. */
1976 if (!vq->notify(_vq)) {
1977 vq->broken = true;
1978 return false;
1979 }
1980 return true;
1981}
1982EXPORT_SYMBOL_GPL(virtqueue_notify);
1983
1984/**
1985 * virtqueue_kick - update after add_buf
1986 * @vq: the struct virtqueue
1987 *
1988 * After one or more virtqueue_add_* calls, invoke this to kick
1989 * the other side.
1990 *
1991 * Caller must ensure we don't call this with other virtqueue
1992 * operations at the same time (except where noted).
1993 *
1994 * Returns false if kick failed, otherwise true.
1995 */
1996bool virtqueue_kick(struct virtqueue *vq)
1997{
1998 if (virtqueue_kick_prepare(vq))
1999 return virtqueue_notify(vq);
2000 return true;
2001}
2002EXPORT_SYMBOL_GPL(virtqueue_kick);
2003
2004/**
31c11db6 2005 * virtqueue_get_buf_ctx - get the next used buffer
a5581206 2006 * @_vq: the struct virtqueue we're talking about.
e6f633e5 2007 * @len: the length written into the buffer
a5581206 2008 * @ctx: extra context for the token
e6f633e5
TB
2009 *
2010 * If the device wrote data into the buffer, @len will be set to the
2011 * amount written. This means you don't need to clear the buffer
2012 * beforehand to ensure there's no data leakage in the case of short
2013 * writes.
2014 *
2015 * Caller must ensure we don't call this with other virtqueue
2016 * operations at the same time (except where noted).
2017 *
2018 * Returns NULL if there are no used buffers, or the "data" token
2019 * handed to virtqueue_add_*().
2020 */
2021void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
2022 void **ctx)
2023{
1ce9e605
TB
2024 struct vring_virtqueue *vq = to_vvq(_vq);
2025
2026 return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
2027 virtqueue_get_buf_ctx_split(_vq, len, ctx);
e6f633e5
TB
2028}
2029EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
2030
2031void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
2032{
2033 return virtqueue_get_buf_ctx(_vq, len, NULL);
2034}
2035EXPORT_SYMBOL_GPL(virtqueue_get_buf);
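
Reaping completions is just a loop over virtqueue_get_buf() until it returns NULL. In this sketch the tokens are assumed to be kmalloc()ed buffers that were handed to virtqueue_add_*() earlier:

static void reclaim_used_bufs(struct virtqueue *vq)
{
	unsigned int len;
	void *token;

	/*
	 * Each token is the @data pointer passed to virtqueue_add_*();
	 * @len is how many bytes the device wrote into the buffer.
	 */
	while ((token = virtqueue_get_buf(vq, &len)) != NULL)
		kfree(token);
}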
e6f633e5
TB
2036/**
2037 * virtqueue_disable_cb - disable callbacks
a5581206 2038 * @_vq: the struct virtqueue we're talking about.
e6f633e5
TB
2039 *
2040 * Note that this is not necessarily synchronous, hence unreliable and only
2041 * useful as an optimization.
2042 *
2043 * Unlike other operations, this need not be serialized.
2044 */
2045void virtqueue_disable_cb(struct virtqueue *_vq)
2046{
1ce9e605
TB
2047 struct vring_virtqueue *vq = to_vvq(_vq);
2048
8d622d21
MT
2049 /* If the device has already triggered an event, it won't trigger
2050 * one again: no need to disable.
2051 */
2052 if (vq->event_triggered)
2053 return;
2054
1ce9e605
TB
2055 if (vq->packed_ring)
2056 virtqueue_disable_cb_packed(_vq);
2057 else
2058 virtqueue_disable_cb_split(_vq);
e6f633e5
TB
2059}
2060EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
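
A common callback shape pairs virtqueue_disable_cb() with virtqueue_enable_cb() (documented below) to avoid an interrupt per buffer while draining: callbacks are switched off, the used buffers are drained, and the loop repeats if re-enabling races with new completions. process_token() is a hypothetical driver helper:

static void my_vq_callback(struct virtqueue *vq)
{
	unsigned int len;
	void *token;

	do {
		virtqueue_disable_cb(vq);

		while ((token = virtqueue_get_buf(vq, &len)) != NULL)
			process_token(token, len);	/* hypothetical */

		/*
		 * virtqueue_enable_cb() returns false if buffers arrived
		 * while callbacks were off, so go around once more.
		 */
	} while (!virtqueue_enable_cb(vq));
}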
2061
2062/**
2063 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
a5581206 2064 * @_vq: the struct virtqueue we're talking about.
e6f633e5
TB
2065 *
2066 * This re-enables callbacks; it returns the current queue state
2067 * in an opaque unsigned value. This value should later be tested by
2068 * virtqueue_poll, to detect a possible race between the driver checking for
2069 * more work, and enabling callbacks.
2070 *
2071 * Caller must ensure we don't call this with other virtqueue
2072 * operations at the same time (except where noted).
2073 */
31532340 2074unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq)
e6f633e5 2075{
1ce9e605
TB
2076 struct vring_virtqueue *vq = to_vvq(_vq);
2077
8d622d21
MT
2078 if (vq->event_triggered)
2079 vq->event_triggered = false;
2080
1ce9e605
TB
2081 return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
2082 virtqueue_enable_cb_prepare_split(_vq);
e6f633e5
TB
2083}
2084EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
2085
2086/**
2087 * virtqueue_poll - query pending used buffers
a5581206 2088 * @_vq: the struct virtqueue we're talking about.
e6f633e5
TB
2089 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
2090 *
2091 * Returns "true" if there are pending used buffers in the queue.
2092 *
2093 * This does not need to be serialized.
2094 */
31532340 2095bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx)
e6f633e5
TB
2096{
2097 struct vring_virtqueue *vq = to_vvq(_vq);
2098
481a0d74
MW
2099 if (unlikely(vq->broken))
2100 return false;
2101
e6f633e5 2102 virtio_mb(vq->weak_barriers);
1ce9e605
TB
2103 return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
2104 virtqueue_poll_split(_vq, last_used_idx);
e6f633e5
TB
2105}
2106EXPORT_SYMBOL_GPL(virtqueue_poll);
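
Together, virtqueue_enable_cb_prepare() and virtqueue_poll() support a budget-limited polling loop in the style network drivers use; the opaque value closes the race between re-enabling callbacks and new completions. handle_rx() and the return convention are hypothetical:

static int my_poll(struct virtqueue *vq, int budget)
{
	unsigned int len, opaque;
	void *token;
	int done = 0;

	while (done < budget && (token = virtqueue_get_buf(vq, &len))) {
		handle_rx(token, len);		/* hypothetical */
		done++;
	}

	if (done < budget) {
		opaque = virtqueue_enable_cb_prepare(vq);
		if (unlikely(virtqueue_poll(vq, opaque))) {
			/* Raced with a new completion: keep polling. */
			virtqueue_disable_cb(vq);
			return budget;
		}
	}

	return done;
}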
2107
2108/**
2109 * virtqueue_enable_cb - restart callbacks after disable_cb.
a5581206 2110 * @_vq: the struct virtqueue we're talking about.
e6f633e5
TB
2111 *
2112 * This re-enables callbacks; it returns "false" if there are pending
2113 * buffers in the queue, to detect a possible race between the driver
2114 * checking for more work, and enabling callbacks.
2115 *
2116 * Caller must ensure we don't call this with other virtqueue
2117 * operations at the same time (except where noted).
2118 */
2119bool virtqueue_enable_cb(struct virtqueue *_vq)
2120{
31532340 2121 unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq);
e6f633e5
TB
2122
2123 return !virtqueue_poll(_vq, last_used_idx);
2124}
2125EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
2126
2127/**
2128 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
a5581206 2129 * @_vq: the struct virtqueue we're talking about.
e6f633e5
TB
2130 *
2131 * This re-enables callbacks but hints to the other side to delay
2132 * interrupts until most of the available buffers have been processed;
2133 * it returns "false" if there are many pending buffers in the queue,
2134 * to detect a possible race between the driver checking for more work,
2135 * and enabling callbacks.
2136 *
2137 * Caller must ensure we don't call this with other virtqueue
2138 * operations at the same time (except where noted).
2139 */
2140bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
2141{
1ce9e605
TB
2142 struct vring_virtqueue *vq = to_vvq(_vq);
2143
8d622d21
MT
2144 if (vq->event_triggered)
2145 vq->event_triggered = false;
2146
1ce9e605
TB
2147 return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
2148 virtqueue_enable_cb_delayed_split(_vq);
e6f633e5
TB
2149}
2150EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
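
For transmit-completion style queues, a driver may prefer virtqueue_enable_cb_delayed() so it is only interrupted after a batch of buffers has been used. A sketch with a hypothetical free_tx_token() helper:

static void my_tx_cleanup(struct virtqueue *vq)
{
	unsigned int len;
	void *token;

	while ((token = virtqueue_get_buf(vq, &len)) != NULL)
		free_tx_token(token);			/* hypothetical */

	/*
	 * Ask for an interrupt only once most remaining buffers are used;
	 * if that raced with fresh completions, drain them right away.
	 */
	if (!virtqueue_enable_cb_delayed(vq))
		while ((token = virtqueue_get_buf(vq, &len)) != NULL)
			free_tx_token(token);
}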
2151
138fd251
TB
2152/**
2153 * virtqueue_detach_unused_buf - detach first unused buffer
a5581206 2154 * @_vq: the struct virtqueue we're talking about.
138fd251
TB
2155 *
2156 * Returns NULL or the "data" token handed to virtqueue_add_*().
a62eecb3
XZ
2157 * This is not valid on an active queue; it is useful for device
2158 * shutdown or when resetting the queue.
138fd251
TB
2159 */
2160void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
2161{
1ce9e605
TB
2162 struct vring_virtqueue *vq = to_vvq(_vq);
2163
2164 return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
2165 virtqueue_detach_unused_buf_split(_vq);
138fd251 2166}
7c5e9ed0 2167EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
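
The expected use is in a driver's remove or freeze path, after the device has been reset, to reclaim buffers that were queued but never used. Assuming kmalloc()ed tokens as in the earlier sketches:

static void drain_unused_bufs(struct virtqueue *vq)
{
	void *token;

	/* The device is reset, so none of these will ever be used now. */
	while ((token = virtqueue_detach_unused_buf(vq)) != NULL)
		kfree(token);
}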
c021eac4 2168
138fd251
TB
2169static inline bool more_used(const struct vring_virtqueue *vq)
2170{
1ce9e605 2171 return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
138fd251
TB
2172}
2173
0a8a69dd
RR
2174irqreturn_t vring_interrupt(int irq, void *_vq)
2175{
2176 struct vring_virtqueue *vq = to_vvq(_vq);
2177
2178 if (!more_used(vq)) {
2179 pr_debug("virtqueue interrupt with no work for %p\n", vq);
2180 return IRQ_NONE;
2181 }
2182
8b4ec69d 2183 if (unlikely(vq->broken)) {
c346dae4 2184#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
8b4ec69d
JW
2185 dev_warn_once(&vq->vq.vdev->dev,
2186 "virtio vring IRQ raised before DRIVER_OK");
2187 return IRQ_NONE;
c346dae4
JW
2188#else
2189 return IRQ_HANDLED;
2190#endif
8b4ec69d 2191 }
0a8a69dd 2192
8d622d21
MT
2193 /* Just a hint for performance: so it's ok that this can be racy! */
2194 if (vq->event)
2195 vq->event_triggered = true;
2196
0a8a69dd 2197 pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
18445c4d
RR
2198 if (vq->vq.callback)
2199 vq->vq.callback(&vq->vq);
0a8a69dd
RR
2200
2201 return IRQ_HANDLED;
2202}
c6fd4701 2203EXPORT_SYMBOL_GPL(vring_interrupt);
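
vring_interrupt() is what a transport's interrupt handler feeds each queue into; it returns IRQ_NONE when the queue had no work. A simplified sketch of such a dispatcher, with a hypothetical transport structure and no locking around the queue list (a real transport serializes this):

struct my_transport_dev {			/* hypothetical transport */
	struct virtio_device vdev;
};

static irqreturn_t my_transport_isr(int irq, void *opaque)
{
	struct my_transport_dev *tdev = opaque;
	struct virtqueue *vq;
	irqreturn_t ret = IRQ_NONE;

	/* Let the ring code decide, per queue, whether there was work. */
	list_for_each_entry(vq, &tdev->vdev.vqs, list)
		if (vring_interrupt(irq, vq) == IRQ_HANDLED)
			ret = IRQ_HANDLED;

	return ret;
}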
0a8a69dd 2204
1ce9e605 2205/* Only available for split ring */
07d9629d
XZ
2206static struct virtqueue *__vring_new_virtqueue(unsigned int index,
2207 struct vring vring,
2208 struct virtio_device *vdev,
2209 bool weak_barriers,
2210 bool context,
2211 bool (*notify)(struct virtqueue *),
2212 void (*callback)(struct virtqueue *),
2213 const char *name)
0a8a69dd 2214{
2a2d1382 2215 struct vring_virtqueue *vq;
0a8a69dd 2216
1ce9e605
TB
2217 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2218 return NULL;
2219
cbeedb72 2220 vq = kmalloc(sizeof(*vq), GFP_KERNEL);
0a8a69dd
RR
2221 if (!vq)
2222 return NULL;
2223
1ce9e605 2224 vq->packed_ring = false;
0a8a69dd
RR
2225 vq->vq.callback = callback;
2226 vq->vq.vdev = vdev;
9499f5e7 2227 vq->vq.name = name;
06ca287d 2228 vq->vq.index = index;
2a2d1382 2229 vq->we_own_ring = false;
0a8a69dd 2230 vq->notify = notify;
7b21e34f 2231 vq->weak_barriers = weak_barriers;
c346dae4 2232#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
8b4ec69d 2233 vq->broken = true;
c346dae4
JW
2234#else
2235 vq->broken = false;
2236#endif
fb3fba6b 2237 vq->use_dma_api = vring_use_dma_api(vdev);
0a8a69dd 2238
5a08b04f
MT
2239 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2240 !context;
a5c262c5 2241 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
9fa29b9d 2242
45383fb0
TB
2243 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2244 vq->weak_barriers = false;
2245
d79dca75
TB
2246 vq->split.queue_dma_addr = 0;
2247 vq->split.queue_size_in_bytes = 0;
2248
e593bf97
TB
2249 vq->split.vring = vring;
2250 vq->split.avail_flags_shadow = 0;
2251 vq->split.avail_idx_shadow = 0;
2252
0a8a69dd 2253 /* No callback? Tell other side not to bother us. */
f277ec42 2254 if (!callback) {
e593bf97 2255 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
0ea1e4a6 2256 if (!vq->event)
e593bf97
TB
2257 vq->split.vring.avail->flags = cpu_to_virtio16(vdev,
2258 vq->split.avail_flags_shadow);
f277ec42 2259 }
0a8a69dd 2260
cbeedb72
TB
2261 vq->split.desc_state = kmalloc_array(vring.num,
2262 sizeof(struct vring_desc_state_split), GFP_KERNEL);
5bc72234
JW
2263 if (!vq->split.desc_state)
2264 goto err_state;
cbeedb72 2265
96ef18a2 2266 vq->split.desc_extra = vring_alloc_desc_extra(vring.num);
72b5e895
JW
2267 if (!vq->split.desc_extra)
2268 goto err_extra;
2269
0a8a69dd 2270 /* Put everything in free lists. */
0a8a69dd 2271 vq->free_head = 0;
cbeedb72
TB
2272 memset(vq->split.desc_state, 0, vring.num *
2273 sizeof(struct vring_desc_state_split));
0a8a69dd 2274
3a897128
XZ
2275 virtqueue_init(vq, vring.num);
2276
0e566c8f 2277 spin_lock(&vdev->vqs_list_lock);
e152d8af 2278 list_add_tail(&vq->vq.list, &vdev->vqs);
0e566c8f 2279 spin_unlock(&vdev->vqs_list_lock);
0a8a69dd 2280 return &vq->vq;
5bc72234 2281
72b5e895
JW
2282err_extra:
2283 kfree(vq->split.desc_state);
5bc72234
JW
2284err_state:
2285 kfree(vq);
2286 return NULL;
0a8a69dd 2287}
2a2d1382 2288
2a2d1382
AL
2289struct virtqueue *vring_create_virtqueue(
2290 unsigned int index,
2291 unsigned int num,
2292 unsigned int vring_align,
2293 struct virtio_device *vdev,
2294 bool weak_barriers,
2295 bool may_reduce_num,
f94682dd 2296 bool context,
2a2d1382
AL
2297 bool (*notify)(struct virtqueue *),
2298 void (*callback)(struct virtqueue *),
2299 const char *name)
2300{
1ce9e605
TB
2301
2302 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2303 return vring_create_virtqueue_packed(index, num, vring_align,
2304 vdev, weak_barriers, may_reduce_num,
2305 context, notify, callback, name);
2306
d79dca75
TB
2307 return vring_create_virtqueue_split(index, num, vring_align,
2308 vdev, weak_barriers, may_reduce_num,
2309 context, notify, callback, name);
2a2d1382
AL
2310}
2311EXPORT_SYMBOL_GPL(vring_create_virtqueue);
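
From a transport's point of view, setting up a queue is a single call: pick a size and alignment, and supply the notify hook the ring code will use to kick the device. Everything named my_* below is hypothetical; the argument order is the one in the prototype above:

static bool my_notify(struct virtqueue *vq)
{
	/* e.g. write vq->index to a hypothetical doorbell register */
	return true;
}

static struct virtqueue *my_setup_vq(struct virtio_device *vdev,
				     unsigned int index,
				     void (*callback)(struct virtqueue *),
				     const char *name)
{
	/*
	 * 128 entries, page-aligned ring, may shrink on allocation failure,
	 * weak barriers, no per-buffer context.
	 */
	return vring_create_virtqueue(index, 128, PAGE_SIZE, vdev,
				      true, true, false,
				      my_notify, callback, name);
}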
2312
1ce9e605 2313/* Only available for split ring */
2a2d1382
AL
2314struct virtqueue *vring_new_virtqueue(unsigned int index,
2315 unsigned int num,
2316 unsigned int vring_align,
2317 struct virtio_device *vdev,
2318 bool weak_barriers,
f94682dd 2319 bool context,
2a2d1382
AL
2320 void *pages,
2321 bool (*notify)(struct virtqueue *vq),
2322 void (*callback)(struct virtqueue *vq),
2323 const char *name)
2324{
2325 struct vring vring;
1ce9e605
TB
2326
2327 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2328 return NULL;
2329
2a2d1382 2330 vring_init(&vring, num, pages, vring_align);
f94682dd 2331 return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
2a2d1382
AL
2332 notify, callback, name);
2333}
c6fd4701 2334EXPORT_SYMBOL_GPL(vring_new_virtqueue);
0a8a69dd 2335
3ea19e32 2336static void vring_free(struct virtqueue *_vq)
0a8a69dd 2337{
2a2d1382
AL
2338 struct vring_virtqueue *vq = to_vvq(_vq);
2339
2340 if (vq->we_own_ring) {
1ce9e605
TB
2341 if (vq->packed_ring) {
2342 vring_free_queue(vq->vq.vdev,
2343 vq->packed.ring_size_in_bytes,
2344 vq->packed.vring.desc,
2345 vq->packed.ring_dma_addr);
2346
2347 vring_free_queue(vq->vq.vdev,
2348 vq->packed.event_size_in_bytes,
2349 vq->packed.vring.driver,
2350 vq->packed.driver_event_dma_addr);
2351
2352 vring_free_queue(vq->vq.vdev,
2353 vq->packed.event_size_in_bytes,
2354 vq->packed.vring.device,
2355 vq->packed.device_event_dma_addr);
2356
2357 kfree(vq->packed.desc_state);
2358 kfree(vq->packed.desc_extra);
2359 } else {
2360 vring_free_queue(vq->vq.vdev,
2361 vq->split.queue_size_in_bytes,
2362 vq->split.vring.desc,
2363 vq->split.queue_dma_addr);
1ce9e605 2364 }
2a2d1382 2365 }
72b5e895 2366 if (!vq->packed_ring) {
f13f09a1 2367 kfree(vq->split.desc_state);
72b5e895
JW
2368 kfree(vq->split.desc_extra);
2369 }
3ea19e32
XZ
2370}
2371
2372void vring_del_virtqueue(struct virtqueue *_vq)
2373{
2374 struct vring_virtqueue *vq = to_vvq(_vq);
2375
2376 spin_lock(&vq->vq.vdev->vqs_list_lock);
2377 list_del(&_vq->list);
2378 spin_unlock(&vq->vq.vdev->vqs_list_lock);
2379
2380 vring_free(_vq);
2381
2a2d1382 2382 kfree(vq);
0a8a69dd 2383}
c6fd4701 2384EXPORT_SYMBOL_GPL(vring_del_virtqueue);
0a8a69dd 2385
e34f8725
RR
2386/* Manipulates transport-specific feature bits. */
2387void vring_transport_features(struct virtio_device *vdev)
2388{
2389 unsigned int i;
2390
2391 for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
2392 switch (i) {
9fa29b9d
MM
2393 case VIRTIO_RING_F_INDIRECT_DESC:
2394 break;
a5c262c5
MT
2395 case VIRTIO_RING_F_EVENT_IDX:
2396 break;
747ae34a
MT
2397 case VIRTIO_F_VERSION_1:
2398 break;
321bd212 2399 case VIRTIO_F_ACCESS_PLATFORM:
1a937693 2400 break;
f959a128
TB
2401 case VIRTIO_F_RING_PACKED:
2402 break;
45383fb0
TB
2403 case VIRTIO_F_ORDER_PLATFORM:
2404 break;
e34f8725
RR
2405 default:
2406 /* We don't understand this bit. */
e16e12be 2407 __virtio_clear_bit(vdev, i);
e34f8725
RR
2408 }
2409 }
2410}
2411EXPORT_SYMBOL_GPL(vring_transport_features);
2412
5dfc1762
RR
2413/**
2414 * virtqueue_get_vring_size - return the size of the virtqueue's vring
a5581206 2415 * @_vq: the struct virtqueue containing the vring of interest.
5dfc1762
RR
2416 *
2417 * Returns the size of the vring. This is mainly used for boasting to
2418 * userspace. Unlike other operations, this need not be serialized.
2419 */
8f9f4668
RJ
2420unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
2421{
2422
2423 struct vring_virtqueue *vq = to_vvq(_vq);
2424
1ce9e605 2425 return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
8f9f4668
RJ
2426}
2427EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
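
A typical consumer simply sizes its per-buffer bookkeeping to the ring. struct my_req is a hypothetical per-request structure:

struct my_req;					/* hypothetical bookkeeping */

static struct my_req **alloc_req_table(struct virtqueue *vq)
{
	/* One tracking slot per ring entry. */
	return kcalloc(virtqueue_get_vring_size(vq),
		       sizeof(struct my_req *), GFP_KERNEL);
}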
2428
b3b32c94
HG
2429bool virtqueue_is_broken(struct virtqueue *_vq)
2430{
2431 struct vring_virtqueue *vq = to_vvq(_vq);
2432
60f07798 2433 return READ_ONCE(vq->broken);
b3b32c94
HG
2434}
2435EXPORT_SYMBOL_GPL(virtqueue_is_broken);
2436
e2dcdfe9
RR
2437/*
2438 * This should prevent the device from being used, allowing drivers to
2439 * recover. You may need to grab appropriate locks to flush the write to vq->broken.
2440 */
2441void virtio_break_device(struct virtio_device *dev)
2442{
2443 struct virtqueue *_vq;
2444
0e566c8f 2445 spin_lock(&dev->vqs_list_lock);
e2dcdfe9
RR
2446 list_for_each_entry(_vq, &dev->vqs, list) {
2447 struct vring_virtqueue *vq = to_vvq(_vq);
60f07798
PP
2448
2449 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2450 WRITE_ONCE(vq->broken, true);
e2dcdfe9 2451 }
0e566c8f 2452 spin_unlock(&dev->vqs_list_lock);
e2dcdfe9
RR
2453}
2454EXPORT_SYMBOL_GPL(virtio_break_device);
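
Drivers usually pair these two: virtio_break_device() in a fatal-error path to stop every queue at once, and virtqueue_is_broken() as a cheap check before touching a queue again. A minimal sketch:

static void my_fatal_error(struct virtio_device *vdev)
{
	/*
	 * Mark every virtqueue broken so in-flight submission paths bail
	 * out; recovery (e.g. a reset) can then be scheduled separately.
	 */
	virtio_break_device(vdev);
}

static bool my_can_submit(struct virtqueue *vq)
{
	return !virtqueue_is_broken(vq);
}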
2455
be83f04d
JW
2456/*
2457 * This should allow the device to be used by the driver. You may
2458 * need to grab appropriate locks to flush the write to
2459 * vq->broken. This should only be used in specific cases, e.g.
2460 * probing and restoring. This function should only be called by the
2461 * core, not directly by the driver.
2462 */
2463void __virtio_unbreak_device(struct virtio_device *dev)
2464{
2465 struct virtqueue *_vq;
2466
2467 spin_lock(&dev->vqs_list_lock);
2468 list_for_each_entry(_vq, &dev->vqs, list) {
2469 struct vring_virtqueue *vq = to_vvq(_vq);
2470
2471 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2472 WRITE_ONCE(vq->broken, false);
2473 }
2474 spin_unlock(&dev->vqs_list_lock);
2475}
2476EXPORT_SYMBOL_GPL(__virtio_unbreak_device);
2477
2a2d1382 2478dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
89062652
CH
2479{
2480 struct vring_virtqueue *vq = to_vvq(_vq);
2481
2a2d1382
AL
2482 BUG_ON(!vq->we_own_ring);
2483
1ce9e605
TB
2484 if (vq->packed_ring)
2485 return vq->packed.ring_dma_addr;
2486
d79dca75 2487 return vq->split.queue_dma_addr;
89062652 2488}
2a2d1382 2489EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
89062652 2490
2a2d1382 2491dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
89062652
CH
2492{
2493 struct vring_virtqueue *vq = to_vvq(_vq);
2494
2a2d1382
AL
2495 BUG_ON(!vq->we_own_ring);
2496
1ce9e605
TB
2497 if (vq->packed_ring)
2498 return vq->packed.driver_event_dma_addr;
2499
d79dca75 2500 return vq->split.queue_dma_addr +
e593bf97 2501 ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
2a2d1382
AL
2502}
2503EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
2504
2505dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
2506{
2507 struct vring_virtqueue *vq = to_vvq(_vq);
2508
2509 BUG_ON(!vq->we_own_ring);
2510
1ce9e605
TB
2511 if (vq->packed_ring)
2512 return vq->packed.device_event_dma_addr;
2513
d79dca75 2514 return vq->split.queue_dma_addr +
e593bf97 2515 ((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
2a2d1382
AL
2516}
2517EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
2518
1ce9e605 2519/* Only available for split ring */
2a2d1382
AL
2520const struct vring *virtqueue_get_vring(struct virtqueue *vq)
2521{
e593bf97 2522 return &to_vvq(vq)->split.vring;
89062652 2523}
2a2d1382 2524EXPORT_SYMBOL_GPL(virtqueue_get_vring);
89062652 2525
c6fd4701 2526MODULE_LICENSE("GPL");