virtio_ring: split: extract the logic of alloc state and extra
[linux-2.6-block.git] drivers/virtio/virtio_ring.c
fd534e9b 1// SPDX-License-Identifier: GPL-2.0-or-later
0a8a69dd
RR
2/* Virtio ring implementation.
3 *
4 * Copyright 2007 Rusty Russell IBM Corporation
0a8a69dd
RR
5 */
6#include <linux/virtio.h>
7#include <linux/virtio_ring.h>
e34f8725 8#include <linux/virtio_config.h>
0a8a69dd 9#include <linux/device.h>
5a0e3ad6 10#include <linux/slab.h>
b5a2c4f1 11#include <linux/module.h>
e93300b1 12#include <linux/hrtimer.h>
780bc790 13#include <linux/dma-mapping.h>
f8ce7263 14#include <linux/spinlock.h>
78fe3987 15#include <xen/xen.h>
0a8a69dd
RR
16
17#ifdef DEBUG
18/* For development, we want to crash whenever the ring is screwed. */
9499f5e7
RR
19#define BAD_RING(_vq, fmt, args...) \
20 do { \
21 dev_err(&(_vq)->vq.vdev->dev, \
22 "%s:"fmt, (_vq)->vq.name, ##args); \
23 BUG(); \
24 } while (0)
c5f841f1
RR
25/* Caller is supposed to guarantee no reentry. */
26#define START_USE(_vq) \
27 do { \
28 if ((_vq)->in_use) \
9499f5e7
RR
29 panic("%s:in_use = %i\n", \
30 (_vq)->vq.name, (_vq)->in_use); \
c5f841f1 31 (_vq)->in_use = __LINE__; \
9499f5e7 32 } while (0)
3a35ce7d 33#define END_USE(_vq) \
97a545ab 34 do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
4d6a105e
TB
35#define LAST_ADD_TIME_UPDATE(_vq) \
36 do { \
37 ktime_t now = ktime_get(); \
38 \
 39 /* No kick or get, with 0.1 seconds between? Warn. */ \
40 if ((_vq)->last_add_time_valid) \
41 WARN_ON(ktime_to_ms(ktime_sub(now, \
42 (_vq)->last_add_time)) > 100); \
43 (_vq)->last_add_time = now; \
44 (_vq)->last_add_time_valid = true; \
45 } while (0)
46#define LAST_ADD_TIME_CHECK(_vq) \
47 do { \
48 if ((_vq)->last_add_time_valid) { \
49 WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
50 (_vq)->last_add_time)) > 100); \
51 } \
52 } while (0)
53#define LAST_ADD_TIME_INVALID(_vq) \
54 ((_vq)->last_add_time_valid = false)
0a8a69dd 55#else
9499f5e7
RR
56#define BAD_RING(_vq, fmt, args...) \
57 do { \
58 dev_err(&_vq->vq.vdev->dev, \
59 "%s:"fmt, (_vq)->vq.name, ##args); \
60 (_vq)->broken = true; \
61 } while (0)
0a8a69dd
RR
62#define START_USE(vq)
63#define END_USE(vq)
4d6a105e
TB
64#define LAST_ADD_TIME_UPDATE(vq)
65#define LAST_ADD_TIME_CHECK(vq)
66#define LAST_ADD_TIME_INVALID(vq)
0a8a69dd
RR
67#endif
68
cbeedb72 69struct vring_desc_state_split {
780bc790
AL
70 void *data; /* Data for callback. */
71 struct vring_desc *indir_desc; /* Indirect descriptor, if any. */
72};
73
1ce9e605
TB
74struct vring_desc_state_packed {
75 void *data; /* Data for callback. */
76 struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
77 u16 num; /* Descriptor list length. */
1ce9e605
TB
78 u16 last; /* The last desc state in a list. */
79};
80
1f28750f 81struct vring_desc_extra {
ef5c366f
JW
82 dma_addr_t addr; /* Descriptor DMA addr. */
83 u32 len; /* Descriptor length. */
1ce9e605 84 u16 flags; /* Descriptor flags. */
aeef9b47 85 u16 next; /* The next desc state in a list. */
1ce9e605
TB
86};
87
d76136e4
XZ
88struct vring_virtqueue_split {
89 /* Actual memory layout for this queue. */
90 struct vring vring;
91
92 /* Last written value to avail->flags */
93 u16 avail_flags_shadow;
94
95 /*
96 * Last written value to avail->idx in
97 * guest byte order.
98 */
99 u16 avail_idx_shadow;
100
101 /* Per-descriptor state. */
102 struct vring_desc_state_split *desc_state;
103 struct vring_desc_extra *desc_extra;
104
105 /* DMA address and size information */
106 dma_addr_t queue_dma_addr;
107 size_t queue_size_in_bytes;
108};
109
110struct vring_virtqueue_packed {
111 /* Actual memory layout for this queue. */
112 struct {
113 unsigned int num;
114 struct vring_packed_desc *desc;
115 struct vring_packed_desc_event *driver;
116 struct vring_packed_desc_event *device;
117 } vring;
118
119 /* Driver ring wrap counter. */
120 bool avail_wrap_counter;
121
122 /* Avail used flags. */
123 u16 avail_used_flags;
124
125 /* Index of the next avail descriptor. */
126 u16 next_avail_idx;
127
128 /*
129 * Last written value to driver->flags in
130 * guest byte order.
131 */
132 u16 event_flags_shadow;
133
134 /* Per-descriptor state. */
135 struct vring_desc_state_packed *desc_state;
136 struct vring_desc_extra *desc_extra;
137
138 /* DMA address and size information */
139 dma_addr_t ring_dma_addr;
140 dma_addr_t driver_event_dma_addr;
141 dma_addr_t device_event_dma_addr;
142 size_t ring_size_in_bytes;
143 size_t event_size_in_bytes;
144};
145
43b4f721 146struct vring_virtqueue {
0a8a69dd
RR
147 struct virtqueue vq;
148
1ce9e605
TB
149 /* Is this a packed ring? */
150 bool packed_ring;
151
fb3fba6b
TB
152 /* Is DMA API used? */
153 bool use_dma_api;
154
7b21e34f
RR
155 /* Can we use weak barriers? */
156 bool weak_barriers;
157
0a8a69dd
RR
158 /* Other side has made a mess, don't try any more. */
159 bool broken;
160
9fa29b9d
MM
161 /* Host supports indirect buffers */
162 bool indirect;
163
a5c262c5
MT
164 /* Host publishes avail event idx */
165 bool event;
166
0a8a69dd
RR
167 /* Head of free buffer list. */
168 unsigned int free_head;
169 /* Number we've added since last sync. */
170 unsigned int num_added;
171
a7722890 172 /* Last used index we've seen.
 173 * for the split ring, it just contains the last used index;
 174 * for the packed ring:
 175 * bits up to VRING_PACKED_EVENT_F_WRAP_CTR contain the last used index,
 176 * bits from VRING_PACKED_EVENT_F_WRAP_CTR contain the used wrap counter.
 177 */
1bc4953e 178 u16 last_used_idx;
0a8a69dd 179
8d622d21
MT
180 /* Hint for event idx: already triggered no need to disable. */
181 bool event_triggered;
182
1ce9e605
TB
183 union {
184 /* Available for split ring */
d76136e4 185 struct vring_virtqueue_split split;
e593bf97 186
1ce9e605 187 /* Available for packed ring */
d76136e4 188 struct vring_virtqueue_packed packed;
1ce9e605 189 };
f277ec42 190
0a8a69dd 191 /* How to notify other side. FIXME: commonalize hcalls! */
46f9c2b9 192 bool (*notify)(struct virtqueue *vq);
0a8a69dd 193
2a2d1382
AL
194 /* DMA, allocation, and size information */
195 bool we_own_ring;
2a2d1382 196
0a8a69dd
RR
197#ifdef DEBUG
198 /* They're supposed to lock for us. */
199 unsigned int in_use;
e93300b1
RR
200
201 /* Figure out if their kicks are too delayed. */
202 bool last_add_time_valid;
203 ktime_t last_add_time;
0a8a69dd 204#endif
0a8a69dd
RR
205};
206
07d9629d 207static struct virtqueue *__vring_new_virtqueue(unsigned int index,
cd4c812a 208 struct vring_virtqueue_split *vring_split,
07d9629d
XZ
209 struct virtio_device *vdev,
210 bool weak_barriers,
211 bool context,
212 bool (*notify)(struct virtqueue *),
213 void (*callback)(struct virtqueue *),
214 const char *name);
a2b36c8d 215static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num);
e6f633e5
TB
216
217/*
218 * Helpers.
219 */
220
0a8a69dd
RR
221#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
222
35c51e09 223static inline bool virtqueue_use_indirect(struct vring_virtqueue *vq,
2f18c2d1
TB
224 unsigned int total_sg)
225{
2f18c2d1
TB
226 /*
227 * If the host supports indirect descriptor tables, and we have multiple
228 * buffers, then go indirect. FIXME: tune this threshold
229 */
230 return (vq->indirect && total_sg > 1 && vq->vq.num_free);
231}
232
d26c96c8 233/*
1a937693
MT
234 * Modern virtio devices have feature bits to specify whether they need a
235 * quirk and bypass the IOMMU. If not there, just use the DMA API.
236 *
237 * If there, the interaction between virtio and DMA API is messy.
d26c96c8
AL
238 *
239 * On most systems with virtio, physical addresses match bus addresses,
240 * and it doesn't particularly matter whether we use the DMA API.
241 *
242 * On some systems, including Xen and any system with a physical device
243 * that speaks virtio behind a physical IOMMU, we must use the DMA API
244 * for virtio DMA to work at all.
245 *
246 * On other systems, including SPARC and PPC64, virtio-pci devices are
247 * enumerated as though they are behind an IOMMU, but the virtio host
248 * ignores the IOMMU, so we must either pretend that the IOMMU isn't
249 * there or somehow map everything as the identity.
250 *
251 * For the time being, we preserve historic behavior and bypass the DMA
252 * API.
1a937693
MT
253 *
254 * TODO: install a per-device DMA ops structure that does the right thing
255 * taking into account all the above quirks, and use the DMA API
256 * unconditionally on data path.
d26c96c8
AL
257 */
258
259static bool vring_use_dma_api(struct virtio_device *vdev)
260{
24b6842a 261 if (!virtio_has_dma_quirk(vdev))
1a937693
MT
262 return true;
263
264 /* Otherwise, we are left to guess. */
78fe3987
AL
265 /*
 266 * In theory, it's possible to have a buggy QEMU-supplied
267 * emulated Q35 IOMMU and Xen enabled at the same time. On
268 * such a configuration, virtio has never worked and will
269 * not work without an even larger kludge. Instead, enable
270 * the DMA API if we're a Xen guest, which at least allows
271 * all of the sensible Xen configurations to work correctly.
272 */
273 if (xen_domain())
274 return true;
275
d26c96c8
AL
276 return false;
277}
278
e6d6dd6c
JR
279size_t virtio_max_dma_size(struct virtio_device *vdev)
280{
281 size_t max_segment_size = SIZE_MAX;
282
283 if (vring_use_dma_api(vdev))
817fc978 284 max_segment_size = dma_max_mapping_size(vdev->dev.parent);
e6d6dd6c
JR
285
286 return max_segment_size;
287}
288EXPORT_SYMBOL_GPL(virtio_max_dma_size);
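/*
 * Descriptive note (added for clarity, not part of this commit): the value
 * returned above is a cap, not a guarantee.  Drivers that build
 * scatterlists for a virtio device (virtio-blk, for example) can use it to
 * limit the maximum segment size they advertise, so that every segment
 * remains mappable through the DMA API whenever the DMA API is in use.
 */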
289
d79dca75
TB
290static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
291 dma_addr_t *dma_handle, gfp_t flag)
292{
293 if (vring_use_dma_api(vdev)) {
294 return dma_alloc_coherent(vdev->dev.parent, size,
295 dma_handle, flag);
296 } else {
297 void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
298
299 if (queue) {
300 phys_addr_t phys_addr = virt_to_phys(queue);
301 *dma_handle = (dma_addr_t)phys_addr;
302
303 /*
 304 * Sanity check: make sure we didn't truncate
305 * the address. The only arches I can find that
306 * have 64-bit phys_addr_t but 32-bit dma_addr_t
307 * are certain non-highmem MIPS and x86
308 * configurations, but these configurations
309 * should never allocate physical pages above 32
310 * bits, so this is fine. Just in case, throw a
311 * warning and abort if we end up with an
312 * unrepresentable address.
313 */
314 if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
315 free_pages_exact(queue, PAGE_ALIGN(size));
316 return NULL;
317 }
318 }
319 return queue;
320 }
321}
322
323static void vring_free_queue(struct virtio_device *vdev, size_t size,
324 void *queue, dma_addr_t dma_handle)
325{
326 if (vring_use_dma_api(vdev))
327 dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
328 else
329 free_pages_exact(queue, PAGE_ALIGN(size));
330}
331
780bc790
AL
332/*
333 * The DMA ops on various arches are rather gnarly right now, and
334 * making all of the arch DMA ops work on the vring device itself
335 * is a mess. For now, we use the parent device for DMA ops.
336 */
75bfa81b 337static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
780bc790
AL
338{
339 return vq->vq.vdev->dev.parent;
340}
341
342/* Map one sg entry. */
343static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
344 struct scatterlist *sg,
345 enum dma_data_direction direction)
346{
fb3fba6b 347 if (!vq->use_dma_api)
780bc790
AL
348 return (dma_addr_t)sg_phys(sg);
349
350 /*
351 * We can't use dma_map_sg, because we don't use scatterlists in
352 * the way it expects (we don't guarantee that the scatterlist
353 * will exist for the lifetime of the mapping).
354 */
355 return dma_map_page(vring_dma_dev(vq),
356 sg_page(sg), sg->offset, sg->length,
357 direction);
358}
359
360static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
361 void *cpu_addr, size_t size,
362 enum dma_data_direction direction)
363{
fb3fba6b 364 if (!vq->use_dma_api)
780bc790
AL
365 return (dma_addr_t)virt_to_phys(cpu_addr);
366
367 return dma_map_single(vring_dma_dev(vq),
368 cpu_addr, size, direction);
369}
370
e6f633e5
TB
371static int vring_mapping_error(const struct vring_virtqueue *vq,
372 dma_addr_t addr)
373{
fb3fba6b 374 if (!vq->use_dma_api)
e6f633e5
TB
375 return 0;
376
377 return dma_mapping_error(vring_dma_dev(vq), addr);
378}
379
3a897128
XZ
380static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
381{
382 vq->vq.num_free = num;
383
384 if (vq->packed_ring)
385 vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR);
386 else
387 vq->last_used_idx = 0;
388
389 vq->event_triggered = false;
390 vq->num_added = 0;
391
392#ifdef DEBUG
393 vq->in_use = false;
394 vq->last_add_time_valid = false;
395#endif
396}
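/*
 * Descriptive note (not part of this commit): virtqueue_init() resets only
 * the layout-independent bookkeeping -- num_free, last_used_idx (with the
 * packed ring's used wrap counter seeded to 1), event_triggered, num_added
 * and the DEBUG-only fields.  Ring memory and the per-descriptor
 * state/extra arrays are set up separately by the split and packed paths.
 */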
397
e6f633e5
TB
398
399/*
400 * Split ring specific functions - *_split().
401 */
402
72b5e895
JW
403static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
404 struct vring_desc *desc)
780bc790
AL
405{
406 u16 flags;
407
fb3fba6b 408 if (!vq->use_dma_api)
780bc790
AL
409 return;
410
411 flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
412
b4282ebc
XZ
413 dma_unmap_page(vring_dma_dev(vq),
414 virtio64_to_cpu(vq->vq.vdev, desc->addr),
415 virtio32_to_cpu(vq->vq.vdev, desc->len),
416 (flags & VRING_DESC_F_WRITE) ?
417 DMA_FROM_DEVICE : DMA_TO_DEVICE);
780bc790
AL
418}
419
72b5e895
JW
420static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
421 unsigned int i)
422{
423 struct vring_desc_extra *extra = vq->split.desc_extra;
424 u16 flags;
425
426 if (!vq->use_dma_api)
427 goto out;
428
429 flags = extra[i].flags;
430
431 if (flags & VRING_DESC_F_INDIRECT) {
432 dma_unmap_single(vring_dma_dev(vq),
433 extra[i].addr,
434 extra[i].len,
435 (flags & VRING_DESC_F_WRITE) ?
436 DMA_FROM_DEVICE : DMA_TO_DEVICE);
437 } else {
438 dma_unmap_page(vring_dma_dev(vq),
439 extra[i].addr,
440 extra[i].len,
441 (flags & VRING_DESC_F_WRITE) ?
442 DMA_FROM_DEVICE : DMA_TO_DEVICE);
443 }
444
445out:
446 return extra[i].next;
447}
448
138fd251
TB
449static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
450 unsigned int total_sg,
451 gfp_t gfp)
9fa29b9d
MM
452{
453 struct vring_desc *desc;
b25bd251 454 unsigned int i;
9fa29b9d 455
b92b1b89
WD
456 /*
457 * We require lowmem mappings for the descriptors because
458 * otherwise virt_to_phys will give us bogus addresses in the
459 * virtqueue.
460 */
82107539 461 gfp &= ~__GFP_HIGHMEM;
b92b1b89 462
6da2ec56 463 desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp);
9fa29b9d 464 if (!desc)
b25bd251 465 return NULL;
9fa29b9d 466
b25bd251 467 for (i = 0; i < total_sg; i++)
00e6f3d9 468 desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
b25bd251 469 return desc;
9fa29b9d
MM
470}
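/*
 * Descriptive note (not part of this commit): every entry's ->next above is
 * pre-chained to i + 1, including the final one, which therefore points one
 * past the table.  virtqueue_add_split() later clears VRING_DESC_F_NEXT on
 * the last descriptor it actually uses, so that dangling link is never
 * followed.
 */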
471
fe4c3862
JW
472static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
473 struct vring_desc *desc,
474 unsigned int i,
475 dma_addr_t addr,
476 unsigned int len,
72b5e895
JW
477 u16 flags,
478 bool indirect)
fe4c3862 479{
72b5e895
JW
480 struct vring_virtqueue *vring = to_vvq(vq);
481 struct vring_desc_extra *extra = vring->split.desc_extra;
482 u16 next;
483
fe4c3862
JW
484 desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
485 desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
486 desc[i].len = cpu_to_virtio32(vq->vdev, len);
487
72b5e895
JW
488 if (!indirect) {
489 next = extra[i].next;
490 desc[i].next = cpu_to_virtio16(vq->vdev, next);
491
492 extra[i].addr = addr;
493 extra[i].len = len;
494 extra[i].flags = flags;
495 } else
496 next = virtio16_to_cpu(vq->vdev, desc[i].next);
497
498 return next;
fe4c3862
JW
499}
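/*
 * Descriptive note (not part of this commit): for direct descriptors the
 * addr/len/flags written above are also mirrored into desc_extra[], so the
 * unmap path can read them back in CPU byte order without re-reading the
 * descriptor ring.  Indirect tables skip the mirror and are unmapped from
 * the descriptors themselves (see vring_unmap_one_split_indirect() above),
 * since they use streaming DMA mappings.
 */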
500
138fd251
TB
501static inline int virtqueue_add_split(struct virtqueue *_vq,
502 struct scatterlist *sgs[],
503 unsigned int total_sg,
504 unsigned int out_sgs,
505 unsigned int in_sgs,
506 void *data,
507 void *ctx,
508 gfp_t gfp)
0a8a69dd
RR
509{
510 struct vring_virtqueue *vq = to_vvq(_vq);
13816c76 511 struct scatterlist *sg;
b25bd251 512 struct vring_desc *desc;
3f649ab7 513 unsigned int i, n, avail, descs_used, prev, err_idx;
1fe9b6fe 514 int head;
b25bd251 515 bool indirect;
0a8a69dd 516
9fa29b9d
MM
517 START_USE(vq);
518
0a8a69dd 519 BUG_ON(data == NULL);
5a08b04f 520 BUG_ON(ctx && vq->indirect);
9fa29b9d 521
70670444
RR
522 if (unlikely(vq->broken)) {
523 END_USE(vq);
524 return -EIO;
525 }
526
4d6a105e 527 LAST_ADD_TIME_UPDATE(vq);
e93300b1 528
b25bd251
RR
529 BUG_ON(total_sg == 0);
530
531 head = vq->free_head;
532
35c51e09 533 if (virtqueue_use_indirect(vq, total_sg))
138fd251 534 desc = alloc_indirect_split(_vq, total_sg, gfp);
44ed8089 535 else {
b25bd251 536 desc = NULL;
e593bf97 537 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
44ed8089 538 }
b25bd251
RR
539
540 if (desc) {
541 /* Use a single buffer which doesn't continue */
780bc790 542 indirect = true;
b25bd251
RR
543 /* Set up rest to use this indirect table. */
544 i = 0;
545 descs_used = 1;
b25bd251 546 } else {
780bc790 547 indirect = false;
e593bf97 548 desc = vq->split.vring.desc;
b25bd251
RR
549 i = head;
550 descs_used = total_sg;
9fa29b9d
MM
551 }
552
b4b4ff73 553 if (unlikely(vq->vq.num_free < descs_used)) {
0a8a69dd 554 pr_debug("Can't add buf len %i - avail = %i\n",
b25bd251 555 descs_used, vq->vq.num_free);
44653eae
RR
556 /* FIXME: for historical reasons, we force a notify here if
557 * there are outgoing parts to the buffer. Presumably the
558 * host should service the ring ASAP. */
13816c76 559 if (out_sgs)
44653eae 560 vq->notify(&vq->vq);
58625edf
WY
561 if (indirect)
562 kfree(desc);
0a8a69dd
RR
563 END_USE(vq);
564 return -ENOSPC;
565 }
566
13816c76 567 for (n = 0; n < out_sgs; n++) {
eeebf9b1 568 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
780bc790
AL
569 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
570 if (vring_mapping_error(vq, addr))
571 goto unmap_release;
572
13816c76 573 prev = i;
72b5e895
JW
 574 /* Note that we trust the indirect descriptor
 575 * table since it uses a streaming DMA mapping.
576 */
fe4c3862 577 i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
72b5e895
JW
578 VRING_DESC_F_NEXT,
579 indirect);
13816c76 580 }
0a8a69dd 581 }
13816c76 582 for (; n < (out_sgs + in_sgs); n++) {
eeebf9b1 583 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
780bc790
AL
584 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
585 if (vring_mapping_error(vq, addr))
586 goto unmap_release;
587
13816c76 588 prev = i;
72b5e895
JW
 589 /* Note that we trust the indirect descriptor
 590 * table since it uses a streaming DMA mapping.
591 */
fe4c3862
JW
592 i = virtqueue_add_desc_split(_vq, desc, i, addr,
593 sg->length,
594 VRING_DESC_F_NEXT |
72b5e895
JW
595 VRING_DESC_F_WRITE,
596 indirect);
13816c76 597 }
0a8a69dd
RR
598 }
599 /* Last one doesn't continue. */
00e6f3d9 600 desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
72b5e895 601 if (!indirect && vq->use_dma_api)
890d3356 602 vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &=
72b5e895 603 ~VRING_DESC_F_NEXT;
0a8a69dd 604
780bc790
AL
605 if (indirect) {
606 /* Now that the indirect table is filled in, map it. */
607 dma_addr_t addr = vring_map_single(
608 vq, desc, total_sg * sizeof(struct vring_desc),
609 DMA_TO_DEVICE);
610 if (vring_mapping_error(vq, addr))
611 goto unmap_release;
612
fe4c3862
JW
613 virtqueue_add_desc_split(_vq, vq->split.vring.desc,
614 head, addr,
615 total_sg * sizeof(struct vring_desc),
72b5e895
JW
616 VRING_DESC_F_INDIRECT,
617 false);
780bc790
AL
618 }
619
620 /* We're using some buffers from the free list. */
621 vq->vq.num_free -= descs_used;
622
0a8a69dd 623 /* Update free pointer */
b25bd251 624 if (indirect)
72b5e895 625 vq->free_head = vq->split.desc_extra[head].next;
b25bd251
RR
626 else
627 vq->free_head = i;
0a8a69dd 628
780bc790 629 /* Store token and indirect buffer state. */
cbeedb72 630 vq->split.desc_state[head].data = data;
780bc790 631 if (indirect)
cbeedb72 632 vq->split.desc_state[head].indir_desc = desc;
87646a34 633 else
cbeedb72 634 vq->split.desc_state[head].indir_desc = ctx;
0a8a69dd
RR
635
636 /* Put entry in available array (but don't update avail->idx until they
3b720b8c 637 * do sync). */
e593bf97
TB
638 avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
639 vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
0a8a69dd 640
ee7cd898
RR
641 /* Descriptors and available array need to be set before we expose the
642 * new available array entries. */
a9a0fef7 643 virtio_wmb(vq->weak_barriers);
e593bf97
TB
644 vq->split.avail_idx_shadow++;
645 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
646 vq->split.avail_idx_shadow);
ee7cd898
RR
647 vq->num_added++;
648
5e05bf58
TH
649 pr_debug("Added buffer head %i to %p\n", head, vq);
650 END_USE(vq);
651
ee7cd898
RR
652 /* This is very unlikely, but theoretically possible. Kick
653 * just in case. */
654 if (unlikely(vq->num_added == (1 << 16) - 1))
655 virtqueue_kick(_vq);
656
98e8c6bc 657 return 0;
780bc790
AL
658
659unmap_release:
660 err_idx = i;
cf8f1696
ML
661
662 if (indirect)
663 i = 0;
664 else
665 i = head;
780bc790
AL
666
667 for (n = 0; n < total_sg; n++) {
668 if (i == err_idx)
669 break;
72b5e895
JW
670 if (indirect) {
671 vring_unmap_one_split_indirect(vq, &desc[i]);
672 i = virtio16_to_cpu(_vq->vdev, desc[i].next);
673 } else
674 i = vring_unmap_one_split(vq, i);
780bc790
AL
675 }
676
780bc790
AL
677 if (indirect)
678 kfree(desc);
679
3cc36f6e 680 END_USE(vq);
f7728002 681 return -ENOMEM;
0a8a69dd 682}
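/*
 * Descriptive note (not part of this commit): return values of
 * virtqueue_add_split() above are 0 on success, -EIO if the ring is broken,
 * -ENOSPC when fewer than descs_used descriptors are free, and -ENOMEM if a
 * DMA mapping (or the indirect table) could not be set up -- in which case
 * everything mapped so far is unwound under unmap_release.
 */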
13816c76 683
138fd251 684static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
0a8a69dd
RR
685{
686 struct vring_virtqueue *vq = to_vvq(_vq);
a5c262c5 687 u16 new, old;
41f0377f
RR
688 bool needs_kick;
689
0a8a69dd 690 START_USE(vq);
a72caae2
JW
691 /* We need to expose available array entries before checking avail
692 * event. */
a9a0fef7 693 virtio_mb(vq->weak_barriers);
0a8a69dd 694
e593bf97
TB
695 old = vq->split.avail_idx_shadow - vq->num_added;
696 new = vq->split.avail_idx_shadow;
0a8a69dd
RR
697 vq->num_added = 0;
698
4d6a105e
TB
699 LAST_ADD_TIME_CHECK(vq);
700 LAST_ADD_TIME_INVALID(vq);
e93300b1 701
41f0377f 702 if (vq->event) {
e593bf97
TB
703 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
704 vring_avail_event(&vq->split.vring)),
41f0377f
RR
705 new, old);
706 } else {
e593bf97
TB
707 needs_kick = !(vq->split.vring.used->flags &
708 cpu_to_virtio16(_vq->vdev,
709 VRING_USED_F_NO_NOTIFY));
41f0377f 710 }
0a8a69dd 711 END_USE(vq);
41f0377f
RR
712 return needs_kick;
713}
138fd251 714
138fd251
TB
715static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
716 void **ctx)
0a8a69dd 717{
780bc790 718 unsigned int i, j;
c60923cb 719 __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
0a8a69dd
RR
720
721 /* Clear data ptr. */
cbeedb72 722 vq->split.desc_state[head].data = NULL;
0a8a69dd 723
780bc790 724 /* Put back on free list: unmap first-level descriptors and find end */
0a8a69dd 725 i = head;
9fa29b9d 726
e593bf97 727 while (vq->split.vring.desc[i].flags & nextflag) {
72b5e895
JW
728 vring_unmap_one_split(vq, i);
729 i = vq->split.desc_extra[i].next;
06ca287d 730 vq->vq.num_free++;
0a8a69dd
RR
731 }
732
72b5e895
JW
733 vring_unmap_one_split(vq, i);
734 vq->split.desc_extra[i].next = vq->free_head;
0a8a69dd 735 vq->free_head = head;
780bc790 736
0a8a69dd 737 /* Plus final descriptor */
06ca287d 738 vq->vq.num_free++;
780bc790 739
5a08b04f 740 if (vq->indirect) {
cbeedb72
TB
741 struct vring_desc *indir_desc =
742 vq->split.desc_state[head].indir_desc;
5a08b04f
MT
743 u32 len;
744
745 /* Free the indirect table, if any, now that it's unmapped. */
746 if (!indir_desc)
747 return;
748
72b5e895 749 len = vq->split.desc_extra[head].len;
780bc790 750
72b5e895
JW
751 BUG_ON(!(vq->split.desc_extra[head].flags &
752 VRING_DESC_F_INDIRECT));
780bc790
AL
753 BUG_ON(len == 0 || len % sizeof(struct vring_desc));
754
755 for (j = 0; j < len / sizeof(struct vring_desc); j++)
72b5e895 756 vring_unmap_one_split_indirect(vq, &indir_desc[j]);
780bc790 757
5a08b04f 758 kfree(indir_desc);
cbeedb72 759 vq->split.desc_state[head].indir_desc = NULL;
5a08b04f 760 } else if (ctx) {
cbeedb72 761 *ctx = vq->split.desc_state[head].indir_desc;
780bc790 762 }
0a8a69dd
RR
763}
764
138fd251 765static inline bool more_used_split(const struct vring_virtqueue *vq)
0a8a69dd 766{
e593bf97
TB
767 return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
768 vq->split.vring.used->idx);
0a8a69dd
RR
769}
770
138fd251
TB
771static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
772 unsigned int *len,
773 void **ctx)
0a8a69dd
RR
774{
775 struct vring_virtqueue *vq = to_vvq(_vq);
776 void *ret;
777 unsigned int i;
3b720b8c 778 u16 last_used;
0a8a69dd
RR
779
780 START_USE(vq);
781
5ef82752
RR
782 if (unlikely(vq->broken)) {
783 END_USE(vq);
784 return NULL;
785 }
786
138fd251 787 if (!more_used_split(vq)) {
0a8a69dd
RR
788 pr_debug("No more buffers in queue\n");
789 END_USE(vq);
790 return NULL;
791 }
792
2d61ba95 793 /* Only get used array entries after they have been exposed by host. */
a9a0fef7 794 virtio_rmb(vq->weak_barriers);
2d61ba95 795
e593bf97
TB
796 last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
797 i = virtio32_to_cpu(_vq->vdev,
798 vq->split.vring.used->ring[last_used].id);
799 *len = virtio32_to_cpu(_vq->vdev,
800 vq->split.vring.used->ring[last_used].len);
0a8a69dd 801
e593bf97 802 if (unlikely(i >= vq->split.vring.num)) {
0a8a69dd
RR
803 BAD_RING(vq, "id %u out of range\n", i);
804 return NULL;
805 }
cbeedb72 806 if (unlikely(!vq->split.desc_state[i].data)) {
0a8a69dd
RR
807 BAD_RING(vq, "id %u is not a head!\n", i);
808 return NULL;
809 }
810
138fd251 811 /* detach_buf_split clears data, so grab it now. */
cbeedb72 812 ret = vq->split.desc_state[i].data;
138fd251 813 detach_buf_split(vq, i, ctx);
0a8a69dd 814 vq->last_used_idx++;
a5c262c5
MT
815 /* If we expect an interrupt for the next entry, tell host
816 * by writing event index and flush out the write before
817 * the read in the next get_buf call. */
e593bf97 818 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
788e5b3a 819 virtio_store_mb(vq->weak_barriers,
e593bf97 820 &vring_used_event(&vq->split.vring),
788e5b3a 821 cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
a5c262c5 822
4d6a105e 823 LAST_ADD_TIME_INVALID(vq);
e93300b1 824
0a8a69dd
RR
825 END_USE(vq);
826 return ret;
827}
138fd251 828
138fd251 829static void virtqueue_disable_cb_split(struct virtqueue *_vq)
18445c4d
RR
830{
831 struct vring_virtqueue *vq = to_vvq(_vq);
832
e593bf97
TB
833 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
834 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
8d622d21
MT
835 if (vq->event)
836 /* TODO: this is a hack. Figure out a cleaner value to write. */
837 vring_used_event(&vq->split.vring) = 0x0;
838 else
e593bf97
TB
839 vq->split.vring.avail->flags =
840 cpu_to_virtio16(_vq->vdev,
841 vq->split.avail_flags_shadow);
f277ec42 842 }
18445c4d
RR
843}
844
31532340 845static unsigned int virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
0a8a69dd
RR
846{
847 struct vring_virtqueue *vq = to_vvq(_vq);
cc229884 848 u16 last_used_idx;
0a8a69dd
RR
849
850 START_USE(vq);
0a8a69dd
RR
851
852 /* We optimistically turn back on interrupts, then check if there was
853 * more to do. */
a5c262c5
MT
854 /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
855 * either clear the flags bit or point the event index at the next
856 * entry. Always do both to keep code simple. */
e593bf97
TB
857 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
858 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
0ea1e4a6 859 if (!vq->event)
e593bf97
TB
860 vq->split.vring.avail->flags =
861 cpu_to_virtio16(_vq->vdev,
862 vq->split.avail_flags_shadow);
f277ec42 863 }
e593bf97
TB
864 vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
865 last_used_idx = vq->last_used_idx);
cc229884
MT
866 END_USE(vq);
867 return last_used_idx;
868}
138fd251 869
31532340 870static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_idx)
138fd251
TB
871{
872 struct vring_virtqueue *vq = to_vvq(_vq);
873
874 return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
e593bf97 875 vq->split.vring.used->idx);
138fd251
TB
876}
877
138fd251 878static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
7ab358c2
MT
879{
880 struct vring_virtqueue *vq = to_vvq(_vq);
881 u16 bufs;
882
883 START_USE(vq);
884
885 /* We optimistically turn back on interrupts, then check if there was
886 * more to do. */
 887 /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
888 * either clear the flags bit or point the event index at the next
0ea1e4a6 889 * entry. Always update the event index to keep code simple. */
e593bf97
TB
890 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
891 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
0ea1e4a6 892 if (!vq->event)
e593bf97
TB
893 vq->split.vring.avail->flags =
894 cpu_to_virtio16(_vq->vdev,
895 vq->split.avail_flags_shadow);
f277ec42 896 }
7ab358c2 897 /* TODO: tune this threshold */
e593bf97 898 bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;
788e5b3a
MT
899
900 virtio_store_mb(vq->weak_barriers,
e593bf97 901 &vring_used_event(&vq->split.vring),
788e5b3a
MT
902 cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
903
e593bf97
TB
904 if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
905 - vq->last_used_idx) > bufs)) {
7ab358c2
MT
906 END_USE(vq);
907 return false;
908 }
909
910 END_USE(vq);
911 return true;
912}
7ab358c2 913
138fd251 914static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
c021eac4
SM
915{
916 struct vring_virtqueue *vq = to_vvq(_vq);
917 unsigned int i;
918 void *buf;
919
920 START_USE(vq);
921
e593bf97 922 for (i = 0; i < vq->split.vring.num; i++) {
cbeedb72 923 if (!vq->split.desc_state[i].data)
c021eac4 924 continue;
138fd251 925 /* detach_buf_split clears data, so grab it now. */
cbeedb72 926 buf = vq->split.desc_state[i].data;
138fd251 927 detach_buf_split(vq, i, NULL);
e593bf97
TB
928 vq->split.avail_idx_shadow--;
929 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
930 vq->split.avail_idx_shadow);
c021eac4
SM
931 END_USE(vq);
932 return buf;
933 }
934 /* That should have freed everything. */
e593bf97 935 BUG_ON(vq->vq.num_free != vq->split.vring.num);
c021eac4
SM
936
937 END_USE(vq);
938 return NULL;
939}
138fd251 940
a2b36c8d
XZ
941static int vring_alloc_state_extra_split(struct vring_virtqueue_split *vring_split)
942{
943 struct vring_desc_state_split *state;
944 struct vring_desc_extra *extra;
945 u32 num = vring_split->vring.num;
946
947 state = kmalloc_array(num, sizeof(struct vring_desc_state_split), GFP_KERNEL);
948 if (!state)
949 goto err_state;
950
951 extra = vring_alloc_desc_extra(num);
952 if (!extra)
953 goto err_extra;
954
955 memset(state, 0, num * sizeof(struct vring_desc_state_split));
956
957 vring_split->desc_state = state;
958 vring_split->desc_extra = extra;
959 return 0;
960
961err_extra:
962 kfree(state);
963err_state:
964 return -ENOMEM;
965}
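/*
 * Descriptive note on the helper this patch extracts (not from the original
 * code): the desc_state array is zeroed so every slot starts with a NULL
 * ->data, while vring_alloc_desc_extra() pre-links the ->next free chain.
 * Per the commit subject, the intent is that the split-ring setup path
 * (e.g. __vring_new_virtqueue()) can allocate both arrays with one call and
 * a single error path; the exact call site lies outside this hunk.
 */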
966
89f05d94
XZ
967static void vring_free_split(struct vring_virtqueue_split *vring_split,
968 struct virtio_device *vdev)
969{
970 vring_free_queue(vdev, vring_split->queue_size_in_bytes,
971 vring_split->vring.desc,
972 vring_split->queue_dma_addr);
973
974 kfree(vring_split->desc_state);
975 kfree(vring_split->desc_extra);
976}
977
c2d87fe6
XZ
978static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split,
979 struct virtio_device *vdev,
980 u32 num,
981 unsigned int vring_align,
982 bool may_reduce_num)
d79dca75 983{
d79dca75
TB
984 void *queue = NULL;
985 dma_addr_t dma_addr;
d79dca75
TB
986
987 /* We assume num is a power of 2. */
988 if (num & (num - 1)) {
989 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
c2d87fe6 990 return -EINVAL;
d79dca75
TB
991 }
992
993 /* TODO: allocate each queue chunk individually */
994 for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
995 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
996 &dma_addr,
c7cc29aa 997 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
d79dca75
TB
998 if (queue)
999 break;
cf94db21 1000 if (!may_reduce_num)
c2d87fe6 1001 return -ENOMEM;
d79dca75
TB
1002 }
1003
1004 if (!num)
c2d87fe6 1005 return -ENOMEM;
d79dca75
TB
1006
1007 if (!queue) {
1008 /* Try to get a single page. You are my only hope! */
1009 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1010 &dma_addr, GFP_KERNEL|__GFP_ZERO);
1011 }
1012 if (!queue)
c2d87fe6
XZ
1013 return -ENOMEM;
1014
1015 vring_init(&vring_split->vring, num, queue, vring_align);
1016
1017 vring_split->queue_dma_addr = dma_addr;
1018 vring_split->queue_size_in_bytes = vring_size(num, vring_align);
d79dca75 1019
c2d87fe6
XZ
1020 return 0;
1021}
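/*
 * Descriptive note (not part of this commit): when may_reduce_num is set,
 * the requested size is halved until a contiguous allocation succeeds (with
 * a final single-page attempt as the last resort), so e.g. a request for
 * 1024 descriptors may come back as a 512- or 256-entry ring.  Callers learn
 * the final size from vring_split->vring.num after vring_init().
 */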
1022
1023static struct virtqueue *vring_create_virtqueue_split(
1024 unsigned int index,
1025 unsigned int num,
1026 unsigned int vring_align,
1027 struct virtio_device *vdev,
1028 bool weak_barriers,
1029 bool may_reduce_num,
1030 bool context,
1031 bool (*notify)(struct virtqueue *),
1032 void (*callback)(struct virtqueue *),
1033 const char *name)
1034{
1035 struct vring_virtqueue_split vring_split = {};
1036 struct virtqueue *vq;
1037 int err;
1038
1039 err = vring_alloc_queue_split(&vring_split, vdev, num, vring_align,
1040 may_reduce_num);
1041 if (err)
1042 return NULL;
d79dca75 1043
cd4c812a
XZ
1044 vq = __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers,
1045 context, notify, callback, name);
d79dca75 1046 if (!vq) {
c2d87fe6 1047 vring_free_split(&vring_split, vdev);
d79dca75
TB
1048 return NULL;
1049 }
1050
c2d87fe6
XZ
1051 to_vvq(vq)->split.queue_dma_addr = vring_split.queue_dma_addr;
1052 to_vvq(vq)->split.queue_size_in_bytes = vring_split.queue_size_in_bytes;
d79dca75
TB
1053 to_vvq(vq)->we_own_ring = true;
1054
1055 return vq;
1056}
1057
e6f633e5 1058
1ce9e605
TB
1059/*
1060 * Packed ring specific functions - *_packed().
1061 */
a7722890 1062static inline bool packed_used_wrap_counter(u16 last_used_idx)
1063{
1064 return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1065}
1066
1067static inline u16 packed_last_used(u16 last_used_idx)
1068{
1069 return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1070}
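/*
 * Descriptive note (not part of this commit): for the packed ring,
 * last_used_idx stores the used wrap counter in bit
 * VRING_PACKED_EVENT_F_WRAP_CTR and the index in the bits below it; the two
 * helpers above split that packed value apart.  The field is accessed with
 * READ_ONCE()/WRITE_ONCE() below so both halves are observed consistently
 * from a single load.
 */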
1ce9e605 1071
d80dc15b
XZ
1072static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
1073 struct vring_desc_extra *extra)
1ce9e605
TB
1074{
1075 u16 flags;
1076
1077 if (!vq->use_dma_api)
1078 return;
1079
d80dc15b 1080 flags = extra->flags;
1ce9e605
TB
1081
1082 if (flags & VRING_DESC_F_INDIRECT) {
1083 dma_unmap_single(vring_dma_dev(vq),
d80dc15b 1084 extra->addr, extra->len,
1ce9e605
TB
1085 (flags & VRING_DESC_F_WRITE) ?
1086 DMA_FROM_DEVICE : DMA_TO_DEVICE);
1087 } else {
1088 dma_unmap_page(vring_dma_dev(vq),
d80dc15b 1089 extra->addr, extra->len,
1ce9e605
TB
1090 (flags & VRING_DESC_F_WRITE) ?
1091 DMA_FROM_DEVICE : DMA_TO_DEVICE);
1092 }
1093}
1094
1095static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
1096 struct vring_packed_desc *desc)
1097{
1098 u16 flags;
1099
1100 if (!vq->use_dma_api)
1101 return;
1102
1103 flags = le16_to_cpu(desc->flags);
1104
920379a4
XZ
1105 dma_unmap_page(vring_dma_dev(vq),
1106 le64_to_cpu(desc->addr),
1107 le32_to_cpu(desc->len),
1108 (flags & VRING_DESC_F_WRITE) ?
1109 DMA_FROM_DEVICE : DMA_TO_DEVICE);
1ce9e605
TB
1110}
1111
1112static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
1113 gfp_t gfp)
1114{
1115 struct vring_packed_desc *desc;
1116
1117 /*
1118 * We require lowmem mappings for the descriptors because
1119 * otherwise virt_to_phys will give us bogus addresses in the
1120 * virtqueue.
1121 */
1122 gfp &= ~__GFP_HIGHMEM;
1123
1124 desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp);
1125
1126 return desc;
1127}
1128
1129static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
8d7670f3
XZ
1130 struct scatterlist *sgs[],
1131 unsigned int total_sg,
1132 unsigned int out_sgs,
1133 unsigned int in_sgs,
1134 void *data,
1135 gfp_t gfp)
1ce9e605
TB
1136{
1137 struct vring_packed_desc *desc;
1138 struct scatterlist *sg;
1139 unsigned int i, n, err_idx;
1140 u16 head, id;
1141 dma_addr_t addr;
1142
1143 head = vq->packed.next_avail_idx;
1144 desc = alloc_indirect_packed(total_sg, gfp);
fc6d70f4
XZ
1145 if (!desc)
1146 return -ENOMEM;
1ce9e605
TB
1147
1148 if (unlikely(vq->vq.num_free < 1)) {
1149 pr_debug("Can't add buf len 1 - avail = 0\n");
df0bfe75 1150 kfree(desc);
1ce9e605
TB
1151 END_USE(vq);
1152 return -ENOSPC;
1153 }
1154
1155 i = 0;
1156 id = vq->free_head;
1157 BUG_ON(id == vq->packed.vring.num);
1158
1159 for (n = 0; n < out_sgs + in_sgs; n++) {
1160 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1161 addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1162 DMA_TO_DEVICE : DMA_FROM_DEVICE);
1163 if (vring_mapping_error(vq, addr))
1164 goto unmap_release;
1165
1166 desc[i].flags = cpu_to_le16(n < out_sgs ?
1167 0 : VRING_DESC_F_WRITE);
1168 desc[i].addr = cpu_to_le64(addr);
1169 desc[i].len = cpu_to_le32(sg->length);
1170 i++;
1171 }
1172 }
1173
1174 /* Now that the indirect table is filled in, map it. */
1175 addr = vring_map_single(vq, desc,
1176 total_sg * sizeof(struct vring_packed_desc),
1177 DMA_TO_DEVICE);
1178 if (vring_mapping_error(vq, addr))
1179 goto unmap_release;
1180
1181 vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
1182 vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
1183 sizeof(struct vring_packed_desc));
1184 vq->packed.vring.desc[head].id = cpu_to_le16(id);
1185
1186 if (vq->use_dma_api) {
1187 vq->packed.desc_extra[id].addr = addr;
1188 vq->packed.desc_extra[id].len = total_sg *
1189 sizeof(struct vring_packed_desc);
1190 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
1191 vq->packed.avail_used_flags;
1192 }
1193
1194 /*
1195 * A driver MUST NOT make the first descriptor in the list
1196 * available before all subsequent descriptors comprising
1197 * the list are made available.
1198 */
1199 virtio_wmb(vq->weak_barriers);
1200 vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
1201 vq->packed.avail_used_flags);
1202
1203 /* We're using some buffers from the free list. */
1204 vq->vq.num_free -= 1;
1205
1206 /* Update free pointer */
1207 n = head + 1;
1208 if (n >= vq->packed.vring.num) {
1209 n = 0;
1210 vq->packed.avail_wrap_counter ^= 1;
1211 vq->packed.avail_used_flags ^=
1212 1 << VRING_PACKED_DESC_F_AVAIL |
1213 1 << VRING_PACKED_DESC_F_USED;
1214 }
1215 vq->packed.next_avail_idx = n;
aeef9b47 1216 vq->free_head = vq->packed.desc_extra[id].next;
1ce9e605
TB
1217
1218 /* Store token and indirect buffer state. */
1219 vq->packed.desc_state[id].num = 1;
1220 vq->packed.desc_state[id].data = data;
1221 vq->packed.desc_state[id].indir_desc = desc;
1222 vq->packed.desc_state[id].last = id;
1223
1224 vq->num_added += 1;
1225
1226 pr_debug("Added buffer head %i to %p\n", head, vq);
1227 END_USE(vq);
1228
1229 return 0;
1230
1231unmap_release:
1232 err_idx = i;
1233
1234 for (i = 0; i < err_idx; i++)
1235 vring_unmap_desc_packed(vq, &desc[i]);
1236
1237 kfree(desc);
1238
1239 END_USE(vq);
f7728002 1240 return -ENOMEM;
1ce9e605
TB
1241}
1242
1243static inline int virtqueue_add_packed(struct virtqueue *_vq,
1244 struct scatterlist *sgs[],
1245 unsigned int total_sg,
1246 unsigned int out_sgs,
1247 unsigned int in_sgs,
1248 void *data,
1249 void *ctx,
1250 gfp_t gfp)
1251{
1252 struct vring_virtqueue *vq = to_vvq(_vq);
1253 struct vring_packed_desc *desc;
1254 struct scatterlist *sg;
1255 unsigned int i, n, c, descs_used, err_idx;
3f649ab7
KC
1256 __le16 head_flags, flags;
1257 u16 head, id, prev, curr, avail_used_flags;
fc6d70f4 1258 int err;
1ce9e605
TB
1259
1260 START_USE(vq);
1261
1262 BUG_ON(data == NULL);
1263 BUG_ON(ctx && vq->indirect);
1264
1265 if (unlikely(vq->broken)) {
1266 END_USE(vq);
1267 return -EIO;
1268 }
1269
1270 LAST_ADD_TIME_UPDATE(vq);
1271
1272 BUG_ON(total_sg == 0);
1273
35c51e09 1274 if (virtqueue_use_indirect(vq, total_sg)) {
fc6d70f4
XZ
1275 err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
1276 in_sgs, data, gfp);
1861ba62
MT
1277 if (err != -ENOMEM) {
1278 END_USE(vq);
fc6d70f4 1279 return err;
1861ba62 1280 }
fc6d70f4
XZ
1281
1282 /* fall back on direct */
1283 }
1ce9e605
TB
1284
1285 head = vq->packed.next_avail_idx;
1286 avail_used_flags = vq->packed.avail_used_flags;
1287
1288 WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
1289
1290 desc = vq->packed.vring.desc;
1291 i = head;
1292 descs_used = total_sg;
1293
1294 if (unlikely(vq->vq.num_free < descs_used)) {
1295 pr_debug("Can't add buf len %i - avail = %i\n",
1296 descs_used, vq->vq.num_free);
1297 END_USE(vq);
1298 return -ENOSPC;
1299 }
1300
1301 id = vq->free_head;
1302 BUG_ON(id == vq->packed.vring.num);
1303
1304 curr = id;
1305 c = 0;
1306 for (n = 0; n < out_sgs + in_sgs; n++) {
1307 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1308 dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1309 DMA_TO_DEVICE : DMA_FROM_DEVICE);
1310 if (vring_mapping_error(vq, addr))
1311 goto unmap_release;
1312
1313 flags = cpu_to_le16(vq->packed.avail_used_flags |
1314 (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
1315 (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
1316 if (i == head)
1317 head_flags = flags;
1318 else
1319 desc[i].flags = flags;
1320
1321 desc[i].addr = cpu_to_le64(addr);
1322 desc[i].len = cpu_to_le32(sg->length);
1323 desc[i].id = cpu_to_le16(id);
1324
1325 if (unlikely(vq->use_dma_api)) {
1326 vq->packed.desc_extra[curr].addr = addr;
1327 vq->packed.desc_extra[curr].len = sg->length;
1328 vq->packed.desc_extra[curr].flags =
1329 le16_to_cpu(flags);
1330 }
1331 prev = curr;
aeef9b47 1332 curr = vq->packed.desc_extra[curr].next;
1ce9e605
TB
1333
1334 if ((unlikely(++i >= vq->packed.vring.num))) {
1335 i = 0;
1336 vq->packed.avail_used_flags ^=
1337 1 << VRING_PACKED_DESC_F_AVAIL |
1338 1 << VRING_PACKED_DESC_F_USED;
1339 }
1340 }
1341 }
1342
1343 if (i < head)
1344 vq->packed.avail_wrap_counter ^= 1;
1345
1346 /* We're using some buffers from the free list. */
1347 vq->vq.num_free -= descs_used;
1348
1349 /* Update free pointer */
1350 vq->packed.next_avail_idx = i;
1351 vq->free_head = curr;
1352
1353 /* Store token. */
1354 vq->packed.desc_state[id].num = descs_used;
1355 vq->packed.desc_state[id].data = data;
1356 vq->packed.desc_state[id].indir_desc = ctx;
1357 vq->packed.desc_state[id].last = prev;
1358
1359 /*
1360 * A driver MUST NOT make the first descriptor in the list
1361 * available before all subsequent descriptors comprising
1362 * the list are made available.
1363 */
1364 virtio_wmb(vq->weak_barriers);
1365 vq->packed.vring.desc[head].flags = head_flags;
1366 vq->num_added += descs_used;
1367
1368 pr_debug("Added buffer head %i to %p\n", head, vq);
1369 END_USE(vq);
1370
1371 return 0;
1372
1373unmap_release:
1374 err_idx = i;
1375 i = head;
44593865 1376 curr = vq->free_head;
1ce9e605
TB
1377
1378 vq->packed.avail_used_flags = avail_used_flags;
1379
1380 for (n = 0; n < total_sg; n++) {
1381 if (i == err_idx)
1382 break;
d80dc15b 1383 vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
44593865 1384 curr = vq->packed.desc_extra[curr].next;
1ce9e605
TB
1385 i++;
1386 if (i >= vq->packed.vring.num)
1387 i = 0;
1388 }
1389
1390 END_USE(vq);
1391 return -EIO;
1392}
1393
1394static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
1395{
1396 struct vring_virtqueue *vq = to_vvq(_vq);
f51f9826 1397 u16 new, old, off_wrap, flags, wrap_counter, event_idx;
1ce9e605
TB
1398 bool needs_kick;
1399 union {
1400 struct {
1401 __le16 off_wrap;
1402 __le16 flags;
1403 };
1404 u32 u32;
1405 } snapshot;
1406
1407 START_USE(vq);
1408
1409 /*
1410 * We need to expose the new flags value before checking notification
1411 * suppressions.
1412 */
1413 virtio_mb(vq->weak_barriers);
1414
f51f9826
TB
1415 old = vq->packed.next_avail_idx - vq->num_added;
1416 new = vq->packed.next_avail_idx;
1ce9e605
TB
1417 vq->num_added = 0;
1418
1419 snapshot.u32 = *(u32 *)vq->packed.vring.device;
1420 flags = le16_to_cpu(snapshot.flags);
1421
1422 LAST_ADD_TIME_CHECK(vq);
1423 LAST_ADD_TIME_INVALID(vq);
1424
f51f9826
TB
1425 if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
1426 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
1427 goto out;
1428 }
1429
1430 off_wrap = le16_to_cpu(snapshot.off_wrap);
1431
1432 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1433 event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1434 if (wrap_counter != vq->packed.avail_wrap_counter)
1435 event_idx -= vq->packed.vring.num;
1436
1437 needs_kick = vring_need_event(event_idx, new, old);
1438out:
1ce9e605
TB
1439 END_USE(vq);
1440 return needs_kick;
1441}
1442
1443static void detach_buf_packed(struct vring_virtqueue *vq,
1444 unsigned int id, void **ctx)
1445{
1446 struct vring_desc_state_packed *state = NULL;
1447 struct vring_packed_desc *desc;
1448 unsigned int i, curr;
1449
1450 state = &vq->packed.desc_state[id];
1451
1452 /* Clear data ptr. */
1453 state->data = NULL;
1454
aeef9b47 1455 vq->packed.desc_extra[state->last].next = vq->free_head;
1ce9e605
TB
1456 vq->free_head = id;
1457 vq->vq.num_free += state->num;
1458
1459 if (unlikely(vq->use_dma_api)) {
1460 curr = id;
1461 for (i = 0; i < state->num; i++) {
d80dc15b
XZ
1462 vring_unmap_extra_packed(vq,
1463 &vq->packed.desc_extra[curr]);
aeef9b47 1464 curr = vq->packed.desc_extra[curr].next;
1ce9e605
TB
1465 }
1466 }
1467
1468 if (vq->indirect) {
1469 u32 len;
1470
1471 /* Free the indirect table, if any, now that it's unmapped. */
1472 desc = state->indir_desc;
1473 if (!desc)
1474 return;
1475
1476 if (vq->use_dma_api) {
1477 len = vq->packed.desc_extra[id].len;
1478 for (i = 0; i < len / sizeof(struct vring_packed_desc);
1479 i++)
1480 vring_unmap_desc_packed(vq, &desc[i]);
1481 }
1482 kfree(desc);
1483 state->indir_desc = NULL;
1484 } else if (ctx) {
1485 *ctx = state->indir_desc;
1486 }
1487}
1488
1489static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
1490 u16 idx, bool used_wrap_counter)
1491{
1492 bool avail, used;
1493 u16 flags;
1494
1495 flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
1496 avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
1497 used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
1498
1499 return avail == used && used == used_wrap_counter;
1500}
1501
1502static inline bool more_used_packed(const struct vring_virtqueue *vq)
1503{
a7722890 1504 u16 last_used;
1505 u16 last_used_idx;
1506 bool used_wrap_counter;
1507
1508 last_used_idx = READ_ONCE(vq->last_used_idx);
1509 last_used = packed_last_used(last_used_idx);
1510 used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1511 return is_used_desc_packed(vq, last_used, used_wrap_counter);
1ce9e605
TB
1512}
1513
1514static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
1515 unsigned int *len,
1516 void **ctx)
1517{
1518 struct vring_virtqueue *vq = to_vvq(_vq);
a7722890 1519 u16 last_used, id, last_used_idx;
1520 bool used_wrap_counter;
1ce9e605
TB
1521 void *ret;
1522
1523 START_USE(vq);
1524
1525 if (unlikely(vq->broken)) {
1526 END_USE(vq);
1527 return NULL;
1528 }
1529
1530 if (!more_used_packed(vq)) {
1531 pr_debug("No more buffers in queue\n");
1532 END_USE(vq);
1533 return NULL;
1534 }
1535
1536 /* Only get used elements after they have been exposed by host. */
1537 virtio_rmb(vq->weak_barriers);
1538
a7722890 1539 last_used_idx = READ_ONCE(vq->last_used_idx);
1540 used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1541 last_used = packed_last_used(last_used_idx);
1ce9e605
TB
1542 id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
1543 *len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
1544
1545 if (unlikely(id >= vq->packed.vring.num)) {
1546 BAD_RING(vq, "id %u out of range\n", id);
1547 return NULL;
1548 }
1549 if (unlikely(!vq->packed.desc_state[id].data)) {
1550 BAD_RING(vq, "id %u is not a head!\n", id);
1551 return NULL;
1552 }
1553
1554 /* detach_buf_packed clears data, so grab it now. */
1555 ret = vq->packed.desc_state[id].data;
1556 detach_buf_packed(vq, id, ctx);
1557
a7722890 1558 last_used += vq->packed.desc_state[id].num;
1559 if (unlikely(last_used >= vq->packed.vring.num)) {
1560 last_used -= vq->packed.vring.num;
1561 used_wrap_counter ^= 1;
1ce9e605
TB
1562 }
1563
a7722890 1564 last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1565 WRITE_ONCE(vq->last_used_idx, last_used);
1566
f51f9826
TB
1567 /*
1568 * If we expect an interrupt for the next entry, tell host
1569 * by writing event index and flush out the write before
1570 * the read in the next get_buf call.
1571 */
1572 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
1573 virtio_store_mb(vq->weak_barriers,
1574 &vq->packed.vring.driver->off_wrap,
a7722890 1575 cpu_to_le16(vq->last_used_idx));
f51f9826 1576
1ce9e605
TB
1577 LAST_ADD_TIME_INVALID(vq);
1578
1579 END_USE(vq);
1580 return ret;
1581}
1582
1583static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
1584{
1585 struct vring_virtqueue *vq = to_vvq(_vq);
1586
1587 if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
1588 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1589 vq->packed.vring.driver->flags =
1590 cpu_to_le16(vq->packed.event_flags_shadow);
1591 }
1592}
1593
31532340 1594static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
1ce9e605
TB
1595{
1596 struct vring_virtqueue *vq = to_vvq(_vq);
1597
1598 START_USE(vq);
1599
1600 /*
1601 * We optimistically turn back on interrupts, then check if there was
1602 * more to do.
1603 */
1604
f51f9826
TB
1605 if (vq->event) {
1606 vq->packed.vring.driver->off_wrap =
a7722890 1607 cpu_to_le16(vq->last_used_idx);
f51f9826
TB
1608 /*
1609 * We need to update event offset and event wrap
1610 * counter first before updating event flags.
1611 */
1612 virtio_wmb(vq->weak_barriers);
1613 }
1614
1ce9e605 1615 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
f51f9826
TB
1616 vq->packed.event_flags_shadow = vq->event ?
1617 VRING_PACKED_EVENT_FLAG_DESC :
1618 VRING_PACKED_EVENT_FLAG_ENABLE;
1ce9e605
TB
1619 vq->packed.vring.driver->flags =
1620 cpu_to_le16(vq->packed.event_flags_shadow);
1621 }
1622
1623 END_USE(vq);
a7722890 1624 return vq->last_used_idx;
1ce9e605
TB
1625}
1626
1627static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
1628{
1629 struct vring_virtqueue *vq = to_vvq(_vq);
1630 bool wrap_counter;
1631 u16 used_idx;
1632
1633 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1634 used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1635
1636 return is_used_desc_packed(vq, used_idx, wrap_counter);
1637}
1638
1639static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
1640{
1641 struct vring_virtqueue *vq = to_vvq(_vq);
a7722890 1642 u16 used_idx, wrap_counter, last_used_idx;
f51f9826 1643 u16 bufs;
1ce9e605
TB
1644
1645 START_USE(vq);
1646
1647 /*
1648 * We optimistically turn back on interrupts, then check if there was
1649 * more to do.
1650 */
1651
f51f9826
TB
1652 if (vq->event) {
1653 /* TODO: tune this threshold */
1654 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
a7722890 1655 last_used_idx = READ_ONCE(vq->last_used_idx);
1656 wrap_counter = packed_used_wrap_counter(last_used_idx);
f51f9826 1657
a7722890 1658 used_idx = packed_last_used(last_used_idx) + bufs;
f51f9826
TB
1659 if (used_idx >= vq->packed.vring.num) {
1660 used_idx -= vq->packed.vring.num;
1661 wrap_counter ^= 1;
1662 }
1663
1664 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
1665 (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1666
1667 /*
1668 * We need to update event offset and event wrap
1669 * counter first before updating event flags.
1670 */
1671 virtio_wmb(vq->weak_barriers);
f51f9826 1672 }
1ce9e605
TB
1673
1674 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
f51f9826
TB
1675 vq->packed.event_flags_shadow = vq->event ?
1676 VRING_PACKED_EVENT_FLAG_DESC :
1677 VRING_PACKED_EVENT_FLAG_ENABLE;
1ce9e605
TB
1678 vq->packed.vring.driver->flags =
1679 cpu_to_le16(vq->packed.event_flags_shadow);
1680 }
1681
1682 /*
1683 * We need to update event suppression structure first
1684 * before re-checking for more used buffers.
1685 */
1686 virtio_mb(vq->weak_barriers);
1687
a7722890 1688 last_used_idx = READ_ONCE(vq->last_used_idx);
1689 wrap_counter = packed_used_wrap_counter(last_used_idx);
1690 used_idx = packed_last_used(last_used_idx);
1691 if (is_used_desc_packed(vq, used_idx, wrap_counter)) {
1ce9e605
TB
1692 END_USE(vq);
1693 return false;
1694 }
1695
1696 END_USE(vq);
1697 return true;
1698}
1699
1700static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
1701{
1702 struct vring_virtqueue *vq = to_vvq(_vq);
1703 unsigned int i;
1704 void *buf;
1705
1706 START_USE(vq);
1707
1708 for (i = 0; i < vq->packed.vring.num; i++) {
1709 if (!vq->packed.desc_state[i].data)
1710 continue;
1711 /* detach_buf clears data, so grab it now. */
1712 buf = vq->packed.desc_state[i].data;
1713 detach_buf_packed(vq, i, NULL);
1714 END_USE(vq);
1715 return buf;
1716 }
1717 /* That should have freed everything. */
1718 BUG_ON(vq->vq.num_free != vq->packed.vring.num);
1719
1720 END_USE(vq);
1721 return NULL;
1722}
1723
96ef18a2 1724static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num)
5a222421
JW
1725{
1726 struct vring_desc_extra *desc_extra;
1727 unsigned int i;
1728
1729 desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra),
1730 GFP_KERNEL);
1731 if (!desc_extra)
1732 return NULL;
1733
1734 memset(desc_extra, 0, num * sizeof(struct vring_desc_extra));
1735
1736 for (i = 0; i < num - 1; i++)
1737 desc_extra[i].next = i + 1;
1738
1739 return desc_extra;
1740}
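/*
 * Descriptive note (not part of this commit): the array is zero-filled
 * first, then ->next is chained 0 -> 1 -> ... -> num - 1; the last entry's
 * ->next is left at 0 by the memset.  Both the split and packed setup paths
 * use this chain as their initial free list, starting from slot 0.
 */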
1741
1ce9e605
TB
1742static struct virtqueue *vring_create_virtqueue_packed(
1743 unsigned int index,
1744 unsigned int num,
1745 unsigned int vring_align,
1746 struct virtio_device *vdev,
1747 bool weak_barriers,
1748 bool may_reduce_num,
1749 bool context,
1750 bool (*notify)(struct virtqueue *),
1751 void (*callback)(struct virtqueue *),
1752 const char *name)
1753{
1754 struct vring_virtqueue *vq;
1755 struct vring_packed_desc *ring;
1756 struct vring_packed_desc_event *driver, *device;
1757 dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
1758 size_t ring_size_in_bytes, event_size_in_bytes;
1ce9e605
TB
1759
1760 ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
1761
1762 ring = vring_alloc_queue(vdev, ring_size_in_bytes,
1763 &ring_dma_addr,
1764 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1765 if (!ring)
1766 goto err_ring;
1767
1768 event_size_in_bytes = sizeof(struct vring_packed_desc_event);
1769
1770 driver = vring_alloc_queue(vdev, event_size_in_bytes,
1771 &driver_event_dma_addr,
1772 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1773 if (!driver)
1774 goto err_driver;
1775
1776 device = vring_alloc_queue(vdev, event_size_in_bytes,
1777 &device_event_dma_addr,
1778 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1779 if (!device)
1780 goto err_device;
1781
1782 vq = kmalloc(sizeof(*vq), GFP_KERNEL);
1783 if (!vq)
1784 goto err_vq;
1785
1786 vq->vq.callback = callback;
1787 vq->vq.vdev = vdev;
1788 vq->vq.name = name;
1ce9e605
TB
1789 vq->vq.index = index;
1790 vq->we_own_ring = true;
1791 vq->notify = notify;
1792 vq->weak_barriers = weak_barriers;
c346dae4 1793#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
8b4ec69d 1794 vq->broken = true;
c346dae4
JW
1795#else
1796 vq->broken = false;
1797#endif
1ce9e605
TB
1798 vq->packed_ring = true;
1799 vq->use_dma_api = vring_use_dma_api(vdev);
1ce9e605
TB
1800
1801 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
1802 !context;
1803 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1804
45383fb0
TB
1805 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
1806 vq->weak_barriers = false;
1807
1ce9e605
TB
1808 vq->packed.ring_dma_addr = ring_dma_addr;
1809 vq->packed.driver_event_dma_addr = driver_event_dma_addr;
1810 vq->packed.device_event_dma_addr = device_event_dma_addr;
1811
1812 vq->packed.ring_size_in_bytes = ring_size_in_bytes;
1813 vq->packed.event_size_in_bytes = event_size_in_bytes;
1814
1815 vq->packed.vring.num = num;
1816 vq->packed.vring.desc = ring;
1817 vq->packed.vring.driver = driver;
1818 vq->packed.vring.device = device;
1819
1820 vq->packed.next_avail_idx = 0;
1821 vq->packed.avail_wrap_counter = 1;
1ce9e605
TB
1822 vq->packed.event_flags_shadow = 0;
1823 vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
1824
1825 vq->packed.desc_state = kmalloc_array(num,
1826 sizeof(struct vring_desc_state_packed),
1827 GFP_KERNEL);
1828 if (!vq->packed.desc_state)
1829 goto err_desc_state;
1830
1831 memset(vq->packed.desc_state, 0,
1832 num * sizeof(struct vring_desc_state_packed));
1833
1834 /* Put everything in free lists. */
1835 vq->free_head = 0;
1ce9e605 1836
96ef18a2 1837 vq->packed.desc_extra = vring_alloc_desc_extra(num);
1838 if (!vq->packed.desc_extra)
1839 goto err_desc_extra;
1840
1841 /* No callback? Tell other side not to bother us. */
1842 if (!callback) {
1843 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1844 vq->packed.vring.driver->flags =
1845 cpu_to_le16(vq->packed.event_flags_shadow);
1846 }
1847
1848 virtqueue_init(vq, num);
1849
0e566c8f 1850 spin_lock(&vdev->vqs_list_lock);
e152d8af 1851 list_add_tail(&vq->vq.list, &vdev->vqs);
0e566c8f 1852 spin_unlock(&vdev->vqs_list_lock);
1853 return &vq->vq;
1854
1855err_desc_extra:
1856 kfree(vq->packed.desc_state);
1857err_desc_state:
1858 kfree(vq);
1859err_vq:
ae93d8ea 1860 vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr);
1ce9e605 1861err_device:
ae93d8ea 1862 vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr);
1863err_driver:
1864 vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr);
1865err_ring:
1866 return NULL;
1867}
1868
1869
1870/*
1871 * Generic functions and exported symbols.
1872 */
1873
1874static inline int virtqueue_add(struct virtqueue *_vq,
1875 struct scatterlist *sgs[],
1876 unsigned int total_sg,
1877 unsigned int out_sgs,
1878 unsigned int in_sgs,
1879 void *data,
1880 void *ctx,
1881 gfp_t gfp)
1882{
1883 struct vring_virtqueue *vq = to_vvq(_vq);
1884
1885 return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
1886 out_sgs, in_sgs, data, ctx, gfp) :
1887 virtqueue_add_split(_vq, sgs, total_sg,
1888 out_sgs, in_sgs, data, ctx, gfp);
1889}
1890
1891/**
1892 * virtqueue_add_sgs - expose buffers to other end
a5581206 1893 * @_vq: the struct virtqueue we're talking about.
e6f633e5 1894 * @sgs: array of terminated scatterlists.
1895 * @out_sgs: the number of scatterlists readable by other side
1896 * @in_sgs: the number of scatterlists which are writable (after readable ones)
1897 * @data: the token identifying the buffer.
1898 * @gfp: how to do memory allocations (if necessary).
1899 *
1900 * Caller must ensure we don't call this with other virtqueue operations
1901 * at the same time (except where noted).
1902 *
 1903 * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
1904 */
1905int virtqueue_add_sgs(struct virtqueue *_vq,
1906 struct scatterlist *sgs[],
1907 unsigned int out_sgs,
1908 unsigned int in_sgs,
1909 void *data,
1910 gfp_t gfp)
1911{
1912 unsigned int i, total_sg = 0;
1913
1914 /* Count them first. */
1915 for (i = 0; i < out_sgs + in_sgs; i++) {
1916 struct scatterlist *sg;
1917
1918 for (sg = sgs[i]; sg; sg = sg_next(sg))
1919 total_sg++;
1920 }
1921 return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
1922 data, NULL, gfp);
1923}
1924EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
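
/*
 * Sketch of typical driver usage (names such as "vq", "my_req" and
 * "my_resp" are illustrative, not part of this file): one device-readable
 * and one device-writable scatterlist are exposed in a single call.
 *
 *	struct scatterlist hdr, status, *sgs[2];
 *	int err;
 *
 *	sg_init_one(&hdr, &my_req, sizeof(my_req));	 // readable by device
 *	sg_init_one(&status, &my_resp, sizeof(my_resp)); // writable by device
 *	sgs[0] = &hdr;
 *	sgs[1] = &status;
 *	err = virtqueue_add_sgs(vq, sgs, 1, 1, &my_req, GFP_KERNEL);
 *	if (!err)
 *		virtqueue_kick(vq);
 */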
1925
1926/**
1927 * virtqueue_add_outbuf - expose output buffers to other end
1928 * @vq: the struct virtqueue we're talking about.
1929 * @sg: scatterlist (must be well-formed and terminated!)
1930 * @num: the number of entries in @sg readable by other side
1931 * @data: the token identifying the buffer.
1932 * @gfp: how to do memory allocations (if necessary).
1933 *
1934 * Caller must ensure we don't call this with other virtqueue operations
1935 * at the same time (except where noted).
1936 *
 1937 * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
1938 */
1939int virtqueue_add_outbuf(struct virtqueue *vq,
1940 struct scatterlist *sg, unsigned int num,
1941 void *data,
1942 gfp_t gfp)
1943{
1944 return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
1945}
1946EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
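
/*
 * Sketch, assuming "vq", "buf" and "len" come from the calling driver:
 * a single output buffer is wrapped in a one-entry scatterlist.
 *
 *	struct scatterlist sg;
 *
 *	sg_init_one(&sg, buf, len);
 *	if (!virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC))
 *		virtqueue_kick(vq);
 */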
1947
1948/**
1949 * virtqueue_add_inbuf - expose input buffers to other end
1950 * @vq: the struct virtqueue we're talking about.
1951 * @sg: scatterlist (must be well-formed and terminated!)
1952 * @num: the number of entries in @sg writable by other side
1953 * @data: the token identifying the buffer.
1954 * @gfp: how to do memory allocations (if necessary).
1955 *
1956 * Caller must ensure we don't call this with other virtqueue operations
1957 * at the same time (except where noted).
1958 *
 1959 * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
1960 */
1961int virtqueue_add_inbuf(struct virtqueue *vq,
1962 struct scatterlist *sg, unsigned int num,
1963 void *data,
1964 gfp_t gfp)
1965{
1966 return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
1967}
1968EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
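
/*
 * Sketch of posting a receive buffer ("rx_buf" and "RX_BUF_LEN" are
 * illustrative); the buffer comes back via virtqueue_get_buf() once the
 * device has written into it.
 *
 *	struct scatterlist sg;
 *
 *	sg_init_one(&sg, rx_buf, RX_BUF_LEN);
 *	err = virtqueue_add_inbuf(vq, &sg, 1, rx_buf, GFP_KERNEL);
 */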
1969
1970/**
1971 * virtqueue_add_inbuf_ctx - expose input buffers to other end
1972 * @vq: the struct virtqueue we're talking about.
1973 * @sg: scatterlist (must be well-formed and terminated!)
1974 * @num: the number of entries in @sg writable by other side
1975 * @data: the token identifying the buffer.
1976 * @ctx: extra context for the token
1977 * @gfp: how to do memory allocations (if necessary).
1978 *
1979 * Caller must ensure we don't call this with other virtqueue operations
1980 * at the same time (except where noted).
1981 *
 1982 * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
1983 */
1984int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
1985 struct scatterlist *sg, unsigned int num,
1986 void *data,
1987 void *ctx,
1988 gfp_t gfp)
1989{
1990 return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
1991}
1992EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
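
/*
 * Sketch: @ctx can carry per-buffer metadata that virtqueue_get_buf_ctx()
 * later hands back together with the data token ("page" is illustrative).
 *
 *	sg_init_one(&sg, page_address(page), PAGE_SIZE);
 *	err = virtqueue_add_inbuf_ctx(vq, &sg, 1, page_address(page),
 *				      page, GFP_ATOMIC);
 */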
1993
1994/**
 1995 * virtqueue_kick_prepare - first half of a split virtqueue_kick call.
a5581206 1996 * @_vq: the struct virtqueue
1997 *
1998 * Instead of virtqueue_kick(), you can do:
1999 * if (virtqueue_kick_prepare(vq))
2000 * virtqueue_notify(vq);
2001 *
 2002 * This is sometimes useful because virtqueue_kick_prepare() needs
 2003 * to be serialized, but the actual virtqueue_notify() call does not.
2004 */
2005bool virtqueue_kick_prepare(struct virtqueue *_vq)
2006{
2007 struct vring_virtqueue *vq = to_vvq(_vq);
2008
2009 return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
2010 virtqueue_kick_prepare_split(_vq);
2011}
2012EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
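
/*
 * Sketch of the split-kick pattern: the prepare step runs under the
 * driver's own lock ("my_lock" is illustrative), while the potentially
 * slow notify runs outside it.
 *
 *	bool notify;
 *
 *	spin_lock_irqsave(&my_lock, flags);
 *	err = virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC);
 *	notify = virtqueue_kick_prepare(vq);
 *	spin_unlock_irqrestore(&my_lock, flags);
 *
 *	if (notify)
 *		virtqueue_notify(vq);
 */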
2013
2014/**
 2015 * virtqueue_notify - second half of a split virtqueue_kick call.
a5581206 2016 * @_vq: the struct virtqueue
2017 *
2018 * This does not need to be serialized.
2019 *
2020 * Returns false if host notify failed or queue is broken, otherwise true.
2021 */
2022bool virtqueue_notify(struct virtqueue *_vq)
2023{
2024 struct vring_virtqueue *vq = to_vvq(_vq);
2025
2026 if (unlikely(vq->broken))
2027 return false;
2028
2029 /* Prod other side to tell it about changes. */
2030 if (!vq->notify(_vq)) {
2031 vq->broken = true;
2032 return false;
2033 }
2034 return true;
2035}
2036EXPORT_SYMBOL_GPL(virtqueue_notify);
2037
2038/**
2039 * virtqueue_kick - update after add_buf
2040 * @vq: the struct virtqueue
2041 *
2042 * After one or more virtqueue_add_* calls, invoke this to kick
2043 * the other side.
2044 *
2045 * Caller must ensure we don't call this with other virtqueue
2046 * operations at the same time (except where noted).
2047 *
2048 * Returns false if kick failed, otherwise true.
2049 */
2050bool virtqueue_kick(struct virtqueue *vq)
2051{
2052 if (virtqueue_kick_prepare(vq))
2053 return virtqueue_notify(vq);
2054 return true;
2055}
2056EXPORT_SYMBOL_GPL(virtqueue_kick);
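
/*
 * Sketch: several buffers can be added back to back and the device kicked
 * once at the end (error handling elided; "bufs" and "sg" are illustrative).
 *
 *	for (i = 0; i < n; i++)
 *		virtqueue_add_outbuf(vq, &sg[i], 1, bufs[i], GFP_ATOMIC);
 *	virtqueue_kick(vq);
 */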
2057
2058/**
31c11db6 2059 * virtqueue_get_buf_ctx - get the next used buffer
a5581206 2060 * @_vq: the struct virtqueue we're talking about.
e6f633e5 2061 * @len: the length written into the buffer
a5581206 2062 * @ctx: extra context for the token
2063 *
2064 * If the device wrote data into the buffer, @len will be set to the
2065 * amount written. This means you don't need to clear the buffer
2066 * beforehand to ensure there's no data leakage in the case of short
2067 * writes.
2068 *
2069 * Caller must ensure we don't call this with other virtqueue
2070 * operations at the same time (except where noted).
2071 *
2072 * Returns NULL if there are no used buffers, or the "data" token
2073 * handed to virtqueue_add_*().
2074 */
2075void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
2076 void **ctx)
2077{
2078 struct vring_virtqueue *vq = to_vvq(_vq);
2079
2080 return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
2081 virtqueue_get_buf_ctx_split(_vq, len, ctx);
2082}
2083EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
2084
2085void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
2086{
2087 return virtqueue_get_buf_ctx(_vq, len, NULL);
2088}
2089EXPORT_SYMBOL_GPL(virtqueue_get_buf);
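
/*
 * Sketch of a completion loop, e.g. run from the virtqueue callback
 * ("my_complete_request" is an illustrative helper):
 *
 *	unsigned int len;
 *	void *buf;
 *
 *	while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
 *		my_complete_request(buf, len);
 */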
2090/**
2091 * virtqueue_disable_cb - disable callbacks
a5581206 2092 * @_vq: the struct virtqueue we're talking about.
2093 *
2094 * Note that this is not necessarily synchronous, hence unreliable and only
2095 * useful as an optimization.
2096 *
2097 * Unlike other operations, this need not be serialized.
2098 */
2099void virtqueue_disable_cb(struct virtqueue *_vq)
2100{
2101 struct vring_virtqueue *vq = to_vvq(_vq);
2102
2103 /* If device triggered an event already it won't trigger one again:
2104 * no need to disable.
2105 */
2106 if (vq->event_triggered)
2107 return;
2108
2109 if (vq->packed_ring)
2110 virtqueue_disable_cb_packed(_vq);
2111 else
2112 virtqueue_disable_cb_split(_vq);
2113}
2114EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
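
/*
 * Sketch: a driver callback may suppress further callbacks and defer the
 * real work ("struct my_dev" and "rx_work" are illustrative).
 *
 *	static void my_vq_callback(struct virtqueue *vq)
 *	{
 *		struct my_dev *d = vq->vdev->priv;
 *
 *		virtqueue_disable_cb(vq);
 *		schedule_work(&d->rx_work);
 *	}
 */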
2115
2116/**
2117 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
a5581206 2118 * @_vq: the struct virtqueue we're talking about.
2119 *
2120 * This re-enables callbacks; it returns current queue state
2121 * in an opaque unsigned value. This value should be later tested by
2122 * virtqueue_poll, to detect a possible race between the driver checking for
2123 * more work, and enabling callbacks.
2124 *
2125 * Caller must ensure we don't call this with other virtqueue
2126 * operations at the same time (except where noted).
2127 */
31532340 2128unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq)
e6f633e5 2129{
2130 struct vring_virtqueue *vq = to_vvq(_vq);
2131
2132 if (vq->event_triggered)
2133 vq->event_triggered = false;
2134
2135 return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
2136 virtqueue_enable_cb_prepare_split(_vq);
2137}
2138EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
2139
2140/**
2141 * virtqueue_poll - query pending used buffers
a5581206 2142 * @_vq: the struct virtqueue we're talking about.
2143 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
2144 *
2145 * Returns "true" if there are pending used buffers in the queue.
2146 *
2147 * This does not need to be serialized.
2148 */
31532340 2149bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx)
2150{
2151 struct vring_virtqueue *vq = to_vvq(_vq);
2152
2153 if (unlikely(vq->broken))
2154 return false;
2155
e6f633e5 2156 virtio_mb(vq->weak_barriers);
2157 return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
2158 virtqueue_poll_split(_vq, last_used_idx);
2159}
2160EXPORT_SYMBOL_GPL(virtqueue_poll);
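
/*
 * Sketch of the race-free re-enable pattern built on these two calls
 * (the "process" label is illustrative):
 *
 *	unsigned int opaque;
 *
 *	opaque = virtqueue_enable_cb_prepare(vq);
 *	if (virtqueue_poll(vq, opaque)) {
 *		virtqueue_disable_cb(vq);
 *		goto process;	// buffers slipped in before callbacks were on
 *	}
 */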
2161
2162/**
2163 * virtqueue_enable_cb - restart callbacks after disable_cb.
a5581206 2164 * @_vq: the struct virtqueue we're talking about.
2165 *
2166 * This re-enables callbacks; it returns "false" if there are pending
2167 * buffers in the queue, to detect a possible race between the driver
2168 * checking for more work, and enabling callbacks.
2169 *
2170 * Caller must ensure we don't call this with other virtqueue
2171 * operations at the same time (except where noted).
2172 */
2173bool virtqueue_enable_cb(struct virtqueue *_vq)
2174{
31532340 2175 unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq);
2176
2177 return !virtqueue_poll(_vq, last_used_idx);
2178}
2179EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
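
/*
 * Sketch of the usual drain-then-re-enable loop ("consume" is an
 * illustrative helper):
 *
 *	again:
 *		while ((buf = virtqueue_get_buf(vq, &len)))
 *			consume(buf, len);
 *		if (!virtqueue_enable_cb(vq))
 *			goto again;
 */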
2180
2181/**
2182 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
a5581206 2183 * @_vq: the struct virtqueue we're talking about.
2184 *
2185 * This re-enables callbacks but hints to the other side to delay
2186 * interrupts until most of the available buffers have been processed;
2187 * it returns "false" if there are many pending buffers in the queue,
2188 * to detect a possible race between the driver checking for more work,
2189 * and enabling callbacks.
2190 *
2191 * Caller must ensure we don't call this with other virtqueue
2192 * operations at the same time (except where noted).
2193 */
2194bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
2195{
2196 struct vring_virtqueue *vq = to_vvq(_vq);
2197
2198 if (vq->event_triggered)
2199 vq->event_triggered = false;
2200
2201 return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
2202 virtqueue_enable_cb_delayed_split(_vq);
2203}
2204EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
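
/*
 * Sketch, in the style of a transmit-completion path: free what is already
 * done, then ask for a delayed callback ("consume_done_tx" and the
 * "clean_more" label are illustrative).
 *
 *	while ((buf = virtqueue_get_buf(vq, &len)))
 *		consume_done_tx(buf, len);
 *	if (!virtqueue_enable_cb_delayed(vq))
 *		goto clean_more;	// completions already pending
 */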
2205
2206/**
2207 * virtqueue_detach_unused_buf - detach first unused buffer
a5581206 2208 * @_vq: the struct virtqueue we're talking about.
2209 *
2210 * Returns NULL or the "data" token handed to virtqueue_add_*().
2211 * This is not valid on an active queue; it is useful for device
 2212 * shutdown or queue reset.
2213 */
2214void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
2215{
2216 struct vring_virtqueue *vq = to_vvq(_vq);
2217
2218 return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
2219 virtqueue_detach_unused_buf_split(_vq);
138fd251 2220}
7c5e9ed0 2221EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
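
/*
 * Sketch of draining a queue on device removal, after the device has been
 * reset so it no longer touches the ring (the buffers here are assumed to
 * have been kmalloc'ed by the hypothetical driver):
 *
 *	while ((buf = virtqueue_detach_unused_buf(vq)))
 *		kfree(buf);
 */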
c021eac4 2222
2223static inline bool more_used(const struct vring_virtqueue *vq)
2224{
1ce9e605 2225 return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
2226}
2227
2228irqreturn_t vring_interrupt(int irq, void *_vq)
2229{
2230 struct vring_virtqueue *vq = to_vvq(_vq);
2231
2232 if (!more_used(vq)) {
2233 pr_debug("virtqueue interrupt with no work for %p\n", vq);
2234 return IRQ_NONE;
2235 }
2236
8b4ec69d 2237 if (unlikely(vq->broken)) {
c346dae4 2238#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2239 dev_warn_once(&vq->vq.vdev->dev,
2240 "virtio vring IRQ raised before DRIVER_OK");
2241 return IRQ_NONE;
2242#else
2243 return IRQ_HANDLED;
2244#endif
8b4ec69d 2245 }
0a8a69dd 2246
2247 /* Just a hint for performance: so it's ok that this can be racy! */
2248 if (vq->event)
2249 vq->event_triggered = true;
2250
0a8a69dd 2251 pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
2252 if (vq->vq.callback)
2253 vq->vq.callback(&vq->vq);
2254
2255 return IRQ_HANDLED;
2256}
c6fd4701 2257EXPORT_SYMBOL_GPL(vring_interrupt);
0a8a69dd 2258
1ce9e605 2259/* Only available for split ring */
07d9629d 2260static struct virtqueue *__vring_new_virtqueue(unsigned int index,
cd4c812a 2261 struct vring_virtqueue_split *vring_split,
2262 struct virtio_device *vdev,
2263 bool weak_barriers,
2264 bool context,
2265 bool (*notify)(struct virtqueue *),
2266 void (*callback)(struct virtqueue *),
2267 const char *name)
0a8a69dd 2268{
2a2d1382 2269 struct vring_virtqueue *vq;
a2b36c8d 2270 int err;
0a8a69dd 2271
2272 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2273 return NULL;
2274
cbeedb72 2275 vq = kmalloc(sizeof(*vq), GFP_KERNEL);
2276 if (!vq)
2277 return NULL;
2278
1ce9e605 2279 vq->packed_ring = false;
2280 vq->vq.callback = callback;
2281 vq->vq.vdev = vdev;
9499f5e7 2282 vq->vq.name = name;
06ca287d 2283 vq->vq.index = index;
2a2d1382 2284 vq->we_own_ring = false;
0a8a69dd 2285 vq->notify = notify;
7b21e34f 2286 vq->weak_barriers = weak_barriers;
c346dae4 2287#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
8b4ec69d 2288 vq->broken = true;
2289#else
2290 vq->broken = false;
2291#endif
fb3fba6b 2292 vq->use_dma_api = vring_use_dma_api(vdev);
0a8a69dd 2293
2294 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2295 !context;
a5c262c5 2296 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
9fa29b9d 2297
2298 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2299 vq->weak_barriers = false;
2300
2301 vq->split.queue_dma_addr = 0;
2302 vq->split.queue_size_in_bytes = 0;
2303
cd4c812a 2304 vq->split.vring = vring_split->vring;
2305 vq->split.avail_flags_shadow = 0;
2306 vq->split.avail_idx_shadow = 0;
2307
0a8a69dd 2308 /* No callback? Tell other side not to bother us. */
f277ec42 2309 if (!callback) {
e593bf97 2310 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
0ea1e4a6 2311 if (!vq->event)
2312 vq->split.vring.avail->flags = cpu_to_virtio16(vdev,
2313 vq->split.avail_flags_shadow);
f277ec42 2314 }
0a8a69dd 2315
2316 err = vring_alloc_state_extra_split(vring_split);
2317 if (err) {
2318 kfree(vq);
2319 return NULL;
2320 }
72b5e895 2321
0a8a69dd 2322 /* Put everything in free lists. */
0a8a69dd 2323 vq->free_head = 0;
2324
2325 vq->split.desc_state = vring_split->desc_state;
2326 vq->split.desc_extra = vring_split->desc_extra;
0a8a69dd 2327
cd4c812a 2328 virtqueue_init(vq, vring_split->vring.num);
3a897128 2329
0e566c8f 2330 spin_lock(&vdev->vqs_list_lock);
e152d8af 2331 list_add_tail(&vq->vq.list, &vdev->vqs);
0e566c8f 2332 spin_unlock(&vdev->vqs_list_lock);
2333 return &vq->vq;
2334}
2a2d1382 2335
2336struct virtqueue *vring_create_virtqueue(
2337 unsigned int index,
2338 unsigned int num,
2339 unsigned int vring_align,
2340 struct virtio_device *vdev,
2341 bool weak_barriers,
2342 bool may_reduce_num,
f94682dd 2343 bool context,
2344 bool (*notify)(struct virtqueue *),
2345 void (*callback)(struct virtqueue *),
2346 const char *name)
2347{
2348
2349 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2350 return vring_create_virtqueue_packed(index, num, vring_align,
2351 vdev, weak_barriers, may_reduce_num,
2352 context, notify, callback, name);
2353
2354 return vring_create_virtqueue_split(index, num, vring_align,
2355 vdev, weak_barriers, may_reduce_num,
2356 context, notify, callback, name);
2357}
2358EXPORT_SYMBOL_GPL(vring_create_virtqueue);
2359
1ce9e605 2360/* Only available for split ring */
2361struct virtqueue *vring_new_virtqueue(unsigned int index,
2362 unsigned int num,
2363 unsigned int vring_align,
2364 struct virtio_device *vdev,
2365 bool weak_barriers,
f94682dd 2366 bool context,
2367 void *pages,
2368 bool (*notify)(struct virtqueue *vq),
2369 void (*callback)(struct virtqueue *vq),
2370 const char *name)
2371{
cd4c812a 2372 struct vring_virtqueue_split vring_split = {};
2373
2374 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2375 return NULL;
2376
2377 vring_init(&vring_split.vring, num, pages, vring_align);
2378 return __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers,
2379 context, notify, callback, name);
2a2d1382 2380}
c6fd4701 2381EXPORT_SYMBOL_GPL(vring_new_virtqueue);
0a8a69dd 2382
3ea19e32 2383static void vring_free(struct virtqueue *_vq)
0a8a69dd 2384{
2385 struct vring_virtqueue *vq = to_vvq(_vq);
2386
2387 if (vq->we_own_ring) {
2388 if (vq->packed_ring) {
2389 vring_free_queue(vq->vq.vdev,
2390 vq->packed.ring_size_in_bytes,
2391 vq->packed.vring.desc,
2392 vq->packed.ring_dma_addr);
2393
2394 vring_free_queue(vq->vq.vdev,
2395 vq->packed.event_size_in_bytes,
2396 vq->packed.vring.driver,
2397 vq->packed.driver_event_dma_addr);
2398
2399 vring_free_queue(vq->vq.vdev,
2400 vq->packed.event_size_in_bytes,
2401 vq->packed.vring.device,
2402 vq->packed.device_event_dma_addr);
2403
2404 kfree(vq->packed.desc_state);
2405 kfree(vq->packed.desc_extra);
2406 } else {
2407 vring_free_queue(vq->vq.vdev,
2408 vq->split.queue_size_in_bytes,
2409 vq->split.vring.desc,
2410 vq->split.queue_dma_addr);
1ce9e605 2411 }
2a2d1382 2412 }
72b5e895 2413 if (!vq->packed_ring) {
f13f09a1 2414 kfree(vq->split.desc_state);
2415 kfree(vq->split.desc_extra);
2416 }
2417}
2418
2419void vring_del_virtqueue(struct virtqueue *_vq)
2420{
2421 struct vring_virtqueue *vq = to_vvq(_vq);
2422
2423 spin_lock(&vq->vq.vdev->vqs_list_lock);
2424 list_del(&_vq->list);
2425 spin_unlock(&vq->vq.vdev->vqs_list_lock);
2426
2427 vring_free(_vq);
2428
2a2d1382 2429 kfree(vq);
0a8a69dd 2430}
c6fd4701 2431EXPORT_SYMBOL_GPL(vring_del_virtqueue);
0a8a69dd 2432
2433/* Manipulates transport-specific feature bits. */
2434void vring_transport_features(struct virtio_device *vdev)
2435{
2436 unsigned int i;
2437
2438 for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
2439 switch (i) {
2440 case VIRTIO_RING_F_INDIRECT_DESC:
2441 break;
2442 case VIRTIO_RING_F_EVENT_IDX:
2443 break;
2444 case VIRTIO_F_VERSION_1:
2445 break;
321bd212 2446 case VIRTIO_F_ACCESS_PLATFORM:
1a937693 2447 break;
2448 case VIRTIO_F_RING_PACKED:
2449 break;
2450 case VIRTIO_F_ORDER_PLATFORM:
2451 break;
2452 default:
2453 /* We don't understand this bit. */
e16e12be 2454 __virtio_clear_bit(vdev, i);
2455 }
2456 }
2457}
2458EXPORT_SYMBOL_GPL(vring_transport_features);
2459
2460/**
2461 * virtqueue_get_vring_size - return the size of the virtqueue's vring
a5581206 2462 * @_vq: the struct virtqueue containing the vring of interest.
2463 *
2464 * Returns the size of the vring. This is mainly used for boasting to
2465 * userspace. Unlike other operations, this need not be serialized.
2466 */
2467unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
2468{
2469
2470 struct vring_virtqueue *vq = to_vvq(_vq);
2471
1ce9e605 2472 return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
2473}
2474EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
2475
2476bool virtqueue_is_broken(struct virtqueue *_vq)
2477{
2478 struct vring_virtqueue *vq = to_vvq(_vq);
2479
60f07798 2480 return READ_ONCE(vq->broken);
2481}
2482EXPORT_SYMBOL_GPL(virtqueue_is_broken);
2483
2484/*
2485 * This should prevent the device from being used, allowing drivers to
2486 * recover. You may need to grab appropriate locks to flush.
2487 */
2488void virtio_break_device(struct virtio_device *dev)
2489{
2490 struct virtqueue *_vq;
2491
0e566c8f 2492 spin_lock(&dev->vqs_list_lock);
2493 list_for_each_entry(_vq, &dev->vqs, list) {
2494 struct vring_virtqueue *vq = to_vvq(_vq);
2495
2496 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2497 WRITE_ONCE(vq->broken, true);
e2dcdfe9 2498 }
0e566c8f 2499 spin_unlock(&dev->vqs_list_lock);
2500}
2501EXPORT_SYMBOL_GPL(virtio_break_device);
2502
2503/*
2504 * This should allow the device to be used by the driver. You may
2505 * need to grab appropriate locks to flush the write to
 2507 * vq->broken. This should only be used in specific cases, e.g.
 2508 * probing and restoring. This function should only be called by the
2508 * core, not directly by the driver.
2509 */
2510void __virtio_unbreak_device(struct virtio_device *dev)
2511{
2512 struct virtqueue *_vq;
2513
2514 spin_lock(&dev->vqs_list_lock);
2515 list_for_each_entry(_vq, &dev->vqs, list) {
2516 struct vring_virtqueue *vq = to_vvq(_vq);
2517
2518 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2519 WRITE_ONCE(vq->broken, false);
2520 }
2521 spin_unlock(&dev->vqs_list_lock);
2522}
2523EXPORT_SYMBOL_GPL(__virtio_unbreak_device);
2524
2a2d1382 2525dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
2526{
2527 struct vring_virtqueue *vq = to_vvq(_vq);
2528
2529 BUG_ON(!vq->we_own_ring);
2530
2531 if (vq->packed_ring)
2532 return vq->packed.ring_dma_addr;
2533
d79dca75 2534 return vq->split.queue_dma_addr;
89062652 2535}
2a2d1382 2536EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
89062652 2537
2a2d1382 2538dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
2539{
2540 struct vring_virtqueue *vq = to_vvq(_vq);
2541
2542 BUG_ON(!vq->we_own_ring);
2543
2544 if (vq->packed_ring)
2545 return vq->packed.driver_event_dma_addr;
2546
d79dca75 2547 return vq->split.queue_dma_addr +
e593bf97 2548 ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
2549}
2550EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
2551
2552dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
2553{
2554 struct vring_virtqueue *vq = to_vvq(_vq);
2555
2556 BUG_ON(!vq->we_own_ring);
2557
2558 if (vq->packed_ring)
2559 return vq->packed.device_event_dma_addr;
2560
d79dca75 2561 return vq->split.queue_dma_addr +
e593bf97 2562 ((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
2563}
2564EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
2565
1ce9e605 2566/* Only available for split ring */
2567const struct vring *virtqueue_get_vring(struct virtqueue *vq)
2568{
e593bf97 2569 return &to_vvq(vq)->split.vring;
89062652 2570}
2a2d1382 2571EXPORT_SYMBOL_GPL(virtqueue_get_vring);
89062652 2572
c6fd4701 2573MODULE_LICENSE("GPL");