virtio_ring: split: extract the logic of vring init
drivers/virtio/virtio_ring.c
fd534e9b 1// SPDX-License-Identifier: GPL-2.0-or-later
2/* Virtio ring implementation.
3 *
4 * Copyright 2007 Rusty Russell IBM Corporation
5 */
6#include <linux/virtio.h>
7#include <linux/virtio_ring.h>
e34f8725 8#include <linux/virtio_config.h>
0a8a69dd 9#include <linux/device.h>
5a0e3ad6 10#include <linux/slab.h>
b5a2c4f1 11#include <linux/module.h>
e93300b1 12#include <linux/hrtimer.h>
780bc790 13#include <linux/dma-mapping.h>
f8ce7263 14#include <linux/spinlock.h>
78fe3987 15#include <xen/xen.h>
16
17#ifdef DEBUG
18/* For development, we want to crash whenever the ring is screwed. */
19#define BAD_RING(_vq, fmt, args...) \
20 do { \
21 dev_err(&(_vq)->vq.vdev->dev, \
22 "%s:"fmt, (_vq)->vq.name, ##args); \
23 BUG(); \
24 } while (0)
25/* Caller is supposed to guarantee no reentry. */
26#define START_USE(_vq) \
27 do { \
28 if ((_vq)->in_use) \
29 panic("%s:in_use = %i\n", \
30 (_vq)->vq.name, (_vq)->in_use); \
c5f841f1 31 (_vq)->in_use = __LINE__; \
9499f5e7 32 } while (0)
3a35ce7d 33#define END_USE(_vq) \
97a545ab 34 do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
35#define LAST_ADD_TIME_UPDATE(_vq) \
36 do { \
37 ktime_t now = ktime_get(); \
38 \
39 /* No kick or get, with .1 second between? Warn. */ \
40 if ((_vq)->last_add_time_valid) \
41 WARN_ON(ktime_to_ms(ktime_sub(now, \
42 (_vq)->last_add_time)) > 100); \
43 (_vq)->last_add_time = now; \
44 (_vq)->last_add_time_valid = true; \
45 } while (0)
46#define LAST_ADD_TIME_CHECK(_vq) \
47 do { \
48 if ((_vq)->last_add_time_valid) { \
49 WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
50 (_vq)->last_add_time)) > 100); \
51 } \
52 } while (0)
53#define LAST_ADD_TIME_INVALID(_vq) \
54 ((_vq)->last_add_time_valid = false)
0a8a69dd 55#else
56#define BAD_RING(_vq, fmt, args...) \
57 do { \
58 dev_err(&_vq->vq.vdev->dev, \
59 "%s:"fmt, (_vq)->vq.name, ##args); \
60 (_vq)->broken = true; \
61 } while (0)
62#define START_USE(vq)
63#define END_USE(vq)
64#define LAST_ADD_TIME_UPDATE(vq)
65#define LAST_ADD_TIME_CHECK(vq)
66#define LAST_ADD_TIME_INVALID(vq)
67#endif
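/*
 * Illustrative sketch (not part of virtio_ring.c): how the debug helpers
 * above are meant to bracket every virtqueue operation.  The fields used
 * here (broken, in_use, ...) belong to struct vring_virtqueue, defined
 * further down in this file.
 */
#ifdef VRING_DEBUG_USAGE_EXAMPLE	/* hypothetical guard, never defined */
static bool example_bracketed_op(struct vring_virtqueue *vq)
{
	START_USE(vq);			/* panics on re-entry when DEBUG is set */
	if (unlikely(vq->broken)) {	/* never touch a ring the device broke */
		END_USE(vq);
		return false;
	}
	LAST_ADD_TIME_UPDATE(vq);	/* warn if >100ms passed since the last add */
	END_USE(vq);
	return true;
}
#endif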
68
cbeedb72 69struct vring_desc_state_split {
70 void *data; /* Data for callback. */
71 struct vring_desc *indir_desc; /* Indirect descriptor, if any. */
72};
73
74struct vring_desc_state_packed {
75 void *data; /* Data for callback. */
76 struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
77 u16 num; /* Descriptor list length. */
78 u16 last; /* The last desc state in a list. */
79};
80
1f28750f 81struct vring_desc_extra {
82 dma_addr_t addr; /* Descriptor DMA addr. */
83 u32 len; /* Descriptor length. */
1ce9e605 84 u16 flags; /* Descriptor flags. */
aeef9b47 85 u16 next; /* The next desc state in a list. */
86};
87
88struct vring_virtqueue_split {
89 /* Actual memory layout for this queue. */
90 struct vring vring;
91
92 /* Last written value to avail->flags */
93 u16 avail_flags_shadow;
94
95 /*
96 * Last written value to avail->idx in
97 * guest byte order.
98 */
99 u16 avail_idx_shadow;
100
101 /* Per-descriptor state. */
102 struct vring_desc_state_split *desc_state;
103 struct vring_desc_extra *desc_extra;
104
105 /* DMA address and size information */
106 dma_addr_t queue_dma_addr;
107 size_t queue_size_in_bytes;
108};
109
110struct vring_virtqueue_packed {
111 /* Actual memory layout for this queue. */
112 struct {
113 unsigned int num;
114 struct vring_packed_desc *desc;
115 struct vring_packed_desc_event *driver;
116 struct vring_packed_desc_event *device;
117 } vring;
118
119 /* Driver ring wrap counter. */
120 bool avail_wrap_counter;
121
122 /* Avail used flags. */
123 u16 avail_used_flags;
124
125 /* Index of the next avail descriptor. */
126 u16 next_avail_idx;
127
128 /*
129 * Last written value to driver->flags in
130 * guest byte order.
131 */
132 u16 event_flags_shadow;
133
134 /* Per-descriptor state. */
135 struct vring_desc_state_packed *desc_state;
136 struct vring_desc_extra *desc_extra;
137
138 /* DMA address and size information */
139 dma_addr_t ring_dma_addr;
140 dma_addr_t driver_event_dma_addr;
141 dma_addr_t device_event_dma_addr;
142 size_t ring_size_in_bytes;
143 size_t event_size_in_bytes;
144};
145
43b4f721 146struct vring_virtqueue {
147 struct virtqueue vq;
148
149 /* Is this a packed ring? */
150 bool packed_ring;
151
152 /* Is DMA API used? */
153 bool use_dma_api;
154
155 /* Can we use weak barriers? */
156 bool weak_barriers;
157
158 /* Other side has made a mess, don't try any more. */
159 bool broken;
160
161 /* Host supports indirect buffers */
162 bool indirect;
163
164 /* Host publishes avail event idx */
165 bool event;
166
167 /* Head of free buffer list. */
168 unsigned int free_head;
169 /* Number we've added since last sync. */
170 unsigned int num_added;
171
a7722890 172 /* Last used index we've seen.
 173 * for split ring, it just contains the last used index;
 174 * for packed ring:
 175 * bits up to VRING_PACKED_EVENT_F_WRAP_CTR contain the last used index;
 176 * bits from VRING_PACKED_EVENT_F_WRAP_CTR contain the used wrap counter.
177 */
1bc4953e 178 u16 last_used_idx;
0a8a69dd 179
180 /* Hint for event idx: already triggered no need to disable. */
181 bool event_triggered;
182
183 union {
184 /* Available for split ring */
d76136e4 185 struct vring_virtqueue_split split;
e593bf97 186
1ce9e605 187 /* Available for packed ring */
d76136e4 188 struct vring_virtqueue_packed packed;
1ce9e605 189 };
f277ec42 190
0a8a69dd 191 /* How to notify other side. FIXME: commonalize hcalls! */
46f9c2b9 192 bool (*notify)(struct virtqueue *vq);
0a8a69dd 193
194 /* DMA, allocation, and size information */
195 bool we_own_ring;
2a2d1382 196
197#ifdef DEBUG
198 /* They're supposed to lock for us. */
199 unsigned int in_use;
200
201 /* Figure out if their kicks are too delayed. */
202 bool last_add_time_valid;
203 ktime_t last_add_time;
0a8a69dd 204#endif
205};
206
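/*
 * Illustrative sketch (not part of this file): the bit layout of
 * last_used_idx on packed rings, as described in the comment inside the
 * struct above.  The file's own packed_last_used() and
 * packed_used_wrap_counter() helpers further down implement exactly this;
 * the name here is made up for the example.
 */
static inline u16 example_pack_last_used(u16 used_idx, bool wrap_counter)
{
	/* VRING_PACKED_EVENT_F_WRAP_CTR is bit 15, from uapi/linux/virtio_ring.h */
	return used_idx | ((u16)wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR);
}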
07d9629d 207static struct virtqueue *__vring_new_virtqueue(unsigned int index,
cd4c812a 208 struct vring_virtqueue_split *vring_split,
209 struct virtio_device *vdev,
210 bool weak_barriers,
211 bool context,
212 bool (*notify)(struct virtqueue *),
213 void (*callback)(struct virtqueue *),
214 const char *name);
a2b36c8d 215static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num);
216
217/*
218 * Helpers.
219 */
220
221#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
222
35c51e09 223static inline bool virtqueue_use_indirect(struct vring_virtqueue *vq,
224 unsigned int total_sg)
225{
226 /*
227 * If the host supports indirect descriptor tables, and we have multiple
228 * buffers, then go indirect. FIXME: tune this threshold
229 */
230 return (vq->indirect && total_sg > 1 && vq->vq.num_free);
231}
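/*
 * Illustrative sketch (not part of this file): the practical effect of the
 * heuristic above on main-ring usage, mirroring the choice made later in
 * virtqueue_add_split()/virtqueue_add_packed().  An indirect request
 * occupies a single ring slot regardless of its scatterlist length.
 */
static inline unsigned int example_ring_slots_needed(struct vring_virtqueue *vq,
						     unsigned int total_sg)
{
	return virtqueue_use_indirect(vq, total_sg) ? 1 : total_sg;
}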
232
d26c96c8 233/*
234 * Modern virtio devices have feature bits to specify whether they need a
235 * quirk and bypass the IOMMU. If not there, just use the DMA API.
236 *
237 * If there, the interaction between virtio and DMA API is messy.
238 *
239 * On most systems with virtio, physical addresses match bus addresses,
240 * and it doesn't particularly matter whether we use the DMA API.
241 *
242 * On some systems, including Xen and any system with a physical device
243 * that speaks virtio behind a physical IOMMU, we must use the DMA API
244 * for virtio DMA to work at all.
245 *
246 * On other systems, including SPARC and PPC64, virtio-pci devices are
247 * enumerated as though they are behind an IOMMU, but the virtio host
248 * ignores the IOMMU, so we must either pretend that the IOMMU isn't
249 * there or somehow map everything as the identity.
250 *
251 * For the time being, we preserve historic behavior and bypass the DMA
252 * API.
253 *
254 * TODO: install a per-device DMA ops structure that does the right thing
255 * taking into account all the above quirks, and use the DMA API
256 * unconditionally on data path.
257 */
258
259static bool vring_use_dma_api(struct virtio_device *vdev)
260{
24b6842a 261 if (!virtio_has_dma_quirk(vdev))
262 return true;
263
264 /* Otherwise, we are left to guess. */
265 /*
 266 * In theory, it's possible to have a buggy QEMU-supplied
267 * emulated Q35 IOMMU and Xen enabled at the same time. On
268 * such a configuration, virtio has never worked and will
269 * not work without an even larger kludge. Instead, enable
270 * the DMA API if we're a Xen guest, which at least allows
271 * all of the sensible Xen configurations to work correctly.
272 */
273 if (xen_domain())
274 return true;
275
276 return false;
277}
278
279size_t virtio_max_dma_size(struct virtio_device *vdev)
280{
281 size_t max_segment_size = SIZE_MAX;
282
283 if (vring_use_dma_api(vdev))
817fc978 284 max_segment_size = dma_max_mapping_size(vdev->dev.parent);
285
286 return max_segment_size;
287}
288EXPORT_SYMBOL_GPL(virtio_max_dma_size);
289
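/*
 * Illustrative usage sketch (not part of this file): a virtio block-style
 * driver capping its segment size to what the DMA layer can map, which is
 * roughly how virtio-blk consumes the helper above.  Assumes
 * <linux/blkdev.h> for struct request_queue; the function name is made up.
 */
static void example_cap_segment_size(struct virtio_device *vdev,
				     struct request_queue *q)
{
	size_t max_seg = virtio_max_dma_size(vdev);

	/* Never build a segment the device-side DMA mapping could truncate. */
	blk_queue_max_segment_size(q, min_t(size_t, max_seg, U32_MAX));
}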
290static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
291 dma_addr_t *dma_handle, gfp_t flag)
292{
293 if (vring_use_dma_api(vdev)) {
294 return dma_alloc_coherent(vdev->dev.parent, size,
295 dma_handle, flag);
296 } else {
297 void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
298
299 if (queue) {
300 phys_addr_t phys_addr = virt_to_phys(queue);
301 *dma_handle = (dma_addr_t)phys_addr;
302
303 /*
 304 * Sanity check: make sure we didn't truncate
305 * the address. The only arches I can find that
306 * have 64-bit phys_addr_t but 32-bit dma_addr_t
307 * are certain non-highmem MIPS and x86
308 * configurations, but these configurations
309 * should never allocate physical pages above 32
310 * bits, so this is fine. Just in case, throw a
311 * warning and abort if we end up with an
312 * unrepresentable address.
313 */
314 if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
315 free_pages_exact(queue, PAGE_ALIGN(size));
316 return NULL;
317 }
318 }
319 return queue;
320 }
321}
322
323static void vring_free_queue(struct virtio_device *vdev, size_t size,
324 void *queue, dma_addr_t dma_handle)
325{
326 if (vring_use_dma_api(vdev))
327 dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
328 else
329 free_pages_exact(queue, PAGE_ALIGN(size));
330}
331
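/*
 * Illustrative sketch (not part of this file): the pairing contract of the
 * two helpers above.  Whichever path vring_alloc_queue() took (coherent DMA
 * memory or plain pages), vring_free_queue() must later be called with the
 * same vdev, size and DMA handle; the creation and free paths below keep
 * those three values around for exactly this reason.
 */
static inline void example_alloc_then_free(struct virtio_device *vdev, size_t size)
{
	dma_addr_t dma;
	void *queue = vring_alloc_queue(vdev, size, &dma, GFP_KERNEL | __GFP_ZERO);

	if (queue)
		vring_free_queue(vdev, size, queue, dma);
}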
332/*
333 * The DMA ops on various arches are rather gnarly right now, and
334 * making all of the arch DMA ops work on the vring device itself
335 * is a mess. For now, we use the parent device for DMA ops.
336 */
75bfa81b 337static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
338{
339 return vq->vq.vdev->dev.parent;
340}
341
342/* Map one sg entry. */
343static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
344 struct scatterlist *sg,
345 enum dma_data_direction direction)
346{
fb3fba6b 347 if (!vq->use_dma_api)
348 return (dma_addr_t)sg_phys(sg);
349
350 /*
351 * We can't use dma_map_sg, because we don't use scatterlists in
352 * the way it expects (we don't guarantee that the scatterlist
353 * will exist for the lifetime of the mapping).
354 */
355 return dma_map_page(vring_dma_dev(vq),
356 sg_page(sg), sg->offset, sg->length,
357 direction);
358}
359
360static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
361 void *cpu_addr, size_t size,
362 enum dma_data_direction direction)
363{
fb3fba6b 364 if (!vq->use_dma_api)
365 return (dma_addr_t)virt_to_phys(cpu_addr);
366
367 return dma_map_single(vring_dma_dev(vq),
368 cpu_addr, size, direction);
369}
370
371static int vring_mapping_error(const struct vring_virtqueue *vq,
372 dma_addr_t addr)
373{
fb3fba6b 374 if (!vq->use_dma_api)
375 return 0;
376
377 return dma_mapping_error(vring_dma_dev(vq), addr);
378}
379
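/*
 * Illustrative sketch (not part of this file): the map-then-check pattern
 * that every add path below follows for each scatterlist entry.
 */
static inline int example_map_one(struct vring_virtqueue *vq,
				  struct scatterlist *sg, dma_addr_t *addr)
{
	*addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
	if (vring_mapping_error(vq, *addr))
		return -ENOMEM;	/* caller unwinds earlier mappings */
	return 0;
}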
380static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
381{
382 vq->vq.num_free = num;
383
384 if (vq->packed_ring)
385 vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR);
386 else
387 vq->last_used_idx = 0;
388
389 vq->event_triggered = false;
390 vq->num_added = 0;
391
392#ifdef DEBUG
393 vq->in_use = false;
394 vq->last_add_time_valid = false;
395#endif
396}
397
398
399/*
400 * Split ring specific functions - *_split().
401 */
402
403static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
404 struct vring_desc *desc)
405{
406 u16 flags;
407
fb3fba6b 408 if (!vq->use_dma_api)
409 return;
410
411 flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
412
413 dma_unmap_page(vring_dma_dev(vq),
414 virtio64_to_cpu(vq->vq.vdev, desc->addr),
415 virtio32_to_cpu(vq->vq.vdev, desc->len),
416 (flags & VRING_DESC_F_WRITE) ?
417 DMA_FROM_DEVICE : DMA_TO_DEVICE);
780bc790
AL
418}
419
420static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
421 unsigned int i)
422{
423 struct vring_desc_extra *extra = vq->split.desc_extra;
424 u16 flags;
425
426 if (!vq->use_dma_api)
427 goto out;
428
429 flags = extra[i].flags;
430
431 if (flags & VRING_DESC_F_INDIRECT) {
432 dma_unmap_single(vring_dma_dev(vq),
433 extra[i].addr,
434 extra[i].len,
435 (flags & VRING_DESC_F_WRITE) ?
436 DMA_FROM_DEVICE : DMA_TO_DEVICE);
437 } else {
438 dma_unmap_page(vring_dma_dev(vq),
439 extra[i].addr,
440 extra[i].len,
441 (flags & VRING_DESC_F_WRITE) ?
442 DMA_FROM_DEVICE : DMA_TO_DEVICE);
443 }
444
445out:
446 return extra[i].next;
447}
448
138fd251
TB
449static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
450 unsigned int total_sg,
451 gfp_t gfp)
9fa29b9d
MM
452{
453 struct vring_desc *desc;
b25bd251 454 unsigned int i;
9fa29b9d 455
b92b1b89
WD
456 /*
457 * We require lowmem mappings for the descriptors because
458 * otherwise virt_to_phys will give us bogus addresses in the
459 * virtqueue.
460 */
82107539 461 gfp &= ~__GFP_HIGHMEM;
b92b1b89 462
6da2ec56 463 desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp);
9fa29b9d 464 if (!desc)
b25bd251 465 return NULL;
9fa29b9d 466
b25bd251 467 for (i = 0; i < total_sg; i++)
00e6f3d9 468 desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
b25bd251 469 return desc;
470}
471
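/*
 * Illustrative sketch (not part of this file): the pre-linked chain that
 * alloc_indirect_split() returns.  Entry i points at entry i + 1; the add
 * path below then fills in addr/len/flags and clears VRING_DESC_F_NEXT on
 * the last entry it actually uses.
 */
static inline bool example_indirect_chain_is_linked(struct virtqueue *_vq,
						    struct vring_desc *desc,
						    unsigned int total_sg)
{
	unsigned int i;

	for (i = 0; i < total_sg; i++)
		if (virtio16_to_cpu(_vq->vdev, desc[i].next) != i + 1)
			return false;
	return true;
}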
472static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
473 struct vring_desc *desc,
474 unsigned int i,
475 dma_addr_t addr,
476 unsigned int len,
72b5e895
JW
477 u16 flags,
478 bool indirect)
fe4c3862 479{
72b5e895
JW
480 struct vring_virtqueue *vring = to_vvq(vq);
481 struct vring_desc_extra *extra = vring->split.desc_extra;
482 u16 next;
483
fe4c3862
JW
484 desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
485 desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
486 desc[i].len = cpu_to_virtio32(vq->vdev, len);
487
72b5e895
JW
488 if (!indirect) {
489 next = extra[i].next;
490 desc[i].next = cpu_to_virtio16(vq->vdev, next);
491
492 extra[i].addr = addr;
493 extra[i].len = len;
494 extra[i].flags = flags;
495 } else
496 next = virtio16_to_cpu(vq->vdev, desc[i].next);
497
498 return next;
fe4c3862
JW
499}
500
138fd251
TB
501static inline int virtqueue_add_split(struct virtqueue *_vq,
502 struct scatterlist *sgs[],
503 unsigned int total_sg,
504 unsigned int out_sgs,
505 unsigned int in_sgs,
506 void *data,
507 void *ctx,
508 gfp_t gfp)
0a8a69dd
RR
509{
510 struct vring_virtqueue *vq = to_vvq(_vq);
13816c76 511 struct scatterlist *sg;
b25bd251 512 struct vring_desc *desc;
3f649ab7 513 unsigned int i, n, avail, descs_used, prev, err_idx;
1fe9b6fe 514 int head;
b25bd251 515 bool indirect;
0a8a69dd 516
9fa29b9d
MM
517 START_USE(vq);
518
0a8a69dd 519 BUG_ON(data == NULL);
5a08b04f 520 BUG_ON(ctx && vq->indirect);
9fa29b9d 521
70670444
RR
522 if (unlikely(vq->broken)) {
523 END_USE(vq);
524 return -EIO;
525 }
526
4d6a105e 527 LAST_ADD_TIME_UPDATE(vq);
e93300b1 528
b25bd251
RR
529 BUG_ON(total_sg == 0);
530
531 head = vq->free_head;
532
35c51e09 533 if (virtqueue_use_indirect(vq, total_sg))
138fd251 534 desc = alloc_indirect_split(_vq, total_sg, gfp);
44ed8089 535 else {
b25bd251 536 desc = NULL;
e593bf97 537 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
44ed8089 538 }
b25bd251
RR
539
540 if (desc) {
541 /* Use a single buffer which doesn't continue */
780bc790 542 indirect = true;
b25bd251
RR
543 /* Set up rest to use this indirect table. */
544 i = 0;
545 descs_used = 1;
b25bd251 546 } else {
780bc790 547 indirect = false;
e593bf97 548 desc = vq->split.vring.desc;
b25bd251
RR
549 i = head;
550 descs_used = total_sg;
9fa29b9d
MM
551 }
552
b4b4ff73 553 if (unlikely(vq->vq.num_free < descs_used)) {
0a8a69dd 554 pr_debug("Can't add buf len %i - avail = %i\n",
b25bd251 555 descs_used, vq->vq.num_free);
44653eae
RR
556 /* FIXME: for historical reasons, we force a notify here if
557 * there are outgoing parts to the buffer. Presumably the
558 * host should service the ring ASAP. */
13816c76 559 if (out_sgs)
44653eae 560 vq->notify(&vq->vq);
58625edf
WY
561 if (indirect)
562 kfree(desc);
0a8a69dd
RR
563 END_USE(vq);
564 return -ENOSPC;
565 }
566
13816c76 567 for (n = 0; n < out_sgs; n++) {
eeebf9b1 568 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
780bc790
AL
569 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
570 if (vring_mapping_error(vq, addr))
571 goto unmap_release;
572
13816c76 573 prev = i;
 574 /* Note that we trust the indirect descriptor
 575 * table since it uses streaming DMA mapping.
 576 */
fe4c3862 577 i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
72b5e895
JW
578 VRING_DESC_F_NEXT,
579 indirect);
13816c76 580 }
0a8a69dd 581 }
13816c76 582 for (; n < (out_sgs + in_sgs); n++) {
eeebf9b1 583 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
780bc790
AL
584 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
585 if (vring_mapping_error(vq, addr))
586 goto unmap_release;
587
13816c76 588 prev = i;
 589 /* Note that we trust the indirect descriptor
 590 * table since it uses streaming DMA mapping.
 591 */
fe4c3862
JW
592 i = virtqueue_add_desc_split(_vq, desc, i, addr,
593 sg->length,
594 VRING_DESC_F_NEXT |
72b5e895
JW
595 VRING_DESC_F_WRITE,
596 indirect);
13816c76 597 }
0a8a69dd
RR
598 }
599 /* Last one doesn't continue. */
00e6f3d9 600 desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
72b5e895 601 if (!indirect && vq->use_dma_api)
890d3356 602 vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &=
72b5e895 603 ~VRING_DESC_F_NEXT;
0a8a69dd 604
780bc790
AL
605 if (indirect) {
606 /* Now that the indirect table is filled in, map it. */
607 dma_addr_t addr = vring_map_single(
608 vq, desc, total_sg * sizeof(struct vring_desc),
609 DMA_TO_DEVICE);
610 if (vring_mapping_error(vq, addr))
611 goto unmap_release;
612
fe4c3862
JW
613 virtqueue_add_desc_split(_vq, vq->split.vring.desc,
614 head, addr,
615 total_sg * sizeof(struct vring_desc),
72b5e895
JW
616 VRING_DESC_F_INDIRECT,
617 false);
780bc790
AL
618 }
619
620 /* We're using some buffers from the free list. */
621 vq->vq.num_free -= descs_used;
622
0a8a69dd 623 /* Update free pointer */
b25bd251 624 if (indirect)
72b5e895 625 vq->free_head = vq->split.desc_extra[head].next;
b25bd251
RR
626 else
627 vq->free_head = i;
0a8a69dd 628
780bc790 629 /* Store token and indirect buffer state. */
cbeedb72 630 vq->split.desc_state[head].data = data;
780bc790 631 if (indirect)
cbeedb72 632 vq->split.desc_state[head].indir_desc = desc;
87646a34 633 else
cbeedb72 634 vq->split.desc_state[head].indir_desc = ctx;
635
636 /* Put entry in available array (but don't update avail->idx until they
3b720b8c 637 * do sync). */
638 avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
639 vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
0a8a69dd 640
ee7cd898
RR
641 /* Descriptors and available array need to be set before we expose the
642 * new available array entries. */
a9a0fef7 643 virtio_wmb(vq->weak_barriers);
e593bf97
TB
644 vq->split.avail_idx_shadow++;
645 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
646 vq->split.avail_idx_shadow);
ee7cd898
RR
647 vq->num_added++;
648
5e05bf58
TH
649 pr_debug("Added buffer head %i to %p\n", head, vq);
650 END_USE(vq);
651
ee7cd898
RR
652 /* This is very unlikely, but theoretically possible. Kick
653 * just in case. */
654 if (unlikely(vq->num_added == (1 << 16) - 1))
655 virtqueue_kick(_vq);
656
98e8c6bc 657 return 0;
780bc790
AL
658
659unmap_release:
660 err_idx = i;
cf8f1696
ML
661
662 if (indirect)
663 i = 0;
664 else
665 i = head;
780bc790
AL
666
667 for (n = 0; n < total_sg; n++) {
668 if (i == err_idx)
669 break;
72b5e895
JW
670 if (indirect) {
671 vring_unmap_one_split_indirect(vq, &desc[i]);
672 i = virtio16_to_cpu(_vq->vdev, desc[i].next);
673 } else
674 i = vring_unmap_one_split(vq, i);
780bc790
AL
675 }
676
780bc790
AL
677 if (indirect)
678 kfree(desc);
679
3cc36f6e 680 END_USE(vq);
f7728002 681 return -ENOMEM;
0a8a69dd 682}
13816c76 683
138fd251 684static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
685{
686 struct vring_virtqueue *vq = to_vvq(_vq);
a5c262c5 687 u16 new, old;
688 bool needs_kick;
689
0a8a69dd 690 START_USE(vq);
691 /* We need to expose available array entries before checking avail
692 * event. */
a9a0fef7 693 virtio_mb(vq->weak_barriers);
0a8a69dd 694
695 old = vq->split.avail_idx_shadow - vq->num_added;
696 new = vq->split.avail_idx_shadow;
697 vq->num_added = 0;
698
699 LAST_ADD_TIME_CHECK(vq);
700 LAST_ADD_TIME_INVALID(vq);
e93300b1 701
41f0377f 702 if (vq->event) {
703 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
704 vring_avail_event(&vq->split.vring)),
705 new, old);
706 } else {
707 needs_kick = !(vq->split.vring.used->flags &
708 cpu_to_virtio16(_vq->vdev,
709 VRING_USED_F_NO_NOTIFY));
41f0377f 710 }
0a8a69dd 711 END_USE(vq);
712 return needs_kick;
713}
138fd251 714
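/*
 * Worked example (not part of this file) of the event-index test used above.
 * vring_need_event() from uapi/linux/virtio_ring.h is
 *   (u16)(new - event_idx - 1) < (u16)(new - old)
 * i.e. "did the batch we just published, (old, new], cross the index the
 * device asked to be notified at?".
 */
static inline void example_need_event(void)
{
	u16 old = 10, new = 13;

	/* Device asked for a kick once entry 11 is available: 11 is in (10, 13]. */
	BUG_ON(!vring_need_event(11, new, old));

	/* Device asked for entry 14: not published yet, no kick needed. */
	BUG_ON(vring_need_event(14, new, old));
}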
715static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
716 void **ctx)
0a8a69dd 717{
780bc790 718 unsigned int i, j;
c60923cb 719 __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
0a8a69dd
RR
720
721 /* Clear data ptr. */
cbeedb72 722 vq->split.desc_state[head].data = NULL;
0a8a69dd 723
780bc790 724 /* Put back on free list: unmap first-level descriptors and find end */
0a8a69dd 725 i = head;
9fa29b9d 726
e593bf97 727 while (vq->split.vring.desc[i].flags & nextflag) {
72b5e895
JW
728 vring_unmap_one_split(vq, i);
729 i = vq->split.desc_extra[i].next;
06ca287d 730 vq->vq.num_free++;
0a8a69dd
RR
731 }
732
72b5e895
JW
733 vring_unmap_one_split(vq, i);
734 vq->split.desc_extra[i].next = vq->free_head;
0a8a69dd 735 vq->free_head = head;
780bc790 736
0a8a69dd 737 /* Plus final descriptor */
06ca287d 738 vq->vq.num_free++;
780bc790 739
5a08b04f 740 if (vq->indirect) {
cbeedb72
TB
741 struct vring_desc *indir_desc =
742 vq->split.desc_state[head].indir_desc;
5a08b04f
MT
743 u32 len;
744
745 /* Free the indirect table, if any, now that it's unmapped. */
746 if (!indir_desc)
747 return;
748
72b5e895 749 len = vq->split.desc_extra[head].len;
780bc790 750
751 BUG_ON(!(vq->split.desc_extra[head].flags &
752 VRING_DESC_F_INDIRECT));
780bc790
AL
753 BUG_ON(len == 0 || len % sizeof(struct vring_desc));
754
755 for (j = 0; j < len / sizeof(struct vring_desc); j++)
72b5e895 756 vring_unmap_one_split_indirect(vq, &indir_desc[j]);
780bc790 757
5a08b04f 758 kfree(indir_desc);
cbeedb72 759 vq->split.desc_state[head].indir_desc = NULL;
5a08b04f 760 } else if (ctx) {
cbeedb72 761 *ctx = vq->split.desc_state[head].indir_desc;
780bc790 762 }
0a8a69dd
RR
763}
764
138fd251 765static inline bool more_used_split(const struct vring_virtqueue *vq)
0a8a69dd 766{
e593bf97
TB
767 return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
768 vq->split.vring.used->idx);
0a8a69dd
RR
769}
770
138fd251
TB
771static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
772 unsigned int *len,
773 void **ctx)
0a8a69dd
RR
774{
775 struct vring_virtqueue *vq = to_vvq(_vq);
776 void *ret;
777 unsigned int i;
3b720b8c 778 u16 last_used;
0a8a69dd
RR
779
780 START_USE(vq);
781
5ef82752
RR
782 if (unlikely(vq->broken)) {
783 END_USE(vq);
784 return NULL;
785 }
786
138fd251 787 if (!more_used_split(vq)) {
0a8a69dd
RR
788 pr_debug("No more buffers in queue\n");
789 END_USE(vq);
790 return NULL;
791 }
792
2d61ba95 793 /* Only get used array entries after they have been exposed by host. */
a9a0fef7 794 virtio_rmb(vq->weak_barriers);
2d61ba95 795
796 last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
797 i = virtio32_to_cpu(_vq->vdev,
798 vq->split.vring.used->ring[last_used].id);
799 *len = virtio32_to_cpu(_vq->vdev,
800 vq->split.vring.used->ring[last_used].len);
0a8a69dd 801
e593bf97 802 if (unlikely(i >= vq->split.vring.num)) {
0a8a69dd
RR
803 BAD_RING(vq, "id %u out of range\n", i);
804 return NULL;
805 }
cbeedb72 806 if (unlikely(!vq->split.desc_state[i].data)) {
0a8a69dd
RR
807 BAD_RING(vq, "id %u is not a head!\n", i);
808 return NULL;
809 }
810
138fd251 811 /* detach_buf_split clears data, so grab it now. */
cbeedb72 812 ret = vq->split.desc_state[i].data;
138fd251 813 detach_buf_split(vq, i, ctx);
0a8a69dd 814 vq->last_used_idx++;
815 /* If we expect an interrupt for the next entry, tell host
816 * by writing event index and flush out the write before
817 * the read in the next get_buf call. */
e593bf97 818 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
788e5b3a 819 virtio_store_mb(vq->weak_barriers,
e593bf97 820 &vring_used_event(&vq->split.vring),
788e5b3a 821 cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
a5c262c5 822
4d6a105e 823 LAST_ADD_TIME_INVALID(vq);
e93300b1 824
825 END_USE(vq);
826 return ret;
827}
138fd251 828
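/*
 * Illustrative usage sketch (not part of this file): the classic driver
 * completion loop that lands in virtqueue_get_buf_ctx_split() on split
 * rings.  "struct example_req" and example_complete() are hypothetical;
 * the virtqueue_* calls are the real exported API.
 */
struct example_req;
static void example_complete(struct example_req *req, unsigned int len);

static void example_vq_callback(struct virtqueue *vq)
{
	struct example_req *req;
	unsigned int len;

	do {
		virtqueue_disable_cb(vq);
		while ((req = virtqueue_get_buf(vq, &len)) != NULL)
			example_complete(req, len);
		/* enable_cb returns false if more buffers raced in; re-poll. */
	} while (!virtqueue_enable_cb(vq));
}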
138fd251 829static void virtqueue_disable_cb_split(struct virtqueue *_vq)
830{
831 struct vring_virtqueue *vq = to_vvq(_vq);
832
833 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
834 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
835 if (vq->event)
836 /* TODO: this is a hack. Figure out a cleaner value to write. */
837 vring_used_event(&vq->split.vring) = 0x0;
838 else
839 vq->split.vring.avail->flags =
840 cpu_to_virtio16(_vq->vdev,
841 vq->split.avail_flags_shadow);
f277ec42 842 }
843}
844
31532340 845static unsigned int virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
846{
847 struct vring_virtqueue *vq = to_vvq(_vq);
cc229884 848 u16 last_used_idx;
849
850 START_USE(vq);
851
852 /* We optimistically turn back on interrupts, then check if there was
853 * more to do. */
854 /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
855 * either clear the flags bit or point the event index at the next
856 * entry. Always do both to keep code simple. */
857 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
858 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
0ea1e4a6 859 if (!vq->event)
860 vq->split.vring.avail->flags =
861 cpu_to_virtio16(_vq->vdev,
862 vq->split.avail_flags_shadow);
f277ec42 863 }
864 vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
865 last_used_idx = vq->last_used_idx);
866 END_USE(vq);
867 return last_used_idx;
868}
138fd251 869
31532340 870static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_idx)
871{
872 struct vring_virtqueue *vq = to_vvq(_vq);
873
874 return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
e593bf97 875 vq->split.vring.used->idx);
876}
877
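/*
 * Illustrative usage sketch (not part of this file): the busy-poll pattern
 * that the two helpers above implement behind the exported
 * virtqueue_enable_cb_prepare()/virtqueue_poll() pair (virtio-net's NAPI
 * path uses them this way).
 */
static bool example_more_work_after_reenable(struct virtqueue *vq)
{
	unsigned int opaque = virtqueue_enable_cb_prepare(vq);

	if (virtqueue_poll(vq, opaque)) {
		/* Buffers arrived after we re-armed the callback. */
		virtqueue_disable_cb(vq);
		return true;
	}
	return false;
}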
138fd251 878static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
879{
880 struct vring_virtqueue *vq = to_vvq(_vq);
881 u16 bufs;
882
883 START_USE(vq);
884
885 /* We optimistically turn back on interrupts, then check if there was
886 * more to do. */
 887 /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
888 * either clear the flags bit or point the event index at the next
0ea1e4a6 889 * entry. Always update the event index to keep code simple. */
e593bf97
TB
890 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
891 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
0ea1e4a6 892 if (!vq->event)
e593bf97
TB
893 vq->split.vring.avail->flags =
894 cpu_to_virtio16(_vq->vdev,
895 vq->split.avail_flags_shadow);
f277ec42 896 }
7ab358c2 897 /* TODO: tune this threshold */
e593bf97 898 bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;
788e5b3a
MT
899
900 virtio_store_mb(vq->weak_barriers,
e593bf97 901 &vring_used_event(&vq->split.vring),
788e5b3a
MT
902 cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
903
e593bf97
TB
904 if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
905 - vq->last_used_idx) > bufs)) {
7ab358c2
MT
906 END_USE(vq);
907 return false;
908 }
909
910 END_USE(vq);
911 return true;
912}
7ab358c2 913
138fd251 914static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
c021eac4
SM
915{
916 struct vring_virtqueue *vq = to_vvq(_vq);
917 unsigned int i;
918 void *buf;
919
920 START_USE(vq);
921
e593bf97 922 for (i = 0; i < vq->split.vring.num; i++) {
cbeedb72 923 if (!vq->split.desc_state[i].data)
c021eac4 924 continue;
138fd251 925 /* detach_buf_split clears data, so grab it now. */
cbeedb72 926 buf = vq->split.desc_state[i].data;
138fd251 927 detach_buf_split(vq, i, NULL);
e593bf97
TB
928 vq->split.avail_idx_shadow--;
929 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
930 vq->split.avail_idx_shadow);
c021eac4
SM
931 END_USE(vq);
932 return buf;
933 }
934 /* That should have freed everything. */
e593bf97 935 BUG_ON(vq->vq.num_free != vq->split.vring.num);
c021eac4
SM
936
937 END_USE(vq);
938 return NULL;
939}
138fd251 940
941static void virtqueue_vring_init_split(struct vring_virtqueue_split *vring_split,
942 struct vring_virtqueue *vq)
943{
944 struct virtio_device *vdev;
945
946 vdev = vq->vq.vdev;
947
948 vring_split->avail_flags_shadow = 0;
949 vring_split->avail_idx_shadow = 0;
950
951 /* No callback? Tell other side not to bother us. */
952 if (!vq->vq.callback) {
953 vring_split->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
954 if (!vq->event)
955 vring_split->vring.avail->flags = cpu_to_virtio16(vdev,
956 vring_split->avail_flags_shadow);
957 }
958}
959
960static int vring_alloc_state_extra_split(struct vring_virtqueue_split *vring_split)
961{
962 struct vring_desc_state_split *state;
963 struct vring_desc_extra *extra;
964 u32 num = vring_split->vring.num;
965
966 state = kmalloc_array(num, sizeof(struct vring_desc_state_split), GFP_KERNEL);
967 if (!state)
968 goto err_state;
969
970 extra = vring_alloc_desc_extra(num);
971 if (!extra)
972 goto err_extra;
973
974 memset(state, 0, num * sizeof(struct vring_desc_state_split));
975
976 vring_split->desc_state = state;
977 vring_split->desc_extra = extra;
978 return 0;
979
980err_extra:
981 kfree(state);
982err_state:
983 return -ENOMEM;
984}
985
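/*
 * Illustrative sketch (not part of this file): how the split-ring setup
 * helpers above fit together, roughly mirroring what __vring_new_virtqueue()
 * does after this refactoring.  Error unwinding and the rest of the vq field
 * setup (free_head, etc.) are omitted; the helper name is made up.
 */
static int example_attach_split(struct vring_virtqueue *vq,
				struct vring_virtqueue_split *vring_split)
{
	int err;

	err = vring_alloc_state_extra_split(vring_split);	/* desc_state/desc_extra */
	if (err)
		return err;

	virtqueue_vring_init_split(vring_split, vq);		/* avail flags/idx shadow */
	virtqueue_init(vq, vring_split->vring.num);		/* generic vq state */

	vq->split = *vring_split;				/* adopt the ring layout */
	return 0;
}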
986static void vring_free_split(struct vring_virtqueue_split *vring_split,
987 struct virtio_device *vdev)
988{
989 vring_free_queue(vdev, vring_split->queue_size_in_bytes,
990 vring_split->vring.desc,
991 vring_split->queue_dma_addr);
992
993 kfree(vring_split->desc_state);
994 kfree(vring_split->desc_extra);
995}
996
997static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split,
998 struct virtio_device *vdev,
999 u32 num,
1000 unsigned int vring_align,
1001 bool may_reduce_num)
d79dca75 1002{
1003 void *queue = NULL;
1004 dma_addr_t dma_addr;
1005
1006 /* We assume num is a power of 2. */
1007 if (num & (num - 1)) {
1008 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
c2d87fe6 1009 return -EINVAL;
1010 }
1011
1012 /* TODO: allocate each queue chunk individually */
1013 for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
1014 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1015 &dma_addr,
c7cc29aa 1016 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
1017 if (queue)
1018 break;
cf94db21 1019 if (!may_reduce_num)
c2d87fe6 1020 return -ENOMEM;
1021 }
1022
1023 if (!num)
c2d87fe6 1024 return -ENOMEM;
1025
1026 if (!queue) {
1027 /* Try to get a single page. You are my only hope! */
1028 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1029 &dma_addr, GFP_KERNEL|__GFP_ZERO);
1030 }
1031 if (!queue)
1032 return -ENOMEM;
1033
1034 vring_init(&vring_split->vring, num, queue, vring_align);
1035
1036 vring_split->queue_dma_addr = dma_addr;
1037 vring_split->queue_size_in_bytes = vring_size(num, vring_align);
d79dca75 1038
1039 return 0;
1040}
1041
1042static struct virtqueue *vring_create_virtqueue_split(
1043 unsigned int index,
1044 unsigned int num,
1045 unsigned int vring_align,
1046 struct virtio_device *vdev,
1047 bool weak_barriers,
1048 bool may_reduce_num,
1049 bool context,
1050 bool (*notify)(struct virtqueue *),
1051 void (*callback)(struct virtqueue *),
1052 const char *name)
1053{
1054 struct vring_virtqueue_split vring_split = {};
1055 struct virtqueue *vq;
1056 int err;
1057
1058 err = vring_alloc_queue_split(&vring_split, vdev, num, vring_align,
1059 may_reduce_num);
1060 if (err)
1061 return NULL;
d79dca75 1062
1063 vq = __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers,
1064 context, notify, callback, name);
d79dca75 1065 if (!vq) {
c2d87fe6 1066 vring_free_split(&vring_split, vdev);
1067 return NULL;
1068 }
1069
1070 to_vvq(vq)->split.queue_dma_addr = vring_split.queue_dma_addr;
1071 to_vvq(vq)->split.queue_size_in_bytes = vring_split.queue_size_in_bytes;
1072 to_vvq(vq)->we_own_ring = true;
1073
1074 return vq;
1075}
1076
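/*
 * Illustrative usage sketch (not part of this file): how a transport driver
 * reaches the function above, via the exported vring_create_virtqueue()
 * wrapper, which picks the packed or split variant based on
 * VIRTIO_F_RING_PACKED.  Queue size, alignment and the vq name are made up
 * for the example.
 */
static struct virtqueue *example_create_vq(struct virtio_device *vdev,
					   unsigned int index,
					   bool (*notify)(struct virtqueue *),
					   void (*callback)(struct virtqueue *))
{
	return vring_create_virtqueue(index, 256, SMP_CACHE_BYTES, vdev,
				      true,	/* weak_barriers */
				      true,	/* may_reduce_num */
				      false,	/* context */
				      notify, callback, "example-vq");
}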
e6f633e5 1077
1ce9e605
TB
1078/*
1079 * Packed ring specific functions - *_packed().
1080 */
a7722890 1081static inline bool packed_used_wrap_counter(u16 last_used_idx)
1082{
1083 return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1084}
1085
1086static inline u16 packed_last_used(u16 last_used_idx)
1087{
1088 return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1089}
1ce9e605 1090
d80dc15b
XZ
1091static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
1092 struct vring_desc_extra *extra)
1ce9e605
TB
1093{
1094 u16 flags;
1095
1096 if (!vq->use_dma_api)
1097 return;
1098
d80dc15b 1099 flags = extra->flags;
1ce9e605
TB
1100
1101 if (flags & VRING_DESC_F_INDIRECT) {
1102 dma_unmap_single(vring_dma_dev(vq),
d80dc15b 1103 extra->addr, extra->len,
1ce9e605
TB
1104 (flags & VRING_DESC_F_WRITE) ?
1105 DMA_FROM_DEVICE : DMA_TO_DEVICE);
1106 } else {
1107 dma_unmap_page(vring_dma_dev(vq),
d80dc15b 1108 extra->addr, extra->len,
1ce9e605
TB
1109 (flags & VRING_DESC_F_WRITE) ?
1110 DMA_FROM_DEVICE : DMA_TO_DEVICE);
1111 }
1112}
1113
1114static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
1115 struct vring_packed_desc *desc)
1116{
1117 u16 flags;
1118
1119 if (!vq->use_dma_api)
1120 return;
1121
1122 flags = le16_to_cpu(desc->flags);
1123
920379a4
XZ
1124 dma_unmap_page(vring_dma_dev(vq),
1125 le64_to_cpu(desc->addr),
1126 le32_to_cpu(desc->len),
1127 (flags & VRING_DESC_F_WRITE) ?
1128 DMA_FROM_DEVICE : DMA_TO_DEVICE);
1ce9e605
TB
1129}
1130
1131static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
1132 gfp_t gfp)
1133{
1134 struct vring_packed_desc *desc;
1135
1136 /*
1137 * We require lowmem mappings for the descriptors because
1138 * otherwise virt_to_phys will give us bogus addresses in the
1139 * virtqueue.
1140 */
1141 gfp &= ~__GFP_HIGHMEM;
1142
1143 desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp);
1144
1145 return desc;
1146}
1147
1148static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
8d7670f3
XZ
1149 struct scatterlist *sgs[],
1150 unsigned int total_sg,
1151 unsigned int out_sgs,
1152 unsigned int in_sgs,
1153 void *data,
1154 gfp_t gfp)
1ce9e605
TB
1155{
1156 struct vring_packed_desc *desc;
1157 struct scatterlist *sg;
1158 unsigned int i, n, err_idx;
1159 u16 head, id;
1160 dma_addr_t addr;
1161
1162 head = vq->packed.next_avail_idx;
1163 desc = alloc_indirect_packed(total_sg, gfp);
fc6d70f4
XZ
1164 if (!desc)
1165 return -ENOMEM;
1ce9e605
TB
1166
1167 if (unlikely(vq->vq.num_free < 1)) {
1168 pr_debug("Can't add buf len 1 - avail = 0\n");
df0bfe75 1169 kfree(desc);
1ce9e605
TB
1170 END_USE(vq);
1171 return -ENOSPC;
1172 }
1173
1174 i = 0;
1175 id = vq->free_head;
1176 BUG_ON(id == vq->packed.vring.num);
1177
1178 for (n = 0; n < out_sgs + in_sgs; n++) {
1179 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1180 addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1181 DMA_TO_DEVICE : DMA_FROM_DEVICE);
1182 if (vring_mapping_error(vq, addr))
1183 goto unmap_release;
1184
1185 desc[i].flags = cpu_to_le16(n < out_sgs ?
1186 0 : VRING_DESC_F_WRITE);
1187 desc[i].addr = cpu_to_le64(addr);
1188 desc[i].len = cpu_to_le32(sg->length);
1189 i++;
1190 }
1191 }
1192
1193 /* Now that the indirect table is filled in, map it. */
1194 addr = vring_map_single(vq, desc,
1195 total_sg * sizeof(struct vring_packed_desc),
1196 DMA_TO_DEVICE);
1197 if (vring_mapping_error(vq, addr))
1198 goto unmap_release;
1199
1200 vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
1201 vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
1202 sizeof(struct vring_packed_desc));
1203 vq->packed.vring.desc[head].id = cpu_to_le16(id);
1204
1205 if (vq->use_dma_api) {
1206 vq->packed.desc_extra[id].addr = addr;
1207 vq->packed.desc_extra[id].len = total_sg *
1208 sizeof(struct vring_packed_desc);
1209 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
1210 vq->packed.avail_used_flags;
1211 }
1212
1213 /*
1214 * A driver MUST NOT make the first descriptor in the list
1215 * available before all subsequent descriptors comprising
1216 * the list are made available.
1217 */
1218 virtio_wmb(vq->weak_barriers);
1219 vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
1220 vq->packed.avail_used_flags);
1221
1222 /* We're using some buffers from the free list. */
1223 vq->vq.num_free -= 1;
1224
1225 /* Update free pointer */
1226 n = head + 1;
1227 if (n >= vq->packed.vring.num) {
1228 n = 0;
1229 vq->packed.avail_wrap_counter ^= 1;
1230 vq->packed.avail_used_flags ^=
1231 1 << VRING_PACKED_DESC_F_AVAIL |
1232 1 << VRING_PACKED_DESC_F_USED;
1233 }
1234 vq->packed.next_avail_idx = n;
aeef9b47 1235 vq->free_head = vq->packed.desc_extra[id].next;
1ce9e605
TB
1236
1237 /* Store token and indirect buffer state. */
1238 vq->packed.desc_state[id].num = 1;
1239 vq->packed.desc_state[id].data = data;
1240 vq->packed.desc_state[id].indir_desc = desc;
1241 vq->packed.desc_state[id].last = id;
1242
1243 vq->num_added += 1;
1244
1245 pr_debug("Added buffer head %i to %p\n", head, vq);
1246 END_USE(vq);
1247
1248 return 0;
1249
1250unmap_release:
1251 err_idx = i;
1252
1253 for (i = 0; i < err_idx; i++)
1254 vring_unmap_desc_packed(vq, &desc[i]);
1255
1256 kfree(desc);
1257
1258 END_USE(vq);
f7728002 1259 return -ENOMEM;
1ce9e605
TB
1260}
1261
1262static inline int virtqueue_add_packed(struct virtqueue *_vq,
1263 struct scatterlist *sgs[],
1264 unsigned int total_sg,
1265 unsigned int out_sgs,
1266 unsigned int in_sgs,
1267 void *data,
1268 void *ctx,
1269 gfp_t gfp)
1270{
1271 struct vring_virtqueue *vq = to_vvq(_vq);
1272 struct vring_packed_desc *desc;
1273 struct scatterlist *sg;
1274 unsigned int i, n, c, descs_used, err_idx;
3f649ab7
KC
1275 __le16 head_flags, flags;
1276 u16 head, id, prev, curr, avail_used_flags;
fc6d70f4 1277 int err;
1ce9e605
TB
1278
1279 START_USE(vq);
1280
1281 BUG_ON(data == NULL);
1282 BUG_ON(ctx && vq->indirect);
1283
1284 if (unlikely(vq->broken)) {
1285 END_USE(vq);
1286 return -EIO;
1287 }
1288
1289 LAST_ADD_TIME_UPDATE(vq);
1290
1291 BUG_ON(total_sg == 0);
1292
35c51e09 1293 if (virtqueue_use_indirect(vq, total_sg)) {
fc6d70f4
XZ
1294 err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
1295 in_sgs, data, gfp);
1861ba62
MT
1296 if (err != -ENOMEM) {
1297 END_USE(vq);
fc6d70f4 1298 return err;
1861ba62 1299 }
fc6d70f4
XZ
1300
1301 /* fall back on direct */
1302 }
1ce9e605
TB
1303
1304 head = vq->packed.next_avail_idx;
1305 avail_used_flags = vq->packed.avail_used_flags;
1306
1307 WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
1308
1309 desc = vq->packed.vring.desc;
1310 i = head;
1311 descs_used = total_sg;
1312
1313 if (unlikely(vq->vq.num_free < descs_used)) {
1314 pr_debug("Can't add buf len %i - avail = %i\n",
1315 descs_used, vq->vq.num_free);
1316 END_USE(vq);
1317 return -ENOSPC;
1318 }
1319
1320 id = vq->free_head;
1321 BUG_ON(id == vq->packed.vring.num);
1322
1323 curr = id;
1324 c = 0;
1325 for (n = 0; n < out_sgs + in_sgs; n++) {
1326 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1327 dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1328 DMA_TO_DEVICE : DMA_FROM_DEVICE);
1329 if (vring_mapping_error(vq, addr))
1330 goto unmap_release;
1331
1332 flags = cpu_to_le16(vq->packed.avail_used_flags |
1333 (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
1334 (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
1335 if (i == head)
1336 head_flags = flags;
1337 else
1338 desc[i].flags = flags;
1339
1340 desc[i].addr = cpu_to_le64(addr);
1341 desc[i].len = cpu_to_le32(sg->length);
1342 desc[i].id = cpu_to_le16(id);
1343
1344 if (unlikely(vq->use_dma_api)) {
1345 vq->packed.desc_extra[curr].addr = addr;
1346 vq->packed.desc_extra[curr].len = sg->length;
1347 vq->packed.desc_extra[curr].flags =
1348 le16_to_cpu(flags);
1349 }
1350 prev = curr;
aeef9b47 1351 curr = vq->packed.desc_extra[curr].next;
1ce9e605
TB
1352
1353 if ((unlikely(++i >= vq->packed.vring.num))) {
1354 i = 0;
1355 vq->packed.avail_used_flags ^=
1356 1 << VRING_PACKED_DESC_F_AVAIL |
1357 1 << VRING_PACKED_DESC_F_USED;
1358 }
1359 }
1360 }
1361
1362 if (i < head)
1363 vq->packed.avail_wrap_counter ^= 1;
1364
1365 /* We're using some buffers from the free list. */
1366 vq->vq.num_free -= descs_used;
1367
1368 /* Update free pointer */
1369 vq->packed.next_avail_idx = i;
1370 vq->free_head = curr;
1371
1372 /* Store token. */
1373 vq->packed.desc_state[id].num = descs_used;
1374 vq->packed.desc_state[id].data = data;
1375 vq->packed.desc_state[id].indir_desc = ctx;
1376 vq->packed.desc_state[id].last = prev;
1377
1378 /*
1379 * A driver MUST NOT make the first descriptor in the list
1380 * available before all subsequent descriptors comprising
1381 * the list are made available.
1382 */
1383 virtio_wmb(vq->weak_barriers);
1384 vq->packed.vring.desc[head].flags = head_flags;
1385 vq->num_added += descs_used;
1386
1387 pr_debug("Added buffer head %i to %p\n", head, vq);
1388 END_USE(vq);
1389
1390 return 0;
1391
1392unmap_release:
1393 err_idx = i;
1394 i = head;
44593865 1395 curr = vq->free_head;
1ce9e605
TB
1396
1397 vq->packed.avail_used_flags = avail_used_flags;
1398
1399 for (n = 0; n < total_sg; n++) {
1400 if (i == err_idx)
1401 break;
d80dc15b 1402 vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
44593865 1403 curr = vq->packed.desc_extra[curr].next;
1ce9e605
TB
1404 i++;
1405 if (i >= vq->packed.vring.num)
1406 i = 0;
1407 }
1408
1409 END_USE(vq);
1410 return -EIO;
1411}
1412
1413static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
1414{
1415 struct vring_virtqueue *vq = to_vvq(_vq);
f51f9826 1416 u16 new, old, off_wrap, flags, wrap_counter, event_idx;
1ce9e605
TB
1417 bool needs_kick;
1418 union {
1419 struct {
1420 __le16 off_wrap;
1421 __le16 flags;
1422 };
1423 u32 u32;
1424 } snapshot;
1425
1426 START_USE(vq);
1427
1428 /*
1429 * We need to expose the new flags value before checking notification
1430 * suppressions.
1431 */
1432 virtio_mb(vq->weak_barriers);
1433
f51f9826
TB
1434 old = vq->packed.next_avail_idx - vq->num_added;
1435 new = vq->packed.next_avail_idx;
1ce9e605
TB
1436 vq->num_added = 0;
1437
1438 snapshot.u32 = *(u32 *)vq->packed.vring.device;
1439 flags = le16_to_cpu(snapshot.flags);
1440
1441 LAST_ADD_TIME_CHECK(vq);
1442 LAST_ADD_TIME_INVALID(vq);
1443
f51f9826
TB
1444 if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
1445 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
1446 goto out;
1447 }
1448
1449 off_wrap = le16_to_cpu(snapshot.off_wrap);
1450
1451 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1452 event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1453 if (wrap_counter != vq->packed.avail_wrap_counter)
1454 event_idx -= vq->packed.vring.num;
1455
1456 needs_kick = vring_need_event(event_idx, new, old);
1457out:
1ce9e605
TB
1458 END_USE(vq);
1459 return needs_kick;
1460}
1461
1462static void detach_buf_packed(struct vring_virtqueue *vq,
1463 unsigned int id, void **ctx)
1464{
1465 struct vring_desc_state_packed *state = NULL;
1466 struct vring_packed_desc *desc;
1467 unsigned int i, curr;
1468
1469 state = &vq->packed.desc_state[id];
1470
1471 /* Clear data ptr. */
1472 state->data = NULL;
1473
aeef9b47 1474 vq->packed.desc_extra[state->last].next = vq->free_head;
1ce9e605
TB
1475 vq->free_head = id;
1476 vq->vq.num_free += state->num;
1477
1478 if (unlikely(vq->use_dma_api)) {
1479 curr = id;
1480 for (i = 0; i < state->num; i++) {
d80dc15b
XZ
1481 vring_unmap_extra_packed(vq,
1482 &vq->packed.desc_extra[curr]);
aeef9b47 1483 curr = vq->packed.desc_extra[curr].next;
1ce9e605
TB
1484 }
1485 }
1486
1487 if (vq->indirect) {
1488 u32 len;
1489
1490 /* Free the indirect table, if any, now that it's unmapped. */
1491 desc = state->indir_desc;
1492 if (!desc)
1493 return;
1494
1495 if (vq->use_dma_api) {
1496 len = vq->packed.desc_extra[id].len;
1497 for (i = 0; i < len / sizeof(struct vring_packed_desc);
1498 i++)
1499 vring_unmap_desc_packed(vq, &desc[i]);
1500 }
1501 kfree(desc);
1502 state->indir_desc = NULL;
1503 } else if (ctx) {
1504 *ctx = state->indir_desc;
1505 }
1506}
1507
1508static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
1509 u16 idx, bool used_wrap_counter)
1510{
1511 bool avail, used;
1512 u16 flags;
1513
1514 flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
1515 avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
1516 used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
1517
1518 return avail == used && used == used_wrap_counter;
1519}
1520
1521static inline bool more_used_packed(const struct vring_virtqueue *vq)
1522{
a7722890 1523 u16 last_used;
1524 u16 last_used_idx;
1525 bool used_wrap_counter;
1526
1527 last_used_idx = READ_ONCE(vq->last_used_idx);
1528 last_used = packed_last_used(last_used_idx);
1529 used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1530 return is_used_desc_packed(vq, last_used, used_wrap_counter);
1ce9e605
TB
1531}
1532
1533static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
1534 unsigned int *len,
1535 void **ctx)
1536{
1537 struct vring_virtqueue *vq = to_vvq(_vq);
a7722890 1538 u16 last_used, id, last_used_idx;
1539 bool used_wrap_counter;
1ce9e605
TB
1540 void *ret;
1541
1542 START_USE(vq);
1543
1544 if (unlikely(vq->broken)) {
1545 END_USE(vq);
1546 return NULL;
1547 }
1548
1549 if (!more_used_packed(vq)) {
1550 pr_debug("No more buffers in queue\n");
1551 END_USE(vq);
1552 return NULL;
1553 }
1554
1555 /* Only get used elements after they have been exposed by host. */
1556 virtio_rmb(vq->weak_barriers);
1557
a7722890 1558 last_used_idx = READ_ONCE(vq->last_used_idx);
1559 used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1560 last_used = packed_last_used(last_used_idx);
1ce9e605
TB
1561 id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
1562 *len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
1563
1564 if (unlikely(id >= vq->packed.vring.num)) {
1565 BAD_RING(vq, "id %u out of range\n", id);
1566 return NULL;
1567 }
1568 if (unlikely(!vq->packed.desc_state[id].data)) {
1569 BAD_RING(vq, "id %u is not a head!\n", id);
1570 return NULL;
1571 }
1572
1573 /* detach_buf_packed clears data, so grab it now. */
1574 ret = vq->packed.desc_state[id].data;
1575 detach_buf_packed(vq, id, ctx);
1576
a7722890 1577 last_used += vq->packed.desc_state[id].num;
1578 if (unlikely(last_used >= vq->packed.vring.num)) {
1579 last_used -= vq->packed.vring.num;
1580 used_wrap_counter ^= 1;
1ce9e605
TB
1581 }
1582
a7722890 1583 last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1584 WRITE_ONCE(vq->last_used_idx, last_used);
1585
f51f9826
TB
1586 /*
1587 * If we expect an interrupt for the next entry, tell host
1588 * by writing event index and flush out the write before
1589 * the read in the next get_buf call.
1590 */
1591 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
1592 virtio_store_mb(vq->weak_barriers,
1593 &vq->packed.vring.driver->off_wrap,
a7722890 1594 cpu_to_le16(vq->last_used_idx));
f51f9826 1595
1ce9e605
TB
1596 LAST_ADD_TIME_INVALID(vq);
1597
1598 END_USE(vq);
1599 return ret;
1600}
1601
1602static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
1603{
1604 struct vring_virtqueue *vq = to_vvq(_vq);
1605
1606 if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
1607 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1608 vq->packed.vring.driver->flags =
1609 cpu_to_le16(vq->packed.event_flags_shadow);
1610 }
1611}
1612
31532340 1613static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
1ce9e605
TB
1614{
1615 struct vring_virtqueue *vq = to_vvq(_vq);
1616
1617 START_USE(vq);
1618
1619 /*
1620 * We optimistically turn back on interrupts, then check if there was
1621 * more to do.
1622 */
1623
f51f9826
TB
1624 if (vq->event) {
1625 vq->packed.vring.driver->off_wrap =
a7722890 1626 cpu_to_le16(vq->last_used_idx);
f51f9826
TB
1627 /*
1628 * We need to update event offset and event wrap
1629 * counter first before updating event flags.
1630 */
1631 virtio_wmb(vq->weak_barriers);
1632 }
1633
1ce9e605 1634 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
f51f9826
TB
1635 vq->packed.event_flags_shadow = vq->event ?
1636 VRING_PACKED_EVENT_FLAG_DESC :
1637 VRING_PACKED_EVENT_FLAG_ENABLE;
1ce9e605
TB
1638 vq->packed.vring.driver->flags =
1639 cpu_to_le16(vq->packed.event_flags_shadow);
1640 }
1641
1642 END_USE(vq);
a7722890 1643 return vq->last_used_idx;
1ce9e605
TB
1644}
1645
1646static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
1647{
1648 struct vring_virtqueue *vq = to_vvq(_vq);
1649 bool wrap_counter;
1650 u16 used_idx;
1651
1652 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1653 used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1654
1655 return is_used_desc_packed(vq, used_idx, wrap_counter);
1656}
1657
1658static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
1659{
1660 struct vring_virtqueue *vq = to_vvq(_vq);
a7722890 1661 u16 used_idx, wrap_counter, last_used_idx;
f51f9826 1662 u16 bufs;
1ce9e605
TB
1663
1664 START_USE(vq);
1665
1666 /*
1667 * We optimistically turn back on interrupts, then check if there was
1668 * more to do.
1669 */
1670
f51f9826
TB
1671 if (vq->event) {
1672 /* TODO: tune this threshold */
1673 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
a7722890 1674 last_used_idx = READ_ONCE(vq->last_used_idx);
1675 wrap_counter = packed_used_wrap_counter(last_used_idx);
f51f9826 1676
a7722890 1677 used_idx = packed_last_used(last_used_idx) + bufs;
f51f9826
TB
1678 if (used_idx >= vq->packed.vring.num) {
1679 used_idx -= vq->packed.vring.num;
1680 wrap_counter ^= 1;
1681 }
1682
1683 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
1684 (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1685
1686 /*
1687 * We need to update event offset and event wrap
1688 * counter first before updating event flags.
1689 */
1690 virtio_wmb(vq->weak_barriers);
f51f9826 1691 }
1ce9e605
TB
1692
1693 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
f51f9826
TB
1694 vq->packed.event_flags_shadow = vq->event ?
1695 VRING_PACKED_EVENT_FLAG_DESC :
1696 VRING_PACKED_EVENT_FLAG_ENABLE;
1ce9e605
TB
1697 vq->packed.vring.driver->flags =
1698 cpu_to_le16(vq->packed.event_flags_shadow);
1699 }
1700
1701 /*
1702 * We need to update event suppression structure first
1703 * before re-checking for more used buffers.
1704 */
1705 virtio_mb(vq->weak_barriers);
1706
a7722890 1707 last_used_idx = READ_ONCE(vq->last_used_idx);
1708 wrap_counter = packed_used_wrap_counter(last_used_idx);
1709 used_idx = packed_last_used(last_used_idx);
1710 if (is_used_desc_packed(vq, used_idx, wrap_counter)) {
1ce9e605
TB
1711 END_USE(vq);
1712 return false;
1713 }
1714
1715 END_USE(vq);
1716 return true;
1717}
1718
1719static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
1720{
1721 struct vring_virtqueue *vq = to_vvq(_vq);
1722 unsigned int i;
1723 void *buf;
1724
1725 START_USE(vq);
1726
1727 for (i = 0; i < vq->packed.vring.num; i++) {
1728 if (!vq->packed.desc_state[i].data)
1729 continue;
1730 /* detach_buf clears data, so grab it now. */
1731 buf = vq->packed.desc_state[i].data;
1732 detach_buf_packed(vq, i, NULL);
1733 END_USE(vq);
1734 return buf;
1735 }
1736 /* That should have freed everything. */
1737 BUG_ON(vq->vq.num_free != vq->packed.vring.num);
1738
1739 END_USE(vq);
1740 return NULL;
1741}
1742
96ef18a2 1743static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num)
5a222421
JW
1744{
1745 struct vring_desc_extra *desc_extra;
1746 unsigned int i;
1747
1748 desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra),
1749 GFP_KERNEL);
1750 if (!desc_extra)
1751 return NULL;
1752
1753 memset(desc_extra, 0, num * sizeof(struct vring_desc_extra));
1754
1755 for (i = 0; i < num - 1; i++)
1756 desc_extra[i].next = i + 1;
1757
1758 return desc_extra;
1759}
1760
1ce9e605
TB
1761static struct virtqueue *vring_create_virtqueue_packed(
1762 unsigned int index,
1763 unsigned int num,
1764 unsigned int vring_align,
1765 struct virtio_device *vdev,
1766 bool weak_barriers,
1767 bool may_reduce_num,
1768 bool context,
1769 bool (*notify)(struct virtqueue *),
1770 void (*callback)(struct virtqueue *),
1771 const char *name)
1772{
1773 struct vring_virtqueue *vq;
1774 struct vring_packed_desc *ring;
1775 struct vring_packed_desc_event *driver, *device;
1776 dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
1777 size_t ring_size_in_bytes, event_size_in_bytes;
1ce9e605
TB
1778
1779 ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
1780
1781 ring = vring_alloc_queue(vdev, ring_size_in_bytes,
1782 &ring_dma_addr,
1783 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1784 if (!ring)
1785 goto err_ring;
1786
1787 event_size_in_bytes = sizeof(struct vring_packed_desc_event);
1788
1789 driver = vring_alloc_queue(vdev, event_size_in_bytes,
1790 &driver_event_dma_addr,
1791 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1792 if (!driver)
1793 goto err_driver;
1794
1795 device = vring_alloc_queue(vdev, event_size_in_bytes,
1796 &device_event_dma_addr,
1797 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1798 if (!device)
1799 goto err_device;
1800
1801 vq = kmalloc(sizeof(*vq), GFP_KERNEL);
1802 if (!vq)
1803 goto err_vq;
1804
1805 vq->vq.callback = callback;
1806 vq->vq.vdev = vdev;
1807 vq->vq.name = name;
1ce9e605
TB
1808 vq->vq.index = index;
1809 vq->we_own_ring = true;
1810 vq->notify = notify;
1811 vq->weak_barriers = weak_barriers;
c346dae4 1812#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
8b4ec69d 1813 vq->broken = true;
c346dae4
JW
1814#else
1815 vq->broken = false;
1816#endif
1ce9e605
TB
1817 vq->packed_ring = true;
1818 vq->use_dma_api = vring_use_dma_api(vdev);
1ce9e605
TB
1819
1820 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
1821 !context;
1822 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1823
45383fb0
TB
1824 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
1825 vq->weak_barriers = false;
1826
1ce9e605
TB
1827 vq->packed.ring_dma_addr = ring_dma_addr;
1828 vq->packed.driver_event_dma_addr = driver_event_dma_addr;
1829 vq->packed.device_event_dma_addr = device_event_dma_addr;
1830
1831 vq->packed.ring_size_in_bytes = ring_size_in_bytes;
1832 vq->packed.event_size_in_bytes = event_size_in_bytes;
1833
1834 vq->packed.vring.num = num;
1835 vq->packed.vring.desc = ring;
1836 vq->packed.vring.driver = driver;
1837 vq->packed.vring.device = device;
1838
1839 vq->packed.next_avail_idx = 0;
1840 vq->packed.avail_wrap_counter = 1;
1ce9e605
TB
1841 vq->packed.event_flags_shadow = 0;
1842 vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
1843
1844 vq->packed.desc_state = kmalloc_array(num,
1845 sizeof(struct vring_desc_state_packed),
1846 GFP_KERNEL);
1847 if (!vq->packed.desc_state)
1848 goto err_desc_state;
1849
1850 memset(vq->packed.desc_state, 0,
1851 num * sizeof(struct vring_desc_state_packed));
1852
1853 /* Put everything in free lists. */
1854 vq->free_head = 0;
1ce9e605 1855
96ef18a2 1856 vq->packed.desc_extra = vring_alloc_desc_extra(num);
1ce9e605
TB
1857 if (!vq->packed.desc_extra)
1858 goto err_desc_extra;
1859
1ce9e605
TB
1860 /* No callback? Tell other side not to bother us. */
1861 if (!callback) {
1862 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1863 vq->packed.vring.driver->flags =
1864 cpu_to_le16(vq->packed.event_flags_shadow);
1865 }
1866
3a897128
XZ
1867 virtqueue_init(vq, num);
1868
0e566c8f 1869 spin_lock(&vdev->vqs_list_lock);
e152d8af 1870 list_add_tail(&vq->vq.list, &vdev->vqs);
0e566c8f 1871 spin_unlock(&vdev->vqs_list_lock);
1ce9e605
TB
1872 return &vq->vq;
1873
1874err_desc_extra:
1875 kfree(vq->packed.desc_state);
1876err_desc_state:
1877 kfree(vq);
1878err_vq:
ae93d8ea 1879 vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr);
1ce9e605 1880err_device:
ae93d8ea 1881 vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr);
1ce9e605
TB
1882err_driver:
1883 vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr);
1884err_ring:
1885 return NULL;
1886}
1887
1888
e6f633e5
TB
1889/*
1890 * Generic functions and exported symbols.
1891 */
1892
1893static inline int virtqueue_add(struct virtqueue *_vq,
1894 struct scatterlist *sgs[],
1895 unsigned int total_sg,
1896 unsigned int out_sgs,
1897 unsigned int in_sgs,
1898 void *data,
1899 void *ctx,
1900 gfp_t gfp)
1901{
1ce9e605
TB
1902 struct vring_virtqueue *vq = to_vvq(_vq);
1903
1904 return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
1905 out_sgs, in_sgs, data, ctx, gfp) :
1906 virtqueue_add_split(_vq, sgs, total_sg,
1907 out_sgs, in_sgs, data, ctx, gfp);
e6f633e5
TB
1908}
1909
1910/**
1911 * virtqueue_add_sgs - expose buffers to other end
a5581206 1912 * @_vq: the struct virtqueue we're talking about.
e6f633e5 1913 * @sgs: array of terminated scatterlists.
a5581206
JB
1914 * @out_sgs: the number of scatterlists readable by other side
1915 * @in_sgs: the number of scatterlists which are writable (after readable ones)
e6f633e5
TB
1916 * @data: the token identifying the buffer.
1917 * @gfp: how to do memory allocations (if necessary).
1918 *
1919 * Caller must ensure we don't call this with other virtqueue operations
1920 * at the same time (except where noted).
1921 *
1922 * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
1923 */
1924int virtqueue_add_sgs(struct virtqueue *_vq,
1925 struct scatterlist *sgs[],
1926 unsigned int out_sgs,
1927 unsigned int in_sgs,
1928 void *data,
1929 gfp_t gfp)
1930{
1931 unsigned int i, total_sg = 0;
1932
1933 /* Count them first. */
1934 for (i = 0; i < out_sgs + in_sgs; i++) {
1935 struct scatterlist *sg;
1936
1937 for (sg = sgs[i]; sg; sg = sg_next(sg))
1938 total_sg++;
1939 }
1940 return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
1941 data, NULL, gfp);
1942}
1943EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
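
/*
 * Illustrative sketch, not part of virtio_ring.c: one way a driver might
 * build the sgs[] array for virtqueue_add_sgs().  struct demo_req, its
 * layout and demo_submit_req() are hypothetical; only the scatterlist and
 * virtqueue calls are real APIs.  Error handling is left to the caller.
 */
struct demo_req {
	__le32 type;		/* device-readable command header */
	__le32 len;
	u8 status;		/* written back by the device */
};

static int demo_submit_req(struct virtqueue *vq, struct demo_req *req)
{
	struct scatterlist hdr_sg, status_sg, *sgs[2];

	sg_init_one(&hdr_sg, req, offsetof(struct demo_req, status));
	sg_init_one(&status_sg, &req->status, sizeof(req->status));
	sgs[0] = &hdr_sg;	/* 1 out_sg: readable by the device */
	sgs[1] = &status_sg;	/* 1 in_sg: writable by the device */

	/* @req is the token later returned by virtqueue_get_buf(). */
	return virtqueue_add_sgs(vq, sgs, 1, 1, req, GFP_ATOMIC);
}
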
1944
1945/**
1946 * virtqueue_add_outbuf - expose output buffers to other end
1947 * @vq: the struct virtqueue we're talking about.
1948 * @sg: scatterlist (must be well-formed and terminated!)
1949 * @num: the number of entries in @sg readable by other side
1950 * @data: the token identifying the buffer.
1951 * @gfp: how to do memory allocations (if necessary).
1952 *
1953 * Caller must ensure we don't call this with other virtqueue operations
1954 * at the same time (except where noted).
1955 *
1956 * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
1957 */
1958int virtqueue_add_outbuf(struct virtqueue *vq,
1959 struct scatterlist *sg, unsigned int num,
1960 void *data,
1961 gfp_t gfp)
1962{
1963 return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
1964}
1965EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
1966
1967/**
1968 * virtqueue_add_inbuf - expose input buffers to other end
1969 * @vq: the struct virtqueue we're talking about.
1970 * @sg: scatterlist (must be well-formed and terminated!)
1971 * @num: the number of entries in @sg writable by other side
1972 * @data: the token identifying the buffer.
1973 * @gfp: how to do memory allocations (if necessary).
1974 *
1975 * Caller must ensure we don't call this with other virtqueue operations
1976 * at the same time (except where noted).
1977 *
1978 * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
1979 */
1980int virtqueue_add_inbuf(struct virtqueue *vq,
1981 struct scatterlist *sg, unsigned int num,
1982 void *data,
1983 gfp_t gfp)
1984{
1985 return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
1986}
1987EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
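
/*
 * Illustrative sketch, not part of virtio_ring.c: refilling a receive
 * queue with virtqueue_add_inbuf() until the ring is full, then kicking
 * once.  The buffer size and the demo_fill_rx() name are assumptions;
 * "add until -ENOSPC, kick at the end" is the common driver idiom.
 */
static void demo_fill_rx(struct virtqueue *vq)
{
	struct scatterlist sg;
	void *buf;
	int err;

	for (;;) {
		buf = kmalloc(PAGE_SIZE, GFP_ATOMIC);
		if (!buf)
			break;

		sg_init_one(&sg, buf, PAGE_SIZE);
		/* The buffer itself doubles as the returned token. */
		err = virtqueue_add_inbuf(vq, &sg, 1, buf, GFP_ATOMIC);
		if (err) {		/* typically -ENOSPC when full */
			kfree(buf);
			break;
		}
	}
	virtqueue_kick(vq);
}
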
1988
1989/**
1990 * virtqueue_add_inbuf_ctx - expose input buffers to other end
1991 * @vq: the struct virtqueue we're talking about.
1992 * @sg: scatterlist (must be well-formed and terminated!)
1993 * @num: the number of entries in @sg writable by other side
1994 * @data: the token identifying the buffer.
1995 * @ctx: extra context for the token
1996 * @gfp: how to do memory allocations (if necessary).
1997 *
1998 * Caller must ensure we don't call this with other virtqueue operations
1999 * at the same time (except where noted).
2000 *
2001 * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
2002 */
2003int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
2004 struct scatterlist *sg, unsigned int num,
2005 void *data,
2006 void *ctx,
2007 gfp_t gfp)
2008{
2009 return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
2010}
2011EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
2012
2013/**
2014 * virtqueue_kick_prepare - first half of split virtqueue_kick call.
a5581206 2015 * @_vq: the struct virtqueue
e6f633e5
TB
2016 *
2017 * Instead of virtqueue_kick(), you can do:
2018 * if (virtqueue_kick_prepare(vq))
2019 * virtqueue_notify(vq);
2020 *
2021 * This is sometimes useful because virtqueue_kick_prepare() needs
2022 * to be serialized, but the actual virtqueue_notify() call does not.
2023 */
2024bool virtqueue_kick_prepare(struct virtqueue *_vq)
2025{
1ce9e605
TB
2026 struct vring_virtqueue *vq = to_vvq(_vq);
2027
2028 return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
2029 virtqueue_kick_prepare_split(_vq);
e6f633e5
TB
2030}
2031EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
2032
2033/**
2034 * virtqueue_notify - second half of split virtqueue_kick call.
a5581206 2035 * @_vq: the struct virtqueue
e6f633e5
TB
2036 *
2037 * This does not need to be serialized.
2038 *
2039 * Returns false if host notify failed or queue is broken, otherwise true.
2040 */
2041bool virtqueue_notify(struct virtqueue *_vq)
2042{
2043 struct vring_virtqueue *vq = to_vvq(_vq);
2044
2045 if (unlikely(vq->broken))
2046 return false;
2047
2048 /* Prod other side to tell it about changes. */
2049 if (!vq->notify(_vq)) {
2050 vq->broken = true;
2051 return false;
2052 }
2053 return true;
2054}
2055EXPORT_SYMBOL_GPL(virtqueue_notify);
2056
2057/**
2058 * virtqueue_kick - update after add_buf
2059 * @vq: the struct virtqueue
2060 *
2061 * After one or more virtqueue_add_* calls, invoke this to kick
2062 * the other side.
2063 *
2064 * Caller must ensure we don't call this with other virtqueue
2065 * operations at the same time (except where noted).
2066 *
2067 * Returns false if kick failed, otherwise true.
2068 */
2069bool virtqueue_kick(struct virtqueue *vq)
2070{
2071 if (virtqueue_kick_prepare(vq))
2072 return virtqueue_notify(vq);
2073 return true;
2074}
2075EXPORT_SYMBOL_GPL(virtqueue_kick);
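
/*
 * Illustrative sketch, not part of virtio_ring.c: the split-kick pattern
 * described above.  The spinlock and demo_* names are assumptions; the
 * point is that virtqueue_add_*() and virtqueue_kick_prepare() run under
 * the lock that serializes queue operations, while the potentially slow
 * virtqueue_notify() (a doorbell write or trap) runs after it is dropped.
 */
static int demo_add_and_kick(struct virtqueue *vq, spinlock_t *lock,
			     struct scatterlist *sg, void *token)
{
	unsigned long flags;
	bool needs_kick;
	int err;

	spin_lock_irqsave(lock, flags);
	err = virtqueue_add_outbuf(vq, sg, 1, token, GFP_ATOMIC);
	needs_kick = !err && virtqueue_kick_prepare(vq);
	spin_unlock_irqrestore(lock, flags);

	if (needs_kick)
		virtqueue_notify(vq);
	return err;
}
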
2076
2077/**
31c11db6 2078 * virtqueue_get_buf_ctx - get the next used buffer
a5581206 2079 * @_vq: the struct virtqueue we're talking about.
e6f633e5 2080 * @len: the length written into the buffer
a5581206 2081 * @ctx: extra context for the token
e6f633e5
TB
2082 *
2083 * If the device wrote data into the buffer, @len will be set to the
2084 * amount written. This means you don't need to clear the buffer
2085 * beforehand to ensure there's no data leakage in the case of short
2086 * writes.
2087 *
2088 * Caller must ensure we don't call this with other virtqueue
2089 * operations at the same time (except where noted).
2090 *
2091 * Returns NULL if there are no used buffers, or the "data" token
2092 * handed to virtqueue_add_*().
2093 */
2094void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
2095 void **ctx)
2096{
1ce9e605
TB
2097 struct vring_virtqueue *vq = to_vvq(_vq);
2098
2099 return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
2100 virtqueue_get_buf_ctx_split(_vq, len, ctx);
e6f633e5
TB
2101}
2102EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
2103
2104void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
2105{
2106 return virtqueue_get_buf_ctx(_vq, len, NULL);
2107}
2108EXPORT_SYMBOL_GPL(virtqueue_get_buf);
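
/*
 * Illustrative sketch, not part of virtio_ring.c: reaping used buffers
 * with virtqueue_get_buf().  demo_complete_req() is a hypothetical
 * per-driver completion handler; @len is only meaningful for buffers the
 * device wrote into.
 */
static void demo_reap(struct virtqueue *vq)
{
	unsigned int len;
	void *token;

	/* Returns the token handed to virtqueue_add_*(), or NULL. */
	while ((token = virtqueue_get_buf(vq, &len)) != NULL)
		demo_complete_req(token, len);	/* hypothetical */
}
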
e6f633e5
TB
2109/**
2110 * virtqueue_disable_cb - disable callbacks
a5581206 2111 * @_vq: the struct virtqueue we're talking about.
e6f633e5
TB
2112 *
2113 * Note that this is not necessarily synchronous, hence unreliable and only
2114 * useful as an optimization.
2115 *
2116 * Unlike other operations, this need not be serialized.
2117 */
2118void virtqueue_disable_cb(struct virtqueue *_vq)
2119{
1ce9e605
TB
2120 struct vring_virtqueue *vq = to_vvq(_vq);
2121
8d622d21
MT
2122 /* If device triggered an event already it won't trigger one again:
2123 * no need to disable.
2124 */
2125 if (vq->event_triggered)
2126 return;
2127
1ce9e605
TB
2128 if (vq->packed_ring)
2129 virtqueue_disable_cb_packed(_vq);
2130 else
2131 virtqueue_disable_cb_split(_vq);
e6f633e5
TB
2132}
2133EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
2134
2135/**
2136 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
a5581206 2137 * @_vq: the struct virtqueue we're talking about.
e6f633e5
TB
2138 *
2139 * This re-enables callbacks; it returns current queue state
2140 * in an opaque unsigned value. This value should later be passed to
2141 * virtqueue_poll() to detect a possible race between the driver checking for
2142 * more work, and enabling callbacks.
2143 *
2144 * Caller must ensure we don't call this with other virtqueue
2145 * operations at the same time (except where noted).
2146 */
31532340 2147unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq)
e6f633e5 2148{
1ce9e605
TB
2149 struct vring_virtqueue *vq = to_vvq(_vq);
2150
8d622d21
MT
2151 if (vq->event_triggered)
2152 vq->event_triggered = false;
2153
1ce9e605
TB
2154 return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
2155 virtqueue_enable_cb_prepare_split(_vq);
e6f633e5
TB
2156}
2157EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
2158
2159/**
2160 * virtqueue_poll - query pending used buffers
a5581206 2161 * @_vq: the struct virtqueue we're talking about.
e6f633e5
TB
2162 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
2163 *
2164 * Returns "true" if there are pending used buffers in the queue.
2165 *
2166 * This does not need to be serialized.
2167 */
31532340 2168bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx)
e6f633e5
TB
2169{
2170 struct vring_virtqueue *vq = to_vvq(_vq);
2171
481a0d74
MW
2172 if (unlikely(vq->broken))
2173 return false;
2174
e6f633e5 2175 virtio_mb(vq->weak_barriers);
1ce9e605
TB
2176 return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
2177 virtqueue_poll_split(_vq, last_used_idx);
e6f633e5
TB
2178}
2179EXPORT_SYMBOL_GPL(virtqueue_poll);
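
/*
 * Illustrative sketch, not part of virtio_ring.c: the two-step re-enable
 * used by NAPI-style drivers.  The opaque value from
 * virtqueue_enable_cb_prepare() is handed back to virtqueue_poll() to
 * detect buffers used in between; demo_reap() is the hypothetical helper
 * sketched after virtqueue_get_buf() above.
 */
static void demo_finish_processing(struct virtqueue *vq)
{
	unsigned int opaque;

	opaque = virtqueue_enable_cb_prepare(vq);
	if (unlikely(virtqueue_poll(vq, opaque))) {
		/* Lost the race: more work arrived, keep callbacks off. */
		virtqueue_disable_cb(vq);
		demo_reap(vq);
	}
}
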
2180
2181/**
2182 * virtqueue_enable_cb - restart callbacks after disable_cb.
a5581206 2183 * @_vq: the struct virtqueue we're talking about.
e6f633e5
TB
2184 *
2185 * This re-enables callbacks; it returns "false" if there are pending
2186 * buffers in the queue, to detect a possible race between the driver
2187 * checking for more work, and enabling callbacks.
2188 *
2189 * Caller must ensure we don't call this with other virtqueue
2190 * operations at the same time (except where noted).
2191 */
2192bool virtqueue_enable_cb(struct virtqueue *_vq)
2193{
31532340 2194 unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq);
e6f633e5
TB
2195
2196 return !virtqueue_poll(_vq, last_used_idx);
2197}
2198EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
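
/*
 * Illustrative sketch, not part of virtio_ring.c: the classic
 * interrupt-driven loop built on virtqueue_disable_cb()/enable_cb().
 * A false return from virtqueue_enable_cb() means buffers were used after
 * the last reap, so the loop runs again instead of waiting for an
 * interrupt that may never come.  demo_reap() is hypothetical.
 */
static void demo_vq_callback(struct virtqueue *vq)
{
	virtqueue_disable_cb(vq);	/* best-effort interrupt suppression */
	do {
		demo_reap(vq);		/* drain via virtqueue_get_buf() */
	} while (!virtqueue_enable_cb(vq));
}
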
2199
2200/**
2201 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
a5581206 2202 * @_vq: the struct virtqueue we're talking about.
e6f633e5
TB
2203 *
2204 * This re-enables callbacks but hints to the other side to delay
2205 * interrupts until most of the available buffers have been processed;
2206 * it returns "false" if there are many pending buffers in the queue,
2207 * to detect a possible race between the driver checking for more work,
2208 * and enabling callbacks.
2209 *
2210 * Caller must ensure we don't call this with other virtqueue
2211 * operations at the same time (except where noted).
2212 */
2213bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
2214{
1ce9e605
TB
2215 struct vring_virtqueue *vq = to_vvq(_vq);
2216
8d622d21
MT
2217 if (vq->event_triggered)
2218 vq->event_triggered = false;
2219
1ce9e605
TB
2220 return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
2221 virtqueue_enable_cb_delayed_split(_vq);
e6f633e5
TB
2222}
2223EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
2224
138fd251
TB
2225/**
2226 * virtqueue_detach_unused_buf - detach first unused buffer
a5581206 2227 * @_vq: the struct virtqueue we're talking about.
138fd251
TB
2228 *
2229 * Returns NULL or the "data" token handed to virtqueue_add_*().
a62eecb3
XZ
2230 * This is not valid on an active queue; it is useful for device
2231 * shutdown or queue reset.
138fd251
TB
2232 */
2233void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
2234{
1ce9e605
TB
2235 struct vring_virtqueue *vq = to_vvq(_vq);
2236
2237 return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
2238 virtqueue_detach_unused_buf_split(_vq);
138fd251 2239}
7c5e9ed0 2240EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
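
/*
 * Illustrative sketch, not part of virtio_ring.c: draining buffers the
 * device never used, e.g. in a driver's remove path after the device has
 * been reset.  demo_free_token() is hypothetical.
 */
static void demo_drain_unused(struct virtqueue *vq)
{
	void *token;

	while ((token = virtqueue_detach_unused_buf(vq)) != NULL)
		demo_free_token(token);		/* hypothetical */
}
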
c021eac4 2241
138fd251
TB
2242static inline bool more_used(const struct vring_virtqueue *vq)
2243{
1ce9e605 2244 return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
138fd251
TB
2245}
2246
0a8a69dd
RR
2247irqreturn_t vring_interrupt(int irq, void *_vq)
2248{
2249 struct vring_virtqueue *vq = to_vvq(_vq);
2250
2251 if (!more_used(vq)) {
2252 pr_debug("virtqueue interrupt with no work for %p\n", vq);
2253 return IRQ_NONE;
2254 }
2255
8b4ec69d 2256 if (unlikely(vq->broken)) {
c346dae4 2257#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
8b4ec69d
JW
2258 dev_warn_once(&vq->vq.vdev->dev,
2259 "virtio vring IRQ raised before DRIVER_OK");
2260 return IRQ_NONE;
c346dae4
JW
2261#else
2262 return IRQ_HANDLED;
2263#endif
8b4ec69d 2264 }
0a8a69dd 2265
8d622d21
MT
2266 /* Just a hint for performance: so it's ok that this can be racy! */
2267 if (vq->event)
2268 vq->event_triggered = true;
2269
0a8a69dd 2270 pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
18445c4d
RR
2271 if (vq->vq.callback)
2272 vq->vq.callback(&vq->vq);
0a8a69dd
RR
2273
2274 return IRQ_HANDLED;
2275}
c6fd4701 2276EXPORT_SYMBOL_GPL(vring_interrupt);
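
/*
 * Illustrative sketch, not part of virtio_ring.c: how a transport might
 * wire vring_interrupt() to a per-virtqueue interrupt line (assumes
 * <linux/interrupt.h>).  The irq number, flags and name are assumptions;
 * virtio_pci and virtio_mmio do the real wiring.
 */
static int demo_request_vq_irq(unsigned int irq, struct virtqueue *vq)
{
	/* vring_interrupt() expects the struct virtqueue * as dev_id. */
	return request_irq(irq, vring_interrupt, IRQF_SHARED,
			   "demo-virtqueue", vq);
}
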
0a8a69dd 2277
1ce9e605 2278/* Only available for split ring */
07d9629d 2279static struct virtqueue *__vring_new_virtqueue(unsigned int index,
cd4c812a 2280 struct vring_virtqueue_split *vring_split,
07d9629d
XZ
2281 struct virtio_device *vdev,
2282 bool weak_barriers,
2283 bool context,
2284 bool (*notify)(struct virtqueue *),
2285 void (*callback)(struct virtqueue *),
2286 const char *name)
0a8a69dd 2287{
2a2d1382 2288 struct vring_virtqueue *vq;
a2b36c8d 2289 int err;
0a8a69dd 2290
1ce9e605
TB
2291 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2292 return NULL;
2293
cbeedb72 2294 vq = kmalloc(sizeof(*vq), GFP_KERNEL);
0a8a69dd
RR
2295 if (!vq)
2296 return NULL;
2297
1ce9e605 2298 vq->packed_ring = false;
0a8a69dd
RR
2299 vq->vq.callback = callback;
2300 vq->vq.vdev = vdev;
9499f5e7 2301 vq->vq.name = name;
06ca287d 2302 vq->vq.index = index;
2a2d1382 2303 vq->we_own_ring = false;
0a8a69dd 2304 vq->notify = notify;
7b21e34f 2305 vq->weak_barriers = weak_barriers;
c346dae4 2306#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
8b4ec69d 2307 vq->broken = true;
c346dae4
JW
2308#else
2309 vq->broken = false;
2310#endif
fb3fba6b 2311 vq->use_dma_api = vring_use_dma_api(vdev);
0a8a69dd 2312
5a08b04f
MT
2313 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2314 !context;
a5c262c5 2315 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
9fa29b9d 2316
45383fb0
TB
2317 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2318 vq->weak_barriers = false;
2319
d79dca75
TB
2320 vq->split.queue_dma_addr = 0;
2321 vq->split.queue_size_in_bytes = 0;
2322
cd4c812a 2323 vq->split.vring = vring_split->vring;
0a8a69dd 2324
a2b36c8d
XZ
2325 err = vring_alloc_state_extra_split(vring_split);
2326 if (err) {
2327 kfree(vq);
2328 return NULL;
2329 }
72b5e895 2330
0a8a69dd 2331 /* Put everything in free lists. */
0a8a69dd 2332 vq->free_head = 0;
a2b36c8d
XZ
2333
2334 vq->split.desc_state = vring_split->desc_state;
2335 vq->split.desc_extra = vring_split->desc_extra;
0a8a69dd 2336
198fa7be
XZ
2337 virtqueue_vring_init_split(vring_split, vq);
2338
cd4c812a 2339 virtqueue_init(vq, vring_split->vring.num);
3a897128 2340
0e566c8f 2341 spin_lock(&vdev->vqs_list_lock);
e152d8af 2342 list_add_tail(&vq->vq.list, &vdev->vqs);
0e566c8f 2343 spin_unlock(&vdev->vqs_list_lock);
0a8a69dd
RR
2344 return &vq->vq;
2345}
2a2d1382 2346
2a2d1382
AL
2347struct virtqueue *vring_create_virtqueue(
2348 unsigned int index,
2349 unsigned int num,
2350 unsigned int vring_align,
2351 struct virtio_device *vdev,
2352 bool weak_barriers,
2353 bool may_reduce_num,
f94682dd 2354 bool context,
2a2d1382
AL
2355 bool (*notify)(struct virtqueue *),
2356 void (*callback)(struct virtqueue *),
2357 const char *name)
2358{
1ce9e605
TB
2359
2360 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2361 return vring_create_virtqueue_packed(index, num, vring_align,
2362 vdev, weak_barriers, may_reduce_num,
2363 context, notify, callback, name);
2364
d79dca75
TB
2365 return vring_create_virtqueue_split(index, num, vring_align,
2366 vdev, weak_barriers, may_reduce_num,
2367 context, notify, callback, name);
2a2d1382
AL
2368}
2369EXPORT_SYMBOL_GPL(vring_create_virtqueue);
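
/*
 * Illustrative sketch, not part of virtio_ring.c: how a transport might
 * call vring_create_virtqueue().  The queue size, alignment and notify
 * hook are assumptions; a real transport (e.g. virtio-pci) writes the
 * returned DMA addresses into device registers afterwards and rings a
 * doorbell from its notify callback.
 */
static bool demo_notify(struct virtqueue *vq)
{
	/* A real transport pokes a doorbell/notify register here. */
	return true;
}

static struct virtqueue *demo_setup_vq(struct virtio_device *vdev,
				       unsigned int index,
				       void (*callback)(struct virtqueue *),
				       const char *name)
{
	return vring_create_virtqueue(index, 128, PAGE_SIZE, vdev,
				      true,	/* weak_barriers */
				      true,	/* may_reduce_num */
				      false,	/* no per-buffer ctx */
				      demo_notify, callback, name);
}
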
2370
1ce9e605 2371/* Only available for split ring */
2a2d1382
AL
2372struct virtqueue *vring_new_virtqueue(unsigned int index,
2373 unsigned int num,
2374 unsigned int vring_align,
2375 struct virtio_device *vdev,
2376 bool weak_barriers,
f94682dd 2377 bool context,
2a2d1382
AL
2378 void *pages,
2379 bool (*notify)(struct virtqueue *vq),
2380 void (*callback)(struct virtqueue *vq),
2381 const char *name)
2382{
cd4c812a 2383 struct vring_virtqueue_split vring_split = {};
1ce9e605
TB
2384
2385 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2386 return NULL;
2387
cd4c812a
XZ
2388 vring_init(&vring_split.vring, num, pages, vring_align);
2389 return __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers,
2390 context, notify, callback, name);
2a2d1382 2391}
c6fd4701 2392EXPORT_SYMBOL_GPL(vring_new_virtqueue);
0a8a69dd 2393
3ea19e32 2394static void vring_free(struct virtqueue *_vq)
0a8a69dd 2395{
2a2d1382
AL
2396 struct vring_virtqueue *vq = to_vvq(_vq);
2397
2398 if (vq->we_own_ring) {
1ce9e605
TB
2399 if (vq->packed_ring) {
2400 vring_free_queue(vq->vq.vdev,
2401 vq->packed.ring_size_in_bytes,
2402 vq->packed.vring.desc,
2403 vq->packed.ring_dma_addr);
2404
2405 vring_free_queue(vq->vq.vdev,
2406 vq->packed.event_size_in_bytes,
2407 vq->packed.vring.driver,
2408 vq->packed.driver_event_dma_addr);
2409
2410 vring_free_queue(vq->vq.vdev,
2411 vq->packed.event_size_in_bytes,
2412 vq->packed.vring.device,
2413 vq->packed.device_event_dma_addr);
2414
2415 kfree(vq->packed.desc_state);
2416 kfree(vq->packed.desc_extra);
2417 } else {
2418 vring_free_queue(vq->vq.vdev,
2419 vq->split.queue_size_in_bytes,
2420 vq->split.vring.desc,
2421 vq->split.queue_dma_addr);
1ce9e605 2422 }
2a2d1382 2423 }
72b5e895 2424 if (!vq->packed_ring) {
f13f09a1 2425 kfree(vq->split.desc_state);
72b5e895
JW
2426 kfree(vq->split.desc_extra);
2427 }
3ea19e32
XZ
2428}
2429
2430void vring_del_virtqueue(struct virtqueue *_vq)
2431{
2432 struct vring_virtqueue *vq = to_vvq(_vq);
2433
2434 spin_lock(&vq->vq.vdev->vqs_list_lock);
2435 list_del(&_vq->list);
2436 spin_unlock(&vq->vq.vdev->vqs_list_lock);
2437
2438 vring_free(_vq);
2439
2a2d1382 2440 kfree(vq);
0a8a69dd 2441}
c6fd4701 2442EXPORT_SYMBOL_GPL(vring_del_virtqueue);
0a8a69dd 2443
e34f8725
RR
2444/* Manipulates transport-specific feature bits. */
2445void vring_transport_features(struct virtio_device *vdev)
2446{
2447 unsigned int i;
2448
2449 for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
2450 switch (i) {
9fa29b9d
MM
2451 case VIRTIO_RING_F_INDIRECT_DESC:
2452 break;
a5c262c5
MT
2453 case VIRTIO_RING_F_EVENT_IDX:
2454 break;
747ae34a
MT
2455 case VIRTIO_F_VERSION_1:
2456 break;
321bd212 2457 case VIRTIO_F_ACCESS_PLATFORM:
1a937693 2458 break;
f959a128
TB
2459 case VIRTIO_F_RING_PACKED:
2460 break;
45383fb0
TB
2461 case VIRTIO_F_ORDER_PLATFORM:
2462 break;
e34f8725
RR
2463 default:
2464 /* We don't understand this bit. */
e16e12be 2465 __virtio_clear_bit(vdev, i);
e34f8725
RR
2466 }
2467 }
2468}
2469EXPORT_SYMBOL_GPL(vring_transport_features);
2470
5dfc1762
RR
2471/**
2472 * virtqueue_get_vring_size - return the size of the virtqueue's vring
a5581206 2473 * @_vq: the struct virtqueue containing the vring of interest.
5dfc1762
RR
2474 *
2475 * Returns the size of the vring. This is mainly used for boasting to
2476 * userspace. Unlike other operations, this need not be serialized.
2477 */
8f9f4668
RJ
2478unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
2479{
2480
2481 struct vring_virtqueue *vq = to_vvq(_vq);
2482
1ce9e605 2483 return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
8f9f4668
RJ
2484}
2485EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
2486
b3b32c94
HG
2487bool virtqueue_is_broken(struct virtqueue *_vq)
2488{
2489 struct vring_virtqueue *vq = to_vvq(_vq);
2490
60f07798 2491 return READ_ONCE(vq->broken);
b3b32c94
HG
2492}
2493EXPORT_SYMBOL_GPL(virtqueue_is_broken);
2494
e2dcdfe9
RR
2495/*
2496 * This should prevent the device from being used, allowing drivers to
2497 * recover. You may need to grab appropriate locks to flush.
2498 */
2499void virtio_break_device(struct virtio_device *dev)
2500{
2501 struct virtqueue *_vq;
2502
0e566c8f 2503 spin_lock(&dev->vqs_list_lock);
e2dcdfe9
RR
2504 list_for_each_entry(_vq, &dev->vqs, list) {
2505 struct vring_virtqueue *vq = to_vvq(_vq);
60f07798
PP
2506
2507 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2508 WRITE_ONCE(vq->broken, true);
e2dcdfe9 2509 }
0e566c8f 2510 spin_unlock(&dev->vqs_list_lock);
e2dcdfe9
RR
2511}
2512EXPORT_SYMBOL_GPL(virtio_break_device);
2513
be83f04d
JW
2514/*
2515 * This should allow the device to be used by the driver. You may
2516 * need to grab appropriate locks to flush the write to
2517 * vq->broken. This should only be used in some specific case e.g
2518 * (probing and restoring). This function should only be called by the
2519 * core, not directly by the driver.
2520 */
2521void __virtio_unbreak_device(struct virtio_device *dev)
2522{
2523 struct virtqueue *_vq;
2524
2525 spin_lock(&dev->vqs_list_lock);
2526 list_for_each_entry(_vq, &dev->vqs, list) {
2527 struct vring_virtqueue *vq = to_vvq(_vq);
2528
2529 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2530 WRITE_ONCE(vq->broken, false);
2531 }
2532 spin_unlock(&dev->vqs_list_lock);
2533}
2534EXPORT_SYMBOL_GPL(__virtio_unbreak_device);
2535
2a2d1382 2536dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
89062652
CH
2537{
2538 struct vring_virtqueue *vq = to_vvq(_vq);
2539
2a2d1382
AL
2540 BUG_ON(!vq->we_own_ring);
2541
1ce9e605
TB
2542 if (vq->packed_ring)
2543 return vq->packed.ring_dma_addr;
2544
d79dca75 2545 return vq->split.queue_dma_addr;
89062652 2546}
2a2d1382 2547EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
89062652 2548
2a2d1382 2549dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
89062652
CH
2550{
2551 struct vring_virtqueue *vq = to_vvq(_vq);
2552
2a2d1382
AL
2553 BUG_ON(!vq->we_own_ring);
2554
1ce9e605
TB
2555 if (vq->packed_ring)
2556 return vq->packed.driver_event_dma_addr;
2557
d79dca75 2558 return vq->split.queue_dma_addr +
e593bf97 2559 ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
2a2d1382
AL
2560}
2561EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
2562
2563dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
2564{
2565 struct vring_virtqueue *vq = to_vvq(_vq);
2566
2567 BUG_ON(!vq->we_own_ring);
2568
1ce9e605
TB
2569 if (vq->packed_ring)
2570 return vq->packed.device_event_dma_addr;
2571
d79dca75 2572 return vq->split.queue_dma_addr +
e593bf97 2573 ((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
2a2d1382
AL
2574}
2575EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
2576
1ce9e605 2577/* Only available for split ring */
2a2d1382
AL
2578const struct vring *virtqueue_get_vring(struct virtqueue *vq)
2579{
e593bf97 2580 return &to_vvq(vq)->split.vring;
89062652 2581}
2a2d1382 2582EXPORT_SYMBOL_GPL(virtqueue_get_vring);
89062652 2583
c6fd4701 2584MODULE_LICENSE("GPL");