virtio_ring: split: __vring_new_virtqueue() accept struct vring_virtqueue_split
[linux-block.git] drivers/virtio/virtio_ring.c
fd534e9b 1// SPDX-License-Identifier: GPL-2.0-or-later
2/* Virtio ring implementation.
3 *
4 * Copyright 2007 Rusty Russell IBM Corporation
5 */
6#include <linux/virtio.h>
7#include <linux/virtio_ring.h>
e34f8725 8#include <linux/virtio_config.h>
0a8a69dd 9#include <linux/device.h>
5a0e3ad6 10#include <linux/slab.h>
b5a2c4f1 11#include <linux/module.h>
e93300b1 12#include <linux/hrtimer.h>
780bc790 13#include <linux/dma-mapping.h>
f8ce7263 14#include <linux/spinlock.h>
78fe3987 15#include <xen/xen.h>
16
17#ifdef DEBUG
18/* For development, we want to crash whenever the ring is screwed. */
19#define BAD_RING(_vq, fmt, args...) \
20 do { \
21 dev_err(&(_vq)->vq.vdev->dev, \
22 "%s:"fmt, (_vq)->vq.name, ##args); \
23 BUG(); \
24 } while (0)
25/* Caller is supposed to guarantee no reentry. */
26#define START_USE(_vq) \
27 do { \
28 if ((_vq)->in_use) \
29 panic("%s:in_use = %i\n", \
30 (_vq)->vq.name, (_vq)->in_use); \
c5f841f1 31 (_vq)->in_use = __LINE__; \
9499f5e7 32 } while (0)
3a35ce7d 33#define END_USE(_vq) \
97a545ab 34 do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
35#define LAST_ADD_TIME_UPDATE(_vq) \
36 do { \
37 ktime_t now = ktime_get(); \
38 \
39 /* No kick or get, with .1 second between? Warn. */ \
40 if ((_vq)->last_add_time_valid) \
41 WARN_ON(ktime_to_ms(ktime_sub(now, \
42 (_vq)->last_add_time)) > 100); \
43 (_vq)->last_add_time = now; \
44 (_vq)->last_add_time_valid = true; \
45 } while (0)
46#define LAST_ADD_TIME_CHECK(_vq) \
47 do { \
48 if ((_vq)->last_add_time_valid) { \
49 WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
50 (_vq)->last_add_time)) > 100); \
51 } \
52 } while (0)
53#define LAST_ADD_TIME_INVALID(_vq) \
54 ((_vq)->last_add_time_valid = false)
0a8a69dd 55#else
56#define BAD_RING(_vq, fmt, args...) \
57 do { \
58 dev_err(&_vq->vq.vdev->dev, \
59 "%s:"fmt, (_vq)->vq.name, ##args); \
60 (_vq)->broken = true; \
61 } while (0)
62#define START_USE(vq)
63#define END_USE(vq)
64#define LAST_ADD_TIME_UPDATE(vq)
65#define LAST_ADD_TIME_CHECK(vq)
66#define LAST_ADD_TIME_INVALID(vq)
67#endif
68
cbeedb72 69struct vring_desc_state_split {
70 void *data; /* Data for callback. */
71 struct vring_desc *indir_desc; /* Indirect descriptor, if any. */
72};
73
74struct vring_desc_state_packed {
75 void *data; /* Data for callback. */
76 struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
77 u16 num; /* Descriptor list length. */
78 u16 last; /* The last desc state in a list. */
79};
80
1f28750f 81struct vring_desc_extra {
82 dma_addr_t addr; /* Descriptor DMA addr. */
83 u32 len; /* Descriptor length. */
1ce9e605 84 u16 flags; /* Descriptor flags. */
aeef9b47 85 u16 next; /* The next desc state in a list. */
86};
87
88struct vring_virtqueue_split {
89 /* Actual memory layout for this queue. */
90 struct vring vring;
91
92 /* Last written value to avail->flags */
93 u16 avail_flags_shadow;
94
95 /*
96 * Last written value to avail->idx in
97 * guest byte order.
98 */
99 u16 avail_idx_shadow;
100
101 /* Per-descriptor state. */
102 struct vring_desc_state_split *desc_state;
103 struct vring_desc_extra *desc_extra;
104
105 /* DMA address and size information */
106 dma_addr_t queue_dma_addr;
107 size_t queue_size_in_bytes;
108};
109
110struct vring_virtqueue_packed {
111 /* Actual memory layout for this queue. */
112 struct {
113 unsigned int num;
114 struct vring_packed_desc *desc;
115 struct vring_packed_desc_event *driver;
116 struct vring_packed_desc_event *device;
117 } vring;
118
119 /* Driver ring wrap counter. */
120 bool avail_wrap_counter;
121
122 /* Avail used flags. */
123 u16 avail_used_flags;
124
125 /* Index of the next avail descriptor. */
126 u16 next_avail_idx;
127
128 /*
129 * Last written value to driver->flags in
130 * guest byte order.
131 */
132 u16 event_flags_shadow;
133
134 /* Per-descriptor state. */
135 struct vring_desc_state_packed *desc_state;
136 struct vring_desc_extra *desc_extra;
137
138 /* DMA address and size information */
139 dma_addr_t ring_dma_addr;
140 dma_addr_t driver_event_dma_addr;
141 dma_addr_t device_event_dma_addr;
142 size_t ring_size_in_bytes;
143 size_t event_size_in_bytes;
144};
145
43b4f721 146struct vring_virtqueue {
147 struct virtqueue vq;
148
149 /* Is this a packed ring? */
150 bool packed_ring;
151
152 /* Is DMA API used? */
153 bool use_dma_api;
154
155 /* Can we use weak barriers? */
156 bool weak_barriers;
157
158 /* Other side has made a mess, don't try any more. */
159 bool broken;
160
161 /* Host supports indirect buffers */
162 bool indirect;
163
164 /* Host publishes avail event idx */
165 bool event;
166
167 /* Head of free buffer list. */
168 unsigned int free_head;
169 /* Number we've added since last sync. */
170 unsigned int num_added;
171
a7722890 172 /* Last used index we've seen.
 173 * For the split ring, it simply holds the last used index.
 174 * For the packed ring:
 175 * bits below VRING_PACKED_EVENT_F_WRAP_CTR hold the last used index;
 176 * bits from VRING_PACKED_EVENT_F_WRAP_CTR up hold the used wrap counter.
177 */
1bc4953e 178 u16 last_used_idx;
0a8a69dd 179
180 /* Hint for event idx: already triggered no need to disable. */
181 bool event_triggered;
182
183 union {
184 /* Available for split ring */
d76136e4 185 struct vring_virtqueue_split split;
e593bf97 186
1ce9e605 187 /* Available for packed ring */
d76136e4 188 struct vring_virtqueue_packed packed;
1ce9e605 189 };
f277ec42 190
0a8a69dd 191 /* How to notify other side. FIXME: commonalize hcalls! */
46f9c2b9 192 bool (*notify)(struct virtqueue *vq);
0a8a69dd 193
194 /* DMA, allocation, and size information */
195 bool we_own_ring;
2a2d1382 196
197#ifdef DEBUG
198 /* They're supposed to lock for us. */
199 unsigned int in_use;
200
201 /* Figure out if their kicks are too delayed. */
202 bool last_add_time_valid;
203 ktime_t last_add_time;
0a8a69dd 204#endif
205};
206
07d9629d 207static struct virtqueue *__vring_new_virtqueue(unsigned int index,
cd4c812a 208 struct vring_virtqueue_split *vring_split,
209 struct virtio_device *vdev,
210 bool weak_barriers,
211 bool context,
212 bool (*notify)(struct virtqueue *),
213 void (*callback)(struct virtqueue *),
214 const char *name);
215
216/*
217 * Helpers.
218 */
219
220#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
221
35c51e09 222static inline bool virtqueue_use_indirect(struct vring_virtqueue *vq,
223 unsigned int total_sg)
224{
225 /*
226 * If the host supports indirect descriptor tables, and we have multiple
227 * buffers, then go indirect. FIXME: tune this threshold
228 */
229 return (vq->indirect && total_sg > 1 && vq->vq.num_free);
230}
231
d26c96c8 232/*
233 * Modern virtio devices have feature bits to specify whether they need a
234 * quirk and bypass the IOMMU. If not there, just use the DMA API.
235 *
236 * If there, the interaction between virtio and DMA API is messy.
237 *
238 * On most systems with virtio, physical addresses match bus addresses,
239 * and it doesn't particularly matter whether we use the DMA API.
240 *
241 * On some systems, including Xen and any system with a physical device
242 * that speaks virtio behind a physical IOMMU, we must use the DMA API
243 * for virtio DMA to work at all.
244 *
245 * On other systems, including SPARC and PPC64, virtio-pci devices are
246 * enumerated as though they are behind an IOMMU, but the virtio host
247 * ignores the IOMMU, so we must either pretend that the IOMMU isn't
248 * there or somehow map everything as the identity.
249 *
250 * For the time being, we preserve historic behavior and bypass the DMA
251 * API.
252 *
253 * TODO: install a per-device DMA ops structure that does the right thing
254 * taking into account all the above quirks, and use the DMA API
255 * unconditionally on data path.
256 */
257
258static bool vring_use_dma_api(struct virtio_device *vdev)
259{
24b6842a 260 if (!virtio_has_dma_quirk(vdev))
261 return true;
262
263 /* Otherwise, we are left to guess. */
264 /*
 265 * In theory, it's possible to have a buggy QEMU-supplied
266 * emulated Q35 IOMMU and Xen enabled at the same time. On
267 * such a configuration, virtio has never worked and will
268 * not work without an even larger kludge. Instead, enable
269 * the DMA API if we're a Xen guest, which at least allows
270 * all of the sensible Xen configurations to work correctly.
271 */
272 if (xen_domain())
273 return true;
274
275 return false;
276}
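/*
 * Note: virtio_has_dma_quirk() is true when VIRTIO_F_ACCESS_PLATFORM was
 * not negotiated, i.e. the device claims direct access to guest physical
 * memory.  The DMA API is therefore only used when that feature bit is
 * present, or when running as a Xen guest as handled above.
 */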
277
278size_t virtio_max_dma_size(struct virtio_device *vdev)
279{
280 size_t max_segment_size = SIZE_MAX;
281
282 if (vring_use_dma_api(vdev))
817fc978 283 max_segment_size = dma_max_mapping_size(vdev->dev.parent);
284
285 return max_segment_size;
286}
287EXPORT_SYMBOL_GPL(virtio_max_dma_size);
288
289static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
290 dma_addr_t *dma_handle, gfp_t flag)
291{
292 if (vring_use_dma_api(vdev)) {
293 return dma_alloc_coherent(vdev->dev.parent, size,
294 dma_handle, flag);
295 } else {
296 void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
297
298 if (queue) {
299 phys_addr_t phys_addr = virt_to_phys(queue);
300 *dma_handle = (dma_addr_t)phys_addr;
301
302 /*
 303 * Sanity check: make sure we didn't truncate
304 * the address. The only arches I can find that
305 * have 64-bit phys_addr_t but 32-bit dma_addr_t
306 * are certain non-highmem MIPS and x86
307 * configurations, but these configurations
308 * should never allocate physical pages above 32
309 * bits, so this is fine. Just in case, throw a
310 * warning and abort if we end up with an
311 * unrepresentable address.
312 */
313 if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
314 free_pages_exact(queue, PAGE_ALIGN(size));
315 return NULL;
316 }
317 }
318 return queue;
319 }
320}
321
322static void vring_free_queue(struct virtio_device *vdev, size_t size,
323 void *queue, dma_addr_t dma_handle)
324{
325 if (vring_use_dma_api(vdev))
326 dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
327 else
328 free_pages_exact(queue, PAGE_ALIGN(size));
329}
330
331/*
332 * The DMA ops on various arches are rather gnarly right now, and
333 * making all of the arch DMA ops work on the vring device itself
334 * is a mess. For now, we use the parent device for DMA ops.
335 */
75bfa81b 336static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
337{
338 return vq->vq.vdev->dev.parent;
339}
340
341/* Map one sg entry. */
342static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
343 struct scatterlist *sg,
344 enum dma_data_direction direction)
345{
fb3fba6b 346 if (!vq->use_dma_api)
347 return (dma_addr_t)sg_phys(sg);
348
349 /*
350 * We can't use dma_map_sg, because we don't use scatterlists in
351 * the way it expects (we don't guarantee that the scatterlist
352 * will exist for the lifetime of the mapping).
353 */
354 return dma_map_page(vring_dma_dev(vq),
355 sg_page(sg), sg->offset, sg->length,
356 direction);
357}
358
359static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
360 void *cpu_addr, size_t size,
361 enum dma_data_direction direction)
362{
fb3fba6b 363 if (!vq->use_dma_api)
364 return (dma_addr_t)virt_to_phys(cpu_addr);
365
366 return dma_map_single(vring_dma_dev(vq),
367 cpu_addr, size, direction);
368}
369
370static int vring_mapping_error(const struct vring_virtqueue *vq,
371 dma_addr_t addr)
372{
fb3fba6b 373 if (!vq->use_dma_api)
374 return 0;
375
376 return dma_mapping_error(vring_dma_dev(vq), addr);
377}
378
379static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
380{
381 vq->vq.num_free = num;
382
383 if (vq->packed_ring)
384 vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR);
385 else
386 vq->last_used_idx = 0;
387
388 vq->event_triggered = false;
389 vq->num_added = 0;
390
391#ifdef DEBUG
392 vq->in_use = false;
393 vq->last_add_time_valid = false;
394#endif
395}
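/*
 * For a packed ring, last_used_idx starts with the used wrap counter bit
 * (VRING_PACKED_EVENT_F_WRAP_CTR) set, mirroring the ring wrap counters
 * that driver and device both start at 1 after reset.
 */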
396
397
398/*
399 * Split ring specific functions - *_split().
400 */
401
402static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
403 struct vring_desc *desc)
404{
405 u16 flags;
406
fb3fba6b 407 if (!vq->use_dma_api)
408 return;
409
410 flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
411
412 dma_unmap_page(vring_dma_dev(vq),
413 virtio64_to_cpu(vq->vq.vdev, desc->addr),
414 virtio32_to_cpu(vq->vq.vdev, desc->len),
415 (flags & VRING_DESC_F_WRITE) ?
416 DMA_FROM_DEVICE : DMA_TO_DEVICE);
417}
418
419static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
420 unsigned int i)
421{
422 struct vring_desc_extra *extra = vq->split.desc_extra;
423 u16 flags;
424
425 if (!vq->use_dma_api)
426 goto out;
427
428 flags = extra[i].flags;
429
430 if (flags & VRING_DESC_F_INDIRECT) {
431 dma_unmap_single(vring_dma_dev(vq),
432 extra[i].addr,
433 extra[i].len,
434 (flags & VRING_DESC_F_WRITE) ?
435 DMA_FROM_DEVICE : DMA_TO_DEVICE);
436 } else {
437 dma_unmap_page(vring_dma_dev(vq),
438 extra[i].addr,
439 extra[i].len,
440 (flags & VRING_DESC_F_WRITE) ?
441 DMA_FROM_DEVICE : DMA_TO_DEVICE);
442 }
443
444out:
445 return extra[i].next;
446}
447
448static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
449 unsigned int total_sg,
450 gfp_t gfp)
451{
452 struct vring_desc *desc;
b25bd251 453 unsigned int i;
9fa29b9d 454
455 /*
456 * We require lowmem mappings for the descriptors because
457 * otherwise virt_to_phys will give us bogus addresses in the
458 * virtqueue.
459 */
82107539 460 gfp &= ~__GFP_HIGHMEM;
b92b1b89 461
6da2ec56 462 desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp);
9fa29b9d 463 if (!desc)
b25bd251 464 return NULL;
9fa29b9d 465
b25bd251 466 for (i = 0; i < total_sg; i++)
00e6f3d9 467 desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
b25bd251 468 return desc;
469}
470
471static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
472 struct vring_desc *desc,
473 unsigned int i,
474 dma_addr_t addr,
475 unsigned int len,
476 u16 flags,
477 bool indirect)
fe4c3862 478{
479 struct vring_virtqueue *vring = to_vvq(vq);
480 struct vring_desc_extra *extra = vring->split.desc_extra;
481 u16 next;
482
483 desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
484 desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
485 desc[i].len = cpu_to_virtio32(vq->vdev, len);
486
487 if (!indirect) {
488 next = extra[i].next;
489 desc[i].next = cpu_to_virtio16(vq->vdev, next);
490
491 extra[i].addr = addr;
492 extra[i].len = len;
493 extra[i].flags = flags;
494 } else
495 next = virtio16_to_cpu(vq->vdev, desc[i].next);
496
497 return next;
498}
499
500static inline int virtqueue_add_split(struct virtqueue *_vq,
501 struct scatterlist *sgs[],
502 unsigned int total_sg,
503 unsigned int out_sgs,
504 unsigned int in_sgs,
505 void *data,
506 void *ctx,
507 gfp_t gfp)
508{
509 struct vring_virtqueue *vq = to_vvq(_vq);
13816c76 510 struct scatterlist *sg;
b25bd251 511 struct vring_desc *desc;
3f649ab7 512 unsigned int i, n, avail, descs_used, prev, err_idx;
1fe9b6fe 513 int head;
b25bd251 514 bool indirect;
0a8a69dd 515
516 START_USE(vq);
517
0a8a69dd 518 BUG_ON(data == NULL);
5a08b04f 519 BUG_ON(ctx && vq->indirect);
9fa29b9d 520
521 if (unlikely(vq->broken)) {
522 END_USE(vq);
523 return -EIO;
524 }
525
4d6a105e 526 LAST_ADD_TIME_UPDATE(vq);
e93300b1 527
528 BUG_ON(total_sg == 0);
529
530 head = vq->free_head;
531
35c51e09 532 if (virtqueue_use_indirect(vq, total_sg))
138fd251 533 desc = alloc_indirect_split(_vq, total_sg, gfp);
44ed8089 534 else {
b25bd251 535 desc = NULL;
e593bf97 536 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
44ed8089 537 }
538
539 if (desc) {
540 /* Use a single buffer which doesn't continue */
780bc790 541 indirect = true;
542 /* Set up rest to use this indirect table. */
543 i = 0;
544 descs_used = 1;
b25bd251 545 } else {
780bc790 546 indirect = false;
e593bf97 547 desc = vq->split.vring.desc;
548 i = head;
549 descs_used = total_sg;
550 }
551
b4b4ff73 552 if (unlikely(vq->vq.num_free < descs_used)) {
0a8a69dd 553 pr_debug("Can't add buf len %i - avail = %i\n",
b25bd251 554 descs_used, vq->vq.num_free);
555 /* FIXME: for historical reasons, we force a notify here if
556 * there are outgoing parts to the buffer. Presumably the
557 * host should service the ring ASAP. */
13816c76 558 if (out_sgs)
44653eae 559 vq->notify(&vq->vq);
560 if (indirect)
561 kfree(desc);
562 END_USE(vq);
563 return -ENOSPC;
564 }
565
13816c76 566 for (n = 0; n < out_sgs; n++) {
eeebf9b1 567 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
568 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
569 if (vring_mapping_error(vq, addr))
570 goto unmap_release;
571
13816c76 572 prev = i;
573 /* Note that we trust indirect descriptor
 574 * table since it uses streaming DMA mapping.
575 */
fe4c3862 576 i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
577 VRING_DESC_F_NEXT,
578 indirect);
13816c76 579 }
0a8a69dd 580 }
13816c76 581 for (; n < (out_sgs + in_sgs); n++) {
eeebf9b1 582 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
583 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
584 if (vring_mapping_error(vq, addr))
585 goto unmap_release;
586
13816c76 587 prev = i;
588 /* Note that we trust indirect descriptor
 589 * table since it uses streaming DMA mapping.
590 */
591 i = virtqueue_add_desc_split(_vq, desc, i, addr,
592 sg->length,
593 VRING_DESC_F_NEXT |
594 VRING_DESC_F_WRITE,
595 indirect);
13816c76 596 }
597 }
598 /* Last one doesn't continue. */
00e6f3d9 599 desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
72b5e895 600 if (!indirect && vq->use_dma_api)
890d3356 601 vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &=
72b5e895 602 ~VRING_DESC_F_NEXT;
0a8a69dd 603
604 if (indirect) {
605 /* Now that the indirect table is filled in, map it. */
606 dma_addr_t addr = vring_map_single(
607 vq, desc, total_sg * sizeof(struct vring_desc),
608 DMA_TO_DEVICE);
609 if (vring_mapping_error(vq, addr))
610 goto unmap_release;
611
612 virtqueue_add_desc_split(_vq, vq->split.vring.desc,
613 head, addr,
614 total_sg * sizeof(struct vring_desc),
615 VRING_DESC_F_INDIRECT,
616 false);
617 }
618
619 /* We're using some buffers from the free list. */
620 vq->vq.num_free -= descs_used;
621
0a8a69dd 622 /* Update free pointer */
b25bd251 623 if (indirect)
72b5e895 624 vq->free_head = vq->split.desc_extra[head].next;
625 else
626 vq->free_head = i;
0a8a69dd 627
780bc790 628 /* Store token and indirect buffer state. */
cbeedb72 629 vq->split.desc_state[head].data = data;
780bc790 630 if (indirect)
cbeedb72 631 vq->split.desc_state[head].indir_desc = desc;
87646a34 632 else
cbeedb72 633 vq->split.desc_state[head].indir_desc = ctx;
634
635 /* Put entry in available array (but don't update avail->idx until they
3b720b8c 636 * do sync). */
637 avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
638 vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
0a8a69dd 639
640 /* Descriptors and available array need to be set before we expose the
641 * new available array entries. */
a9a0fef7 642 virtio_wmb(vq->weak_barriers);
643 vq->split.avail_idx_shadow++;
644 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
645 vq->split.avail_idx_shadow);
646 vq->num_added++;
647
648 pr_debug("Added buffer head %i to %p\n", head, vq);
649 END_USE(vq);
650
651 /* This is very unlikely, but theoretically possible. Kick
652 * just in case. */
653 if (unlikely(vq->num_added == (1 << 16) - 1))
654 virtqueue_kick(_vq);
655
98e8c6bc 656 return 0;
657
658unmap_release:
659 err_idx = i;
660
661 if (indirect)
662 i = 0;
663 else
664 i = head;
665
666 for (n = 0; n < total_sg; n++) {
667 if (i == err_idx)
668 break;
669 if (indirect) {
670 vring_unmap_one_split_indirect(vq, &desc[i]);
671 i = virtio16_to_cpu(_vq->vdev, desc[i].next);
672 } else
673 i = vring_unmap_one_split(vq, i);
674 }
675
676 if (indirect)
677 kfree(desc);
678
3cc36f6e 679 END_USE(vq);
f7728002 680 return -ENOMEM;
0a8a69dd 681}
13816c76 682
138fd251 683static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
684{
685 struct vring_virtqueue *vq = to_vvq(_vq);
a5c262c5 686 u16 new, old;
687 bool needs_kick;
688
0a8a69dd 689 START_USE(vq);
690 /* We need to expose available array entries before checking avail
691 * event. */
a9a0fef7 692 virtio_mb(vq->weak_barriers);
0a8a69dd 693
694 old = vq->split.avail_idx_shadow - vq->num_added;
695 new = vq->split.avail_idx_shadow;
696 vq->num_added = 0;
697
698 LAST_ADD_TIME_CHECK(vq);
699 LAST_ADD_TIME_INVALID(vq);
e93300b1 700
41f0377f 701 if (vq->event) {
702 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
703 vring_avail_event(&vq->split.vring)),
704 new, old);
705 } else {
706 needs_kick = !(vq->split.vring.used->flags &
707 cpu_to_virtio16(_vq->vdev,
708 VRING_USED_F_NO_NOTIFY));
41f0377f 709 }
0a8a69dd 710 END_USE(vq);
711 return needs_kick;
712}
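/*
 * vring_need_event(event_idx, new, old) is true when event_idx lies in the
 * half-open window [old, new) modulo 2^16, i.e. the index the device asked
 * to be notified at was crossed by the entries just made available.
 */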
138fd251 713
714static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
715 void **ctx)
0a8a69dd 716{
780bc790 717 unsigned int i, j;
c60923cb 718 __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
719
720 /* Clear data ptr. */
cbeedb72 721 vq->split.desc_state[head].data = NULL;
0a8a69dd 722
780bc790 723 /* Put back on free list: unmap first-level descriptors and find end */
0a8a69dd 724 i = head;
9fa29b9d 725
e593bf97 726 while (vq->split.vring.desc[i].flags & nextflag) {
727 vring_unmap_one_split(vq, i);
728 i = vq->split.desc_extra[i].next;
06ca287d 729 vq->vq.num_free++;
730 }
731
732 vring_unmap_one_split(vq, i);
733 vq->split.desc_extra[i].next = vq->free_head;
0a8a69dd 734 vq->free_head = head;
780bc790 735
0a8a69dd 736 /* Plus final descriptor */
06ca287d 737 vq->vq.num_free++;
780bc790 738
5a08b04f 739 if (vq->indirect) {
740 struct vring_desc *indir_desc =
741 vq->split.desc_state[head].indir_desc;
742 u32 len;
743
744 /* Free the indirect table, if any, now that it's unmapped. */
745 if (!indir_desc)
746 return;
747
72b5e895 748 len = vq->split.desc_extra[head].len;
780bc790 749
750 BUG_ON(!(vq->split.desc_extra[head].flags &
751 VRING_DESC_F_INDIRECT));
752 BUG_ON(len == 0 || len % sizeof(struct vring_desc));
753
754 for (j = 0; j < len / sizeof(struct vring_desc); j++)
72b5e895 755 vring_unmap_one_split_indirect(vq, &indir_desc[j]);
780bc790 756
5a08b04f 757 kfree(indir_desc);
cbeedb72 758 vq->split.desc_state[head].indir_desc = NULL;
5a08b04f 759 } else if (ctx) {
cbeedb72 760 *ctx = vq->split.desc_state[head].indir_desc;
780bc790 761 }
762}
763
138fd251 764static inline bool more_used_split(const struct vring_virtqueue *vq)
0a8a69dd 765{
766 return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
767 vq->split.vring.used->idx);
768}
769
770static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
771 unsigned int *len,
772 void **ctx)
773{
774 struct vring_virtqueue *vq = to_vvq(_vq);
775 void *ret;
776 unsigned int i;
3b720b8c 777 u16 last_used;
778
779 START_USE(vq);
780
781 if (unlikely(vq->broken)) {
782 END_USE(vq);
783 return NULL;
784 }
785
138fd251 786 if (!more_used_split(vq)) {
787 pr_debug("No more buffers in queue\n");
788 END_USE(vq);
789 return NULL;
790 }
791
2d61ba95 792 /* Only get used array entries after they have been exposed by host. */
a9a0fef7 793 virtio_rmb(vq->weak_barriers);
2d61ba95 794
795 last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
796 i = virtio32_to_cpu(_vq->vdev,
797 vq->split.vring.used->ring[last_used].id);
798 *len = virtio32_to_cpu(_vq->vdev,
799 vq->split.vring.used->ring[last_used].len);
0a8a69dd 800
e593bf97 801 if (unlikely(i >= vq->split.vring.num)) {
802 BAD_RING(vq, "id %u out of range\n", i);
803 return NULL;
804 }
cbeedb72 805 if (unlikely(!vq->split.desc_state[i].data)) {
806 BAD_RING(vq, "id %u is not a head!\n", i);
807 return NULL;
808 }
809
138fd251 810 /* detach_buf_split clears data, so grab it now. */
cbeedb72 811 ret = vq->split.desc_state[i].data;
138fd251 812 detach_buf_split(vq, i, ctx);
0a8a69dd 813 vq->last_used_idx++;
814 /* If we expect an interrupt for the next entry, tell host
815 * by writing event index and flush out the write before
816 * the read in the next get_buf call. */
e593bf97 817 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
788e5b3a 818 virtio_store_mb(vq->weak_barriers,
e593bf97 819 &vring_used_event(&vq->split.vring),
788e5b3a 820 cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
a5c262c5 821
4d6a105e 822 LAST_ADD_TIME_INVALID(vq);
e93300b1 823
824 END_USE(vq);
825 return ret;
826}
138fd251 827
138fd251 828static void virtqueue_disable_cb_split(struct virtqueue *_vq)
829{
830 struct vring_virtqueue *vq = to_vvq(_vq);
831
832 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
833 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
834 if (vq->event)
835 /* TODO: this is a hack. Figure out a cleaner value to write. */
836 vring_used_event(&vq->split.vring) = 0x0;
837 else
838 vq->split.vring.avail->flags =
839 cpu_to_virtio16(_vq->vdev,
840 vq->split.avail_flags_shadow);
f277ec42 841 }
842}
843
31532340 844static unsigned int virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
845{
846 struct vring_virtqueue *vq = to_vvq(_vq);
cc229884 847 u16 last_used_idx;
848
849 START_USE(vq);
850
851 /* We optimistically turn back on interrupts, then check if there was
852 * more to do. */
853 /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
854 * either clear the flags bit or point the event index at the next
855 * entry. Always do both to keep code simple. */
856 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
857 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
0ea1e4a6 858 if (!vq->event)
859 vq->split.vring.avail->flags =
860 cpu_to_virtio16(_vq->vdev,
861 vq->split.avail_flags_shadow);
f277ec42 862 }
863 vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
864 last_used_idx = vq->last_used_idx);
865 END_USE(vq);
866 return last_used_idx;
867}
138fd251 868
31532340 869static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_idx)
870{
871 struct vring_virtqueue *vq = to_vvq(_vq);
872
873 return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
e593bf97 874 vq->split.vring.used->idx);
875}
876
138fd251 877static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
878{
879 struct vring_virtqueue *vq = to_vvq(_vq);
880 u16 bufs;
881
882 START_USE(vq);
883
884 /* We optimistically turn back on interrupts, then check if there was
885 * more to do. */
 886 /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
887 * either clear the flags bit or point the event index at the next
0ea1e4a6 888 * entry. Always update the event index to keep code simple. */
889 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
890 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
0ea1e4a6 891 if (!vq->event)
892 vq->split.vring.avail->flags =
893 cpu_to_virtio16(_vq->vdev,
894 vq->split.avail_flags_shadow);
f277ec42 895 }
7ab358c2 896 /* TODO: tune this threshold */
e593bf97 897 bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;
898
899 virtio_store_mb(vq->weak_barriers,
e593bf97 900 &vring_used_event(&vq->split.vring),
901 cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
902
903 if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
904 - vq->last_used_idx) > bufs)) {
905 END_USE(vq);
906 return false;
907 }
908
909 END_USE(vq);
910 return true;
911}
7ab358c2 912
138fd251 913static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
914{
915 struct vring_virtqueue *vq = to_vvq(_vq);
916 unsigned int i;
917 void *buf;
918
919 START_USE(vq);
920
e593bf97 921 for (i = 0; i < vq->split.vring.num; i++) {
cbeedb72 922 if (!vq->split.desc_state[i].data)
c021eac4 923 continue;
138fd251 924 /* detach_buf_split clears data, so grab it now. */
cbeedb72 925 buf = vq->split.desc_state[i].data;
138fd251 926 detach_buf_split(vq, i, NULL);
927 vq->split.avail_idx_shadow--;
928 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
929 vq->split.avail_idx_shadow);
930 END_USE(vq);
931 return buf;
932 }
933 /* That should have freed everything. */
e593bf97 934 BUG_ON(vq->vq.num_free != vq->split.vring.num);
935
936 END_USE(vq);
937 return NULL;
938}
138fd251 939
940static struct virtqueue *vring_create_virtqueue_split(
941 unsigned int index,
942 unsigned int num,
943 unsigned int vring_align,
944 struct virtio_device *vdev,
945 bool weak_barriers,
946 bool may_reduce_num,
947 bool context,
948 bool (*notify)(struct virtqueue *),
949 void (*callback)(struct virtqueue *),
950 const char *name)
951{
cd4c812a 952 struct vring_virtqueue_split vring_split = {};
953 struct virtqueue *vq;
954 void *queue = NULL;
955 dma_addr_t dma_addr;
956 size_t queue_size_in_bytes;
957 struct vring vring;
958
959 /* We assume num is a power of 2. */
960 if (num & (num - 1)) {
961 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
962 return NULL;
963 }
964
965 /* TODO: allocate each queue chunk individually */
966 for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
967 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
968 &dma_addr,
c7cc29aa 969 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
970 if (queue)
971 break;
972 if (!may_reduce_num)
973 return NULL;
974 }
975
976 if (!num)
977 return NULL;
978
979 if (!queue) {
980 /* Try to get a single page. You are my only hope! */
981 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
982 &dma_addr, GFP_KERNEL|__GFP_ZERO);
983 }
984 if (!queue)
985 return NULL;
986
987 queue_size_in_bytes = vring_size(num, vring_align);
cd4c812a 988 vring_init(&vring_split.vring, num, queue, vring_align);
d79dca75 989
990 vq = __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers,
991 context, notify, callback, name);
992 if (!vq) {
993 vring_free_queue(vdev, queue_size_in_bytes, queue,
994 dma_addr);
995 return NULL;
996 }
997
998 to_vvq(vq)->split.queue_dma_addr = dma_addr;
999 to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes;
1000 to_vvq(vq)->we_own_ring = true;
1001
1002 return vq;
1003}
1004
e6f633e5 1005
1006/*
1007 * Packed ring specific functions - *_packed().
1008 */
a7722890 1009static inline bool packed_used_wrap_counter(u16 last_used_idx)
1010{
1011 return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1012}
1013
1014static inline u16 packed_last_used(u16 last_used_idx)
1015{
1016 return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1017}
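/*
 * With VRING_PACKED_EVENT_F_WRAP_CTR == 15, last_used_idx keeps the used
 * wrap counter in bit 15 and the used ring index in bits 0-14; for example
 * 0x8005 means index 5 with the wrap counter set.
 */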
1ce9e605 1018
1019static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
1020 struct vring_desc_extra *extra)
1021{
1022 u16 flags;
1023
1024 if (!vq->use_dma_api)
1025 return;
1026
d80dc15b 1027 flags = extra->flags;
1028
1029 if (flags & VRING_DESC_F_INDIRECT) {
1030 dma_unmap_single(vring_dma_dev(vq),
d80dc15b 1031 extra->addr, extra->len,
1032 (flags & VRING_DESC_F_WRITE) ?
1033 DMA_FROM_DEVICE : DMA_TO_DEVICE);
1034 } else {
1035 dma_unmap_page(vring_dma_dev(vq),
d80dc15b 1036 extra->addr, extra->len,
1037 (flags & VRING_DESC_F_WRITE) ?
1038 DMA_FROM_DEVICE : DMA_TO_DEVICE);
1039 }
1040}
1041
1042static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
1043 struct vring_packed_desc *desc)
1044{
1045 u16 flags;
1046
1047 if (!vq->use_dma_api)
1048 return;
1049
1050 flags = le16_to_cpu(desc->flags);
1051
1052 dma_unmap_page(vring_dma_dev(vq),
1053 le64_to_cpu(desc->addr),
1054 le32_to_cpu(desc->len),
1055 (flags & VRING_DESC_F_WRITE) ?
1056 DMA_FROM_DEVICE : DMA_TO_DEVICE);
1057}
1058
1059static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
1060 gfp_t gfp)
1061{
1062 struct vring_packed_desc *desc;
1063
1064 /*
1065 * We require lowmem mappings for the descriptors because
1066 * otherwise virt_to_phys will give us bogus addresses in the
1067 * virtqueue.
1068 */
1069 gfp &= ~__GFP_HIGHMEM;
1070
1071 desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp);
1072
1073 return desc;
1074}
1075
1076static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
1077 struct scatterlist *sgs[],
1078 unsigned int total_sg,
1079 unsigned int out_sgs,
1080 unsigned int in_sgs,
1081 void *data,
1082 gfp_t gfp)
1083{
1084 struct vring_packed_desc *desc;
1085 struct scatterlist *sg;
1086 unsigned int i, n, err_idx;
1087 u16 head, id;
1088 dma_addr_t addr;
1089
1090 head = vq->packed.next_avail_idx;
1091 desc = alloc_indirect_packed(total_sg, gfp);
1092 if (!desc)
1093 return -ENOMEM;
1094
1095 if (unlikely(vq->vq.num_free < 1)) {
1096 pr_debug("Can't add buf len 1 - avail = 0\n");
df0bfe75 1097 kfree(desc);
1098 END_USE(vq);
1099 return -ENOSPC;
1100 }
1101
1102 i = 0;
1103 id = vq->free_head;
1104 BUG_ON(id == vq->packed.vring.num);
1105
1106 for (n = 0; n < out_sgs + in_sgs; n++) {
1107 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1108 addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1109 DMA_TO_DEVICE : DMA_FROM_DEVICE);
1110 if (vring_mapping_error(vq, addr))
1111 goto unmap_release;
1112
1113 desc[i].flags = cpu_to_le16(n < out_sgs ?
1114 0 : VRING_DESC_F_WRITE);
1115 desc[i].addr = cpu_to_le64(addr);
1116 desc[i].len = cpu_to_le32(sg->length);
1117 i++;
1118 }
1119 }
1120
1121 /* Now that the indirect table is filled in, map it. */
1122 addr = vring_map_single(vq, desc,
1123 total_sg * sizeof(struct vring_packed_desc),
1124 DMA_TO_DEVICE);
1125 if (vring_mapping_error(vq, addr))
1126 goto unmap_release;
1127
1128 vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
1129 vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
1130 sizeof(struct vring_packed_desc));
1131 vq->packed.vring.desc[head].id = cpu_to_le16(id);
1132
1133 if (vq->use_dma_api) {
1134 vq->packed.desc_extra[id].addr = addr;
1135 vq->packed.desc_extra[id].len = total_sg *
1136 sizeof(struct vring_packed_desc);
1137 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
1138 vq->packed.avail_used_flags;
1139 }
1140
1141 /*
1142 * A driver MUST NOT make the first descriptor in the list
1143 * available before all subsequent descriptors comprising
1144 * the list are made available.
1145 */
1146 virtio_wmb(vq->weak_barriers);
1147 vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
1148 vq->packed.avail_used_flags);
1149
1150 /* We're using some buffers from the free list. */
1151 vq->vq.num_free -= 1;
1152
1153 /* Update free pointer */
1154 n = head + 1;
1155 if (n >= vq->packed.vring.num) {
1156 n = 0;
1157 vq->packed.avail_wrap_counter ^= 1;
1158 vq->packed.avail_used_flags ^=
1159 1 << VRING_PACKED_DESC_F_AVAIL |
1160 1 << VRING_PACKED_DESC_F_USED;
1161 }
1162 vq->packed.next_avail_idx = n;
aeef9b47 1163 vq->free_head = vq->packed.desc_extra[id].next;
1164
1165 /* Store token and indirect buffer state. */
1166 vq->packed.desc_state[id].num = 1;
1167 vq->packed.desc_state[id].data = data;
1168 vq->packed.desc_state[id].indir_desc = desc;
1169 vq->packed.desc_state[id].last = id;
1170
1171 vq->num_added += 1;
1172
1173 pr_debug("Added buffer head %i to %p\n", head, vq);
1174 END_USE(vq);
1175
1176 return 0;
1177
1178unmap_release:
1179 err_idx = i;
1180
1181 for (i = 0; i < err_idx; i++)
1182 vring_unmap_desc_packed(vq, &desc[i]);
1183
1184 kfree(desc);
1185
1186 END_USE(vq);
f7728002 1187 return -ENOMEM;
1188}
1189
1190static inline int virtqueue_add_packed(struct virtqueue *_vq,
1191 struct scatterlist *sgs[],
1192 unsigned int total_sg,
1193 unsigned int out_sgs,
1194 unsigned int in_sgs,
1195 void *data,
1196 void *ctx,
1197 gfp_t gfp)
1198{
1199 struct vring_virtqueue *vq = to_vvq(_vq);
1200 struct vring_packed_desc *desc;
1201 struct scatterlist *sg;
1202 unsigned int i, n, c, descs_used, err_idx;
1203 __le16 head_flags, flags;
1204 u16 head, id, prev, curr, avail_used_flags;
fc6d70f4 1205 int err;
1206
1207 START_USE(vq);
1208
1209 BUG_ON(data == NULL);
1210 BUG_ON(ctx && vq->indirect);
1211
1212 if (unlikely(vq->broken)) {
1213 END_USE(vq);
1214 return -EIO;
1215 }
1216
1217 LAST_ADD_TIME_UPDATE(vq);
1218
1219 BUG_ON(total_sg == 0);
1220
35c51e09 1221 if (virtqueue_use_indirect(vq, total_sg)) {
1222 err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
1223 in_sgs, data, gfp);
1224 if (err != -ENOMEM) {
1225 END_USE(vq);
fc6d70f4 1226 return err;
1861ba62 1227 }
1228
1229 /* fall back on direct */
1230 }
1231
1232 head = vq->packed.next_avail_idx;
1233 avail_used_flags = vq->packed.avail_used_flags;
1234
1235 WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
1236
1237 desc = vq->packed.vring.desc;
1238 i = head;
1239 descs_used = total_sg;
1240
1241 if (unlikely(vq->vq.num_free < descs_used)) {
1242 pr_debug("Can't add buf len %i - avail = %i\n",
1243 descs_used, vq->vq.num_free);
1244 END_USE(vq);
1245 return -ENOSPC;
1246 }
1247
1248 id = vq->free_head;
1249 BUG_ON(id == vq->packed.vring.num);
1250
1251 curr = id;
1252 c = 0;
1253 for (n = 0; n < out_sgs + in_sgs; n++) {
1254 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1255 dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1256 DMA_TO_DEVICE : DMA_FROM_DEVICE);
1257 if (vring_mapping_error(vq, addr))
1258 goto unmap_release;
1259
1260 flags = cpu_to_le16(vq->packed.avail_used_flags |
1261 (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
1262 (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
1263 if (i == head)
1264 head_flags = flags;
1265 else
1266 desc[i].flags = flags;
1267
1268 desc[i].addr = cpu_to_le64(addr);
1269 desc[i].len = cpu_to_le32(sg->length);
1270 desc[i].id = cpu_to_le16(id);
1271
1272 if (unlikely(vq->use_dma_api)) {
1273 vq->packed.desc_extra[curr].addr = addr;
1274 vq->packed.desc_extra[curr].len = sg->length;
1275 vq->packed.desc_extra[curr].flags =
1276 le16_to_cpu(flags);
1277 }
1278 prev = curr;
aeef9b47 1279 curr = vq->packed.desc_extra[curr].next;
1280
1281 if ((unlikely(++i >= vq->packed.vring.num))) {
1282 i = 0;
1283 vq->packed.avail_used_flags ^=
1284 1 << VRING_PACKED_DESC_F_AVAIL |
1285 1 << VRING_PACKED_DESC_F_USED;
1286 }
1287 }
1288 }
1289
1290 if (i < head)
1291 vq->packed.avail_wrap_counter ^= 1;
1292
1293 /* We're using some buffers from the free list. */
1294 vq->vq.num_free -= descs_used;
1295
1296 /* Update free pointer */
1297 vq->packed.next_avail_idx = i;
1298 vq->free_head = curr;
1299
1300 /* Store token. */
1301 vq->packed.desc_state[id].num = descs_used;
1302 vq->packed.desc_state[id].data = data;
1303 vq->packed.desc_state[id].indir_desc = ctx;
1304 vq->packed.desc_state[id].last = prev;
1305
1306 /*
1307 * A driver MUST NOT make the first descriptor in the list
1308 * available before all subsequent descriptors comprising
1309 * the list are made available.
1310 */
1311 virtio_wmb(vq->weak_barriers);
1312 vq->packed.vring.desc[head].flags = head_flags;
1313 vq->num_added += descs_used;
1314
1315 pr_debug("Added buffer head %i to %p\n", head, vq);
1316 END_USE(vq);
1317
1318 return 0;
1319
1320unmap_release:
1321 err_idx = i;
1322 i = head;
44593865 1323 curr = vq->free_head;
1324
1325 vq->packed.avail_used_flags = avail_used_flags;
1326
1327 for (n = 0; n < total_sg; n++) {
1328 if (i == err_idx)
1329 break;
d80dc15b 1330 vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
44593865 1331 curr = vq->packed.desc_extra[curr].next;
1332 i++;
1333 if (i >= vq->packed.vring.num)
1334 i = 0;
1335 }
1336
1337 END_USE(vq);
1338 return -EIO;
1339}
1340
1341static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
1342{
1343 struct vring_virtqueue *vq = to_vvq(_vq);
f51f9826 1344 u16 new, old, off_wrap, flags, wrap_counter, event_idx;
1345 bool needs_kick;
1346 union {
1347 struct {
1348 __le16 off_wrap;
1349 __le16 flags;
1350 };
1351 u32 u32;
1352 } snapshot;
1353
1354 START_USE(vq);
1355
1356 /*
1357 * We need to expose the new flags value before checking notification
1358 * suppressions.
1359 */
1360 virtio_mb(vq->weak_barriers);
1361
1362 old = vq->packed.next_avail_idx - vq->num_added;
1363 new = vq->packed.next_avail_idx;
1364 vq->num_added = 0;
1365
1366 snapshot.u32 = *(u32 *)vq->packed.vring.device;
1367 flags = le16_to_cpu(snapshot.flags);
1368
1369 LAST_ADD_TIME_CHECK(vq);
1370 LAST_ADD_TIME_INVALID(vq);
1371
1372 if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
1373 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
1374 goto out;
1375 }
1376
1377 off_wrap = le16_to_cpu(snapshot.off_wrap);
1378
1379 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1380 event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1381 if (wrap_counter != vq->packed.avail_wrap_counter)
1382 event_idx -= vq->packed.vring.num;
1383
1384 needs_kick = vring_need_event(event_idx, new, old);
1385out:
1386 END_USE(vq);
1387 return needs_kick;
1388}
1389
1390static void detach_buf_packed(struct vring_virtqueue *vq,
1391 unsigned int id, void **ctx)
1392{
1393 struct vring_desc_state_packed *state = NULL;
1394 struct vring_packed_desc *desc;
1395 unsigned int i, curr;
1396
1397 state = &vq->packed.desc_state[id];
1398
1399 /* Clear data ptr. */
1400 state->data = NULL;
1401
aeef9b47 1402 vq->packed.desc_extra[state->last].next = vq->free_head;
1403 vq->free_head = id;
1404 vq->vq.num_free += state->num;
1405
1406 if (unlikely(vq->use_dma_api)) {
1407 curr = id;
1408 for (i = 0; i < state->num; i++) {
1409 vring_unmap_extra_packed(vq,
1410 &vq->packed.desc_extra[curr]);
aeef9b47 1411 curr = vq->packed.desc_extra[curr].next;
1412 }
1413 }
1414
1415 if (vq->indirect) {
1416 u32 len;
1417
1418 /* Free the indirect table, if any, now that it's unmapped. */
1419 desc = state->indir_desc;
1420 if (!desc)
1421 return;
1422
1423 if (vq->use_dma_api) {
1424 len = vq->packed.desc_extra[id].len;
1425 for (i = 0; i < len / sizeof(struct vring_packed_desc);
1426 i++)
1427 vring_unmap_desc_packed(vq, &desc[i]);
1428 }
1429 kfree(desc);
1430 state->indir_desc = NULL;
1431 } else if (ctx) {
1432 *ctx = state->indir_desc;
1433 }
1434}
1435
1436static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
1437 u16 idx, bool used_wrap_counter)
1438{
1439 bool avail, used;
1440 u16 flags;
1441
1442 flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
1443 avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
1444 used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
1445
1446 return avail == used && used == used_wrap_counter;
1447}
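/*
 * In the packed ring a descriptor is "used" once its AVAIL and USED flag
 * bits are equal and both match the wrap counter of the pass in which the
 * driver made it available, which is what the comparison above checks.
 */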
1448
1449static inline bool more_used_packed(const struct vring_virtqueue *vq)
1450{
a7722890 1451 u16 last_used;
1452 u16 last_used_idx;
1453 bool used_wrap_counter;
1454
1455 last_used_idx = READ_ONCE(vq->last_used_idx);
1456 last_used = packed_last_used(last_used_idx);
1457 used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1458 return is_used_desc_packed(vq, last_used, used_wrap_counter);
1459}
1460
1461static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
1462 unsigned int *len,
1463 void **ctx)
1464{
1465 struct vring_virtqueue *vq = to_vvq(_vq);
a7722890 1466 u16 last_used, id, last_used_idx;
1467 bool used_wrap_counter;
1468 void *ret;
1469
1470 START_USE(vq);
1471
1472 if (unlikely(vq->broken)) {
1473 END_USE(vq);
1474 return NULL;
1475 }
1476
1477 if (!more_used_packed(vq)) {
1478 pr_debug("No more buffers in queue\n");
1479 END_USE(vq);
1480 return NULL;
1481 }
1482
1483 /* Only get used elements after they have been exposed by host. */
1484 virtio_rmb(vq->weak_barriers);
1485
a7722890 1486 last_used_idx = READ_ONCE(vq->last_used_idx);
1487 used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1488 last_used = packed_last_used(last_used_idx);
1489 id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
1490 *len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
1491
1492 if (unlikely(id >= vq->packed.vring.num)) {
1493 BAD_RING(vq, "id %u out of range\n", id);
1494 return NULL;
1495 }
1496 if (unlikely(!vq->packed.desc_state[id].data)) {
1497 BAD_RING(vq, "id %u is not a head!\n", id);
1498 return NULL;
1499 }
1500
1501 /* detach_buf_packed clears data, so grab it now. */
1502 ret = vq->packed.desc_state[id].data;
1503 detach_buf_packed(vq, id, ctx);
1504
a7722890 1505 last_used += vq->packed.desc_state[id].num;
1506 if (unlikely(last_used >= vq->packed.vring.num)) {
1507 last_used -= vq->packed.vring.num;
1508 used_wrap_counter ^= 1;
1509 }
1510
a7722890 1511 last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1512 WRITE_ONCE(vq->last_used_idx, last_used);
1513
1514 /*
1515 * If we expect an interrupt for the next entry, tell host
1516 * by writing event index and flush out the write before
1517 * the read in the next get_buf call.
1518 */
1519 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
1520 virtio_store_mb(vq->weak_barriers,
1521 &vq->packed.vring.driver->off_wrap,
a7722890 1522 cpu_to_le16(vq->last_used_idx));
f51f9826 1523
1524 LAST_ADD_TIME_INVALID(vq);
1525
1526 END_USE(vq);
1527 return ret;
1528}
1529
1530static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
1531{
1532 struct vring_virtqueue *vq = to_vvq(_vq);
1533
1534 if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
1535 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1536 vq->packed.vring.driver->flags =
1537 cpu_to_le16(vq->packed.event_flags_shadow);
1538 }
1539}
1540
31532340 1541static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
1542{
1543 struct vring_virtqueue *vq = to_vvq(_vq);
1544
1545 START_USE(vq);
1546
1547 /*
1548 * We optimistically turn back on interrupts, then check if there was
1549 * more to do.
1550 */
1551
1552 if (vq->event) {
1553 vq->packed.vring.driver->off_wrap =
a7722890 1554 cpu_to_le16(vq->last_used_idx);
1555 /*
1556 * We need to update event offset and event wrap
1557 * counter first before updating event flags.
1558 */
1559 virtio_wmb(vq->weak_barriers);
1560 }
1561
1ce9e605 1562 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1563 vq->packed.event_flags_shadow = vq->event ?
1564 VRING_PACKED_EVENT_FLAG_DESC :
1565 VRING_PACKED_EVENT_FLAG_ENABLE;
1566 vq->packed.vring.driver->flags =
1567 cpu_to_le16(vq->packed.event_flags_shadow);
1568 }
1569
1570 END_USE(vq);
a7722890 1571 return vq->last_used_idx;
1572}
1573
1574static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
1575{
1576 struct vring_virtqueue *vq = to_vvq(_vq);
1577 bool wrap_counter;
1578 u16 used_idx;
1579
1580 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1581 used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1582
1583 return is_used_desc_packed(vq, used_idx, wrap_counter);
1584}
1585
1586static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
1587{
1588 struct vring_virtqueue *vq = to_vvq(_vq);
a7722890 1589 u16 used_idx, wrap_counter, last_used_idx;
f51f9826 1590 u16 bufs;
1591
1592 START_USE(vq);
1593
1594 /*
1595 * We optimistically turn back on interrupts, then check if there was
1596 * more to do.
1597 */
1598
1599 if (vq->event) {
1600 /* TODO: tune this threshold */
1601 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
a7722890 1602 last_used_idx = READ_ONCE(vq->last_used_idx);
1603 wrap_counter = packed_used_wrap_counter(last_used_idx);
f51f9826 1604
a7722890 1605 used_idx = packed_last_used(last_used_idx) + bufs;
1606 if (used_idx >= vq->packed.vring.num) {
1607 used_idx -= vq->packed.vring.num;
1608 wrap_counter ^= 1;
1609 }
1610
1611 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
1612 (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1613
1614 /*
1615 * We need to update event offset and event wrap
1616 * counter first before updating event flags.
1617 */
1618 virtio_wmb(vq->weak_barriers);
f51f9826 1619 }
1620
1621 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1622 vq->packed.event_flags_shadow = vq->event ?
1623 VRING_PACKED_EVENT_FLAG_DESC :
1624 VRING_PACKED_EVENT_FLAG_ENABLE;
1625 vq->packed.vring.driver->flags =
1626 cpu_to_le16(vq->packed.event_flags_shadow);
1627 }
1628
1629 /*
1630 * We need to update event suppression structure first
1631 * before re-checking for more used buffers.
1632 */
1633 virtio_mb(vq->weak_barriers);
1634
a7722890 1635 last_used_idx = READ_ONCE(vq->last_used_idx);
1636 wrap_counter = packed_used_wrap_counter(last_used_idx);
1637 used_idx = packed_last_used(last_used_idx);
1638 if (is_used_desc_packed(vq, used_idx, wrap_counter)) {
1639 END_USE(vq);
1640 return false;
1641 }
1642
1643 END_USE(vq);
1644 return true;
1645}
1646
1647static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
1648{
1649 struct vring_virtqueue *vq = to_vvq(_vq);
1650 unsigned int i;
1651 void *buf;
1652
1653 START_USE(vq);
1654
1655 for (i = 0; i < vq->packed.vring.num; i++) {
1656 if (!vq->packed.desc_state[i].data)
1657 continue;
1658 /* detach_buf clears data, so grab it now. */
1659 buf = vq->packed.desc_state[i].data;
1660 detach_buf_packed(vq, i, NULL);
1661 END_USE(vq);
1662 return buf;
1663 }
1664 /* That should have freed everything. */
1665 BUG_ON(vq->vq.num_free != vq->packed.vring.num);
1666
1667 END_USE(vq);
1668 return NULL;
1669}
1670
96ef18a2 1671static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num)
1672{
1673 struct vring_desc_extra *desc_extra;
1674 unsigned int i;
1675
1676 desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra),
1677 GFP_KERNEL);
1678 if (!desc_extra)
1679 return NULL;
1680
1681 memset(desc_extra, 0, num * sizeof(struct vring_desc_extra));
1682
1683 for (i = 0; i < num - 1; i++)
1684 desc_extra[i].next = i + 1;
1685
1686 return desc_extra;
1687}
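/*
 * The returned array doubles as the initial free list: entry i points to
 * entry i + 1, and the memset leaves the final entry's next at 0.
 */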
1688
1689static struct virtqueue *vring_create_virtqueue_packed(
1690 unsigned int index,
1691 unsigned int num,
1692 unsigned int vring_align,
1693 struct virtio_device *vdev,
1694 bool weak_barriers,
1695 bool may_reduce_num,
1696 bool context,
1697 bool (*notify)(struct virtqueue *),
1698 void (*callback)(struct virtqueue *),
1699 const char *name)
1700{
1701 struct vring_virtqueue *vq;
1702 struct vring_packed_desc *ring;
1703 struct vring_packed_desc_event *driver, *device;
1704 dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
1705 size_t ring_size_in_bytes, event_size_in_bytes;
1706
1707 ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
1708
1709 ring = vring_alloc_queue(vdev, ring_size_in_bytes,
1710 &ring_dma_addr,
1711 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1712 if (!ring)
1713 goto err_ring;
1714
1715 event_size_in_bytes = sizeof(struct vring_packed_desc_event);
1716
1717 driver = vring_alloc_queue(vdev, event_size_in_bytes,
1718 &driver_event_dma_addr,
1719 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1720 if (!driver)
1721 goto err_driver;
1722
1723 device = vring_alloc_queue(vdev, event_size_in_bytes,
1724 &device_event_dma_addr,
1725 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1726 if (!device)
1727 goto err_device;
1728
1729 vq = kmalloc(sizeof(*vq), GFP_KERNEL);
1730 if (!vq)
1731 goto err_vq;
1732
1733 vq->vq.callback = callback;
1734 vq->vq.vdev = vdev;
1735 vq->vq.name = name;
1736 vq->vq.index = index;
1737 vq->we_own_ring = true;
1738 vq->notify = notify;
1739 vq->weak_barriers = weak_barriers;
c346dae4 1740#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
8b4ec69d 1741 vq->broken = true;
1742#else
1743 vq->broken = false;
1744#endif
1745 vq->packed_ring = true;
1746 vq->use_dma_api = vring_use_dma_api(vdev);
1747
1748 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
1749 !context;
1750 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1751
1752 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
1753 vq->weak_barriers = false;
1754
1755 vq->packed.ring_dma_addr = ring_dma_addr;
1756 vq->packed.driver_event_dma_addr = driver_event_dma_addr;
1757 vq->packed.device_event_dma_addr = device_event_dma_addr;
1758
1759 vq->packed.ring_size_in_bytes = ring_size_in_bytes;
1760 vq->packed.event_size_in_bytes = event_size_in_bytes;
1761
1762 vq->packed.vring.num = num;
1763 vq->packed.vring.desc = ring;
1764 vq->packed.vring.driver = driver;
1765 vq->packed.vring.device = device;
1766
1767 vq->packed.next_avail_idx = 0;
1768 vq->packed.avail_wrap_counter = 1;
1769 vq->packed.event_flags_shadow = 0;
1770 vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
1771
1772 vq->packed.desc_state = kmalloc_array(num,
1773 sizeof(struct vring_desc_state_packed),
1774 GFP_KERNEL);
1775 if (!vq->packed.desc_state)
1776 goto err_desc_state;
1777
1778 memset(vq->packed.desc_state, 0,
1779 num * sizeof(struct vring_desc_state_packed));
1780
1781 /* Put everything in free lists. */
1782 vq->free_head = 0;
1ce9e605 1783
96ef18a2 1784 vq->packed.desc_extra = vring_alloc_desc_extra(num);
1785 if (!vq->packed.desc_extra)
1786 goto err_desc_extra;
1787
1788 /* No callback? Tell other side not to bother us. */
1789 if (!callback) {
1790 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1791 vq->packed.vring.driver->flags =
1792 cpu_to_le16(vq->packed.event_flags_shadow);
1793 }
1794
1795 virtqueue_init(vq, num);
1796
0e566c8f 1797 spin_lock(&vdev->vqs_list_lock);
e152d8af 1798 list_add_tail(&vq->vq.list, &vdev->vqs);
0e566c8f 1799 spin_unlock(&vdev->vqs_list_lock);
1800 return &vq->vq;
1801
1802err_desc_extra:
1803 kfree(vq->packed.desc_state);
1804err_desc_state:
1805 kfree(vq);
1806err_vq:
ae93d8ea 1807 vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr);
1ce9e605 1808err_device:
ae93d8ea 1809 vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr);
1810err_driver:
1811 vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr);
1812err_ring:
1813 return NULL;
1814}
1815
1816
1817/*
1818 * Generic functions and exported symbols.
1819 */
1820
1821static inline int virtqueue_add(struct virtqueue *_vq,
1822 struct scatterlist *sgs[],
1823 unsigned int total_sg,
1824 unsigned int out_sgs,
1825 unsigned int in_sgs,
1826 void *data,
1827 void *ctx,
1828 gfp_t gfp)
1829{
1830 struct vring_virtqueue *vq = to_vvq(_vq);
1831
1832 return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
1833 out_sgs, in_sgs, data, ctx, gfp) :
1834 virtqueue_add_split(_vq, sgs, total_sg,
1835 out_sgs, in_sgs, data, ctx, gfp);
e6f633e5
TB
1836}
1837
1838/**
1839 * virtqueue_add_sgs - expose buffers to other end
a5581206 1840 * @_vq: the struct virtqueue we're talking about.
e6f633e5 1841 * @sgs: array of terminated scatterlists.
a5581206
JB
1842 * @out_sgs: the number of scatterlists readable by other side
1843 * @in_sgs: the number of scatterlists which are writable (after readable ones)
e6f633e5
TB
1844 * @data: the token identifying the buffer.
1845 * @gfp: how to do memory allocations (if necessary).
1846 *
1847 * Caller must ensure we don't call this with other virtqueue operations
1848 * at the same time (except where noted).
1849 *
 1850 * Returns zero or a negative error (i.e. -ENOSPC, -ENOMEM or -EIO).
1851 */
1852int virtqueue_add_sgs(struct virtqueue *_vq,
1853 struct scatterlist *sgs[],
1854 unsigned int out_sgs,
1855 unsigned int in_sgs,
1856 void *data,
1857 gfp_t gfp)
1858{
1859 unsigned int i, total_sg = 0;
1860
1861 /* Count them first. */
1862 for (i = 0; i < out_sgs + in_sgs; i++) {
1863 struct scatterlist *sg;
1864
1865 for (sg = sgs[i]; sg; sg = sg_next(sg))
1866 total_sg++;
1867 }
1868 return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
1869 data, NULL, gfp);
1870}
1871EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
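/*
 * Example sketch (editorial addition, not part of virtio_ring.c): one way a
 * driver might use virtqueue_add_sgs() to queue a request with a
 * device-readable header and a device-writable status byte.  struct my_req
 * and my_submit() are hypothetical; only the scatterlist/virtqueue calls are
 * real kernel API.
 */
#include <linux/gfp.h>
#include <linux/scatterlist.h>
#include <linux/virtio.h>

struct my_req {
	struct {
		__le32 type;
		__le32 reserved;
	} hdr;				/* read by the device */
	u8 status;			/* written by the device */
};

static int my_submit(struct virtqueue *vq, struct my_req *req)
{
	struct scatterlist hdr_sg, status_sg, *sgs[2];

	sg_init_one(&hdr_sg, &req->hdr, sizeof(req->hdr));
	sg_init_one(&status_sg, &req->status, sizeof(req->status));
	sgs[0] = &hdr_sg;		/* out_sgs = 1: readable by the device */
	sgs[1] = &status_sg;		/* in_sgs = 1: writable by the device */

	/* req is also the token later returned by virtqueue_get_buf() */
	return virtqueue_add_sgs(vq, sgs, 1, 1, req, GFP_ATOMIC);
}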
1872
1873/**
1874 * virtqueue_add_outbuf - expose output buffers to other end
1875 * @vq: the struct virtqueue we're talking about.
1876 * @sg: scatterlist (must be well-formed and terminated!)
1877 * @num: the number of entries in @sg readable by other side
1878 * @data: the token identifying the buffer.
1879 * @gfp: how to do memory allocations (if necessary).
1880 *
1881 * Caller must ensure we don't call this with other virtqueue operations
1882 * at the same time (except where noted).
1883 *
 1884 * Returns zero or a negative error (i.e. -ENOSPC, -ENOMEM or -EIO).
1885 */
1886int virtqueue_add_outbuf(struct virtqueue *vq,
1887 struct scatterlist *sg, unsigned int num,
1888 void *data,
1889 gfp_t gfp)
1890{
1891 return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
1892}
1893EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
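/*
 * Example sketch (editorial addition): exposing one device-readable buffer
 * and kicking the device.  "tx_vq", "buf" and my_send() are hypothetical
 * driver-side names.
 */
#include <linux/gfp.h>
#include <linux/scatterlist.h>
#include <linux/virtio.h>

static int my_send(struct virtqueue *tx_vq, void *buf, unsigned int len)
{
	struct scatterlist sg;
	int err;

	sg_init_one(&sg, buf, len);
	err = virtqueue_add_outbuf(tx_vq, &sg, 1, buf, GFP_KERNEL);
	if (err)
		return err;		/* e.g. -ENOSPC when the ring is full */

	virtqueue_kick(tx_vq);		/* tell the device about the new buffer */
	return 0;
}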
1894
1895/**
1896 * virtqueue_add_inbuf - expose input buffers to other end
1897 * @vq: the struct virtqueue we're talking about.
1898 * @sg: scatterlist (must be well-formed and terminated!)
1899 * @num: the number of entries in @sg writable by other side
1900 * @data: the token identifying the buffer.
1901 * @gfp: how to do memory allocations (if necessary).
1902 *
1903 * Caller must ensure we don't call this with other virtqueue operations
1904 * at the same time (except where noted).
1905 *
 1906 * Returns zero or a negative error (i.e. -ENOSPC, -ENOMEM or -EIO).
1907 */
1908int virtqueue_add_inbuf(struct virtqueue *vq,
1909 struct scatterlist *sg, unsigned int num,
1910 void *data,
1911 gfp_t gfp)
1912{
1913 return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
1914}
1915EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
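/*
 * Example sketch (editorial addition): pre-posting a device-writable receive
 * buffer, as an rx path typically does in a loop until the ring is full
 * (virtqueue_add_inbuf() returns -ENOSPC).  The buffer length and names are
 * hypothetical.
 */
#include <linux/errno.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/virtio.h>

#define MY_RX_BUF_LEN	2048

static int my_post_rx_buf(struct virtqueue *rx_vq)
{
	struct scatterlist sg;
	void *buf = kmalloc(MY_RX_BUF_LEN, GFP_KERNEL);

	if (!buf)
		return -ENOMEM;

	sg_init_one(&sg, buf, MY_RX_BUF_LEN);
	/* buf is both the data area and the token handed back on completion */
	return virtqueue_add_inbuf(rx_vq, &sg, 1, buf, GFP_KERNEL);
}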
1916
1917/**
1918 * virtqueue_add_inbuf_ctx - expose input buffers to other end
1919 * @vq: the struct virtqueue we're talking about.
1920 * @sg: scatterlist (must be well-formed and terminated!)
1921 * @num: the number of entries in @sg writable by other side
1922 * @data: the token identifying the buffer.
1923 * @ctx: extra context for the token
1924 * @gfp: how to do memory allocations (if necessary).
1925 *
1926 * Caller must ensure we don't call this with other virtqueue operations
1927 * at the same time (except where noted).
1928 *
 1929 * Returns zero or a negative error (i.e. -ENOSPC, -ENOMEM or -EIO).
1930 */
1931int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
1932 struct scatterlist *sg, unsigned int num,
1933 void *data,
1934 void *ctx,
1935 gfp_t gfp)
1936{
1937 return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
1938}
1939EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
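/*
 * Example sketch (editorial addition): carrying a second per-buffer value in
 * @ctx (here, the allocation size) in addition to the data token, to be read
 * back later with virtqueue_get_buf_ctx().  This assumes the queue was set
 * up with ctx support (the "context" flag); names are hypothetical.
 */
#include <linux/gfp.h>
#include <linux/scatterlist.h>
#include <linux/virtio.h>

static int my_post_rx_sized(struct virtqueue *rx_vq, void *buf, unsigned int len)
{
	struct scatterlist sg;

	sg_init_one(&sg, buf, len);
	return virtqueue_add_inbuf_ctx(rx_vq, &sg, 1, buf,
				       (void *)(unsigned long)len, GFP_KERNEL);
}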
1940
1941/**
1942 * virtqueue_kick_prepare - first half of split virtqueue_kick call.
a5581206 1943 * @_vq: the struct virtqueue
e6f633e5
TB
1944 *
1945 * Instead of virtqueue_kick(), you can do:
1946 * if (virtqueue_kick_prepare(vq))
1947 * virtqueue_notify(vq);
1948 *
 1949 * This is sometimes useful because virtqueue_kick_prepare() needs
 1950 * to be serialized, but the actual virtqueue_notify() call does not.
1951 */
1952bool virtqueue_kick_prepare(struct virtqueue *_vq)
1953{
1ce9e605
TB
1954 struct vring_virtqueue *vq = to_vvq(_vq);
1955
1956 return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
1957 virtqueue_kick_prepare_split(_vq);
e6f633e5
TB
1958}
1959EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
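/*
 * Example sketch (editorial addition): the pattern described above, batching
 * adds under a driver lock and doing the (unserialized, possibly expensive)
 * notification outside it.  struct my_dev and its lock are hypothetical.
 */
#include <linux/spinlock.h>
#include <linux/virtio.h>

struct my_dev {
	struct virtqueue *vq;
	spinlock_t vq_lock;
};

static void my_add_and_kick(struct my_dev *d)
{
	unsigned long flags;
	bool needs_kick;

	spin_lock_irqsave(&d->vq_lock, flags);
	/* ... one or more virtqueue_add_*() calls go here ... */
	needs_kick = virtqueue_kick_prepare(d->vq);
	spin_unlock_irqrestore(&d->vq_lock, flags);

	if (needs_kick)
		virtqueue_notify(d->vq);	/* no serialization required */
}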
1960
1961/**
1962 * virtqueue_notify - second half of split virtqueue_kick call.
a5581206 1963 * @_vq: the struct virtqueue
e6f633e5
TB
1964 *
1965 * This does not need to be serialized.
1966 *
1967 * Returns false if host notify failed or queue is broken, otherwise true.
1968 */
1969bool virtqueue_notify(struct virtqueue *_vq)
1970{
1971 struct vring_virtqueue *vq = to_vvq(_vq);
1972
1973 if (unlikely(vq->broken))
1974 return false;
1975
1976 /* Prod other side to tell it about changes. */
1977 if (!vq->notify(_vq)) {
1978 vq->broken = true;
1979 return false;
1980 }
1981 return true;
1982}
1983EXPORT_SYMBOL_GPL(virtqueue_notify);
1984
1985/**
1986 * virtqueue_kick - update after add_buf
1987 * @vq: the struct virtqueue
1988 *
1989 * After one or more virtqueue_add_* calls, invoke this to kick
1990 * the other side.
1991 *
1992 * Caller must ensure we don't call this with other virtqueue
1993 * operations at the same time (except where noted).
1994 *
1995 * Returns false if kick failed, otherwise true.
1996 */
1997bool virtqueue_kick(struct virtqueue *vq)
1998{
1999 if (virtqueue_kick_prepare(vq))
2000 return virtqueue_notify(vq);
2001 return true;
2002}
2003EXPORT_SYMBOL_GPL(virtqueue_kick);
2004
2005/**
31c11db6 2006 * virtqueue_get_buf_ctx - get the next used buffer
a5581206 2007 * @_vq: the struct virtqueue we're talking about.
e6f633e5 2008 * @len: the length written into the buffer
a5581206 2009 * @ctx: extra context for the token
e6f633e5
TB
2010 *
2011 * If the device wrote data into the buffer, @len will be set to the
2012 * amount written. This means you don't need to clear the buffer
2013 * beforehand to ensure there's no data leakage in the case of short
2014 * writes.
2015 *
2016 * Caller must ensure we don't call this with other virtqueue
2017 * operations at the same time (except where noted).
2018 *
2019 * Returns NULL if there are no used buffers, or the "data" token
2020 * handed to virtqueue_add_*().
2021 */
2022void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
2023 void **ctx)
2024{
1ce9e605
TB
2025 struct vring_virtqueue *vq = to_vvq(_vq);
2026
2027 return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
2028 virtqueue_get_buf_ctx_split(_vq, len, ctx);
e6f633e5
TB
2029}
2030EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
2031
2032void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
2033{
2034 return virtqueue_get_buf_ctx(_vq, len, NULL);
2035}
2036EXPORT_SYMBOL_GPL(virtqueue_get_buf);
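/*
 * Example sketch (editorial addition): draining completed buffers, typically
 * from the virtqueue callback.  The token is whatever was passed as @data to
 * virtqueue_add_*(); here it is assumed to be the buffer itself, so freeing
 * it stands in for a real completion handler.
 */
#include <linux/slab.h>
#include <linux/virtio.h>

static void my_drain_used(struct virtqueue *vq)
{
	unsigned int len;	/* bytes the device wrote into the buffer */
	void *token;

	while ((token = virtqueue_get_buf(vq, &len)))
		kfree(token);	/* hypothetical completion */
}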
e6f633e5
TB
2037/**
2038 * virtqueue_disable_cb - disable callbacks
a5581206 2039 * @_vq: the struct virtqueue we're talking about.
e6f633e5
TB
2040 *
2041 * Note that this is not necessarily synchronous, hence unreliable and only
2042 * useful as an optimization.
2043 *
2044 * Unlike other operations, this need not be serialized.
2045 */
2046void virtqueue_disable_cb(struct virtqueue *_vq)
2047{
1ce9e605
TB
2048 struct vring_virtqueue *vq = to_vvq(_vq);
2049
8d622d21
MT
 2050 /* If the device has already triggered an event, it won't trigger one
 2051 * again: no need to disable.
2052 */
2053 if (vq->event_triggered)
2054 return;
2055
1ce9e605
TB
2056 if (vq->packed_ring)
2057 virtqueue_disable_cb_packed(_vq);
2058 else
2059 virtqueue_disable_cb_split(_vq);
e6f633e5
TB
2060}
2061EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
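/*
 * Example sketch (editorial addition): a callback handler that switches
 * further callbacks off and defers the real work.  Because
 * virtqueue_disable_cb() is only an unreliable hint, the deferred worker
 * (omitted here) must tolerate the occasional late callback.  Names and the
 * use of vdev->priv are hypothetical.
 */
#include <linux/virtio.h>
#include <linux/workqueue.h>

struct my_poller {
	struct virtqueue *vq;
	struct work_struct work;	/* drains vq, then re-enables callbacks */
};

static void my_vq_cb(struct virtqueue *vq)
{
	struct my_poller *p = vq->vdev->priv;	/* set up at probe time */

	virtqueue_disable_cb(vq);
	schedule_work(&p->work);
}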
2062
2063/**
2064 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
a5581206 2065 * @_vq: the struct virtqueue we're talking about.
e6f633e5
TB
2066 *
2067 * This re-enables callbacks; it returns current queue state
2068 * in an opaque unsigned value. This value should be later tested by
2069 * virtqueue_poll, to detect a possible race between the driver checking for
2070 * more work, and enabling callbacks.
2071 *
2072 * Caller must ensure we don't call this with other virtqueue
2073 * operations at the same time (except where noted).
2074 */
31532340 2075unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq)
e6f633e5 2076{
1ce9e605
TB
2077 struct vring_virtqueue *vq = to_vvq(_vq);
2078
8d622d21
MT
2079 if (vq->event_triggered)
2080 vq->event_triggered = false;
2081
1ce9e605
TB
2082 return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
2083 virtqueue_enable_cb_prepare_split(_vq);
e6f633e5
TB
2084}
2085EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
2086
2087/**
2088 * virtqueue_poll - query pending used buffers
a5581206 2089 * @_vq: the struct virtqueue we're talking about.
e6f633e5
TB
2090 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
2091 *
2092 * Returns "true" if there are pending used buffers in the queue.
2093 *
2094 * This does not need to be serialized.
2095 */
31532340 2096bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx)
e6f633e5
TB
2097{
2098 struct vring_virtqueue *vq = to_vvq(_vq);
2099
481a0d74
MW
2100 if (unlikely(vq->broken))
2101 return false;
2102
e6f633e5 2103 virtio_mb(vq->weak_barriers);
1ce9e605
TB
2104 return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
2105 virtqueue_poll_split(_vq, last_used_idx);
e6f633e5
TB
2106}
2107EXPORT_SYMBOL_GPL(virtqueue_poll);
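/*
 * Example sketch (editorial addition): the race-free re-enable pattern built
 * from virtqueue_enable_cb_prepare() and virtqueue_poll().  If buffers
 * arrived between the last get_buf and re-enabling callbacks, the poll
 * notices and draining continues instead of waiting for an interrupt that
 * may never come.  Freeing the token stands in for a real completion.
 */
#include <linux/slab.h>
#include <linux/virtio.h>

static void my_process_all(struct virtqueue *vq)
{
	unsigned int opaque, len;
	void *token;

	for (;;) {
		while ((token = virtqueue_get_buf(vq, &len)))
			kfree(token);		/* hypothetical completion */

		opaque = virtqueue_enable_cb_prepare(vq);
		if (!virtqueue_poll(vq, opaque))
			break;			/* quiet; callbacks are back on */

		virtqueue_disable_cb(vq);	/* more work raced in, keep going */
	}
}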
2108
2109/**
2110 * virtqueue_enable_cb - restart callbacks after disable_cb.
a5581206 2111 * @_vq: the struct virtqueue we're talking about.
e6f633e5
TB
2112 *
2113 * This re-enables callbacks; it returns "false" if there are pending
2114 * buffers in the queue, to detect a possible race between the driver
2115 * checking for more work, and enabling callbacks.
2116 *
2117 * Caller must ensure we don't call this with other virtqueue
2118 * operations at the same time (except where noted).
2119 */
2120bool virtqueue_enable_cb(struct virtqueue *_vq)
2121{
31532340 2122 unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq);
e6f633e5
TB
2123
2124 return !virtqueue_poll(_vq, last_used_idx);
2125}
2126EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
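/*
 * Example sketch (editorial addition): the simpler form of the same idea
 * using virtqueue_enable_cb() directly: keep draining until callbacks are
 * re-enabled with nothing left pending.
 */
#include <linux/slab.h>
#include <linux/virtio.h>

static void my_drain_until_quiet(struct virtqueue *vq)
{
	unsigned int len;
	void *token;

	do {
		virtqueue_disable_cb(vq);
		while ((token = virtqueue_get_buf(vq, &len)))
			kfree(token);		/* hypothetical completion */
	} while (!virtqueue_enable_cb(vq));	/* false means buffers raced in */
}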
2127
2128/**
2129 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
a5581206 2130 * @_vq: the struct virtqueue we're talking about.
e6f633e5
TB
2131 *
2132 * This re-enables callbacks but hints to the other side to delay
2133 * interrupts until most of the available buffers have been processed;
2134 * it returns "false" if there are many pending buffers in the queue,
2135 * to detect a possible race between the driver checking for more work,
2136 * and enabling callbacks.
2137 *
2138 * Caller must ensure we don't call this with other virtqueue
2139 * operations at the same time (except where noted).
2140 */
2141bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
2142{
1ce9e605
TB
2143 struct vring_virtqueue *vq = to_vvq(_vq);
2144
8d622d21
MT
2145 if (vq->event_triggered)
2146 vq->event_triggered = false;
2147
1ce9e605
TB
2148 return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
2149 virtqueue_enable_cb_delayed_split(_vq);
e6f633e5
TB
2150}
2151EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
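/*
 * Example sketch (editorial addition): a transmit-completion path that
 * prefers fewer interrupts.  virtqueue_enable_cb_delayed() asks the device
 * to hold back the interrupt until most buffers are used; if it reports
 * pending work, reap immediately instead.  Names are hypothetical.
 */
#include <linux/slab.h>
#include <linux/virtio.h>

static void my_tx_cleanup(struct virtqueue *tx_vq)
{
	unsigned int len;
	void *token;

	while ((token = virtqueue_get_buf(tx_vq, &len)))
		kfree(token);			/* hypothetical completion */

	if (!virtqueue_enable_cb_delayed(tx_vq)) {
		/* many completions already pending: reap them now */
		virtqueue_disable_cb(tx_vq);
		while ((token = virtqueue_get_buf(tx_vq, &len)))
			kfree(token);
	}
}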
2152
138fd251
TB
2153/**
2154 * virtqueue_detach_unused_buf - detach first unused buffer
a5581206 2155 * @_vq: the struct virtqueue we're talking about.
138fd251
TB
2156 *
2157 * Returns NULL or the "data" token handed to virtqueue_add_*().
a62eecb3
XZ
 2158 * This is not valid on an active queue; it is useful during device
 2159 * shutdown or after a queue reset.
138fd251
TB
2160 */
2161void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
2162{
1ce9e605
TB
2163 struct vring_virtqueue *vq = to_vvq(_vq);
2164
2165 return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
2166 virtqueue_detach_unused_buf_split(_vq);
138fd251 2167}
7c5e9ed0 2168EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
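/*
 * Example sketch (editorial addition): reclaiming buffers that were queued
 * but never consumed, during device removal once the queue has been stopped.
 * Freeing the token assumes the token was the buffer itself.
 */
#include <linux/slab.h>
#include <linux/virtio.h>

static void my_free_unused(struct virtqueue *vq)
{
	void *token;

	while ((token = virtqueue_detach_unused_buf(vq)))
		kfree(token);
}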
c021eac4 2169
138fd251
TB
2170static inline bool more_used(const struct vring_virtqueue *vq)
2171{
1ce9e605 2172 return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
138fd251
TB
2173}
2174
0a8a69dd
RR
2175irqreturn_t vring_interrupt(int irq, void *_vq)
2176{
2177 struct vring_virtqueue *vq = to_vvq(_vq);
2178
2179 if (!more_used(vq)) {
2180 pr_debug("virtqueue interrupt with no work for %p\n", vq);
2181 return IRQ_NONE;
2182 }
2183
8b4ec69d 2184 if (unlikely(vq->broken)) {
c346dae4 2185#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
8b4ec69d
JW
2186 dev_warn_once(&vq->vq.vdev->dev,
2187 "virtio vring IRQ raised before DRIVER_OK");
2188 return IRQ_NONE;
c346dae4
JW
2189#else
2190 return IRQ_HANDLED;
2191#endif
8b4ec69d 2192 }
0a8a69dd 2193
8d622d21
MT
2194 /* Just a hint for performance: so it's ok that this can be racy! */
2195 if (vq->event)
2196 vq->event_triggered = true;
2197
0a8a69dd 2198 pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
18445c4d
RR
2199 if (vq->vq.callback)
2200 vq->vq.callback(&vq->vq);
0a8a69dd
RR
2201
2202 return IRQ_HANDLED;
2203}
c6fd4701 2204EXPORT_SYMBOL_GPL(vring_interrupt);
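/*
 * Example sketch (editorial addition): how a transport can wire a
 * per-virtqueue interrupt line to vring_interrupt(); virtio-pci does roughly
 * this for per-queue MSI-X vectors.  The irq number and name are
 * hypothetical.
 */
#include <linux/interrupt.h>
#include <linux/virtio_ring.h>

static int my_setup_vq_irq(unsigned int irq, struct virtqueue *vq)
{
	return request_irq(irq, vring_interrupt, 0, "my-virtio-vq", vq);
}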
0a8a69dd 2205
1ce9e605 2206/* Only available for split ring */
07d9629d 2207static struct virtqueue *__vring_new_virtqueue(unsigned int index,
cd4c812a 2208 struct vring_virtqueue_split *vring_split,
07d9629d
XZ
2209 struct virtio_device *vdev,
2210 bool weak_barriers,
2211 bool context,
2212 bool (*notify)(struct virtqueue *),
2213 void (*callback)(struct virtqueue *),
2214 const char *name)
0a8a69dd 2215{
2a2d1382 2216 struct vring_virtqueue *vq;
0a8a69dd 2217
1ce9e605
TB
2218 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2219 return NULL;
2220
cbeedb72 2221 vq = kmalloc(sizeof(*vq), GFP_KERNEL);
0a8a69dd
RR
2222 if (!vq)
2223 return NULL;
2224
1ce9e605 2225 vq->packed_ring = false;
0a8a69dd
RR
2226 vq->vq.callback = callback;
2227 vq->vq.vdev = vdev;
9499f5e7 2228 vq->vq.name = name;
06ca287d 2229 vq->vq.index = index;
2a2d1382 2230 vq->we_own_ring = false;
0a8a69dd 2231 vq->notify = notify;
7b21e34f 2232 vq->weak_barriers = weak_barriers;
c346dae4 2233#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
8b4ec69d 2234 vq->broken = true;
c346dae4
JW
2235#else
2236 vq->broken = false;
2237#endif
fb3fba6b 2238 vq->use_dma_api = vring_use_dma_api(vdev);
0a8a69dd 2239
5a08b04f
MT
2240 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2241 !context;
a5c262c5 2242 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
9fa29b9d 2243
45383fb0
TB
2244 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2245 vq->weak_barriers = false;
2246
d79dca75
TB
2247 vq->split.queue_dma_addr = 0;
2248 vq->split.queue_size_in_bytes = 0;
2249
cd4c812a 2250 vq->split.vring = vring_split->vring;
e593bf97
TB
2251 vq->split.avail_flags_shadow = 0;
2252 vq->split.avail_idx_shadow = 0;
2253
0a8a69dd 2254 /* No callback? Tell other side not to bother us. */
f277ec42 2255 if (!callback) {
e593bf97 2256 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
0ea1e4a6 2257 if (!vq->event)
e593bf97
TB
2258 vq->split.vring.avail->flags = cpu_to_virtio16(vdev,
2259 vq->split.avail_flags_shadow);
f277ec42 2260 }
0a8a69dd 2261
cd4c812a 2262 vq->split.desc_state = kmalloc_array(vring_split->vring.num,
cbeedb72 2263 sizeof(struct vring_desc_state_split), GFP_KERNEL);
5bc72234
JW
2264 if (!vq->split.desc_state)
2265 goto err_state;
cbeedb72 2266
cd4c812a 2267 vq->split.desc_extra = vring_alloc_desc_extra(vring_split->vring.num);
72b5e895
JW
2268 if (!vq->split.desc_extra)
2269 goto err_extra;
2270
0a8a69dd 2271 /* Put everything in free lists. */
0a8a69dd 2272 vq->free_head = 0;
cd4c812a 2273 memset(vq->split.desc_state, 0, vring_split->vring.num *
cbeedb72 2274 sizeof(struct vring_desc_state_split));
0a8a69dd 2275
cd4c812a 2276 virtqueue_init(vq, vring_split->vring.num);
3a897128 2277
0e566c8f 2278 spin_lock(&vdev->vqs_list_lock);
e152d8af 2279 list_add_tail(&vq->vq.list, &vdev->vqs);
0e566c8f 2280 spin_unlock(&vdev->vqs_list_lock);
0a8a69dd 2281 return &vq->vq;
5bc72234 2282
72b5e895
JW
2283err_extra:
2284 kfree(vq->split.desc_state);
5bc72234
JW
2285err_state:
2286 kfree(vq);
2287 return NULL;
0a8a69dd 2288}
2a2d1382 2289
2a2d1382
AL
2290struct virtqueue *vring_create_virtqueue(
2291 unsigned int index,
2292 unsigned int num,
2293 unsigned int vring_align,
2294 struct virtio_device *vdev,
2295 bool weak_barriers,
2296 bool may_reduce_num,
f94682dd 2297 bool context,
2a2d1382
AL
2298 bool (*notify)(struct virtqueue *),
2299 void (*callback)(struct virtqueue *),
2300 const char *name)
2301{
1ce9e605
TB
2302
2303 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2304 return vring_create_virtqueue_packed(index, num, vring_align,
2305 vdev, weak_barriers, may_reduce_num,
2306 context, notify, callback, name);
2307
d79dca75
TB
2308 return vring_create_virtqueue_split(index, num, vring_align,
2309 vdev, weak_barriers, may_reduce_num,
2310 context, notify, callback, name);
2a2d1382
AL
2311}
2312EXPORT_SYMBOL_GPL(vring_create_virtqueue);
2313
1ce9e605 2314/* Only available for split ring */
2a2d1382
AL
2315struct virtqueue *vring_new_virtqueue(unsigned int index,
2316 unsigned int num,
2317 unsigned int vring_align,
2318 struct virtio_device *vdev,
2319 bool weak_barriers,
f94682dd 2320 bool context,
2a2d1382
AL
2321 void *pages,
2322 bool (*notify)(struct virtqueue *vq),
2323 void (*callback)(struct virtqueue *vq),
2324 const char *name)
2325{
cd4c812a 2326 struct vring_virtqueue_split vring_split = {};
1ce9e605
TB
2327
2328 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2329 return NULL;
2330
cd4c812a
XZ
2331 vring_init(&vring_split.vring, num, pages, vring_align);
2332 return __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers,
2333 context, notify, callback, name);
2a2d1382 2334}
c6fd4701 2335EXPORT_SYMBOL_GPL(vring_new_virtqueue);
0a8a69dd 2336
3ea19e32 2337static void vring_free(struct virtqueue *_vq)
0a8a69dd 2338{
2a2d1382
AL
2339 struct vring_virtqueue *vq = to_vvq(_vq);
2340
2341 if (vq->we_own_ring) {
1ce9e605
TB
2342 if (vq->packed_ring) {
2343 vring_free_queue(vq->vq.vdev,
2344 vq->packed.ring_size_in_bytes,
2345 vq->packed.vring.desc,
2346 vq->packed.ring_dma_addr);
2347
2348 vring_free_queue(vq->vq.vdev,
2349 vq->packed.event_size_in_bytes,
2350 vq->packed.vring.driver,
2351 vq->packed.driver_event_dma_addr);
2352
2353 vring_free_queue(vq->vq.vdev,
2354 vq->packed.event_size_in_bytes,
2355 vq->packed.vring.device,
2356 vq->packed.device_event_dma_addr);
2357
2358 kfree(vq->packed.desc_state);
2359 kfree(vq->packed.desc_extra);
2360 } else {
2361 vring_free_queue(vq->vq.vdev,
2362 vq->split.queue_size_in_bytes,
2363 vq->split.vring.desc,
2364 vq->split.queue_dma_addr);
1ce9e605 2365 }
2a2d1382 2366 }
72b5e895 2367 if (!vq->packed_ring) {
f13f09a1 2368 kfree(vq->split.desc_state);
72b5e895
JW
2369 kfree(vq->split.desc_extra);
2370 }
3ea19e32
XZ
2371}
2372
2373void vring_del_virtqueue(struct virtqueue *_vq)
2374{
2375 struct vring_virtqueue *vq = to_vvq(_vq);
2376
2377 spin_lock(&vq->vq.vdev->vqs_list_lock);
2378 list_del(&_vq->list);
2379 spin_unlock(&vq->vq.vdev->vqs_list_lock);
2380
2381 vring_free(_vq);
2382
2a2d1382 2383 kfree(vq);
0a8a69dd 2384}
c6fd4701 2385EXPORT_SYMBOL_GPL(vring_del_virtqueue);
0a8a69dd 2386
e34f8725
RR
2387/* Manipulates transport-specific feature bits. */
2388void vring_transport_features(struct virtio_device *vdev)
2389{
2390 unsigned int i;
2391
2392 for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
2393 switch (i) {
9fa29b9d
MM
2394 case VIRTIO_RING_F_INDIRECT_DESC:
2395 break;
a5c262c5
MT
2396 case VIRTIO_RING_F_EVENT_IDX:
2397 break;
747ae34a
MT
2398 case VIRTIO_F_VERSION_1:
2399 break;
321bd212 2400 case VIRTIO_F_ACCESS_PLATFORM:
1a937693 2401 break;
f959a128
TB
2402 case VIRTIO_F_RING_PACKED:
2403 break;
45383fb0
TB
2404 case VIRTIO_F_ORDER_PLATFORM:
2405 break;
e34f8725
RR
2406 default:
2407 /* We don't understand this bit. */
e16e12be 2408 __virtio_clear_bit(vdev, i);
e34f8725
RR
2409 }
2410 }
2411}
2412EXPORT_SYMBOL_GPL(vring_transport_features);
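/*
 * Example sketch (editorial addition): a transport's finalize_features hook
 * usually starts by masking ring/transport bits it does not handle itself
 * via vring_transport_features(); virtio-mmio and virtio-pci follow this
 * shape.  The hook name is hypothetical and the rest of the hook is omitted.
 */
#include <linux/virtio.h>
#include <linux/virtio_ring.h>

static int my_finalize_features(struct virtio_device *vdev)
{
	/* Drop transport feature bits this transport does not implement. */
	vring_transport_features(vdev);

	/* ... write the negotiated features out to the device here ... */
	return 0;
}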
2413
5dfc1762
RR
2414/**
2415 * virtqueue_get_vring_size - return the size of the virtqueue's vring
a5581206 2416 * @_vq: the struct virtqueue containing the vring of interest.
5dfc1762
RR
2417 *
2418 * Returns the size of the vring. This is mainly used for boasting to
2419 * userspace. Unlike other operations, this need not be serialized.
2420 */
8f9f4668
RJ
2421unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
2422{
2423
2424 struct vring_virtqueue *vq = to_vvq(_vq);
2425
1ce9e605 2426 return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
8f9f4668
RJ
2427}
2428EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
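/*
 * Example sketch (editorial addition): using the ring size to bound a
 * driver-side lookup table so it never tracks more requests than the ring
 * can hold.  The table layout is hypothetical.
 */
#include <linux/slab.h>
#include <linux/virtio.h>

static void **my_alloc_request_table(struct virtqueue *vq)
{
	return kcalloc(virtqueue_get_vring_size(vq), sizeof(void *),
		       GFP_KERNEL);
}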
2429
b3b32c94
HG
2430bool virtqueue_is_broken(struct virtqueue *_vq)
2431{
2432 struct vring_virtqueue *vq = to_vvq(_vq);
2433
60f07798 2434 return READ_ONCE(vq->broken);
b3b32c94
HG
2435}
2436EXPORT_SYMBOL_GPL(virtqueue_is_broken);
2437
e2dcdfe9
RR
2438/*
2439 * This should prevent the device from being used, allowing drivers to
2440 * recover. You may need to grab appropriate locks to flush.
2441 */
2442void virtio_break_device(struct virtio_device *dev)
2443{
2444 struct virtqueue *_vq;
2445
0e566c8f 2446 spin_lock(&dev->vqs_list_lock);
e2dcdfe9
RR
2447 list_for_each_entry(_vq, &dev->vqs, list) {
2448 struct vring_virtqueue *vq = to_vvq(_vq);
60f07798
PP
2449
2450 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2451 WRITE_ONCE(vq->broken, true);
e2dcdfe9 2452 }
0e566c8f 2453 spin_unlock(&dev->vqs_list_lock);
e2dcdfe9
RR
2454}
2455EXPORT_SYMBOL_GPL(virtio_break_device);
2456
be83f04d
JW
2457/*
2458 * This should allow the device to be used by the driver. You may
2459 * need to grab appropriate locks to flush the write to
 2460 * vq->broken. This should only be used in specific cases, e.g.
 2461 * probing and restoring. This function should only be called by the
2462 * core, not directly by the driver.
2463 */
2464void __virtio_unbreak_device(struct virtio_device *dev)
2465{
2466 struct virtqueue *_vq;
2467
2468 spin_lock(&dev->vqs_list_lock);
2469 list_for_each_entry(_vq, &dev->vqs, list) {
2470 struct vring_virtqueue *vq = to_vvq(_vq);
2471
2472 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2473 WRITE_ONCE(vq->broken, false);
2474 }
2475 spin_unlock(&dev->vqs_list_lock);
2476}
2477EXPORT_SYMBOL_GPL(__virtio_unbreak_device);
2478
2a2d1382 2479dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
89062652
CH
2480{
2481 struct vring_virtqueue *vq = to_vvq(_vq);
2482
2a2d1382
AL
2483 BUG_ON(!vq->we_own_ring);
2484
1ce9e605
TB
2485 if (vq->packed_ring)
2486 return vq->packed.ring_dma_addr;
2487
d79dca75 2488 return vq->split.queue_dma_addr;
89062652 2489}
2a2d1382 2490EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
89062652 2491
2a2d1382 2492dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
89062652
CH
2493{
2494 struct vring_virtqueue *vq = to_vvq(_vq);
2495
2a2d1382
AL
2496 BUG_ON(!vq->we_own_ring);
2497
1ce9e605
TB
2498 if (vq->packed_ring)
2499 return vq->packed.driver_event_dma_addr;
2500
d79dca75 2501 return vq->split.queue_dma_addr +
e593bf97 2502 ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
2a2d1382
AL
2503}
2504EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
2505
2506dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
2507{
2508 struct vring_virtqueue *vq = to_vvq(_vq);
2509
2510 BUG_ON(!vq->we_own_ring);
2511
1ce9e605
TB
2512 if (vq->packed_ring)
2513 return vq->packed.device_event_dma_addr;
2514
d79dca75 2515 return vq->split.queue_dma_addr +
e593bf97 2516 ((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
2a2d1382
AL
2517}
2518EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
2519
1ce9e605 2520/* Only available for split ring */
2a2d1382
AL
2521const struct vring *virtqueue_get_vring(struct virtqueue *vq)
2522{
e593bf97 2523 return &to_vvq(vq)->split.vring;
89062652 2524}
2a2d1382 2525EXPORT_SYMBOL_GPL(virtqueue_get_vring);
89062652 2526
c6fd4701 2527MODULE_LICENSE("GPL");