virtio: Revert "virtio: find_vqs() add arg sizes"
[linux-block.git] / drivers / virtio / virtio_ring.c
CommitLineData
fd534e9b 1// SPDX-License-Identifier: GPL-2.0-or-later
0a8a69dd
RR
2/* Virtio ring implementation.
3 *
4 * Copyright 2007 Rusty Russell IBM Corporation
0a8a69dd
RR
5 */
6#include <linux/virtio.h>
7#include <linux/virtio_ring.h>
e34f8725 8#include <linux/virtio_config.h>
0a8a69dd 9#include <linux/device.h>
5a0e3ad6 10#include <linux/slab.h>
b5a2c4f1 11#include <linux/module.h>
e93300b1 12#include <linux/hrtimer.h>
780bc790 13#include <linux/dma-mapping.h>
f8ce7263 14#include <linux/spinlock.h>
78fe3987 15#include <xen/xen.h>
0a8a69dd
RR
16
#ifdef DEBUG
/* For development, we want to crash whenever the ring is screwed. */
#define BAD_RING(_vq, fmt, args...) \
	do { \
		dev_err(&(_vq)->vq.vdev->dev, \
			"%s:"fmt, (_vq)->vq.name, ##args); \
		BUG(); \
	} while (0)
/*
 * Caller is supposed to guarantee no reentry: in_use records the line that
 * entered the critical section and a second entry panics.
 */
#define START_USE(_vq) \
	do { \
		if ((_vq)->in_use) \
			panic("%s:in_use = %i\n", \
			      (_vq)->vq.name, (_vq)->in_use); \
		(_vq)->in_use = __LINE__; \
	} while (0)
#define END_USE(_vq) \
	do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
/* Record the time of an add; warn if the previous add is still pending. */
#define LAST_ADD_TIME_UPDATE(_vq) \
	do { \
		ktime_t now = ktime_get(); \
 \
		/* No kick or get, with .1 second between?  Warn. */ \
		if ((_vq)->last_add_time_valid) \
			WARN_ON(ktime_to_ms(ktime_sub(now, \
				(_vq)->last_add_time)) > 100); \
		(_vq)->last_add_time = now; \
		(_vq)->last_add_time_valid = true; \
	} while (0)
/* Warn if more than .1 second elapsed since the last recorded add. */
#define LAST_ADD_TIME_CHECK(_vq) \
	do { \
		if ((_vq)->last_add_time_valid) { \
			WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
				      (_vq)->last_add_time)) > 100); \
		} \
	} while (0)
#define LAST_ADD_TIME_INVALID(_vq) \
	((_vq)->last_add_time_valid = false)
#else
/*
 * Production build: log the problem and mark the queue broken instead of
 * crashing.  The macro argument is parenthesized at every use so that an
 * expression argument (e.g. "vqs + 1") expands correctly, matching the
 * DEBUG variant above.
 */
#define BAD_RING(_vq, fmt, args...) \
	do { \
		dev_err(&(_vq)->vq.vdev->dev, \
			"%s:"fmt, (_vq)->vq.name, ##args); \
		(_vq)->broken = true; \
	} while (0)
/* The usage-tracking helpers compile away entirely without DEBUG. */
#define START_USE(vq)
#define END_USE(vq)
#define LAST_ADD_TIME_UPDATE(vq)
#define LAST_ADD_TIME_CHECK(vq)
#define LAST_ADD_TIME_INVALID(vq)
#endif
68
/* Per-head driver state for a buffer queued on a split ring. */
struct vring_desc_state_split {
	/* Token supplied when the buffer was added; returned on completion. */
	void *data;
	/* Indirect descriptor table if one was used, else the caller's ctx. */
	struct vring_desc *indir_desc;
};
73
1ce9e605
TB
74struct vring_desc_state_packed {
75 void *data; /* Data for callback. */
76 struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
77 u16 num; /* Descriptor list length. */
1ce9e605
TB
78 u16 last; /* The last desc state in a list. */
79};
80
1f28750f 81struct vring_desc_extra {
ef5c366f
JW
82 dma_addr_t addr; /* Descriptor DMA addr. */
83 u32 len; /* Descriptor length. */
1ce9e605 84 u16 flags; /* Descriptor flags. */
aeef9b47 85 u16 next; /* The next desc state in a list. */
1ce9e605
TB
86};
87
d76136e4
XZ
88struct vring_virtqueue_split {
89 /* Actual memory layout for this queue. */
90 struct vring vring;
91
92 /* Last written value to avail->flags */
93 u16 avail_flags_shadow;
94
95 /*
96 * Last written value to avail->idx in
97 * guest byte order.
98 */
99 u16 avail_idx_shadow;
100
101 /* Per-descriptor state. */
102 struct vring_desc_state_split *desc_state;
103 struct vring_desc_extra *desc_extra;
104
105 /* DMA address and size information */
106 dma_addr_t queue_dma_addr;
107 size_t queue_size_in_bytes;
af36b16f
XZ
108
109 /*
110 * The parameters for creating vrings are reserved for creating new
111 * vring.
112 */
113 u32 vring_align;
114 bool may_reduce_num;
d76136e4
XZ
115};
116
117struct vring_virtqueue_packed {
118 /* Actual memory layout for this queue. */
119 struct {
120 unsigned int num;
121 struct vring_packed_desc *desc;
122 struct vring_packed_desc_event *driver;
123 struct vring_packed_desc_event *device;
124 } vring;
125
126 /* Driver ring wrap counter. */
127 bool avail_wrap_counter;
128
129 /* Avail used flags. */
130 u16 avail_used_flags;
131
132 /* Index of the next avail descriptor. */
133 u16 next_avail_idx;
134
135 /*
136 * Last written value to driver->flags in
137 * guest byte order.
138 */
139 u16 event_flags_shadow;
140
141 /* Per-descriptor state. */
142 struct vring_desc_state_packed *desc_state;
143 struct vring_desc_extra *desc_extra;
144
145 /* DMA address and size information */
146 dma_addr_t ring_dma_addr;
147 dma_addr_t driver_event_dma_addr;
148 dma_addr_t device_event_dma_addr;
149 size_t ring_size_in_bytes;
150 size_t event_size_in_bytes;
151};
152
43b4f721 153struct vring_virtqueue {
0a8a69dd
RR
154 struct virtqueue vq;
155
1ce9e605
TB
156 /* Is this a packed ring? */
157 bool packed_ring;
158
fb3fba6b
TB
159 /* Is DMA API used? */
160 bool use_dma_api;
161
7b21e34f
RR
162 /* Can we use weak barriers? */
163 bool weak_barriers;
164
0a8a69dd
RR
165 /* Other side has made a mess, don't try any more. */
166 bool broken;
167
9fa29b9d
MM
168 /* Host supports indirect buffers */
169 bool indirect;
170
a5c262c5
MT
171 /* Host publishes avail event idx */
172 bool event;
173
0a8a69dd
RR
174 /* Head of free buffer list. */
175 unsigned int free_head;
176 /* Number we've added since last sync. */
177 unsigned int num_added;
178
a7722890 179 /* Last used index we've seen.
180 * for split ring, it just contains last used index
181 * for packed ring:
182 * bits up to VRING_PACKED_EVENT_F_WRAP_CTR include the last used index.
183 * bits from VRING_PACKED_EVENT_F_WRAP_CTR include the used wrap counter.
184 */
1bc4953e 185 u16 last_used_idx;
0a8a69dd 186
8d622d21
MT
187 /* Hint for event idx: already triggered no need to disable. */
188 bool event_triggered;
189
1ce9e605
TB
190 union {
191 /* Available for split ring */
d76136e4 192 struct vring_virtqueue_split split;
e593bf97 193
1ce9e605 194 /* Available for packed ring */
d76136e4 195 struct vring_virtqueue_packed packed;
1ce9e605 196 };
f277ec42 197
0a8a69dd 198 /* How to notify other side. FIXME: commonalize hcalls! */
46f9c2b9 199 bool (*notify)(struct virtqueue *vq);
0a8a69dd 200
2a2d1382
AL
201 /* DMA, allocation, and size information */
202 bool we_own_ring;
2a2d1382 203
0a8a69dd
RR
204#ifdef DEBUG
205 /* They're supposed to lock for us. */
206 unsigned int in_use;
e93300b1
RR
207
208 /* Figure out if their kicks are too delayed. */
209 bool last_add_time_valid;
210 ktime_t last_add_time;
0a8a69dd 211#endif
0a8a69dd
RR
212};
213
07d9629d 214static struct virtqueue *__vring_new_virtqueue(unsigned int index,
cd4c812a 215 struct vring_virtqueue_split *vring_split,
07d9629d
XZ
216 struct virtio_device *vdev,
217 bool weak_barriers,
218 bool context,
219 bool (*notify)(struct virtqueue *),
220 void (*callback)(struct virtqueue *),
221 const char *name);
a2b36c8d 222static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num);
6fea20e5 223static void vring_free(struct virtqueue *_vq);
e6f633e5
TB
224
225/*
226 * Helpers.
227 */
228
0a8a69dd
RR
229#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
230
35c51e09 231static inline bool virtqueue_use_indirect(struct vring_virtqueue *vq,
2f18c2d1
TB
232 unsigned int total_sg)
233{
2f18c2d1
TB
234 /*
235 * If the host supports indirect descriptor tables, and we have multiple
236 * buffers, then go indirect. FIXME: tune this threshold
237 */
238 return (vq->indirect && total_sg > 1 && vq->vq.num_free);
239}
240
d26c96c8 241/*
1a937693
MT
242 * Modern virtio devices have feature bits to specify whether they need a
243 * quirk and bypass the IOMMU. If not there, just use the DMA API.
244 *
245 * If there, the interaction between virtio and DMA API is messy.
d26c96c8
AL
246 *
247 * On most systems with virtio, physical addresses match bus addresses,
248 * and it doesn't particularly matter whether we use the DMA API.
249 *
250 * On some systems, including Xen and any system with a physical device
251 * that speaks virtio behind a physical IOMMU, we must use the DMA API
252 * for virtio DMA to work at all.
253 *
254 * On other systems, including SPARC and PPC64, virtio-pci devices are
255 * enumerated as though they are behind an IOMMU, but the virtio host
256 * ignores the IOMMU, so we must either pretend that the IOMMU isn't
257 * there or somehow map everything as the identity.
258 *
259 * For the time being, we preserve historic behavior and bypass the DMA
260 * API.
1a937693
MT
261 *
262 * TODO: install a per-device DMA ops structure that does the right thing
263 * taking into account all the above quirks, and use the DMA API
264 * unconditionally on data path.
d26c96c8
AL
265 */
266
267static bool vring_use_dma_api(struct virtio_device *vdev)
268{
24b6842a 269 if (!virtio_has_dma_quirk(vdev))
1a937693
MT
270 return true;
271
272 /* Otherwise, we are left to guess. */
78fe3987
AL
273 /*
274 * In theory, it's possible to have a buggy QEMU-supposed
275 * emulated Q35 IOMMU and Xen enabled at the same time. On
276 * such a configuration, virtio has never worked and will
277 * not work without an even larger kludge. Instead, enable
278 * the DMA API if we're a Xen guest, which at least allows
279 * all of the sensible Xen configurations to work correctly.
280 */
281 if (xen_domain())
282 return true;
283
d26c96c8
AL
284 return false;
285}
286
e6d6dd6c
JR
287size_t virtio_max_dma_size(struct virtio_device *vdev)
288{
289 size_t max_segment_size = SIZE_MAX;
290
291 if (vring_use_dma_api(vdev))
817fc978 292 max_segment_size = dma_max_mapping_size(vdev->dev.parent);
e6d6dd6c
JR
293
294 return max_segment_size;
295}
296EXPORT_SYMBOL_GPL(virtio_max_dma_size);
297
d79dca75
TB
298static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
299 dma_addr_t *dma_handle, gfp_t flag)
300{
301 if (vring_use_dma_api(vdev)) {
302 return dma_alloc_coherent(vdev->dev.parent, size,
303 dma_handle, flag);
304 } else {
305 void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
306
307 if (queue) {
308 phys_addr_t phys_addr = virt_to_phys(queue);
309 *dma_handle = (dma_addr_t)phys_addr;
310
311 /*
312 * Sanity check: make sure we dind't truncate
313 * the address. The only arches I can find that
314 * have 64-bit phys_addr_t but 32-bit dma_addr_t
315 * are certain non-highmem MIPS and x86
316 * configurations, but these configurations
317 * should never allocate physical pages above 32
318 * bits, so this is fine. Just in case, throw a
319 * warning and abort if we end up with an
320 * unrepresentable address.
321 */
322 if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
323 free_pages_exact(queue, PAGE_ALIGN(size));
324 return NULL;
325 }
326 }
327 return queue;
328 }
329}
330
331static void vring_free_queue(struct virtio_device *vdev, size_t size,
332 void *queue, dma_addr_t dma_handle)
333{
334 if (vring_use_dma_api(vdev))
335 dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
336 else
337 free_pages_exact(queue, PAGE_ALIGN(size));
338}
339
780bc790
AL
340/*
341 * The DMA ops on various arches are rather gnarly right now, and
342 * making all of the arch DMA ops work on the vring device itself
343 * is a mess. For now, we use the parent device for DMA ops.
344 */
75bfa81b 345static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
780bc790
AL
346{
347 return vq->vq.vdev->dev.parent;
348}
349
350/* Map one sg entry. */
351static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
352 struct scatterlist *sg,
353 enum dma_data_direction direction)
354{
fb3fba6b 355 if (!vq->use_dma_api)
780bc790
AL
356 return (dma_addr_t)sg_phys(sg);
357
358 /*
359 * We can't use dma_map_sg, because we don't use scatterlists in
360 * the way it expects (we don't guarantee that the scatterlist
361 * will exist for the lifetime of the mapping).
362 */
363 return dma_map_page(vring_dma_dev(vq),
364 sg_page(sg), sg->offset, sg->length,
365 direction);
366}
367
368static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
369 void *cpu_addr, size_t size,
370 enum dma_data_direction direction)
371{
fb3fba6b 372 if (!vq->use_dma_api)
780bc790
AL
373 return (dma_addr_t)virt_to_phys(cpu_addr);
374
375 return dma_map_single(vring_dma_dev(vq),
376 cpu_addr, size, direction);
377}
378
e6f633e5
TB
379static int vring_mapping_error(const struct vring_virtqueue *vq,
380 dma_addr_t addr)
381{
fb3fba6b 382 if (!vq->use_dma_api)
e6f633e5
TB
383 return 0;
384
385 return dma_mapping_error(vring_dma_dev(vq), addr);
386}
387
3a897128
XZ
388static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
389{
390 vq->vq.num_free = num;
391
392 if (vq->packed_ring)
393 vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR);
394 else
395 vq->last_used_idx = 0;
396
397 vq->event_triggered = false;
398 vq->num_added = 0;
399
400#ifdef DEBUG
401 vq->in_use = false;
402 vq->last_add_time_valid = false;
403#endif
404}
405
e6f633e5
TB
406
407/*
408 * Split ring specific functions - *_split().
409 */
410
72b5e895
JW
411static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
412 struct vring_desc *desc)
780bc790
AL
413{
414 u16 flags;
415
fb3fba6b 416 if (!vq->use_dma_api)
780bc790
AL
417 return;
418
419 flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
420
b4282ebc
XZ
421 dma_unmap_page(vring_dma_dev(vq),
422 virtio64_to_cpu(vq->vq.vdev, desc->addr),
423 virtio32_to_cpu(vq->vq.vdev, desc->len),
424 (flags & VRING_DESC_F_WRITE) ?
425 DMA_FROM_DEVICE : DMA_TO_DEVICE);
780bc790
AL
426}
427
72b5e895
JW
428static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
429 unsigned int i)
430{
431 struct vring_desc_extra *extra = vq->split.desc_extra;
432 u16 flags;
433
434 if (!vq->use_dma_api)
435 goto out;
436
437 flags = extra[i].flags;
438
439 if (flags & VRING_DESC_F_INDIRECT) {
440 dma_unmap_single(vring_dma_dev(vq),
441 extra[i].addr,
442 extra[i].len,
443 (flags & VRING_DESC_F_WRITE) ?
444 DMA_FROM_DEVICE : DMA_TO_DEVICE);
445 } else {
446 dma_unmap_page(vring_dma_dev(vq),
447 extra[i].addr,
448 extra[i].len,
449 (flags & VRING_DESC_F_WRITE) ?
450 DMA_FROM_DEVICE : DMA_TO_DEVICE);
451 }
452
453out:
454 return extra[i].next;
455}
456
138fd251
TB
457static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
458 unsigned int total_sg,
459 gfp_t gfp)
9fa29b9d
MM
460{
461 struct vring_desc *desc;
b25bd251 462 unsigned int i;
9fa29b9d 463
b92b1b89
WD
464 /*
465 * We require lowmem mappings for the descriptors because
466 * otherwise virt_to_phys will give us bogus addresses in the
467 * virtqueue.
468 */
82107539 469 gfp &= ~__GFP_HIGHMEM;
b92b1b89 470
6da2ec56 471 desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp);
9fa29b9d 472 if (!desc)
b25bd251 473 return NULL;
9fa29b9d 474
b25bd251 475 for (i = 0; i < total_sg; i++)
00e6f3d9 476 desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
b25bd251 477 return desc;
9fa29b9d
MM
478}
479
fe4c3862
JW
480static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
481 struct vring_desc *desc,
482 unsigned int i,
483 dma_addr_t addr,
484 unsigned int len,
72b5e895
JW
485 u16 flags,
486 bool indirect)
fe4c3862 487{
72b5e895
JW
488 struct vring_virtqueue *vring = to_vvq(vq);
489 struct vring_desc_extra *extra = vring->split.desc_extra;
490 u16 next;
491
fe4c3862
JW
492 desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
493 desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
494 desc[i].len = cpu_to_virtio32(vq->vdev, len);
495
72b5e895
JW
496 if (!indirect) {
497 next = extra[i].next;
498 desc[i].next = cpu_to_virtio16(vq->vdev, next);
499
500 extra[i].addr = addr;
501 extra[i].len = len;
502 extra[i].flags = flags;
503 } else
504 next = virtio16_to_cpu(vq->vdev, desc[i].next);
505
506 return next;
fe4c3862
JW
507}
508
138fd251
TB
509static inline int virtqueue_add_split(struct virtqueue *_vq,
510 struct scatterlist *sgs[],
511 unsigned int total_sg,
512 unsigned int out_sgs,
513 unsigned int in_sgs,
514 void *data,
515 void *ctx,
516 gfp_t gfp)
0a8a69dd
RR
517{
518 struct vring_virtqueue *vq = to_vvq(_vq);
13816c76 519 struct scatterlist *sg;
b25bd251 520 struct vring_desc *desc;
3f649ab7 521 unsigned int i, n, avail, descs_used, prev, err_idx;
1fe9b6fe 522 int head;
b25bd251 523 bool indirect;
0a8a69dd 524
9fa29b9d
MM
525 START_USE(vq);
526
0a8a69dd 527 BUG_ON(data == NULL);
5a08b04f 528 BUG_ON(ctx && vq->indirect);
9fa29b9d 529
70670444
RR
530 if (unlikely(vq->broken)) {
531 END_USE(vq);
532 return -EIO;
533 }
534
4d6a105e 535 LAST_ADD_TIME_UPDATE(vq);
e93300b1 536
b25bd251
RR
537 BUG_ON(total_sg == 0);
538
539 head = vq->free_head;
540
35c51e09 541 if (virtqueue_use_indirect(vq, total_sg))
138fd251 542 desc = alloc_indirect_split(_vq, total_sg, gfp);
44ed8089 543 else {
b25bd251 544 desc = NULL;
e593bf97 545 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
44ed8089 546 }
b25bd251
RR
547
548 if (desc) {
549 /* Use a single buffer which doesn't continue */
780bc790 550 indirect = true;
b25bd251
RR
551 /* Set up rest to use this indirect table. */
552 i = 0;
553 descs_used = 1;
b25bd251 554 } else {
780bc790 555 indirect = false;
e593bf97 556 desc = vq->split.vring.desc;
b25bd251
RR
557 i = head;
558 descs_used = total_sg;
9fa29b9d
MM
559 }
560
b4b4ff73 561 if (unlikely(vq->vq.num_free < descs_used)) {
0a8a69dd 562 pr_debug("Can't add buf len %i - avail = %i\n",
b25bd251 563 descs_used, vq->vq.num_free);
44653eae
RR
564 /* FIXME: for historical reasons, we force a notify here if
565 * there are outgoing parts to the buffer. Presumably the
566 * host should service the ring ASAP. */
13816c76 567 if (out_sgs)
44653eae 568 vq->notify(&vq->vq);
58625edf
WY
569 if (indirect)
570 kfree(desc);
0a8a69dd
RR
571 END_USE(vq);
572 return -ENOSPC;
573 }
574
13816c76 575 for (n = 0; n < out_sgs; n++) {
eeebf9b1 576 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
780bc790
AL
577 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
578 if (vring_mapping_error(vq, addr))
579 goto unmap_release;
580
13816c76 581 prev = i;
72b5e895
JW
582 /* Note that we trust indirect descriptor
583 * table since it use stream DMA mapping.
584 */
fe4c3862 585 i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
72b5e895
JW
586 VRING_DESC_F_NEXT,
587 indirect);
13816c76 588 }
0a8a69dd 589 }
13816c76 590 for (; n < (out_sgs + in_sgs); n++) {
eeebf9b1 591 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
780bc790
AL
592 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
593 if (vring_mapping_error(vq, addr))
594 goto unmap_release;
595
13816c76 596 prev = i;
72b5e895
JW
597 /* Note that we trust indirect descriptor
598 * table since it use stream DMA mapping.
599 */
fe4c3862
JW
600 i = virtqueue_add_desc_split(_vq, desc, i, addr,
601 sg->length,
602 VRING_DESC_F_NEXT |
72b5e895
JW
603 VRING_DESC_F_WRITE,
604 indirect);
13816c76 605 }
0a8a69dd
RR
606 }
607 /* Last one doesn't continue. */
00e6f3d9 608 desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
72b5e895 609 if (!indirect && vq->use_dma_api)
890d3356 610 vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &=
72b5e895 611 ~VRING_DESC_F_NEXT;
0a8a69dd 612
780bc790
AL
613 if (indirect) {
614 /* Now that the indirect table is filled in, map it. */
615 dma_addr_t addr = vring_map_single(
616 vq, desc, total_sg * sizeof(struct vring_desc),
617 DMA_TO_DEVICE);
618 if (vring_mapping_error(vq, addr))
619 goto unmap_release;
620
fe4c3862
JW
621 virtqueue_add_desc_split(_vq, vq->split.vring.desc,
622 head, addr,
623 total_sg * sizeof(struct vring_desc),
72b5e895
JW
624 VRING_DESC_F_INDIRECT,
625 false);
780bc790
AL
626 }
627
628 /* We're using some buffers from the free list. */
629 vq->vq.num_free -= descs_used;
630
0a8a69dd 631 /* Update free pointer */
b25bd251 632 if (indirect)
72b5e895 633 vq->free_head = vq->split.desc_extra[head].next;
b25bd251
RR
634 else
635 vq->free_head = i;
0a8a69dd 636
780bc790 637 /* Store token and indirect buffer state. */
cbeedb72 638 vq->split.desc_state[head].data = data;
780bc790 639 if (indirect)
cbeedb72 640 vq->split.desc_state[head].indir_desc = desc;
87646a34 641 else
cbeedb72 642 vq->split.desc_state[head].indir_desc = ctx;
0a8a69dd
RR
643
644 /* Put entry in available array (but don't update avail->idx until they
3b720b8c 645 * do sync). */
e593bf97
TB
646 avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
647 vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
0a8a69dd 648
ee7cd898
RR
649 /* Descriptors and available array need to be set before we expose the
650 * new available array entries. */
a9a0fef7 651 virtio_wmb(vq->weak_barriers);
e593bf97
TB
652 vq->split.avail_idx_shadow++;
653 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
654 vq->split.avail_idx_shadow);
ee7cd898
RR
655 vq->num_added++;
656
5e05bf58
TH
657 pr_debug("Added buffer head %i to %p\n", head, vq);
658 END_USE(vq);
659
ee7cd898
RR
660 /* This is very unlikely, but theoretically possible. Kick
661 * just in case. */
662 if (unlikely(vq->num_added == (1 << 16) - 1))
663 virtqueue_kick(_vq);
664
98e8c6bc 665 return 0;
780bc790
AL
666
667unmap_release:
668 err_idx = i;
cf8f1696
ML
669
670 if (indirect)
671 i = 0;
672 else
673 i = head;
780bc790
AL
674
675 for (n = 0; n < total_sg; n++) {
676 if (i == err_idx)
677 break;
72b5e895
JW
678 if (indirect) {
679 vring_unmap_one_split_indirect(vq, &desc[i]);
680 i = virtio16_to_cpu(_vq->vdev, desc[i].next);
681 } else
682 i = vring_unmap_one_split(vq, i);
780bc790
AL
683 }
684
780bc790
AL
685 if (indirect)
686 kfree(desc);
687
3cc36f6e 688 END_USE(vq);
f7728002 689 return -ENOMEM;
0a8a69dd 690}
13816c76 691
138fd251 692static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
0a8a69dd
RR
693{
694 struct vring_virtqueue *vq = to_vvq(_vq);
a5c262c5 695 u16 new, old;
41f0377f
RR
696 bool needs_kick;
697
0a8a69dd 698 START_USE(vq);
a72caae2
JW
699 /* We need to expose available array entries before checking avail
700 * event. */
a9a0fef7 701 virtio_mb(vq->weak_barriers);
0a8a69dd 702
e593bf97
TB
703 old = vq->split.avail_idx_shadow - vq->num_added;
704 new = vq->split.avail_idx_shadow;
0a8a69dd
RR
705 vq->num_added = 0;
706
4d6a105e
TB
707 LAST_ADD_TIME_CHECK(vq);
708 LAST_ADD_TIME_INVALID(vq);
e93300b1 709
41f0377f 710 if (vq->event) {
e593bf97
TB
711 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
712 vring_avail_event(&vq->split.vring)),
41f0377f
RR
713 new, old);
714 } else {
e593bf97
TB
715 needs_kick = !(vq->split.vring.used->flags &
716 cpu_to_virtio16(_vq->vdev,
717 VRING_USED_F_NO_NOTIFY));
41f0377f 718 }
0a8a69dd 719 END_USE(vq);
41f0377f
RR
720 return needs_kick;
721}
138fd251 722
138fd251
TB
723static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
724 void **ctx)
0a8a69dd 725{
780bc790 726 unsigned int i, j;
c60923cb 727 __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
0a8a69dd
RR
728
729 /* Clear data ptr. */
cbeedb72 730 vq->split.desc_state[head].data = NULL;
0a8a69dd 731
780bc790 732 /* Put back on free list: unmap first-level descriptors and find end */
0a8a69dd 733 i = head;
9fa29b9d 734
e593bf97 735 while (vq->split.vring.desc[i].flags & nextflag) {
72b5e895
JW
736 vring_unmap_one_split(vq, i);
737 i = vq->split.desc_extra[i].next;
06ca287d 738 vq->vq.num_free++;
0a8a69dd
RR
739 }
740
72b5e895
JW
741 vring_unmap_one_split(vq, i);
742 vq->split.desc_extra[i].next = vq->free_head;
0a8a69dd 743 vq->free_head = head;
780bc790 744
0a8a69dd 745 /* Plus final descriptor */
06ca287d 746 vq->vq.num_free++;
780bc790 747
5a08b04f 748 if (vq->indirect) {
cbeedb72
TB
749 struct vring_desc *indir_desc =
750 vq->split.desc_state[head].indir_desc;
5a08b04f
MT
751 u32 len;
752
753 /* Free the indirect table, if any, now that it's unmapped. */
754 if (!indir_desc)
755 return;
756
72b5e895 757 len = vq->split.desc_extra[head].len;
780bc790 758
72b5e895
JW
759 BUG_ON(!(vq->split.desc_extra[head].flags &
760 VRING_DESC_F_INDIRECT));
780bc790
AL
761 BUG_ON(len == 0 || len % sizeof(struct vring_desc));
762
763 for (j = 0; j < len / sizeof(struct vring_desc); j++)
72b5e895 764 vring_unmap_one_split_indirect(vq, &indir_desc[j]);
780bc790 765
5a08b04f 766 kfree(indir_desc);
cbeedb72 767 vq->split.desc_state[head].indir_desc = NULL;
5a08b04f 768 } else if (ctx) {
cbeedb72 769 *ctx = vq->split.desc_state[head].indir_desc;
780bc790 770 }
0a8a69dd
RR
771}
772
138fd251 773static inline bool more_used_split(const struct vring_virtqueue *vq)
0a8a69dd 774{
e593bf97
TB
775 return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
776 vq->split.vring.used->idx);
0a8a69dd
RR
777}
778
138fd251
TB
779static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
780 unsigned int *len,
781 void **ctx)
0a8a69dd
RR
782{
783 struct vring_virtqueue *vq = to_vvq(_vq);
784 void *ret;
785 unsigned int i;
3b720b8c 786 u16 last_used;
0a8a69dd
RR
787
788 START_USE(vq);
789
5ef82752
RR
790 if (unlikely(vq->broken)) {
791 END_USE(vq);
792 return NULL;
793 }
794
138fd251 795 if (!more_used_split(vq)) {
0a8a69dd
RR
796 pr_debug("No more buffers in queue\n");
797 END_USE(vq);
798 return NULL;
799 }
800
2d61ba95 801 /* Only get used array entries after they have been exposed by host. */
a9a0fef7 802 virtio_rmb(vq->weak_barriers);
2d61ba95 803
e593bf97
TB
804 last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
805 i = virtio32_to_cpu(_vq->vdev,
806 vq->split.vring.used->ring[last_used].id);
807 *len = virtio32_to_cpu(_vq->vdev,
808 vq->split.vring.used->ring[last_used].len);
0a8a69dd 809
e593bf97 810 if (unlikely(i >= vq->split.vring.num)) {
0a8a69dd
RR
811 BAD_RING(vq, "id %u out of range\n", i);
812 return NULL;
813 }
cbeedb72 814 if (unlikely(!vq->split.desc_state[i].data)) {
0a8a69dd
RR
815 BAD_RING(vq, "id %u is not a head!\n", i);
816 return NULL;
817 }
818
138fd251 819 /* detach_buf_split clears data, so grab it now. */
cbeedb72 820 ret = vq->split.desc_state[i].data;
138fd251 821 detach_buf_split(vq, i, ctx);
0a8a69dd 822 vq->last_used_idx++;
a5c262c5
MT
823 /* If we expect an interrupt for the next entry, tell host
824 * by writing event index and flush out the write before
825 * the read in the next get_buf call. */
e593bf97 826 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
788e5b3a 827 virtio_store_mb(vq->weak_barriers,
e593bf97 828 &vring_used_event(&vq->split.vring),
788e5b3a 829 cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
a5c262c5 830
4d6a105e 831 LAST_ADD_TIME_INVALID(vq);
e93300b1 832
0a8a69dd
RR
833 END_USE(vq);
834 return ret;
835}
138fd251 836
138fd251 837static void virtqueue_disable_cb_split(struct virtqueue *_vq)
18445c4d
RR
838{
839 struct vring_virtqueue *vq = to_vvq(_vq);
840
e593bf97
TB
841 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
842 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
8d622d21
MT
843 if (vq->event)
844 /* TODO: this is a hack. Figure out a cleaner value to write. */
845 vring_used_event(&vq->split.vring) = 0x0;
846 else
e593bf97
TB
847 vq->split.vring.avail->flags =
848 cpu_to_virtio16(_vq->vdev,
849 vq->split.avail_flags_shadow);
f277ec42 850 }
18445c4d
RR
851}
852
31532340 853static unsigned int virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
0a8a69dd
RR
854{
855 struct vring_virtqueue *vq = to_vvq(_vq);
cc229884 856 u16 last_used_idx;
0a8a69dd
RR
857
858 START_USE(vq);
0a8a69dd
RR
859
860 /* We optimistically turn back on interrupts, then check if there was
861 * more to do. */
a5c262c5
MT
862 /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
863 * either clear the flags bit or point the event index at the next
864 * entry. Always do both to keep code simple. */
e593bf97
TB
865 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
866 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
0ea1e4a6 867 if (!vq->event)
e593bf97
TB
868 vq->split.vring.avail->flags =
869 cpu_to_virtio16(_vq->vdev,
870 vq->split.avail_flags_shadow);
f277ec42 871 }
e593bf97
TB
872 vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
873 last_used_idx = vq->last_used_idx);
cc229884
MT
874 END_USE(vq);
875 return last_used_idx;
876}
138fd251 877
31532340 878static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_idx)
138fd251
TB
879{
880 struct vring_virtqueue *vq = to_vvq(_vq);
881
882 return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
e593bf97 883 vq->split.vring.used->idx);
138fd251
TB
884}
885
138fd251 886static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
7ab358c2
MT
887{
888 struct vring_virtqueue *vq = to_vvq(_vq);
889 u16 bufs;
890
891 START_USE(vq);
892
893 /* We optimistically turn back on interrupts, then check if there was
894 * more to do. */
895 /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
896 * either clear the flags bit or point the event index at the next
0ea1e4a6 897 * entry. Always update the event index to keep code simple. */
e593bf97
TB
898 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
899 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
0ea1e4a6 900 if (!vq->event)
e593bf97
TB
901 vq->split.vring.avail->flags =
902 cpu_to_virtio16(_vq->vdev,
903 vq->split.avail_flags_shadow);
f277ec42 904 }
7ab358c2 905 /* TODO: tune this threshold */
e593bf97 906 bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;
788e5b3a
MT
907
908 virtio_store_mb(vq->weak_barriers,
e593bf97 909 &vring_used_event(&vq->split.vring),
788e5b3a
MT
910 cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
911
e593bf97
TB
912 if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
913 - vq->last_used_idx) > bufs)) {
7ab358c2
MT
914 END_USE(vq);
915 return false;
916 }
917
918 END_USE(vq);
919 return true;
920}
7ab358c2 921
138fd251 922static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
c021eac4
SM
923{
924 struct vring_virtqueue *vq = to_vvq(_vq);
925 unsigned int i;
926 void *buf;
927
928 START_USE(vq);
929
e593bf97 930 for (i = 0; i < vq->split.vring.num; i++) {
cbeedb72 931 if (!vq->split.desc_state[i].data)
c021eac4 932 continue;
138fd251 933 /* detach_buf_split clears data, so grab it now. */
cbeedb72 934 buf = vq->split.desc_state[i].data;
138fd251 935 detach_buf_split(vq, i, NULL);
e593bf97
TB
936 vq->split.avail_idx_shadow--;
937 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
938 vq->split.avail_idx_shadow);
c021eac4
SM
939 END_USE(vq);
940 return buf;
941 }
942 /* That should have freed everything. */
e593bf97 943 BUG_ON(vq->vq.num_free != vq->split.vring.num);
c021eac4
SM
944
945 END_USE(vq);
946 return NULL;
947}
138fd251 948
198fa7be
XZ
949static void virtqueue_vring_init_split(struct vring_virtqueue_split *vring_split,
950 struct vring_virtqueue *vq)
951{
952 struct virtio_device *vdev;
953
954 vdev = vq->vq.vdev;
955
956 vring_split->avail_flags_shadow = 0;
957 vring_split->avail_idx_shadow = 0;
958
959 /* No callback? Tell other side not to bother us. */
960 if (!vq->vq.callback) {
961 vring_split->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
962 if (!vq->event)
963 vring_split->vring.avail->flags = cpu_to_virtio16(vdev,
964 vring_split->avail_flags_shadow);
965 }
966}
967
e5175b41
XZ
968static void virtqueue_reinit_split(struct vring_virtqueue *vq)
969{
970 int num;
971
972 num = vq->split.vring.num;
973
974 vq->split.vring.avail->flags = 0;
975 vq->split.vring.avail->idx = 0;
976
977 /* reset avail event */
978 vq->split.vring.avail->ring[num] = 0;
979
980 vq->split.vring.used->flags = 0;
981 vq->split.vring.used->idx = 0;
982
983 /* reset used event */
984 *(__virtio16 *)&(vq->split.vring.used->ring[num]) = 0;
985
986 virtqueue_init(vq, num);
987
988 virtqueue_vring_init_split(&vq->split, vq);
989}
990
e1d6a423
XZ
991static void virtqueue_vring_attach_split(struct vring_virtqueue *vq,
992 struct vring_virtqueue_split *vring_split)
993{
994 vq->split = *vring_split;
995
996 /* Put everything in free lists. */
997 vq->free_head = 0;
998}
999
a2b36c8d
XZ
1000static int vring_alloc_state_extra_split(struct vring_virtqueue_split *vring_split)
1001{
1002 struct vring_desc_state_split *state;
1003 struct vring_desc_extra *extra;
1004 u32 num = vring_split->vring.num;
1005
1006 state = kmalloc_array(num, sizeof(struct vring_desc_state_split), GFP_KERNEL);
1007 if (!state)
1008 goto err_state;
1009
1010 extra = vring_alloc_desc_extra(num);
1011 if (!extra)
1012 goto err_extra;
1013
1014 memset(state, 0, num * sizeof(struct vring_desc_state_split));
1015
1016 vring_split->desc_state = state;
1017 vring_split->desc_extra = extra;
1018 return 0;
1019
1020err_extra:
1021 kfree(state);
1022err_state:
1023 return -ENOMEM;
1024}
1025
89f05d94
XZ
1026static void vring_free_split(struct vring_virtqueue_split *vring_split,
1027 struct virtio_device *vdev)
1028{
1029 vring_free_queue(vdev, vring_split->queue_size_in_bytes,
1030 vring_split->vring.desc,
1031 vring_split->queue_dma_addr);
1032
1033 kfree(vring_split->desc_state);
1034 kfree(vring_split->desc_extra);
1035}
1036
c2d87fe6
XZ
1037static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split,
1038 struct virtio_device *vdev,
1039 u32 num,
1040 unsigned int vring_align,
1041 bool may_reduce_num)
d79dca75 1042{
d79dca75
TB
1043 void *queue = NULL;
1044 dma_addr_t dma_addr;
d79dca75
TB
1045
1046 /* We assume num is a power of 2. */
1047 if (num & (num - 1)) {
1048 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
c2d87fe6 1049 return -EINVAL;
d79dca75
TB
1050 }
1051
1052 /* TODO: allocate each queue chunk individually */
1053 for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
1054 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1055 &dma_addr,
c7cc29aa 1056 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
d79dca75
TB
1057 if (queue)
1058 break;
cf94db21 1059 if (!may_reduce_num)
c2d87fe6 1060 return -ENOMEM;
d79dca75
TB
1061 }
1062
1063 if (!num)
c2d87fe6 1064 return -ENOMEM;
d79dca75
TB
1065
1066 if (!queue) {
1067 /* Try to get a single page. You are my only hope! */
1068 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1069 &dma_addr, GFP_KERNEL|__GFP_ZERO);
1070 }
1071 if (!queue)
c2d87fe6
XZ
1072 return -ENOMEM;
1073
1074 vring_init(&vring_split->vring, num, queue, vring_align);
1075
1076 vring_split->queue_dma_addr = dma_addr;
1077 vring_split->queue_size_in_bytes = vring_size(num, vring_align);
d79dca75 1078
af36b16f
XZ
1079 vring_split->vring_align = vring_align;
1080 vring_split->may_reduce_num = may_reduce_num;
1081
c2d87fe6
XZ
1082 return 0;
1083}
1084
1085static struct virtqueue *vring_create_virtqueue_split(
1086 unsigned int index,
1087 unsigned int num,
1088 unsigned int vring_align,
1089 struct virtio_device *vdev,
1090 bool weak_barriers,
1091 bool may_reduce_num,
1092 bool context,
1093 bool (*notify)(struct virtqueue *),
1094 void (*callback)(struct virtqueue *),
1095 const char *name)
1096{
1097 struct vring_virtqueue_split vring_split = {};
1098 struct virtqueue *vq;
1099 int err;
1100
1101 err = vring_alloc_queue_split(&vring_split, vdev, num, vring_align,
1102 may_reduce_num);
1103 if (err)
1104 return NULL;
d79dca75 1105
cd4c812a
XZ
1106 vq = __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers,
1107 context, notify, callback, name);
d79dca75 1108 if (!vq) {
c2d87fe6 1109 vring_free_split(&vring_split, vdev);
d79dca75
TB
1110 return NULL;
1111 }
1112
d79dca75
TB
1113 to_vvq(vq)->we_own_ring = true;
1114
1115 return vq;
1116}
1117
6fea20e5
XZ
1118static int virtqueue_resize_split(struct virtqueue *_vq, u32 num)
1119{
1120 struct vring_virtqueue_split vring_split = {};
1121 struct vring_virtqueue *vq = to_vvq(_vq);
1122 struct virtio_device *vdev = _vq->vdev;
1123 int err;
1124
1125 err = vring_alloc_queue_split(&vring_split, vdev, num,
1126 vq->split.vring_align,
1127 vq->split.may_reduce_num);
1128 if (err)
1129 goto err;
1130
1131 err = vring_alloc_state_extra_split(&vring_split);
1132 if (err)
1133 goto err_state_extra;
1134
1135 vring_free(&vq->vq);
1136
1137 virtqueue_vring_init_split(&vring_split, vq);
1138
1139 virtqueue_init(vq, vring_split.vring.num);
1140 virtqueue_vring_attach_split(vq, &vring_split);
1141
1142 return 0;
1143
1144err_state_extra:
1145 vring_free_split(&vring_split, vdev);
1146err:
1147 virtqueue_reinit_split(vq);
1148 return -ENOMEM;
1149}
1150
e6f633e5 1151
1ce9e605
TB
1152/*
1153 * Packed ring specific functions - *_packed().
1154 */
a7722890 1155static inline bool packed_used_wrap_counter(u16 last_used_idx)
1156{
1157 return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1158}
1159
1160static inline u16 packed_last_used(u16 last_used_idx)
1161{
1162 return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1163}
1ce9e605 1164
d80dc15b
XZ
1165static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
1166 struct vring_desc_extra *extra)
1ce9e605
TB
1167{
1168 u16 flags;
1169
1170 if (!vq->use_dma_api)
1171 return;
1172
d80dc15b 1173 flags = extra->flags;
1ce9e605
TB
1174
1175 if (flags & VRING_DESC_F_INDIRECT) {
1176 dma_unmap_single(vring_dma_dev(vq),
d80dc15b 1177 extra->addr, extra->len,
1ce9e605
TB
1178 (flags & VRING_DESC_F_WRITE) ?
1179 DMA_FROM_DEVICE : DMA_TO_DEVICE);
1180 } else {
1181 dma_unmap_page(vring_dma_dev(vq),
d80dc15b 1182 extra->addr, extra->len,
1ce9e605
TB
1183 (flags & VRING_DESC_F_WRITE) ?
1184 DMA_FROM_DEVICE : DMA_TO_DEVICE);
1185 }
1186}
1187
1188static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
1189 struct vring_packed_desc *desc)
1190{
1191 u16 flags;
1192
1193 if (!vq->use_dma_api)
1194 return;
1195
1196 flags = le16_to_cpu(desc->flags);
1197
920379a4
XZ
1198 dma_unmap_page(vring_dma_dev(vq),
1199 le64_to_cpu(desc->addr),
1200 le32_to_cpu(desc->len),
1201 (flags & VRING_DESC_F_WRITE) ?
1202 DMA_FROM_DEVICE : DMA_TO_DEVICE);
1ce9e605
TB
1203}
1204
1205static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
1206 gfp_t gfp)
1207{
1208 struct vring_packed_desc *desc;
1209
1210 /*
1211 * We require lowmem mappings for the descriptors because
1212 * otherwise virt_to_phys will give us bogus addresses in the
1213 * virtqueue.
1214 */
1215 gfp &= ~__GFP_HIGHMEM;
1216
1217 desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp);
1218
1219 return desc;
1220}
1221
1222static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
8d7670f3
XZ
1223 struct scatterlist *sgs[],
1224 unsigned int total_sg,
1225 unsigned int out_sgs,
1226 unsigned int in_sgs,
1227 void *data,
1228 gfp_t gfp)
1ce9e605
TB
1229{
1230 struct vring_packed_desc *desc;
1231 struct scatterlist *sg;
1232 unsigned int i, n, err_idx;
1233 u16 head, id;
1234 dma_addr_t addr;
1235
1236 head = vq->packed.next_avail_idx;
1237 desc = alloc_indirect_packed(total_sg, gfp);
fc6d70f4
XZ
1238 if (!desc)
1239 return -ENOMEM;
1ce9e605
TB
1240
1241 if (unlikely(vq->vq.num_free < 1)) {
1242 pr_debug("Can't add buf len 1 - avail = 0\n");
df0bfe75 1243 kfree(desc);
1ce9e605
TB
1244 END_USE(vq);
1245 return -ENOSPC;
1246 }
1247
1248 i = 0;
1249 id = vq->free_head;
1250 BUG_ON(id == vq->packed.vring.num);
1251
1252 for (n = 0; n < out_sgs + in_sgs; n++) {
1253 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1254 addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1255 DMA_TO_DEVICE : DMA_FROM_DEVICE);
1256 if (vring_mapping_error(vq, addr))
1257 goto unmap_release;
1258
1259 desc[i].flags = cpu_to_le16(n < out_sgs ?
1260 0 : VRING_DESC_F_WRITE);
1261 desc[i].addr = cpu_to_le64(addr);
1262 desc[i].len = cpu_to_le32(sg->length);
1263 i++;
1264 }
1265 }
1266
1267 /* Now that the indirect table is filled in, map it. */
1268 addr = vring_map_single(vq, desc,
1269 total_sg * sizeof(struct vring_packed_desc),
1270 DMA_TO_DEVICE);
1271 if (vring_mapping_error(vq, addr))
1272 goto unmap_release;
1273
1274 vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
1275 vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
1276 sizeof(struct vring_packed_desc));
1277 vq->packed.vring.desc[head].id = cpu_to_le16(id);
1278
1279 if (vq->use_dma_api) {
1280 vq->packed.desc_extra[id].addr = addr;
1281 vq->packed.desc_extra[id].len = total_sg *
1282 sizeof(struct vring_packed_desc);
1283 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
1284 vq->packed.avail_used_flags;
1285 }
1286
1287 /*
1288 * A driver MUST NOT make the first descriptor in the list
1289 * available before all subsequent descriptors comprising
1290 * the list are made available.
1291 */
1292 virtio_wmb(vq->weak_barriers);
1293 vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
1294 vq->packed.avail_used_flags);
1295
1296 /* We're using some buffers from the free list. */
1297 vq->vq.num_free -= 1;
1298
1299 /* Update free pointer */
1300 n = head + 1;
1301 if (n >= vq->packed.vring.num) {
1302 n = 0;
1303 vq->packed.avail_wrap_counter ^= 1;
1304 vq->packed.avail_used_flags ^=
1305 1 << VRING_PACKED_DESC_F_AVAIL |
1306 1 << VRING_PACKED_DESC_F_USED;
1307 }
1308 vq->packed.next_avail_idx = n;
aeef9b47 1309 vq->free_head = vq->packed.desc_extra[id].next;
1ce9e605
TB
1310
1311 /* Store token and indirect buffer state. */
1312 vq->packed.desc_state[id].num = 1;
1313 vq->packed.desc_state[id].data = data;
1314 vq->packed.desc_state[id].indir_desc = desc;
1315 vq->packed.desc_state[id].last = id;
1316
1317 vq->num_added += 1;
1318
1319 pr_debug("Added buffer head %i to %p\n", head, vq);
1320 END_USE(vq);
1321
1322 return 0;
1323
1324unmap_release:
1325 err_idx = i;
1326
1327 for (i = 0; i < err_idx; i++)
1328 vring_unmap_desc_packed(vq, &desc[i]);
1329
1330 kfree(desc);
1331
1332 END_USE(vq);
f7728002 1333 return -ENOMEM;
1ce9e605
TB
1334}
1335
1336static inline int virtqueue_add_packed(struct virtqueue *_vq,
1337 struct scatterlist *sgs[],
1338 unsigned int total_sg,
1339 unsigned int out_sgs,
1340 unsigned int in_sgs,
1341 void *data,
1342 void *ctx,
1343 gfp_t gfp)
1344{
1345 struct vring_virtqueue *vq = to_vvq(_vq);
1346 struct vring_packed_desc *desc;
1347 struct scatterlist *sg;
1348 unsigned int i, n, c, descs_used, err_idx;
3f649ab7
KC
1349 __le16 head_flags, flags;
1350 u16 head, id, prev, curr, avail_used_flags;
fc6d70f4 1351 int err;
1ce9e605
TB
1352
1353 START_USE(vq);
1354
1355 BUG_ON(data == NULL);
1356 BUG_ON(ctx && vq->indirect);
1357
1358 if (unlikely(vq->broken)) {
1359 END_USE(vq);
1360 return -EIO;
1361 }
1362
1363 LAST_ADD_TIME_UPDATE(vq);
1364
1365 BUG_ON(total_sg == 0);
1366
35c51e09 1367 if (virtqueue_use_indirect(vq, total_sg)) {
fc6d70f4
XZ
1368 err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
1369 in_sgs, data, gfp);
1861ba62
MT
1370 if (err != -ENOMEM) {
1371 END_USE(vq);
fc6d70f4 1372 return err;
1861ba62 1373 }
fc6d70f4
XZ
1374
1375 /* fall back on direct */
1376 }
1ce9e605
TB
1377
1378 head = vq->packed.next_avail_idx;
1379 avail_used_flags = vq->packed.avail_used_flags;
1380
1381 WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
1382
1383 desc = vq->packed.vring.desc;
1384 i = head;
1385 descs_used = total_sg;
1386
1387 if (unlikely(vq->vq.num_free < descs_used)) {
1388 pr_debug("Can't add buf len %i - avail = %i\n",
1389 descs_used, vq->vq.num_free);
1390 END_USE(vq);
1391 return -ENOSPC;
1392 }
1393
1394 id = vq->free_head;
1395 BUG_ON(id == vq->packed.vring.num);
1396
1397 curr = id;
1398 c = 0;
1399 for (n = 0; n < out_sgs + in_sgs; n++) {
1400 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1401 dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1402 DMA_TO_DEVICE : DMA_FROM_DEVICE);
1403 if (vring_mapping_error(vq, addr))
1404 goto unmap_release;
1405
1406 flags = cpu_to_le16(vq->packed.avail_used_flags |
1407 (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
1408 (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
1409 if (i == head)
1410 head_flags = flags;
1411 else
1412 desc[i].flags = flags;
1413
1414 desc[i].addr = cpu_to_le64(addr);
1415 desc[i].len = cpu_to_le32(sg->length);
1416 desc[i].id = cpu_to_le16(id);
1417
1418 if (unlikely(vq->use_dma_api)) {
1419 vq->packed.desc_extra[curr].addr = addr;
1420 vq->packed.desc_extra[curr].len = sg->length;
1421 vq->packed.desc_extra[curr].flags =
1422 le16_to_cpu(flags);
1423 }
1424 prev = curr;
aeef9b47 1425 curr = vq->packed.desc_extra[curr].next;
1ce9e605
TB
1426
1427 if ((unlikely(++i >= vq->packed.vring.num))) {
1428 i = 0;
1429 vq->packed.avail_used_flags ^=
1430 1 << VRING_PACKED_DESC_F_AVAIL |
1431 1 << VRING_PACKED_DESC_F_USED;
1432 }
1433 }
1434 }
1435
1436 if (i < head)
1437 vq->packed.avail_wrap_counter ^= 1;
1438
1439 /* We're using some buffers from the free list. */
1440 vq->vq.num_free -= descs_used;
1441
1442 /* Update free pointer */
1443 vq->packed.next_avail_idx = i;
1444 vq->free_head = curr;
1445
1446 /* Store token. */
1447 vq->packed.desc_state[id].num = descs_used;
1448 vq->packed.desc_state[id].data = data;
1449 vq->packed.desc_state[id].indir_desc = ctx;
1450 vq->packed.desc_state[id].last = prev;
1451
1452 /*
1453 * A driver MUST NOT make the first descriptor in the list
1454 * available before all subsequent descriptors comprising
1455 * the list are made available.
1456 */
1457 virtio_wmb(vq->weak_barriers);
1458 vq->packed.vring.desc[head].flags = head_flags;
1459 vq->num_added += descs_used;
1460
1461 pr_debug("Added buffer head %i to %p\n", head, vq);
1462 END_USE(vq);
1463
1464 return 0;
1465
1466unmap_release:
1467 err_idx = i;
1468 i = head;
44593865 1469 curr = vq->free_head;
1ce9e605
TB
1470
1471 vq->packed.avail_used_flags = avail_used_flags;
1472
1473 for (n = 0; n < total_sg; n++) {
1474 if (i == err_idx)
1475 break;
d80dc15b 1476 vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
44593865 1477 curr = vq->packed.desc_extra[curr].next;
1ce9e605
TB
1478 i++;
1479 if (i >= vq->packed.vring.num)
1480 i = 0;
1481 }
1482
1483 END_USE(vq);
1484 return -EIO;
1485}
1486
1487static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
1488{
1489 struct vring_virtqueue *vq = to_vvq(_vq);
f51f9826 1490 u16 new, old, off_wrap, flags, wrap_counter, event_idx;
1ce9e605
TB
1491 bool needs_kick;
1492 union {
1493 struct {
1494 __le16 off_wrap;
1495 __le16 flags;
1496 };
1497 u32 u32;
1498 } snapshot;
1499
1500 START_USE(vq);
1501
1502 /*
1503 * We need to expose the new flags value before checking notification
1504 * suppressions.
1505 */
1506 virtio_mb(vq->weak_barriers);
1507
f51f9826
TB
1508 old = vq->packed.next_avail_idx - vq->num_added;
1509 new = vq->packed.next_avail_idx;
1ce9e605
TB
1510 vq->num_added = 0;
1511
1512 snapshot.u32 = *(u32 *)vq->packed.vring.device;
1513 flags = le16_to_cpu(snapshot.flags);
1514
1515 LAST_ADD_TIME_CHECK(vq);
1516 LAST_ADD_TIME_INVALID(vq);
1517
f51f9826
TB
1518 if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
1519 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
1520 goto out;
1521 }
1522
1523 off_wrap = le16_to_cpu(snapshot.off_wrap);
1524
1525 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1526 event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1527 if (wrap_counter != vq->packed.avail_wrap_counter)
1528 event_idx -= vq->packed.vring.num;
1529
1530 needs_kick = vring_need_event(event_idx, new, old);
1531out:
1ce9e605
TB
1532 END_USE(vq);
1533 return needs_kick;
1534}
1535
1536static void detach_buf_packed(struct vring_virtqueue *vq,
1537 unsigned int id, void **ctx)
1538{
1539 struct vring_desc_state_packed *state = NULL;
1540 struct vring_packed_desc *desc;
1541 unsigned int i, curr;
1542
1543 state = &vq->packed.desc_state[id];
1544
1545 /* Clear data ptr. */
1546 state->data = NULL;
1547
aeef9b47 1548 vq->packed.desc_extra[state->last].next = vq->free_head;
1ce9e605
TB
1549 vq->free_head = id;
1550 vq->vq.num_free += state->num;
1551
1552 if (unlikely(vq->use_dma_api)) {
1553 curr = id;
1554 for (i = 0; i < state->num; i++) {
d80dc15b
XZ
1555 vring_unmap_extra_packed(vq,
1556 &vq->packed.desc_extra[curr]);
aeef9b47 1557 curr = vq->packed.desc_extra[curr].next;
1ce9e605
TB
1558 }
1559 }
1560
1561 if (vq->indirect) {
1562 u32 len;
1563
1564 /* Free the indirect table, if any, now that it's unmapped. */
1565 desc = state->indir_desc;
1566 if (!desc)
1567 return;
1568
1569 if (vq->use_dma_api) {
1570 len = vq->packed.desc_extra[id].len;
1571 for (i = 0; i < len / sizeof(struct vring_packed_desc);
1572 i++)
1573 vring_unmap_desc_packed(vq, &desc[i]);
1574 }
1575 kfree(desc);
1576 state->indir_desc = NULL;
1577 } else if (ctx) {
1578 *ctx = state->indir_desc;
1579 }
1580}
1581
1582static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
1583 u16 idx, bool used_wrap_counter)
1584{
1585 bool avail, used;
1586 u16 flags;
1587
1588 flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
1589 avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
1590 used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
1591
1592 return avail == used && used == used_wrap_counter;
1593}
1594
1595static inline bool more_used_packed(const struct vring_virtqueue *vq)
1596{
a7722890 1597 u16 last_used;
1598 u16 last_used_idx;
1599 bool used_wrap_counter;
1600
1601 last_used_idx = READ_ONCE(vq->last_used_idx);
1602 last_used = packed_last_used(last_used_idx);
1603 used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1604 return is_used_desc_packed(vq, last_used, used_wrap_counter);
1ce9e605
TB
1605}
1606
1607static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
1608 unsigned int *len,
1609 void **ctx)
1610{
1611 struct vring_virtqueue *vq = to_vvq(_vq);
a7722890 1612 u16 last_used, id, last_used_idx;
1613 bool used_wrap_counter;
1ce9e605
TB
1614 void *ret;
1615
1616 START_USE(vq);
1617
1618 if (unlikely(vq->broken)) {
1619 END_USE(vq);
1620 return NULL;
1621 }
1622
1623 if (!more_used_packed(vq)) {
1624 pr_debug("No more buffers in queue\n");
1625 END_USE(vq);
1626 return NULL;
1627 }
1628
1629 /* Only get used elements after they have been exposed by host. */
1630 virtio_rmb(vq->weak_barriers);
1631
a7722890 1632 last_used_idx = READ_ONCE(vq->last_used_idx);
1633 used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1634 last_used = packed_last_used(last_used_idx);
1ce9e605
TB
1635 id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
1636 *len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
1637
1638 if (unlikely(id >= vq->packed.vring.num)) {
1639 BAD_RING(vq, "id %u out of range\n", id);
1640 return NULL;
1641 }
1642 if (unlikely(!vq->packed.desc_state[id].data)) {
1643 BAD_RING(vq, "id %u is not a head!\n", id);
1644 return NULL;
1645 }
1646
1647 /* detach_buf_packed clears data, so grab it now. */
1648 ret = vq->packed.desc_state[id].data;
1649 detach_buf_packed(vq, id, ctx);
1650
a7722890 1651 last_used += vq->packed.desc_state[id].num;
1652 if (unlikely(last_used >= vq->packed.vring.num)) {
1653 last_used -= vq->packed.vring.num;
1654 used_wrap_counter ^= 1;
1ce9e605
TB
1655 }
1656
a7722890 1657 last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1658 WRITE_ONCE(vq->last_used_idx, last_used);
1659
f51f9826
TB
1660 /*
1661 * If we expect an interrupt for the next entry, tell host
1662 * by writing event index and flush out the write before
1663 * the read in the next get_buf call.
1664 */
1665 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
1666 virtio_store_mb(vq->weak_barriers,
1667 &vq->packed.vring.driver->off_wrap,
a7722890 1668 cpu_to_le16(vq->last_used_idx));
f51f9826 1669
1ce9e605
TB
1670 LAST_ADD_TIME_INVALID(vq);
1671
1672 END_USE(vq);
1673 return ret;
1674}
1675
1676static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
1677{
1678 struct vring_virtqueue *vq = to_vvq(_vq);
1679
1680 if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
1681 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1682 vq->packed.vring.driver->flags =
1683 cpu_to_le16(vq->packed.event_flags_shadow);
1684 }
1685}
1686
31532340 1687static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
1ce9e605
TB
1688{
1689 struct vring_virtqueue *vq = to_vvq(_vq);
1690
1691 START_USE(vq);
1692
1693 /*
1694 * We optimistically turn back on interrupts, then check if there was
1695 * more to do.
1696 */
1697
f51f9826
TB
1698 if (vq->event) {
1699 vq->packed.vring.driver->off_wrap =
a7722890 1700 cpu_to_le16(vq->last_used_idx);
f51f9826
TB
1701 /*
1702 * We need to update event offset and event wrap
1703 * counter first before updating event flags.
1704 */
1705 virtio_wmb(vq->weak_barriers);
1706 }
1707
1ce9e605 1708 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
f51f9826
TB
1709 vq->packed.event_flags_shadow = vq->event ?
1710 VRING_PACKED_EVENT_FLAG_DESC :
1711 VRING_PACKED_EVENT_FLAG_ENABLE;
1ce9e605
TB
1712 vq->packed.vring.driver->flags =
1713 cpu_to_le16(vq->packed.event_flags_shadow);
1714 }
1715
1716 END_USE(vq);
a7722890 1717 return vq->last_used_idx;
1ce9e605
TB
1718}
1719
1720static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
1721{
1722 struct vring_virtqueue *vq = to_vvq(_vq);
1723 bool wrap_counter;
1724 u16 used_idx;
1725
1726 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1727 used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1728
1729 return is_used_desc_packed(vq, used_idx, wrap_counter);
1730}
1731
1732static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
1733{
1734 struct vring_virtqueue *vq = to_vvq(_vq);
a7722890 1735 u16 used_idx, wrap_counter, last_used_idx;
f51f9826 1736 u16 bufs;
1ce9e605
TB
1737
1738 START_USE(vq);
1739
1740 /*
1741 * We optimistically turn back on interrupts, then check if there was
1742 * more to do.
1743 */
1744
f51f9826
TB
1745 if (vq->event) {
1746 /* TODO: tune this threshold */
1747 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
a7722890 1748 last_used_idx = READ_ONCE(vq->last_used_idx);
1749 wrap_counter = packed_used_wrap_counter(last_used_idx);
f51f9826 1750
a7722890 1751 used_idx = packed_last_used(last_used_idx) + bufs;
f51f9826
TB
1752 if (used_idx >= vq->packed.vring.num) {
1753 used_idx -= vq->packed.vring.num;
1754 wrap_counter ^= 1;
1755 }
1756
1757 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
1758 (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1759
1760 /*
1761 * We need to update event offset and event wrap
1762 * counter first before updating event flags.
1763 */
1764 virtio_wmb(vq->weak_barriers);
f51f9826 1765 }
1ce9e605
TB
1766
1767 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
f51f9826
TB
1768 vq->packed.event_flags_shadow = vq->event ?
1769 VRING_PACKED_EVENT_FLAG_DESC :
1770 VRING_PACKED_EVENT_FLAG_ENABLE;
1ce9e605
TB
1771 vq->packed.vring.driver->flags =
1772 cpu_to_le16(vq->packed.event_flags_shadow);
1773 }
1774
1775 /*
1776 * We need to update event suppression structure first
1777 * before re-checking for more used buffers.
1778 */
1779 virtio_mb(vq->weak_barriers);
1780
a7722890 1781 last_used_idx = READ_ONCE(vq->last_used_idx);
1782 wrap_counter = packed_used_wrap_counter(last_used_idx);
1783 used_idx = packed_last_used(last_used_idx);
1784 if (is_used_desc_packed(vq, used_idx, wrap_counter)) {
1ce9e605
TB
1785 END_USE(vq);
1786 return false;
1787 }
1788
1789 END_USE(vq);
1790 return true;
1791}
1792
1793static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
1794{
1795 struct vring_virtqueue *vq = to_vvq(_vq);
1796 unsigned int i;
1797 void *buf;
1798
1799 START_USE(vq);
1800
1801 for (i = 0; i < vq->packed.vring.num; i++) {
1802 if (!vq->packed.desc_state[i].data)
1803 continue;
1804 /* detach_buf clears data, so grab it now. */
1805 buf = vq->packed.desc_state[i].data;
1806 detach_buf_packed(vq, i, NULL);
1807 END_USE(vq);
1808 return buf;
1809 }
1810 /* That should have freed everything. */
1811 BUG_ON(vq->vq.num_free != vq->packed.vring.num);
1812
1813 END_USE(vq);
1814 return NULL;
1815}
1816
96ef18a2 1817static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num)
5a222421
JW
1818{
1819 struct vring_desc_extra *desc_extra;
1820 unsigned int i;
1821
1822 desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra),
1823 GFP_KERNEL);
1824 if (!desc_extra)
1825 return NULL;
1826
1827 memset(desc_extra, 0, num * sizeof(struct vring_desc_extra));
1828
1829 for (i = 0; i < num - 1; i++)
1830 desc_extra[i].next = i + 1;
1831
1832 return desc_extra;
1833}
1834
6356f8bb
XZ
1835static void vring_free_packed(struct vring_virtqueue_packed *vring_packed,
1836 struct virtio_device *vdev)
1837{
1838 if (vring_packed->vring.desc)
1839 vring_free_queue(vdev, vring_packed->ring_size_in_bytes,
1840 vring_packed->vring.desc,
1841 vring_packed->ring_dma_addr);
1842
1843 if (vring_packed->vring.driver)
1844 vring_free_queue(vdev, vring_packed->event_size_in_bytes,
1845 vring_packed->vring.driver,
1846 vring_packed->driver_event_dma_addr);
1847
1848 if (vring_packed->vring.device)
1849 vring_free_queue(vdev, vring_packed->event_size_in_bytes,
1850 vring_packed->vring.device,
1851 vring_packed->device_event_dma_addr);
1852
1853 kfree(vring_packed->desc_state);
1854 kfree(vring_packed->desc_extra);
1855}
1856
6b60b9c0
XZ
1857static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed,
1858 struct virtio_device *vdev,
1859 u32 num)
1ce9e605 1860{
1ce9e605
TB
1861 struct vring_packed_desc *ring;
1862 struct vring_packed_desc_event *driver, *device;
1863 dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
1864 size_t ring_size_in_bytes, event_size_in_bytes;
1ce9e605
TB
1865
1866 ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
1867
1868 ring = vring_alloc_queue(vdev, ring_size_in_bytes,
1869 &ring_dma_addr,
1870 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1871 if (!ring)
6b60b9c0
XZ
1872 goto err;
1873
1874 vring_packed->vring.desc = ring;
1875 vring_packed->ring_dma_addr = ring_dma_addr;
1876 vring_packed->ring_size_in_bytes = ring_size_in_bytes;
1ce9e605
TB
1877
1878 event_size_in_bytes = sizeof(struct vring_packed_desc_event);
1879
1880 driver = vring_alloc_queue(vdev, event_size_in_bytes,
1881 &driver_event_dma_addr,
1882 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1883 if (!driver)
6b60b9c0
XZ
1884 goto err;
1885
1886 vring_packed->vring.driver = driver;
1887 vring_packed->event_size_in_bytes = event_size_in_bytes;
1888 vring_packed->driver_event_dma_addr = driver_event_dma_addr;
1ce9e605
TB
1889
1890 device = vring_alloc_queue(vdev, event_size_in_bytes,
1891 &device_event_dma_addr,
1892 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1893 if (!device)
6b60b9c0
XZ
1894 goto err;
1895
1896 vring_packed->vring.device = device;
1897 vring_packed->device_event_dma_addr = device_event_dma_addr;
1898
1899 vring_packed->vring.num = num;
1900
1901 return 0;
1902
1903err:
1904 vring_free_packed(vring_packed, vdev);
1905 return -ENOMEM;
1906}
1907
ef3167cf
XZ
1908static int vring_alloc_state_extra_packed(struct vring_virtqueue_packed *vring_packed)
1909{
1910 struct vring_desc_state_packed *state;
1911 struct vring_desc_extra *extra;
1912 u32 num = vring_packed->vring.num;
1913
1914 state = kmalloc_array(num, sizeof(struct vring_desc_state_packed), GFP_KERNEL);
1915 if (!state)
1916 goto err_desc_state;
1917
1918 memset(state, 0, num * sizeof(struct vring_desc_state_packed));
1919
1920 extra = vring_alloc_desc_extra(num);
1921 if (!extra)
1922 goto err_desc_extra;
1923
1924 vring_packed->desc_state = state;
1925 vring_packed->desc_extra = extra;
1926
1927 return 0;
1928
1929err_desc_extra:
1930 kfree(state);
1931err_desc_state:
1932 return -ENOMEM;
1933}
1934
1a107c87
XZ
1935static void virtqueue_vring_init_packed(struct vring_virtqueue_packed *vring_packed,
1936 bool callback)
1937{
1938 vring_packed->next_avail_idx = 0;
1939 vring_packed->avail_wrap_counter = 1;
1940 vring_packed->event_flags_shadow = 0;
1941 vring_packed->avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
1942
1943 /* No callback? Tell other side not to bother us. */
1944 if (!callback) {
1945 vring_packed->event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1946 vring_packed->vring.driver->flags =
1947 cpu_to_le16(vring_packed->event_flags_shadow);
1948 }
1949}
1950
51d649f1
XZ
1951static void virtqueue_vring_attach_packed(struct vring_virtqueue *vq,
1952 struct vring_virtqueue_packed *vring_packed)
1953{
1954 vq->packed = *vring_packed;
1955
1956 /* Put everything in free lists. */
1957 vq->free_head = 0;
1958}
1959
56775e14
XZ
1960static void virtqueue_reinit_packed(struct vring_virtqueue *vq)
1961{
1962 memset(vq->packed.vring.device, 0, vq->packed.event_size_in_bytes);
1963 memset(vq->packed.vring.driver, 0, vq->packed.event_size_in_bytes);
1964
1965 /* we need to reset the desc.flags. For more, see is_used_desc_packed() */
1966 memset(vq->packed.vring.desc, 0, vq->packed.ring_size_in_bytes);
1967
1968 virtqueue_init(vq, vq->packed.vring.num);
1969 virtqueue_vring_init_packed(&vq->packed, !!vq->vq.callback);
1970}
1971
6b60b9c0
XZ
1972static struct virtqueue *vring_create_virtqueue_packed(
1973 unsigned int index,
1974 unsigned int num,
1975 unsigned int vring_align,
1976 struct virtio_device *vdev,
1977 bool weak_barriers,
1978 bool may_reduce_num,
1979 bool context,
1980 bool (*notify)(struct virtqueue *),
1981 void (*callback)(struct virtqueue *),
1982 const char *name)
1983{
1984 struct vring_virtqueue_packed vring_packed = {};
1985 struct vring_virtqueue *vq;
ef3167cf 1986 int err;
6b60b9c0
XZ
1987
1988 if (vring_alloc_queue_packed(&vring_packed, vdev, num))
1989 goto err_ring;
1ce9e605
TB
1990
1991 vq = kmalloc(sizeof(*vq), GFP_KERNEL);
1992 if (!vq)
1993 goto err_vq;
1994
1995 vq->vq.callback = callback;
1996 vq->vq.vdev = vdev;
1997 vq->vq.name = name;
1ce9e605 1998 vq->vq.index = index;
4913e854 1999 vq->vq.reset = false;
1ce9e605
TB
2000 vq->we_own_ring = true;
2001 vq->notify = notify;
2002 vq->weak_barriers = weak_barriers;
c346dae4 2003#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
8b4ec69d 2004 vq->broken = true;
c346dae4
JW
2005#else
2006 vq->broken = false;
2007#endif
1ce9e605
TB
2008 vq->packed_ring = true;
2009 vq->use_dma_api = vring_use_dma_api(vdev);
1ce9e605
TB
2010
2011 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2012 !context;
2013 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2014
45383fb0
TB
2015 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2016 vq->weak_barriers = false;
2017
ef3167cf
XZ
2018 err = vring_alloc_state_extra_packed(&vring_packed);
2019 if (err)
2020 goto err_state_extra;
1ce9e605 2021
1a107c87 2022 virtqueue_vring_init_packed(&vring_packed, !!callback);
1ce9e605 2023
3a897128 2024 virtqueue_init(vq, num);
51d649f1 2025 virtqueue_vring_attach_packed(vq, &vring_packed);
3a897128 2026
0e566c8f 2027 spin_lock(&vdev->vqs_list_lock);
e152d8af 2028 list_add_tail(&vq->vq.list, &vdev->vqs);
0e566c8f 2029 spin_unlock(&vdev->vqs_list_lock);
1ce9e605
TB
2030 return &vq->vq;
2031
ef3167cf 2032err_state_extra:
1ce9e605
TB
2033 kfree(vq);
2034err_vq:
6b60b9c0 2035 vring_free_packed(&vring_packed, vdev);
1ce9e605
TB
2036err_ring:
2037 return NULL;
2038}
2039
947f9fcf
XZ
2040static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num)
2041{
2042 struct vring_virtqueue_packed vring_packed = {};
2043 struct vring_virtqueue *vq = to_vvq(_vq);
2044 struct virtio_device *vdev = _vq->vdev;
2045 int err;
2046
2047 if (vring_alloc_queue_packed(&vring_packed, vdev, num))
2048 goto err_ring;
2049
2050 err = vring_alloc_state_extra_packed(&vring_packed);
2051 if (err)
2052 goto err_state_extra;
2053
2054 vring_free(&vq->vq);
2055
2056 virtqueue_vring_init_packed(&vring_packed, !!vq->vq.callback);
2057
2058 virtqueue_init(vq, vring_packed.vring.num);
2059 virtqueue_vring_attach_packed(vq, &vring_packed);
2060
2061 return 0;
2062
2063err_state_extra:
2064 vring_free_packed(&vring_packed, vdev);
2065err_ring:
2066 virtqueue_reinit_packed(vq);
2067 return -ENOMEM;
2068}
2069
1ce9e605 2070
e6f633e5
TB
2071/*
2072 * Generic functions and exported symbols.
2073 */
2074
2075static inline int virtqueue_add(struct virtqueue *_vq,
2076 struct scatterlist *sgs[],
2077 unsigned int total_sg,
2078 unsigned int out_sgs,
2079 unsigned int in_sgs,
2080 void *data,
2081 void *ctx,
2082 gfp_t gfp)
2083{
1ce9e605
TB
2084 struct vring_virtqueue *vq = to_vvq(_vq);
2085
2086 return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
2087 out_sgs, in_sgs, data, ctx, gfp) :
2088 virtqueue_add_split(_vq, sgs, total_sg,
2089 out_sgs, in_sgs, data, ctx, gfp);
e6f633e5
TB
2090}
2091
2092/**
2093 * virtqueue_add_sgs - expose buffers to other end
a5581206 2094 * @_vq: the struct virtqueue we're talking about.
e6f633e5 2095 * @sgs: array of terminated scatterlists.
a5581206
JB
2096 * @out_sgs: the number of scatterlists readable by other side
2097 * @in_sgs: the number of scatterlists which are writable (after readable ones)
e6f633e5
TB
2098 * @data: the token identifying the buffer.
2099 * @gfp: how to do memory allocations (if necessary).
2100 *
2101 * Caller must ensure we don't call this with other virtqueue operations
2102 * at the same time (except where noted).
2103 *
2104 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2105 */
2106int virtqueue_add_sgs(struct virtqueue *_vq,
2107 struct scatterlist *sgs[],
2108 unsigned int out_sgs,
2109 unsigned int in_sgs,
2110 void *data,
2111 gfp_t gfp)
2112{
2113 unsigned int i, total_sg = 0;
2114
2115 /* Count them first. */
2116 for (i = 0; i < out_sgs + in_sgs; i++) {
2117 struct scatterlist *sg;
2118
2119 for (sg = sgs[i]; sg; sg = sg_next(sg))
2120 total_sg++;
2121 }
2122 return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
2123 data, NULL, gfp);
2124}
2125EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
2126
2127/**
2128 * virtqueue_add_outbuf - expose output buffers to other end
2129 * @vq: the struct virtqueue we're talking about.
2130 * @sg: scatterlist (must be well-formed and terminated!)
2131 * @num: the number of entries in @sg readable by other side
2132 * @data: the token identifying the buffer.
2133 * @gfp: how to do memory allocations (if necessary).
2134 *
2135 * Caller must ensure we don't call this with other virtqueue operations
2136 * at the same time (except where noted).
2137 *
2138 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2139 */
2140int virtqueue_add_outbuf(struct virtqueue *vq,
2141 struct scatterlist *sg, unsigned int num,
2142 void *data,
2143 gfp_t gfp)
2144{
2145 return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
2146}
2147EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
2148
2149/**
2150 * virtqueue_add_inbuf - expose input buffers to other end
2151 * @vq: the struct virtqueue we're talking about.
2152 * @sg: scatterlist (must be well-formed and terminated!)
2153 * @num: the number of entries in @sg writable by other side
2154 * @data: the token identifying the buffer.
2155 * @gfp: how to do memory allocations (if necessary).
2156 *
2157 * Caller must ensure we don't call this with other virtqueue operations
2158 * at the same time (except where noted).
2159 *
2160 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2161 */
2162int virtqueue_add_inbuf(struct virtqueue *vq,
2163 struct scatterlist *sg, unsigned int num,
2164 void *data,
2165 gfp_t gfp)
2166{
2167 return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
2168}
2169EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
2170
2171/**
2172 * virtqueue_add_inbuf_ctx - expose input buffers to other end
2173 * @vq: the struct virtqueue we're talking about.
2174 * @sg: scatterlist (must be well-formed and terminated!)
2175 * @num: the number of entries in @sg writable by other side
2176 * @data: the token identifying the buffer.
2177 * @ctx: extra context for the token
2178 * @gfp: how to do memory allocations (if necessary).
2179 *
2180 * Caller must ensure we don't call this with other virtqueue operations
2181 * at the same time (except where noted).
2182 *
2183 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2184 */
2185int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
2186 struct scatterlist *sg, unsigned int num,
2187 void *data,
2188 void *ctx,
2189 gfp_t gfp)
2190{
2191 return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
2192}
2193EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
2194
2195/**
2196 * virtqueue_kick_prepare - first half of split virtqueue_kick call.
a5581206 2197 * @_vq: the struct virtqueue
e6f633e5
TB
2198 *
2199 * Instead of virtqueue_kick(), you can do:
2200 * if (virtqueue_kick_prepare(vq))
2201 * virtqueue_notify(vq);
2202 *
2203 * This is sometimes useful because the virtqueue_kick_prepare() needs
2204 * to be serialized, but the actual virtqueue_notify() call does not.
2205 */
2206bool virtqueue_kick_prepare(struct virtqueue *_vq)
2207{
1ce9e605
TB
2208 struct vring_virtqueue *vq = to_vvq(_vq);
2209
2210 return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
2211 virtqueue_kick_prepare_split(_vq);
e6f633e5
TB
2212}
2213EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
2214
2215/**
2216 * virtqueue_notify - second half of split virtqueue_kick call.
a5581206 2217 * @_vq: the struct virtqueue
e6f633e5
TB
2218 *
2219 * This does not need to be serialized.
2220 *
2221 * Returns false if host notify failed or queue is broken, otherwise true.
2222 */
2223bool virtqueue_notify(struct virtqueue *_vq)
2224{
2225 struct vring_virtqueue *vq = to_vvq(_vq);
2226
2227 if (unlikely(vq->broken))
2228 return false;
2229
2230 /* Prod other side to tell it about changes. */
2231 if (!vq->notify(_vq)) {
2232 vq->broken = true;
2233 return false;
2234 }
2235 return true;
2236}
2237EXPORT_SYMBOL_GPL(virtqueue_notify);
2238
2239/**
2240 * virtqueue_kick - update after add_buf
2241 * @vq: the struct virtqueue
2242 *
2243 * After one or more virtqueue_add_* calls, invoke this to kick
2244 * the other side.
2245 *
2246 * Caller must ensure we don't call this with other virtqueue
2247 * operations at the same time (except where noted).
2248 *
2249 * Returns false if kick failed, otherwise true.
2250 */
2251bool virtqueue_kick(struct virtqueue *vq)
2252{
2253 if (virtqueue_kick_prepare(vq))
2254 return virtqueue_notify(vq);
2255 return true;
2256}
2257EXPORT_SYMBOL_GPL(virtqueue_kick);
2258
2259/**
31c11db6 2260 * virtqueue_get_buf_ctx - get the next used buffer
a5581206 2261 * @_vq: the struct virtqueue we're talking about.
e6f633e5 2262 * @len: the length written into the buffer
a5581206 2263 * @ctx: extra context for the token
e6f633e5
TB
2264 *
2265 * If the device wrote data into the buffer, @len will be set to the
2266 * amount written. This means you don't need to clear the buffer
2267 * beforehand to ensure there's no data leakage in the case of short
2268 * writes.
2269 *
2270 * Caller must ensure we don't call this with other virtqueue
2271 * operations at the same time (except where noted).
2272 *
2273 * Returns NULL if there are no used buffers, or the "data" token
2274 * handed to virtqueue_add_*().
2275 */
2276void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
2277 void **ctx)
2278{
1ce9e605
TB
2279 struct vring_virtqueue *vq = to_vvq(_vq);
2280
2281 return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
2282 virtqueue_get_buf_ctx_split(_vq, len, ctx);
e6f633e5
TB
2283}
2284EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
2285
2286void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
2287{
2288 return virtqueue_get_buf_ctx(_vq, len, NULL);
2289}
2290EXPORT_SYMBOL_GPL(virtqueue_get_buf);
e6f633e5
TB
2291/**
2292 * virtqueue_disable_cb - disable callbacks
a5581206 2293 * @_vq: the struct virtqueue we're talking about.
e6f633e5
TB
2294 *
2295 * Note that this is not necessarily synchronous, hence unreliable and only
2296 * useful as an optimization.
2297 *
2298 * Unlike other operations, this need not be serialized.
2299 */
2300void virtqueue_disable_cb(struct virtqueue *_vq)
2301{
1ce9e605
TB
2302 struct vring_virtqueue *vq = to_vvq(_vq);
2303
8d622d21
MT
2304 /* If device triggered an event already it won't trigger one again:
2305 * no need to disable.
2306 */
2307 if (vq->event_triggered)
2308 return;
2309
1ce9e605
TB
2310 if (vq->packed_ring)
2311 virtqueue_disable_cb_packed(_vq);
2312 else
2313 virtqueue_disable_cb_split(_vq);
e6f633e5
TB
2314}
2315EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
2316
2317/**
2318 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
a5581206 2319 * @_vq: the struct virtqueue we're talking about.
e6f633e5
TB
2320 *
2321 * This re-enables callbacks; it returns current queue state
2322 * in an opaque unsigned value. This value should be later tested by
2323 * virtqueue_poll, to detect a possible race between the driver checking for
2324 * more work, and enabling callbacks.
2325 *
2326 * Caller must ensure we don't call this with other virtqueue
2327 * operations at the same time (except where noted).
2328 */
31532340 2329unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq)
e6f633e5 2330{
1ce9e605
TB
2331 struct vring_virtqueue *vq = to_vvq(_vq);
2332
8d622d21
MT
2333 if (vq->event_triggered)
2334 vq->event_triggered = false;
2335
1ce9e605
TB
2336 return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
2337 virtqueue_enable_cb_prepare_split(_vq);
e6f633e5
TB
2338}
2339EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
2340
2341/**
2342 * virtqueue_poll - query pending used buffers
a5581206 2343 * @_vq: the struct virtqueue we're talking about.
e6f633e5
TB
2344 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
2345 *
2346 * Returns "true" if there are pending used buffers in the queue.
2347 *
2348 * This does not need to be serialized.
2349 */
31532340 2350bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx)
e6f633e5
TB
2351{
2352 struct vring_virtqueue *vq = to_vvq(_vq);
2353
481a0d74
MW
2354 if (unlikely(vq->broken))
2355 return false;
2356
e6f633e5 2357 virtio_mb(vq->weak_barriers);
1ce9e605
TB
2358 return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
2359 virtqueue_poll_split(_vq, last_used_idx);
e6f633e5
TB
2360}
2361EXPORT_SYMBOL_GPL(virtqueue_poll);
2362
2363/**
2364 * virtqueue_enable_cb - restart callbacks after disable_cb.
a5581206 2365 * @_vq: the struct virtqueue we're talking about.
e6f633e5
TB
2366 *
2367 * This re-enables callbacks; it returns "false" if there are pending
2368 * buffers in the queue, to detect a possible race between the driver
2369 * checking for more work, and enabling callbacks.
2370 *
2371 * Caller must ensure we don't call this with other virtqueue
2372 * operations at the same time (except where noted).
2373 */
2374bool virtqueue_enable_cb(struct virtqueue *_vq)
2375{
31532340 2376 unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq);
e6f633e5
TB
2377
2378 return !virtqueue_poll(_vq, last_used_idx);
2379}
2380EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
2381
2382/**
2383 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
a5581206 2384 * @_vq: the struct virtqueue we're talking about.
e6f633e5
TB
2385 *
2386 * This re-enables callbacks but hints to the other side to delay
2387 * interrupts until most of the available buffers have been processed;
2388 * it returns "false" if there are many pending buffers in the queue,
2389 * to detect a possible race between the driver checking for more work,
2390 * and enabling callbacks.
2391 *
2392 * Caller must ensure we don't call this with other virtqueue
2393 * operations at the same time (except where noted).
2394 */
2395bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
2396{
1ce9e605
TB
2397 struct vring_virtqueue *vq = to_vvq(_vq);
2398
8d622d21
MT
2399 if (vq->event_triggered)
2400 vq->event_triggered = false;
2401
1ce9e605
TB
2402 return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
2403 virtqueue_enable_cb_delayed_split(_vq);
e6f633e5
TB
2404}
2405EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
2406
138fd251
TB
2407/**
2408 * virtqueue_detach_unused_buf - detach first unused buffer
a5581206 2409 * @_vq: the struct virtqueue we're talking about.
138fd251
TB
2410 *
2411 * Returns NULL or the "data" token handed to virtqueue_add_*().
a62eecb3
XZ
2412 * This is not valid on an active queue; it is useful for device
2413 * shutdown or the reset queue.
138fd251
TB
2414 */
2415void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
2416{
1ce9e605
TB
2417 struct vring_virtqueue *vq = to_vvq(_vq);
2418
2419 return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
2420 virtqueue_detach_unused_buf_split(_vq);
138fd251 2421}
7c5e9ed0 2422EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
c021eac4 2423
138fd251
TB
2424static inline bool more_used(const struct vring_virtqueue *vq)
2425{
1ce9e605 2426 return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
138fd251
TB
2427}
2428
0a8a69dd
RR
2429irqreturn_t vring_interrupt(int irq, void *_vq)
2430{
2431 struct vring_virtqueue *vq = to_vvq(_vq);
2432
2433 if (!more_used(vq)) {
2434 pr_debug("virtqueue interrupt with no work for %p\n", vq);
2435 return IRQ_NONE;
2436 }
2437
8b4ec69d 2438 if (unlikely(vq->broken)) {
c346dae4 2439#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
8b4ec69d
JW
2440 dev_warn_once(&vq->vq.vdev->dev,
2441 "virtio vring IRQ raised before DRIVER_OK");
2442 return IRQ_NONE;
c346dae4
JW
2443#else
2444 return IRQ_HANDLED;
2445#endif
8b4ec69d 2446 }
0a8a69dd 2447
8d622d21
MT
2448 /* Just a hint for performance: so it's ok that this can be racy! */
2449 if (vq->event)
2450 vq->event_triggered = true;
2451
0a8a69dd 2452 pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
18445c4d
RR
2453 if (vq->vq.callback)
2454 vq->vq.callback(&vq->vq);
0a8a69dd
RR
2455
2456 return IRQ_HANDLED;
2457}
c6fd4701 2458EXPORT_SYMBOL_GPL(vring_interrupt);
0a8a69dd 2459
1ce9e605 2460/* Only available for split ring */
07d9629d 2461static struct virtqueue *__vring_new_virtqueue(unsigned int index,
cd4c812a 2462 struct vring_virtqueue_split *vring_split,
07d9629d
XZ
2463 struct virtio_device *vdev,
2464 bool weak_barriers,
2465 bool context,
2466 bool (*notify)(struct virtqueue *),
2467 void (*callback)(struct virtqueue *),
2468 const char *name)
0a8a69dd 2469{
2a2d1382 2470 struct vring_virtqueue *vq;
a2b36c8d 2471 int err;
0a8a69dd 2472
1ce9e605
TB
2473 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2474 return NULL;
2475
cbeedb72 2476 vq = kmalloc(sizeof(*vq), GFP_KERNEL);
0a8a69dd
RR
2477 if (!vq)
2478 return NULL;
2479
1ce9e605 2480 vq->packed_ring = false;
0a8a69dd
RR
2481 vq->vq.callback = callback;
2482 vq->vq.vdev = vdev;
9499f5e7 2483 vq->vq.name = name;
06ca287d 2484 vq->vq.index = index;
4913e854 2485 vq->vq.reset = false;
2a2d1382 2486 vq->we_own_ring = false;
0a8a69dd 2487 vq->notify = notify;
7b21e34f 2488 vq->weak_barriers = weak_barriers;
c346dae4 2489#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
8b4ec69d 2490 vq->broken = true;
c346dae4
JW
2491#else
2492 vq->broken = false;
2493#endif
fb3fba6b 2494 vq->use_dma_api = vring_use_dma_api(vdev);
0a8a69dd 2495
5a08b04f
MT
2496 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2497 !context;
a5c262c5 2498 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
9fa29b9d 2499
45383fb0
TB
2500 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2501 vq->weak_barriers = false;
2502
a2b36c8d
XZ
2503 err = vring_alloc_state_extra_split(vring_split);
2504 if (err) {
2505 kfree(vq);
2506 return NULL;
2507 }
72b5e895 2508
198fa7be
XZ
2509 virtqueue_vring_init_split(vring_split, vq);
2510
cd4c812a 2511 virtqueue_init(vq, vring_split->vring.num);
e1d6a423 2512 virtqueue_vring_attach_split(vq, vring_split);
3a897128 2513
0e566c8f 2514 spin_lock(&vdev->vqs_list_lock);
e152d8af 2515 list_add_tail(&vq->vq.list, &vdev->vqs);
0e566c8f 2516 spin_unlock(&vdev->vqs_list_lock);
0a8a69dd
RR
2517 return &vq->vq;
2518}
2a2d1382 2519
2a2d1382
AL
2520struct virtqueue *vring_create_virtqueue(
2521 unsigned int index,
2522 unsigned int num,
2523 unsigned int vring_align,
2524 struct virtio_device *vdev,
2525 bool weak_barriers,
2526 bool may_reduce_num,
f94682dd 2527 bool context,
2a2d1382
AL
2528 bool (*notify)(struct virtqueue *),
2529 void (*callback)(struct virtqueue *),
2530 const char *name)
2531{
1ce9e605
TB
2532
2533 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2534 return vring_create_virtqueue_packed(index, num, vring_align,
2535 vdev, weak_barriers, may_reduce_num,
2536 context, notify, callback, name);
2537
d79dca75
TB
2538 return vring_create_virtqueue_split(index, num, vring_align,
2539 vdev, weak_barriers, may_reduce_num,
2540 context, notify, callback, name);
2a2d1382
AL
2541}
2542EXPORT_SYMBOL_GPL(vring_create_virtqueue);
2543
c790e8e1
XZ
2544/**
2545 * virtqueue_resize - resize the vring of vq
2546 * @_vq: the struct virtqueue we're talking about.
2547 * @num: new ring num
2548 * @recycle: callback for recycle the useless buffer
2549 *
2550 * When it is really necessary to create a new vring, it will set the current vq
2551 * into the reset state. Then call the passed callback to recycle the buffer
2552 * that is no longer used. Only after the new vring is successfully created, the
2553 * old vring will be released.
2554 *
2555 * Caller must ensure we don't call this with other virtqueue operations
2556 * at the same time (except where noted).
2557 *
2558 * Returns zero or a negative error.
2559 * 0: success.
2560 * -ENOMEM: Failed to allocate a new ring, fall back to the original ring size.
2561 * vq can still work normally
2562 * -EBUSY: Failed to sync with device, vq may not work properly
2563 * -ENOENT: Transport or device not supported
2564 * -E2BIG/-EINVAL: num error
2565 * -EPERM: Operation not permitted
2566 *
2567 */
2568int virtqueue_resize(struct virtqueue *_vq, u32 num,
2569 void (*recycle)(struct virtqueue *vq, void *buf))
2570{
2571 struct vring_virtqueue *vq = to_vvq(_vq);
2572 struct virtio_device *vdev = vq->vq.vdev;
2573 void *buf;
2574 int err;
2575
2576 if (!vq->we_own_ring)
2577 return -EPERM;
2578
2579 if (num > vq->vq.num_max)
2580 return -E2BIG;
2581
2582 if (!num)
2583 return -EINVAL;
2584
2585 if ((vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num) == num)
2586 return 0;
2587
2588 if (!vdev->config->disable_vq_and_reset)
2589 return -ENOENT;
2590
2591 if (!vdev->config->enable_vq_after_reset)
2592 return -ENOENT;
2593
2594 err = vdev->config->disable_vq_and_reset(_vq);
2595 if (err)
2596 return err;
2597
2598 while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
2599 recycle(_vq, buf);
2600
2601 if (vq->packed_ring)
2602 err = virtqueue_resize_packed(_vq, num);
2603 else
2604 err = virtqueue_resize_split(_vq, num);
2605
2606 if (vdev->config->enable_vq_after_reset(_vq))
2607 return -EBUSY;
2608
2609 return err;
2610}
2611EXPORT_SYMBOL_GPL(virtqueue_resize);
2612
1ce9e605 2613/* Only available for split ring */
2a2d1382
AL
2614struct virtqueue *vring_new_virtqueue(unsigned int index,
2615 unsigned int num,
2616 unsigned int vring_align,
2617 struct virtio_device *vdev,
2618 bool weak_barriers,
f94682dd 2619 bool context,
2a2d1382
AL
2620 void *pages,
2621 bool (*notify)(struct virtqueue *vq),
2622 void (*callback)(struct virtqueue *vq),
2623 const char *name)
2624{
cd4c812a 2625 struct vring_virtqueue_split vring_split = {};
1ce9e605
TB
2626
2627 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2628 return NULL;
2629
cd4c812a
XZ
2630 vring_init(&vring_split.vring, num, pages, vring_align);
2631 return __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers,
2632 context, notify, callback, name);
2a2d1382 2633}
c6fd4701 2634EXPORT_SYMBOL_GPL(vring_new_virtqueue);
0a8a69dd 2635
3ea19e32 2636static void vring_free(struct virtqueue *_vq)
0a8a69dd 2637{
2a2d1382
AL
2638 struct vring_virtqueue *vq = to_vvq(_vq);
2639
2640 if (vq->we_own_ring) {
1ce9e605
TB
2641 if (vq->packed_ring) {
2642 vring_free_queue(vq->vq.vdev,
2643 vq->packed.ring_size_in_bytes,
2644 vq->packed.vring.desc,
2645 vq->packed.ring_dma_addr);
2646
2647 vring_free_queue(vq->vq.vdev,
2648 vq->packed.event_size_in_bytes,
2649 vq->packed.vring.driver,
2650 vq->packed.driver_event_dma_addr);
2651
2652 vring_free_queue(vq->vq.vdev,
2653 vq->packed.event_size_in_bytes,
2654 vq->packed.vring.device,
2655 vq->packed.device_event_dma_addr);
2656
2657 kfree(vq->packed.desc_state);
2658 kfree(vq->packed.desc_extra);
2659 } else {
2660 vring_free_queue(vq->vq.vdev,
2661 vq->split.queue_size_in_bytes,
2662 vq->split.vring.desc,
2663 vq->split.queue_dma_addr);
1ce9e605 2664 }
2a2d1382 2665 }
72b5e895 2666 if (!vq->packed_ring) {
f13f09a1 2667 kfree(vq->split.desc_state);
72b5e895
JW
2668 kfree(vq->split.desc_extra);
2669 }
3ea19e32
XZ
2670}
2671
2672void vring_del_virtqueue(struct virtqueue *_vq)
2673{
2674 struct vring_virtqueue *vq = to_vvq(_vq);
2675
2676 spin_lock(&vq->vq.vdev->vqs_list_lock);
2677 list_del(&_vq->list);
2678 spin_unlock(&vq->vq.vdev->vqs_list_lock);
2679
2680 vring_free(_vq);
2681
2a2d1382 2682 kfree(vq);
0a8a69dd 2683}
c6fd4701 2684EXPORT_SYMBOL_GPL(vring_del_virtqueue);
0a8a69dd 2685
e34f8725
RR
2686/* Manipulates transport-specific feature bits. */
2687void vring_transport_features(struct virtio_device *vdev)
2688{
2689 unsigned int i;
2690
2691 for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
2692 switch (i) {
9fa29b9d
MM
2693 case VIRTIO_RING_F_INDIRECT_DESC:
2694 break;
a5c262c5
MT
2695 case VIRTIO_RING_F_EVENT_IDX:
2696 break;
747ae34a
MT
2697 case VIRTIO_F_VERSION_1:
2698 break;
321bd212 2699 case VIRTIO_F_ACCESS_PLATFORM:
1a937693 2700 break;
f959a128
TB
2701 case VIRTIO_F_RING_PACKED:
2702 break;
45383fb0
TB
2703 case VIRTIO_F_ORDER_PLATFORM:
2704 break;
e34f8725
RR
2705 default:
2706 /* We don't understand this bit. */
e16e12be 2707 __virtio_clear_bit(vdev, i);
e34f8725
RR
2708 }
2709 }
2710}
2711EXPORT_SYMBOL_GPL(vring_transport_features);
2712
5dfc1762
RR
2713/**
2714 * virtqueue_get_vring_size - return the size of the virtqueue's vring
a5581206 2715 * @_vq: the struct virtqueue containing the vring of interest.
5dfc1762
RR
2716 *
2717 * Returns the size of the vring. This is mainly used for boasting to
2718 * userspace. Unlike other operations, this need not be serialized.
2719 */
8f9f4668
RJ
2720unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
2721{
2722
2723 struct vring_virtqueue *vq = to_vvq(_vq);
2724
1ce9e605 2725 return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
8f9f4668
RJ
2726}
2727EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
2728
32510631
XZ
2729/*
2730 * This function should only be called by the core, not directly by the driver.
2731 */
2732void __virtqueue_break(struct virtqueue *_vq)
2733{
2734 struct vring_virtqueue *vq = to_vvq(_vq);
2735
2736 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2737 WRITE_ONCE(vq->broken, true);
2738}
2739EXPORT_SYMBOL_GPL(__virtqueue_break);
2740
2741/*
2742 * This function should only be called by the core, not directly by the driver.
2743 */
2744void __virtqueue_unbreak(struct virtqueue *_vq)
2745{
2746 struct vring_virtqueue *vq = to_vvq(_vq);
2747
2748 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2749 WRITE_ONCE(vq->broken, false);
2750}
2751EXPORT_SYMBOL_GPL(__virtqueue_unbreak);
2752
b3b32c94
HG
2753bool virtqueue_is_broken(struct virtqueue *_vq)
2754{
2755 struct vring_virtqueue *vq = to_vvq(_vq);
2756
60f07798 2757 return READ_ONCE(vq->broken);
b3b32c94
HG
2758}
2759EXPORT_SYMBOL_GPL(virtqueue_is_broken);
2760
e2dcdfe9
RR
2761/*
2762 * This should prevent the device from being used, allowing drivers to
2763 * recover. You may need to grab appropriate locks to flush.
2764 */
2765void virtio_break_device(struct virtio_device *dev)
2766{
2767 struct virtqueue *_vq;
2768
0e566c8f 2769 spin_lock(&dev->vqs_list_lock);
e2dcdfe9
RR
2770 list_for_each_entry(_vq, &dev->vqs, list) {
2771 struct vring_virtqueue *vq = to_vvq(_vq);
60f07798
PP
2772
2773 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2774 WRITE_ONCE(vq->broken, true);
e2dcdfe9 2775 }
0e566c8f 2776 spin_unlock(&dev->vqs_list_lock);
e2dcdfe9
RR
2777}
2778EXPORT_SYMBOL_GPL(virtio_break_device);
2779
be83f04d
JW
2780/*
2781 * This should allow the device to be used by the driver. You may
2782 * need to grab appropriate locks to flush the write to
2783 * vq->broken. This should only be used in some specific case e.g
2784 * (probing and restoring). This function should only be called by the
2785 * core, not directly by the driver.
2786 */
2787void __virtio_unbreak_device(struct virtio_device *dev)
2788{
2789 struct virtqueue *_vq;
2790
2791 spin_lock(&dev->vqs_list_lock);
2792 list_for_each_entry(_vq, &dev->vqs, list) {
2793 struct vring_virtqueue *vq = to_vvq(_vq);
2794
2795 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2796 WRITE_ONCE(vq->broken, false);
2797 }
2798 spin_unlock(&dev->vqs_list_lock);
2799}
2800EXPORT_SYMBOL_GPL(__virtio_unbreak_device);
2801
2a2d1382 2802dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
89062652
CH
2803{
2804 struct vring_virtqueue *vq = to_vvq(_vq);
2805
2a2d1382
AL
2806 BUG_ON(!vq->we_own_ring);
2807
1ce9e605
TB
2808 if (vq->packed_ring)
2809 return vq->packed.ring_dma_addr;
2810
d79dca75 2811 return vq->split.queue_dma_addr;
89062652 2812}
2a2d1382 2813EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
89062652 2814
2a2d1382 2815dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
89062652
CH
2816{
2817 struct vring_virtqueue *vq = to_vvq(_vq);
2818
2a2d1382
AL
2819 BUG_ON(!vq->we_own_ring);
2820
1ce9e605
TB
2821 if (vq->packed_ring)
2822 return vq->packed.driver_event_dma_addr;
2823
d79dca75 2824 return vq->split.queue_dma_addr +
e593bf97 2825 ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
2a2d1382
AL
2826}
2827EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
2828
2829dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
2830{
2831 struct vring_virtqueue *vq = to_vvq(_vq);
2832
2833 BUG_ON(!vq->we_own_ring);
2834
1ce9e605
TB
2835 if (vq->packed_ring)
2836 return vq->packed.device_event_dma_addr;
2837
d79dca75 2838 return vq->split.queue_dma_addr +
e593bf97 2839 ((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
2a2d1382
AL
2840}
2841EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
2842
1ce9e605 2843/* Only available for split ring */
2a2d1382
AL
2844const struct vring *virtqueue_get_vring(struct virtqueue *vq)
2845{
e593bf97 2846 return &to_vvq(vq)->split.vring;
89062652 2847}
2a2d1382 2848EXPORT_SYMBOL_GPL(virtqueue_get_vring);
89062652 2849
c6fd4701 2850MODULE_LICENSE("GPL");