virtio_ring: packed: introduce virtqueue_resize_packed()
[linux-block.git] / drivers / virtio / virtio_ring.c
CommitLineData
fd534e9b 1// SPDX-License-Identifier: GPL-2.0-or-later
0a8a69dd
RR
2/* Virtio ring implementation.
3 *
4 * Copyright 2007 Rusty Russell IBM Corporation
0a8a69dd
RR
5 */
6#include <linux/virtio.h>
7#include <linux/virtio_ring.h>
e34f8725 8#include <linux/virtio_config.h>
0a8a69dd 9#include <linux/device.h>
5a0e3ad6 10#include <linux/slab.h>
b5a2c4f1 11#include <linux/module.h>
e93300b1 12#include <linux/hrtimer.h>
780bc790 13#include <linux/dma-mapping.h>
f8ce7263 14#include <linux/spinlock.h>
78fe3987 15#include <xen/xen.h>
0a8a69dd
RR
16
#ifdef DEBUG
/* For development, we want to crash whenever the ring is screwed. */
#define BAD_RING(_vq, fmt, args...) \
	do { \
		dev_err(&(_vq)->vq.vdev->dev, \
			"%s:"fmt, (_vq)->vq.name, ##args); \
		BUG(); \
	} while (0)
/*
 * Caller is supposed to guarantee no reentry.  START_USE records the source
 * line that entered the queue and panics if the queue is already in use.
 */
#define START_USE(_vq) \
	do { \
		if ((_vq)->in_use) \
			panic("%s:in_use = %i\n", \
			      (_vq)->vq.name, (_vq)->in_use); \
		(_vq)->in_use = __LINE__; \
	} while (0)
#define END_USE(_vq) \
	do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while (0)
/* Remember when the last buffer was added, warning on a long gap. */
#define LAST_ADD_TIME_UPDATE(_vq) \
	do { \
		ktime_t now = ktime_get(); \
		\
		/* No kick or get, with .1 second between?  Warn. */ \
		if ((_vq)->last_add_time_valid) \
			WARN_ON(ktime_to_ms(ktime_sub(now, \
				(_vq)->last_add_time)) > 100); \
		(_vq)->last_add_time = now; \
		(_vq)->last_add_time_valid = true; \
	} while (0)
/* Warn if more than 100ms passed since the recorded add time. */
#define LAST_ADD_TIME_CHECK(_vq) \
	do { \
		if ((_vq)->last_add_time_valid) { \
			WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
				(_vq)->last_add_time)) > 100); \
		} \
	} while (0)
#define LAST_ADD_TIME_INVALID(_vq) \
	((_vq)->last_add_time_valid = false)
#else
/*
 * Production build: report the problem and mark the queue broken instead of
 * crashing.  The macro argument is parenthesized everywhere so that any
 * pointer expression may be passed, matching the DEBUG variant.
 */
#define BAD_RING(_vq, fmt, args...) \
	do { \
		dev_err(&(_vq)->vq.vdev->dev, \
			"%s:"fmt, (_vq)->vq.name, ##args); \
		(_vq)->broken = true; \
	} while (0)
/* The usage and timing checks compile away outside of DEBUG builds. */
#define START_USE(vq)
#define END_USE(vq)
#define LAST_ADD_TIME_UPDATE(vq)
#define LAST_ADD_TIME_CHECK(vq)
#define LAST_ADD_TIME_INVALID(vq)
#endif
68
/* Per-buffer bookkeeping for a split ring, indexed by head descriptor. */
struct vring_desc_state_split {
	/* Token supplied when the buffer was added; NULL while unused. */
	void *data;
	/*
	 * Indirect descriptor table for this buffer, if any.  When no
	 * indirect table is used the add path stores the caller's ctx here.
	 */
	struct vring_desc *indir_desc;
};
73
1ce9e605
TB
74struct vring_desc_state_packed {
75 void *data; /* Data for callback. */
76 struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
77 u16 num; /* Descriptor list length. */
1ce9e605
TB
78 u16 last; /* The last desc state in a list. */
79};
80
1f28750f 81struct vring_desc_extra {
ef5c366f
JW
82 dma_addr_t addr; /* Descriptor DMA addr. */
83 u32 len; /* Descriptor length. */
1ce9e605 84 u16 flags; /* Descriptor flags. */
aeef9b47 85 u16 next; /* The next desc state in a list. */
1ce9e605
TB
86};
87
d76136e4
XZ
88struct vring_virtqueue_split {
89 /* Actual memory layout for this queue. */
90 struct vring vring;
91
92 /* Last written value to avail->flags */
93 u16 avail_flags_shadow;
94
95 /*
96 * Last written value to avail->idx in
97 * guest byte order.
98 */
99 u16 avail_idx_shadow;
100
101 /* Per-descriptor state. */
102 struct vring_desc_state_split *desc_state;
103 struct vring_desc_extra *desc_extra;
104
105 /* DMA address and size information */
106 dma_addr_t queue_dma_addr;
107 size_t queue_size_in_bytes;
af36b16f
XZ
108
109 /*
110 * The parameters for creating vrings are reserved for creating new
111 * vring.
112 */
113 u32 vring_align;
114 bool may_reduce_num;
d76136e4
XZ
115};
116
117struct vring_virtqueue_packed {
118 /* Actual memory layout for this queue. */
119 struct {
120 unsigned int num;
121 struct vring_packed_desc *desc;
122 struct vring_packed_desc_event *driver;
123 struct vring_packed_desc_event *device;
124 } vring;
125
126 /* Driver ring wrap counter. */
127 bool avail_wrap_counter;
128
129 /* Avail used flags. */
130 u16 avail_used_flags;
131
132 /* Index of the next avail descriptor. */
133 u16 next_avail_idx;
134
135 /*
136 * Last written value to driver->flags in
137 * guest byte order.
138 */
139 u16 event_flags_shadow;
140
141 /* Per-descriptor state. */
142 struct vring_desc_state_packed *desc_state;
143 struct vring_desc_extra *desc_extra;
144
145 /* DMA address and size information */
146 dma_addr_t ring_dma_addr;
147 dma_addr_t driver_event_dma_addr;
148 dma_addr_t device_event_dma_addr;
149 size_t ring_size_in_bytes;
150 size_t event_size_in_bytes;
151};
152
43b4f721 153struct vring_virtqueue {
0a8a69dd
RR
154 struct virtqueue vq;
155
1ce9e605
TB
156 /* Is this a packed ring? */
157 bool packed_ring;
158
fb3fba6b
TB
159 /* Is DMA API used? */
160 bool use_dma_api;
161
7b21e34f
RR
162 /* Can we use weak barriers? */
163 bool weak_barriers;
164
0a8a69dd
RR
165 /* Other side has made a mess, don't try any more. */
166 bool broken;
167
9fa29b9d
MM
168 /* Host supports indirect buffers */
169 bool indirect;
170
a5c262c5
MT
171 /* Host publishes avail event idx */
172 bool event;
173
0a8a69dd
RR
174 /* Head of free buffer list. */
175 unsigned int free_head;
176 /* Number we've added since last sync. */
177 unsigned int num_added;
178
a7722890 179 /* Last used index we've seen.
180 * for split ring, it just contains last used index
181 * for packed ring:
182 * bits up to VRING_PACKED_EVENT_F_WRAP_CTR include the last used index.
183 * bits from VRING_PACKED_EVENT_F_WRAP_CTR include the used wrap counter.
184 */
1bc4953e 185 u16 last_used_idx;
0a8a69dd 186
8d622d21
MT
187 /* Hint for event idx: already triggered no need to disable. */
188 bool event_triggered;
189
1ce9e605
TB
190 union {
191 /* Available for split ring */
d76136e4 192 struct vring_virtqueue_split split;
e593bf97 193
1ce9e605 194 /* Available for packed ring */
d76136e4 195 struct vring_virtqueue_packed packed;
1ce9e605 196 };
f277ec42 197
0a8a69dd 198 /* How to notify other side. FIXME: commonalize hcalls! */
46f9c2b9 199 bool (*notify)(struct virtqueue *vq);
0a8a69dd 200
2a2d1382
AL
201 /* DMA, allocation, and size information */
202 bool we_own_ring;
2a2d1382 203
0a8a69dd
RR
204#ifdef DEBUG
205 /* They're supposed to lock for us. */
206 unsigned int in_use;
e93300b1
RR
207
208 /* Figure out if their kicks are too delayed. */
209 bool last_add_time_valid;
210 ktime_t last_add_time;
0a8a69dd 211#endif
0a8a69dd
RR
212};
213
07d9629d 214static struct virtqueue *__vring_new_virtqueue(unsigned int index,
cd4c812a 215 struct vring_virtqueue_split *vring_split,
07d9629d
XZ
216 struct virtio_device *vdev,
217 bool weak_barriers,
218 bool context,
219 bool (*notify)(struct virtqueue *),
220 void (*callback)(struct virtqueue *),
221 const char *name);
a2b36c8d 222static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num);
6fea20e5 223static void vring_free(struct virtqueue *_vq);
e6f633e5
TB
224
225/*
226 * Helpers.
227 */
228
0a8a69dd
RR
/* Recover the enclosing vring_virtqueue from its embedded struct virtqueue. */
#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
230
35c51e09 231static inline bool virtqueue_use_indirect(struct vring_virtqueue *vq,
2f18c2d1
TB
232 unsigned int total_sg)
233{
2f18c2d1
TB
234 /*
235 * If the host supports indirect descriptor tables, and we have multiple
236 * buffers, then go indirect. FIXME: tune this threshold
237 */
238 return (vq->indirect && total_sg > 1 && vq->vq.num_free);
239}
240
d26c96c8 241/*
1a937693
MT
242 * Modern virtio devices have feature bits to specify whether they need a
243 * quirk and bypass the IOMMU. If not there, just use the DMA API.
244 *
245 * If there, the interaction between virtio and DMA API is messy.
d26c96c8
AL
246 *
247 * On most systems with virtio, physical addresses match bus addresses,
248 * and it doesn't particularly matter whether we use the DMA API.
249 *
250 * On some systems, including Xen and any system with a physical device
251 * that speaks virtio behind a physical IOMMU, we must use the DMA API
252 * for virtio DMA to work at all.
253 *
254 * On other systems, including SPARC and PPC64, virtio-pci devices are
255 * enumerated as though they are behind an IOMMU, but the virtio host
256 * ignores the IOMMU, so we must either pretend that the IOMMU isn't
257 * there or somehow map everything as the identity.
258 *
259 * For the time being, we preserve historic behavior and bypass the DMA
260 * API.
1a937693
MT
261 *
262 * TODO: install a per-device DMA ops structure that does the right thing
263 * taking into account all the above quirks, and use the DMA API
264 * unconditionally on data path.
d26c96c8
AL
265 */
266
267static bool vring_use_dma_api(struct virtio_device *vdev)
268{
24b6842a 269 if (!virtio_has_dma_quirk(vdev))
1a937693
MT
270 return true;
271
272 /* Otherwise, we are left to guess. */
78fe3987
AL
273 /*
274 * In theory, it's possible to have a buggy QEMU-supposed
275 * emulated Q35 IOMMU and Xen enabled at the same time. On
276 * such a configuration, virtio has never worked and will
277 * not work without an even larger kludge. Instead, enable
278 * the DMA API if we're a Xen guest, which at least allows
279 * all of the sensible Xen configurations to work correctly.
280 */
281 if (xen_domain())
282 return true;
283
d26c96c8
AL
284 return false;
285}
286
e6d6dd6c
JR
287size_t virtio_max_dma_size(struct virtio_device *vdev)
288{
289 size_t max_segment_size = SIZE_MAX;
290
291 if (vring_use_dma_api(vdev))
817fc978 292 max_segment_size = dma_max_mapping_size(vdev->dev.parent);
e6d6dd6c
JR
293
294 return max_segment_size;
295}
296EXPORT_SYMBOL_GPL(virtio_max_dma_size);
297
d79dca75
TB
298static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
299 dma_addr_t *dma_handle, gfp_t flag)
300{
301 if (vring_use_dma_api(vdev)) {
302 return dma_alloc_coherent(vdev->dev.parent, size,
303 dma_handle, flag);
304 } else {
305 void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
306
307 if (queue) {
308 phys_addr_t phys_addr = virt_to_phys(queue);
309 *dma_handle = (dma_addr_t)phys_addr;
310
311 /*
312 * Sanity check: make sure we dind't truncate
313 * the address. The only arches I can find that
314 * have 64-bit phys_addr_t but 32-bit dma_addr_t
315 * are certain non-highmem MIPS and x86
316 * configurations, but these configurations
317 * should never allocate physical pages above 32
318 * bits, so this is fine. Just in case, throw a
319 * warning and abort if we end up with an
320 * unrepresentable address.
321 */
322 if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
323 free_pages_exact(queue, PAGE_ALIGN(size));
324 return NULL;
325 }
326 }
327 return queue;
328 }
329}
330
331static void vring_free_queue(struct virtio_device *vdev, size_t size,
332 void *queue, dma_addr_t dma_handle)
333{
334 if (vring_use_dma_api(vdev))
335 dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
336 else
337 free_pages_exact(queue, PAGE_ALIGN(size));
338}
339
780bc790
AL
340/*
341 * The DMA ops on various arches are rather gnarly right now, and
342 * making all of the arch DMA ops work on the vring device itself
343 * is a mess. For now, we use the parent device for DMA ops.
344 */
75bfa81b 345static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
780bc790
AL
346{
347 return vq->vq.vdev->dev.parent;
348}
349
350/* Map one sg entry. */
351static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
352 struct scatterlist *sg,
353 enum dma_data_direction direction)
354{
fb3fba6b 355 if (!vq->use_dma_api)
780bc790
AL
356 return (dma_addr_t)sg_phys(sg);
357
358 /*
359 * We can't use dma_map_sg, because we don't use scatterlists in
360 * the way it expects (we don't guarantee that the scatterlist
361 * will exist for the lifetime of the mapping).
362 */
363 return dma_map_page(vring_dma_dev(vq),
364 sg_page(sg), sg->offset, sg->length,
365 direction);
366}
367
368static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
369 void *cpu_addr, size_t size,
370 enum dma_data_direction direction)
371{
fb3fba6b 372 if (!vq->use_dma_api)
780bc790
AL
373 return (dma_addr_t)virt_to_phys(cpu_addr);
374
375 return dma_map_single(vring_dma_dev(vq),
376 cpu_addr, size, direction);
377}
378
e6f633e5
TB
379static int vring_mapping_error(const struct vring_virtqueue *vq,
380 dma_addr_t addr)
381{
fb3fba6b 382 if (!vq->use_dma_api)
e6f633e5
TB
383 return 0;
384
385 return dma_mapping_error(vring_dma_dev(vq), addr);
386}
387
3a897128
XZ
388static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
389{
390 vq->vq.num_free = num;
391
392 if (vq->packed_ring)
393 vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR);
394 else
395 vq->last_used_idx = 0;
396
397 vq->event_triggered = false;
398 vq->num_added = 0;
399
400#ifdef DEBUG
401 vq->in_use = false;
402 vq->last_add_time_valid = false;
403#endif
404}
405
e6f633e5
TB
406
407/*
408 * Split ring specific functions - *_split().
409 */
410
72b5e895
JW
411static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
412 struct vring_desc *desc)
780bc790
AL
413{
414 u16 flags;
415
fb3fba6b 416 if (!vq->use_dma_api)
780bc790
AL
417 return;
418
419 flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
420
b4282ebc
XZ
421 dma_unmap_page(vring_dma_dev(vq),
422 virtio64_to_cpu(vq->vq.vdev, desc->addr),
423 virtio32_to_cpu(vq->vq.vdev, desc->len),
424 (flags & VRING_DESC_F_WRITE) ?
425 DMA_FROM_DEVICE : DMA_TO_DEVICE);
780bc790
AL
426}
427
72b5e895
JW
428static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
429 unsigned int i)
430{
431 struct vring_desc_extra *extra = vq->split.desc_extra;
432 u16 flags;
433
434 if (!vq->use_dma_api)
435 goto out;
436
437 flags = extra[i].flags;
438
439 if (flags & VRING_DESC_F_INDIRECT) {
440 dma_unmap_single(vring_dma_dev(vq),
441 extra[i].addr,
442 extra[i].len,
443 (flags & VRING_DESC_F_WRITE) ?
444 DMA_FROM_DEVICE : DMA_TO_DEVICE);
445 } else {
446 dma_unmap_page(vring_dma_dev(vq),
447 extra[i].addr,
448 extra[i].len,
449 (flags & VRING_DESC_F_WRITE) ?
450 DMA_FROM_DEVICE : DMA_TO_DEVICE);
451 }
452
453out:
454 return extra[i].next;
455}
456
138fd251
TB
457static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
458 unsigned int total_sg,
459 gfp_t gfp)
9fa29b9d
MM
460{
461 struct vring_desc *desc;
b25bd251 462 unsigned int i;
9fa29b9d 463
b92b1b89
WD
464 /*
465 * We require lowmem mappings for the descriptors because
466 * otherwise virt_to_phys will give us bogus addresses in the
467 * virtqueue.
468 */
82107539 469 gfp &= ~__GFP_HIGHMEM;
b92b1b89 470
6da2ec56 471 desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp);
9fa29b9d 472 if (!desc)
b25bd251 473 return NULL;
9fa29b9d 474
b25bd251 475 for (i = 0; i < total_sg; i++)
00e6f3d9 476 desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
b25bd251 477 return desc;
9fa29b9d
MM
478}
479
fe4c3862
JW
480static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
481 struct vring_desc *desc,
482 unsigned int i,
483 dma_addr_t addr,
484 unsigned int len,
72b5e895
JW
485 u16 flags,
486 bool indirect)
fe4c3862 487{
72b5e895
JW
488 struct vring_virtqueue *vring = to_vvq(vq);
489 struct vring_desc_extra *extra = vring->split.desc_extra;
490 u16 next;
491
fe4c3862
JW
492 desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
493 desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
494 desc[i].len = cpu_to_virtio32(vq->vdev, len);
495
72b5e895
JW
496 if (!indirect) {
497 next = extra[i].next;
498 desc[i].next = cpu_to_virtio16(vq->vdev, next);
499
500 extra[i].addr = addr;
501 extra[i].len = len;
502 extra[i].flags = flags;
503 } else
504 next = virtio16_to_cpu(vq->vdev, desc[i].next);
505
506 return next;
fe4c3862
JW
507}
508
138fd251
TB
509static inline int virtqueue_add_split(struct virtqueue *_vq,
510 struct scatterlist *sgs[],
511 unsigned int total_sg,
512 unsigned int out_sgs,
513 unsigned int in_sgs,
514 void *data,
515 void *ctx,
516 gfp_t gfp)
0a8a69dd
RR
517{
518 struct vring_virtqueue *vq = to_vvq(_vq);
13816c76 519 struct scatterlist *sg;
b25bd251 520 struct vring_desc *desc;
3f649ab7 521 unsigned int i, n, avail, descs_used, prev, err_idx;
1fe9b6fe 522 int head;
b25bd251 523 bool indirect;
0a8a69dd 524
9fa29b9d
MM
525 START_USE(vq);
526
0a8a69dd 527 BUG_ON(data == NULL);
5a08b04f 528 BUG_ON(ctx && vq->indirect);
9fa29b9d 529
70670444
RR
530 if (unlikely(vq->broken)) {
531 END_USE(vq);
532 return -EIO;
533 }
534
4d6a105e 535 LAST_ADD_TIME_UPDATE(vq);
e93300b1 536
b25bd251
RR
537 BUG_ON(total_sg == 0);
538
539 head = vq->free_head;
540
35c51e09 541 if (virtqueue_use_indirect(vq, total_sg))
138fd251 542 desc = alloc_indirect_split(_vq, total_sg, gfp);
44ed8089 543 else {
b25bd251 544 desc = NULL;
e593bf97 545 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
44ed8089 546 }
b25bd251
RR
547
548 if (desc) {
549 /* Use a single buffer which doesn't continue */
780bc790 550 indirect = true;
b25bd251
RR
551 /* Set up rest to use this indirect table. */
552 i = 0;
553 descs_used = 1;
b25bd251 554 } else {
780bc790 555 indirect = false;
e593bf97 556 desc = vq->split.vring.desc;
b25bd251
RR
557 i = head;
558 descs_used = total_sg;
9fa29b9d
MM
559 }
560
b4b4ff73 561 if (unlikely(vq->vq.num_free < descs_used)) {
0a8a69dd 562 pr_debug("Can't add buf len %i - avail = %i\n",
b25bd251 563 descs_used, vq->vq.num_free);
44653eae
RR
564 /* FIXME: for historical reasons, we force a notify here if
565 * there are outgoing parts to the buffer. Presumably the
566 * host should service the ring ASAP. */
13816c76 567 if (out_sgs)
44653eae 568 vq->notify(&vq->vq);
58625edf
WY
569 if (indirect)
570 kfree(desc);
0a8a69dd
RR
571 END_USE(vq);
572 return -ENOSPC;
573 }
574
13816c76 575 for (n = 0; n < out_sgs; n++) {
eeebf9b1 576 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
780bc790
AL
577 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
578 if (vring_mapping_error(vq, addr))
579 goto unmap_release;
580
13816c76 581 prev = i;
72b5e895
JW
582 /* Note that we trust indirect descriptor
583 * table since it use stream DMA mapping.
584 */
fe4c3862 585 i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
72b5e895
JW
586 VRING_DESC_F_NEXT,
587 indirect);
13816c76 588 }
0a8a69dd 589 }
13816c76 590 for (; n < (out_sgs + in_sgs); n++) {
eeebf9b1 591 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
780bc790
AL
592 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
593 if (vring_mapping_error(vq, addr))
594 goto unmap_release;
595
13816c76 596 prev = i;
72b5e895
JW
597 /* Note that we trust indirect descriptor
598 * table since it use stream DMA mapping.
599 */
fe4c3862
JW
600 i = virtqueue_add_desc_split(_vq, desc, i, addr,
601 sg->length,
602 VRING_DESC_F_NEXT |
72b5e895
JW
603 VRING_DESC_F_WRITE,
604 indirect);
13816c76 605 }
0a8a69dd
RR
606 }
607 /* Last one doesn't continue. */
00e6f3d9 608 desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
72b5e895 609 if (!indirect && vq->use_dma_api)
890d3356 610 vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &=
72b5e895 611 ~VRING_DESC_F_NEXT;
0a8a69dd 612
780bc790
AL
613 if (indirect) {
614 /* Now that the indirect table is filled in, map it. */
615 dma_addr_t addr = vring_map_single(
616 vq, desc, total_sg * sizeof(struct vring_desc),
617 DMA_TO_DEVICE);
618 if (vring_mapping_error(vq, addr))
619 goto unmap_release;
620
fe4c3862
JW
621 virtqueue_add_desc_split(_vq, vq->split.vring.desc,
622 head, addr,
623 total_sg * sizeof(struct vring_desc),
72b5e895
JW
624 VRING_DESC_F_INDIRECT,
625 false);
780bc790
AL
626 }
627
628 /* We're using some buffers from the free list. */
629 vq->vq.num_free -= descs_used;
630
0a8a69dd 631 /* Update free pointer */
b25bd251 632 if (indirect)
72b5e895 633 vq->free_head = vq->split.desc_extra[head].next;
b25bd251
RR
634 else
635 vq->free_head = i;
0a8a69dd 636
780bc790 637 /* Store token and indirect buffer state. */
cbeedb72 638 vq->split.desc_state[head].data = data;
780bc790 639 if (indirect)
cbeedb72 640 vq->split.desc_state[head].indir_desc = desc;
87646a34 641 else
cbeedb72 642 vq->split.desc_state[head].indir_desc = ctx;
0a8a69dd
RR
643
644 /* Put entry in available array (but don't update avail->idx until they
3b720b8c 645 * do sync). */
e593bf97
TB
646 avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
647 vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
0a8a69dd 648
ee7cd898
RR
649 /* Descriptors and available array need to be set before we expose the
650 * new available array entries. */
a9a0fef7 651 virtio_wmb(vq->weak_barriers);
e593bf97
TB
652 vq->split.avail_idx_shadow++;
653 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
654 vq->split.avail_idx_shadow);
ee7cd898
RR
655 vq->num_added++;
656
5e05bf58
TH
657 pr_debug("Added buffer head %i to %p\n", head, vq);
658 END_USE(vq);
659
ee7cd898
RR
660 /* This is very unlikely, but theoretically possible. Kick
661 * just in case. */
662 if (unlikely(vq->num_added == (1 << 16) - 1))
663 virtqueue_kick(_vq);
664
98e8c6bc 665 return 0;
780bc790
AL
666
667unmap_release:
668 err_idx = i;
cf8f1696
ML
669
670 if (indirect)
671 i = 0;
672 else
673 i = head;
780bc790
AL
674
675 for (n = 0; n < total_sg; n++) {
676 if (i == err_idx)
677 break;
72b5e895
JW
678 if (indirect) {
679 vring_unmap_one_split_indirect(vq, &desc[i]);
680 i = virtio16_to_cpu(_vq->vdev, desc[i].next);
681 } else
682 i = vring_unmap_one_split(vq, i);
780bc790
AL
683 }
684
780bc790
AL
685 if (indirect)
686 kfree(desc);
687
3cc36f6e 688 END_USE(vq);
f7728002 689 return -ENOMEM;
0a8a69dd 690}
13816c76 691
138fd251 692static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
0a8a69dd
RR
693{
694 struct vring_virtqueue *vq = to_vvq(_vq);
a5c262c5 695 u16 new, old;
41f0377f
RR
696 bool needs_kick;
697
0a8a69dd 698 START_USE(vq);
a72caae2
JW
699 /* We need to expose available array entries before checking avail
700 * event. */
a9a0fef7 701 virtio_mb(vq->weak_barriers);
0a8a69dd 702
e593bf97
TB
703 old = vq->split.avail_idx_shadow - vq->num_added;
704 new = vq->split.avail_idx_shadow;
0a8a69dd
RR
705 vq->num_added = 0;
706
4d6a105e
TB
707 LAST_ADD_TIME_CHECK(vq);
708 LAST_ADD_TIME_INVALID(vq);
e93300b1 709
41f0377f 710 if (vq->event) {
e593bf97
TB
711 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
712 vring_avail_event(&vq->split.vring)),
41f0377f
RR
713 new, old);
714 } else {
e593bf97
TB
715 needs_kick = !(vq->split.vring.used->flags &
716 cpu_to_virtio16(_vq->vdev,
717 VRING_USED_F_NO_NOTIFY));
41f0377f 718 }
0a8a69dd 719 END_USE(vq);
41f0377f
RR
720 return needs_kick;
721}
138fd251 722
138fd251
TB
723static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
724 void **ctx)
0a8a69dd 725{
780bc790 726 unsigned int i, j;
c60923cb 727 __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
0a8a69dd
RR
728
729 /* Clear data ptr. */
cbeedb72 730 vq->split.desc_state[head].data = NULL;
0a8a69dd 731
780bc790 732 /* Put back on free list: unmap first-level descriptors and find end */
0a8a69dd 733 i = head;
9fa29b9d 734
e593bf97 735 while (vq->split.vring.desc[i].flags & nextflag) {
72b5e895
JW
736 vring_unmap_one_split(vq, i);
737 i = vq->split.desc_extra[i].next;
06ca287d 738 vq->vq.num_free++;
0a8a69dd
RR
739 }
740
72b5e895
JW
741 vring_unmap_one_split(vq, i);
742 vq->split.desc_extra[i].next = vq->free_head;
0a8a69dd 743 vq->free_head = head;
780bc790 744
0a8a69dd 745 /* Plus final descriptor */
06ca287d 746 vq->vq.num_free++;
780bc790 747
5a08b04f 748 if (vq->indirect) {
cbeedb72
TB
749 struct vring_desc *indir_desc =
750 vq->split.desc_state[head].indir_desc;
5a08b04f
MT
751 u32 len;
752
753 /* Free the indirect table, if any, now that it's unmapped. */
754 if (!indir_desc)
755 return;
756
72b5e895 757 len = vq->split.desc_extra[head].len;
780bc790 758
72b5e895
JW
759 BUG_ON(!(vq->split.desc_extra[head].flags &
760 VRING_DESC_F_INDIRECT));
780bc790
AL
761 BUG_ON(len == 0 || len % sizeof(struct vring_desc));
762
763 for (j = 0; j < len / sizeof(struct vring_desc); j++)
72b5e895 764 vring_unmap_one_split_indirect(vq, &indir_desc[j]);
780bc790 765
5a08b04f 766 kfree(indir_desc);
cbeedb72 767 vq->split.desc_state[head].indir_desc = NULL;
5a08b04f 768 } else if (ctx) {
cbeedb72 769 *ctx = vq->split.desc_state[head].indir_desc;
780bc790 770 }
0a8a69dd
RR
771}
772
138fd251 773static inline bool more_used_split(const struct vring_virtqueue *vq)
0a8a69dd 774{
e593bf97
TB
775 return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
776 vq->split.vring.used->idx);
0a8a69dd
RR
777}
778
138fd251
TB
779static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
780 unsigned int *len,
781 void **ctx)
0a8a69dd
RR
782{
783 struct vring_virtqueue *vq = to_vvq(_vq);
784 void *ret;
785 unsigned int i;
3b720b8c 786 u16 last_used;
0a8a69dd
RR
787
788 START_USE(vq);
789
5ef82752
RR
790 if (unlikely(vq->broken)) {
791 END_USE(vq);
792 return NULL;
793 }
794
138fd251 795 if (!more_used_split(vq)) {
0a8a69dd
RR
796 pr_debug("No more buffers in queue\n");
797 END_USE(vq);
798 return NULL;
799 }
800
2d61ba95 801 /* Only get used array entries after they have been exposed by host. */
a9a0fef7 802 virtio_rmb(vq->weak_barriers);
2d61ba95 803
e593bf97
TB
804 last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
805 i = virtio32_to_cpu(_vq->vdev,
806 vq->split.vring.used->ring[last_used].id);
807 *len = virtio32_to_cpu(_vq->vdev,
808 vq->split.vring.used->ring[last_used].len);
0a8a69dd 809
e593bf97 810 if (unlikely(i >= vq->split.vring.num)) {
0a8a69dd
RR
811 BAD_RING(vq, "id %u out of range\n", i);
812 return NULL;
813 }
cbeedb72 814 if (unlikely(!vq->split.desc_state[i].data)) {
0a8a69dd
RR
815 BAD_RING(vq, "id %u is not a head!\n", i);
816 return NULL;
817 }
818
138fd251 819 /* detach_buf_split clears data, so grab it now. */
cbeedb72 820 ret = vq->split.desc_state[i].data;
138fd251 821 detach_buf_split(vq, i, ctx);
0a8a69dd 822 vq->last_used_idx++;
a5c262c5
MT
823 /* If we expect an interrupt for the next entry, tell host
824 * by writing event index and flush out the write before
825 * the read in the next get_buf call. */
e593bf97 826 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
788e5b3a 827 virtio_store_mb(vq->weak_barriers,
e593bf97 828 &vring_used_event(&vq->split.vring),
788e5b3a 829 cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
a5c262c5 830
4d6a105e 831 LAST_ADD_TIME_INVALID(vq);
e93300b1 832
0a8a69dd
RR
833 END_USE(vq);
834 return ret;
835}
138fd251 836
138fd251 837static void virtqueue_disable_cb_split(struct virtqueue *_vq)
18445c4d
RR
838{
839 struct vring_virtqueue *vq = to_vvq(_vq);
840
e593bf97
TB
841 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
842 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
8d622d21
MT
843 if (vq->event)
844 /* TODO: this is a hack. Figure out a cleaner value to write. */
845 vring_used_event(&vq->split.vring) = 0x0;
846 else
e593bf97
TB
847 vq->split.vring.avail->flags =
848 cpu_to_virtio16(_vq->vdev,
849 vq->split.avail_flags_shadow);
f277ec42 850 }
18445c4d
RR
851}
852
31532340 853static unsigned int virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
0a8a69dd
RR
854{
855 struct vring_virtqueue *vq = to_vvq(_vq);
cc229884 856 u16 last_used_idx;
0a8a69dd
RR
857
858 START_USE(vq);
0a8a69dd
RR
859
860 /* We optimistically turn back on interrupts, then check if there was
861 * more to do. */
a5c262c5
MT
862 /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
863 * either clear the flags bit or point the event index at the next
864 * entry. Always do both to keep code simple. */
e593bf97
TB
865 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
866 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
0ea1e4a6 867 if (!vq->event)
e593bf97
TB
868 vq->split.vring.avail->flags =
869 cpu_to_virtio16(_vq->vdev,
870 vq->split.avail_flags_shadow);
f277ec42 871 }
e593bf97
TB
872 vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
873 last_used_idx = vq->last_used_idx);
cc229884
MT
874 END_USE(vq);
875 return last_used_idx;
876}
138fd251 877
31532340 878static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_idx)
138fd251
TB
879{
880 struct vring_virtqueue *vq = to_vvq(_vq);
881
882 return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
e593bf97 883 vq->split.vring.used->idx);
138fd251
TB
884}
885
138fd251 886static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
7ab358c2
MT
887{
888 struct vring_virtqueue *vq = to_vvq(_vq);
889 u16 bufs;
890
891 START_USE(vq);
892
893 /* We optimistically turn back on interrupts, then check if there was
894 * more to do. */
895 /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
896 * either clear the flags bit or point the event index at the next
0ea1e4a6 897 * entry. Always update the event index to keep code simple. */
e593bf97
TB
898 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
899 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
0ea1e4a6 900 if (!vq->event)
e593bf97
TB
901 vq->split.vring.avail->flags =
902 cpu_to_virtio16(_vq->vdev,
903 vq->split.avail_flags_shadow);
f277ec42 904 }
7ab358c2 905 /* TODO: tune this threshold */
e593bf97 906 bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;
788e5b3a
MT
907
908 virtio_store_mb(vq->weak_barriers,
e593bf97 909 &vring_used_event(&vq->split.vring),
788e5b3a
MT
910 cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
911
e593bf97
TB
912 if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
913 - vq->last_used_idx) > bufs)) {
7ab358c2
MT
914 END_USE(vq);
915 return false;
916 }
917
918 END_USE(vq);
919 return true;
920}
7ab358c2 921
138fd251 922static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
c021eac4
SM
923{
924 struct vring_virtqueue *vq = to_vvq(_vq);
925 unsigned int i;
926 void *buf;
927
928 START_USE(vq);
929
e593bf97 930 for (i = 0; i < vq->split.vring.num; i++) {
cbeedb72 931 if (!vq->split.desc_state[i].data)
c021eac4 932 continue;
138fd251 933 /* detach_buf_split clears data, so grab it now. */
cbeedb72 934 buf = vq->split.desc_state[i].data;
138fd251 935 detach_buf_split(vq, i, NULL);
e593bf97
TB
936 vq->split.avail_idx_shadow--;
937 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
938 vq->split.avail_idx_shadow);
c021eac4
SM
939 END_USE(vq);
940 return buf;
941 }
942 /* That should have freed everything. */
e593bf97 943 BUG_ON(vq->vq.num_free != vq->split.vring.num);
c021eac4
SM
944
945 END_USE(vq);
946 return NULL;
947}
138fd251 948
198fa7be
XZ
949static void virtqueue_vring_init_split(struct vring_virtqueue_split *vring_split,
950 struct vring_virtqueue *vq)
951{
952 struct virtio_device *vdev;
953
954 vdev = vq->vq.vdev;
955
956 vring_split->avail_flags_shadow = 0;
957 vring_split->avail_idx_shadow = 0;
958
959 /* No callback? Tell other side not to bother us. */
960 if (!vq->vq.callback) {
961 vring_split->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
962 if (!vq->event)
963 vring_split->vring.avail->flags = cpu_to_virtio16(vdev,
964 vring_split->avail_flags_shadow);
965 }
966}
967
e5175b41
XZ
968static void virtqueue_reinit_split(struct vring_virtqueue *vq)
969{
970 int num;
971
972 num = vq->split.vring.num;
973
974 vq->split.vring.avail->flags = 0;
975 vq->split.vring.avail->idx = 0;
976
977 /* reset avail event */
978 vq->split.vring.avail->ring[num] = 0;
979
980 vq->split.vring.used->flags = 0;
981 vq->split.vring.used->idx = 0;
982
983 /* reset used event */
984 *(__virtio16 *)&(vq->split.vring.used->ring[num]) = 0;
985
986 virtqueue_init(vq, num);
987
988 virtqueue_vring_init_split(&vq->split, vq);
989}
990
e1d6a423
XZ
991static void virtqueue_vring_attach_split(struct vring_virtqueue *vq,
992 struct vring_virtqueue_split *vring_split)
993{
994 vq->split = *vring_split;
995
996 /* Put everything in free lists. */
997 vq->free_head = 0;
998}
999
a2b36c8d
XZ
1000static int vring_alloc_state_extra_split(struct vring_virtqueue_split *vring_split)
1001{
1002 struct vring_desc_state_split *state;
1003 struct vring_desc_extra *extra;
1004 u32 num = vring_split->vring.num;
1005
1006 state = kmalloc_array(num, sizeof(struct vring_desc_state_split), GFP_KERNEL);
1007 if (!state)
1008 goto err_state;
1009
1010 extra = vring_alloc_desc_extra(num);
1011 if (!extra)
1012 goto err_extra;
1013
1014 memset(state, 0, num * sizeof(struct vring_desc_state_split));
1015
1016 vring_split->desc_state = state;
1017 vring_split->desc_extra = extra;
1018 return 0;
1019
1020err_extra:
1021 kfree(state);
1022err_state:
1023 return -ENOMEM;
1024}
1025
89f05d94
XZ
1026static void vring_free_split(struct vring_virtqueue_split *vring_split,
1027 struct virtio_device *vdev)
1028{
1029 vring_free_queue(vdev, vring_split->queue_size_in_bytes,
1030 vring_split->vring.desc,
1031 vring_split->queue_dma_addr);
1032
1033 kfree(vring_split->desc_state);
1034 kfree(vring_split->desc_extra);
1035}
1036
c2d87fe6
XZ
1037static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split,
1038 struct virtio_device *vdev,
1039 u32 num,
1040 unsigned int vring_align,
1041 bool may_reduce_num)
d79dca75 1042{
d79dca75
TB
1043 void *queue = NULL;
1044 dma_addr_t dma_addr;
d79dca75
TB
1045
1046 /* We assume num is a power of 2. */
1047 if (num & (num - 1)) {
1048 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
c2d87fe6 1049 return -EINVAL;
d79dca75
TB
1050 }
1051
1052 /* TODO: allocate each queue chunk individually */
1053 for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
1054 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1055 &dma_addr,
c7cc29aa 1056 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
d79dca75
TB
1057 if (queue)
1058 break;
cf94db21 1059 if (!may_reduce_num)
c2d87fe6 1060 return -ENOMEM;
d79dca75
TB
1061 }
1062
1063 if (!num)
c2d87fe6 1064 return -ENOMEM;
d79dca75
TB
1065
1066 if (!queue) {
1067 /* Try to get a single page. You are my only hope! */
1068 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1069 &dma_addr, GFP_KERNEL|__GFP_ZERO);
1070 }
1071 if (!queue)
c2d87fe6
XZ
1072 return -ENOMEM;
1073
1074 vring_init(&vring_split->vring, num, queue, vring_align);
1075
1076 vring_split->queue_dma_addr = dma_addr;
1077 vring_split->queue_size_in_bytes = vring_size(num, vring_align);
d79dca75 1078
af36b16f
XZ
1079 vring_split->vring_align = vring_align;
1080 vring_split->may_reduce_num = may_reduce_num;
1081
c2d87fe6
XZ
1082 return 0;
1083}
1084
1085static struct virtqueue *vring_create_virtqueue_split(
1086 unsigned int index,
1087 unsigned int num,
1088 unsigned int vring_align,
1089 struct virtio_device *vdev,
1090 bool weak_barriers,
1091 bool may_reduce_num,
1092 bool context,
1093 bool (*notify)(struct virtqueue *),
1094 void (*callback)(struct virtqueue *),
1095 const char *name)
1096{
1097 struct vring_virtqueue_split vring_split = {};
1098 struct virtqueue *vq;
1099 int err;
1100
1101 err = vring_alloc_queue_split(&vring_split, vdev, num, vring_align,
1102 may_reduce_num);
1103 if (err)
1104 return NULL;
d79dca75 1105
cd4c812a
XZ
1106 vq = __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers,
1107 context, notify, callback, name);
d79dca75 1108 if (!vq) {
c2d87fe6 1109 vring_free_split(&vring_split, vdev);
d79dca75
TB
1110 return NULL;
1111 }
1112
d79dca75
TB
1113 to_vvq(vq)->we_own_ring = true;
1114
1115 return vq;
1116}
1117
6fea20e5
XZ
1118static int virtqueue_resize_split(struct virtqueue *_vq, u32 num)
1119{
1120 struct vring_virtqueue_split vring_split = {};
1121 struct vring_virtqueue *vq = to_vvq(_vq);
1122 struct virtio_device *vdev = _vq->vdev;
1123 int err;
1124
1125 err = vring_alloc_queue_split(&vring_split, vdev, num,
1126 vq->split.vring_align,
1127 vq->split.may_reduce_num);
1128 if (err)
1129 goto err;
1130
1131 err = vring_alloc_state_extra_split(&vring_split);
1132 if (err)
1133 goto err_state_extra;
1134
1135 vring_free(&vq->vq);
1136
1137 virtqueue_vring_init_split(&vring_split, vq);
1138
1139 virtqueue_init(vq, vring_split.vring.num);
1140 virtqueue_vring_attach_split(vq, &vring_split);
1141
1142 return 0;
1143
1144err_state_extra:
1145 vring_free_split(&vring_split, vdev);
1146err:
1147 virtqueue_reinit_split(vq);
1148 return -ENOMEM;
1149}
1150
e6f633e5 1151
1ce9e605
TB
1152/*
1153 * Packed ring specific functions - *_packed().
1154 */
a7722890 1155static inline bool packed_used_wrap_counter(u16 last_used_idx)
1156{
1157 return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1158}
1159
1160static inline u16 packed_last_used(u16 last_used_idx)
1161{
1162 return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1163}
1ce9e605 1164
d80dc15b
XZ
1165static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
1166 struct vring_desc_extra *extra)
1ce9e605
TB
1167{
1168 u16 flags;
1169
1170 if (!vq->use_dma_api)
1171 return;
1172
d80dc15b 1173 flags = extra->flags;
1ce9e605
TB
1174
1175 if (flags & VRING_DESC_F_INDIRECT) {
1176 dma_unmap_single(vring_dma_dev(vq),
d80dc15b 1177 extra->addr, extra->len,
1ce9e605
TB
1178 (flags & VRING_DESC_F_WRITE) ?
1179 DMA_FROM_DEVICE : DMA_TO_DEVICE);
1180 } else {
1181 dma_unmap_page(vring_dma_dev(vq),
d80dc15b 1182 extra->addr, extra->len,
1ce9e605
TB
1183 (flags & VRING_DESC_F_WRITE) ?
1184 DMA_FROM_DEVICE : DMA_TO_DEVICE);
1185 }
1186}
1187
1188static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
1189 struct vring_packed_desc *desc)
1190{
1191 u16 flags;
1192
1193 if (!vq->use_dma_api)
1194 return;
1195
1196 flags = le16_to_cpu(desc->flags);
1197
920379a4
XZ
1198 dma_unmap_page(vring_dma_dev(vq),
1199 le64_to_cpu(desc->addr),
1200 le32_to_cpu(desc->len),
1201 (flags & VRING_DESC_F_WRITE) ?
1202 DMA_FROM_DEVICE : DMA_TO_DEVICE);
1ce9e605
TB
1203}
1204
1205static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
1206 gfp_t gfp)
1207{
1208 struct vring_packed_desc *desc;
1209
1210 /*
1211 * We require lowmem mappings for the descriptors because
1212 * otherwise virt_to_phys will give us bogus addresses in the
1213 * virtqueue.
1214 */
1215 gfp &= ~__GFP_HIGHMEM;
1216
1217 desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp);
1218
1219 return desc;
1220}
1221
1222static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
8d7670f3
XZ
1223 struct scatterlist *sgs[],
1224 unsigned int total_sg,
1225 unsigned int out_sgs,
1226 unsigned int in_sgs,
1227 void *data,
1228 gfp_t gfp)
1ce9e605
TB
1229{
1230 struct vring_packed_desc *desc;
1231 struct scatterlist *sg;
1232 unsigned int i, n, err_idx;
1233 u16 head, id;
1234 dma_addr_t addr;
1235
1236 head = vq->packed.next_avail_idx;
1237 desc = alloc_indirect_packed(total_sg, gfp);
fc6d70f4
XZ
1238 if (!desc)
1239 return -ENOMEM;
1ce9e605
TB
1240
1241 if (unlikely(vq->vq.num_free < 1)) {
1242 pr_debug("Can't add buf len 1 - avail = 0\n");
df0bfe75 1243 kfree(desc);
1ce9e605
TB
1244 END_USE(vq);
1245 return -ENOSPC;
1246 }
1247
1248 i = 0;
1249 id = vq->free_head;
1250 BUG_ON(id == vq->packed.vring.num);
1251
1252 for (n = 0; n < out_sgs + in_sgs; n++) {
1253 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1254 addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1255 DMA_TO_DEVICE : DMA_FROM_DEVICE);
1256 if (vring_mapping_error(vq, addr))
1257 goto unmap_release;
1258
1259 desc[i].flags = cpu_to_le16(n < out_sgs ?
1260 0 : VRING_DESC_F_WRITE);
1261 desc[i].addr = cpu_to_le64(addr);
1262 desc[i].len = cpu_to_le32(sg->length);
1263 i++;
1264 }
1265 }
1266
1267 /* Now that the indirect table is filled in, map it. */
1268 addr = vring_map_single(vq, desc,
1269 total_sg * sizeof(struct vring_packed_desc),
1270 DMA_TO_DEVICE);
1271 if (vring_mapping_error(vq, addr))
1272 goto unmap_release;
1273
1274 vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
1275 vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
1276 sizeof(struct vring_packed_desc));
1277 vq->packed.vring.desc[head].id = cpu_to_le16(id);
1278
1279 if (vq->use_dma_api) {
1280 vq->packed.desc_extra[id].addr = addr;
1281 vq->packed.desc_extra[id].len = total_sg *
1282 sizeof(struct vring_packed_desc);
1283 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
1284 vq->packed.avail_used_flags;
1285 }
1286
1287 /*
1288 * A driver MUST NOT make the first descriptor in the list
1289 * available before all subsequent descriptors comprising
1290 * the list are made available.
1291 */
1292 virtio_wmb(vq->weak_barriers);
1293 vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
1294 vq->packed.avail_used_flags);
1295
1296 /* We're using some buffers from the free list. */
1297 vq->vq.num_free -= 1;
1298
1299 /* Update free pointer */
1300 n = head + 1;
1301 if (n >= vq->packed.vring.num) {
1302 n = 0;
1303 vq->packed.avail_wrap_counter ^= 1;
1304 vq->packed.avail_used_flags ^=
1305 1 << VRING_PACKED_DESC_F_AVAIL |
1306 1 << VRING_PACKED_DESC_F_USED;
1307 }
1308 vq->packed.next_avail_idx = n;
aeef9b47 1309 vq->free_head = vq->packed.desc_extra[id].next;
1ce9e605
TB
1310
1311 /* Store token and indirect buffer state. */
1312 vq->packed.desc_state[id].num = 1;
1313 vq->packed.desc_state[id].data = data;
1314 vq->packed.desc_state[id].indir_desc = desc;
1315 vq->packed.desc_state[id].last = id;
1316
1317 vq->num_added += 1;
1318
1319 pr_debug("Added buffer head %i to %p\n", head, vq);
1320 END_USE(vq);
1321
1322 return 0;
1323
1324unmap_release:
1325 err_idx = i;
1326
1327 for (i = 0; i < err_idx; i++)
1328 vring_unmap_desc_packed(vq, &desc[i]);
1329
1330 kfree(desc);
1331
1332 END_USE(vq);
f7728002 1333 return -ENOMEM;
1ce9e605
TB
1334}
1335
1336static inline int virtqueue_add_packed(struct virtqueue *_vq,
1337 struct scatterlist *sgs[],
1338 unsigned int total_sg,
1339 unsigned int out_sgs,
1340 unsigned int in_sgs,
1341 void *data,
1342 void *ctx,
1343 gfp_t gfp)
1344{
1345 struct vring_virtqueue *vq = to_vvq(_vq);
1346 struct vring_packed_desc *desc;
1347 struct scatterlist *sg;
1348 unsigned int i, n, c, descs_used, err_idx;
3f649ab7
KC
1349 __le16 head_flags, flags;
1350 u16 head, id, prev, curr, avail_used_flags;
fc6d70f4 1351 int err;
1ce9e605
TB
1352
1353 START_USE(vq);
1354
1355 BUG_ON(data == NULL);
1356 BUG_ON(ctx && vq->indirect);
1357
1358 if (unlikely(vq->broken)) {
1359 END_USE(vq);
1360 return -EIO;
1361 }
1362
1363 LAST_ADD_TIME_UPDATE(vq);
1364
1365 BUG_ON(total_sg == 0);
1366
35c51e09 1367 if (virtqueue_use_indirect(vq, total_sg)) {
fc6d70f4
XZ
1368 err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
1369 in_sgs, data, gfp);
1861ba62
MT
1370 if (err != -ENOMEM) {
1371 END_USE(vq);
fc6d70f4 1372 return err;
1861ba62 1373 }
fc6d70f4
XZ
1374
1375 /* fall back on direct */
1376 }
1ce9e605
TB
1377
1378 head = vq->packed.next_avail_idx;
1379 avail_used_flags = vq->packed.avail_used_flags;
1380
1381 WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
1382
1383 desc = vq->packed.vring.desc;
1384 i = head;
1385 descs_used = total_sg;
1386
1387 if (unlikely(vq->vq.num_free < descs_used)) {
1388 pr_debug("Can't add buf len %i - avail = %i\n",
1389 descs_used, vq->vq.num_free);
1390 END_USE(vq);
1391 return -ENOSPC;
1392 }
1393
1394 id = vq->free_head;
1395 BUG_ON(id == vq->packed.vring.num);
1396
1397 curr = id;
1398 c = 0;
1399 for (n = 0; n < out_sgs + in_sgs; n++) {
1400 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1401 dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1402 DMA_TO_DEVICE : DMA_FROM_DEVICE);
1403 if (vring_mapping_error(vq, addr))
1404 goto unmap_release;
1405
1406 flags = cpu_to_le16(vq->packed.avail_used_flags |
1407 (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
1408 (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
1409 if (i == head)
1410 head_flags = flags;
1411 else
1412 desc[i].flags = flags;
1413
1414 desc[i].addr = cpu_to_le64(addr);
1415 desc[i].len = cpu_to_le32(sg->length);
1416 desc[i].id = cpu_to_le16(id);
1417
1418 if (unlikely(vq->use_dma_api)) {
1419 vq->packed.desc_extra[curr].addr = addr;
1420 vq->packed.desc_extra[curr].len = sg->length;
1421 vq->packed.desc_extra[curr].flags =
1422 le16_to_cpu(flags);
1423 }
1424 prev = curr;
aeef9b47 1425 curr = vq->packed.desc_extra[curr].next;
1ce9e605
TB
1426
1427 if ((unlikely(++i >= vq->packed.vring.num))) {
1428 i = 0;
1429 vq->packed.avail_used_flags ^=
1430 1 << VRING_PACKED_DESC_F_AVAIL |
1431 1 << VRING_PACKED_DESC_F_USED;
1432 }
1433 }
1434 }
1435
1436 if (i < head)
1437 vq->packed.avail_wrap_counter ^= 1;
1438
1439 /* We're using some buffers from the free list. */
1440 vq->vq.num_free -= descs_used;
1441
1442 /* Update free pointer */
1443 vq->packed.next_avail_idx = i;
1444 vq->free_head = curr;
1445
1446 /* Store token. */
1447 vq->packed.desc_state[id].num = descs_used;
1448 vq->packed.desc_state[id].data = data;
1449 vq->packed.desc_state[id].indir_desc = ctx;
1450 vq->packed.desc_state[id].last = prev;
1451
1452 /*
1453 * A driver MUST NOT make the first descriptor in the list
1454 * available before all subsequent descriptors comprising
1455 * the list are made available.
1456 */
1457 virtio_wmb(vq->weak_barriers);
1458 vq->packed.vring.desc[head].flags = head_flags;
1459 vq->num_added += descs_used;
1460
1461 pr_debug("Added buffer head %i to %p\n", head, vq);
1462 END_USE(vq);
1463
1464 return 0;
1465
1466unmap_release:
1467 err_idx = i;
1468 i = head;
44593865 1469 curr = vq->free_head;
1ce9e605
TB
1470
1471 vq->packed.avail_used_flags = avail_used_flags;
1472
1473 for (n = 0; n < total_sg; n++) {
1474 if (i == err_idx)
1475 break;
d80dc15b 1476 vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
44593865 1477 curr = vq->packed.desc_extra[curr].next;
1ce9e605
TB
1478 i++;
1479 if (i >= vq->packed.vring.num)
1480 i = 0;
1481 }
1482
1483 END_USE(vq);
1484 return -EIO;
1485}
1486
1487static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
1488{
1489 struct vring_virtqueue *vq = to_vvq(_vq);
f51f9826 1490 u16 new, old, off_wrap, flags, wrap_counter, event_idx;
1ce9e605
TB
1491 bool needs_kick;
1492 union {
1493 struct {
1494 __le16 off_wrap;
1495 __le16 flags;
1496 };
1497 u32 u32;
1498 } snapshot;
1499
1500 START_USE(vq);
1501
1502 /*
1503 * We need to expose the new flags value before checking notification
1504 * suppressions.
1505 */
1506 virtio_mb(vq->weak_barriers);
1507
f51f9826
TB
1508 old = vq->packed.next_avail_idx - vq->num_added;
1509 new = vq->packed.next_avail_idx;
1ce9e605
TB
1510 vq->num_added = 0;
1511
1512 snapshot.u32 = *(u32 *)vq->packed.vring.device;
1513 flags = le16_to_cpu(snapshot.flags);
1514
1515 LAST_ADD_TIME_CHECK(vq);
1516 LAST_ADD_TIME_INVALID(vq);
1517
f51f9826
TB
1518 if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
1519 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
1520 goto out;
1521 }
1522
1523 off_wrap = le16_to_cpu(snapshot.off_wrap);
1524
1525 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1526 event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1527 if (wrap_counter != vq->packed.avail_wrap_counter)
1528 event_idx -= vq->packed.vring.num;
1529
1530 needs_kick = vring_need_event(event_idx, new, old);
1531out:
1ce9e605
TB
1532 END_USE(vq);
1533 return needs_kick;
1534}
1535
1536static void detach_buf_packed(struct vring_virtqueue *vq,
1537 unsigned int id, void **ctx)
1538{
1539 struct vring_desc_state_packed *state = NULL;
1540 struct vring_packed_desc *desc;
1541 unsigned int i, curr;
1542
1543 state = &vq->packed.desc_state[id];
1544
1545 /* Clear data ptr. */
1546 state->data = NULL;
1547
aeef9b47 1548 vq->packed.desc_extra[state->last].next = vq->free_head;
1ce9e605
TB
1549 vq->free_head = id;
1550 vq->vq.num_free += state->num;
1551
1552 if (unlikely(vq->use_dma_api)) {
1553 curr = id;
1554 for (i = 0; i < state->num; i++) {
d80dc15b
XZ
1555 vring_unmap_extra_packed(vq,
1556 &vq->packed.desc_extra[curr]);
aeef9b47 1557 curr = vq->packed.desc_extra[curr].next;
1ce9e605
TB
1558 }
1559 }
1560
1561 if (vq->indirect) {
1562 u32 len;
1563
1564 /* Free the indirect table, if any, now that it's unmapped. */
1565 desc = state->indir_desc;
1566 if (!desc)
1567 return;
1568
1569 if (vq->use_dma_api) {
1570 len = vq->packed.desc_extra[id].len;
1571 for (i = 0; i < len / sizeof(struct vring_packed_desc);
1572 i++)
1573 vring_unmap_desc_packed(vq, &desc[i]);
1574 }
1575 kfree(desc);
1576 state->indir_desc = NULL;
1577 } else if (ctx) {
1578 *ctx = state->indir_desc;
1579 }
1580}
1581
1582static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
1583 u16 idx, bool used_wrap_counter)
1584{
1585 bool avail, used;
1586 u16 flags;
1587
1588 flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
1589 avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
1590 used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
1591
1592 return avail == used && used == used_wrap_counter;
1593}
1594
1595static inline bool more_used_packed(const struct vring_virtqueue *vq)
1596{
a7722890 1597 u16 last_used;
1598 u16 last_used_idx;
1599 bool used_wrap_counter;
1600
1601 last_used_idx = READ_ONCE(vq->last_used_idx);
1602 last_used = packed_last_used(last_used_idx);
1603 used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1604 return is_used_desc_packed(vq, last_used, used_wrap_counter);
1ce9e605
TB
1605}
1606
1607static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
1608 unsigned int *len,
1609 void **ctx)
1610{
1611 struct vring_virtqueue *vq = to_vvq(_vq);
a7722890 1612 u16 last_used, id, last_used_idx;
1613 bool used_wrap_counter;
1ce9e605
TB
1614 void *ret;
1615
1616 START_USE(vq);
1617
1618 if (unlikely(vq->broken)) {
1619 END_USE(vq);
1620 return NULL;
1621 }
1622
1623 if (!more_used_packed(vq)) {
1624 pr_debug("No more buffers in queue\n");
1625 END_USE(vq);
1626 return NULL;
1627 }
1628
1629 /* Only get used elements after they have been exposed by host. */
1630 virtio_rmb(vq->weak_barriers);
1631
a7722890 1632 last_used_idx = READ_ONCE(vq->last_used_idx);
1633 used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1634 last_used = packed_last_used(last_used_idx);
1ce9e605
TB
1635 id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
1636 *len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
1637
1638 if (unlikely(id >= vq->packed.vring.num)) {
1639 BAD_RING(vq, "id %u out of range\n", id);
1640 return NULL;
1641 }
1642 if (unlikely(!vq->packed.desc_state[id].data)) {
1643 BAD_RING(vq, "id %u is not a head!\n", id);
1644 return NULL;
1645 }
1646
1647 /* detach_buf_packed clears data, so grab it now. */
1648 ret = vq->packed.desc_state[id].data;
1649 detach_buf_packed(vq, id, ctx);
1650
a7722890 1651 last_used += vq->packed.desc_state[id].num;
1652 if (unlikely(last_used >= vq->packed.vring.num)) {
1653 last_used -= vq->packed.vring.num;
1654 used_wrap_counter ^= 1;
1ce9e605
TB
1655 }
1656
a7722890 1657 last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1658 WRITE_ONCE(vq->last_used_idx, last_used);
1659
f51f9826
TB
1660 /*
1661 * If we expect an interrupt for the next entry, tell host
1662 * by writing event index and flush out the write before
1663 * the read in the next get_buf call.
1664 */
1665 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
1666 virtio_store_mb(vq->weak_barriers,
1667 &vq->packed.vring.driver->off_wrap,
a7722890 1668 cpu_to_le16(vq->last_used_idx));
f51f9826 1669
1ce9e605
TB
1670 LAST_ADD_TIME_INVALID(vq);
1671
1672 END_USE(vq);
1673 return ret;
1674}
1675
1676static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
1677{
1678 struct vring_virtqueue *vq = to_vvq(_vq);
1679
1680 if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
1681 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1682 vq->packed.vring.driver->flags =
1683 cpu_to_le16(vq->packed.event_flags_shadow);
1684 }
1685}
1686
31532340 1687static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
1ce9e605
TB
1688{
1689 struct vring_virtqueue *vq = to_vvq(_vq);
1690
1691 START_USE(vq);
1692
1693 /*
1694 * We optimistically turn back on interrupts, then check if there was
1695 * more to do.
1696 */
1697
f51f9826
TB
1698 if (vq->event) {
1699 vq->packed.vring.driver->off_wrap =
a7722890 1700 cpu_to_le16(vq->last_used_idx);
f51f9826
TB
1701 /*
1702 * We need to update event offset and event wrap
1703 * counter first before updating event flags.
1704 */
1705 virtio_wmb(vq->weak_barriers);
1706 }
1707
1ce9e605 1708 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
f51f9826
TB
1709 vq->packed.event_flags_shadow = vq->event ?
1710 VRING_PACKED_EVENT_FLAG_DESC :
1711 VRING_PACKED_EVENT_FLAG_ENABLE;
1ce9e605
TB
1712 vq->packed.vring.driver->flags =
1713 cpu_to_le16(vq->packed.event_flags_shadow);
1714 }
1715
1716 END_USE(vq);
a7722890 1717 return vq->last_used_idx;
1ce9e605
TB
1718}
1719
1720static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
1721{
1722 struct vring_virtqueue *vq = to_vvq(_vq);
1723 bool wrap_counter;
1724 u16 used_idx;
1725
1726 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1727 used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1728
1729 return is_used_desc_packed(vq, used_idx, wrap_counter);
1730}
1731
1732static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
1733{
1734 struct vring_virtqueue *vq = to_vvq(_vq);
a7722890 1735 u16 used_idx, wrap_counter, last_used_idx;
f51f9826 1736 u16 bufs;
1ce9e605
TB
1737
1738 START_USE(vq);
1739
1740 /*
1741 * We optimistically turn back on interrupts, then check if there was
1742 * more to do.
1743 */
1744
f51f9826
TB
1745 if (vq->event) {
1746 /* TODO: tune this threshold */
1747 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
a7722890 1748 last_used_idx = READ_ONCE(vq->last_used_idx);
1749 wrap_counter = packed_used_wrap_counter(last_used_idx);
f51f9826 1750
a7722890 1751 used_idx = packed_last_used(last_used_idx) + bufs;
f51f9826
TB
1752 if (used_idx >= vq->packed.vring.num) {
1753 used_idx -= vq->packed.vring.num;
1754 wrap_counter ^= 1;
1755 }
1756
1757 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
1758 (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1759
1760 /*
1761 * We need to update event offset and event wrap
1762 * counter first before updating event flags.
1763 */
1764 virtio_wmb(vq->weak_barriers);
f51f9826 1765 }
1ce9e605
TB
1766
1767 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
f51f9826
TB
1768 vq->packed.event_flags_shadow = vq->event ?
1769 VRING_PACKED_EVENT_FLAG_DESC :
1770 VRING_PACKED_EVENT_FLAG_ENABLE;
1ce9e605
TB
1771 vq->packed.vring.driver->flags =
1772 cpu_to_le16(vq->packed.event_flags_shadow);
1773 }
1774
1775 /*
1776 * We need to update event suppression structure first
1777 * before re-checking for more used buffers.
1778 */
1779 virtio_mb(vq->weak_barriers);
1780
a7722890 1781 last_used_idx = READ_ONCE(vq->last_used_idx);
1782 wrap_counter = packed_used_wrap_counter(last_used_idx);
1783 used_idx = packed_last_used(last_used_idx);
1784 if (is_used_desc_packed(vq, used_idx, wrap_counter)) {
1ce9e605
TB
1785 END_USE(vq);
1786 return false;
1787 }
1788
1789 END_USE(vq);
1790 return true;
1791}
1792
1793static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
1794{
1795 struct vring_virtqueue *vq = to_vvq(_vq);
1796 unsigned int i;
1797 void *buf;
1798
1799 START_USE(vq);
1800
1801 for (i = 0; i < vq->packed.vring.num; i++) {
1802 if (!vq->packed.desc_state[i].data)
1803 continue;
1804 /* detach_buf clears data, so grab it now. */
1805 buf = vq->packed.desc_state[i].data;
1806 detach_buf_packed(vq, i, NULL);
1807 END_USE(vq);
1808 return buf;
1809 }
1810 /* That should have freed everything. */
1811 BUG_ON(vq->vq.num_free != vq->packed.vring.num);
1812
1813 END_USE(vq);
1814 return NULL;
1815}
1816
96ef18a2 1817static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num)
5a222421
JW
1818{
1819 struct vring_desc_extra *desc_extra;
1820 unsigned int i;
1821
1822 desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra),
1823 GFP_KERNEL);
1824 if (!desc_extra)
1825 return NULL;
1826
1827 memset(desc_extra, 0, num * sizeof(struct vring_desc_extra));
1828
1829 for (i = 0; i < num - 1; i++)
1830 desc_extra[i].next = i + 1;
1831
1832 return desc_extra;
1833}
1834
6356f8bb
XZ
1835static void vring_free_packed(struct vring_virtqueue_packed *vring_packed,
1836 struct virtio_device *vdev)
1837{
1838 if (vring_packed->vring.desc)
1839 vring_free_queue(vdev, vring_packed->ring_size_in_bytes,
1840 vring_packed->vring.desc,
1841 vring_packed->ring_dma_addr);
1842
1843 if (vring_packed->vring.driver)
1844 vring_free_queue(vdev, vring_packed->event_size_in_bytes,
1845 vring_packed->vring.driver,
1846 vring_packed->driver_event_dma_addr);
1847
1848 if (vring_packed->vring.device)
1849 vring_free_queue(vdev, vring_packed->event_size_in_bytes,
1850 vring_packed->vring.device,
1851 vring_packed->device_event_dma_addr);
1852
1853 kfree(vring_packed->desc_state);
1854 kfree(vring_packed->desc_extra);
1855}
1856
6b60b9c0
XZ
1857static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed,
1858 struct virtio_device *vdev,
1859 u32 num)
1ce9e605 1860{
1ce9e605
TB
1861 struct vring_packed_desc *ring;
1862 struct vring_packed_desc_event *driver, *device;
1863 dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
1864 size_t ring_size_in_bytes, event_size_in_bytes;
1ce9e605
TB
1865
1866 ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
1867
1868 ring = vring_alloc_queue(vdev, ring_size_in_bytes,
1869 &ring_dma_addr,
1870 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1871 if (!ring)
6b60b9c0
XZ
1872 goto err;
1873
1874 vring_packed->vring.desc = ring;
1875 vring_packed->ring_dma_addr = ring_dma_addr;
1876 vring_packed->ring_size_in_bytes = ring_size_in_bytes;
1ce9e605
TB
1877
1878 event_size_in_bytes = sizeof(struct vring_packed_desc_event);
1879
1880 driver = vring_alloc_queue(vdev, event_size_in_bytes,
1881 &driver_event_dma_addr,
1882 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1883 if (!driver)
6b60b9c0
XZ
1884 goto err;
1885
1886 vring_packed->vring.driver = driver;
1887 vring_packed->event_size_in_bytes = event_size_in_bytes;
1888 vring_packed->driver_event_dma_addr = driver_event_dma_addr;
1ce9e605
TB
1889
1890 device = vring_alloc_queue(vdev, event_size_in_bytes,
1891 &device_event_dma_addr,
1892 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1893 if (!device)
6b60b9c0
XZ
1894 goto err;
1895
1896 vring_packed->vring.device = device;
1897 vring_packed->device_event_dma_addr = device_event_dma_addr;
1898
1899 vring_packed->vring.num = num;
1900
1901 return 0;
1902
1903err:
1904 vring_free_packed(vring_packed, vdev);
1905 return -ENOMEM;
1906}
1907
ef3167cf
XZ
1908static int vring_alloc_state_extra_packed(struct vring_virtqueue_packed *vring_packed)
1909{
1910 struct vring_desc_state_packed *state;
1911 struct vring_desc_extra *extra;
1912 u32 num = vring_packed->vring.num;
1913
1914 state = kmalloc_array(num, sizeof(struct vring_desc_state_packed), GFP_KERNEL);
1915 if (!state)
1916 goto err_desc_state;
1917
1918 memset(state, 0, num * sizeof(struct vring_desc_state_packed));
1919
1920 extra = vring_alloc_desc_extra(num);
1921 if (!extra)
1922 goto err_desc_extra;
1923
1924 vring_packed->desc_state = state;
1925 vring_packed->desc_extra = extra;
1926
1927 return 0;
1928
1929err_desc_extra:
1930 kfree(state);
1931err_desc_state:
1932 return -ENOMEM;
1933}
1934
1a107c87
XZ
1935static void virtqueue_vring_init_packed(struct vring_virtqueue_packed *vring_packed,
1936 bool callback)
1937{
1938 vring_packed->next_avail_idx = 0;
1939 vring_packed->avail_wrap_counter = 1;
1940 vring_packed->event_flags_shadow = 0;
1941 vring_packed->avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
1942
1943 /* No callback? Tell other side not to bother us. */
1944 if (!callback) {
1945 vring_packed->event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1946 vring_packed->vring.driver->flags =
1947 cpu_to_le16(vring_packed->event_flags_shadow);
1948 }
1949}
1950
51d649f1
XZ
1951static void virtqueue_vring_attach_packed(struct vring_virtqueue *vq,
1952 struct vring_virtqueue_packed *vring_packed)
1953{
1954 vq->packed = *vring_packed;
1955
1956 /* Put everything in free lists. */
1957 vq->free_head = 0;
1958}
1959
56775e14
XZ
1960static void virtqueue_reinit_packed(struct vring_virtqueue *vq)
1961{
1962 memset(vq->packed.vring.device, 0, vq->packed.event_size_in_bytes);
1963 memset(vq->packed.vring.driver, 0, vq->packed.event_size_in_bytes);
1964
1965 /* we need to reset the desc.flags. For more, see is_used_desc_packed() */
1966 memset(vq->packed.vring.desc, 0, vq->packed.ring_size_in_bytes);
1967
1968 virtqueue_init(vq, vq->packed.vring.num);
1969 virtqueue_vring_init_packed(&vq->packed, !!vq->vq.callback);
1970}
1971
6b60b9c0
XZ
1972static struct virtqueue *vring_create_virtqueue_packed(
1973 unsigned int index,
1974 unsigned int num,
1975 unsigned int vring_align,
1976 struct virtio_device *vdev,
1977 bool weak_barriers,
1978 bool may_reduce_num,
1979 bool context,
1980 bool (*notify)(struct virtqueue *),
1981 void (*callback)(struct virtqueue *),
1982 const char *name)
1983{
1984 struct vring_virtqueue_packed vring_packed = {};
1985 struct vring_virtqueue *vq;
ef3167cf 1986 int err;
6b60b9c0
XZ
1987
1988 if (vring_alloc_queue_packed(&vring_packed, vdev, num))
1989 goto err_ring;
1ce9e605
TB
1990
1991 vq = kmalloc(sizeof(*vq), GFP_KERNEL);
1992 if (!vq)
1993 goto err_vq;
1994
1995 vq->vq.callback = callback;
1996 vq->vq.vdev = vdev;
1997 vq->vq.name = name;
1ce9e605
TB
1998 vq->vq.index = index;
1999 vq->we_own_ring = true;
2000 vq->notify = notify;
2001 vq->weak_barriers = weak_barriers;
c346dae4 2002#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
8b4ec69d 2003 vq->broken = true;
c346dae4
JW
2004#else
2005 vq->broken = false;
2006#endif
1ce9e605
TB
2007 vq->packed_ring = true;
2008 vq->use_dma_api = vring_use_dma_api(vdev);
1ce9e605
TB
2009
2010 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2011 !context;
2012 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2013
45383fb0
TB
2014 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2015 vq->weak_barriers = false;
2016
ef3167cf
XZ
2017 err = vring_alloc_state_extra_packed(&vring_packed);
2018 if (err)
2019 goto err_state_extra;
1ce9e605 2020
1a107c87 2021 virtqueue_vring_init_packed(&vring_packed, !!callback);
1ce9e605 2022
3a897128 2023 virtqueue_init(vq, num);
51d649f1 2024 virtqueue_vring_attach_packed(vq, &vring_packed);
3a897128 2025
0e566c8f 2026 spin_lock(&vdev->vqs_list_lock);
e152d8af 2027 list_add_tail(&vq->vq.list, &vdev->vqs);
0e566c8f 2028 spin_unlock(&vdev->vqs_list_lock);
1ce9e605
TB
2029 return &vq->vq;
2030
ef3167cf 2031err_state_extra:
1ce9e605
TB
2032 kfree(vq);
2033err_vq:
6b60b9c0 2034 vring_free_packed(&vring_packed, vdev);
1ce9e605
TB
2035err_ring:
2036 return NULL;
2037}
2038
947f9fcf
XZ
2039static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num)
2040{
2041 struct vring_virtqueue_packed vring_packed = {};
2042 struct vring_virtqueue *vq = to_vvq(_vq);
2043 struct virtio_device *vdev = _vq->vdev;
2044 int err;
2045
2046 if (vring_alloc_queue_packed(&vring_packed, vdev, num))
2047 goto err_ring;
2048
2049 err = vring_alloc_state_extra_packed(&vring_packed);
2050 if (err)
2051 goto err_state_extra;
2052
2053 vring_free(&vq->vq);
2054
2055 virtqueue_vring_init_packed(&vring_packed, !!vq->vq.callback);
2056
2057 virtqueue_init(vq, vring_packed.vring.num);
2058 virtqueue_vring_attach_packed(vq, &vring_packed);
2059
2060 return 0;
2061
2062err_state_extra:
2063 vring_free_packed(&vring_packed, vdev);
2064err_ring:
2065 virtqueue_reinit_packed(vq);
2066 return -ENOMEM;
2067}
2068
1ce9e605 2069
e6f633e5
TB
2070/*
2071 * Generic functions and exported symbols.
2072 */
2073
2074static inline int virtqueue_add(struct virtqueue *_vq,
2075 struct scatterlist *sgs[],
2076 unsigned int total_sg,
2077 unsigned int out_sgs,
2078 unsigned int in_sgs,
2079 void *data,
2080 void *ctx,
2081 gfp_t gfp)
2082{
1ce9e605
TB
2083 struct vring_virtqueue *vq = to_vvq(_vq);
2084
2085 return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
2086 out_sgs, in_sgs, data, ctx, gfp) :
2087 virtqueue_add_split(_vq, sgs, total_sg,
2088 out_sgs, in_sgs, data, ctx, gfp);
e6f633e5
TB
2089}
2090
2091/**
2092 * virtqueue_add_sgs - expose buffers to other end
a5581206 2093 * @_vq: the struct virtqueue we're talking about.
e6f633e5 2094 * @sgs: array of terminated scatterlists.
a5581206
JB
2095 * @out_sgs: the number of scatterlists readable by other side
2096 * @in_sgs: the number of scatterlists which are writable (after readable ones)
e6f633e5
TB
2097 * @data: the token identifying the buffer.
2098 * @gfp: how to do memory allocations (if necessary).
2099 *
2100 * Caller must ensure we don't call this with other virtqueue operations
2101 * at the same time (except where noted).
2102 *
2103 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2104 */
2105int virtqueue_add_sgs(struct virtqueue *_vq,
2106 struct scatterlist *sgs[],
2107 unsigned int out_sgs,
2108 unsigned int in_sgs,
2109 void *data,
2110 gfp_t gfp)
2111{
2112 unsigned int i, total_sg = 0;
2113
2114 /* Count them first. */
2115 for (i = 0; i < out_sgs + in_sgs; i++) {
2116 struct scatterlist *sg;
2117
2118 for (sg = sgs[i]; sg; sg = sg_next(sg))
2119 total_sg++;
2120 }
2121 return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
2122 data, NULL, gfp);
2123}
2124EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
2125
2126/**
2127 * virtqueue_add_outbuf - expose output buffers to other end
2128 * @vq: the struct virtqueue we're talking about.
2129 * @sg: scatterlist (must be well-formed and terminated!)
2130 * @num: the number of entries in @sg readable by other side
2131 * @data: the token identifying the buffer.
2132 * @gfp: how to do memory allocations (if necessary).
2133 *
2134 * Caller must ensure we don't call this with other virtqueue operations
2135 * at the same time (except where noted).
2136 *
2137 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2138 */
2139int virtqueue_add_outbuf(struct virtqueue *vq,
2140 struct scatterlist *sg, unsigned int num,
2141 void *data,
2142 gfp_t gfp)
2143{
2144 return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
2145}
2146EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
2147
2148/**
2149 * virtqueue_add_inbuf - expose input buffers to other end
2150 * @vq: the struct virtqueue we're talking about.
2151 * @sg: scatterlist (must be well-formed and terminated!)
2152 * @num: the number of entries in @sg writable by other side
2153 * @data: the token identifying the buffer.
2154 * @gfp: how to do memory allocations (if necessary).
2155 *
2156 * Caller must ensure we don't call this with other virtqueue operations
2157 * at the same time (except where noted).
2158 *
2159 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2160 */
2161int virtqueue_add_inbuf(struct virtqueue *vq,
2162 struct scatterlist *sg, unsigned int num,
2163 void *data,
2164 gfp_t gfp)
2165{
2166 return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
2167}
2168EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
2169
2170/**
2171 * virtqueue_add_inbuf_ctx - expose input buffers to other end
2172 * @vq: the struct virtqueue we're talking about.
2173 * @sg: scatterlist (must be well-formed and terminated!)
2174 * @num: the number of entries in @sg writable by other side
2175 * @data: the token identifying the buffer.
2176 * @ctx: extra context for the token
2177 * @gfp: how to do memory allocations (if necessary).
2178 *
2179 * Caller must ensure we don't call this with other virtqueue operations
2180 * at the same time (except where noted).
2181 *
2182 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2183 */
2184int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
2185 struct scatterlist *sg, unsigned int num,
2186 void *data,
2187 void *ctx,
2188 gfp_t gfp)
2189{
2190 return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
2191}
2192EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
2193
2194/**
2195 * virtqueue_kick_prepare - first half of split virtqueue_kick call.
a5581206 2196 * @_vq: the struct virtqueue
e6f633e5
TB
2197 *
2198 * Instead of virtqueue_kick(), you can do:
2199 * if (virtqueue_kick_prepare(vq))
2200 * virtqueue_notify(vq);
2201 *
2202 * This is sometimes useful because the virtqueue_kick_prepare() needs
2203 * to be serialized, but the actual virtqueue_notify() call does not.
2204 */
2205bool virtqueue_kick_prepare(struct virtqueue *_vq)
2206{
1ce9e605
TB
2207 struct vring_virtqueue *vq = to_vvq(_vq);
2208
2209 return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
2210 virtqueue_kick_prepare_split(_vq);
e6f633e5
TB
2211}
2212EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
2213
2214/**
2215 * virtqueue_notify - second half of split virtqueue_kick call.
a5581206 2216 * @_vq: the struct virtqueue
e6f633e5
TB
2217 *
2218 * This does not need to be serialized.
2219 *
2220 * Returns false if host notify failed or queue is broken, otherwise true.
2221 */
2222bool virtqueue_notify(struct virtqueue *_vq)
2223{
2224 struct vring_virtqueue *vq = to_vvq(_vq);
2225
2226 if (unlikely(vq->broken))
2227 return false;
2228
2229 /* Prod other side to tell it about changes. */
2230 if (!vq->notify(_vq)) {
2231 vq->broken = true;
2232 return false;
2233 }
2234 return true;
2235}
2236EXPORT_SYMBOL_GPL(virtqueue_notify);
2237
2238/**
2239 * virtqueue_kick - update after add_buf
2240 * @vq: the struct virtqueue
2241 *
2242 * After one or more virtqueue_add_* calls, invoke this to kick
2243 * the other side.
2244 *
2245 * Caller must ensure we don't call this with other virtqueue
2246 * operations at the same time (except where noted).
2247 *
2248 * Returns false if kick failed, otherwise true.
2249 */
2250bool virtqueue_kick(struct virtqueue *vq)
2251{
2252 if (virtqueue_kick_prepare(vq))
2253 return virtqueue_notify(vq);
2254 return true;
2255}
2256EXPORT_SYMBOL_GPL(virtqueue_kick);
2257
2258/**
31c11db6 2259 * virtqueue_get_buf_ctx - get the next used buffer
a5581206 2260 * @_vq: the struct virtqueue we're talking about.
e6f633e5 2261 * @len: the length written into the buffer
a5581206 2262 * @ctx: extra context for the token
e6f633e5
TB
2263 *
2264 * If the device wrote data into the buffer, @len will be set to the
2265 * amount written. This means you don't need to clear the buffer
2266 * beforehand to ensure there's no data leakage in the case of short
2267 * writes.
2268 *
2269 * Caller must ensure we don't call this with other virtqueue
2270 * operations at the same time (except where noted).
2271 *
2272 * Returns NULL if there are no used buffers, or the "data" token
2273 * handed to virtqueue_add_*().
2274 */
2275void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
2276 void **ctx)
2277{
1ce9e605
TB
2278 struct vring_virtqueue *vq = to_vvq(_vq);
2279
2280 return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
2281 virtqueue_get_buf_ctx_split(_vq, len, ctx);
e6f633e5
TB
2282}
2283EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
2284
2285void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
2286{
2287 return virtqueue_get_buf_ctx(_vq, len, NULL);
2288}
2289EXPORT_SYMBOL_GPL(virtqueue_get_buf);
e6f633e5
TB
2290/**
2291 * virtqueue_disable_cb - disable callbacks
a5581206 2292 * @_vq: the struct virtqueue we're talking about.
e6f633e5
TB
2293 *
2294 * Note that this is not necessarily synchronous, hence unreliable and only
2295 * useful as an optimization.
2296 *
2297 * Unlike other operations, this need not be serialized.
2298 */
2299void virtqueue_disable_cb(struct virtqueue *_vq)
2300{
1ce9e605
TB
2301 struct vring_virtqueue *vq = to_vvq(_vq);
2302
8d622d21
MT
2303 /* If device triggered an event already it won't trigger one again:
2304 * no need to disable.
2305 */
2306 if (vq->event_triggered)
2307 return;
2308
1ce9e605
TB
2309 if (vq->packed_ring)
2310 virtqueue_disable_cb_packed(_vq);
2311 else
2312 virtqueue_disable_cb_split(_vq);
e6f633e5
TB
2313}
2314EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
2315
2316/**
2317 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
a5581206 2318 * @_vq: the struct virtqueue we're talking about.
e6f633e5
TB
2319 *
2320 * This re-enables callbacks; it returns current queue state
2321 * in an opaque unsigned value. This value should be later tested by
2322 * virtqueue_poll, to detect a possible race between the driver checking for
2323 * more work, and enabling callbacks.
2324 *
2325 * Caller must ensure we don't call this with other virtqueue
2326 * operations at the same time (except where noted).
2327 */
31532340 2328unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq)
e6f633e5 2329{
1ce9e605
TB
2330 struct vring_virtqueue *vq = to_vvq(_vq);
2331
8d622d21
MT
2332 if (vq->event_triggered)
2333 vq->event_triggered = false;
2334
1ce9e605
TB
2335 return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
2336 virtqueue_enable_cb_prepare_split(_vq);
e6f633e5
TB
2337}
2338EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
2339
2340/**
2341 * virtqueue_poll - query pending used buffers
a5581206 2342 * @_vq: the struct virtqueue we're talking about.
e6f633e5
TB
2343 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
2344 *
2345 * Returns "true" if there are pending used buffers in the queue.
2346 *
2347 * This does not need to be serialized.
2348 */
31532340 2349bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx)
e6f633e5
TB
2350{
2351 struct vring_virtqueue *vq = to_vvq(_vq);
2352
481a0d74
MW
2353 if (unlikely(vq->broken))
2354 return false;
2355
e6f633e5 2356 virtio_mb(vq->weak_barriers);
1ce9e605
TB
2357 return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
2358 virtqueue_poll_split(_vq, last_used_idx);
e6f633e5
TB
2359}
2360EXPORT_SYMBOL_GPL(virtqueue_poll);
2361
2362/**
2363 * virtqueue_enable_cb - restart callbacks after disable_cb.
a5581206 2364 * @_vq: the struct virtqueue we're talking about.
e6f633e5
TB
2365 *
2366 * This re-enables callbacks; it returns "false" if there are pending
2367 * buffers in the queue, to detect a possible race between the driver
2368 * checking for more work, and enabling callbacks.
2369 *
2370 * Caller must ensure we don't call this with other virtqueue
2371 * operations at the same time (except where noted).
2372 */
2373bool virtqueue_enable_cb(struct virtqueue *_vq)
2374{
31532340 2375 unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq);
e6f633e5
TB
2376
2377 return !virtqueue_poll(_vq, last_used_idx);
2378}
2379EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
2380
2381/**
2382 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
a5581206 2383 * @_vq: the struct virtqueue we're talking about.
e6f633e5
TB
2384 *
2385 * This re-enables callbacks but hints to the other side to delay
2386 * interrupts until most of the available buffers have been processed;
2387 * it returns "false" if there are many pending buffers in the queue,
2388 * to detect a possible race between the driver checking for more work,
2389 * and enabling callbacks.
2390 *
2391 * Caller must ensure we don't call this with other virtqueue
2392 * operations at the same time (except where noted).
2393 */
2394bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
2395{
1ce9e605
TB
2396 struct vring_virtqueue *vq = to_vvq(_vq);
2397
8d622d21
MT
2398 if (vq->event_triggered)
2399 vq->event_triggered = false;
2400
1ce9e605
TB
2401 return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
2402 virtqueue_enable_cb_delayed_split(_vq);
e6f633e5
TB
2403}
2404EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
2405
138fd251
TB
2406/**
2407 * virtqueue_detach_unused_buf - detach first unused buffer
a5581206 2408 * @_vq: the struct virtqueue we're talking about.
138fd251
TB
2409 *
2410 * Returns NULL or the "data" token handed to virtqueue_add_*().
a62eecb3
XZ
2411 * This is not valid on an active queue; it is useful for device
2412 * shutdown or the reset queue.
138fd251
TB
2413 */
2414void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
2415{
1ce9e605
TB
2416 struct vring_virtqueue *vq = to_vvq(_vq);
2417
2418 return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
2419 virtqueue_detach_unused_buf_split(_vq);
138fd251 2420}
7c5e9ed0 2421EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
c021eac4 2422
138fd251
TB
2423static inline bool more_used(const struct vring_virtqueue *vq)
2424{
1ce9e605 2425 return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
138fd251
TB
2426}
2427
0a8a69dd
RR
2428irqreturn_t vring_interrupt(int irq, void *_vq)
2429{
2430 struct vring_virtqueue *vq = to_vvq(_vq);
2431
2432 if (!more_used(vq)) {
2433 pr_debug("virtqueue interrupt with no work for %p\n", vq);
2434 return IRQ_NONE;
2435 }
2436
8b4ec69d 2437 if (unlikely(vq->broken)) {
c346dae4 2438#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
8b4ec69d
JW
2439 dev_warn_once(&vq->vq.vdev->dev,
2440 "virtio vring IRQ raised before DRIVER_OK");
2441 return IRQ_NONE;
c346dae4
JW
2442#else
2443 return IRQ_HANDLED;
2444#endif
8b4ec69d 2445 }
0a8a69dd 2446
8d622d21
MT
2447 /* Just a hint for performance: so it's ok that this can be racy! */
2448 if (vq->event)
2449 vq->event_triggered = true;
2450
0a8a69dd 2451 pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
18445c4d
RR
2452 if (vq->vq.callback)
2453 vq->vq.callback(&vq->vq);
0a8a69dd
RR
2454
2455 return IRQ_HANDLED;
2456}
c6fd4701 2457EXPORT_SYMBOL_GPL(vring_interrupt);
0a8a69dd 2458
1ce9e605 2459/* Only available for split ring */
07d9629d 2460static struct virtqueue *__vring_new_virtqueue(unsigned int index,
cd4c812a 2461 struct vring_virtqueue_split *vring_split,
07d9629d
XZ
2462 struct virtio_device *vdev,
2463 bool weak_barriers,
2464 bool context,
2465 bool (*notify)(struct virtqueue *),
2466 void (*callback)(struct virtqueue *),
2467 const char *name)
0a8a69dd 2468{
2a2d1382 2469 struct vring_virtqueue *vq;
a2b36c8d 2470 int err;
0a8a69dd 2471
1ce9e605
TB
2472 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2473 return NULL;
2474
cbeedb72 2475 vq = kmalloc(sizeof(*vq), GFP_KERNEL);
0a8a69dd
RR
2476 if (!vq)
2477 return NULL;
2478
1ce9e605 2479 vq->packed_ring = false;
0a8a69dd
RR
2480 vq->vq.callback = callback;
2481 vq->vq.vdev = vdev;
9499f5e7 2482 vq->vq.name = name;
06ca287d 2483 vq->vq.index = index;
2a2d1382 2484 vq->we_own_ring = false;
0a8a69dd 2485 vq->notify = notify;
7b21e34f 2486 vq->weak_barriers = weak_barriers;
c346dae4 2487#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
8b4ec69d 2488 vq->broken = true;
c346dae4
JW
2489#else
2490 vq->broken = false;
2491#endif
fb3fba6b 2492 vq->use_dma_api = vring_use_dma_api(vdev);
0a8a69dd 2493
5a08b04f
MT
2494 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2495 !context;
a5c262c5 2496 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
9fa29b9d 2497
45383fb0
TB
2498 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2499 vq->weak_barriers = false;
2500
a2b36c8d
XZ
2501 err = vring_alloc_state_extra_split(vring_split);
2502 if (err) {
2503 kfree(vq);
2504 return NULL;
2505 }
72b5e895 2506
198fa7be
XZ
2507 virtqueue_vring_init_split(vring_split, vq);
2508
cd4c812a 2509 virtqueue_init(vq, vring_split->vring.num);
e1d6a423 2510 virtqueue_vring_attach_split(vq, vring_split);
3a897128 2511
0e566c8f 2512 spin_lock(&vdev->vqs_list_lock);
e152d8af 2513 list_add_tail(&vq->vq.list, &vdev->vqs);
0e566c8f 2514 spin_unlock(&vdev->vqs_list_lock);
0a8a69dd
RR
2515 return &vq->vq;
2516}
2a2d1382 2517
2a2d1382
AL
2518struct virtqueue *vring_create_virtqueue(
2519 unsigned int index,
2520 unsigned int num,
2521 unsigned int vring_align,
2522 struct virtio_device *vdev,
2523 bool weak_barriers,
2524 bool may_reduce_num,
f94682dd 2525 bool context,
2a2d1382
AL
2526 bool (*notify)(struct virtqueue *),
2527 void (*callback)(struct virtqueue *),
2528 const char *name)
2529{
1ce9e605
TB
2530
2531 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2532 return vring_create_virtqueue_packed(index, num, vring_align,
2533 vdev, weak_barriers, may_reduce_num,
2534 context, notify, callback, name);
2535
d79dca75
TB
2536 return vring_create_virtqueue_split(index, num, vring_align,
2537 vdev, weak_barriers, may_reduce_num,
2538 context, notify, callback, name);
2a2d1382
AL
2539}
2540EXPORT_SYMBOL_GPL(vring_create_virtqueue);
2541
1ce9e605 2542/* Only available for split ring */
2a2d1382
AL
2543struct virtqueue *vring_new_virtqueue(unsigned int index,
2544 unsigned int num,
2545 unsigned int vring_align,
2546 struct virtio_device *vdev,
2547 bool weak_barriers,
f94682dd 2548 bool context,
2a2d1382
AL
2549 void *pages,
2550 bool (*notify)(struct virtqueue *vq),
2551 void (*callback)(struct virtqueue *vq),
2552 const char *name)
2553{
cd4c812a 2554 struct vring_virtqueue_split vring_split = {};
1ce9e605
TB
2555
2556 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2557 return NULL;
2558
cd4c812a
XZ
2559 vring_init(&vring_split.vring, num, pages, vring_align);
2560 return __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers,
2561 context, notify, callback, name);
2a2d1382 2562}
c6fd4701 2563EXPORT_SYMBOL_GPL(vring_new_virtqueue);
0a8a69dd 2564
3ea19e32 2565static void vring_free(struct virtqueue *_vq)
0a8a69dd 2566{
2a2d1382
AL
2567 struct vring_virtqueue *vq = to_vvq(_vq);
2568
2569 if (vq->we_own_ring) {
1ce9e605
TB
2570 if (vq->packed_ring) {
2571 vring_free_queue(vq->vq.vdev,
2572 vq->packed.ring_size_in_bytes,
2573 vq->packed.vring.desc,
2574 vq->packed.ring_dma_addr);
2575
2576 vring_free_queue(vq->vq.vdev,
2577 vq->packed.event_size_in_bytes,
2578 vq->packed.vring.driver,
2579 vq->packed.driver_event_dma_addr);
2580
2581 vring_free_queue(vq->vq.vdev,
2582 vq->packed.event_size_in_bytes,
2583 vq->packed.vring.device,
2584 vq->packed.device_event_dma_addr);
2585
2586 kfree(vq->packed.desc_state);
2587 kfree(vq->packed.desc_extra);
2588 } else {
2589 vring_free_queue(vq->vq.vdev,
2590 vq->split.queue_size_in_bytes,
2591 vq->split.vring.desc,
2592 vq->split.queue_dma_addr);
1ce9e605 2593 }
2a2d1382 2594 }
72b5e895 2595 if (!vq->packed_ring) {
f13f09a1 2596 kfree(vq->split.desc_state);
72b5e895
JW
2597 kfree(vq->split.desc_extra);
2598 }
3ea19e32
XZ
2599}
2600
2601void vring_del_virtqueue(struct virtqueue *_vq)
2602{
2603 struct vring_virtqueue *vq = to_vvq(_vq);
2604
2605 spin_lock(&vq->vq.vdev->vqs_list_lock);
2606 list_del(&_vq->list);
2607 spin_unlock(&vq->vq.vdev->vqs_list_lock);
2608
2609 vring_free(_vq);
2610
2a2d1382 2611 kfree(vq);
0a8a69dd 2612}
c6fd4701 2613EXPORT_SYMBOL_GPL(vring_del_virtqueue);
0a8a69dd 2614
e34f8725
RR
2615/* Manipulates transport-specific feature bits. */
2616void vring_transport_features(struct virtio_device *vdev)
2617{
2618 unsigned int i;
2619
2620 for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
2621 switch (i) {
9fa29b9d
MM
2622 case VIRTIO_RING_F_INDIRECT_DESC:
2623 break;
a5c262c5
MT
2624 case VIRTIO_RING_F_EVENT_IDX:
2625 break;
747ae34a
MT
2626 case VIRTIO_F_VERSION_1:
2627 break;
321bd212 2628 case VIRTIO_F_ACCESS_PLATFORM:
1a937693 2629 break;
f959a128
TB
2630 case VIRTIO_F_RING_PACKED:
2631 break;
45383fb0
TB
2632 case VIRTIO_F_ORDER_PLATFORM:
2633 break;
e34f8725
RR
2634 default:
2635 /* We don't understand this bit. */
e16e12be 2636 __virtio_clear_bit(vdev, i);
e34f8725
RR
2637 }
2638 }
2639}
2640EXPORT_SYMBOL_GPL(vring_transport_features);
2641
5dfc1762
RR
2642/**
2643 * virtqueue_get_vring_size - return the size of the virtqueue's vring
a5581206 2644 * @_vq: the struct virtqueue containing the vring of interest.
5dfc1762
RR
2645 *
2646 * Returns the size of the vring. This is mainly used for boasting to
2647 * userspace. Unlike other operations, this need not be serialized.
2648 */
8f9f4668
RJ
2649unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
2650{
2651
2652 struct vring_virtqueue *vq = to_vvq(_vq);
2653
1ce9e605 2654 return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
8f9f4668
RJ
2655}
2656EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
2657
b3b32c94
HG
2658bool virtqueue_is_broken(struct virtqueue *_vq)
2659{
2660 struct vring_virtqueue *vq = to_vvq(_vq);
2661
60f07798 2662 return READ_ONCE(vq->broken);
b3b32c94
HG
2663}
2664EXPORT_SYMBOL_GPL(virtqueue_is_broken);
2665
e2dcdfe9
RR
2666/*
2667 * This should prevent the device from being used, allowing drivers to
2668 * recover. You may need to grab appropriate locks to flush.
2669 */
2670void virtio_break_device(struct virtio_device *dev)
2671{
2672 struct virtqueue *_vq;
2673
0e566c8f 2674 spin_lock(&dev->vqs_list_lock);
e2dcdfe9
RR
2675 list_for_each_entry(_vq, &dev->vqs, list) {
2676 struct vring_virtqueue *vq = to_vvq(_vq);
60f07798
PP
2677
2678 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2679 WRITE_ONCE(vq->broken, true);
e2dcdfe9 2680 }
0e566c8f 2681 spin_unlock(&dev->vqs_list_lock);
e2dcdfe9
RR
2682}
2683EXPORT_SYMBOL_GPL(virtio_break_device);
2684
be83f04d
JW
2685/*
2686 * This should allow the device to be used by the driver. You may
2687 * need to grab appropriate locks to flush the write to
2688 * vq->broken. This should only be used in some specific case e.g
2689 * (probing and restoring). This function should only be called by the
2690 * core, not directly by the driver.
2691 */
2692void __virtio_unbreak_device(struct virtio_device *dev)
2693{
2694 struct virtqueue *_vq;
2695
2696 spin_lock(&dev->vqs_list_lock);
2697 list_for_each_entry(_vq, &dev->vqs, list) {
2698 struct vring_virtqueue *vq = to_vvq(_vq);
2699
2700 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2701 WRITE_ONCE(vq->broken, false);
2702 }
2703 spin_unlock(&dev->vqs_list_lock);
2704}
2705EXPORT_SYMBOL_GPL(__virtio_unbreak_device);
2706
2a2d1382 2707dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
89062652
CH
2708{
2709 struct vring_virtqueue *vq = to_vvq(_vq);
2710
2a2d1382
AL
2711 BUG_ON(!vq->we_own_ring);
2712
1ce9e605
TB
2713 if (vq->packed_ring)
2714 return vq->packed.ring_dma_addr;
2715
d79dca75 2716 return vq->split.queue_dma_addr;
89062652 2717}
2a2d1382 2718EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
89062652 2719
2a2d1382 2720dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
89062652
CH
2721{
2722 struct vring_virtqueue *vq = to_vvq(_vq);
2723
2a2d1382
AL
2724 BUG_ON(!vq->we_own_ring);
2725
1ce9e605
TB
2726 if (vq->packed_ring)
2727 return vq->packed.driver_event_dma_addr;
2728
d79dca75 2729 return vq->split.queue_dma_addr +
e593bf97 2730 ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
2a2d1382
AL
2731}
2732EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
2733
2734dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
2735{
2736 struct vring_virtqueue *vq = to_vvq(_vq);
2737
2738 BUG_ON(!vq->we_own_ring);
2739
1ce9e605
TB
2740 if (vq->packed_ring)
2741 return vq->packed.device_event_dma_addr;
2742
d79dca75 2743 return vq->split.queue_dma_addr +
e593bf97 2744 ((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
2a2d1382
AL
2745}
2746EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
2747
1ce9e605 2748/* Only available for split ring */
2a2d1382
AL
2749const struct vring *virtqueue_get_vring(struct virtqueue *vq)
2750{
e593bf97 2751 return &to_vvq(vq)->split.vring;
89062652 2752}
2a2d1382 2753EXPORT_SYMBOL_GPL(virtqueue_get_vring);
89062652 2754
c6fd4701 2755MODULE_LICENSE("GPL");