virtio: use err label in __vring_new_virtqueue()
[linux-2.6-block.git] / drivers/virtio/virtio_ring.c
fd534e9b 1// SPDX-License-Identifier: GPL-2.0-or-later
0a8a69dd
RR
2/* Virtio ring implementation.
3 *
4 * Copyright 2007 Rusty Russell IBM Corporation
0a8a69dd
RR
5 */
6#include <linux/virtio.h>
7#include <linux/virtio_ring.h>
e34f8725 8#include <linux/virtio_config.h>
0a8a69dd 9#include <linux/device.h>
5a0e3ad6 10#include <linux/slab.h>
b5a2c4f1 11#include <linux/module.h>
e93300b1 12#include <linux/hrtimer.h>
780bc790 13#include <linux/dma-mapping.h>
78fe3987 14#include <xen/xen.h>
0a8a69dd
RR
15
16#ifdef DEBUG
17/* For development, we want to crash whenever the ring is screwed. */
9499f5e7
RR
18#define BAD_RING(_vq, fmt, args...) \
19 do { \
20 dev_err(&(_vq)->vq.vdev->dev, \
21 "%s:"fmt, (_vq)->vq.name, ##args); \
22 BUG(); \
23 } while (0)
c5f841f1
RR
24/* Caller is supposed to guarantee no reentry. */
25#define START_USE(_vq) \
26 do { \
27 if ((_vq)->in_use) \
9499f5e7
RR
28 panic("%s:in_use = %i\n", \
29 (_vq)->vq.name, (_vq)->in_use); \
c5f841f1 30 (_vq)->in_use = __LINE__; \
9499f5e7 31 } while (0)
3a35ce7d 32#define END_USE(_vq) \
97a545ab 33 do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
4d6a105e
TB
34#define LAST_ADD_TIME_UPDATE(_vq) \
35 do { \
36 ktime_t now = ktime_get(); \
37 \
38 /* No kick or get, with .1 second between? Warn. */ \
39 if ((_vq)->last_add_time_valid) \
40 WARN_ON(ktime_to_ms(ktime_sub(now, \
41 (_vq)->last_add_time)) > 100); \
42 (_vq)->last_add_time = now; \
43 (_vq)->last_add_time_valid = true; \
44 } while (0)
45#define LAST_ADD_TIME_CHECK(_vq) \
46 do { \
47 if ((_vq)->last_add_time_valid) { \
48 WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
49 (_vq)->last_add_time)) > 100); \
50 } \
51 } while (0)
52#define LAST_ADD_TIME_INVALID(_vq) \
53 ((_vq)->last_add_time_valid = false)
0a8a69dd 54#else
9499f5e7
RR
55#define BAD_RING(_vq, fmt, args...) \
56 do { \
57 dev_err(&_vq->vq.vdev->dev, \
58 "%s:"fmt, (_vq)->vq.name, ##args); \
59 (_vq)->broken = true; \
60 } while (0)
0a8a69dd
RR
61#define START_USE(vq)
62#define END_USE(vq)
4d6a105e
TB
63#define LAST_ADD_TIME_UPDATE(vq)
64#define LAST_ADD_TIME_CHECK(vq)
65#define LAST_ADD_TIME_INVALID(vq)
0a8a69dd
RR
66#endif
67
cbeedb72 68struct vring_desc_state_split {
780bc790
AL
69 void *data; /* Data for callback. */
70 struct vring_desc *indir_desc; /* Indirect descriptor, if any. */
71};
72
1ce9e605
TB
73struct vring_desc_state_packed {
74 void *data; /* Data for callback. */
75 struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
76 u16 num; /* Descriptor list length. */
1ce9e605
TB
77 u16 last; /* The last desc state in a list. */
78};
79
1f28750f 80struct vring_desc_extra {
1ce9e605
TB
81 dma_addr_t addr; /* Buffer DMA addr. */
82 u32 len; /* Buffer length. */
83 u16 flags; /* Descriptor flags. */
aeef9b47 84 u16 next; /* The next desc state in a list. */
1ce9e605
TB
85};
86
43b4f721 87struct vring_virtqueue {
0a8a69dd
RR
88 struct virtqueue vq;
89
1ce9e605
TB
90 /* Is this a packed ring? */
91 bool packed_ring;
92
fb3fba6b
TB
93 /* Is DMA API used? */
94 bool use_dma_api;
95
7b21e34f
RR
96 /* Can we use weak barriers? */
97 bool weak_barriers;
98
0a8a69dd
RR
99 /* Other side has made a mess, don't try any more. */
100 bool broken;
101
9fa29b9d
MM
102 /* Host supports indirect buffers */
103 bool indirect;
104
a5c262c5
MT
105 /* Host publishes avail event idx */
106 bool event;
107
0a8a69dd
RR
108 /* Head of free buffer list. */
109 unsigned int free_head;
110 /* Number we've added since last sync. */
111 unsigned int num_added;
112
113 /* Last used index we've seen. */
1bc4953e 114 u16 last_used_idx;
0a8a69dd 115
8d622d21
MT
116 /* Hint for event idx: already triggered no need to disable. */
117 bool event_triggered;
118
1ce9e605
TB
119 union {
120 /* Available for split ring */
121 struct {
122 /* Actual memory layout for this queue. */
123 struct vring vring;
124
125 /* Last written value to avail->flags */
126 u16 avail_flags_shadow;
f277ec42 127
1ce9e605
TB
128 /*
129 * Last written value to avail->idx in
130 * guest byte order.
131 */
132 u16 avail_idx_shadow;
133
134 /* Per-descriptor state. */
135 struct vring_desc_state_split *desc_state;
136
137 /* DMA address and size information */
138 dma_addr_t queue_dma_addr;
139 size_t queue_size_in_bytes;
140 } split;
e593bf97 141
1ce9e605
TB
142 /* Available for packed ring */
143 struct {
144 /* Actual memory layout for this queue. */
9c0644ee
MT
145 struct {
146 unsigned int num;
147 struct vring_packed_desc *desc;
148 struct vring_packed_desc_event *driver;
149 struct vring_packed_desc_event *device;
150 } vring;
cbeedb72 151
1ce9e605
TB
152 /* Driver ring wrap counter. */
153 bool avail_wrap_counter;
d79dca75 154
1ce9e605
TB
155 /* Device ring wrap counter. */
156 bool used_wrap_counter;
157
158 /* Avail used flags. */
159 u16 avail_used_flags;
160
161 /* Index of the next avail descriptor. */
162 u16 next_avail_idx;
163
164 /*
165 * Last written value to driver->flags in
166 * guest byte order.
167 */
168 u16 event_flags_shadow;
169
170 /* Per-descriptor state. */
171 struct vring_desc_state_packed *desc_state;
1f28750f 172 struct vring_desc_extra *desc_extra;
1ce9e605
TB
173
174 /* DMA address and size information */
175 dma_addr_t ring_dma_addr;
176 dma_addr_t driver_event_dma_addr;
177 dma_addr_t device_event_dma_addr;
178 size_t ring_size_in_bytes;
179 size_t event_size_in_bytes;
180 } packed;
181 };
f277ec42 182
0a8a69dd 183 /* How to notify other side. FIXME: commonalize hcalls! */
46f9c2b9 184 bool (*notify)(struct virtqueue *vq);
0a8a69dd 185
2a2d1382
AL
186 /* DMA, allocation, and size information */
187 bool we_own_ring;
2a2d1382 188
0a8a69dd
RR
189#ifdef DEBUG
190 /* They're supposed to lock for us. */
191 unsigned int in_use;
e93300b1
RR
192
193 /* Figure out if their kicks are too delayed. */
194 bool last_add_time_valid;
195 ktime_t last_add_time;
0a8a69dd 196#endif
0a8a69dd
RR
197};
198
e6f633e5
TB
199
200/*
201 * Helpers.
202 */
203
0a8a69dd
RR
204#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
205
2f18c2d1
TB
206static inline bool virtqueue_use_indirect(struct virtqueue *_vq,
207 unsigned int total_sg)
208{
209 struct vring_virtqueue *vq = to_vvq(_vq);
210
211 /*
212 * If the host supports indirect descriptor tables, and we have multiple
213 * buffers, then go indirect. FIXME: tune this threshold
214 */
215 return (vq->indirect && total_sg > 1 && vq->vq.num_free);
216}
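/*
 * Illustrative note (not from the original source): with
 * VIRTIO_RING_F_INDIRECT_DESC negotiated, a request built from, say, three
 * scatterlist entries occupies only one slot of the main ring; its three
 * data descriptors go into a separately allocated indirect table.  Without
 * indirect support the same request would consume three ring slots.
 */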
217
d26c96c8 218/*
1a937693
MT
219 * Modern virtio devices have feature bits to specify whether they need a
220 * quirk and bypass the IOMMU. If not there, just use the DMA API.
221 *
222 * If there, the interaction between virtio and DMA API is messy.
d26c96c8
AL
223 *
224 * On most systems with virtio, physical addresses match bus addresses,
225 * and it doesn't particularly matter whether we use the DMA API.
226 *
227 * On some systems, including Xen and any system with a physical device
228 * that speaks virtio behind a physical IOMMU, we must use the DMA API
229 * for virtio DMA to work at all.
230 *
231 * On other systems, including SPARC and PPC64, virtio-pci devices are
232 * enumerated as though they are behind an IOMMU, but the virtio host
233 * ignores the IOMMU, so we must either pretend that the IOMMU isn't
234 * there or somehow map everything as the identity.
235 *
236 * For the time being, we preserve historic behavior and bypass the DMA
237 * API.
1a937693
MT
238 *
239 * TODO: install a per-device DMA ops structure that does the right thing
240 * taking into account all the above quirks, and use the DMA API
241 * unconditionally on data path.
d26c96c8
AL
242 */
243
244static bool vring_use_dma_api(struct virtio_device *vdev)
245{
24b6842a 246 if (!virtio_has_dma_quirk(vdev))
1a937693
MT
247 return true;
248
249 /* Otherwise, we are left to guess. */
78fe3987
AL
250 /*
 251 * In theory, it's possible to have a buggy QEMU-supplied
252 * emulated Q35 IOMMU and Xen enabled at the same time. On
253 * such a configuration, virtio has never worked and will
254 * not work without an even larger kludge. Instead, enable
255 * the DMA API if we're a Xen guest, which at least allows
256 * all of the sensible Xen configurations to work correctly.
257 */
258 if (xen_domain())
259 return true;
260
d26c96c8
AL
261 return false;
262}
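/*
 * Sketch of the quirk check above (relies on the virtio_config.h helper):
 * virtio_has_dma_quirk(vdev) is essentially
 * !virtio_has_feature(vdev, VIRTIO_F_ACCESS_PLATFORM).  A device that
 * negotiated VIRTIO_F_ACCESS_PLATFORM promises to honour the platform
 * IOMMU/bounce buffering, so the DMA API is used for it unconditionally;
 * only "quirky" legacy devices fall through to the Xen/bare-metal
 * guesswork in the function above.
 */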
263
e6d6dd6c
JR
264size_t virtio_max_dma_size(struct virtio_device *vdev)
265{
266 size_t max_segment_size = SIZE_MAX;
267
268 if (vring_use_dma_api(vdev))
269 max_segment_size = dma_max_mapping_size(&vdev->dev);
270
271 return max_segment_size;
272}
273EXPORT_SYMBOL_GPL(virtio_max_dma_size);
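/*
 * Usage sketch (illustrative, not part of this file): drivers with queue
 * limits typically clamp their segment size with this helper, e.g.
 *
 *	blk_queue_max_segment_size(q, virtio_max_dma_size(vdev));
 *
 * so that no single segment exceeds what dma_max_mapping_size() reports,
 * which matters when swiotlb bounce buffering is in effect.
 */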
274
d79dca75
TB
275static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
276 dma_addr_t *dma_handle, gfp_t flag)
277{
278 if (vring_use_dma_api(vdev)) {
279 return dma_alloc_coherent(vdev->dev.parent, size,
280 dma_handle, flag);
281 } else {
282 void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
283
284 if (queue) {
285 phys_addr_t phys_addr = virt_to_phys(queue);
286 *dma_handle = (dma_addr_t)phys_addr;
287
288 /*
 288 * Sanity check: make sure we didn't truncate
290 * the address. The only arches I can find that
291 * have 64-bit phys_addr_t but 32-bit dma_addr_t
292 * are certain non-highmem MIPS and x86
293 * configurations, but these configurations
294 * should never allocate physical pages above 32
295 * bits, so this is fine. Just in case, throw a
296 * warning and abort if we end up with an
297 * unrepresentable address.
298 */
299 if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
300 free_pages_exact(queue, PAGE_ALIGN(size));
301 return NULL;
302 }
303 }
304 return queue;
305 }
306}
307
308static void vring_free_queue(struct virtio_device *vdev, size_t size,
309 void *queue, dma_addr_t dma_handle)
310{
311 if (vring_use_dma_api(vdev))
312 dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
313 else
314 free_pages_exact(queue, PAGE_ALIGN(size));
315}
316
780bc790
AL
317/*
318 * The DMA ops on various arches are rather gnarly right now, and
319 * making all of the arch DMA ops work on the vring device itself
320 * is a mess. For now, we use the parent device for DMA ops.
321 */
75bfa81b 322static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
780bc790
AL
323{
324 return vq->vq.vdev->dev.parent;
325}
326
327/* Map one sg entry. */
328static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
329 struct scatterlist *sg,
330 enum dma_data_direction direction)
331{
fb3fba6b 332 if (!vq->use_dma_api)
780bc790
AL
333 return (dma_addr_t)sg_phys(sg);
334
335 /*
336 * We can't use dma_map_sg, because we don't use scatterlists in
337 * the way it expects (we don't guarantee that the scatterlist
338 * will exist for the lifetime of the mapping).
339 */
340 return dma_map_page(vring_dma_dev(vq),
341 sg_page(sg), sg->offset, sg->length,
342 direction);
343}
344
345static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
346 void *cpu_addr, size_t size,
347 enum dma_data_direction direction)
348{
fb3fba6b 349 if (!vq->use_dma_api)
780bc790
AL
350 return (dma_addr_t)virt_to_phys(cpu_addr);
351
352 return dma_map_single(vring_dma_dev(vq),
353 cpu_addr, size, direction);
354}
355
e6f633e5
TB
356static int vring_mapping_error(const struct vring_virtqueue *vq,
357 dma_addr_t addr)
358{
fb3fba6b 359 if (!vq->use_dma_api)
e6f633e5
TB
360 return 0;
361
362 return dma_mapping_error(vring_dma_dev(vq), addr);
363}
364
365
366/*
367 * Split ring specific functions - *_split().
368 */
369
138fd251
TB
370static void vring_unmap_one_split(const struct vring_virtqueue *vq,
371 struct vring_desc *desc)
780bc790
AL
372{
373 u16 flags;
374
fb3fba6b 375 if (!vq->use_dma_api)
780bc790
AL
376 return;
377
378 flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
379
380 if (flags & VRING_DESC_F_INDIRECT) {
381 dma_unmap_single(vring_dma_dev(vq),
382 virtio64_to_cpu(vq->vq.vdev, desc->addr),
383 virtio32_to_cpu(vq->vq.vdev, desc->len),
384 (flags & VRING_DESC_F_WRITE) ?
385 DMA_FROM_DEVICE : DMA_TO_DEVICE);
386 } else {
387 dma_unmap_page(vring_dma_dev(vq),
388 virtio64_to_cpu(vq->vq.vdev, desc->addr),
389 virtio32_to_cpu(vq->vq.vdev, desc->len),
390 (flags & VRING_DESC_F_WRITE) ?
391 DMA_FROM_DEVICE : DMA_TO_DEVICE);
392 }
393}
394
138fd251
TB
395static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
396 unsigned int total_sg,
397 gfp_t gfp)
9fa29b9d
MM
398{
399 struct vring_desc *desc;
b25bd251 400 unsigned int i;
9fa29b9d 401
b92b1b89
WD
402 /*
403 * We require lowmem mappings for the descriptors because
404 * otherwise virt_to_phys will give us bogus addresses in the
405 * virtqueue.
406 */
82107539 407 gfp &= ~__GFP_HIGHMEM;
b92b1b89 408
6da2ec56 409 desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp);
9fa29b9d 410 if (!desc)
b25bd251 411 return NULL;
9fa29b9d 412
b25bd251 413 for (i = 0; i < total_sg; i++)
00e6f3d9 414 desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
b25bd251 415 return desc;
9fa29b9d
MM
416}
417
fe4c3862
JW
418static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
419 struct vring_desc *desc,
420 unsigned int i,
421 dma_addr_t addr,
422 unsigned int len,
423 u16 flags)
424{
425 desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
426 desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
427 desc[i].len = cpu_to_virtio32(vq->vdev, len);
428
429 return virtio16_to_cpu(vq->vdev, desc[i].next);
430}
431
138fd251
TB
432static inline int virtqueue_add_split(struct virtqueue *_vq,
433 struct scatterlist *sgs[],
434 unsigned int total_sg,
435 unsigned int out_sgs,
436 unsigned int in_sgs,
437 void *data,
438 void *ctx,
439 gfp_t gfp)
0a8a69dd
RR
440{
441 struct vring_virtqueue *vq = to_vvq(_vq);
13816c76 442 struct scatterlist *sg;
b25bd251 443 struct vring_desc *desc;
3f649ab7 444 unsigned int i, n, avail, descs_used, prev, err_idx;
1fe9b6fe 445 int head;
b25bd251 446 bool indirect;
0a8a69dd 447
9fa29b9d
MM
448 START_USE(vq);
449
0a8a69dd 450 BUG_ON(data == NULL);
5a08b04f 451 BUG_ON(ctx && vq->indirect);
9fa29b9d 452
70670444
RR
453 if (unlikely(vq->broken)) {
454 END_USE(vq);
455 return -EIO;
456 }
457
4d6a105e 458 LAST_ADD_TIME_UPDATE(vq);
e93300b1 459
b25bd251
RR
460 BUG_ON(total_sg == 0);
461
462 head = vq->free_head;
463
2f18c2d1 464 if (virtqueue_use_indirect(_vq, total_sg))
138fd251 465 desc = alloc_indirect_split(_vq, total_sg, gfp);
44ed8089 466 else {
b25bd251 467 desc = NULL;
e593bf97 468 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
44ed8089 469 }
b25bd251
RR
470
471 if (desc) {
472 /* Use a single buffer which doesn't continue */
780bc790 473 indirect = true;
b25bd251
RR
474 /* Set up rest to use this indirect table. */
475 i = 0;
476 descs_used = 1;
b25bd251 477 } else {
780bc790 478 indirect = false;
e593bf97 479 desc = vq->split.vring.desc;
b25bd251
RR
480 i = head;
481 descs_used = total_sg;
9fa29b9d
MM
482 }
483
b25bd251 484 if (vq->vq.num_free < descs_used) {
0a8a69dd 485 pr_debug("Can't add buf len %i - avail = %i\n",
b25bd251 486 descs_used, vq->vq.num_free);
44653eae
RR
487 /* FIXME: for historical reasons, we force a notify here if
488 * there are outgoing parts to the buffer. Presumably the
489 * host should service the ring ASAP. */
13816c76 490 if (out_sgs)
44653eae 491 vq->notify(&vq->vq);
58625edf
WY
492 if (indirect)
493 kfree(desc);
0a8a69dd
RR
494 END_USE(vq);
495 return -ENOSPC;
496 }
497
13816c76 498 for (n = 0; n < out_sgs; n++) {
eeebf9b1 499 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
780bc790
AL
500 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
501 if (vring_mapping_error(vq, addr))
502 goto unmap_release;
503
13816c76 504 prev = i;
fe4c3862
JW
505 i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
506 VRING_DESC_F_NEXT);
13816c76 507 }
0a8a69dd 508 }
13816c76 509 for (; n < (out_sgs + in_sgs); n++) {
eeebf9b1 510 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
780bc790
AL
511 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
512 if (vring_mapping_error(vq, addr))
513 goto unmap_release;
514
13816c76 515 prev = i;
fe4c3862
JW
516 i = virtqueue_add_desc_split(_vq, desc, i, addr,
517 sg->length,
518 VRING_DESC_F_NEXT |
519 VRING_DESC_F_WRITE);
13816c76 520 }
0a8a69dd
RR
521 }
522 /* Last one doesn't continue. */
00e6f3d9 523 desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
0a8a69dd 524
780bc790
AL
525 if (indirect) {
526 /* Now that the indirect table is filled in, map it. */
527 dma_addr_t addr = vring_map_single(
528 vq, desc, total_sg * sizeof(struct vring_desc),
529 DMA_TO_DEVICE);
530 if (vring_mapping_error(vq, addr))
531 goto unmap_release;
532
fe4c3862
JW
533 virtqueue_add_desc_split(_vq, vq->split.vring.desc,
534 head, addr,
535 total_sg * sizeof(struct vring_desc),
536 VRING_DESC_F_INDIRECT);
780bc790
AL
537 }
538
539 /* We're using some buffers from the free list. */
540 vq->vq.num_free -= descs_used;
541
0a8a69dd 542 /* Update free pointer */
b25bd251 543 if (indirect)
e593bf97
TB
544 vq->free_head = virtio16_to_cpu(_vq->vdev,
545 vq->split.vring.desc[head].next);
b25bd251
RR
546 else
547 vq->free_head = i;
0a8a69dd 548
780bc790 549 /* Store token and indirect buffer state. */
cbeedb72 550 vq->split.desc_state[head].data = data;
780bc790 551 if (indirect)
cbeedb72 552 vq->split.desc_state[head].indir_desc = desc;
87646a34 553 else
cbeedb72 554 vq->split.desc_state[head].indir_desc = ctx;
0a8a69dd
RR
555
556 /* Put entry in available array (but don't update avail->idx until they
3b720b8c 557 * do sync). */
e593bf97
TB
558 avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
559 vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
0a8a69dd 560
ee7cd898
RR
561 /* Descriptors and available array need to be set before we expose the
562 * new available array entries. */
a9a0fef7 563 virtio_wmb(vq->weak_barriers);
e593bf97
TB
564 vq->split.avail_idx_shadow++;
565 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
566 vq->split.avail_idx_shadow);
ee7cd898
RR
567 vq->num_added++;
568
5e05bf58
TH
569 pr_debug("Added buffer head %i to %p\n", head, vq);
570 END_USE(vq);
571
ee7cd898
RR
572 /* This is very unlikely, but theoretically possible. Kick
573 * just in case. */
574 if (unlikely(vq->num_added == (1 << 16) - 1))
575 virtqueue_kick(_vq);
576
98e8c6bc 577 return 0;
780bc790
AL
578
579unmap_release:
580 err_idx = i;
cf8f1696
ML
581
582 if (indirect)
583 i = 0;
584 else
585 i = head;
780bc790
AL
586
587 for (n = 0; n < total_sg; n++) {
588 if (i == err_idx)
589 break;
138fd251 590 vring_unmap_one_split(vq, &desc[i]);
cf8f1696 591 i = virtio16_to_cpu(_vq->vdev, desc[i].next);
780bc790
AL
592 }
593
780bc790
AL
594 if (indirect)
595 kfree(desc);
596
3cc36f6e 597 END_USE(vq);
f7728002 598 return -ENOMEM;
0a8a69dd 599}
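/*
 * Recap of the split-ring add path above: each scatterlist entry is
 * DMA-mapped and written into a free descriptor (chained via ->next), the
 * chain head is published in avail->ring[], and only after virtio_wmb() is
 * avail->idx advanced, so the device can never observe a half-written
 * chain.  On a mapping failure the already-mapped descriptors are unwound
 * at the unmap_release label.
 */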
13816c76 600
138fd251 601static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
0a8a69dd
RR
602{
603 struct vring_virtqueue *vq = to_vvq(_vq);
a5c262c5 604 u16 new, old;
41f0377f
RR
605 bool needs_kick;
606
0a8a69dd 607 START_USE(vq);
a72caae2
JW
608 /* We need to expose available array entries before checking avail
609 * event. */
a9a0fef7 610 virtio_mb(vq->weak_barriers);
0a8a69dd 611
e593bf97
TB
612 old = vq->split.avail_idx_shadow - vq->num_added;
613 new = vq->split.avail_idx_shadow;
0a8a69dd
RR
614 vq->num_added = 0;
615
4d6a105e
TB
616 LAST_ADD_TIME_CHECK(vq);
617 LAST_ADD_TIME_INVALID(vq);
e93300b1 618
41f0377f 619 if (vq->event) {
e593bf97
TB
620 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
621 vring_avail_event(&vq->split.vring)),
41f0377f
RR
622 new, old);
623 } else {
e593bf97
TB
624 needs_kick = !(vq->split.vring.used->flags &
625 cpu_to_virtio16(_vq->vdev,
626 VRING_USED_F_NO_NOTIFY));
41f0377f 627 }
0a8a69dd 628 END_USE(vq);
41f0377f
RR
629 return needs_kick;
630}
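/*
 * Note on vring_need_event() used above (uapi/linux/virtio_ring.h helper):
 *
 *	return (__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old);
 *
 * i.e. kick only if the event index the device published falls within
 * (old, new], the range of entries added since the last kick.  This is the
 * VIRTIO_RING_F_EVENT_IDX notification-suppression scheme.
 */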
138fd251 631
138fd251
TB
632static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
633 void **ctx)
0a8a69dd 634{
780bc790 635 unsigned int i, j;
c60923cb 636 __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
0a8a69dd
RR
637
638 /* Clear data ptr. */
cbeedb72 639 vq->split.desc_state[head].data = NULL;
0a8a69dd 640
780bc790 641 /* Put back on free list: unmap first-level descriptors and find end */
0a8a69dd 642 i = head;
9fa29b9d 643
e593bf97
TB
644 while (vq->split.vring.desc[i].flags & nextflag) {
645 vring_unmap_one_split(vq, &vq->split.vring.desc[i]);
646 i = virtio16_to_cpu(vq->vq.vdev, vq->split.vring.desc[i].next);
06ca287d 647 vq->vq.num_free++;
0a8a69dd
RR
648 }
649
e593bf97
TB
650 vring_unmap_one_split(vq, &vq->split.vring.desc[i]);
651 vq->split.vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev,
652 vq->free_head);
0a8a69dd 653 vq->free_head = head;
780bc790 654
0a8a69dd 655 /* Plus final descriptor */
06ca287d 656 vq->vq.num_free++;
780bc790 657
5a08b04f 658 if (vq->indirect) {
cbeedb72
TB
659 struct vring_desc *indir_desc =
660 vq->split.desc_state[head].indir_desc;
5a08b04f
MT
661 u32 len;
662
663 /* Free the indirect table, if any, now that it's unmapped. */
664 if (!indir_desc)
665 return;
666
e593bf97
TB
667 len = virtio32_to_cpu(vq->vq.vdev,
668 vq->split.vring.desc[head].len);
780bc790 669
e593bf97 670 BUG_ON(!(vq->split.vring.desc[head].flags &
780bc790
AL
671 cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
672 BUG_ON(len == 0 || len % sizeof(struct vring_desc));
673
674 for (j = 0; j < len / sizeof(struct vring_desc); j++)
138fd251 675 vring_unmap_one_split(vq, &indir_desc[j]);
780bc790 676
5a08b04f 677 kfree(indir_desc);
cbeedb72 678 vq->split.desc_state[head].indir_desc = NULL;
5a08b04f 679 } else if (ctx) {
cbeedb72 680 *ctx = vq->split.desc_state[head].indir_desc;
780bc790 681 }
0a8a69dd
RR
682}
683
138fd251 684static inline bool more_used_split(const struct vring_virtqueue *vq)
0a8a69dd 685{
e593bf97
TB
686 return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
687 vq->split.vring.used->idx);
0a8a69dd
RR
688}
689
138fd251
TB
690static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
691 unsigned int *len,
692 void **ctx)
0a8a69dd
RR
693{
694 struct vring_virtqueue *vq = to_vvq(_vq);
695 void *ret;
696 unsigned int i;
3b720b8c 697 u16 last_used;
0a8a69dd
RR
698
699 START_USE(vq);
700
5ef82752
RR
701 if (unlikely(vq->broken)) {
702 END_USE(vq);
703 return NULL;
704 }
705
138fd251 706 if (!more_used_split(vq)) {
0a8a69dd
RR
707 pr_debug("No more buffers in queue\n");
708 END_USE(vq);
709 return NULL;
710 }
711
2d61ba95 712 /* Only get used array entries after they have been exposed by host. */
a9a0fef7 713 virtio_rmb(vq->weak_barriers);
2d61ba95 714
e593bf97
TB
715 last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
716 i = virtio32_to_cpu(_vq->vdev,
717 vq->split.vring.used->ring[last_used].id);
718 *len = virtio32_to_cpu(_vq->vdev,
719 vq->split.vring.used->ring[last_used].len);
0a8a69dd 720
e593bf97 721 if (unlikely(i >= vq->split.vring.num)) {
0a8a69dd
RR
722 BAD_RING(vq, "id %u out of range\n", i);
723 return NULL;
724 }
cbeedb72 725 if (unlikely(!vq->split.desc_state[i].data)) {
0a8a69dd
RR
726 BAD_RING(vq, "id %u is not a head!\n", i);
727 return NULL;
728 }
729
138fd251 730 /* detach_buf_split clears data, so grab it now. */
cbeedb72 731 ret = vq->split.desc_state[i].data;
138fd251 732 detach_buf_split(vq, i, ctx);
0a8a69dd 733 vq->last_used_idx++;
a5c262c5
MT
734 /* If we expect an interrupt for the next entry, tell host
735 * by writing event index and flush out the write before
736 * the read in the next get_buf call. */
e593bf97 737 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
788e5b3a 738 virtio_store_mb(vq->weak_barriers,
e593bf97 739 &vring_used_event(&vq->split.vring),
788e5b3a 740 cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
a5c262c5 741
4d6a105e 742 LAST_ADD_TIME_INVALID(vq);
e93300b1 743
0a8a69dd
RR
744 END_USE(vq);
745 return ret;
746}
138fd251 747
138fd251 748static void virtqueue_disable_cb_split(struct virtqueue *_vq)
18445c4d
RR
749{
750 struct vring_virtqueue *vq = to_vvq(_vq);
751
e593bf97
TB
752 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
753 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
8d622d21
MT
754 if (vq->event)
755 /* TODO: this is a hack. Figure out a cleaner value to write. */
756 vring_used_event(&vq->split.vring) = 0x0;
757 else
e593bf97
TB
758 vq->split.vring.avail->flags =
759 cpu_to_virtio16(_vq->vdev,
760 vq->split.avail_flags_shadow);
f277ec42 761 }
18445c4d
RR
762}
763
138fd251 764static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
0a8a69dd
RR
765{
766 struct vring_virtqueue *vq = to_vvq(_vq);
cc229884 767 u16 last_used_idx;
0a8a69dd
RR
768
769 START_USE(vq);
0a8a69dd
RR
770
771 /* We optimistically turn back on interrupts, then check if there was
772 * more to do. */
a5c262c5
MT
773 /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
774 * either clear the flags bit or point the event index at the next
775 * entry. Always do both to keep code simple. */
e593bf97
TB
776 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
777 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
0ea1e4a6 778 if (!vq->event)
e593bf97
TB
779 vq->split.vring.avail->flags =
780 cpu_to_virtio16(_vq->vdev,
781 vq->split.avail_flags_shadow);
f277ec42 782 }
e593bf97
TB
783 vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
784 last_used_idx = vq->last_used_idx);
cc229884
MT
785 END_USE(vq);
786 return last_used_idx;
787}
138fd251 788
138fd251
TB
789static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx)
790{
791 struct vring_virtqueue *vq = to_vvq(_vq);
792
793 return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
e593bf97 794 vq->split.vring.used->idx);
138fd251
TB
795}
796
138fd251 797static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
7ab358c2
MT
798{
799 struct vring_virtqueue *vq = to_vvq(_vq);
800 u16 bufs;
801
802 START_USE(vq);
803
804 /* We optimistically turn back on interrupts, then check if there was
805 * more to do. */
 806 * Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
807 * either clear the flags bit or point the event index at the next
0ea1e4a6 808 * entry. Always update the event index to keep code simple. */
e593bf97
TB
809 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
810 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
0ea1e4a6 811 if (!vq->event)
e593bf97
TB
812 vq->split.vring.avail->flags =
813 cpu_to_virtio16(_vq->vdev,
814 vq->split.avail_flags_shadow);
f277ec42 815 }
7ab358c2 816 /* TODO: tune this threshold */
e593bf97 817 bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;
788e5b3a
MT
818
819 virtio_store_mb(vq->weak_barriers,
e593bf97 820 &vring_used_event(&vq->split.vring),
788e5b3a
MT
821 cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
822
e593bf97
TB
823 if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
824 - vq->last_used_idx) > bufs)) {
7ab358c2
MT
825 END_USE(vq);
826 return false;
827 }
828
829 END_USE(vq);
830 return true;
831}
7ab358c2 832
138fd251 833static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
c021eac4
SM
834{
835 struct vring_virtqueue *vq = to_vvq(_vq);
836 unsigned int i;
837 void *buf;
838
839 START_USE(vq);
840
e593bf97 841 for (i = 0; i < vq->split.vring.num; i++) {
cbeedb72 842 if (!vq->split.desc_state[i].data)
c021eac4 843 continue;
138fd251 844 /* detach_buf_split clears data, so grab it now. */
cbeedb72 845 buf = vq->split.desc_state[i].data;
138fd251 846 detach_buf_split(vq, i, NULL);
e593bf97
TB
847 vq->split.avail_idx_shadow--;
848 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
849 vq->split.avail_idx_shadow);
c021eac4
SM
850 END_USE(vq);
851 return buf;
852 }
853 /* That should have freed everything. */
e593bf97 854 BUG_ON(vq->vq.num_free != vq->split.vring.num);
c021eac4
SM
855
856 END_USE(vq);
857 return NULL;
858}
138fd251 859
d79dca75
TB
860static struct virtqueue *vring_create_virtqueue_split(
861 unsigned int index,
862 unsigned int num,
863 unsigned int vring_align,
864 struct virtio_device *vdev,
865 bool weak_barriers,
866 bool may_reduce_num,
867 bool context,
868 bool (*notify)(struct virtqueue *),
869 void (*callback)(struct virtqueue *),
870 const char *name)
871{
872 struct virtqueue *vq;
873 void *queue = NULL;
874 dma_addr_t dma_addr;
875 size_t queue_size_in_bytes;
876 struct vring vring;
877
878 /* We assume num is a power of 2. */
879 if (num & (num - 1)) {
880 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
881 return NULL;
882 }
883
884 /* TODO: allocate each queue chunk individually */
885 for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
886 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
887 &dma_addr,
888 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
889 if (queue)
890 break;
cf94db21
CH
891 if (!may_reduce_num)
892 return NULL;
d79dca75
TB
893 }
894
895 if (!num)
896 return NULL;
897
898 if (!queue) {
899 /* Try to get a single page. You are my only hope! */
900 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
901 &dma_addr, GFP_KERNEL|__GFP_ZERO);
902 }
903 if (!queue)
904 return NULL;
905
906 queue_size_in_bytes = vring_size(num, vring_align);
907 vring_init(&vring, num, queue, vring_align);
908
909 vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
910 notify, callback, name);
911 if (!vq) {
912 vring_free_queue(vdev, queue_size_in_bytes, queue,
913 dma_addr);
914 return NULL;
915 }
916
917 to_vvq(vq)->split.queue_dma_addr = dma_addr;
918 to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes;
919 to_vvq(vq)->we_own_ring = true;
920
921 return vq;
922}
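/*
 * Sizing note (restating the loop above): when may_reduce_num is set, num
 * is halved until a physically contiguous allocation of
 * vring_size(num, vring_align) succeeds or the ring would fit in a single
 * page, which is then attempted one last time before giving up.
 */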
923
e6f633e5 924
1ce9e605
TB
925/*
926 * Packed ring specific functions - *_packed().
927 */
928
929static void vring_unmap_state_packed(const struct vring_virtqueue *vq,
1f28750f 930 struct vring_desc_extra *state)
1ce9e605
TB
931{
932 u16 flags;
933
934 if (!vq->use_dma_api)
935 return;
936
937 flags = state->flags;
938
939 if (flags & VRING_DESC_F_INDIRECT) {
940 dma_unmap_single(vring_dma_dev(vq),
941 state->addr, state->len,
942 (flags & VRING_DESC_F_WRITE) ?
943 DMA_FROM_DEVICE : DMA_TO_DEVICE);
944 } else {
945 dma_unmap_page(vring_dma_dev(vq),
946 state->addr, state->len,
947 (flags & VRING_DESC_F_WRITE) ?
948 DMA_FROM_DEVICE : DMA_TO_DEVICE);
949 }
950}
951
952static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
953 struct vring_packed_desc *desc)
954{
955 u16 flags;
956
957 if (!vq->use_dma_api)
958 return;
959
960 flags = le16_to_cpu(desc->flags);
961
962 if (flags & VRING_DESC_F_INDIRECT) {
963 dma_unmap_single(vring_dma_dev(vq),
964 le64_to_cpu(desc->addr),
965 le32_to_cpu(desc->len),
966 (flags & VRING_DESC_F_WRITE) ?
967 DMA_FROM_DEVICE : DMA_TO_DEVICE);
968 } else {
969 dma_unmap_page(vring_dma_dev(vq),
970 le64_to_cpu(desc->addr),
971 le32_to_cpu(desc->len),
972 (flags & VRING_DESC_F_WRITE) ?
973 DMA_FROM_DEVICE : DMA_TO_DEVICE);
974 }
975}
976
977static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
978 gfp_t gfp)
979{
980 struct vring_packed_desc *desc;
981
982 /*
983 * We require lowmem mappings for the descriptors because
984 * otherwise virt_to_phys will give us bogus addresses in the
985 * virtqueue.
986 */
987 gfp &= ~__GFP_HIGHMEM;
988
989 desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp);
990
991 return desc;
992}
993
994static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
995 struct scatterlist *sgs[],
996 unsigned int total_sg,
997 unsigned int out_sgs,
998 unsigned int in_sgs,
999 void *data,
1000 gfp_t gfp)
1001{
1002 struct vring_packed_desc *desc;
1003 struct scatterlist *sg;
1004 unsigned int i, n, err_idx;
1005 u16 head, id;
1006 dma_addr_t addr;
1007
1008 head = vq->packed.next_avail_idx;
1009 desc = alloc_indirect_packed(total_sg, gfp);
1010
1011 if (unlikely(vq->vq.num_free < 1)) {
1012 pr_debug("Can't add buf len 1 - avail = 0\n");
df0bfe75 1013 kfree(desc);
1ce9e605
TB
1014 END_USE(vq);
1015 return -ENOSPC;
1016 }
1017
1018 i = 0;
1019 id = vq->free_head;
1020 BUG_ON(id == vq->packed.vring.num);
1021
1022 for (n = 0; n < out_sgs + in_sgs; n++) {
1023 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1024 addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1025 DMA_TO_DEVICE : DMA_FROM_DEVICE);
1026 if (vring_mapping_error(vq, addr))
1027 goto unmap_release;
1028
1029 desc[i].flags = cpu_to_le16(n < out_sgs ?
1030 0 : VRING_DESC_F_WRITE);
1031 desc[i].addr = cpu_to_le64(addr);
1032 desc[i].len = cpu_to_le32(sg->length);
1033 i++;
1034 }
1035 }
1036
1037 /* Now that the indirect table is filled in, map it. */
1038 addr = vring_map_single(vq, desc,
1039 total_sg * sizeof(struct vring_packed_desc),
1040 DMA_TO_DEVICE);
1041 if (vring_mapping_error(vq, addr))
1042 goto unmap_release;
1043
1044 vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
1045 vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
1046 sizeof(struct vring_packed_desc));
1047 vq->packed.vring.desc[head].id = cpu_to_le16(id);
1048
1049 if (vq->use_dma_api) {
1050 vq->packed.desc_extra[id].addr = addr;
1051 vq->packed.desc_extra[id].len = total_sg *
1052 sizeof(struct vring_packed_desc);
1053 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
1054 vq->packed.avail_used_flags;
1055 }
1056
1057 /*
1058 * A driver MUST NOT make the first descriptor in the list
1059 * available before all subsequent descriptors comprising
1060 * the list are made available.
1061 */
1062 virtio_wmb(vq->weak_barriers);
1063 vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
1064 vq->packed.avail_used_flags);
1065
1066 /* We're using some buffers from the free list. */
1067 vq->vq.num_free -= 1;
1068
1069 /* Update free pointer */
1070 n = head + 1;
1071 if (n >= vq->packed.vring.num) {
1072 n = 0;
1073 vq->packed.avail_wrap_counter ^= 1;
1074 vq->packed.avail_used_flags ^=
1075 1 << VRING_PACKED_DESC_F_AVAIL |
1076 1 << VRING_PACKED_DESC_F_USED;
1077 }
1078 vq->packed.next_avail_idx = n;
aeef9b47 1079 vq->free_head = vq->packed.desc_extra[id].next;
1ce9e605
TB
1080
1081 /* Store token and indirect buffer state. */
1082 vq->packed.desc_state[id].num = 1;
1083 vq->packed.desc_state[id].data = data;
1084 vq->packed.desc_state[id].indir_desc = desc;
1085 vq->packed.desc_state[id].last = id;
1086
1087 vq->num_added += 1;
1088
1089 pr_debug("Added buffer head %i to %p\n", head, vq);
1090 END_USE(vq);
1091
1092 return 0;
1093
1094unmap_release:
1095 err_idx = i;
1096
1097 for (i = 0; i < err_idx; i++)
1098 vring_unmap_desc_packed(vq, &desc[i]);
1099
1100 kfree(desc);
1101
1102 END_USE(vq);
f7728002 1103 return -ENOMEM;
1ce9e605
TB
1104}
1105
1106static inline int virtqueue_add_packed(struct virtqueue *_vq,
1107 struct scatterlist *sgs[],
1108 unsigned int total_sg,
1109 unsigned int out_sgs,
1110 unsigned int in_sgs,
1111 void *data,
1112 void *ctx,
1113 gfp_t gfp)
1114{
1115 struct vring_virtqueue *vq = to_vvq(_vq);
1116 struct vring_packed_desc *desc;
1117 struct scatterlist *sg;
1118 unsigned int i, n, c, descs_used, err_idx;
3f649ab7
KC
1119 __le16 head_flags, flags;
1120 u16 head, id, prev, curr, avail_used_flags;
1ce9e605
TB
1121
1122 START_USE(vq);
1123
1124 BUG_ON(data == NULL);
1125 BUG_ON(ctx && vq->indirect);
1126
1127 if (unlikely(vq->broken)) {
1128 END_USE(vq);
1129 return -EIO;
1130 }
1131
1132 LAST_ADD_TIME_UPDATE(vq);
1133
1134 BUG_ON(total_sg == 0);
1135
1136 if (virtqueue_use_indirect(_vq, total_sg))
1137 return virtqueue_add_indirect_packed(vq, sgs, total_sg,
1138 out_sgs, in_sgs, data, gfp);
1139
1140 head = vq->packed.next_avail_idx;
1141 avail_used_flags = vq->packed.avail_used_flags;
1142
1143 WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
1144
1145 desc = vq->packed.vring.desc;
1146 i = head;
1147 descs_used = total_sg;
1148
1149 if (unlikely(vq->vq.num_free < descs_used)) {
1150 pr_debug("Can't add buf len %i - avail = %i\n",
1151 descs_used, vq->vq.num_free);
1152 END_USE(vq);
1153 return -ENOSPC;
1154 }
1155
1156 id = vq->free_head;
1157 BUG_ON(id == vq->packed.vring.num);
1158
1159 curr = id;
1160 c = 0;
1161 for (n = 0; n < out_sgs + in_sgs; n++) {
1162 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1163 dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1164 DMA_TO_DEVICE : DMA_FROM_DEVICE);
1165 if (vring_mapping_error(vq, addr))
1166 goto unmap_release;
1167
1168 flags = cpu_to_le16(vq->packed.avail_used_flags |
1169 (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
1170 (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
1171 if (i == head)
1172 head_flags = flags;
1173 else
1174 desc[i].flags = flags;
1175
1176 desc[i].addr = cpu_to_le64(addr);
1177 desc[i].len = cpu_to_le32(sg->length);
1178 desc[i].id = cpu_to_le16(id);
1179
1180 if (unlikely(vq->use_dma_api)) {
1181 vq->packed.desc_extra[curr].addr = addr;
1182 vq->packed.desc_extra[curr].len = sg->length;
1183 vq->packed.desc_extra[curr].flags =
1184 le16_to_cpu(flags);
1185 }
1186 prev = curr;
aeef9b47 1187 curr = vq->packed.desc_extra[curr].next;
1ce9e605
TB
1188
1189 if ((unlikely(++i >= vq->packed.vring.num))) {
1190 i = 0;
1191 vq->packed.avail_used_flags ^=
1192 1 << VRING_PACKED_DESC_F_AVAIL |
1193 1 << VRING_PACKED_DESC_F_USED;
1194 }
1195 }
1196 }
1197
1198 if (i < head)
1199 vq->packed.avail_wrap_counter ^= 1;
1200
1201 /* We're using some buffers from the free list. */
1202 vq->vq.num_free -= descs_used;
1203
1204 /* Update free pointer */
1205 vq->packed.next_avail_idx = i;
1206 vq->free_head = curr;
1207
1208 /* Store token. */
1209 vq->packed.desc_state[id].num = descs_used;
1210 vq->packed.desc_state[id].data = data;
1211 vq->packed.desc_state[id].indir_desc = ctx;
1212 vq->packed.desc_state[id].last = prev;
1213
1214 /*
1215 * A driver MUST NOT make the first descriptor in the list
1216 * available before all subsequent descriptors comprising
1217 * the list are made available.
1218 */
1219 virtio_wmb(vq->weak_barriers);
1220 vq->packed.vring.desc[head].flags = head_flags;
1221 vq->num_added += descs_used;
1222
1223 pr_debug("Added buffer head %i to %p\n", head, vq);
1224 END_USE(vq);
1225
1226 return 0;
1227
1228unmap_release:
1229 err_idx = i;
1230 i = head;
44593865 1231 curr = vq->free_head;
1ce9e605
TB
1232
1233 vq->packed.avail_used_flags = avail_used_flags;
1234
1235 for (n = 0; n < total_sg; n++) {
1236 if (i == err_idx)
1237 break;
44593865
JW
1238 vring_unmap_state_packed(vq,
1239 &vq->packed.desc_extra[curr]);
1240 curr = vq->packed.desc_extra[curr].next;
1ce9e605
TB
1241 i++;
1242 if (i >= vq->packed.vring.num)
1243 i = 0;
1244 }
1245
1246 END_USE(vq);
1247 return -EIO;
1248}
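/*
 * Packed-ring counterpart of the split add path: descriptors are written in
 * place starting at next_avail_idx, each carrying the current avail/used
 * wrap flags, and the head descriptor's flags are stored last, after
 * virtio_wmb(), so the device only sees the chain once it is complete.  The
 * unmap_release path walks desc_extra[] to undo any mappings already made.
 */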
1249
1250static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
1251{
1252 struct vring_virtqueue *vq = to_vvq(_vq);
f51f9826 1253 u16 new, old, off_wrap, flags, wrap_counter, event_idx;
1ce9e605
TB
1254 bool needs_kick;
1255 union {
1256 struct {
1257 __le16 off_wrap;
1258 __le16 flags;
1259 };
1260 u32 u32;
1261 } snapshot;
1262
1263 START_USE(vq);
1264
1265 /*
1266 * We need to expose the new flags value before checking notification
1267 * suppressions.
1268 */
1269 virtio_mb(vq->weak_barriers);
1270
f51f9826
TB
1271 old = vq->packed.next_avail_idx - vq->num_added;
1272 new = vq->packed.next_avail_idx;
1ce9e605
TB
1273 vq->num_added = 0;
1274
1275 snapshot.u32 = *(u32 *)vq->packed.vring.device;
1276 flags = le16_to_cpu(snapshot.flags);
1277
1278 LAST_ADD_TIME_CHECK(vq);
1279 LAST_ADD_TIME_INVALID(vq);
1280
f51f9826
TB
1281 if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
1282 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
1283 goto out;
1284 }
1285
1286 off_wrap = le16_to_cpu(snapshot.off_wrap);
1287
1288 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1289 event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1290 if (wrap_counter != vq->packed.avail_wrap_counter)
1291 event_idx -= vq->packed.vring.num;
1292
1293 needs_kick = vring_need_event(event_idx, new, old);
1294out:
1ce9e605
TB
1295 END_USE(vq);
1296 return needs_kick;
1297}
1298
1299static void detach_buf_packed(struct vring_virtqueue *vq,
1300 unsigned int id, void **ctx)
1301{
1302 struct vring_desc_state_packed *state = NULL;
1303 struct vring_packed_desc *desc;
1304 unsigned int i, curr;
1305
1306 state = &vq->packed.desc_state[id];
1307
1308 /* Clear data ptr. */
1309 state->data = NULL;
1310
aeef9b47 1311 vq->packed.desc_extra[state->last].next = vq->free_head;
1ce9e605
TB
1312 vq->free_head = id;
1313 vq->vq.num_free += state->num;
1314
1315 if (unlikely(vq->use_dma_api)) {
1316 curr = id;
1317 for (i = 0; i < state->num; i++) {
1318 vring_unmap_state_packed(vq,
1319 &vq->packed.desc_extra[curr]);
aeef9b47 1320 curr = vq->packed.desc_extra[curr].next;
1ce9e605
TB
1321 }
1322 }
1323
1324 if (vq->indirect) {
1325 u32 len;
1326
1327 /* Free the indirect table, if any, now that it's unmapped. */
1328 desc = state->indir_desc;
1329 if (!desc)
1330 return;
1331
1332 if (vq->use_dma_api) {
1333 len = vq->packed.desc_extra[id].len;
1334 for (i = 0; i < len / sizeof(struct vring_packed_desc);
1335 i++)
1336 vring_unmap_desc_packed(vq, &desc[i]);
1337 }
1338 kfree(desc);
1339 state->indir_desc = NULL;
1340 } else if (ctx) {
1341 *ctx = state->indir_desc;
1342 }
1343}
1344
1345static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
1346 u16 idx, bool used_wrap_counter)
1347{
1348 bool avail, used;
1349 u16 flags;
1350
1351 flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
1352 avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
1353 used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
1354
1355 return avail == used && used == used_wrap_counter;
1356}
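/*
 * Restating the check above: a packed-ring slot is "used" once the device
 * has set its AVAIL and USED flag bits equal to each other and to the
 * driver's used_wrap_counter.  The counter is toggled every time
 * last_used_idx wraps past vring.num, so entries left over from the
 * previous lap (whose bits still match the old counter value) are not
 * mistaken for fresh completions.
 */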
1357
1358static inline bool more_used_packed(const struct vring_virtqueue *vq)
1359{
1360 return is_used_desc_packed(vq, vq->last_used_idx,
1361 vq->packed.used_wrap_counter);
1362}
1363
1364static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
1365 unsigned int *len,
1366 void **ctx)
1367{
1368 struct vring_virtqueue *vq = to_vvq(_vq);
1369 u16 last_used, id;
1370 void *ret;
1371
1372 START_USE(vq);
1373
1374 if (unlikely(vq->broken)) {
1375 END_USE(vq);
1376 return NULL;
1377 }
1378
1379 if (!more_used_packed(vq)) {
1380 pr_debug("No more buffers in queue\n");
1381 END_USE(vq);
1382 return NULL;
1383 }
1384
1385 /* Only get used elements after they have been exposed by host. */
1386 virtio_rmb(vq->weak_barriers);
1387
1388 last_used = vq->last_used_idx;
1389 id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
1390 *len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
1391
1392 if (unlikely(id >= vq->packed.vring.num)) {
1393 BAD_RING(vq, "id %u out of range\n", id);
1394 return NULL;
1395 }
1396 if (unlikely(!vq->packed.desc_state[id].data)) {
1397 BAD_RING(vq, "id %u is not a head!\n", id);
1398 return NULL;
1399 }
1400
1401 /* detach_buf_packed clears data, so grab it now. */
1402 ret = vq->packed.desc_state[id].data;
1403 detach_buf_packed(vq, id, ctx);
1404
1405 vq->last_used_idx += vq->packed.desc_state[id].num;
1406 if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) {
1407 vq->last_used_idx -= vq->packed.vring.num;
1408 vq->packed.used_wrap_counter ^= 1;
1409 }
1410
f51f9826
TB
1411 /*
1412 * If we expect an interrupt for the next entry, tell host
1413 * by writing event index and flush out the write before
1414 * the read in the next get_buf call.
1415 */
1416 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
1417 virtio_store_mb(vq->weak_barriers,
1418 &vq->packed.vring.driver->off_wrap,
1419 cpu_to_le16(vq->last_used_idx |
1420 (vq->packed.used_wrap_counter <<
1421 VRING_PACKED_EVENT_F_WRAP_CTR)));
1422
1ce9e605
TB
1423 LAST_ADD_TIME_INVALID(vq);
1424
1425 END_USE(vq);
1426 return ret;
1427}
1428
1429static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
1430{
1431 struct vring_virtqueue *vq = to_vvq(_vq);
1432
1433 if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
1434 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1435 vq->packed.vring.driver->flags =
1436 cpu_to_le16(vq->packed.event_flags_shadow);
1437 }
1438}
1439
1440static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
1441{
1442 struct vring_virtqueue *vq = to_vvq(_vq);
1443
1444 START_USE(vq);
1445
1446 /*
1447 * We optimistically turn back on interrupts, then check if there was
1448 * more to do.
1449 */
1450
f51f9826
TB
1451 if (vq->event) {
1452 vq->packed.vring.driver->off_wrap =
1453 cpu_to_le16(vq->last_used_idx |
1454 (vq->packed.used_wrap_counter <<
1455 VRING_PACKED_EVENT_F_WRAP_CTR));
1456 /*
1457 * We need to update event offset and event wrap
1458 * counter first before updating event flags.
1459 */
1460 virtio_wmb(vq->weak_barriers);
1461 }
1462
1ce9e605 1463 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
f51f9826
TB
1464 vq->packed.event_flags_shadow = vq->event ?
1465 VRING_PACKED_EVENT_FLAG_DESC :
1466 VRING_PACKED_EVENT_FLAG_ENABLE;
1ce9e605
TB
1467 vq->packed.vring.driver->flags =
1468 cpu_to_le16(vq->packed.event_flags_shadow);
1469 }
1470
1471 END_USE(vq);
1472 return vq->last_used_idx | ((u16)vq->packed.used_wrap_counter <<
1473 VRING_PACKED_EVENT_F_WRAP_CTR);
1474}
1475
1476static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
1477{
1478 struct vring_virtqueue *vq = to_vvq(_vq);
1479 bool wrap_counter;
1480 u16 used_idx;
1481
1482 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1483 used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1484
1485 return is_used_desc_packed(vq, used_idx, wrap_counter);
1486}
1487
1488static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
1489{
1490 struct vring_virtqueue *vq = to_vvq(_vq);
1491 u16 used_idx, wrap_counter;
f51f9826 1492 u16 bufs;
1ce9e605
TB
1493
1494 START_USE(vq);
1495
1496 /*
1497 * We optimistically turn back on interrupts, then check if there was
1498 * more to do.
1499 */
1500
f51f9826
TB
1501 if (vq->event) {
1502 /* TODO: tune this threshold */
1503 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
1504 wrap_counter = vq->packed.used_wrap_counter;
1505
1506 used_idx = vq->last_used_idx + bufs;
1507 if (used_idx >= vq->packed.vring.num) {
1508 used_idx -= vq->packed.vring.num;
1509 wrap_counter ^= 1;
1510 }
1511
1512 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
1513 (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1514
1515 /*
1516 * We need to update event offset and event wrap
1517 * counter first before updating event flags.
1518 */
1519 virtio_wmb(vq->weak_barriers);
f51f9826 1520 }
1ce9e605
TB
1521
1522 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
f51f9826
TB
1523 vq->packed.event_flags_shadow = vq->event ?
1524 VRING_PACKED_EVENT_FLAG_DESC :
1525 VRING_PACKED_EVENT_FLAG_ENABLE;
1ce9e605
TB
1526 vq->packed.vring.driver->flags =
1527 cpu_to_le16(vq->packed.event_flags_shadow);
1528 }
1529
1530 /*
1531 * We need to update event suppression structure first
1532 * before re-checking for more used buffers.
1533 */
1534 virtio_mb(vq->weak_barriers);
1535
40ce7919
ML
1536 if (is_used_desc_packed(vq,
1537 vq->last_used_idx,
1538 vq->packed.used_wrap_counter)) {
1ce9e605
TB
1539 END_USE(vq);
1540 return false;
1541 }
1542
1543 END_USE(vq);
1544 return true;
1545}
1546
1547static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
1548{
1549 struct vring_virtqueue *vq = to_vvq(_vq);
1550 unsigned int i;
1551 void *buf;
1552
1553 START_USE(vq);
1554
1555 for (i = 0; i < vq->packed.vring.num; i++) {
1556 if (!vq->packed.desc_state[i].data)
1557 continue;
 1558 /* detach_buf_packed clears data, so grab it now. */
1559 buf = vq->packed.desc_state[i].data;
1560 detach_buf_packed(vq, i, NULL);
1561 END_USE(vq);
1562 return buf;
1563 }
1564 /* That should have freed everything. */
1565 BUG_ON(vq->vq.num_free != vq->packed.vring.num);
1566
1567 END_USE(vq);
1568 return NULL;
1569}
1570
5a222421
JW
1571static struct vring_desc_extra *vring_alloc_desc_extra(struct vring_virtqueue *vq,
1572 unsigned int num)
1573{
1574 struct vring_desc_extra *desc_extra;
1575 unsigned int i;
1576
1577 desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra),
1578 GFP_KERNEL);
1579 if (!desc_extra)
1580 return NULL;
1581
1582 memset(desc_extra, 0, num * sizeof(struct vring_desc_extra));
1583
1584 for (i = 0; i < num - 1; i++)
1585 desc_extra[i].next = i + 1;
1586
1587 return desc_extra;
1588}
1589
1ce9e605
TB
1590static struct virtqueue *vring_create_virtqueue_packed(
1591 unsigned int index,
1592 unsigned int num,
1593 unsigned int vring_align,
1594 struct virtio_device *vdev,
1595 bool weak_barriers,
1596 bool may_reduce_num,
1597 bool context,
1598 bool (*notify)(struct virtqueue *),
1599 void (*callback)(struct virtqueue *),
1600 const char *name)
1601{
1602 struct vring_virtqueue *vq;
1603 struct vring_packed_desc *ring;
1604 struct vring_packed_desc_event *driver, *device;
1605 dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
1606 size_t ring_size_in_bytes, event_size_in_bytes;
1ce9e605
TB
1607
1608 ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
1609
1610 ring = vring_alloc_queue(vdev, ring_size_in_bytes,
1611 &ring_dma_addr,
1612 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1613 if (!ring)
1614 goto err_ring;
1615
1616 event_size_in_bytes = sizeof(struct vring_packed_desc_event);
1617
1618 driver = vring_alloc_queue(vdev, event_size_in_bytes,
1619 &driver_event_dma_addr,
1620 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1621 if (!driver)
1622 goto err_driver;
1623
1624 device = vring_alloc_queue(vdev, event_size_in_bytes,
1625 &device_event_dma_addr,
1626 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1627 if (!device)
1628 goto err_device;
1629
1630 vq = kmalloc(sizeof(*vq), GFP_KERNEL);
1631 if (!vq)
1632 goto err_vq;
1633
1634 vq->vq.callback = callback;
1635 vq->vq.vdev = vdev;
1636 vq->vq.name = name;
1637 vq->vq.num_free = num;
1638 vq->vq.index = index;
1639 vq->we_own_ring = true;
1640 vq->notify = notify;
1641 vq->weak_barriers = weak_barriers;
1642 vq->broken = false;
1643 vq->last_used_idx = 0;
8d622d21 1644 vq->event_triggered = false;
1ce9e605
TB
1645 vq->num_added = 0;
1646 vq->packed_ring = true;
1647 vq->use_dma_api = vring_use_dma_api(vdev);
1ce9e605
TB
1648#ifdef DEBUG
1649 vq->in_use = false;
1650 vq->last_add_time_valid = false;
1651#endif
1652
1653 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
1654 !context;
1655 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1656
45383fb0
TB
1657 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
1658 vq->weak_barriers = false;
1659
1ce9e605
TB
1660 vq->packed.ring_dma_addr = ring_dma_addr;
1661 vq->packed.driver_event_dma_addr = driver_event_dma_addr;
1662 vq->packed.device_event_dma_addr = device_event_dma_addr;
1663
1664 vq->packed.ring_size_in_bytes = ring_size_in_bytes;
1665 vq->packed.event_size_in_bytes = event_size_in_bytes;
1666
1667 vq->packed.vring.num = num;
1668 vq->packed.vring.desc = ring;
1669 vq->packed.vring.driver = driver;
1670 vq->packed.vring.device = device;
1671
1672 vq->packed.next_avail_idx = 0;
1673 vq->packed.avail_wrap_counter = 1;
1674 vq->packed.used_wrap_counter = 1;
1675 vq->packed.event_flags_shadow = 0;
1676 vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
1677
1678 vq->packed.desc_state = kmalloc_array(num,
1679 sizeof(struct vring_desc_state_packed),
1680 GFP_KERNEL);
1681 if (!vq->packed.desc_state)
1682 goto err_desc_state;
1683
1684 memset(vq->packed.desc_state, 0,
1685 num * sizeof(struct vring_desc_state_packed));
1686
1687 /* Put everything in free lists. */
1688 vq->free_head = 0;
1ce9e605 1689
5a222421 1690 vq->packed.desc_extra = vring_alloc_desc_extra(vq, num);
1ce9e605
TB
1691 if (!vq->packed.desc_extra)
1692 goto err_desc_extra;
1693
1ce9e605
TB
1694 /* No callback? Tell other side not to bother us. */
1695 if (!callback) {
1696 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1697 vq->packed.vring.driver->flags =
1698 cpu_to_le16(vq->packed.event_flags_shadow);
1699 }
1700
e152d8af 1701 list_add_tail(&vq->vq.list, &vdev->vqs);
1ce9e605
TB
1702 return &vq->vq;
1703
1704err_desc_extra:
1705 kfree(vq->packed.desc_state);
1706err_desc_state:
1707 kfree(vq);
1708err_vq:
ae93d8ea 1709 vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr);
1ce9e605 1710err_device:
ae93d8ea 1711 vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr);
1ce9e605
TB
1712err_driver:
1713 vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr);
1714err_ring:
1715 return NULL;
1716}
1717
1718
e6f633e5
TB
1719/*
1720 * Generic functions and exported symbols.
1721 */
1722
1723static inline int virtqueue_add(struct virtqueue *_vq,
1724 struct scatterlist *sgs[],
1725 unsigned int total_sg,
1726 unsigned int out_sgs,
1727 unsigned int in_sgs,
1728 void *data,
1729 void *ctx,
1730 gfp_t gfp)
1731{
1ce9e605
TB
1732 struct vring_virtqueue *vq = to_vvq(_vq);
1733
1734 return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
1735 out_sgs, in_sgs, data, ctx, gfp) :
1736 virtqueue_add_split(_vq, sgs, total_sg,
1737 out_sgs, in_sgs, data, ctx, gfp);
e6f633e5
TB
1738}
1739
1740/**
1741 * virtqueue_add_sgs - expose buffers to other end
a5581206 1742 * @_vq: the struct virtqueue we're talking about.
e6f633e5 1743 * @sgs: array of terminated scatterlists.
a5581206
JB
1744 * @out_sgs: the number of scatterlists readable by other side
1745 * @in_sgs: the number of scatterlists which are writable (after readable ones)
e6f633e5
TB
1746 * @data: the token identifying the buffer.
1747 * @gfp: how to do memory allocations (if necessary).
1748 *
1749 * Caller must ensure we don't call this with other virtqueue operations
1750 * at the same time (except where noted).
1751 *
 1752 * Returns zero or a negative error (i.e. ENOSPC, ENOMEM, EIO).
1753 */
1754int virtqueue_add_sgs(struct virtqueue *_vq,
1755 struct scatterlist *sgs[],
1756 unsigned int out_sgs,
1757 unsigned int in_sgs,
1758 void *data,
1759 gfp_t gfp)
1760{
1761 unsigned int i, total_sg = 0;
1762
1763 /* Count them first. */
1764 for (i = 0; i < out_sgs + in_sgs; i++) {
1765 struct scatterlist *sg;
1766
1767 for (sg = sgs[i]; sg; sg = sg_next(sg))
1768 total_sg++;
1769 }
1770 return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
1771 data, NULL, gfp);
1772}
1773EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
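/*
 * Usage sketch (illustrative only; the request layout is an assumption): a
 * driver queueing a command with a device-readable header and a
 * device-writable status byte might do
 *
 *	struct scatterlist hdr, status, *sgs[] = { &hdr, &status };
 *	int err;
 *
 *	sg_init_one(&hdr, &req->hdr, sizeof(req->hdr));
 *	sg_init_one(&status, &req->status, sizeof(req->status));
 *	err = virtqueue_add_sgs(vq, sgs, 1, 1, req, GFP_ATOMIC);
 *	if (err)
 *		return err;	// e.g. -ENOSPC: ring full, retry later
 *	virtqueue_kick(vq);
 *
 * Readable (out) scatterlists come first in sgs[] and writable (in) ones
 * follow, matching how virtqueue_add() walks them.
 */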
1774
1775/**
1776 * virtqueue_add_outbuf - expose output buffers to other end
1777 * @vq: the struct virtqueue we're talking about.
1778 * @sg: scatterlist (must be well-formed and terminated!)
1779 * @num: the number of entries in @sg readable by other side
1780 * @data: the token identifying the buffer.
1781 * @gfp: how to do memory allocations (if necessary).
1782 *
1783 * Caller must ensure we don't call this with other virtqueue operations
1784 * at the same time (except where noted).
1785 *
 1786 * Returns zero or a negative error (i.e. ENOSPC, ENOMEM, EIO).
1787 */
1788int virtqueue_add_outbuf(struct virtqueue *vq,
1789 struct scatterlist *sg, unsigned int num,
1790 void *data,
1791 gfp_t gfp)
1792{
1793 return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
1794}
1795EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
1796
1797/**
1798 * virtqueue_add_inbuf - expose input buffers to other end
1799 * @vq: the struct virtqueue we're talking about.
1800 * @sg: scatterlist (must be well-formed and terminated!)
1801 * @num: the number of entries in @sg writable by other side
1802 * @data: the token identifying the buffer.
1803 * @gfp: how to do memory allocations (if necessary).
1804 *
1805 * Caller must ensure we don't call this with other virtqueue operations
1806 * at the same time (except where noted).
1807 *
 1808 * Returns zero or a negative error (i.e. ENOSPC, ENOMEM, EIO).
1809 */
1810int virtqueue_add_inbuf(struct virtqueue *vq,
1811 struct scatterlist *sg, unsigned int num,
1812 void *data,
1813 gfp_t gfp)
1814{
1815 return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
1816}
1817EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
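/*
 * Usage sketch (illustrative; BUF_LEN and the allocation policy are
 * assumptions): receive paths usually pre-post a batch of buffers and kick
 * once, using the buffer pointer itself as the token:
 *
 *	struct scatterlist sg;
 *	void *buf;
 *
 *	while (vq->num_free) {
 *		buf = kmalloc(BUF_LEN, GFP_ATOMIC);
 *		if (!buf)
 *			break;
 *		sg_init_one(&sg, buf, BUF_LEN);
 *		if (virtqueue_add_inbuf(vq, &sg, 1, buf, GFP_ATOMIC)) {
 *			kfree(buf);
 *			break;
 *		}
 *	}
 *	virtqueue_kick(vq);
 *
 * The token is returned by virtqueue_get_buf() when the device has filled
 * the buffer.
 */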
1818
1819/**
1820 * virtqueue_add_inbuf_ctx - expose input buffers to other end
1821 * @vq: the struct virtqueue we're talking about.
1822 * @sg: scatterlist (must be well-formed and terminated!)
1823 * @num: the number of entries in @sg writable by other side
1824 * @data: the token identifying the buffer.
1825 * @ctx: extra context for the token
1826 * @gfp: how to do memory allocations (if necessary).
1827 *
1828 * Caller must ensure we don't call this with other virtqueue operations
1829 * at the same time (except where noted).
1830 *
 1831 * Returns zero or a negative error (i.e. ENOSPC, ENOMEM, EIO).
1832 */
1833int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
1834 struct scatterlist *sg, unsigned int num,
1835 void *data,
1836 void *ctx,
1837 gfp_t gfp)
1838{
1839 return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
1840}
1841EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
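/*
 * Example (illustrative sketch): refilling a receive queue with
 * single-entry buffers via virtqueue_add_inbuf(), then kicking once for
 * the whole batch.  Buffer size and GFP flags are assumptions made for
 * the example only.
 */
static void my_fill_rx(struct virtqueue *vq)
{
        struct scatterlist sg;
        void *buf;

        for (;;) {
                buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
                if (!buf)
                        break;

                sg_init_one(&sg, buf, PAGE_SIZE);
                /* The buffer pointer itself serves as the token. */
                if (virtqueue_add_inbuf(vq, &sg, 1, buf, GFP_KERNEL)) {
                        kfree(buf);     /* typically -ENOSPC: ring is full */
                        break;
                }
        }
        virtqueue_kick(vq);
}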
1842
1843/**
1844 * virtqueue_kick_prepare - first half of split virtqueue_kick call.
a5581206 1845 * @_vq: the struct virtqueue
e6f633e5
TB
1846 *
1847 * Instead of virtqueue_kick(), you can do:
1848 * if (virtqueue_kick_prepare(vq))
1849 * virtqueue_notify(vq);
1850 *
 1851 * This is sometimes useful because virtqueue_kick_prepare() needs
1852 * to be serialized, but the actual virtqueue_notify() call does not.
1853 */
1854bool virtqueue_kick_prepare(struct virtqueue *_vq)
1855{
1ce9e605
TB
1856 struct vring_virtqueue *vq = to_vvq(_vq);
1857
1858 return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
1859 virtqueue_kick_prepare_split(_vq);
e6f633e5
TB
1860}
1861EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
1862
1863/**
1864 * virtqueue_notify - second half of split virtqueue_kick call.
a5581206 1865 * @_vq: the struct virtqueue
e6f633e5
TB
1866 *
1867 * This does not need to be serialized.
1868 *
1869 * Returns false if host notify failed or queue is broken, otherwise true.
1870 */
1871bool virtqueue_notify(struct virtqueue *_vq)
1872{
1873 struct vring_virtqueue *vq = to_vvq(_vq);
1874
1875 if (unlikely(vq->broken))
1876 return false;
1877
1878 /* Prod other side to tell it about changes. */
1879 if (!vq->notify(_vq)) {
1880 vq->broken = true;
1881 return false;
1882 }
1883 return true;
1884}
1885EXPORT_SYMBOL_GPL(virtqueue_notify);
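/*
 * Example (illustrative sketch): the prepare/notify split lets a driver
 * hold its queue lock only around the serialized half and issue the
 * potentially expensive notification after dropping it.  The lock and
 * my_submit() are assumptions for the example only.
 */
static void my_submit(struct virtqueue *vq, spinlock_t *lock,
                      struct scatterlist *sg, void *token)
{
        unsigned long flags;
        bool needs_kick;
        int err;

        spin_lock_irqsave(lock, flags);
        err = virtqueue_add_outbuf(vq, sg, 1, token, GFP_ATOMIC);
        /* Must be serialized against other add/kick_prepare callers. */
        needs_kick = virtqueue_kick_prepare(vq);
        spin_unlock_irqrestore(lock, flags);

        if (err)
                pr_debug("ring full, %p not submitted\n", token);
        if (needs_kick)
                virtqueue_notify(vq);   /* may exit to the hypervisor */
}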
1886
1887/**
1888 * virtqueue_kick - update after add_buf
1889 * @vq: the struct virtqueue
1890 *
1891 * After one or more virtqueue_add_* calls, invoke this to kick
1892 * the other side.
1893 *
1894 * Caller must ensure we don't call this with other virtqueue
1895 * operations at the same time (except where noted).
1896 *
1897 * Returns false if kick failed, otherwise true.
1898 */
1899bool virtqueue_kick(struct virtqueue *vq)
1900{
1901 if (virtqueue_kick_prepare(vq))
1902 return virtqueue_notify(vq);
1903 return true;
1904}
1905EXPORT_SYMBOL_GPL(virtqueue_kick);
1906
1907/**
31c11db6 1908 * virtqueue_get_buf_ctx - get the next used buffer
a5581206 1909 * @_vq: the struct virtqueue we're talking about.
e6f633e5 1910 * @len: the length written into the buffer
a5581206 1911 * @ctx: extra context for the token
e6f633e5
TB
1912 *
1913 * If the device wrote data into the buffer, @len will be set to the
1914 * amount written. This means you don't need to clear the buffer
1915 * beforehand to ensure there's no data leakage in the case of short
1916 * writes.
1917 *
1918 * Caller must ensure we don't call this with other virtqueue
1919 * operations at the same time (except where noted).
1920 *
1921 * Returns NULL if there are no used buffers, or the "data" token
1922 * handed to virtqueue_add_*().
1923 */
1924void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
1925 void **ctx)
1926{
1ce9e605
TB
1927 struct vring_virtqueue *vq = to_vvq(_vq);
1928
1929 return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
1930 virtqueue_get_buf_ctx_split(_vq, len, ctx);
e6f633e5
TB
1931}
1932EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
1933
1934void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
1935{
1936 return virtqueue_get_buf_ctx(_vq, len, NULL);
1937}
1938EXPORT_SYMBOL_GPL(virtqueue_get_buf);
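/*
 * Example (illustrative sketch): draining completions.  Each token handed
 * to virtqueue_add_*() is returned exactly once; @len reports how many
 * bytes the device wrote into the buffer's writable part.
 */
static void my_drain_used(struct virtqueue *vq)
{
        unsigned int len;
        void *token;

        while ((token = virtqueue_get_buf(vq, &len)) != NULL)
                pr_debug("request %p done, device wrote %u bytes\n",
                         token, len);
}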
e6f633e5
TB
1939/**
1940 * virtqueue_disable_cb - disable callbacks
a5581206 1941 * @_vq: the struct virtqueue we're talking about.
e6f633e5
TB
1942 *
1943 * Note that this is not necessarily synchronous, hence unreliable and only
1944 * useful as an optimization.
1945 *
1946 * Unlike other operations, this need not be serialized.
1947 */
1948void virtqueue_disable_cb(struct virtqueue *_vq)
1949{
1ce9e605
TB
1950 struct vring_virtqueue *vq = to_vvq(_vq);
1951
8d622d21
MT
1952 /* If device triggered an event already it won't trigger one again:
1953 * no need to disable.
1954 */
1955 if (vq->event_triggered)
1956 return;
1957
1ce9e605
TB
1958 if (vq->packed_ring)
1959 virtqueue_disable_cb_packed(_vq);
1960 else
1961 virtqueue_disable_cb_split(_vq);
e6f633e5
TB
1962}
1963EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
1964
1965/**
1966 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
a5581206 1967 * @_vq: the struct virtqueue we're talking about.
e6f633e5
TB
1968 *
 1969 * This re-enables callbacks; it returns the current queue state
 1970 * in an opaque unsigned value. This value should later be tested by
1971 * virtqueue_poll, to detect a possible race between the driver checking for
1972 * more work, and enabling callbacks.
1973 *
1974 * Caller must ensure we don't call this with other virtqueue
1975 * operations at the same time (except where noted).
1976 */
1977unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
1978{
1ce9e605
TB
1979 struct vring_virtqueue *vq = to_vvq(_vq);
1980
8d622d21
MT
1981 if (vq->event_triggered)
1982 vq->event_triggered = false;
1983
1ce9e605
TB
1984 return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
1985 virtqueue_enable_cb_prepare_split(_vq);
e6f633e5
TB
1986}
1987EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
1988
1989/**
1990 * virtqueue_poll - query pending used buffers
a5581206 1991 * @_vq: the struct virtqueue we're talking about.
e6f633e5
TB
1992 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
1993 *
1994 * Returns "true" if there are pending used buffers in the queue.
1995 *
1996 * This does not need to be serialized.
1997 */
1998bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx)
1999{
2000 struct vring_virtqueue *vq = to_vvq(_vq);
2001
481a0d74
MW
2002 if (unlikely(vq->broken))
2003 return false;
2004
e6f633e5 2005 virtio_mb(vq->weak_barriers);
1ce9e605
TB
2006 return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
2007 virtqueue_poll_split(_vq, last_used_idx);
e6f633e5
TB
2008}
2009EXPORT_SYMBOL_GPL(virtqueue_poll);
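/*
 * Example (illustrative sketch): race-free processing loop built from
 * virtqueue_enable_cb_prepare()/virtqueue_poll().  Callbacks are only
 * left enabled once a post-enable check confirms the queue is empty.
 */
static void my_process_all(struct virtqueue *vq)
{
        unsigned int opaque, len;
        void *token;

        for (;;) {
                while ((token = virtqueue_get_buf(vq, &len)) != NULL)
                        pr_debug("completed %p (%u bytes)\n", token, len);

                opaque = virtqueue_enable_cb_prepare(vq);
                if (!virtqueue_poll(vq, opaque))
                        break;          /* callbacks armed, queue empty */

                /* Raced with a new completion: keep callbacks off, loop. */
                virtqueue_disable_cb(vq);
        }
}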
2010
2011/**
2012 * virtqueue_enable_cb - restart callbacks after disable_cb.
a5581206 2013 * @_vq: the struct virtqueue we're talking about.
e6f633e5
TB
2014 *
2015 * This re-enables callbacks; it returns "false" if there are pending
2016 * buffers in the queue, to detect a possible race between the driver
2017 * checking for more work, and enabling callbacks.
2018 *
2019 * Caller must ensure we don't call this with other virtqueue
2020 * operations at the same time (except where noted).
2021 */
2022bool virtqueue_enable_cb(struct virtqueue *_vq)
2023{
2024 unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq);
2025
2026 return !virtqueue_poll(_vq, last_used_idx);
2027}
2028EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
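/*
 * Example (illustrative sketch): the condensed form of the loop above,
 * letting virtqueue_enable_cb() do the prepare-then-poll dance.
 */
static void my_drain_and_rearm(struct virtqueue *vq)
{
        unsigned int len;
        void *token;

        do {
                virtqueue_disable_cb(vq);
                while ((token = virtqueue_get_buf(vq, &len)) != NULL)
                        pr_debug("completed %p (%u bytes)\n", token, len);
        } while (!virtqueue_enable_cb(vq));     /* false: more work arrived */
}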
2029
2030/**
2031 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
a5581206 2032 * @_vq: the struct virtqueue we're talking about.
e6f633e5
TB
2033 *
2034 * This re-enables callbacks but hints to the other side to delay
2035 * interrupts until most of the available buffers have been processed;
2036 * it returns "false" if there are many pending buffers in the queue,
2037 * to detect a possible race between the driver checking for more work,
2038 * and enabling callbacks.
2039 *
2040 * Caller must ensure we don't call this with other virtqueue
2041 * operations at the same time (except where noted).
2042 */
2043bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
2044{
1ce9e605
TB
2045 struct vring_virtqueue *vq = to_vvq(_vq);
2046
8d622d21
MT
2047 if (vq->event_triggered)
2048 vq->event_triggered = false;
2049
1ce9e605
TB
2050 return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
2051 virtqueue_enable_cb_delayed_split(_vq);
e6f633e5
TB
2052}
2053EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
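/*
 * Example (illustrative sketch): a transmit-completion path that reclaims
 * buffers opportunistically and uses the delayed variant so the device
 * interrupts only after most of the ring has been consumed.  Tokens are
 * assumed to be kmalloc()ed buffers here.
 */
static void my_tx_cleanup(struct virtqueue *vq)
{
        unsigned int len;
        void *token;

        /* Reclaim whatever has already completed, without interrupts. */
        virtqueue_disable_cb(vq);
        while ((token = virtqueue_get_buf(vq, &len)) != NULL)
                kfree(token);

        /* Re-arm; false means many completions are already pending. */
        if (!virtqueue_enable_cb_delayed(vq)) {
                while ((token = virtqueue_get_buf(vq, &len)) != NULL)
                        kfree(token);
        }
}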
2054
138fd251
TB
2055/**
2056 * virtqueue_detach_unused_buf - detach first unused buffer
a5581206 2057 * @_vq: the struct virtqueue we're talking about.
138fd251
TB
2058 *
2059 * Returns NULL or the "data" token handed to virtqueue_add_*().
2060 * This is not valid on an active queue; it is useful only for device
2061 * shutdown.
2062 */
2063void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
2064{
1ce9e605
TB
2065 struct vring_virtqueue *vq = to_vvq(_vq);
2066
2067 return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
2068 virtqueue_detach_unused_buf_split(_vq);
138fd251 2069}
7c5e9ed0 2070EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
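/*
 * Example (illustrative sketch): typical shutdown/remove path.  Once the
 * device has been reset and the queue is no longer active, buffers that
 * were added but never used are reclaimed before the ring itself is
 * freed by the transport.  Tokens are assumed to be kmalloc()ed buffers.
 */
static void my_cleanup_vq(struct virtqueue *vq)
{
        void *buf;

        while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
                kfree(buf);
        /* The transport's del_vqs()/vring_del_virtqueue() frees the ring. */
}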
c021eac4 2071
138fd251
TB
2072static inline bool more_used(const struct vring_virtqueue *vq)
2073{
1ce9e605 2074 return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
138fd251
TB
2075}
2076
0a8a69dd
RR
2077irqreturn_t vring_interrupt(int irq, void *_vq)
2078{
2079 struct vring_virtqueue *vq = to_vvq(_vq);
2080
2081 if (!more_used(vq)) {
2082 pr_debug("virtqueue interrupt with no work for %p\n", vq);
2083 return IRQ_NONE;
2084 }
2085
2086 if (unlikely(vq->broken))
2087 return IRQ_HANDLED;
2088
8d622d21
MT
2089 /* Just a hint for performance: so it's ok that this can be racy! */
2090 if (vq->event)
2091 vq->event_triggered = true;
2092
0a8a69dd 2093 pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
18445c4d
RR
2094 if (vq->vq.callback)
2095 vq->vq.callback(&vq->vq);
0a8a69dd
RR
2096
2097 return IRQ_HANDLED;
2098}
c6fd4701 2099EXPORT_SYMBOL_GPL(vring_interrupt);
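/*
 * Example (illustrative sketch): a hypothetical transport forwards its
 * per-queue interrupt straight to vring_interrupt(), which runs the
 * driver callback only when the ring actually has new used buffers.
 */
static irqreturn_t my_transport_vq_isr(int irq, void *opaque)
{
        struct virtqueue *vq = opaque;

        return vring_interrupt(irq, vq);
}
/* Registered e.g. with request_irq(irq, my_transport_vq_isr, 0, "my-vq", vq). */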
0a8a69dd 2100
1ce9e605 2101/* Only available for split ring */
2a2d1382
AL
2102struct virtqueue *__vring_new_virtqueue(unsigned int index,
2103 struct vring vring,
2104 struct virtio_device *vdev,
2105 bool weak_barriers,
f94682dd 2106 bool context,
2a2d1382
AL
2107 bool (*notify)(struct virtqueue *),
2108 void (*callback)(struct virtqueue *),
2109 const char *name)
0a8a69dd 2110{
0a8a69dd 2111 unsigned int i;
2a2d1382 2112 struct vring_virtqueue *vq;
0a8a69dd 2113
1ce9e605
TB
2114 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2115 return NULL;
2116
cbeedb72 2117 vq = kmalloc(sizeof(*vq), GFP_KERNEL);
0a8a69dd
RR
2118 if (!vq)
2119 return NULL;
2120
1ce9e605 2121 vq->packed_ring = false;
0a8a69dd
RR
2122 vq->vq.callback = callback;
2123 vq->vq.vdev = vdev;
9499f5e7 2124 vq->vq.name = name;
2a2d1382 2125 vq->vq.num_free = vring.num;
06ca287d 2126 vq->vq.index = index;
2a2d1382 2127 vq->we_own_ring = false;
0a8a69dd 2128 vq->notify = notify;
7b21e34f 2129 vq->weak_barriers = weak_barriers;
0a8a69dd
RR
2130 vq->broken = false;
2131 vq->last_used_idx = 0;
8d622d21 2132 vq->event_triggered = false;
0a8a69dd 2133 vq->num_added = 0;
fb3fba6b 2134 vq->use_dma_api = vring_use_dma_api(vdev);
0a8a69dd
RR
2135#ifdef DEBUG
2136 vq->in_use = false;
e93300b1 2137 vq->last_add_time_valid = false;
0a8a69dd
RR
2138#endif
2139
5a08b04f
MT
2140 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2141 !context;
a5c262c5 2142 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
9fa29b9d 2143
45383fb0
TB
2144 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2145 vq->weak_barriers = false;
2146
d79dca75
TB
2147 vq->split.queue_dma_addr = 0;
2148 vq->split.queue_size_in_bytes = 0;
2149
e593bf97
TB
2150 vq->split.vring = vring;
2151 vq->split.avail_flags_shadow = 0;
2152 vq->split.avail_idx_shadow = 0;
2153
0a8a69dd 2154 /* No callback? Tell other side not to bother us. */
f277ec42 2155 if (!callback) {
e593bf97 2156 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
0ea1e4a6 2157 if (!vq->event)
e593bf97
TB
2158 vq->split.vring.avail->flags = cpu_to_virtio16(vdev,
2159 vq->split.avail_flags_shadow);
f277ec42 2160 }
0a8a69dd 2161
cbeedb72
TB
2162 vq->split.desc_state = kmalloc_array(vring.num,
2163 sizeof(struct vring_desc_state_split), GFP_KERNEL);
5bc72234
JW
2164 if (!vq->split.desc_state)
2165 goto err_state;
cbeedb72 2166
0a8a69dd 2167 /* Put everything in free lists. */
0a8a69dd 2168 vq->free_head = 0;
2a2d1382 2169 for (i = 0; i < vring.num-1; i++)
e593bf97 2170 vq->split.vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
cbeedb72
TB
2171 memset(vq->split.desc_state, 0, vring.num *
2172 sizeof(struct vring_desc_state_split));
0a8a69dd 2173
e152d8af 2174 list_add_tail(&vq->vq.list, &vdev->vqs);
0a8a69dd 2175 return &vq->vq;
5bc72234
JW
2176
2177err_state:
2178 kfree(vq);
2179 return NULL;
0a8a69dd 2180}
2a2d1382
AL
2181EXPORT_SYMBOL_GPL(__vring_new_virtqueue);
2182
2a2d1382
AL
2183struct virtqueue *vring_create_virtqueue(
2184 unsigned int index,
2185 unsigned int num,
2186 unsigned int vring_align,
2187 struct virtio_device *vdev,
2188 bool weak_barriers,
2189 bool may_reduce_num,
f94682dd 2190 bool context,
2a2d1382
AL
2191 bool (*notify)(struct virtqueue *),
2192 void (*callback)(struct virtqueue *),
2193 const char *name)
2194{
1ce9e605
TB
2195
2196 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2197 return vring_create_virtqueue_packed(index, num, vring_align,
2198 vdev, weak_barriers, may_reduce_num,
2199 context, notify, callback, name);
2200
d79dca75
TB
2201 return vring_create_virtqueue_split(index, num, vring_align,
2202 vdev, weak_barriers, may_reduce_num,
2203 context, notify, callback, name);
2a2d1382
AL
2204}
2205EXPORT_SYMBOL_GPL(vring_create_virtqueue);
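/*
 * Example (illustrative sketch): how a transport might create a queue.
 * The notify hook, ring size and alignment are assumptions; a real
 * transport derives them from the device's configuration space.
 */
static bool my_notify(struct virtqueue *vq)
{
        /* Poke the device's doorbell here; return false on failure. */
        return true;
}

static struct virtqueue *my_setup_vq(struct virtio_device *vdev,
                                     unsigned int index,
                                     void (*callback)(struct virtqueue *),
                                     const char *name)
{
        /* 128 entries, page-aligned, may shrink if allocation fails. */
        return vring_create_virtqueue(index, 128, PAGE_SIZE, vdev,
                                      true  /* weak_barriers */,
                                      true  /* may_reduce_num */,
                                      false /* context */,
                                      my_notify, callback, name);
}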
2206
1ce9e605 2207/* Only available for split ring */
2a2d1382
AL
2208struct virtqueue *vring_new_virtqueue(unsigned int index,
2209 unsigned int num,
2210 unsigned int vring_align,
2211 struct virtio_device *vdev,
2212 bool weak_barriers,
f94682dd 2213 bool context,
2a2d1382
AL
2214 void *pages,
2215 bool (*notify)(struct virtqueue *vq),
2216 void (*callback)(struct virtqueue *vq),
2217 const char *name)
2218{
2219 struct vring vring;
1ce9e605
TB
2220
2221 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2222 return NULL;
2223
2a2d1382 2224 vring_init(&vring, num, pages, vring_align);
f94682dd 2225 return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
2a2d1382
AL
2226 notify, callback, name);
2227}
c6fd4701 2228EXPORT_SYMBOL_GPL(vring_new_virtqueue);
0a8a69dd 2229
2a2d1382 2230void vring_del_virtqueue(struct virtqueue *_vq)
0a8a69dd 2231{
2a2d1382
AL
2232 struct vring_virtqueue *vq = to_vvq(_vq);
2233
2234 if (vq->we_own_ring) {
1ce9e605
TB
2235 if (vq->packed_ring) {
2236 vring_free_queue(vq->vq.vdev,
2237 vq->packed.ring_size_in_bytes,
2238 vq->packed.vring.desc,
2239 vq->packed.ring_dma_addr);
2240
2241 vring_free_queue(vq->vq.vdev,
2242 vq->packed.event_size_in_bytes,
2243 vq->packed.vring.driver,
2244 vq->packed.driver_event_dma_addr);
2245
2246 vring_free_queue(vq->vq.vdev,
2247 vq->packed.event_size_in_bytes,
2248 vq->packed.vring.device,
2249 vq->packed.device_event_dma_addr);
2250
2251 kfree(vq->packed.desc_state);
2252 kfree(vq->packed.desc_extra);
2253 } else {
2254 vring_free_queue(vq->vq.vdev,
2255 vq->split.queue_size_in_bytes,
2256 vq->split.vring.desc,
2257 vq->split.queue_dma_addr);
1ce9e605 2258 }
2a2d1382 2259 }
f13f09a1
SA
2260 if (!vq->packed_ring)
2261 kfree(vq->split.desc_state);
2a2d1382
AL
2262 list_del(&_vq->list);
2263 kfree(vq);
0a8a69dd 2264}
c6fd4701 2265EXPORT_SYMBOL_GPL(vring_del_virtqueue);
0a8a69dd 2266
e34f8725
RR
2267/* Manipulates transport-specific feature bits. */
2268void vring_transport_features(struct virtio_device *vdev)
2269{
2270 unsigned int i;
2271
2272 for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
2273 switch (i) {
9fa29b9d
MM
2274 case VIRTIO_RING_F_INDIRECT_DESC:
2275 break;
a5c262c5
MT
2276 case VIRTIO_RING_F_EVENT_IDX:
2277 break;
747ae34a
MT
2278 case VIRTIO_F_VERSION_1:
2279 break;
321bd212 2280 case VIRTIO_F_ACCESS_PLATFORM:
1a937693 2281 break;
f959a128
TB
2282 case VIRTIO_F_RING_PACKED:
2283 break;
45383fb0
TB
2284 case VIRTIO_F_ORDER_PLATFORM:
2285 break;
e34f8725
RR
2286 default:
2287 /* We don't understand this bit. */
e16e12be 2288 __virtio_clear_bit(vdev, i);
e34f8725
RR
2289 }
2290 }
2291}
2292EXPORT_SYMBOL_GPL(vring_transport_features);
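/*
 * Example (illustrative sketch): a transport's finalize_features hook
 * gives the ring code a chance to drop transport feature bits it does
 * not understand before the features are written back to the device.
 * The surrounding config-ops wiring is assumed, not shown.
 */
static int my_finalize_features(struct virtio_device *vdev)
{
        vring_transport_features(vdev);
        /* ...then propagate vdev->features to the device... */
        return 0;
}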
2293
5dfc1762
RR
2294/**
2295 * virtqueue_get_vring_size - return the size of the virtqueue's vring
a5581206 2296 * @_vq: the struct virtqueue containing the vring of interest.
5dfc1762
RR
2297 *
2298 * Returns the size of the vring. This is mainly used for boasting to
2299 * userspace. Unlike other operations, this need not be serialized.
2300 */
8f9f4668
RJ
2301unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
2302{
2304 struct vring_virtqueue *vq = to_vvq(_vq);
2305
1ce9e605 2306 return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
8f9f4668
RJ
2307}
2308EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
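/*
 * Example (illustrative sketch): besides reporting the size to userspace,
 * a driver can use it to size per-request state, since at most one
 * request per ring entry can be outstanding.
 */
static void **my_alloc_tokens(struct virtqueue *vq)
{
        return kcalloc(virtqueue_get_vring_size(vq), sizeof(void *),
                       GFP_KERNEL);
}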
2309
b3b32c94
HG
2310bool virtqueue_is_broken(struct virtqueue *_vq)
2311{
2312 struct vring_virtqueue *vq = to_vvq(_vq);
2313
2314 return vq->broken;
2315}
2316EXPORT_SYMBOL_GPL(virtqueue_is_broken);
2317
e2dcdfe9
RR
2318/*
2319 * This should prevent the device from being used, allowing drivers to
2320 * recover. You may need to grab appropriate locks to flush.
2321 */
2322void virtio_break_device(struct virtio_device *dev)
2323{
2324 struct virtqueue *_vq;
2325
2326 list_for_each_entry(_vq, &dev->vqs, list) {
2327 struct vring_virtqueue *vq = to_vvq(_vq);
2328 vq->broken = true;
2329 }
2330}
2331EXPORT_SYMBOL_GPL(virtio_break_device);
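/*
 * Example (illustrative sketch): marking a device broken after a fatal
 * error.  The lock is an assumption standing in for whatever serializes
 * this driver's virtqueue operations.
 */
static void my_handle_fatal_error(struct virtio_device *vdev,
                                  spinlock_t *lock)
{
        unsigned long flags;

        spin_lock_irqsave(lock, flags);
        virtio_break_device(vdev);      /* further vq operations fail fast */
        spin_unlock_irqrestore(lock, flags);
}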
2332
2a2d1382 2333dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
89062652
CH
2334{
2335 struct vring_virtqueue *vq = to_vvq(_vq);
2336
2a2d1382
AL
2337 BUG_ON(!vq->we_own_ring);
2338
1ce9e605
TB
2339 if (vq->packed_ring)
2340 return vq->packed.ring_dma_addr;
2341
d79dca75 2342 return vq->split.queue_dma_addr;
89062652 2343}
2a2d1382 2344EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
89062652 2345
2a2d1382 2346dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
89062652
CH
2347{
2348 struct vring_virtqueue *vq = to_vvq(_vq);
2349
2a2d1382
AL
2350 BUG_ON(!vq->we_own_ring);
2351
1ce9e605
TB
2352 if (vq->packed_ring)
2353 return vq->packed.driver_event_dma_addr;
2354
d79dca75 2355 return vq->split.queue_dma_addr +
e593bf97 2356 ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
2a2d1382
AL
2357}
2358EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
2359
2360dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
2361{
2362 struct vring_virtqueue *vq = to_vvq(_vq);
2363
2364 BUG_ON(!vq->we_own_ring);
2365
1ce9e605
TB
2366 if (vq->packed_ring)
2367 return vq->packed.device_event_dma_addr;
2368
d79dca75 2369 return vq->split.queue_dma_addr +
e593bf97 2370 ((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
2a2d1382
AL
2371}
2372EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
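/*
 * Example (illustrative sketch): a hypothetical transport programs the
 * ring addresses into device registers.  The register offsets and the
 * write_reg() helper are assumptions for the example only.
 */
static void my_program_queue(struct virtqueue *vq,
                             void (*write_reg)(u32 reg, u64 val))
{
        write_reg(0x00, virtqueue_get_desc_addr(vq));   /* descriptor area */
        write_reg(0x08, virtqueue_get_avail_addr(vq));  /* driver area */
        write_reg(0x10, virtqueue_get_used_addr(vq));   /* device area */
}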
2373
1ce9e605 2374/* Only available for split ring */
2a2d1382
AL
2375const struct vring *virtqueue_get_vring(struct virtqueue *vq)
2376{
e593bf97 2377 return &to_vvq(vq)->split.vring;
89062652 2378}
2a2d1382 2379EXPORT_SYMBOL_GPL(virtqueue_get_vring);
89062652 2380
c6fd4701 2381MODULE_LICENSE("GPL");