/*
 * Virtio PCI driver - modern (virtio 1.0) device support
 *
 * This module allows virtio devices to be used over a virtual PCI device.
 * This can be used with QEMU based VMMs like KVM or Xen.
 *
 * Copyright IBM Corp. 2007
 * Copyright Red Hat, Inc. 2014
 *
 * Authors:
 *  Anthony Liguori <aliguori@us.ibm.com>
 *  Rusty Russell <rusty@rustcorp.com.au>
 *  Michael S. Tsirkin <mst@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#define VIRTIO_PCI_NO_LEGACY
#include "virtio_pci_common.h"
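
/*
 * map_capability() maps the window that a virtio_pci_cap at config-space
 * offset 'off' describes: it reads the capability's bar/offset/length
 * fields, validates them against the BAR resource (length, wrap-around,
 * alignment), and returns the ioremapped region, or NULL on any
 * inconsistency.
 */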
static void __iomem *map_capability(struct pci_dev *dev, int off,
				    size_t minlen, u32 align,
				    u32 start, u32 size, size_t *len)
{
	u8 bar;
	u32 offset, length;
	void __iomem *p;

	pci_read_config_byte(dev, off + offsetof(struct virtio_pci_cap,
						 bar),
			     &bar);
	pci_read_config_dword(dev, off + offsetof(struct virtio_pci_cap, offset),
			      &offset);
	pci_read_config_dword(dev, off + offsetof(struct virtio_pci_cap, length),
			      &length);

	if (length <= start) {
		dev_err(&dev->dev,
			"virtio_pci: bad capability len %u (>%u expected)\n",
			length, start);
		return NULL;
	}

	if (length - start < minlen) {
		dev_err(&dev->dev,
			"virtio_pci: bad capability len %u (>=%zu expected)\n",
			length, minlen);
		return NULL;
	}

	length -= start;

	if (start + offset < offset) {
		dev_err(&dev->dev,
			"virtio_pci: map wrap-around %u+%u\n",
			start, offset);
		return NULL;
	}

	offset += start;

	if (offset & (align - 1)) {
		dev_err(&dev->dev,
			"virtio_pci: offset %u not aligned to %u\n",
			offset, align);
		return NULL;
	}

	if (length > size)
		length = size;

	if (len)
		*len = length;

	if (minlen + offset < minlen ||
	    minlen + offset > pci_resource_len(dev, bar)) {
		dev_err(&dev->dev,
			"virtio_pci: map virtio %zu@%u "
			"out of range on bar %i length %lu\n",
			minlen, offset,
			bar, (unsigned long)pci_resource_len(dev, bar));
		return NULL;
	}

	p = pci_iomap_range(dev, bar, offset, length);
	if (!p)
		dev_err(&dev->dev,
			"virtio_pci: unable to map virtio %u@%u on bar %i\n",
			length, offset, bar);
	return p;
}
static void iowrite64_twopart(u64 val, __le32 __iomem *lo, __le32 __iomem *hi)
{
	iowrite32((u32)val, lo);
	iowrite32(val >> 32, hi);
}

/* virtio config->get_features() implementation */
static u64 vp_get_features(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	u64 features;

	iowrite32(0, &vp_dev->common->device_feature_select);
	features = ioread32(&vp_dev->common->device_feature);
	iowrite32(1, &vp_dev->common->device_feature_select);
	features |= ((u64)ioread32(&vp_dev->common->device_feature) << 32);

	return features;
}
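
/*
 * Both feature directions use the same banked-window scheme: writing 0 or
 * 1 to a *_feature_select register picks which 32-bit half of the 64-bit
 * feature word the adjacent *_feature window exposes.
 */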

/* virtio config->finalize_features() implementation */
static int vp_finalize_features(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);

	/* Give virtio_ring a chance to accept features. */
	vring_transport_features(vdev);

	if (!__virtio_test_bit(vdev, VIRTIO_F_VERSION_1)) {
		dev_err(&vdev->dev, "virtio: device uses modern interface "
			"but does not have VIRTIO_F_VERSION_1\n");
		return -EINVAL;
	}

	iowrite32(0, &vp_dev->common->guest_feature_select);
	iowrite32((u32)vdev->features, &vp_dev->common->guest_feature);
	iowrite32(1, &vp_dev->common->guest_feature_select);
	iowrite32(vdev->features >> 32, &vp_dev->common->guest_feature);

	return 0;
}

/* virtio config->get() implementation */
static void vp_get(struct virtio_device *vdev, unsigned offset,
		   void *buf, unsigned len)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	u8 b;
	__le16 w;
	__le32 l;

	BUG_ON(offset + len > vp_dev->device_len);

	switch (len) {
	case 1:
		b = ioread8(vp_dev->device + offset);
		memcpy(buf, &b, sizeof b);
		break;
	case 2:
		w = cpu_to_le16(ioread16(vp_dev->device + offset));
		memcpy(buf, &w, sizeof w);
		break;
	case 4:
		l = cpu_to_le32(ioread32(vp_dev->device + offset));
		memcpy(buf, &l, sizeof l);
		break;
	case 8:
		l = cpu_to_le32(ioread32(vp_dev->device + offset));
		memcpy(buf, &l, sizeof l);
		l = cpu_to_le32(ioread32(vp_dev->device + offset + sizeof l));
		memcpy(buf + sizeof l, &l, sizeof l);
		break;
	default:
		BUG();
	}
}
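
/*
 * Device-specific config fields are little-endian in virtio 1.0, hence
 * the cpu_to_le16/cpu_to_le32 conversions; 64-bit fields carry no
 * atomicity guarantee, so they are transferred as two 32-bit accesses,
 * and callers rely on the config generation counter to detect torn reads.
 */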

/* the config->set() implementation.  it's symmetric to the config->get()
 * implementation */
static void vp_set(struct virtio_device *vdev, unsigned offset,
		   const void *buf, unsigned len)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	u8 b;
	__le16 w;
	__le32 l;

	BUG_ON(offset + len > vp_dev->device_len);

	switch (len) {
	case 1:
		memcpy(&b, buf, sizeof b);
		iowrite8(b, vp_dev->device + offset);
		break;
	case 2:
		memcpy(&w, buf, sizeof w);
		iowrite16(le16_to_cpu(w), vp_dev->device + offset);
		break;
	case 4:
		memcpy(&l, buf, sizeof l);
		iowrite32(le32_to_cpu(l), vp_dev->device + offset);
		break;
	case 8:
		memcpy(&l, buf, sizeof l);
		iowrite32(le32_to_cpu(l), vp_dev->device + offset);
		memcpy(&l, buf + sizeof l, sizeof l);
		iowrite32(le32_to_cpu(l), vp_dev->device + offset + sizeof l);
		break;
	default:
		BUG();
	}
}

static u32 vp_generation(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);

	return ioread8(&vp_dev->common->config_generation);
}

/* config->{get,set}_status() implementations */
static u8 vp_get_status(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);

	return ioread8(&vp_dev->common->device_status);
}

static void vp_set_status(struct virtio_device *vdev, u8 status)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);

	/* We should never be setting status to 0. */
	BUG_ON(status == 0);
	iowrite8(status, &vp_dev->common->device_status);
}

static void vp_reset(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);

	/* 0 status means a reset. */
	iowrite8(0, &vp_dev->common->device_status);
	/* Flush out the status write, and flush in device writes,
	 * including MSI-X interrupts, if any. */
	ioread8(&vp_dev->common->device_status);
	/* Flush pending VQ/configuration callbacks. */
	vp_synchronize_vectors(vdev);
}

static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
{
	/* Setup the vector used for configuration events */
	iowrite16(vector, &vp_dev->common->msix_config);
	/* Verify we had enough resources to assign the vector */
	/* Will also flush the write out to device */
	return ioread16(&vp_dev->common->msix_config);
}

static size_t vring_pci_size(u16 num)
{
	/* We only need a cacheline separation. */
	return PAGE_ALIGN(vring_size(num, SMP_CACHE_BYTES));
}
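
/*
 * vring_size() is the total footprint of the descriptor table plus the
 * available and used rings for 'num' entries at the given alignment; the
 * page-aligned result is what gets allocated and freed below.
 */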

static void *alloc_virtqueue_pages(int *num)
{
	void *pages;

	/* TODO: allocate each queue chunk individually */
	for (; *num && vring_pci_size(*num) > PAGE_SIZE; *num /= 2) {
		pages = alloc_pages_exact(vring_pci_size(*num),
					  GFP_KERNEL|__GFP_ZERO|__GFP_NOWARN);
		if (pages)
			return pages;
	}

	if (!*num)
		return NULL;

	/* Try to get a single page. You are my only hope! */
	return alloc_pages_exact(vring_pci_size(*num), GFP_KERNEL|__GFP_ZERO);
}
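
/*
 * Under memory pressure the loop above halves the ring until either an
 * allocation succeeds or a single page suffices; the caller reads the
 * (possibly reduced) size back through *num and must program that size
 * into the device.
 */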

static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
				  struct virtio_pci_vq_info *info,
				  unsigned index,
				  void (*callback)(struct virtqueue *vq),
				  const char *name,
				  u16 msix_vec)
{
	struct virtio_pci_common_cfg __iomem *cfg = vp_dev->common;
	struct virtqueue *vq;
	u16 num, off;
	int err;

	if (index >= ioread16(&cfg->num_queues))
		return ERR_PTR(-ENOENT);

	/* Select the queue we're interested in */
	iowrite16(index, &cfg->queue_select);

	/* Check if queue is either not available or already active. */
	num = ioread16(&cfg->queue_size);
	if (!num || ioread8(&cfg->queue_enable))
		return ERR_PTR(-ENOENT);

	if (num & (num - 1)) {
		dev_warn(&vp_dev->pci_dev->dev, "bad queue size %u", num);
		return ERR_PTR(-EINVAL);
	}

	/* get offset of notification word for this vq */
	off = ioread16(&cfg->queue_notify_off);

	info->num = num;
	info->msix_vector = msix_vec;

	info->queue = alloc_virtqueue_pages(&info->num);
	if (info->queue == NULL)
		return ERR_PTR(-ENOMEM);

	/* create the vring */
	vq = vring_new_virtqueue(index, info->num,
				 SMP_CACHE_BYTES, &vp_dev->vdev,
				 true, info->queue, vp_notify, callback, name);
	if (!vq) {
		err = -ENOMEM;
		goto err_new_queue;
	}

	/* activate the queue: program the (possibly shrunken) ring size */
	iowrite16(info->num, &cfg->queue_size);
	iowrite64_twopart(virt_to_phys(info->queue),
			  &cfg->queue_desc_lo, &cfg->queue_desc_hi);
	iowrite64_twopart(virt_to_phys(virtqueue_get_avail(vq)),
			  &cfg->queue_avail_lo, &cfg->queue_avail_hi);
	iowrite64_twopart(virt_to_phys(virtqueue_get_used(vq)),
			  &cfg->queue_used_lo, &cfg->queue_used_hi);

	/* map the 16-bit notification word for this vq */
	vq->priv = (void __force *)map_capability(vp_dev->pci_dev,
						  vp_dev->notify_map_cap, 2, 2,
						  off * vp_dev->notify_offset_multiplier, 2,
						  NULL);
	if (!vq->priv) {
		err = -ENOMEM;
		goto err_map_notify;
	}

	if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
		iowrite16(msix_vec, &cfg->queue_msix_vector);
		msix_vec = ioread16(&cfg->queue_msix_vector);
		if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
			err = -EBUSY;
			goto err_assign_vector;
		}
	}

	return vq;

err_assign_vector:
	pci_iounmap(vp_dev->pci_dev, (void __iomem __force *)vq->priv);
err_map_notify:
	vring_del_virtqueue(vq);
err_new_queue:
	free_pages_exact(info->queue, vring_pci_size(info->num));
	return ERR_PTR(err);
}
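
/*
 * The doorbell for a queue lives at notify base + queue_notify_off *
 * notify_off_multiplier. With hypothetical values, a multiplier of 4 and
 * queue_notify_off == 3 place the 16-bit doorbell at base + 12; the spec
 * also allows every queue to share one doorbell via identical offsets.
 */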

static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs,
			      struct virtqueue *vqs[],
			      vq_callback_t *callbacks[],
			      const char *names[])
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	struct virtqueue *vq;
	int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names);

	if (rc)
		return rc;

	/* Select and activate all queues. Has to be done last: once we do
	 * this, there's no way to go back except reset.
	 */
	list_for_each_entry(vq, &vdev->vqs, list) {
		iowrite16(vq->index, &vp_dev->common->queue_select);
		iowrite8(1, &vp_dev->common->queue_enable);
	}

	return 0;
}
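
/*
 * The spec requires a queue's size, ring addresses and MSI-X vector to be
 * configured before 1 is written to queue_enable, which is why enabling
 * happens here in a final pass, after vp_find_vqs() has set up every ring.
 */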

static void del_vq(struct virtio_pci_vq_info *info)
{
	struct virtqueue *vq = info->vq;
	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);

	iowrite16(vq->index, &vp_dev->common->queue_select);

	if (vp_dev->msix_enabled) {
		iowrite16(VIRTIO_MSI_NO_VECTOR,
			  &vp_dev->common->queue_msix_vector);
		/* Flush the write out to device */
		ioread16(&vp_dev->common->queue_msix_vector);
	}

	pci_iounmap(vp_dev->pci_dev, (void __force __iomem *)vq->priv);

	vring_del_virtqueue(vq);

	free_pages_exact(info->queue, vring_pci_size(info->num));
}

static const struct virtio_config_ops virtio_pci_config_ops = {
	.get		= vp_get,
	.set		= vp_set,
	.generation	= vp_generation,
	.get_status	= vp_get_status,
	.set_status	= vp_set_status,
	.reset		= vp_reset,
	.find_vqs	= vp_modern_find_vqs,
	.del_vqs	= vp_del_vqs,
	.get_features	= vp_get_features,
	.finalize_features = vp_finalize_features,
	.bus_name	= vp_bus_name,
	.set_vq_affinity = vp_set_vq_affinity,
};

/**
 * virtio_pci_find_capability - walk capabilities to find device info.
 * @dev: the pci device
 * @cfg_type: the VIRTIO_PCI_CAP_* value we seek
 * @ioresource_types: IORESOURCE_MEM and/or IORESOURCE_IO.
 *
 * Returns offset of the capability, or 0.
 */
static inline int virtio_pci_find_capability(struct pci_dev *dev, u8 cfg_type,
					     u32 ioresource_types)
{
	int pos;

	for (pos = pci_find_capability(dev, PCI_CAP_ID_VNDR);
	     pos > 0;
	     pos = pci_find_next_capability(dev, pos, PCI_CAP_ID_VNDR)) {
		u8 type, bar;

		pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
							 cfg_type),
				     &type);
		pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
							 bar),
				     &bar);

		/* Ignore structures with reserved BAR values */
		if (bar > 0x5)
			continue;

		if (type == cfg_type) {
			if (pci_resource_len(dev, bar) &&
			    pci_resource_flags(dev, bar) & ioresource_types)
				return pos;
		}
	}
	return 0;
}
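
/*
 * All virtio 1.0 structures hide behind PCI vendor-specific capabilities
 * (PCI_CAP_ID_VNDR, 0x09); cfg_type then tells common, notify, ISR and
 * device config apart, so one function exposes several such capabilities.
 */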

static void virtio_pci_release_dev(struct device *_d)
{
	struct virtio_device *vdev = dev_to_virtio(_d);
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);

	/* As struct device is a kobject, it's not safe to free the memory
	 * (including the reference counter itself) until its release
	 * callback. */
	kfree(vp_dev);
}

/* This is part of the ABI. Don't screw with it. */
static inline void check_offsets(void)
{
	/* Note: disk space was harmed in compilation of this function. */
	BUILD_BUG_ON(VIRTIO_PCI_CAP_VNDR !=
		     offsetof(struct virtio_pci_cap, cap_vndr));
	BUILD_BUG_ON(VIRTIO_PCI_CAP_NEXT !=
		     offsetof(struct virtio_pci_cap, cap_next));
	BUILD_BUG_ON(VIRTIO_PCI_CAP_LEN !=
		     offsetof(struct virtio_pci_cap, cap_len));
	BUILD_BUG_ON(VIRTIO_PCI_CAP_CFG_TYPE !=
		     offsetof(struct virtio_pci_cap, cfg_type));
	BUILD_BUG_ON(VIRTIO_PCI_CAP_BAR !=
		     offsetof(struct virtio_pci_cap, bar));
	BUILD_BUG_ON(VIRTIO_PCI_CAP_OFFSET !=
		     offsetof(struct virtio_pci_cap, offset));
	BUILD_BUG_ON(VIRTIO_PCI_CAP_LENGTH !=
		     offsetof(struct virtio_pci_cap, length));
	BUILD_BUG_ON(VIRTIO_PCI_NOTIFY_CAP_MULT !=
		     offsetof(struct virtio_pci_notify_cap,
			      notify_off_multiplier));
	BUILD_BUG_ON(VIRTIO_PCI_COMMON_DFSELECT !=
		     offsetof(struct virtio_pci_common_cfg,
			      device_feature_select));
	BUILD_BUG_ON(VIRTIO_PCI_COMMON_DF !=
		     offsetof(struct virtio_pci_common_cfg, device_feature));
	BUILD_BUG_ON(VIRTIO_PCI_COMMON_GFSELECT !=
		     offsetof(struct virtio_pci_common_cfg,
			      guest_feature_select));
	BUILD_BUG_ON(VIRTIO_PCI_COMMON_GF !=
		     offsetof(struct virtio_pci_common_cfg, guest_feature));
	BUILD_BUG_ON(VIRTIO_PCI_COMMON_MSIX !=
		     offsetof(struct virtio_pci_common_cfg, msix_config));
	BUILD_BUG_ON(VIRTIO_PCI_COMMON_NUMQ !=
		     offsetof(struct virtio_pci_common_cfg, num_queues));
	BUILD_BUG_ON(VIRTIO_PCI_COMMON_STATUS !=
		     offsetof(struct virtio_pci_common_cfg, device_status));
	BUILD_BUG_ON(VIRTIO_PCI_COMMON_CFGGENERATION !=
		     offsetof(struct virtio_pci_common_cfg, config_generation));
	BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_SELECT !=
		     offsetof(struct virtio_pci_common_cfg, queue_select));
	BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_SIZE !=
		     offsetof(struct virtio_pci_common_cfg, queue_size));
	BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_MSIX !=
		     offsetof(struct virtio_pci_common_cfg, queue_msix_vector));
	BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_ENABLE !=
		     offsetof(struct virtio_pci_common_cfg, queue_enable));
	BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_NOFF !=
		     offsetof(struct virtio_pci_common_cfg, queue_notify_off));
	BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_DESCLO !=
		     offsetof(struct virtio_pci_common_cfg, queue_desc_lo));
	BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_DESCHI !=
		     offsetof(struct virtio_pci_common_cfg, queue_desc_hi));
	BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_AVAILLO !=
		     offsetof(struct virtio_pci_common_cfg, queue_avail_lo));
	BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_AVAILHI !=
		     offsetof(struct virtio_pci_common_cfg, queue_avail_hi));
	BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_USEDLO !=
		     offsetof(struct virtio_pci_common_cfg, queue_used_lo));
	BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_USEDHI !=
		     offsetof(struct virtio_pci_common_cfg, queue_used_hi));
}

/* the PCI probing function */
int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev)
{
	struct pci_dev *pci_dev = vp_dev->pci_dev;
	int err, common, isr, notify, device;
	u32 notify_length;

	check_offsets();

	/* We only own devices >= 0x1000 and <= 0x107f: leave the rest. */
	if (pci_dev->device < 0x1000 || pci_dev->device > 0x107f)
		return -ENODEV;

	if (pci_dev->device < 0x1040) {
		/* Transitional devices: use the PCI subsystem device id as
		 * virtio device id, same as legacy driver always did.
		 */
		vp_dev->vdev.id.device = pci_dev->subsystem_device;
	} else {
		/* Modern devices: simply use PCI device id, but start from 0x1040. */
		vp_dev->vdev.id.device = pci_dev->device - 0x1040;
	}
	vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor;

	if (virtio_device_is_legacy_only(vp_dev->vdev.id))
		return -ENODEV;

	/* check for a common config: if not, use legacy mode (bar 0). */
	common = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_COMMON_CFG,
					    IORESOURCE_IO | IORESOURCE_MEM);
	if (!common) {
		dev_info(&pci_dev->dev,
			 "virtio_pci: leaving for legacy driver\n");
		return -ENODEV;
	}

	/* If common is there, these should be too... */
	isr = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_ISR_CFG,
					 IORESOURCE_IO | IORESOURCE_MEM);
	notify = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_NOTIFY_CFG,
					    IORESOURCE_IO | IORESOURCE_MEM);
	if (!isr || !notify) {
		dev_err(&pci_dev->dev,
			"virtio_pci: missing capabilities %i/%i/%i\n",
			common, isr, notify);
		return -EINVAL;
	}

	/* Device capability is only mandatory for devices that have
	 * device-specific configuration.
	 */
	device = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_DEVICE_CFG,
					    IORESOURCE_IO | IORESOURCE_MEM);

	err = -EINVAL;
	vp_dev->common = map_capability(pci_dev, common,
					sizeof(struct virtio_pci_common_cfg), 4,
					0, sizeof(struct virtio_pci_common_cfg),
					NULL);
	if (!vp_dev->common)
		goto err_map_common;
	vp_dev->isr = map_capability(pci_dev, isr, sizeof(u8), 1,
				     0, 1,
				     NULL);
	if (!vp_dev->isr)
		goto err_map_isr;

	/* Read notify_off_multiplier from config space. */
	pci_read_config_dword(pci_dev,
			      notify + offsetof(struct virtio_pci_notify_cap,
						notify_off_multiplier),
			      &vp_dev->notify_offset_multiplier);
	/* Read notify length from config space. */
	pci_read_config_dword(pci_dev,
			      notify + offsetof(struct virtio_pci_notify_cap,
						cap.length),
			      &notify_length);

	vp_dev->notify_map_cap = notify;

	/* Again, we don't know how much we should map, but PAGE_SIZE
	 * is more than enough for all existing devices.
	 */
	if (device) {
		vp_dev->device = map_capability(pci_dev, device, 0, 4,
						0, PAGE_SIZE,
						&vp_dev->device_len);
		if (!vp_dev->device)
			goto err_map_device;
	}

	vp_dev->vdev.config = &virtio_pci_config_ops;

	vp_dev->config_vector = vp_config_vector;
	vp_dev->setup_vq = setup_vq;
	vp_dev->del_vq = del_vq;

	return 0;

err_map_device:
	pci_iounmap(pci_dev, vp_dev->isr);
err_map_isr:
	pci_iounmap(pci_dev, vp_dev->common);
err_map_common:
	return err;
}
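
/*
 * Example of the ID split above: a transitional virtio-net device has PCI
 * device id 0x1000 and subsystem device id 1 (VIRTIO_ID_NET), while a
 * modern-only one has PCI device id 0x1041; both yield virtio id 1.
 */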

void virtio_pci_modern_remove(struct virtio_pci_device *vp_dev)
{
	struct pci_dev *pci_dev = vp_dev->pci_dev;

	/* The device config mapping is optional; see probe above. */
	if (vp_dev->device)
		pci_iounmap(pci_dev, vp_dev->device);
	pci_iounmap(pci_dev, vp_dev->isr);
	pci_iounmap(pci_dev, vp_dev->common);
}