Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
author     Linus Torvalds <torvalds@linux-foundation.org>
           Fri, 12 Aug 2022 16:50:34 +0000 (09:50 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Fri, 12 Aug 2022 16:50:34 +0000 (09:50 -0700)
Pull virtio updates from Michael Tsirkin:

 - A huge patchset supporting vq resize using the new vq reset
   capability

 - Features, fixes, and cleanups all over the place

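A minimal sketch of the driver-facing API that results, pieced together
from the hunks below (virtqueue_resize() and the new vq->num_max field
appear in the virtio-net and transport changes; the reclaim logic and
error handling here are illustrative only):

    #include <linux/slab.h>
    #include <linux/virtio.h>

    /* The recycle callback receives each buffer detached from the old
     * ring so the driver can free or re-post it. */
    static void recycle_buf(struct virtqueue *vq, void *buf)
    {
            kfree(buf);                     /* driver-specific reclaim */
    }

    static int shrink_ring(struct virtqueue *vq, u32 new_num)
    {
            if (new_num > vq->num_max)      /* num_max: device's hard limit */
                    return -EINVAL;
            return virtqueue_resize(vq, new_num, recycle_buf);
    }
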
* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (88 commits)
  vdpa/mlx5: Fix possible uninitialized return value
  vdpa_sim_blk: add support for discard and write-zeroes
  vdpa_sim_blk: add support for VIRTIO_BLK_T_FLUSH
  vdpa_sim_blk: make vdpasim_blk_check_range usable by other requests
  vdpa_sim_blk: check if sector is 0 for commands other than read or write
  vdpa_sim: Implement suspend vdpa op
  vhost-vdpa: uAPI to suspend the device
  vhost-vdpa: introduce SUSPEND backend feature bit
  vdpa: Add suspend operation
  virtio-blk: Avoid use-after-free on suspend/resume
  virtio_vdpa: support the arg sizes of find_vqs()
  vhost-vdpa: Call ida_simple_remove() when failed
  vDPA: fix 'cast to restricted le16' warnings in vdpa.c
  vDPA: !FEATURES_OK should not block querying device config space
  vDPA/ifcvf: support userspace to query features and MQ of a management device
  vDPA/ifcvf: get_config_size should return a value no greater than dev implementation
  vhost scsi: Allow user to control num virtqueues
  vhost-scsi: Fix max number of virtqueues
  vdpa/mlx5: Support different address spaces for control and data
  vdpa/mlx5: Implement suspend virtqueue callback
  ...

51 files changed:
Documentation/devicetree/bindings/virtio/mmio.yaml
arch/um/drivers/virtio_uml.c
drivers/block/virtio_blk.c
drivers/net/virtio_net.c
drivers/nvdimm/virtio_pmem.c
drivers/platform/mellanox/mlxbf-tmfifo.c
drivers/remoteproc/remoteproc_core.c
drivers/remoteproc/remoteproc_virtio.c
drivers/s390/virtio/virtio_ccw.c
drivers/vdpa/ifcvf/ifcvf_base.c
drivers/vdpa/ifcvf/ifcvf_base.h
drivers/vdpa/ifcvf/ifcvf_main.c
drivers/vdpa/mlx5/core/mlx5_vdpa.h
drivers/vdpa/mlx5/net/mlx5_vnet.c
drivers/vdpa/vdpa.c
drivers/vdpa/vdpa_sim/vdpa_sim.c
drivers/vdpa/vdpa_sim/vdpa_sim.h
drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
drivers/vdpa/vdpa_sim/vdpa_sim_net.c
drivers/vdpa/vdpa_user/iova_domain.c
drivers/vdpa/vdpa_user/iova_domain.h
drivers/vdpa/vdpa_user/vduse_dev.c
drivers/vhost/scsi.c
drivers/vhost/vdpa.c
drivers/vhost/vringh.c
drivers/virtio/Kconfig
drivers/virtio/virtio.c
drivers/virtio/virtio_mmio.c
drivers/virtio/virtio_pci_common.c
drivers/virtio/virtio_pci_common.h
drivers/virtio/virtio_pci_legacy.c
drivers/virtio/virtio_pci_modern.c
drivers/virtio/virtio_pci_modern_dev.c
drivers/virtio/virtio_ring.c
drivers/virtio/virtio_vdpa.c
include/linux/mlx5/mlx5_ifc_vdpa.h
include/linux/remoteproc.h
include/linux/vdpa.h
include/linux/virtio.h
include/linux/virtio_config.h
include/linux/virtio_pci_modern.h
include/linux/virtio_ring.h
include/uapi/linux/vduse.h
include/uapi/linux/vhost.h
include/uapi/linux/vhost_types.h
include/uapi/linux/virtio_config.h
include/uapi/linux/virtio_net.h
include/uapi/linux/virtio_pci.h
tools/virtio/linux/kernel.h
tools/virtio/linux/vringh.h
tools/virtio/virtio_test.c

diff --git a/Documentation/devicetree/bindings/virtio/mmio.yaml b/Documentation/devicetree/bindings/virtio/mmio.yaml
index 10c22b5bd16a34688eb0d7586f44cf062dbc36fd..0aa8433f0a5eb7051996aa259ac7f5acfc98a570 100644
@@ -33,6 +33,10 @@ properties:
     description: Required for devices making accesses thru an IOMMU.
     maxItems: 1
 
+  wakeup-source:
+    type: boolean
+    description: Required for setting the irq of a virtio_mmio device as a wakeup source.
+
 required:
   - compatible
   - reg
diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c
index 82ff3785bf69f96394d5fb9aad1588f14450cba3..79e38afd4b91efced1baeb0b9c16f0693d26474a 100644
@@ -958,6 +958,7 @@ static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
                goto error_create;
        }
        vq->priv = info;
+       vq->num_max = num;
        num = virtqueue_get_vring_size(vq);
 
        if (vu_dev->protocol_features &
@@ -1010,7 +1011,7 @@ error_kzalloc:
 
 static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
                       struct virtqueue *vqs[], vq_callback_t *callbacks[],
-                      const char * const names[], const bool *ctx,
+                      const char * const names[], u32 sizes[], const bool *ctx,
                       struct irq_affinity *desc)
 {
        struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
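The extra "u32 sizes[]" argument threaded through vu_find_vqs() here is
the same signature change applied to every transport below. A hedged
sketch of a caller, modeled on the virtio-net hunk further down
(virtio_find_vqs_ctx_size() forwards the array; the vq names and sizes
are illustrative):

    #include <linux/virtio_config.h>

    static int setup_vqs(struct virtio_device *vdev, struct virtqueue *vqs[],
                         vq_callback_t *rx_done)
    {
            vq_callback_t *callbacks[] = { rx_done, NULL };
            const char * const names[] = { "rx", "control" };
            u32 sizes[] = { 1024, 64 };     /* requested ring size per vq */

            return virtio_find_vqs_ctx_size(vdev, 2, vqs, callbacks,
                                            names, sizes, NULL, NULL);
    }
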
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index d7d72e8f6e551297ddd7c0e5aa7e654ec8e7c727..30255fcaf18121c5051ba156e821ee0fd33ba4aa 100644
@@ -101,6 +101,14 @@ static inline blk_status_t virtblk_result(struct virtblk_req *vbr)
        }
 }
 
+static inline struct virtio_blk_vq *get_virtio_blk_vq(struct blk_mq_hw_ctx *hctx)
+{
+       struct virtio_blk *vblk = hctx->queue->queuedata;
+       struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];
+
+       return vq;
+}
+
 static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr)
 {
        struct scatterlist hdr, status, *sgs[3];
@@ -416,7 +424,7 @@ static void virtio_queue_rqs(struct request **rqlist)
        struct request *requeue_list = NULL;
 
        rq_list_for_each_safe(rqlist, req, next) {
-               struct virtio_blk_vq *vq = req->mq_hctx->driver_data;
+               struct virtio_blk_vq *vq = get_virtio_blk_vq(req->mq_hctx);
                bool kick;
 
                if (!virtblk_prep_rq_batch(req)) {
@@ -837,7 +845,7 @@ static void virtblk_complete_batch(struct io_comp_batch *iob)
 static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
 {
        struct virtio_blk *vblk = hctx->queue->queuedata;
-       struct virtio_blk_vq *vq = hctx->driver_data;
+       struct virtio_blk_vq *vq = get_virtio_blk_vq(hctx);
        struct virtblk_req *vbr;
        unsigned long flags;
        unsigned int len;
@@ -862,22 +870,10 @@ static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
        return found;
 }
 
-static int virtblk_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
-                         unsigned int hctx_idx)
-{
-       struct virtio_blk *vblk = data;
-       struct virtio_blk_vq *vq = &vblk->vqs[hctx_idx];
-
-       WARN_ON(vblk->tag_set.tags[hctx_idx] != hctx->tags);
-       hctx->driver_data = vq;
-       return 0;
-}
-
 static const struct blk_mq_ops virtio_mq_ops = {
        .queue_rq       = virtio_queue_rq,
        .queue_rqs      = virtio_queue_rqs,
        .commit_rqs     = virtio_commit_rqs,
-       .init_hctx      = virtblk_init_hctx,
        .complete       = virtblk_request_done,
        .map_queues     = virtblk_map_queues,
        .poll           = virtblk_poll,
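The init_hctx removal ties to the "virtio-blk: Avoid use-after-free on
suspend/resume" entry in the shortlog: vblk->vqs is freed and
reallocated across freeze/restore while the hctx (and whatever was
cached in hctx->driver_data at init time) persists, so the cached
pointer could dangle. Re-deriving the vq on each call sidesteps the
stale cache:

    /* always resolves against the *current* vqs array */
    struct virtio_blk_vq *vq = get_virtio_blk_vq(hctx);
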
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 3b3eebad39772c9160098f20e32b278dd6e7127b..d934774e9733bc1077c05f026127ea143fdc9f8f 100644
@@ -135,6 +135,9 @@ struct send_queue {
        struct virtnet_sq_stats stats;
 
        struct napi_struct napi;
+
+       /* Record whether sq is in reset state. */
+       bool reset;
 };
 
 /* Internal representation of a receive virtqueue */
@@ -267,6 +270,12 @@ struct virtnet_info {
        u8 duplex;
        u32 speed;
 
+       /* Interrupt coalescing settings */
+       u32 tx_usecs;
+       u32 rx_usecs;
+       u32 tx_max_packets;
+       u32 rx_max_packets;
+
        unsigned long guest_offloads;
        unsigned long guest_offloads_capable;
 
@@ -284,6 +293,9 @@ struct padded_vnet_hdr {
        char padding[12];
 };
 
+static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf);
+static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
+
 static bool is_xdp_frame(void *ptr)
 {
        return (unsigned long)ptr & VIRTIO_XDP_FLAG;
@@ -1628,6 +1640,11 @@ static void virtnet_poll_cleantx(struct receive_queue *rq)
                return;
 
        if (__netif_tx_trylock(txq)) {
+               if (sq->reset) {
+                       __netif_tx_unlock(txq);
+                       return;
+               }
+
                do {
                        virtqueue_disable_cb(sq->vq);
                        free_old_xmit_skbs(sq, true);
@@ -1875,6 +1892,70 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
        return NETDEV_TX_OK;
 }
 
+static int virtnet_rx_resize(struct virtnet_info *vi,
+                            struct receive_queue *rq, u32 ring_num)
+{
+       bool running = netif_running(vi->dev);
+       int err, qindex;
+
+       qindex = rq - vi->rq;
+
+       if (running)
+               napi_disable(&rq->napi);
+
+       err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_free_unused_buf);
+       if (err)
+               netdev_err(vi->dev, "resize rx fail: rx queue index: %d err: %d\n", qindex, err);
+
+       if (!try_fill_recv(vi, rq, GFP_KERNEL))
+               schedule_delayed_work(&vi->refill, 0);
+
+       if (running)
+               virtnet_napi_enable(rq->vq, &rq->napi);
+       return err;
+}
+
+static int virtnet_tx_resize(struct virtnet_info *vi,
+                            struct send_queue *sq, u32 ring_num)
+{
+       bool running = netif_running(vi->dev);
+       struct netdev_queue *txq;
+       int err, qindex;
+
+       qindex = sq - vi->sq;
+
+       if (running)
+               virtnet_napi_tx_disable(&sq->napi);
+
+       txq = netdev_get_tx_queue(vi->dev, qindex);
+
+       /* 1. wait for all xmit to complete
+        * 2. fix the race of netif_stop_subqueue() vs netif_start_subqueue()
+        */
+       __netif_tx_lock_bh(txq);
+
+       /* Prevent rx poll from accessing sq. */
+       sq->reset = true;
+
+       /* Prevent the upper layer from trying to send packets. */
+       netif_stop_subqueue(vi->dev, qindex);
+
+       __netif_tx_unlock_bh(txq);
+
+       err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf);
+       if (err)
+               netdev_err(vi->dev, "resize tx fail: tx queue index: %d err: %d\n", qindex, err);
+
+       __netif_tx_lock_bh(txq);
+       sq->reset = false;
+       netif_tx_wake_queue(txq);
+       __netif_tx_unlock_bh(txq);
+
+       if (running)
+               virtnet_napi_tx_enable(vi, sq->vq, &sq->napi);
+       return err;
+}
+
 /*
  * Send command via the control virtqueue and check status.  Commands
  * supported by the hypervisor, as indicated by feature bits, should
@@ -2285,10 +2366,57 @@ static void virtnet_get_ringparam(struct net_device *dev,
 {
        struct virtnet_info *vi = netdev_priv(dev);
 
-       ring->rx_max_pending = virtqueue_get_vring_size(vi->rq[0].vq);
-       ring->tx_max_pending = virtqueue_get_vring_size(vi->sq[0].vq);
-       ring->rx_pending = ring->rx_max_pending;
-       ring->tx_pending = ring->tx_max_pending;
+       ring->rx_max_pending = vi->rq[0].vq->num_max;
+       ring->tx_max_pending = vi->sq[0].vq->num_max;
+       ring->rx_pending = virtqueue_get_vring_size(vi->rq[0].vq);
+       ring->tx_pending = virtqueue_get_vring_size(vi->sq[0].vq);
+}
+
+static int virtnet_set_ringparam(struct net_device *dev,
+                                struct ethtool_ringparam *ring,
+                                struct kernel_ethtool_ringparam *kernel_ring,
+                                struct netlink_ext_ack *extack)
+{
+       struct virtnet_info *vi = netdev_priv(dev);
+       u32 rx_pending, tx_pending;
+       struct receive_queue *rq;
+       struct send_queue *sq;
+       int i, err;
+
+       if (ring->rx_mini_pending || ring->rx_jumbo_pending)
+               return -EINVAL;
+
+       rx_pending = virtqueue_get_vring_size(vi->rq[0].vq);
+       tx_pending = virtqueue_get_vring_size(vi->sq[0].vq);
+
+       if (ring->rx_pending == rx_pending &&
+           ring->tx_pending == tx_pending)
+               return 0;
+
+       if (ring->rx_pending > vi->rq[0].vq->num_max)
+               return -EINVAL;
+
+       if (ring->tx_pending > vi->sq[0].vq->num_max)
+               return -EINVAL;
+
+       for (i = 0; i < vi->max_queue_pairs; i++) {
+               rq = vi->rq + i;
+               sq = vi->sq + i;
+
+               if (ring->tx_pending != tx_pending) {
+                       err = virtnet_tx_resize(vi, sq, ring->tx_pending);
+                       if (err)
+                               return err;
+               }
+
+               if (ring->rx_pending != rx_pending) {
+                       err = virtnet_rx_resize(vi, rq, ring->rx_pending);
+                       if (err)
+                               return err;
+               }
+       }
+
+       return 0;
 }
 
 static bool virtnet_commit_rss_command(struct virtnet_info *vi)
@@ -2618,27 +2746,89 @@ static int virtnet_get_link_ksettings(struct net_device *dev,
        return 0;
 }
 
+static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi,
+                                      struct ethtool_coalesce *ec)
+{
+       struct scatterlist sgs_tx, sgs_rx;
+       struct virtio_net_ctrl_coal_tx coal_tx;
+       struct virtio_net_ctrl_coal_rx coal_rx;
+
+       coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs);
+       coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames);
+       sg_init_one(&sgs_tx, &coal_tx, sizeof(coal_tx));
+
+       if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
+                                 VIRTIO_NET_CTRL_NOTF_COAL_TX_SET,
+                                 &sgs_tx))
+               return -EINVAL;
+
+       /* Save parameters */
+       vi->tx_usecs = ec->tx_coalesce_usecs;
+       vi->tx_max_packets = ec->tx_max_coalesced_frames;
+
+       coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs);
+       coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames);
+       sg_init_one(&sgs_rx, &coal_rx, sizeof(coal_rx));
+
+       if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
+                                 VIRTIO_NET_CTRL_NOTF_COAL_RX_SET,
+                                 &sgs_rx))
+               return -EINVAL;
+
+       /* Save parameters */
+       vi->rx_usecs = ec->rx_coalesce_usecs;
+       vi->rx_max_packets = ec->rx_max_coalesced_frames;
+
+       return 0;
+}
+
+static int virtnet_coal_params_supported(struct ethtool_coalesce *ec)
+{
+       /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL
+        * feature is negotiated.
+        */
+       if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs)
+               return -EOPNOTSUPP;
+
+       if (ec->tx_max_coalesced_frames > 1 ||
+           ec->rx_max_coalesced_frames != 1)
+               return -EINVAL;
+
+       return 0;
+}
+
 static int virtnet_set_coalesce(struct net_device *dev,
                                struct ethtool_coalesce *ec,
                                struct kernel_ethtool_coalesce *kernel_coal,
                                struct netlink_ext_ack *extack)
 {
        struct virtnet_info *vi = netdev_priv(dev);
-       int i, napi_weight;
-
-       if (ec->tx_max_coalesced_frames > 1 ||
-           ec->rx_max_coalesced_frames != 1)
-               return -EINVAL;
+       int ret, i, napi_weight;
+       bool update_napi = false;
 
+       /* Can't change NAPI weight if the link is up */
        napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0;
        if (napi_weight ^ vi->sq[0].napi.weight) {
                if (dev->flags & IFF_UP)
                        return -EBUSY;
+               else
+                       update_napi = true;
+       }
+
+       if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL))
+               ret = virtnet_send_notf_coal_cmds(vi, ec);
+       else
+               ret = virtnet_coal_params_supported(ec);
+
+       if (ret)
+               return ret;
+
+       if (update_napi) {
                for (i = 0; i < vi->max_queue_pairs; i++)
                        vi->sq[i].napi.weight = napi_weight;
        }
 
-       return 0;
+       return ret;
 }
 
 static int virtnet_get_coalesce(struct net_device *dev,
@@ -2646,16 +2836,19 @@ static int virtnet_get_coalesce(struct net_device *dev,
                                struct kernel_ethtool_coalesce *kernel_coal,
                                struct netlink_ext_ack *extack)
 {
-       struct ethtool_coalesce ec_default = {
-               .cmd = ETHTOOL_GCOALESCE,
-               .rx_max_coalesced_frames = 1,
-       };
        struct virtnet_info *vi = netdev_priv(dev);
 
-       memcpy(ec, &ec_default, sizeof(ec_default));
+       if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
+               ec->rx_coalesce_usecs = vi->rx_usecs;
+               ec->tx_coalesce_usecs = vi->tx_usecs;
+               ec->tx_max_coalesced_frames = vi->tx_max_packets;
+               ec->rx_max_coalesced_frames = vi->rx_max_packets;
+       } else {
+               ec->rx_max_coalesced_frames = 1;
 
-       if (vi->sq[0].napi.weight)
-               ec->tx_max_coalesced_frames = 1;
+               if (vi->sq[0].napi.weight)
+                       ec->tx_max_coalesced_frames = 1;
+       }
 
        return 0;
 }
@@ -2774,10 +2967,12 @@ static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info)
 }
 
 static const struct ethtool_ops virtnet_ethtool_ops = {
-       .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES,
+       .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES |
+               ETHTOOL_COALESCE_USECS,
        .get_drvinfo = virtnet_get_drvinfo,
        .get_link = ethtool_op_get_link,
        .get_ringparam = virtnet_get_ringparam,
+       .set_ringparam = virtnet_set_ringparam,
        .get_strings = virtnet_get_strings,
        .get_sset_count = virtnet_get_sset_count,
        .get_ethtool_stats = virtnet_get_ethtool_stats,
@@ -3171,6 +3366,27 @@ static void free_receive_page_frags(struct virtnet_info *vi)
                        put_page(vi->rq[i].alloc_frag.page);
 }
 
+static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf)
+{
+       if (!is_xdp_frame(buf))
+               dev_kfree_skb(buf);
+       else
+               xdp_return_frame(ptr_to_xdp(buf));
+}
+
+static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf)
+{
+       struct virtnet_info *vi = vq->vdev->priv;
+       int i = vq2rxq(vq);
+
+       if (vi->mergeable_rx_bufs)
+               put_page(virt_to_head_page(buf));
+       else if (vi->big_packets)
+               give_pages(&vi->rq[i], buf);
+       else
+               put_page(virt_to_head_page(buf));
+}
+
 static void free_unused_bufs(struct virtnet_info *vi)
 {
        void *buf;
@@ -3178,26 +3394,14 @@ static void free_unused_bufs(struct virtnet_info *vi)
 
        for (i = 0; i < vi->max_queue_pairs; i++) {
                struct virtqueue *vq = vi->sq[i].vq;
-               while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
-                       if (!is_xdp_frame(buf))
-                               dev_kfree_skb(buf);
-                       else
-                               xdp_return_frame(ptr_to_xdp(buf));
-               }
+               while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
+                       virtnet_sq_free_unused_buf(vq, buf);
        }
 
        for (i = 0; i < vi->max_queue_pairs; i++) {
                struct virtqueue *vq = vi->rq[i].vq;
-
-               while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
-                       if (vi->mergeable_rx_bufs) {
-                               put_page(virt_to_head_page(buf));
-                       } else if (vi->big_packets) {
-                               give_pages(&vi->rq[i], buf);
-                       } else {
-                               put_page(virt_to_head_page(buf));
-                       }
-               }
+               while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
+                       virtnet_rq_free_unused_buf(vq, buf);
        }
 }
 
@@ -3228,6 +3432,29 @@ static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqu
                   (unsigned int)GOOD_PACKET_LEN);
 }
 
+static void virtnet_config_sizes(struct virtnet_info *vi, u32 *sizes)
+{
+       u32 i, rx_size, tx_size;
+
+       if (vi->speed == SPEED_UNKNOWN || vi->speed < SPEED_10000) {
+               rx_size = 1024;
+               tx_size = 1024;
+
+       } else if (vi->speed < SPEED_40000) {
+               rx_size = 1024 * 4;
+               tx_size = 1024 * 4;
+
+       } else {
+               rx_size = 1024 * 8;
+               tx_size = 1024 * 8;
+       }
+
+       for (i = 0; i < vi->max_queue_pairs; i++) {
+               sizes[rxq2vq(i)] = rx_size;
+               sizes[txq2vq(i)] = tx_size;
+       }
+}
+
 static int virtnet_find_vqs(struct virtnet_info *vi)
 {
        vq_callback_t **callbacks;
@@ -3235,6 +3462,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
        int ret = -ENOMEM;
        int i, total_vqs;
        const char **names;
+       u32 *sizes;
        bool *ctx;
 
        /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by
@@ -3262,10 +3490,15 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
                ctx = NULL;
        }
 
+       sizes = kmalloc_array(total_vqs, sizeof(*sizes), GFP_KERNEL);
+       if (!sizes)
+               goto err_sizes;
+
        /* Parameters for control virtqueue, if any */
        if (vi->has_cvq) {
                callbacks[total_vqs - 1] = NULL;
                names[total_vqs - 1] = "control";
+               sizes[total_vqs - 1] = 64;
        }
 
        /* Allocate/initialize parameters for send/receive virtqueues */
@@ -3280,8 +3513,10 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
                        ctx[rxq2vq(i)] = true;
        }
 
-       ret = virtio_find_vqs_ctx(vi->vdev, total_vqs, vqs, callbacks,
-                                 names, ctx, NULL);
+       virtnet_config_sizes(vi, sizes);
+
+       ret = virtio_find_vqs_ctx_size(vi->vdev, total_vqs, vqs, callbacks,
+                                      names, sizes, ctx, NULL);
        if (ret)
                goto err_find;
 
@@ -3301,6 +3536,8 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
 
 
 err_find:
+       kfree(sizes);
+err_sizes:
        kfree(ctx);
 err_ctx:
        kfree(names);
@@ -3444,6 +3681,8 @@ static bool virtnet_validate_features(struct virtio_device *vdev)
             VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS,
                             "VIRTIO_NET_F_CTRL_VQ") ||
             VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT,
+                            "VIRTIO_NET_F_CTRL_VQ") ||
+            VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL,
                             "VIRTIO_NET_F_CTRL_VQ"))) {
                return false;
        }
@@ -3580,6 +3819,13 @@ static int virtnet_probe(struct virtio_device *vdev)
        if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
                vi->mergeable_rx_bufs = true;
 
+       if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
+               vi->rx_usecs = 0;
+               vi->tx_usecs = 0;
+               vi->tx_max_packets = 0;
+               vi->rx_max_packets = 0;
+       }
+
        if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT))
                vi->has_rss_hash_report = true;
 
@@ -3651,6 +3897,9 @@ static int virtnet_probe(struct virtio_device *vdev)
                vi->curr_queue_pairs = num_online_cpus();
        vi->max_queue_pairs = max_queue_pairs;
 
+       virtnet_init_settings(dev);
+       virtnet_update_settings(vi);
+
        /* Allocate/initialize the rx/tx queues, and invoke find_vqs */
        err = init_vqs(vi);
        if (err)
@@ -3663,8 +3912,6 @@ static int virtnet_probe(struct virtio_device *vdev)
        netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
        netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);
 
-       virtnet_init_settings(dev);
-
        if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
                vi->failover = net_failover_create(vi->dev);
                if (IS_ERR(vi->failover)) {
@@ -3814,7 +4061,7 @@ static struct virtio_device_id id_table[] = {
        VIRTIO_NET_F_CTRL_MAC_ADDR, \
        VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
        VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \
-       VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT
+       VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL
 
 static unsigned int features[] = {
        VIRTNET_FEATURES,
diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
index 995b6cdc67ede881a3e94eb81472ba52e0945f36..20da455d2ef6378791956cba06a939fc8bdb32ad 100644
@@ -81,17 +81,24 @@ static int virtio_pmem_probe(struct virtio_device *vdev)
        ndr_desc.res = &res;
        ndr_desc.numa_node = nid;
        ndr_desc.flush = async_pmem_flush;
+       ndr_desc.provider_data = vdev;
        set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
        set_bit(ND_REGION_ASYNC, &ndr_desc.flags);
+       /*
+        * The NVDIMM region could become usable before
+        * virtio_device_ready() is called by virtio_dev_probe(),
+        * so set the device ready here.
+        */
+       virtio_device_ready(vdev);
        nd_region = nvdimm_pmem_region_create(vpmem->nvdimm_bus, &ndr_desc);
        if (!nd_region) {
                dev_err(&vdev->dev, "failed to create nvdimm region\n");
                err = -ENXIO;
                goto out_nd;
        }
-       nd_region->provider_data = dev_to_virtio(nd_region->dev.parent->parent);
        return 0;
 out_nd:
+       virtio_reset_device(vdev);
        nvdimm_bus_unregister(vpmem->nvdimm_bus);
 out_vq:
        vdev->config->del_vqs(vdev);
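The ordering fix here is a general virtio probe pattern: when probe
publishes something that can start issuing requests (here, an NVDIMM
region whose flush path submits to the vq) before virtio_dev_probe()
would mark the device ready, the driver must do it itself and unwind
with a reset on failure. A hedged sketch (register_consumer() is a
placeholder for whatever makes the device reachable):

    int err;

    virtio_device_ready(vdev);
    err = register_consumer(vdev);      /* may use the vq immediately */
    if (err) {
            virtio_reset_device(vdev);  /* undo readiness before teardown */
            return err;
    }
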
diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c
index 38800e86ed8ad47371abe6f4459b259137a85c0f..8be13d416f485d4a9dd17afcf486626591ee690c 100644
@@ -928,6 +928,7 @@ static int mlxbf_tmfifo_virtio_find_vqs(struct virtio_device *vdev,
                                        struct virtqueue *vqs[],
                                        vq_callback_t *callbacks[],
                                        const char * const names[],
+                                       u32 sizes[],
                                        const bool *ctx,
                                        struct irq_affinity *desc)
 {
@@ -959,6 +960,8 @@ static int mlxbf_tmfifo_virtio_find_vqs(struct virtio_device *vdev,
                        goto error;
                }
 
+               vq->num_max = vring->num;
+
                vqs[i] = vq;
                vring->vq = vq;
                vq->priv = vring;
diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 89832399e0280636dd334008552cd21972d4ceed..e5279ed9a8d7c7ace6f396795833a0379762177a 100644
@@ -335,7 +335,7 @@ int rproc_alloc_vring(struct rproc_vdev *rvdev, int i)
        size_t size;
 
        /* actual size of vring (in bytes) */
-       size = PAGE_ALIGN(vring_size(rvring->len, rvring->align));
+       size = PAGE_ALIGN(vring_size(rvring->num, rvring->align));
 
        rsc = (void *)rproc->table_ptr + rvdev->rsc_offset;
 
@@ -402,7 +402,7 @@ rproc_parse_vring(struct rproc_vdev *rvdev, struct fw_rsc_vdev *rsc, int i)
                return -EINVAL;
        }
 
-       rvring->len = vring->num;
+       rvring->num = vring->num;
        rvring->align = vring->align;
        rvring->rvdev = rvdev;
 
diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c
index 70ab496d0431c521f008dbf004761de4f3cbfb63..81c4f57761092f8a53aeff941b40df873c6bed5f 100644
@@ -87,7 +87,7 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev,
        struct fw_rsc_vdev *rsc;
        struct virtqueue *vq;
        void *addr;
-       int len, size;
+       int num, size;
 
        /* we're temporarily limited to two virtqueues per rvdev */
        if (id >= ARRAY_SIZE(rvdev->vring))
@@ -104,20 +104,20 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev,
 
        rvring = &rvdev->vring[id];
        addr = mem->va;
-       len = rvring->len;
+       num = rvring->num;
 
        /* zero vring */
-       size = vring_size(len, rvring->align);
+       size = vring_size(num, rvring->align);
        memset(addr, 0, size);
 
        dev_dbg(dev, "vring%d: va %pK qsz %d notifyid %d\n",
-               id, addr, len, rvring->notifyid);
+               id, addr, num, rvring->notifyid);
 
        /*
         * Create the new vq, and tell virtio we're not interested in
         * the 'weak' smp barriers, since we're talking with a real device.
         */
-       vq = vring_new_virtqueue(id, len, rvring->align, vdev, false, ctx,
+       vq = vring_new_virtqueue(id, num, rvring->align, vdev, false, ctx,
                                 addr, rproc_virtio_notify, callback, name);
        if (!vq) {
                dev_err(dev, "vring_new_virtqueue %s failed\n", name);
@@ -125,6 +125,8 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev,
                return ERR_PTR(-ENOMEM);
        }
 
+       vq->num_max = num;
+
        rvring->vq = vq;
        vq->priv = rvring;
 
@@ -156,6 +158,7 @@ static int rproc_virtio_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
                                 struct virtqueue *vqs[],
                                 vq_callback_t *callbacks[],
                                 const char * const names[],
+                                u32 sizes[],
                                 const bool * ctx,
                                 struct irq_affinity *desc)
 {
diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index aa96f67dd0b1635b4181e88c302e9375e933beb6..896896e326645ef615cdba25bf399eeb3d75dced 100644
@@ -532,6 +532,9 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev,
                err = -ENOMEM;
                goto out_err;
        }
+
+       vq->num_max = info->num;
+
        /* it may have been reduced */
        info->num = virtqueue_get_vring_size(vq);
 
@@ -634,6 +637,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs,
                               struct virtqueue *vqs[],
                               vq_callback_t *callbacks[],
                               const char * const names[],
+                              u32 sizes[],
                               const bool *ctx,
                               struct irq_affinity *desc)
 {
diff --git a/drivers/vdpa/ifcvf/ifcvf_base.c b/drivers/vdpa/ifcvf/ifcvf_base.c
index 48c4dadb0c7c7b9f4aac321731ceb59004836748..75a703b803a240bdb966acac2d6bfe0bc8cdded7 100644
@@ -29,7 +29,6 @@ u16 ifcvf_set_config_vector(struct ifcvf_hw *hw, int vector)
 {
        struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg;
 
-       cfg = hw->common_cfg;
        vp_iowrite16(vector,  &cfg->msix_config);
 
        return vp_ioread16(&cfg->msix_config);
@@ -128,6 +127,7 @@ int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *pdev)
                        break;
                case VIRTIO_PCI_CAP_DEVICE_CFG:
                        hw->dev_cfg = get_cap_addr(hw, &cap);
+                       hw->cap_dev_config_size = le32_to_cpu(cap.length);
                        IFCVF_DBG(pdev, "hw->dev_cfg = %p\n", hw->dev_cfg);
                        break;
                }
@@ -233,15 +233,23 @@ int ifcvf_verify_min_features(struct ifcvf_hw *hw, u64 features)
 u32 ifcvf_get_config_size(struct ifcvf_hw *hw)
 {
        struct ifcvf_adapter *adapter;
+       u32 net_config_size = sizeof(struct virtio_net_config);
+       u32 blk_config_size = sizeof(struct virtio_blk_config);
+       u32 cap_size = hw->cap_dev_config_size;
        u32 config_size;
 
        adapter = vf_to_adapter(hw);
+       /* If the onboard device config space size is greater than
+        * the size of struct virtio_net/blk_config, return only the
+        * size of the spec-defined contents; this is very unlikely,
+        * so treat it as defensive programming.
+        */
        switch (hw->dev_type) {
        case VIRTIO_ID_NET:
-               config_size = sizeof(struct virtio_net_config);
+               config_size = min(cap_size, net_config_size);
                break;
        case VIRTIO_ID_BLOCK:
-               config_size = sizeof(struct virtio_blk_config);
+               config_size = min(cap_size, blk_config_size);
                break;
        default:
                config_size = 0;
diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h
index 115b61f4924b9444cca93a08e72573400bc48f7c..f5563f665cc625b1834d98c21dbc63d7a8be6dbe 100644
@@ -87,6 +87,8 @@ struct ifcvf_hw {
        int config_irq;
        int vqs_reused_irq;
        u16 nr_vring;
+       /* VIRTIO_PCI_CAP_DEVICE_CFG size */
+       u32 cap_dev_config_size;
 };
 
 struct ifcvf_adapter {
diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
index 0a5670729412cbbc5d6f68dc24a1d5466545541c..f9c0044c6442e4b115224fe2927c02fa2f33aeb0 100644
@@ -685,7 +685,7 @@ static struct vdpa_notification_area ifcvf_get_vq_notification(struct vdpa_devic
 }
 
 /*
- * IFCVF currently does't have on-chip IOMMU, so not
+ * IFCVF currently doesn't have on-chip IOMMU, so not
  * implemented set_map()/dma_map()/dma_unmap()
  */
 static const struct vdpa_config_ops ifc_vdpa_ops = {
@@ -752,59 +752,36 @@ static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
 {
        struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev;
        struct ifcvf_adapter *adapter;
+       struct vdpa_device *vdpa_dev;
        struct pci_dev *pdev;
        struct ifcvf_hw *vf;
-       struct device *dev;
-       int ret, i;
+       int ret;
 
        ifcvf_mgmt_dev = container_of(mdev, struct ifcvf_vdpa_mgmt_dev, mdev);
-       if (ifcvf_mgmt_dev->adapter)
+       if (!ifcvf_mgmt_dev->adapter)
                return -EOPNOTSUPP;
 
-       pdev = ifcvf_mgmt_dev->pdev;
-       dev = &pdev->dev;
-       adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa,
-                                   dev, &ifc_vdpa_ops, 1, 1, name, false);
-       if (IS_ERR(adapter)) {
-               IFCVF_ERR(pdev, "Failed to allocate vDPA structure");
-               return PTR_ERR(adapter);
-       }
-
-       ifcvf_mgmt_dev->adapter = adapter;
-
+       adapter = ifcvf_mgmt_dev->adapter;
        vf = &adapter->vf;
-       vf->dev_type = get_dev_type(pdev);
-       vf->base = pcim_iomap_table(pdev);
+       pdev = adapter->pdev;
+       vdpa_dev = &adapter->vdpa;
 
-       adapter->pdev = pdev;
-       adapter->vdpa.dma_dev = &pdev->dev;
-
-       ret = ifcvf_init_hw(vf, pdev);
-       if (ret) {
-               IFCVF_ERR(pdev, "Failed to init IFCVF hw\n");
-               goto err;
-       }
-
-       for (i = 0; i < vf->nr_vring; i++)
-               vf->vring[i].irq = -EINVAL;
-
-       vf->hw_features = ifcvf_get_hw_features(vf);
-       vf->config_size = ifcvf_get_config_size(vf);
+       if (name)
+               ret = dev_set_name(&vdpa_dev->dev, "%s", name);
+       else
+               ret = dev_set_name(&vdpa_dev->dev, "vdpa%u", vdpa_dev->index);
 
-       adapter->vdpa.mdev = &ifcvf_mgmt_dev->mdev;
        ret = _vdpa_register_device(&adapter->vdpa, vf->nr_vring);
        if (ret) {
+               put_device(&adapter->vdpa.dev);
                IFCVF_ERR(pdev, "Failed to register to vDPA bus");
-               goto err;
+               return ret;
        }
 
        return 0;
-
-err:
-       put_device(&adapter->vdpa.dev);
-       return ret;
 }
 
+
 static void ifcvf_vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
 {
        struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev;
@@ -823,61 +800,94 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
        struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev;
        struct device *dev = &pdev->dev;
+       struct ifcvf_adapter *adapter;
+       struct ifcvf_hw *vf;
        u32 dev_type;
-       int ret;
-
-       ifcvf_mgmt_dev = kzalloc(sizeof(struct ifcvf_vdpa_mgmt_dev), GFP_KERNEL);
-       if (!ifcvf_mgmt_dev) {
-               IFCVF_ERR(pdev, "Failed to alloc memory for the vDPA management device\n");
-               return -ENOMEM;
-       }
-
-       dev_type = get_dev_type(pdev);
-       switch (dev_type) {
-       case VIRTIO_ID_NET:
-               ifcvf_mgmt_dev->mdev.id_table = id_table_net;
-               break;
-       case VIRTIO_ID_BLOCK:
-               ifcvf_mgmt_dev->mdev.id_table = id_table_blk;
-               break;
-       default:
-               IFCVF_ERR(pdev, "VIRTIO ID %u not supported\n", dev_type);
-               ret = -EOPNOTSUPP;
-               goto err;
-       }
-
-       ifcvf_mgmt_dev->mdev.ops = &ifcvf_vdpa_mgmt_dev_ops;
-       ifcvf_mgmt_dev->mdev.device = dev;
-       ifcvf_mgmt_dev->pdev = pdev;
+       int ret, i;
 
        ret = pcim_enable_device(pdev);
        if (ret) {
                IFCVF_ERR(pdev, "Failed to enable device\n");
-               goto err;
+               return ret;
        }
-
        ret = pcim_iomap_regions(pdev, BIT(0) | BIT(2) | BIT(4),
                                 IFCVF_DRIVER_NAME);
        if (ret) {
                IFCVF_ERR(pdev, "Failed to request MMIO region\n");
-               goto err;
+               return ret;
        }
 
        ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
        if (ret) {
                IFCVF_ERR(pdev, "No usable DMA configuration\n");
-               goto err;
+               return ret;
        }
 
        ret = devm_add_action_or_reset(dev, ifcvf_free_irq_vectors, pdev);
        if (ret) {
                IFCVF_ERR(pdev,
                          "Failed for adding devres for freeing irq vectors\n");
-               goto err;
+               return ret;
        }
 
        pci_set_master(pdev);
 
+       adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa,
+                                   dev, &ifc_vdpa_ops, 1, 1, NULL, false);
+       if (IS_ERR(adapter)) {
+               IFCVF_ERR(pdev, "Failed to allocate vDPA structure");
+               return PTR_ERR(adapter);
+       }
+
+       vf = &adapter->vf;
+       vf->dev_type = get_dev_type(pdev);
+       vf->base = pcim_iomap_table(pdev);
+
+       adapter->pdev = pdev;
+       adapter->vdpa.dma_dev = &pdev->dev;
+
+       ret = ifcvf_init_hw(vf, pdev);
+       if (ret) {
+               IFCVF_ERR(pdev, "Failed to init IFCVF hw\n");
+               return ret;
+       }
+
+       for (i = 0; i < vf->nr_vring; i++)
+               vf->vring[i].irq = -EINVAL;
+
+       vf->hw_features = ifcvf_get_hw_features(vf);
+       vf->config_size = ifcvf_get_config_size(vf);
+
+       ifcvf_mgmt_dev = kzalloc(sizeof(struct ifcvf_vdpa_mgmt_dev), GFP_KERNEL);
+       if (!ifcvf_mgmt_dev) {
+               IFCVF_ERR(pdev, "Failed to alloc memory for the vDPA management device\n");
+               return -ENOMEM;
+       }
+
+       ifcvf_mgmt_dev->mdev.ops = &ifcvf_vdpa_mgmt_dev_ops;
+       ifcvf_mgmt_dev->mdev.device = dev;
+       ifcvf_mgmt_dev->adapter = adapter;
+
+       dev_type = get_dev_type(pdev);
+       switch (dev_type) {
+       case VIRTIO_ID_NET:
+               ifcvf_mgmt_dev->mdev.id_table = id_table_net;
+               break;
+       case VIRTIO_ID_BLOCK:
+               ifcvf_mgmt_dev->mdev.id_table = id_table_blk;
+               break;
+       default:
+               IFCVF_ERR(pdev, "VIRTIO ID %u not supported\n", dev_type);
+               ret = -EOPNOTSUPP;
+               goto err;
+       }
+
+       ifcvf_mgmt_dev->mdev.max_supported_vqs = vf->nr_vring;
+       ifcvf_mgmt_dev->mdev.supported_features = vf->hw_features;
+
+       adapter->vdpa.mdev = &ifcvf_mgmt_dev->mdev;
+
+
        ret = vdpa_mgmtdev_register(&ifcvf_mgmt_dev->mdev);
        if (ret) {
                IFCVF_ERR(pdev,
diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
index 44104093163b11d47ef47e2615f06b15ac28f0d5..6af9fdbb86b7a4c40b4c7d8f7ac1df070b0e5932 100644
@@ -70,6 +70,16 @@ struct mlx5_vdpa_wq_ent {
        struct mlx5_vdpa_dev *mvdev;
 };
 
+enum {
+       MLX5_VDPA_DATAVQ_GROUP,
+       MLX5_VDPA_CVQ_GROUP,
+       MLX5_VDPA_NUMVQ_GROUPS
+};
+
+enum {
+       MLX5_VDPA_NUM_AS = MLX5_VDPA_NUMVQ_GROUPS
+};
+
 struct mlx5_vdpa_dev {
        struct vdpa_device vdev;
        struct mlx5_core_dev *mdev;
@@ -85,6 +95,7 @@ struct mlx5_vdpa_dev {
        struct mlx5_vdpa_mr mr;
        struct mlx5_control_vq cvq;
        struct workqueue_struct *wq;
+       unsigned int group2asid[MLX5_VDPA_NUMVQ_GROUPS];
 };
 
 int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid);
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index e85c1d71f4ed28ca9718bcf0515809d383c74ff5..ed100a35e5969a4c602bc595f0ddf487a07dbc92 100644
@@ -164,6 +164,7 @@ struct mlx5_vdpa_net {
        bool setup;
        u32 cur_num_vqs;
        u32 rqt_size;
+       bool nb_registered;
        struct notifier_block nb;
        struct vdpa_callback config_cb;
        struct mlx5_vdpa_wq_ent cvq_ent;
@@ -895,6 +896,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
        if (err)
                goto err_cmd;
 
+       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
        kfree(in);
        mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
 
@@ -922,6 +924,7 @@ static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtq
                mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
                return;
        }
+       mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
        umems_destroy(ndev, mvq);
 }
 
@@ -1121,6 +1124,20 @@ err_cmd:
        return err;
 }
 
+static bool is_valid_state_change(int oldstate, int newstate)
+{
+       switch (oldstate) {
+       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
+               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
+       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
+               return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
+       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
+       case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
+       default:
+               return false;
+       }
+}
+
 static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
 {
        int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
@@ -1130,6 +1147,12 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
        void *in;
        int err;
 
+       if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
+               return 0;
+
+       if (!is_valid_state_change(mvq->fw_state, state))
+               return -EINVAL;
+
        in = kzalloc(inlen, GFP_KERNEL);
        if (!in)
                return -ENOMEM;
@@ -1440,7 +1463,7 @@ static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
        headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
        dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
        dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
-       memset(dmac_c, 0xff, ETH_ALEN);
+       eth_broadcast_addr(dmac_c);
        ether_addr_copy(dmac_v, mac);
        MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
        if (tagged) {
@@ -1992,6 +2015,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
        struct mlx5_vdpa_virtqueue *mvq;
+       int err;
 
        if (!mvdev->actual_features)
                return;
@@ -2005,8 +2029,16 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
        }
 
        mvq = &ndev->vqs[idx];
-       if (!ready)
+       if (!ready) {
                suspend_vq(ndev, mvq);
+       } else {
+               err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
+               if (err) {
+                       mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
+                       ready = false;
+               }
+       }
+
 
        mvq->ready = ready;
 }
@@ -2095,9 +2127,14 @@ static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
        return PAGE_SIZE;
 }
 
-static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdpa, u16 idx)
+static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
 {
-       return 0;
+       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+
+       if (is_ctrl_vq_idx(mvdev, idx))
+               return MLX5_VDPA_CVQ_GROUP;
+
+       return MLX5_VDPA_DATAVQ_GROUP;
 }
 
 enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
@@ -2511,6 +2548,15 @@ err_clear:
        up_write(&ndev->reslock);
 }
 
+static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev)
+{
+       int i;
+
+       /* by default, all groups are mapped to asid 0 */
+       for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++)
+               mvdev->group2asid[i] = 0;
+}
+
 static int mlx5_vdpa_reset(struct vdpa_device *vdev)
 {
        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
@@ -2529,7 +2575,9 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
        ndev->mvdev.cvq.completed_desc = 0;
        memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
        ndev->mvdev.actual_features = 0;
+       init_group_to_asid_map(mvdev);
        ++mvdev->generation;
+
        if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
                if (mlx5_vdpa_create_mr(mvdev, NULL))
                        mlx5_vdpa_warn(mvdev, "create MR failed\n");
@@ -2567,26 +2615,63 @@ static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
        return mvdev->generation;
 }
 
-static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
-                            struct vhost_iotlb *iotlb)
+static int set_map_control(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
+{
+       u64 start = 0ULL, last = 0ULL - 1;
+       struct vhost_iotlb_map *map;
+       int err = 0;
+
+       spin_lock(&mvdev->cvq.iommu_lock);
+       vhost_iotlb_reset(mvdev->cvq.iotlb);
+
+       for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
+            map = vhost_iotlb_itree_next(map, start, last)) {
+               err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start,
+                                           map->last, map->addr, map->perm);
+               if (err)
+                       goto out;
+       }
+
+out:
+       spin_unlock(&mvdev->cvq.iommu_lock);
+       return err;
+}
+
+static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
 {
-       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
-       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
        bool change_map;
        int err;
 
-       down_write(&ndev->reslock);
-
        err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map);
        if (err) {
                mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
-               goto err;
+               return err;
        }
 
        if (change_map)
                err = mlx5_vdpa_change_map(mvdev, iotlb);
 
-err:
+       return err;
+}
+
+static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
+                            struct vhost_iotlb *iotlb)
+{
+       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+       int err = -EINVAL;
+
+       down_write(&ndev->reslock);
+       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
+               err = set_map_data(mvdev, iotlb);
+               if (err)
+                       goto out;
+       }
+
+       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid)
+               err = set_map_control(mvdev, iotlb);
+
+out:
        up_write(&ndev->reslock);
        return err;
 }
@@ -2733,6 +2818,49 @@ out_err:
        return err;
 }
 
+static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev)
+{
+       struct mlx5_control_vq *cvq;
+
+       if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
+               return;
+
+       cvq = &mvdev->cvq;
+       cvq->ready = false;
+}
+
+static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
+{
+       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+       struct mlx5_vdpa_virtqueue *mvq;
+       int i;
+
+       down_write(&ndev->reslock);
+       mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
+       ndev->nb_registered = false;
+       flush_workqueue(ndev->mvdev.wq);
+       for (i = 0; i < ndev->cur_num_vqs; i++) {
+               mvq = &ndev->vqs[i];
+               suspend_vq(ndev, mvq);
+       }
+       mlx5_vdpa_cvq_suspend(mvdev);
+       up_write(&ndev->reslock);
+       return 0;
+}
+
+static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
+                              unsigned int asid)
+{
+       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+
+       if (group >= MLX5_VDPA_NUMVQ_GROUPS)
+               return -EINVAL;
+
+       mvdev->group2asid[group] = asid;
+       return 0;
+}
+
 static const struct vdpa_config_ops mlx5_vdpa_ops = {
        .set_vq_address = mlx5_vdpa_set_vq_address,
        .set_vq_num = mlx5_vdpa_set_vq_num,
@@ -2762,7 +2890,9 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
        .set_config = mlx5_vdpa_set_config,
        .get_generation = mlx5_vdpa_get_generation,
        .set_map = mlx5_vdpa_set_map,
+       .set_group_asid = mlx5_set_group_asid,
        .free = mlx5_vdpa_free,
+       .suspend = mlx5_vdpa_suspend,
 };
 
 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
@@ -2828,6 +2958,7 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev)
                mvq->index = i;
                mvq->ndev = ndev;
                mvq->fwqp.fw = true;
+               mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
        }
        for (; i < ndev->mvdev.max_vqs; i++) {
                mvq = &ndev->vqs[i];
@@ -2902,13 +3033,21 @@ static int event_handler(struct notifier_block *nb, unsigned long event, void *p
                switch (eqe->sub_type) {
                case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
                case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
+                       down_read(&ndev->reslock);
+                       if (!ndev->nb_registered) {
+                               up_read(&ndev->reslock);
+                               return NOTIFY_DONE;
+                       }
                        wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
-                       if (!wqent)
+                       if (!wqent) {
+                               up_read(&ndev->reslock);
                                return NOTIFY_DONE;
+                       }
 
                        wqent->mvdev = &ndev->mvdev;
                        INIT_WORK(&wqent->work, update_carrier);
                        queue_work(ndev->mvdev.wq, &wqent->work);
+                       up_read(&ndev->reslock);
                        ret = NOTIFY_OK;
                        break;
                default:
@@ -2982,7 +3121,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
        }
 
        ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
-                                1, 1, name, false);
+                                MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
        if (IS_ERR(ndev))
                return PTR_ERR(ndev);
 
@@ -3062,6 +3201,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
 
        ndev->nb.notifier_call = event_handler;
        mlx5_notifier_register(mdev, &ndev->nb);
+       ndev->nb_registered = true;
        mvdev->vdev.mdev = &mgtdev->mgtdev;
        err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
        if (err)
@@ -3093,7 +3233,10 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
        struct workqueue_struct *wq;
 
-       mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
+       if (ndev->nb_registered) {
+               mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
+               ndev->nb_registered = false;
+       }
        wq = mvdev->wq;
        mvdev->wq = NULL;
        destroy_workqueue(wq);
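Putting the mlx5 pieces together: control and data vqs now sit in
separate vq groups, and each group can be bound to its own address
space via the new set_group_asid op. A hedged sketch of the intended
userspace flow (ioctl names taken from include/uapi/linux/vhost.h,
which this merge touches; the exact calling convention is an assumption
here, and cvq_index is illustrative):

    #include <sys/ioctl.h>
    #include <linux/vhost.h>

    struct vhost_vring_state s = { .index = cvq_index };

    /* ask which group the control vq belongs to ... */
    ioctl(vhost_vdpa_fd, VHOST_VDPA_GET_VRING_GROUP, &s);

    /* ... then bind that group to a dedicated address space */
    s.index = s.num;    /* group id returned above */
    s.num = 1;          /* target ASID */
    ioctl(vhost_vdpa_fd, VHOST_VDPA_SET_GROUP_ASID, &s);
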
diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
index ebf2f363fbe7864febece8072af5fee1e3493d76..c06c02704461006baa4d2a60db2a30640b100b42 100644
@@ -824,11 +824,11 @@ static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *ms
                    config.mac))
                return -EMSGSIZE;
 
-       val_u16 = le16_to_cpu(config.status);
+       val_u16 = __virtio16_to_cpu(true, config.status);
        if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_STATUS, val_u16))
                return -EMSGSIZE;
 
-       val_u16 = le16_to_cpu(config.mtu);
+       val_u16 = __virtio16_to_cpu(true, config.mtu);
        if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MTU, val_u16))
                return -EMSGSIZE;
 
@@ -846,17 +846,9 @@ vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid,
 {
        u32 device_id;
        void *hdr;
-       u8 status;
        int err;
 
        down_read(&vdev->cf_lock);
-       status = vdev->config->get_status(vdev);
-       if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) {
-               NL_SET_ERR_MSG_MOD(extack, "Features negotiation not completed");
-               err = -EAGAIN;
-               goto out;
-       }
-
        hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags,
                          VDPA_CMD_DEV_CONFIG_GET);
        if (!hdr) {
@@ -913,7 +905,7 @@ static int vdpa_fill_stats_rec(struct vdpa_device *vdev, struct sk_buff *msg,
        }
        vdpa_get_config_unlocked(vdev, 0, &config, sizeof(config));
 
-       max_vqp = le16_to_cpu(config.max_virtqueue_pairs);
+       max_vqp = __virtio16_to_cpu(true, config.max_virtqueue_pairs);
        if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MAX_VQP, max_vqp))
                return -EMSGSIZE;
 
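The conversions above deserve a gloss: these virtio-net config fields
are __virtio16, and for modern (VERSION_1) devices they are always
little-endian, so the explicit little_endian=true form both documents
the intent and silences the sparse "cast to restricted le16" warnings
named in the shortlog:

    #include <linux/virtio_byteorder.h>

    /* config.mtu is __virtio16; 'true' selects little-endian layout */
    u16 mtu = __virtio16_to_cpu(true, config.mtu);
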
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index 0f28658996472b2306b721b0e2bd4b5c83d1fb37..225b7f5d8be353c16dbcbeb98790f2486ab943a8 100644
@@ -33,7 +33,7 @@ MODULE_PARM_DESC(batch_mapping, "Batched mapping 1 -Enable; 0 - Disable");
 static int max_iotlb_entries = 2048;
 module_param(max_iotlb_entries, int, 0444);
 MODULE_PARM_DESC(max_iotlb_entries,
-                "Maximum number of iotlb entries. 0 means unlimited. (default: 2048)");
+                "Maximum number of iotlb entries for each address space. 0 means unlimited. (default: 2048)");
 
 #define VDPASIM_QUEUE_ALIGN PAGE_SIZE
 #define VDPASIM_QUEUE_MAX 256
@@ -107,6 +107,7 @@ static void vdpasim_do_reset(struct vdpasim *vdpasim)
        for (i = 0; i < vdpasim->dev_attr.nas; i++)
                vhost_iotlb_reset(&vdpasim->iommu[i]);
 
+       vdpasim->running = true;
        spin_unlock(&vdpasim->iommu_lock);
 
        vdpasim->features = 0;
@@ -291,7 +292,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr)
                goto err_iommu;
 
        for (i = 0; i < vdpasim->dev_attr.nas; i++)
-               vhost_iotlb_init(&vdpasim->iommu[i], 0, 0);
+               vhost_iotlb_init(&vdpasim->iommu[i], max_iotlb_entries, 0);
 
        vdpasim->buffer = kvmalloc(dev_attr->buffer_size, GFP_KERNEL);
        if (!vdpasim->buffer)
@@ -505,6 +506,17 @@ static int vdpasim_reset(struct vdpa_device *vdpa)
        return 0;
 }
 
+static int vdpasim_suspend(struct vdpa_device *vdpa)
+{
+       struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+
+       spin_lock(&vdpasim->lock);
+       vdpasim->running = false;
+       spin_unlock(&vdpasim->lock);
+
+       return 0;
+}
+
 static size_t vdpasim_get_config_size(struct vdpa_device *vdpa)
 {
        struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
@@ -694,6 +706,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = {
        .get_status             = vdpasim_get_status,
        .set_status             = vdpasim_set_status,
        .reset                  = vdpasim_reset,
+       .suspend                = vdpasim_suspend,
        .get_config_size        = vdpasim_get_config_size,
        .get_config             = vdpasim_get_config,
        .set_config             = vdpasim_set_config,
@@ -726,6 +739,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = {
        .get_status             = vdpasim_get_status,
        .set_status             = vdpasim_set_status,
        .reset                  = vdpasim_reset,
+       .suspend                = vdpasim_suspend,
        .get_config_size        = vdpasim_get_config_size,
        .get_config             = vdpasim_get_config,
        .set_config             = vdpasim_set_config,
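vdpasim_suspend() is the parent-driver half of the suspend story; the
userspace half is the new uAPI from the shortlog ("vhost-vdpa: uAPI to
suspend the device"). A hedged sketch, with the ioctl and feature-bit
names assumed from the uapi headers this merge touches (vhost.h,
vhost_types.h):

    #include <sys/ioctl.h>
    #include <linux/vhost.h>

    /* Valid only after the backend has advertised the SUSPEND
     * backend-feature bit; the device stops processing vqs but
     * retains its state until a later reset or reconfiguration. */
    static int vdpa_suspend(int vhost_vdpa_fd)
    {
            return ioctl(vhost_vdpa_fd, VHOST_VDPA_SUSPEND);
    }
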
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h
index 622782e922391c217c690b98459b8e272a443175..061986f30911a73f9b3fd2d3f47a79e926f23bb1 100644
@@ -66,6 +66,7 @@ struct vdpasim {
        u32 generation;
        u64 features;
        u32 groups;
+       bool running;
        /* spinlock to synchronize iommu table */
        spinlock_t iommu_lock;
 };
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
index 42d401d4391171f453b73f895efd548f7aed6d2a..c8bfea3b7db230a16623e5016b706a23e370571a 100644
 #define DRV_LICENSE  "GPL v2"
 
 #define VDPASIM_BLK_FEATURES   (VDPASIM_FEATURES | \
+                                (1ULL << VIRTIO_BLK_F_FLUSH)    | \
                                 (1ULL << VIRTIO_BLK_F_SIZE_MAX) | \
                                 (1ULL << VIRTIO_BLK_F_SEG_MAX)  | \
                                 (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
                                 (1ULL << VIRTIO_BLK_F_TOPOLOGY) | \
-                                (1ULL << VIRTIO_BLK_F_MQ))
+                                (1ULL << VIRTIO_BLK_F_MQ)       | \
+                                (1ULL << VIRTIO_BLK_F_DISCARD)  | \
+                                (1ULL << VIRTIO_BLK_F_WRITE_ZEROES))
 
 #define VDPASIM_BLK_CAPACITY   0x40000
 #define VDPASIM_BLK_SIZE_MAX   0x1000
 #define VDPASIM_BLK_SEG_MAX    32
+#define VDPASIM_BLK_DWZ_MAX_SECTORS UINT_MAX
+
+/* 1 virtqueue, 1 address space, 1 virtqueue group */
 #define VDPASIM_BLK_VQ_NUM     1
+#define VDPASIM_BLK_AS_NUM     1
+#define VDPASIM_BLK_GROUP_NUM  1
 
 static char vdpasim_blk_id[VIRTIO_BLK_ID_BYTES] = "vdpa_blk_sim";
 
-static bool vdpasim_blk_check_range(u64 start_sector, size_t range_size)
+static bool vdpasim_blk_check_range(struct vdpasim *vdpasim, u64 start_sector,
+                                   u64 num_sectors, u64 max_sectors)
 {
-       u64 range_sectors = range_size >> SECTOR_SHIFT;
-
-       if (range_size > VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)
-               return false;
+       if (start_sector > VDPASIM_BLK_CAPACITY) {
+               dev_dbg(&vdpasim->vdpa.dev,
+                       "starting sector exceeds the capacity - start: 0x%llx capacity: 0x%x\n",
+                       start_sector, VDPASIM_BLK_CAPACITY);
+       }
 
-       if (start_sector > VDPASIM_BLK_CAPACITY)
+       if (num_sectors > max_sectors) {
+               dev_dbg(&vdpasim->vdpa.dev,
+                       "number of sectors exceeds the max allowed in a request - num: 0x%llx max: 0x%llx\n",
+                       num_sectors, max_sectors);
                return false;
+       }
 
-       if (range_sectors > VDPASIM_BLK_CAPACITY - start_sector)
+       if (num_sectors > VDPASIM_BLK_CAPACITY - start_sector) {
+               dev_dbg(&vdpasim->vdpa.dev,
+                       "request exceeds the capacity - start: 0x%llx num: 0x%llx capacity: 0x%x\n",
+                       start_sector, num_sectors, VDPASIM_BLK_CAPACITY);
                return false;
+       }
 
        return true;
 }
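
The rewritten check compares num_sectors with VDPASIM_BLK_CAPACITY - start_sector instead of testing start_sector + num_sectors against the capacity. A minimal standalone sketch (not from the patch; names are hypothetical) of why that form is safer with 64-bit arithmetic:

#include <stdbool.h>
#include <stdint.h>

/* "start + num > cap" can wrap around and let a huge request through;
 * "num > cap - start" cannot wrap once start <= cap has been checked
 * first. */
static bool range_ok(uint64_t start, uint64_t num, uint64_t cap)
{
	return start <= cap && num <= cap - start;
}

/* e.g. start = UINT64_MAX - 1, num = 4: start + num wraps to 2 and
 * would pass a sum-based test; range_ok() rejects it. */
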
@@ -63,6 +81,7 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
 {
        size_t pushed = 0, to_pull, to_push;
        struct virtio_blk_outhdr hdr;
+       bool handled = false;
        ssize_t bytes;
        loff_t offset;
        u64 sector;
@@ -76,14 +95,14 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
                return false;
 
        if (vq->out_iov.used < 1 || vq->in_iov.used < 1) {
-               dev_err(&vdpasim->vdpa.dev, "missing headers - out_iov: %u in_iov %u\n",
+               dev_dbg(&vdpasim->vdpa.dev, "missing headers - out_iov: %u in_iov %u\n",
                        vq->out_iov.used, vq->in_iov.used);
-               return false;
+               goto err;
        }
 
        if (vq->in_iov.iov[vq->in_iov.used - 1].iov_len < 1) {
-               dev_err(&vdpasim->vdpa.dev, "request in header too short\n");
-               return false;
+               dev_dbg(&vdpasim->vdpa.dev, "request in header too short\n");
+               goto err;
        }
 
        /* The last byte is the status and we checked if the last iov has
@@ -96,8 +115,8 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
        bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &hdr,
                                      sizeof(hdr));
        if (bytes != sizeof(hdr)) {
-               dev_err(&vdpasim->vdpa.dev, "request out header too short\n");
-               return false;
+               dev_dbg(&vdpasim->vdpa.dev, "request out header too short\n");
+               goto err;
        }
 
        to_pull -= bytes;
@@ -107,12 +126,20 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
        offset = sector << SECTOR_SHIFT;
        status = VIRTIO_BLK_S_OK;
 
+       if (type != VIRTIO_BLK_T_IN && type != VIRTIO_BLK_T_OUT &&
+           sector != 0) {
+               dev_dbg(&vdpasim->vdpa.dev,
+                       "sector must be 0 for %u request - sector: 0x%llx\n",
+                       type, sector);
+               status = VIRTIO_BLK_S_IOERR;
+               goto err_status;
+       }
+
        switch (type) {
        case VIRTIO_BLK_T_IN:
-               if (!vdpasim_blk_check_range(sector, to_push)) {
-                       dev_err(&vdpasim->vdpa.dev,
-                               "reading over the capacity - offset: 0x%llx len: 0x%zx\n",
-                               offset, to_push);
+               if (!vdpasim_blk_check_range(vdpasim, sector,
+                                            to_push >> SECTOR_SHIFT,
+                                            VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)) {
                        status = VIRTIO_BLK_S_IOERR;
                        break;
                }
@@ -121,7 +148,7 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
                                              vdpasim->buffer + offset,
                                              to_push);
                if (bytes < 0) {
-                       dev_err(&vdpasim->vdpa.dev,
+                       dev_dbg(&vdpasim->vdpa.dev,
                                "vringh_iov_push_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
                                bytes, offset, to_push);
                        status = VIRTIO_BLK_S_IOERR;
@@ -132,10 +159,9 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
                break;
 
        case VIRTIO_BLK_T_OUT:
-               if (!vdpasim_blk_check_range(sector, to_pull)) {
-                       dev_err(&vdpasim->vdpa.dev,
-                               "writing over the capacity - offset: 0x%llx len: 0x%zx\n",
-                               offset, to_pull);
+               if (!vdpasim_blk_check_range(vdpasim, sector,
+                                            to_pull >> SECTOR_SHIFT,
+                                            VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)) {
                        status = VIRTIO_BLK_S_IOERR;
                        break;
                }
@@ -144,7 +170,7 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
                                              vdpasim->buffer + offset,
                                              to_pull);
                if (bytes < 0) {
-                       dev_err(&vdpasim->vdpa.dev,
+                       dev_dbg(&vdpasim->vdpa.dev,
                                "vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
                                bytes, offset, to_pull);
                        status = VIRTIO_BLK_S_IOERR;
@@ -157,7 +183,7 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
                                              vdpasim_blk_id,
                                              VIRTIO_BLK_ID_BYTES);
                if (bytes < 0) {
-                       dev_err(&vdpasim->vdpa.dev,
+                       dev_dbg(&vdpasim->vdpa.dev,
                                "vringh_iov_push_iotlb() error: %zd\n", bytes);
                        status = VIRTIO_BLK_S_IOERR;
                        break;
@@ -166,13 +192,76 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
                pushed += bytes;
                break;
 
+       case VIRTIO_BLK_T_FLUSH:
+               /* nothing to do */
+               break;
+
+       case VIRTIO_BLK_T_DISCARD:
+       case VIRTIO_BLK_T_WRITE_ZEROES: {
+               struct virtio_blk_discard_write_zeroes range;
+               u32 num_sectors, flags;
+
+               if (to_pull != sizeof(range)) {
+                       dev_dbg(&vdpasim->vdpa.dev,
+                               "discard/write_zeroes header len: 0x%zx [expected: 0x%zx]\n",
+                               to_pull, sizeof(range));
+                       status = VIRTIO_BLK_S_IOERR;
+                       break;
+               }
+
+               bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &range,
+                                             to_pull);
+               if (bytes < 0) {
+                       dev_dbg(&vdpasim->vdpa.dev,
+                               "vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
+                               bytes, offset, to_pull);
+                       status = VIRTIO_BLK_S_IOERR;
+                       break;
+               }
+
+               sector = le64_to_cpu(range.sector);
+               offset = sector << SECTOR_SHIFT;
+               num_sectors = le32_to_cpu(range.num_sectors);
+               flags = le32_to_cpu(range.flags);
+
+               if (type == VIRTIO_BLK_T_DISCARD && flags != 0) {
+                       dev_dbg(&vdpasim->vdpa.dev,
+                               "discard unexpected flags set - flags: 0x%x\n",
+                               flags);
+                       status = VIRTIO_BLK_S_UNSUPP;
+                       break;
+               }
+
+               if (type == VIRTIO_BLK_T_WRITE_ZEROES &&
+                   flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
+                       dev_dbg(&vdpasim->vdpa.dev,
+                               "write_zeroes unexpected flags set - flags: 0x%x\n",
+                               flags);
+                       status = VIRTIO_BLK_S_UNSUPP;
+                       break;
+               }
+
+               if (!vdpasim_blk_check_range(vdpasim, sector, num_sectors,
+                                            VDPASIM_BLK_DWZ_MAX_SECTORS)) {
+                       status = VIRTIO_BLK_S_IOERR;
+                       break;
+               }
+
+               if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
+                       memset(vdpasim->buffer + offset, 0,
+                              num_sectors << SECTOR_SHIFT);
+               }
+
+               break;
+       }
        default:
-               dev_warn(&vdpasim->vdpa.dev,
-                        "Unsupported request type %d\n", type);
+               dev_dbg(&vdpasim->vdpa.dev,
+                       "Unsupported request type %d\n", type);
                status = VIRTIO_BLK_S_IOERR;
                break;
        }
 
+err_status:
        /* If some operations fail, we need to skip the remaining bytes
         * to put the status in the last byte
         */
@@ -182,21 +271,25 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
        /* Last byte is the status */
        bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov, &status, 1);
        if (bytes != 1)
-               return false;
+               goto err;
 
        pushed += bytes;
 
        /* Make sure data is written before advancing index */
        smp_wmb();
 
+       handled = true;
+
+err:
        vringh_complete_iotlb(&vq->vring, vq->head, pushed);
 
-       return true;
+       return handled;
 }
 
 static void vdpasim_blk_work(struct work_struct *work)
 {
        struct vdpasim *vdpasim = container_of(work, struct vdpasim, work);
+       bool reschedule = false;
        int i;
 
        spin_lock(&vdpasim->lock);
@@ -204,8 +297,12 @@ static void vdpasim_blk_work(struct work_struct *work)
        if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
                goto out;
 
+       if (!vdpasim->running)
+               goto out;
+
        for (i = 0; i < VDPASIM_BLK_VQ_NUM; i++) {
                struct vdpasim_virtqueue *vq = &vdpasim->vqs[i];
+               int reqs = 0;
 
                if (!vq->ready)
                        continue;
@@ -218,10 +315,18 @@ static void vdpasim_blk_work(struct work_struct *work)
                        if (vringh_need_notify_iotlb(&vq->vring) > 0)
                                vringh_notify(&vq->vring);
                        local_bh_enable();
+
+                       if (++reqs > 4) {
+                               reschedule = true;
+                               break;
+                       }
                }
        }
 out:
        spin_unlock(&vdpasim->lock);
+
+       if (reschedule)
+               schedule_work(&vdpasim->work);
 }
 
 static void vdpasim_blk_get_config(struct vdpasim *vdpasim, void *config)
@@ -237,6 +342,17 @@ static void vdpasim_blk_get_config(struct vdpasim *vdpasim, void *config)
        blk_config->min_io_size = cpu_to_vdpasim16(vdpasim, 1);
        blk_config->opt_io_size = cpu_to_vdpasim32(vdpasim, 1);
        blk_config->blk_size = cpu_to_vdpasim32(vdpasim, SECTOR_SIZE);
+       /* VIRTIO_BLK_F_DISCARD */
+       blk_config->discard_sector_alignment =
+               cpu_to_vdpasim32(vdpasim, SECTOR_SIZE);
+       blk_config->max_discard_sectors =
+               cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_DWZ_MAX_SECTORS);
+       blk_config->max_discard_seg = cpu_to_vdpasim32(vdpasim, 1);
+       /* VIRTIO_BLK_F_WRITE_ZEROES */
+       blk_config->max_write_zeroes_sectors =
+               cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_DWZ_MAX_SECTORS);
+       blk_config->max_write_zeroes_seg = cpu_to_vdpasim32(vdpasim, 1);
+
 }
 
 static void vdpasim_blk_mgmtdev_release(struct device *dev)
@@ -260,6 +376,8 @@ static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
        dev_attr.id = VIRTIO_ID_BLOCK;
        dev_attr.supported_features = VDPASIM_BLK_FEATURES;
        dev_attr.nvqs = VDPASIM_BLK_VQ_NUM;
+       dev_attr.ngroups = VDPASIM_BLK_GROUP_NUM;
+       dev_attr.nas = VDPASIM_BLK_AS_NUM;
        dev_attr.config_size = sizeof(struct virtio_blk_config);
        dev_attr.get_config = vdpasim_blk_get_config;
        dev_attr.work_fn = vdpasim_blk_work;
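
For reference, the payload parsed above for VIRTIO_BLK_T_DISCARD and VIRTIO_BLK_T_WRITE_ZEROES is the standard struct virtio_blk_discard_write_zeroes segment, carried after the request header and before the status byte. A hedged driver-side sketch of the layout (illustrative only; it assumes a modern little-endian device, hence the htole*() conversions, and packs the whole request into one buffer even though it may equally be split across descriptors):

#include <stdint.h>
#include <string.h>
#include <endian.h>
#include <linux/virtio_blk.h>

/* One write-zeroes request as the device sees it: an outhdr whose
 * sector field must be 0, exactly one range segment, and a trailing
 * status byte that the device fills in. */
struct wz_req {
	struct virtio_blk_outhdr hdr;               /* driver -> device */
	struct virtio_blk_discard_write_zeroes seg; /* driver -> device */
	uint8_t status;                             /* device -> driver */
};

static void wz_req_init(struct wz_req *req, uint64_t sector,
			uint32_t num_sectors, int unmap)
{
	memset(req, 0, sizeof(*req));
	req->hdr.type = htole32(VIRTIO_BLK_T_WRITE_ZEROES);
	req->hdr.sector = 0;    /* must be 0; the range is in the segment */
	req->seg.sector = htole64(sector);
	req->seg.num_sectors = htole32(num_sectors);
	/* VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP is the only flag accepted */
	req->seg.flags = htole32(unmap ? VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP : 0);
}
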
index 5125976a4df87c7c71557bca89cc776e3a5c84bc..886449e885026ab2b7ee1e4ca3681bb59edf2cbd 100644 (file)
@@ -154,6 +154,9 @@ static void vdpasim_net_work(struct work_struct *work)
 
        spin_lock(&vdpasim->lock);
 
+       if (!vdpasim->running)
+               goto out;
+
        if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
                goto out;
 
index 6daa3978d290a1566036fad55744c4a1d5b78988..e682bc7ee6c9994d7cb44892efc366941a269024 100644 (file)
@@ -138,18 +138,17 @@ static void do_bounce(phys_addr_t orig, void *addr, size_t size,
 {
        unsigned long pfn = PFN_DOWN(orig);
        unsigned int offset = offset_in_page(orig);
-       char *buffer;
+       struct page *page;
        unsigned int sz = 0;
 
        while (size) {
                sz = min_t(size_t, PAGE_SIZE - offset, size);
 
-               buffer = kmap_atomic(pfn_to_page(pfn));
+               page = pfn_to_page(pfn);
                if (dir == DMA_TO_DEVICE)
-                       memcpy(addr, buffer + offset, sz);
+                       memcpy_from_page(addr, page, offset, sz);
                else
-                       memcpy(buffer + offset, addr, sz);
-               kunmap_atomic(buffer);
+                       memcpy_to_page(page, offset, addr, sz);
 
                size -= sz;
                pfn++;
@@ -179,8 +178,9 @@ static void vduse_domain_bounce(struct vduse_iova_domain *domain,
                            map->orig_phys == INVALID_PHYS_ADDR))
                        return;
 
-               addr = page_address(map->bounce_page) + offset;
-               do_bounce(map->orig_phys + offset, addr, sz, dir);
+               addr = kmap_local_page(map->bounce_page);
+               do_bounce(map->orig_phys + offset, addr + offset, sz, dir);
+               kunmap_local(addr);
                size -= sz;
                iova += sz;
        }
@@ -213,21 +213,21 @@ vduse_domain_get_bounce_page(struct vduse_iova_domain *domain, u64 iova)
        struct vduse_bounce_map *map;
        struct page *page = NULL;
 
-       spin_lock(&domain->iotlb_lock);
+       read_lock(&domain->bounce_lock);
        map = &domain->bounce_maps[iova >> PAGE_SHIFT];
-       if (!map->bounce_page)
+       if (domain->user_bounce_pages || !map->bounce_page)
                goto out;
 
        page = map->bounce_page;
        get_page(page);
 out:
-       spin_unlock(&domain->iotlb_lock);
+       read_unlock(&domain->bounce_lock);
 
        return page;
 }
 
 static void
-vduse_domain_free_bounce_pages(struct vduse_iova_domain *domain)
+vduse_domain_free_kernel_bounce_pages(struct vduse_iova_domain *domain)
 {
        struct vduse_bounce_map *map;
        unsigned long pfn, bounce_pfns;
@@ -247,6 +247,73 @@ vduse_domain_free_bounce_pages(struct vduse_iova_domain *domain)
        }
 }
 
+int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain,
+                                      struct page **pages, int count)
+{
+       struct vduse_bounce_map *map;
+       int i, ret;
+
+       /* Partial mapping is not supported for now */
+       if (count != (domain->bounce_size >> PAGE_SHIFT))
+               return -EINVAL;
+
+       write_lock(&domain->bounce_lock);
+       ret = -EEXIST;
+       if (domain->user_bounce_pages)
+               goto out;
+
+       for (i = 0; i < count; i++) {
+               map = &domain->bounce_maps[i];
+               if (map->bounce_page) {
+                       /* Copy kernel page to user page if it's in use */
+                       if (map->orig_phys != INVALID_PHYS_ADDR)
+                               memcpy_to_page(pages[i], 0,
+                                              page_address(map->bounce_page),
+                                              PAGE_SIZE);
+                       __free_page(map->bounce_page);
+               }
+               map->bounce_page = pages[i];
+               get_page(pages[i]);
+       }
+       domain->user_bounce_pages = true;
+       ret = 0;
+out:
+       write_unlock(&domain->bounce_lock);
+
+       return ret;
+}
+
+void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain)
+{
+       struct vduse_bounce_map *map;
+       unsigned long i, count;
+
+       write_lock(&domain->bounce_lock);
+       if (!domain->user_bounce_pages)
+               goto out;
+
+       count = domain->bounce_size >> PAGE_SHIFT;
+       for (i = 0; i < count; i++) {
+               struct page *page = NULL;
+
+               map = &domain->bounce_maps[i];
+               if (WARN_ON(!map->bounce_page))
+                       continue;
+
+               /* Copy user page to kernel page if it's in use */
+               if (map->orig_phys != INVALID_PHYS_ADDR) {
+                       page = alloc_page(GFP_ATOMIC | __GFP_NOFAIL);
+                       memcpy_from_page(page_address(page),
+                                        map->bounce_page, 0, PAGE_SIZE);
+               }
+               put_page(map->bounce_page);
+               map->bounce_page = page;
+       }
+       domain->user_bounce_pages = false;
+out:
+       write_unlock(&domain->bounce_lock);
+}
+
 void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain)
 {
        if (!domain->bounce_map)
@@ -322,13 +389,18 @@ dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain,
        if (vduse_domain_init_bounce_map(domain))
                goto err;
 
+       read_lock(&domain->bounce_lock);
        if (vduse_domain_map_bounce_page(domain, (u64)iova, (u64)size, pa))
-               goto err;
+               goto err_unlock;
 
        if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
                vduse_domain_bounce(domain, iova, size, DMA_TO_DEVICE);
 
+       read_unlock(&domain->bounce_lock);
+
        return iova;
+err_unlock:
+       read_unlock(&domain->bounce_lock);
 err:
        vduse_domain_free_iova(iovad, iova, size);
        return DMA_MAPPING_ERROR;
@@ -340,10 +412,12 @@ void vduse_domain_unmap_page(struct vduse_iova_domain *domain,
 {
        struct iova_domain *iovad = &domain->stream_iovad;
 
+       read_lock(&domain->bounce_lock);
        if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
                vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE);
 
        vduse_domain_unmap_bounce_page(domain, (u64)dma_addr, (u64)size);
+       read_unlock(&domain->bounce_lock);
        vduse_domain_free_iova(iovad, dma_addr, size);
 }
 
@@ -451,7 +525,8 @@ static int vduse_domain_release(struct inode *inode, struct file *file)
 
        spin_lock(&domain->iotlb_lock);
        vduse_iotlb_del_range(domain, 0, ULLONG_MAX);
-       vduse_domain_free_bounce_pages(domain);
+       vduse_domain_remove_user_bounce_pages(domain);
+       vduse_domain_free_kernel_bounce_pages(domain);
        spin_unlock(&domain->iotlb_lock);
        put_iova_domain(&domain->stream_iovad);
        put_iova_domain(&domain->consistent_iovad);
@@ -511,6 +586,7 @@ vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
                goto err_file;
 
        domain->file = file;
+       rwlock_init(&domain->bounce_lock);
        spin_lock_init(&domain->iotlb_lock);
        init_iova_domain(&domain->stream_iovad,
                        PAGE_SIZE, IOVA_START_PFN);
index 2722d9b8e21aff4221da75a1ad81424da14490f9..4e0e50e7ac1535863d9795484bc64d168a5ffd70 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/iova.h>
 #include <linux/dma-mapping.h>
 #include <linux/vhost_iotlb.h>
+#include <linux/rwlock.h>
 
 #define IOVA_START_PFN 1
 
@@ -34,6 +35,8 @@ struct vduse_iova_domain {
        struct vhost_iotlb *iotlb;
        spinlock_t iotlb_lock;
        struct file *file;
+       bool user_bounce_pages;
+       rwlock_t bounce_lock;
 };
 
 int vduse_domain_set_map(struct vduse_iova_domain *domain,
@@ -61,6 +64,11 @@ void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size,
 
 void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain);
 
+int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain,
+                                      struct page **pages, int count);
+
+void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain);
+
 void vduse_domain_destroy(struct vduse_iova_domain *domain);
 
 struct vduse_iova_domain *vduse_domain_create(unsigned long iova_limit,
index 3bc27de58f46b0cfaa01a96e88303cac9c472d64..41c0b29739f16484b1afdc14d50ece2e05c1bcb1 100644 (file)
@@ -21,6 +21,8 @@
 #include <linux/uio.h>
 #include <linux/vdpa.h>
 #include <linux/nospec.h>
+#include <linux/vmalloc.h>
+#include <linux/sched/mm.h>
 #include <uapi/linux/vduse.h>
 #include <uapi/linux/vdpa.h>
 #include <uapi/linux/virtio_config.h>
@@ -64,6 +66,13 @@ struct vduse_vdpa {
        struct vduse_dev *dev;
 };
 
+struct vduse_umem {
+       unsigned long iova;
+       unsigned long npages;
+       struct page **pages;
+       struct mm_struct *mm;
+};
+
 struct vduse_dev {
        struct vduse_vdpa *vdev;
        struct device *dev;
@@ -95,6 +104,8 @@ struct vduse_dev {
        u8 status;
        u32 vq_num;
        u32 vq_align;
+       struct vduse_umem *umem;
+       struct mutex mem_lock;
 };
 
 struct vduse_dev_msg {
@@ -917,6 +928,102 @@ unlock:
        return ret;
 }
 
+static int vduse_dev_dereg_umem(struct vduse_dev *dev,
+                               u64 iova, u64 size)
+{
+       int ret;
+
+       mutex_lock(&dev->mem_lock);
+       ret = -ENOENT;
+       if (!dev->umem)
+               goto unlock;
+
+       ret = -EINVAL;
+       if (dev->umem->iova != iova || size != dev->domain->bounce_size)
+               goto unlock;
+
+       vduse_domain_remove_user_bounce_pages(dev->domain);
+       unpin_user_pages_dirty_lock(dev->umem->pages,
+                                   dev->umem->npages, true);
+       atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm);
+       mmdrop(dev->umem->mm);
+       vfree(dev->umem->pages);
+       kfree(dev->umem);
+       dev->umem = NULL;
+       ret = 0;
+unlock:
+       mutex_unlock(&dev->mem_lock);
+       return ret;
+}
+
+static int vduse_dev_reg_umem(struct vduse_dev *dev,
+                             u64 iova, u64 uaddr, u64 size)
+{
+       struct page **page_list = NULL;
+       struct vduse_umem *umem = NULL;
+       long pinned = 0;
+       unsigned long npages, lock_limit;
+       int ret;
+
+       if (!dev->domain->bounce_map ||
+           size != dev->domain->bounce_size ||
+           iova != 0 || uaddr & ~PAGE_MASK)
+               return -EINVAL;
+
+       mutex_lock(&dev->mem_lock);
+       ret = -EEXIST;
+       if (dev->umem)
+               goto unlock;
+
+       ret = -ENOMEM;
+       npages = size >> PAGE_SHIFT;
+       page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
+                             GFP_KERNEL_ACCOUNT);
+       umem = kzalloc(sizeof(*umem), GFP_KERNEL);
+       if (!page_list || !umem)
+               goto unlock;
+
+       mmap_read_lock(current->mm);
+
+       lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
+       if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
+               goto out;
+
+       pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
+                               page_list, NULL);
+       if (pinned != npages) {
+               ret = pinned < 0 ? pinned : -ENOMEM;
+               goto out;
+       }
+
+       ret = vduse_domain_add_user_bounce_pages(dev->domain,
+                                                page_list, pinned);
+       if (ret)
+               goto out;
+
+       atomic64_add(npages, &current->mm->pinned_vm);
+
+       umem->pages = page_list;
+       umem->npages = pinned;
+       umem->iova = iova;
+       umem->mm = current->mm;
+       mmgrab(current->mm);
+
+       dev->umem = umem;
+out:
+       if (ret && pinned > 0)
+               unpin_user_pages(page_list, pinned);
+
+       mmap_read_unlock(current->mm);
+unlock:
+       if (ret) {
+               vfree(page_list);
+               kfree(umem);
+       }
+       mutex_unlock(&dev->mem_lock);
+       return ret;
+}
+
 static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
                            unsigned long arg)
 {
@@ -1089,6 +1196,77 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
                ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject);
                break;
        }
+       case VDUSE_IOTLB_REG_UMEM: {
+               struct vduse_iova_umem umem;
+
+               ret = -EFAULT;
+               if (copy_from_user(&umem, argp, sizeof(umem)))
+                       break;
+
+               ret = -EINVAL;
+               if (!is_mem_zero((const char *)umem.reserved,
+                                sizeof(umem.reserved)))
+                       break;
+
+               ret = vduse_dev_reg_umem(dev, umem.iova,
+                                        umem.uaddr, umem.size);
+               break;
+       }
+       case VDUSE_IOTLB_DEREG_UMEM: {
+               struct vduse_iova_umem umem;
+
+               ret = -EFAULT;
+               if (copy_from_user(&umem, argp, sizeof(umem)))
+                       break;
+
+               ret = -EINVAL;
+               if (!is_mem_zero((const char *)umem.reserved,
+                                sizeof(umem.reserved)))
+                       break;
+
+               ret = vduse_dev_dereg_umem(dev, umem.iova,
+                                          umem.size);
+               break;
+       }
+       case VDUSE_IOTLB_GET_INFO: {
+               struct vduse_iova_info info;
+               struct vhost_iotlb_map *map;
+               struct vduse_iova_domain *domain = dev->domain;
+
+               ret = -EFAULT;
+               if (copy_from_user(&info, argp, sizeof(info)))
+                       break;
+
+               ret = -EINVAL;
+               if (info.start > info.last)
+                       break;
+
+               if (!is_mem_zero((const char *)info.reserved,
+                                sizeof(info.reserved)))
+                       break;
+
+               spin_lock(&domain->iotlb_lock);
+               map = vhost_iotlb_itree_first(domain->iotlb,
+                                             info.start, info.last);
+               if (map) {
+                       info.start = map->start;
+                       info.last = map->last;
+                       info.capability = 0;
+                       if (domain->bounce_map && map->start == 0 &&
+                           map->last == domain->bounce_size - 1)
+                               info.capability |= VDUSE_IOVA_CAP_UMEM;
+               }
+               spin_unlock(&domain->iotlb_lock);
+               if (!map)
+                       break;
+
+               ret = -EFAULT;
+               if (copy_to_user(argp, &info, sizeof(info)))
+                       break;
+
+               ret = 0;
+               break;
+       }
        default:
                ret = -ENOIOCTLCMD;
                break;
@@ -1101,6 +1279,7 @@ static int vduse_dev_release(struct inode *inode, struct file *file)
 {
        struct vduse_dev *dev = file->private_data;
 
+       vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
        spin_lock(&dev->msg_lock);
        /* Make sure the inflight messages can be processed after reconnection */
        list_splice_init(&dev->recv_list, &dev->send_list);
@@ -1163,6 +1342,7 @@ static struct vduse_dev *vduse_dev_create(void)
                return NULL;
 
        mutex_init(&dev->lock);
+       mutex_init(&dev->mem_lock);
        spin_lock_init(&dev->msg_lock);
        INIT_LIST_HEAD(&dev->send_list);
        INIT_LIST_HEAD(&dev->recv_list);
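
Taken together, the three new ioctls let a VDUSE daemon back the kernel's bounce buffer with its own memory. A hedged userspace sketch of the expected flow (dev_fd and bounce_size are assumptions supplied by the caller):

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/vduse.h>

/* Probe the bounce region, then register anonymous pages for it. On
 * success the kernel pins these pages and frees its own bounce pages. */
static int vduse_use_own_bounce_pages(int dev_fd, uint64_t bounce_size)
{
	struct vduse_iova_info info;
	struct vduse_iova_umem umem;
	void *buf;

	memset(&info, 0, sizeof(info));
	info.last = bounce_size - 1;
	if (ioctl(dev_fd, VDUSE_IOTLB_GET_INFO, &info))
		return -1;
	if (!(info.capability & VDUSE_IOVA_CAP_UMEM))
		return -1;      /* region cannot be backed by user memory */

	/* must be page-aligned and cover the whole bounce region (iova 0) */
	buf = mmap(NULL, bounce_size, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED)
		return -1;

	memset(&umem, 0, sizeof(umem));
	umem.uaddr = (uintptr_t)buf;
	umem.iova = 0;
	umem.size = bounce_size;
	return ioctl(dev_fd, VDUSE_IOTLB_REG_UMEM, &umem);
}

VDUSE_IOTLB_DEREG_UMEM with the same iova and size undoes the registration; as the kernel side above shows, any in-use contents are copied back to freshly allocated kernel pages before the user pages are unpinned.
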
index 9b65509424dc6baf31ebe89bceb7d9ffaf96f5d1..7ebf106d50c15ecaffcecdd8ca5d58e6e0d08fa1 100644 (file)
@@ -159,9 +159,13 @@ enum {
 };
 
 #define VHOST_SCSI_MAX_TARGET  256
-#define VHOST_SCSI_MAX_VQ      128
+#define VHOST_SCSI_MAX_IO_VQ   1024
 #define VHOST_SCSI_MAX_EVENT   128
 
+static unsigned vhost_scsi_max_io_vqs = 128;
+module_param_named(max_io_vqs, vhost_scsi_max_io_vqs, uint, 0644);
+MODULE_PARM_DESC(max_io_vqs, "Set the max number of IO virtqueues a vhost scsi device can support. The default is 128. The max is 1024.");
+
 struct vhost_scsi_virtqueue {
        struct vhost_virtqueue vq;
        /*
@@ -186,7 +190,9 @@ struct vhost_scsi {
        char vs_vhost_wwpn[TRANSPORT_IQN_LEN];
 
        struct vhost_dev dev;
-       struct vhost_scsi_virtqueue vqs[VHOST_SCSI_MAX_VQ];
+       struct vhost_scsi_virtqueue *vqs;
+       unsigned long *compl_bitmap;
+       struct vhost_scsi_inflight **old_inflight;
 
        struct vhost_work vs_completion_work; /* cmd completion work item */
        struct llist_head vs_completion_list; /* cmd completion queue */
@@ -245,7 +251,7 @@ static void vhost_scsi_init_inflight(struct vhost_scsi *vs,
        struct vhost_virtqueue *vq;
        int idx, i;
 
-       for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
+       for (i = 0; i < vs->dev.nvqs;  i++) {
                vq = &vs->vqs[i].vq;
 
                mutex_lock(&vq->mutex);
@@ -533,7 +539,6 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
 {
        struct vhost_scsi *vs = container_of(work, struct vhost_scsi,
                                        vs_completion_work);
-       DECLARE_BITMAP(signal, VHOST_SCSI_MAX_VQ);
        struct virtio_scsi_cmd_resp v_rsp;
        struct vhost_scsi_cmd *cmd, *t;
        struct llist_node *llnode;
@@ -541,7 +546,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
        struct iov_iter iov_iter;
        int ret, vq;
 
-       bitmap_zero(signal, VHOST_SCSI_MAX_VQ);
+       bitmap_zero(vs->compl_bitmap, vs->dev.nvqs);
        llnode = llist_del_all(&vs->vs_completion_list);
        llist_for_each_entry_safe(cmd, t, llnode, tvc_completion_list) {
                se_cmd = &cmd->tvc_se_cmd;
@@ -566,7 +571,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
                        vhost_add_used(cmd->tvc_vq, cmd->tvc_vq_desc, 0);
                        q = container_of(cmd->tvc_vq, struct vhost_scsi_virtqueue, vq);
                        vq = q - vs->vqs;
-                       __set_bit(vq, signal);
+                       __set_bit(vq, vs->compl_bitmap);
                } else
                        pr_err("Faulted on virtio_scsi_cmd_resp\n");
 
@@ -574,8 +579,8 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
        }
 
        vq = -1;
-       while ((vq = find_next_bit(signal, VHOST_SCSI_MAX_VQ, vq + 1))
-               < VHOST_SCSI_MAX_VQ)
+       while ((vq = find_next_bit(vs->compl_bitmap, vs->dev.nvqs, vq + 1))
+               < vs->dev.nvqs)
                vhost_signal(&vs->dev, &vs->vqs[vq].vq);
 }
 
@@ -1419,26 +1424,25 @@ static void vhost_scsi_handle_kick(struct vhost_work *work)
 /* Callers must hold dev mutex */
 static void vhost_scsi_flush(struct vhost_scsi *vs)
 {
-       struct vhost_scsi_inflight *old_inflight[VHOST_SCSI_MAX_VQ];
        int i;
 
        /* Init new inflight and remember the old inflight */
-       vhost_scsi_init_inflight(vs, old_inflight);
+       vhost_scsi_init_inflight(vs, vs->old_inflight);
 
        /*
         * The inflight->kref was initialized to 1. We decrement it here to
         * indicate the start of the flush operation so that it will reach 0
         * when all the reqs are finished.
         */
-       for (i = 0; i < VHOST_SCSI_MAX_VQ; i++)
-               kref_put(&old_inflight[i]->kref, vhost_scsi_done_inflight);
+       for (i = 0; i < vs->dev.nvqs; i++)
+               kref_put(&vs->old_inflight[i]->kref, vhost_scsi_done_inflight);
 
        /* Flush both the vhost poll and vhost work */
        vhost_dev_flush(&vs->dev);
 
        /* Wait for all reqs issued before the flush to be finished */
-       for (i = 0; i < VHOST_SCSI_MAX_VQ; i++)
-               wait_for_completion(&old_inflight[i]->comp);
+       for (i = 0; i < vs->dev.nvqs; i++)
+               wait_for_completion(&vs->old_inflight[i]->comp);
 }
 
 static void vhost_scsi_destroy_vq_cmds(struct vhost_virtqueue *vq)
@@ -1601,7 +1605,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs,
                memcpy(vs->vs_vhost_wwpn, t->vhost_wwpn,
                       sizeof(vs->vs_vhost_wwpn));
 
-               for (i = VHOST_SCSI_VQ_IO; i < VHOST_SCSI_MAX_VQ; i++) {
+               for (i = VHOST_SCSI_VQ_IO; i < vs->dev.nvqs; i++) {
                        vq = &vs->vqs[i].vq;
                        if (!vhost_vq_is_setup(vq))
                                continue;
@@ -1611,7 +1615,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs,
                                goto destroy_vq_cmds;
                }
 
-               for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
+               for (i = 0; i < vs->dev.nvqs; i++) {
                        vq = &vs->vqs[i].vq;
                        mutex_lock(&vq->mutex);
                        vhost_vq_set_backend(vq, vs_tpg);
@@ -1713,7 +1717,7 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs,
                target_undepend_item(&se_tpg->tpg_group.cg_item);
        }
        if (match) {
-               for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
+               for (i = 0; i < vs->dev.nvqs; i++) {
                        vq = &vs->vqs[i].vq;
                        mutex_lock(&vq->mutex);
                        vhost_vq_set_backend(vq, NULL);
@@ -1722,7 +1726,7 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs,
                /* Make sure cmds are not running before tearing them down. */
                vhost_scsi_flush(vs);
 
-               for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
+               for (i = 0; i < vs->dev.nvqs; i++) {
                        vq = &vs->vqs[i].vq;
                        vhost_scsi_destroy_vq_cmds(vq);
                }
@@ -1762,7 +1766,7 @@ static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features)
                return -EFAULT;
        }
 
-       for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
+       for (i = 0; i < vs->dev.nvqs; i++) {
                vq = &vs->vqs[i].vq;
                mutex_lock(&vq->mutex);
                vq->acked_features = features;
@@ -1776,16 +1780,40 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
 {
        struct vhost_scsi *vs;
        struct vhost_virtqueue **vqs;
-       int r = -ENOMEM, i;
+       int r = -ENOMEM, i, nvqs = vhost_scsi_max_io_vqs;
 
        vs = kvzalloc(sizeof(*vs), GFP_KERNEL);
        if (!vs)
                goto err_vs;
 
-       vqs = kmalloc_array(VHOST_SCSI_MAX_VQ, sizeof(*vqs), GFP_KERNEL);
-       if (!vqs)
+       if (nvqs > VHOST_SCSI_MAX_IO_VQ) {
+               pr_err("Invalid max_io_vqs of %d. Using %d.\n", nvqs,
+                      VHOST_SCSI_MAX_IO_VQ);
+               nvqs = VHOST_SCSI_MAX_IO_VQ;
+       } else if (nvqs == 0) {
+               pr_err("Invalid max_io_vqs of %d. Using 1.\n", nvqs);
+               nvqs = 1;
+       }
+       nvqs += VHOST_SCSI_VQ_IO;
+
+       vs->compl_bitmap = bitmap_alloc(nvqs, GFP_KERNEL);
+       if (!vs->compl_bitmap)
+               goto err_compl_bitmap;
+
+       vs->old_inflight = kmalloc_array(nvqs, sizeof(*vs->old_inflight),
+                                        GFP_KERNEL | __GFP_ZERO);
+       if (!vs->old_inflight)
+               goto err_inflight;
+
+       vs->vqs = kmalloc_array(nvqs, sizeof(*vs->vqs),
+                               GFP_KERNEL | __GFP_ZERO);
+       if (!vs->vqs)
                goto err_vqs;
 
+       vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
+       if (!vqs)
+               goto err_local_vqs;
+
        vhost_work_init(&vs->vs_completion_work, vhost_scsi_complete_cmd_work);
        vhost_work_init(&vs->vs_event_work, vhost_scsi_evt_work);
 
@@ -1796,11 +1824,11 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
        vqs[VHOST_SCSI_VQ_EVT] = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
        vs->vqs[VHOST_SCSI_VQ_CTL].vq.handle_kick = vhost_scsi_ctl_handle_kick;
        vs->vqs[VHOST_SCSI_VQ_EVT].vq.handle_kick = vhost_scsi_evt_handle_kick;
-       for (i = VHOST_SCSI_VQ_IO; i < VHOST_SCSI_MAX_VQ; i++) {
+       for (i = VHOST_SCSI_VQ_IO; i < nvqs; i++) {
                vqs[i] = &vs->vqs[i].vq;
                vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick;
        }
-       vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV,
+       vhost_dev_init(&vs->dev, vqs, nvqs, UIO_MAXIOV,
                       VHOST_SCSI_WEIGHT, 0, true, NULL);
 
        vhost_scsi_init_inflight(vs, NULL);
@@ -1808,7 +1836,13 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
        f->private_data = vs;
        return 0;
 
+err_local_vqs:
+       kfree(vs->vqs);
 err_vqs:
+       kfree(vs->old_inflight);
+err_inflight:
+       bitmap_free(vs->compl_bitmap);
+err_compl_bitmap:
        kvfree(vs);
 err_vs:
        return r;
@@ -1826,6 +1860,9 @@ static int vhost_scsi_release(struct inode *inode, struct file *f)
        vhost_dev_stop(&vs->dev);
        vhost_dev_cleanup(&vs->dev);
        kfree(vs->dev.vqs);
+       kfree(vs->vqs);
+       kfree(vs->old_inflight);
+       bitmap_free(vs->compl_bitmap);
        kvfree(vs);
        return 0;
 }
index 23dcbfdfa13b19fb6f8ecfddd90769255ddf6902..166044642fd5cc268c867bb78ba91c351b9e3b3c 100644 (file)
@@ -347,6 +347,14 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v,
        return 0;
 }
 
+static bool vhost_vdpa_can_suspend(const struct vhost_vdpa *v)
+{
+       struct vdpa_device *vdpa = v->vdpa;
+       const struct vdpa_config_ops *ops = vdpa->config;
+
+       return ops->suspend;
+}
+
 static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
 {
        struct vdpa_device *vdpa = v->vdpa;
@@ -470,6 +478,22 @@ static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp)
        return 0;
 }
 
+/* After a successful return of this ioctl the device must not process more
+ * virtqueue descriptors. The device can answer reads or writes of config
+ * fields as if it were not suspended. In particular, writing to "queue_enable"
+ * with a value of 1 will not make the device start processing buffers.
+ */
+static long vhost_vdpa_suspend(struct vhost_vdpa *v)
+{
+       struct vdpa_device *vdpa = v->vdpa;
+       const struct vdpa_config_ops *ops = vdpa->config;
+
+       if (!ops->suspend)
+               return -EOPNOTSUPP;
+
+       return ops->suspend(vdpa);
+}
+
 static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
                                   void __user *argp)
 {
@@ -577,7 +601,11 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
        if (cmd == VHOST_SET_BACKEND_FEATURES) {
                if (copy_from_user(&features, featurep, sizeof(features)))
                        return -EFAULT;
-               if (features & ~VHOST_VDPA_BACKEND_FEATURES)
+               if (features & ~(VHOST_VDPA_BACKEND_FEATURES |
+                                BIT_ULL(VHOST_BACKEND_F_SUSPEND)))
+                       return -EOPNOTSUPP;
+               if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) &&
+                    !vhost_vdpa_can_suspend(v))
                        return -EOPNOTSUPP;
                vhost_set_backend_features(&v->vdev, features);
                return 0;
@@ -628,6 +656,8 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
                break;
        case VHOST_GET_BACKEND_FEATURES:
                features = VHOST_VDPA_BACKEND_FEATURES;
+               if (vhost_vdpa_can_suspend(v))
+                       features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND);
                if (copy_to_user(featurep, &features, sizeof(features)))
                        r = -EFAULT;
                break;
@@ -640,6 +670,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
        case VHOST_VDPA_GET_VQS_COUNT:
                r = vhost_vdpa_get_vqs_count(v, argp);
                break;
+       case VHOST_VDPA_SUSPEND:
+               r = vhost_vdpa_suspend(v);
+               break;
        default:
                r = vhost_dev_ioctl(&v->vdev, cmd, argp);
                if (r == -ENOIOCTLCMD)
@@ -1076,7 +1109,7 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
        if (!bus)
                return -EFAULT;
 
-       if (!iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
+       if (!device_iommu_capable(dma_dev, IOMMU_CAP_CACHE_COHERENCY))
                return -ENOTSUPP;
 
        v->domain = iommu_domain_alloc(bus);
@@ -1363,6 +1396,7 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa)
 
 err:
        put_device(&v->dev);
+       ida_simple_remove(&vhost_vdpa_ida, v->minor);
        return r;
 }
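
On the uAPI side, a VMM can discover and use the new suspend support roughly as follows (hedged sketch; fd is assumed to be an open /dev/vhost-vdpa-N descriptor):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/vhost.h>
#include <linux/vhost_types.h>

/* Sketch only: suspend a vhost-vdpa device, but only if the parent
 * driver implements the new ->suspend() operation. */
static int vhost_vdpa_try_suspend(int fd)
{
	uint64_t features;

	if (ioctl(fd, VHOST_GET_BACKEND_FEATURES, &features))
		return -1;
	if (!(features & (1ULL << VHOST_BACKEND_F_SUSPEND)))
		return -1;      /* parent driver has no ->suspend() op */

	/* ack the backend features we intend to rely on */
	if (ioctl(fd, VHOST_SET_BACKEND_FEATURES, &features))
		return -1;

	/* on success the device processes no more virtqueue descriptors */
	return ioctl(fd, VHOST_VDPA_SUSPEND);
}
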
 
index eab55accf381f83ad96ffaaa0998f276dca8551a..11f59dd06a74e1de5a8bc257dbe84aaf1993e39e 100644 (file)
@@ -1095,7 +1095,8 @@ EXPORT_SYMBOL(vringh_need_notify_kern);
 #if IS_REACHABLE(CONFIG_VHOST_IOTLB)
 
 static int iotlb_translate(const struct vringh *vrh,
-                          u64 addr, u64 len, struct bio_vec iov[],
+                          u64 addr, u64 len, u64 *translated,
+                          struct bio_vec iov[],
                           int iov_size, u32 perm)
 {
        struct vhost_iotlb_map *map;
@@ -1136,43 +1137,76 @@ static int iotlb_translate(const struct vringh *vrh,
 
        spin_unlock(vrh->iotlb_lock);
 
+       if (translated)
+               *translated = min(len, s);
+
        return ret;
 }
 
 static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
                                  void *src, size_t len)
 {
-       struct iov_iter iter;
-       struct bio_vec iov[16];
-       int ret;
+       u64 total_translated = 0;
 
-       ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
-                             len, iov, 16, VHOST_MAP_RO);
-       if (ret < 0)
-               return ret;
+       while (total_translated < len) {
+               struct bio_vec iov[16];
+               struct iov_iter iter;
+               u64 translated;
+               int ret;
 
-       iov_iter_bvec(&iter, READ, iov, ret, len);
+               ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
+                                     len - total_translated, &translated,
+                                     iov, ARRAY_SIZE(iov), VHOST_MAP_RO);
+               if (ret == -ENOBUFS)
+                       ret = ARRAY_SIZE(iov);
+               else if (ret < 0)
+                       return ret;
 
-       ret = copy_from_iter(dst, len, &iter);
+               iov_iter_bvec(&iter, READ, iov, ret, translated);
 
-       return ret;
+               ret = copy_from_iter(dst, translated, &iter);
+               if (ret < 0)
+                       return ret;
+
+               src += translated;
+               dst += translated;
+               total_translated += translated;
+       }
+
+       return total_translated;
 }
 
 static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
                                void *src, size_t len)
 {
-       struct iov_iter iter;
-       struct bio_vec iov[16];
-       int ret;
+       u64 total_translated = 0;
 
-       ret = iotlb_translate(vrh, (u64)(uintptr_t)dst,
-                             len, iov, 16, VHOST_MAP_WO);
-       if (ret < 0)
-               return ret;
+       while (total_translated < len) {
+               struct bio_vec iov[16];
+               struct iov_iter iter;
+               u64 translated;
+               int ret;
+
+               ret = iotlb_translate(vrh, (u64)(uintptr_t)dst,
+                                     len - total_translated, &translated,
+                                     iov, ARRAY_SIZE(iov), VHOST_MAP_WO);
+               if (ret == -ENOBUFS)
+                       ret = ARRAY_SIZE(iov);
+               else if (ret < 0)
+                       return ret;
 
-       iov_iter_bvec(&iter, WRITE, iov, ret, len);
+               iov_iter_bvec(&iter, WRITE, iov, ret, translated);
+
+               ret = copy_to_iter(src, translated, &iter);
+               if (ret < 0)
+                       return ret;
+
+               src += translated;
+               dst += translated;
+               total_translated += translated;
+       }
 
-       return copy_to_iter(src, len, &iter);
+       return total_translated;
 }
 
 static inline int getu16_iotlb(const struct vringh *vrh,
@@ -1183,7 +1217,7 @@ static inline int getu16_iotlb(const struct vringh *vrh,
        int ret;
 
        /* Atomic read is needed for getu16 */
-       ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
+       ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
                              &iov, 1, VHOST_MAP_RO);
        if (ret < 0)
                return ret;
@@ -1204,7 +1238,7 @@ static inline int putu16_iotlb(const struct vringh *vrh,
        int ret;
 
        /* Atomic write is needed for putu16 */
-       ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
+       ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
                              &iov, 1, VHOST_MAP_WO);
        if (ret < 0)
                return ret;
index 56c77f63cd224f7b97b37eaaa7efba3b66aa5b97..0a53a61231c2944092d3ecac74d5caf40aaca2d3 100644 (file)
@@ -35,11 +35,12 @@ if VIRTIO_MENU
 
 config VIRTIO_HARDEN_NOTIFICATION
         bool "Harden virtio notification"
+        depends on BROKEN
         help
           Enable this to harden the device notifications and suppress
           those that happen at a time where notifications are illegal.
 
-          Experimental: Note that several drivers still have bugs that
+          Experimental: Note that several drivers still have issues that
           may cause crashes or hangs when correct handling of
           notifications is enforced; depending on the subset of
           drivers and devices you use, this may or may not work.
@@ -126,9 +127,11 @@ config VIRTIO_MEM
         This driver provides access to virtio-mem paravirtualized memory
         devices, allowing to hotplug and hotunplug memory.
 
-        This driver was only tested under x86-64 and arm64, but should
-        theoretically work on all architectures that support memory hotplug
-        and hotremove.
+        This driver currently only supports x86-64 and arm64. Although it
+        should compile on other architectures that implement memory
+        hot(un)plug, architecture-specific and/or common
+        code changes may be required for virtio-mem, kdump and kexec to work as
+        expected.
 
         If unsure, say M.
 
index 14c142d77fba1b6d73b85251580bed803622f6f0..828ced060742358069ae33e46dd62899d5400760 100644 (file)
@@ -428,7 +428,9 @@ int register_virtio_device(struct virtio_device *dev)
                goto out;
 
        dev->index = err;
-       dev_set_name(&dev->dev, "virtio%u", dev->index);
+       err = dev_set_name(&dev->dev, "virtio%u", dev->index);
+       if (err)
+               goto out_ida_remove;
 
        err = virtio_device_of_init(dev);
        if (err)
index 083ff1eb743d3398a09400c48c50683319b7eb85..c492a57531c613f6923a3751880e40ef34481d1c 100644 (file)
@@ -360,7 +360,7 @@ static void vm_synchronize_cbs(struct virtio_device *vdev)
 
 static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned int index,
                                  void (*callback)(struct virtqueue *vq),
-                                 const char *name, bool ctx)
+                                 const char *name, u32 size, bool ctx)
 {
        struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
        struct virtio_mmio_vq_info *info;
@@ -395,14 +395,19 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned int in
                goto error_new_virtqueue;
        }
 
+       if (!size || size > num)
+               size = num;
+
        /* Create the vring */
-       vq = vring_create_virtqueue(index, num, VIRTIO_MMIO_VRING_ALIGN, vdev,
+       vq = vring_create_virtqueue(index, size, VIRTIO_MMIO_VRING_ALIGN, vdev,
                                 true, true, ctx, vm_notify, callback, name);
        if (!vq) {
                err = -ENOMEM;
                goto error_new_virtqueue;
        }
 
+       vq->num_max = num;
+
        /* Activate the queue */
        writel(virtqueue_get_vring_size(vq), vm_dev->base + VIRTIO_MMIO_QUEUE_NUM);
        if (vm_dev->version == 1) {
@@ -472,6 +477,7 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
                       struct virtqueue *vqs[],
                       vq_callback_t *callbacks[],
                       const char * const names[],
+                      u32 sizes[],
                       const bool *ctx,
                       struct irq_affinity *desc)
 {
@@ -487,6 +493,9 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
        if (err)
                return err;
 
+       if (of_property_read_bool(vm_dev->pdev->dev.of_node, "wakeup-source"))
+               enable_irq_wake(irq);
+
        for (i = 0; i < nvqs; ++i) {
                if (!names[i]) {
                        vqs[i] = NULL;
@@ -494,6 +503,7 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
                }
 
                vqs[i] = vm_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
+                                    sizes ? sizes[i] : 0,
                                     ctx ? ctx[i] : false);
                if (IS_ERR(vqs[i])) {
                        vm_del_vqs(vdev);
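
With the extra sizes argument threaded through each transport, a find_vqs() caller can now ask for rings smaller than the device maximum. A hedged kernel-side sketch based on the signature above (the function name is illustrative; passing NULL for sizes, or 0 for an entry, keeps today's behaviour of using the device maximum):

#include <linux/virtio.h>
#include <linux/virtio_config.h>

/* Sketch only: request two queues, capping the second ring at 128
 * entries while letting the first use the device maximum. */
static int demo_find_sized_vqs(struct virtio_device *vdev,
			       struct virtqueue *vqs[2],
			       vq_callback_t *cbs[2],
			       const char * const names[2])
{
	u32 sizes[2] = { 0, 128 };      /* 0 selects the device maximum */

	return vdev->config->find_vqs(vdev, 2, vqs, cbs, names,
				      sizes, NULL, NULL);
}

Note that vq->num_max is still set to the device maximum above, so a queue created with a smaller ring can later be resized back up to that limit via the queue reset machinery added elsewhere in this series.
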
index ca51fcc9daabb9016bafe01a9900963a3ed89ce9..00ad476a815d7254109cbc4e08785ff64db135d0 100644 (file)
@@ -174,6 +174,7 @@ error:
 static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned int index,
                                     void (*callback)(struct virtqueue *vq),
                                     const char *name,
+                                    u32 size,
                                     bool ctx,
                                     u16 msix_vec)
 {
@@ -186,7 +187,7 @@ static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned int in
        if (!info)
                return ERR_PTR(-ENOMEM);
 
-       vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, ctx,
+       vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, size, ctx,
                              msix_vec);
        if (IS_ERR(vq))
                goto out_info;
@@ -214,9 +215,15 @@ static void vp_del_vq(struct virtqueue *vq)
        struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index];
        unsigned long flags;
 
-       spin_lock_irqsave(&vp_dev->lock, flags);
-       list_del(&info->node);
-       spin_unlock_irqrestore(&vp_dev->lock, flags);
+       /*
+        * If the vq is still in the reset state (e.g. re-enabling it after
+        * a reset failed), info->node was never rejoined to the queue list,
+        * so don't remove it again; this also prevents unexpected irqs.
+        */
+       if (!vq->reset) {
+               spin_lock_irqsave(&vp_dev->lock, flags);
+               list_del(&info->node);
+               spin_unlock_irqrestore(&vp_dev->lock, flags);
+       }
 
        vp_dev->del_vq(info);
        kfree(info);
@@ -277,7 +284,7 @@ void vp_del_vqs(struct virtio_device *vdev)
 
 static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs,
                struct virtqueue *vqs[], vq_callback_t *callbacks[],
-               const char * const names[], bool per_vq_vectors,
+               const char * const names[], u32 sizes[], bool per_vq_vectors,
                const bool *ctx,
                struct irq_affinity *desc)
 {
@@ -320,8 +327,8 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs,
                else
                        msix_vec = VP_MSIX_VQ_VECTOR;
                vqs[i] = vp_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
-                                    ctx ? ctx[i] : false,
-                                    msix_vec);
+                                    sizes ? sizes[i] : 0,
+                                    ctx ? ctx[i] : false, msix_vec);
                if (IS_ERR(vqs[i])) {
                        err = PTR_ERR(vqs[i]);
                        goto error_find;
@@ -351,7 +358,7 @@ error_find:
 
 static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs,
                struct virtqueue *vqs[], vq_callback_t *callbacks[],
-               const char * const names[], const bool *ctx)
+               const char * const names[], u32 sizes[], const bool *ctx)
 {
        struct virtio_pci_device *vp_dev = to_vp_device(vdev);
        int i, err, queue_idx = 0;
@@ -373,6 +380,7 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs,
                        continue;
                }
                vqs[i] = vp_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
+                                    sizes ? sizes[i] : 0,
                                     ctx ? ctx[i] : false,
                                     VIRTIO_MSI_NO_VECTOR);
                if (IS_ERR(vqs[i])) {
@@ -390,21 +398,21 @@ out_del_vqs:
 /* the config->find_vqs() implementation */
 int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
                struct virtqueue *vqs[], vq_callback_t *callbacks[],
-               const char * const names[], const bool *ctx,
+               const char * const names[], u32 sizes[], const bool *ctx,
                struct irq_affinity *desc)
 {
        int err;
 
        /* Try MSI-X with one vector per queue. */
-       err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true, ctx, desc);
+       err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, sizes, true, ctx, desc);
        if (!err)
                return 0;
        /* Fallback: MSI-X with one vector for config, one shared for queues. */
-       err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false, ctx, desc);
+       err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, sizes, false, ctx, desc);
        if (!err)
                return 0;
        /* Finally fall back to regular interrupts. */
-       return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names, ctx);
+       return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names, sizes, ctx);
 }
 
 const char *vp_bus_name(struct virtio_device *vdev)
index 23112d84218fbd4f283c69319a1a0039fadcc9a8..c0448378b698623bfebeece90decfd76140eaafe 100644 (file)
@@ -80,6 +80,7 @@ struct virtio_pci_device {
                                      unsigned int idx,
                                      void (*callback)(struct virtqueue *vq),
                                      const char *name,
+                                     u32 size,
                                      bool ctx,
                                      u16 msix_vec);
        void (*del_vq)(struct virtio_pci_vq_info *info);
@@ -110,7 +111,7 @@ void vp_del_vqs(struct virtio_device *vdev);
 /* the config->find_vqs() implementation */
 int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
                struct virtqueue *vqs[], vq_callback_t *callbacks[],
-               const char * const names[], const bool *ctx,
+               const char * const names[], u32 sizes[], const bool *ctx,
                struct irq_affinity *desc);
 const char *vp_bus_name(struct virtio_device *vdev);
 
index a5e5721145c72db600e8bee0aff4d14d4dbff3e2..d75e5c4e637fc4979d506e3f372202223be11dba 100644 (file)
@@ -112,6 +112,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
                                  unsigned int index,
                                  void (*callback)(struct virtqueue *vq),
                                  const char *name,
+                                 u32 size,
                                  bool ctx,
                                  u16 msix_vec)
 {
@@ -125,16 +126,21 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
        if (!num || vp_legacy_get_queue_enable(&vp_dev->ldev, index))
                return ERR_PTR(-ENOENT);
 
+       if (!size || size > num)
+               size = num;
+
        info->msix_vector = msix_vec;
 
        /* create the vring */
-       vq = vring_create_virtqueue(index, num,
+       vq = vring_create_virtqueue(index, size,
                                    VIRTIO_PCI_VRING_ALIGN, &vp_dev->vdev,
                                    true, false, ctx,
                                    vp_notify, callback, name);
        if (!vq)
                return ERR_PTR(-ENOMEM);
 
+       vq->num_max = num;
+
        q_pfn = virtqueue_get_desc_addr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
        if (q_pfn >> 32) {
                dev_err(&vp_dev->pci_dev->dev,
index 623906b4996c767586c0a25411665d332a071988..f7965c5dd36b00218ffc9b4ce16b94e9f8399c71 100644 (file)
@@ -34,6 +34,9 @@ static void vp_transport_features(struct virtio_device *vdev, u64 features)
        if ((features & BIT_ULL(VIRTIO_F_SR_IOV)) &&
                        pci_find_ext_capability(pci_dev, PCI_EXT_CAP_ID_SRIOV))
                __virtio_set_bit(vdev, VIRTIO_F_SR_IOV);
+
+       if (features & BIT_ULL(VIRTIO_F_RING_RESET))
+               __virtio_set_bit(vdev, VIRTIO_F_RING_RESET);
 }
 
 /* virtio config->finalize_features() implementation */
@@ -176,6 +179,110 @@ static void vp_reset(struct virtio_device *vdev)
        vp_synchronize_vectors(vdev);
 }
 
+static int vp_active_vq(struct virtqueue *vq, u16 msix_vec)
+{
+       struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
+       struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
+       unsigned long index;
+
+       index = vq->index;
+
+       /* activate the queue */
+       vp_modern_set_queue_size(mdev, index, virtqueue_get_vring_size(vq));
+       vp_modern_queue_address(mdev, index, virtqueue_get_desc_addr(vq),
+                               virtqueue_get_avail_addr(vq),
+                               virtqueue_get_used_addr(vq));
+
+       if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
+               msix_vec = vp_modern_queue_vector(mdev, index, msix_vec);
+               if (msix_vec == VIRTIO_MSI_NO_VECTOR)
+                       return -EBUSY;
+       }
+
+       return 0;
+}
+
+static int vp_modern_disable_vq_and_reset(struct virtqueue *vq)
+{
+       struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
+       struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
+       struct virtio_pci_vq_info *info;
+       unsigned long flags;
+
+       if (!virtio_has_feature(vq->vdev, VIRTIO_F_RING_RESET))
+               return -ENOENT;
+
+       vp_modern_set_queue_reset(mdev, vq->index);
+
+       info = vp_dev->vqs[vq->index];
+
+       /* delete vq from irq handler */
+       spin_lock_irqsave(&vp_dev->lock, flags);
+       list_del(&info->node);
+       spin_unlock_irqrestore(&vp_dev->lock, flags);
+
+       INIT_LIST_HEAD(&info->node);
+
+#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
+       __virtqueue_break(vq);
+#endif
+
+       /* If the vq has an exclusive irq, call synchronize_irq() to wait
+        * for any in-flight handler to complete.
+        *
+        * note: We can't use disable_irq() since it conflicts with the affinity
+        * managed IRQ that is used by some drivers.
+        */
+       if (vp_dev->per_vq_vectors && info->msix_vector != VIRTIO_MSI_NO_VECTOR)
+               synchronize_irq(pci_irq_vector(vp_dev->pci_dev, info->msix_vector));
+
+       vq->reset = true;
+
+       return 0;
+}
+
+static int vp_modern_enable_vq_after_reset(struct virtqueue *vq)
+{
+       struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
+       struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
+       struct virtio_pci_vq_info *info;
+       unsigned long flags, index;
+       int err;
+
+       if (!vq->reset)
+               return -EBUSY;
+
+       index = vq->index;
+       info = vp_dev->vqs[index];
+
+       if (vp_modern_get_queue_reset(mdev, index))
+               return -EBUSY;
+
+       if (vp_modern_get_queue_enable(mdev, index))
+               return -EBUSY;
+
+       err = vp_active_vq(vq, info->msix_vector);
+       if (err)
+               return err;
+
+       if (vq->callback) {
+               spin_lock_irqsave(&vp_dev->lock, flags);
+               list_add(&info->node, &vp_dev->virtqueues);
+               spin_unlock_irqrestore(&vp_dev->lock, flags);
+       } else {
+               INIT_LIST_HEAD(&info->node);
+       }
+
+#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
+       __virtqueue_unbreak(vq);
+#endif
+
+       vp_modern_set_queue_enable(&vp_dev->mdev, index, true);
+       vq->reset = false;
+
+       return 0;
+}
+
 static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
 {
        return vp_modern_config_vector(&vp_dev->mdev, vector);
@@ -186,6 +293,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
                                  unsigned int index,
                                  void (*callback)(struct virtqueue *vq),
                                  const char *name,
+                                 u32 size,
                                  bool ctx,
                                  u16 msix_vec)
 {
@@ -203,47 +311,39 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
        if (!num || vp_modern_get_queue_enable(mdev, index))
                return ERR_PTR(-ENOENT);
 
-       if (num & (num - 1)) {
-               dev_warn(&vp_dev->pci_dev->dev, "bad queue size %u", num);
+       if (!size || size > num)
+               size = num;
+
+       if (size & (size - 1)) {
+               dev_warn(&vp_dev->pci_dev->dev, "bad queue size %u", size);
                return ERR_PTR(-EINVAL);
        }
 
        info->msix_vector = msix_vec;
 
        /* create the vring */
-       vq = vring_create_virtqueue(index, num,
+       vq = vring_create_virtqueue(index, size,
                                    SMP_CACHE_BYTES, &vp_dev->vdev,
                                    true, true, ctx,
                                    vp_notify, callback, name);
        if (!vq)
                return ERR_PTR(-ENOMEM);
 
-       /* activate the queue */
-       vp_modern_set_queue_size(mdev, index, virtqueue_get_vring_size(vq));
-       vp_modern_queue_address(mdev, index, virtqueue_get_desc_addr(vq),
-                               virtqueue_get_avail_addr(vq),
-                               virtqueue_get_used_addr(vq));
+       vq->num_max = num;
+
+       err = vp_active_vq(vq, msix_vec);
+       if (err)
+               goto err;
 
        vq->priv = (void __force *)vp_modern_map_vq_notify(mdev, index, NULL);
        if (!vq->priv) {
                err = -ENOMEM;
-               goto err_map_notify;
-       }
-
-       if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
-               msix_vec = vp_modern_queue_vector(mdev, index, msix_vec);
-               if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
-                       err = -EBUSY;
-                       goto err_assign_vector;
-               }
+               goto err;
        }
 
        return vq;
 
-err_assign_vector:
-       if (!mdev->notify_base)
-               pci_iounmap(mdev->pci_dev, (void __iomem __force *)vq->priv);
-err_map_notify:
+err:
        vring_del_virtqueue(vq);
        return ERR_PTR(err);
 }
@@ -251,12 +351,15 @@ err_map_notify:
 static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
                              struct virtqueue *vqs[],
                              vq_callback_t *callbacks[],
-                             const char * const names[], const bool *ctx,
+                             const char * const names[],
+                             u32 sizes[],
+                             const bool *ctx,
                              struct irq_affinity *desc)
 {
        struct virtio_pci_device *vp_dev = to_vp_device(vdev);
        struct virtqueue *vq;
-       int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, desc);
+       int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, sizes, ctx,
+                            desc);
 
        if (rc)
                return rc;
@@ -401,6 +504,8 @@ static const struct virtio_config_ops virtio_pci_config_nodev_ops = {
        .set_vq_affinity = vp_set_vq_affinity,
        .get_vq_affinity = vp_get_vq_affinity,
        .get_shm_region  = vp_get_shm_region,
+       .disable_vq_and_reset = vp_modern_disable_vq_and_reset,
+       .enable_vq_after_reset = vp_modern_enable_vq_after_reset,
 };
 
 static const struct virtio_config_ops virtio_pci_config_ops = {
@@ -419,6 +524,8 @@ static const struct virtio_config_ops virtio_pci_config_ops = {
        .set_vq_affinity = vp_set_vq_affinity,
        .get_vq_affinity = vp_get_vq_affinity,
        .get_shm_region  = vp_get_shm_region,
+       .disable_vq_and_reset = vp_modern_disable_vq_and_reset,
+       .enable_vq_after_reset = vp_modern_enable_vq_after_reset,
 };
 
 /* the PCI probing function */
index fa2a9445bb18c8b3799f8da51e520c45f34643a3..869cb46bef9603597b44db5f72d19c186e6c056e 100644 (file)
@@ -3,6 +3,7 @@
 #include <linux/virtio_pci_modern.h>
 #include <linux/module.h>
 #include <linux/pci.h>
+#include <linux/delay.h>
 
 /*
  * vp_modern_map_capability - map a part of virtio pci capability
@@ -474,6 +475,44 @@ void vp_modern_set_status(struct virtio_pci_modern_device *mdev,
 }
 EXPORT_SYMBOL_GPL(vp_modern_set_status);
 
+/*
+ * vp_modern_get_queue_reset - get the queue reset status
+ * @mdev: the modern virtio-pci device
+ * @index: queue index
+ */
+int vp_modern_get_queue_reset(struct virtio_pci_modern_device *mdev, u16 index)
+{
+       struct virtio_pci_modern_common_cfg __iomem *cfg;
+
+       cfg = (struct virtio_pci_modern_common_cfg __iomem *)mdev->common;
+
+       vp_iowrite16(index, &cfg->cfg.queue_select);
+       return vp_ioread16(&cfg->queue_reset);
+}
+EXPORT_SYMBOL_GPL(vp_modern_get_queue_reset);
+
+/*
+ * vp_modern_set_queue_reset - reset the queue
+ * @mdev: the modern virtio-pci device
+ * @index: queue index
+ */
+void vp_modern_set_queue_reset(struct virtio_pci_modern_device *mdev, u16 index)
+{
+       struct virtio_pci_modern_common_cfg __iomem *cfg;
+
+       cfg = (struct virtio_pci_modern_common_cfg __iomem *)mdev->common;
+
+       vp_iowrite16(index, &cfg->cfg.queue_select);
+       vp_iowrite16(1, &cfg->queue_reset);
+
+       while (vp_ioread16(&cfg->queue_reset))
+               msleep(1);
+
+       while (vp_ioread16(&cfg->cfg.queue_enable))
+               msleep(1);
+}
+EXPORT_SYMBOL_GPL(vp_modern_set_queue_reset);
+
 /*
  * vp_modern_queue_vector - set the MSIX vector for a specific virtqueue
  * @mdev: the modern virtio-pci device
index 643ca779fcc6354ece2c1d473bf5dbbd81ae97db..d66c8e6d0ef313f93663bfc420603034c03d8b58 100644 (file)
@@ -85,6 +85,71 @@ struct vring_desc_extra {
        u16 next;                       /* The next desc state in a list. */
 };
 
+struct vring_virtqueue_split {
+       /* Actual memory layout for this queue. */
+       struct vring vring;
+
+       /* Last written value to avail->flags */
+       u16 avail_flags_shadow;
+
+       /*
+        * Last written value to avail->idx in
+        * guest byte order.
+        */
+       u16 avail_idx_shadow;
+
+       /* Per-descriptor state. */
+       struct vring_desc_state_split *desc_state;
+       struct vring_desc_extra *desc_extra;
+
+       /* DMA address and size information */
+       dma_addr_t queue_dma_addr;
+       size_t queue_size_in_bytes;
+
+       /*
+        * The creation-time parameters are kept so that a new vring can
+        * be allocated with the same settings later (e.g. on resize).
+        */
+       u32 vring_align;
+       bool may_reduce_num;
+};
+
+struct vring_virtqueue_packed {
+       /* Actual memory layout for this queue. */
+       struct {
+               unsigned int num;
+               struct vring_packed_desc *desc;
+               struct vring_packed_desc_event *driver;
+               struct vring_packed_desc_event *device;
+       } vring;
+
+       /* Driver ring wrap counter. */
+       bool avail_wrap_counter;
+
+       /* Avail used flags. */
+       u16 avail_used_flags;
+
+       /* Index of the next avail descriptor. */
+       u16 next_avail_idx;
+
+       /*
+        * Last written value to driver->flags in
+        * guest byte order.
+        */
+       u16 event_flags_shadow;
+
+       /* Per-descriptor state. */
+       struct vring_desc_state_packed *desc_state;
+       struct vring_desc_extra *desc_extra;
+
+       /* DMA address and size information */
+       dma_addr_t ring_dma_addr;
+       dma_addr_t driver_event_dma_addr;
+       dma_addr_t device_event_dma_addr;
+       size_t ring_size_in_bytes;
+       size_t event_size_in_bytes;
+};
+
 struct vring_virtqueue {
        struct virtqueue vq;
 
@@ -124,64 +189,10 @@ struct vring_virtqueue {
 
        union {
                /* Available for split ring */
-               struct {
-                       /* Actual memory layout for this queue. */
-                       struct vring vring;
-
-                       /* Last written value to avail->flags */
-                       u16 avail_flags_shadow;
-
-                       /*
-                        * Last written value to avail->idx in
-                        * guest byte order.
-                        */
-                       u16 avail_idx_shadow;
-
-                       /* Per-descriptor state. */
-                       struct vring_desc_state_split *desc_state;
-                       struct vring_desc_extra *desc_extra;
-
-                       /* DMA address and size information */
-                       dma_addr_t queue_dma_addr;
-                       size_t queue_size_in_bytes;
-               } split;
+               struct vring_virtqueue_split split;
 
                /* Available for packed ring */
-               struct {
-                       /* Actual memory layout for this queue. */
-                       struct {
-                               unsigned int num;
-                               struct vring_packed_desc *desc;
-                               struct vring_packed_desc_event *driver;
-                               struct vring_packed_desc_event *device;
-                       } vring;
-
-                       /* Driver ring wrap counter. */
-                       bool avail_wrap_counter;
-
-                       /* Avail used flags. */
-                       u16 avail_used_flags;
-
-                       /* Index of the next avail descriptor. */
-                       u16 next_avail_idx;
-
-                       /*
-                        * Last written value to driver->flags in
-                        * guest byte order.
-                        */
-                       u16 event_flags_shadow;
-
-                       /* Per-descriptor state. */
-                       struct vring_desc_state_packed *desc_state;
-                       struct vring_desc_extra *desc_extra;
-
-                       /* DMA address and size information */
-                       dma_addr_t ring_dma_addr;
-                       dma_addr_t driver_event_dma_addr;
-                       dma_addr_t device_event_dma_addr;
-                       size_t ring_size_in_bytes;
-                       size_t event_size_in_bytes;
-               } packed;
+               struct vring_virtqueue_packed packed;
        };
 
        /* How to notify other side. FIXME: commonalize hcalls! */
@@ -200,6 +211,16 @@ struct vring_virtqueue {
 #endif
 };
 
+static struct virtqueue *__vring_new_virtqueue(unsigned int index,
+                                              struct vring_virtqueue_split *vring_split,
+                                              struct virtio_device *vdev,
+                                              bool weak_barriers,
+                                              bool context,
+                                              bool (*notify)(struct virtqueue *),
+                                              void (*callback)(struct virtqueue *),
+                                              const char *name);
+static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num);
+static void vring_free(struct virtqueue *_vq);
 
 /*
  * Helpers.
@@ -364,6 +385,24 @@ static int vring_mapping_error(const struct vring_virtqueue *vq,
        return dma_mapping_error(vring_dma_dev(vq), addr);
 }
 
+static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
+{
+       vq->vq.num_free = num;
+
+       if (vq->packed_ring)
+               vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR);
+       else
+               vq->last_used_idx = 0;
+
+       vq->event_triggered = false;
+       vq->num_added = 0;
+
+#ifdef DEBUG
+       vq->in_use = false;
+       vq->last_add_time_valid = false;
+#endif
+}
+
 
 /*
  * Split ring specific functions - *_split().
@@ -907,28 +946,107 @@ static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
        return NULL;
 }
 
-static struct virtqueue *vring_create_virtqueue_split(
-       unsigned int index,
-       unsigned int num,
-       unsigned int vring_align,
-       struct virtio_device *vdev,
-       bool weak_barriers,
-       bool may_reduce_num,
-       bool context,
-       bool (*notify)(struct virtqueue *),
-       void (*callback)(struct virtqueue *),
-       const char *name)
+static void virtqueue_vring_init_split(struct vring_virtqueue_split *vring_split,
+                                      struct vring_virtqueue *vq)
+{
+       struct virtio_device *vdev;
+
+       vdev = vq->vq.vdev;
+
+       vring_split->avail_flags_shadow = 0;
+       vring_split->avail_idx_shadow = 0;
+
+       /* No callback?  Tell other side not to bother us. */
+       if (!vq->vq.callback) {
+               vring_split->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
+               if (!vq->event)
+                       vring_split->vring.avail->flags = cpu_to_virtio16(vdev,
+                                       vring_split->avail_flags_shadow);
+       }
+}
+
+static void virtqueue_reinit_split(struct vring_virtqueue *vq)
+{
+       int num;
+
+       num = vq->split.vring.num;
+
+       vq->split.vring.avail->flags = 0;
+       vq->split.vring.avail->idx = 0;
+
+       /* reset avail event */
+       vq->split.vring.avail->ring[num] = 0;
+
+       vq->split.vring.used->flags = 0;
+       vq->split.vring.used->idx = 0;
+
+       /* reset used event */
+       *(__virtio16 *)&(vq->split.vring.used->ring[num]) = 0;
+
+       virtqueue_init(vq, num);
+
+       virtqueue_vring_init_split(&vq->split, vq);
+}
+
+static void virtqueue_vring_attach_split(struct vring_virtqueue *vq,
+                                        struct vring_virtqueue_split *vring_split)
+{
+       vq->split = *vring_split;
+
+       /* Put everything in free lists. */
+       vq->free_head = 0;
+}
+
+static int vring_alloc_state_extra_split(struct vring_virtqueue_split *vring_split)
+{
+       struct vring_desc_state_split *state;
+       struct vring_desc_extra *extra;
+       u32 num = vring_split->vring.num;
+
+       state = kmalloc_array(num, sizeof(struct vring_desc_state_split), GFP_KERNEL);
+       if (!state)
+               goto err_state;
+
+       extra = vring_alloc_desc_extra(num);
+       if (!extra)
+               goto err_extra;
+
+       memset(state, 0, num * sizeof(struct vring_desc_state_split));
+
+       vring_split->desc_state = state;
+       vring_split->desc_extra = extra;
+       return 0;
+
+err_extra:
+       kfree(state);
+err_state:
+       return -ENOMEM;
+}
+
+static void vring_free_split(struct vring_virtqueue_split *vring_split,
+                            struct virtio_device *vdev)
+{
+       vring_free_queue(vdev, vring_split->queue_size_in_bytes,
+                        vring_split->vring.desc,
+                        vring_split->queue_dma_addr);
+
+       kfree(vring_split->desc_state);
+       kfree(vring_split->desc_extra);
+}
+
+static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split,
+                                  struct virtio_device *vdev,
+                                  u32 num,
+                                  unsigned int vring_align,
+                                  bool may_reduce_num)
 {
-       struct virtqueue *vq;
        void *queue = NULL;
        dma_addr_t dma_addr;
-       size_t queue_size_in_bytes;
-       struct vring vring;
 
        /* We assume num is a power of 2. */
        if (num & (num - 1)) {
                dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
-               return NULL;
+               return -EINVAL;
        }
 
        /* TODO: allocate each queue chunk individually */
@@ -939,11 +1057,11 @@ static struct virtqueue *vring_create_virtqueue_split(
                if (queue)
                        break;
                if (!may_reduce_num)
-                       return NULL;
+                       return -ENOMEM;
        }
 
        if (!num)
-               return NULL;
+               return -ENOMEM;
 
        if (!queue) {
                /* Try to get a single page. You are my only hope! */
@@ -951,26 +1069,85 @@ static struct virtqueue *vring_create_virtqueue_split(
                                          &dma_addr, GFP_KERNEL|__GFP_ZERO);
        }
        if (!queue)
-               return NULL;
+               return -ENOMEM;
+
+       vring_init(&vring_split->vring, num, queue, vring_align);
+
+       vring_split->queue_dma_addr = dma_addr;
+       vring_split->queue_size_in_bytes = vring_size(num, vring_align);
 
-       queue_size_in_bytes = vring_size(num, vring_align);
-       vring_init(&vring, num, queue, vring_align);
+       vring_split->vring_align = vring_align;
+       vring_split->may_reduce_num = may_reduce_num;
 
-       vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
-                                  notify, callback, name);
+       return 0;
+}
+
+static struct virtqueue *vring_create_virtqueue_split(
+       unsigned int index,
+       unsigned int num,
+       unsigned int vring_align,
+       struct virtio_device *vdev,
+       bool weak_barriers,
+       bool may_reduce_num,
+       bool context,
+       bool (*notify)(struct virtqueue *),
+       void (*callback)(struct virtqueue *),
+       const char *name)
+{
+       struct vring_virtqueue_split vring_split = {};
+       struct virtqueue *vq;
+       int err;
+
+       err = vring_alloc_queue_split(&vring_split, vdev, num, vring_align,
+                                     may_reduce_num);
+       if (err)
+               return NULL;
+
+       vq = __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers,
+                                  context, notify, callback, name);
        if (!vq) {
-               vring_free_queue(vdev, queue_size_in_bytes, queue,
-                                dma_addr);
+               vring_free_split(&vring_split, vdev);
                return NULL;
        }
 
-       to_vvq(vq)->split.queue_dma_addr = dma_addr;
-       to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes;
        to_vvq(vq)->we_own_ring = true;
 
        return vq;
 }
 
+static int virtqueue_resize_split(struct virtqueue *_vq, u32 num)
+{
+       struct vring_virtqueue_split vring_split = {};
+       struct vring_virtqueue *vq = to_vvq(_vq);
+       struct virtio_device *vdev = _vq->vdev;
+       int err;
+
+       err = vring_alloc_queue_split(&vring_split, vdev, num,
+                                     vq->split.vring_align,
+                                     vq->split.may_reduce_num);
+       if (err)
+               goto err;
+
+       err = vring_alloc_state_extra_split(&vring_split);
+       if (err)
+               goto err_state_extra;
+
+       vring_free(&vq->vq);
+
+       virtqueue_vring_init_split(&vring_split, vq);
+
+       virtqueue_init(vq, vring_split.vring.num);
+       virtqueue_vring_attach_split(vq, &vring_split);
+
+       return 0;
+
+err_state_extra:
+       vring_free_split(&vring_split, vdev);
+err:
+       virtqueue_reinit_split(vq);
+       return -ENOMEM;
+}
+
 
 /*
  * Packed ring specific functions - *_packed().
@@ -1637,8 +1814,7 @@ static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
        return NULL;
 }
 
-static struct vring_desc_extra *vring_alloc_desc_extra(struct vring_virtqueue *vq,
-                                                      unsigned int num)
+static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num)
 {
        struct vring_desc_extra *desc_extra;
        unsigned int i;
@@ -1656,19 +1832,32 @@ static struct vring_desc_extra *vring_alloc_desc_extra(struct vring_virtqueue *v
        return desc_extra;
 }
 
-static struct virtqueue *vring_create_virtqueue_packed(
-       unsigned int index,
-       unsigned int num,
-       unsigned int vring_align,
-       struct virtio_device *vdev,
-       bool weak_barriers,
-       bool may_reduce_num,
-       bool context,
-       bool (*notify)(struct virtqueue *),
-       void (*callback)(struct virtqueue *),
-       const char *name)
+static void vring_free_packed(struct vring_virtqueue_packed *vring_packed,
+                             struct virtio_device *vdev)
+{
+       if (vring_packed->vring.desc)
+               vring_free_queue(vdev, vring_packed->ring_size_in_bytes,
+                                vring_packed->vring.desc,
+                                vring_packed->ring_dma_addr);
+
+       if (vring_packed->vring.driver)
+               vring_free_queue(vdev, vring_packed->event_size_in_bytes,
+                                vring_packed->vring.driver,
+                                vring_packed->driver_event_dma_addr);
+
+       if (vring_packed->vring.device)
+               vring_free_queue(vdev, vring_packed->event_size_in_bytes,
+                                vring_packed->vring.device,
+                                vring_packed->device_event_dma_addr);
+
+       kfree(vring_packed->desc_state);
+       kfree(vring_packed->desc_extra);
+}
+
+static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed,
+                                   struct virtio_device *vdev,
+                                   u32 num)
 {
-       struct vring_virtqueue *vq;
        struct vring_packed_desc *ring;
        struct vring_packed_desc_event *driver, *device;
        dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
@@ -1680,7 +1869,11 @@ static struct virtqueue *vring_create_virtqueue_packed(
                                 &ring_dma_addr,
                                 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
        if (!ring)
-               goto err_ring;
+               goto err;
+
+       vring_packed->vring.desc         = ring;
+       vring_packed->ring_dma_addr      = ring_dma_addr;
+       vring_packed->ring_size_in_bytes = ring_size_in_bytes;
 
        event_size_in_bytes = sizeof(struct vring_packed_desc_event);
 
@@ -1688,13 +1881,112 @@ static struct virtqueue *vring_create_virtqueue_packed(
                                   &driver_event_dma_addr,
                                   GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
        if (!driver)
-               goto err_driver;
+               goto err;
+
+       vring_packed->vring.driver          = driver;
+       vring_packed->event_size_in_bytes   = event_size_in_bytes;
+       vring_packed->driver_event_dma_addr = driver_event_dma_addr;
 
        device = vring_alloc_queue(vdev, event_size_in_bytes,
                                   &device_event_dma_addr,
                                   GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
        if (!device)
-               goto err_device;
+               goto err;
+
+       vring_packed->vring.device          = device;
+       vring_packed->device_event_dma_addr = device_event_dma_addr;
+
+       vring_packed->vring.num = num;
+
+       return 0;
+
+err:
+       vring_free_packed(vring_packed, vdev);
+       return -ENOMEM;
+}
+
+static int vring_alloc_state_extra_packed(struct vring_virtqueue_packed *vring_packed)
+{
+       struct vring_desc_state_packed *state;
+       struct vring_desc_extra *extra;
+       u32 num = vring_packed->vring.num;
+
+       state = kmalloc_array(num, sizeof(struct vring_desc_state_packed), GFP_KERNEL);
+       if (!state)
+               goto err_desc_state;
+
+       memset(state, 0, num * sizeof(struct vring_desc_state_packed));
+
+       extra = vring_alloc_desc_extra(num);
+       if (!extra)
+               goto err_desc_extra;
+
+       vring_packed->desc_state = state;
+       vring_packed->desc_extra = extra;
+
+       return 0;
+
+err_desc_extra:
+       kfree(state);
+err_desc_state:
+       return -ENOMEM;
+}
+
+static void virtqueue_vring_init_packed(struct vring_virtqueue_packed *vring_packed,
+                                       bool callback)
+{
+       vring_packed->next_avail_idx = 0;
+       vring_packed->avail_wrap_counter = 1;
+       vring_packed->event_flags_shadow = 0;
+       vring_packed->avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
+
+       /* No callback?  Tell other side not to bother us. */
+       if (!callback) {
+               vring_packed->event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
+               vring_packed->vring.driver->flags =
+                       cpu_to_le16(vring_packed->event_flags_shadow);
+       }
+}
+
+static void virtqueue_vring_attach_packed(struct vring_virtqueue *vq,
+                                         struct vring_virtqueue_packed *vring_packed)
+{
+       vq->packed = *vring_packed;
+
+       /* Put everything in free lists. */
+       vq->free_head = 0;
+}
+
+static void virtqueue_reinit_packed(struct vring_virtqueue *vq)
+{
+       memset(vq->packed.vring.device, 0, vq->packed.event_size_in_bytes);
+       memset(vq->packed.vring.driver, 0, vq->packed.event_size_in_bytes);
+
+       /* we need to reset the desc.flags. For more, see is_used_desc_packed() */
+       memset(vq->packed.vring.desc, 0, vq->packed.ring_size_in_bytes);
+
+       virtqueue_init(vq, vq->packed.vring.num);
+       virtqueue_vring_init_packed(&vq->packed, !!vq->vq.callback);
+}
+
+static struct virtqueue *vring_create_virtqueue_packed(
+       unsigned int index,
+       unsigned int num,
+       unsigned int vring_align,
+       struct virtio_device *vdev,
+       bool weak_barriers,
+       bool may_reduce_num,
+       bool context,
+       bool (*notify)(struct virtqueue *),
+       void (*callback)(struct virtqueue *),
+       const char *name)
+{
+       struct vring_virtqueue_packed vring_packed = {};
+       struct vring_virtqueue *vq;
+       int err;
+
+       if (vring_alloc_queue_packed(&vring_packed, vdev, num))
+               goto err_ring;
 
        vq = kmalloc(sizeof(*vq), GFP_KERNEL);
        if (!vq)
@@ -1703,8 +1995,8 @@ static struct virtqueue *vring_create_virtqueue_packed(
        vq->vq.callback = callback;
        vq->vq.vdev = vdev;
        vq->vq.name = name;
-       vq->vq.num_free = num;
        vq->vq.index = index;
+       vq->vq.reset = false;
        vq->we_own_ring = true;
        vq->notify = notify;
        vq->weak_barriers = weak_barriers;
@@ -1713,15 +2005,8 @@ static struct virtqueue *vring_create_virtqueue_packed(
 #else
        vq->broken = false;
 #endif
-       vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR);
-       vq->event_triggered = false;
-       vq->num_added = 0;
        vq->packed_ring = true;
        vq->use_dma_api = vring_use_dma_api(vdev);
-#ifdef DEBUG
-       vq->in_use = false;
-       vq->last_add_time_valid = false;
-#endif
 
        vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
                !context;
@@ -1730,65 +2015,58 @@ static struct virtqueue *vring_create_virtqueue_packed(
        if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
                vq->weak_barriers = false;
 
-       vq->packed.ring_dma_addr = ring_dma_addr;
-       vq->packed.driver_event_dma_addr = driver_event_dma_addr;
-       vq->packed.device_event_dma_addr = device_event_dma_addr;
-
-       vq->packed.ring_size_in_bytes = ring_size_in_bytes;
-       vq->packed.event_size_in_bytes = event_size_in_bytes;
-
-       vq->packed.vring.num = num;
-       vq->packed.vring.desc = ring;
-       vq->packed.vring.driver = driver;
-       vq->packed.vring.device = device;
-
-       vq->packed.next_avail_idx = 0;
-       vq->packed.avail_wrap_counter = 1;
-       vq->packed.event_flags_shadow = 0;
-       vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
-
-       vq->packed.desc_state = kmalloc_array(num,
-                       sizeof(struct vring_desc_state_packed),
-                       GFP_KERNEL);
-       if (!vq->packed.desc_state)
-               goto err_desc_state;
-
-       memset(vq->packed.desc_state, 0,
-               num * sizeof(struct vring_desc_state_packed));
-
-       /* Put everything in free lists. */
-       vq->free_head = 0;
+       err = vring_alloc_state_extra_packed(&vring_packed);
+       if (err)
+               goto err_state_extra;
 
-       vq->packed.desc_extra = vring_alloc_desc_extra(vq, num);
-       if (!vq->packed.desc_extra)
-               goto err_desc_extra;
+       virtqueue_vring_init_packed(&vring_packed, !!callback);
 
-       /* No callback?  Tell other side not to bother us. */
-       if (!callback) {
-               vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
-               vq->packed.vring.driver->flags =
-                       cpu_to_le16(vq->packed.event_flags_shadow);
-       }
+       virtqueue_init(vq, num);
+       virtqueue_vring_attach_packed(vq, &vring_packed);
 
        spin_lock(&vdev->vqs_list_lock);
        list_add_tail(&vq->vq.list, &vdev->vqs);
        spin_unlock(&vdev->vqs_list_lock);
        return &vq->vq;
 
-err_desc_extra:
-       kfree(vq->packed.desc_state);
-err_desc_state:
+err_state_extra:
        kfree(vq);
 err_vq:
-       vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr);
-err_device:
-       vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr);
-err_driver:
-       vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr);
+       vring_free_packed(&vring_packed, vdev);
 err_ring:
        return NULL;
 }
 
+static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num)
+{
+       struct vring_virtqueue_packed vring_packed = {};
+       struct vring_virtqueue *vq = to_vvq(_vq);
+       struct virtio_device *vdev = _vq->vdev;
+       int err;
+
+       if (vring_alloc_queue_packed(&vring_packed, vdev, num))
+               goto err_ring;
+
+       err = vring_alloc_state_extra_packed(&vring_packed);
+       if (err)
+               goto err_state_extra;
+
+       vring_free(&vq->vq);
+
+       virtqueue_vring_init_packed(&vring_packed, !!vq->vq.callback);
+
+       virtqueue_init(vq, vring_packed.vring.num);
+       virtqueue_vring_attach_packed(vq, &vring_packed);
+
+       return 0;
+
+err_state_extra:
+       vring_free_packed(&vring_packed, vdev);
+err_ring:
+       virtqueue_reinit_packed(vq);
+       return -ENOMEM;
+}
+
 
 /*
  * Generic functions and exported symbols.
@@ -2131,8 +2409,8 @@ EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
  * @_vq: the struct virtqueue we're talking about.
  *
  * Returns NULL or the "data" token handed to virtqueue_add_*().
- * This is not valid on an active queue; it is useful only for device
- * shutdown.
+ * This is not valid on an active queue; it is useful for device
+ * shutdown or when resetting a queue.
  */
 void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
 {
@@ -2180,16 +2458,17 @@ irqreturn_t vring_interrupt(int irq, void *_vq)
 EXPORT_SYMBOL_GPL(vring_interrupt);
 
 /* Only available for split ring */
-struct virtqueue *__vring_new_virtqueue(unsigned int index,
-                                       struct vring vring,
-                                       struct virtio_device *vdev,
-                                       bool weak_barriers,
-                                       bool context,
-                                       bool (*notify)(struct virtqueue *),
-                                       void (*callback)(struct virtqueue *),
-                                       const char *name)
+static struct virtqueue *__vring_new_virtqueue(unsigned int index,
+                                              struct vring_virtqueue_split *vring_split,
+                                              struct virtio_device *vdev,
+                                              bool weak_barriers,
+                                              bool context,
+                                              bool (*notify)(struct virtqueue *),
+                                              void (*callback)(struct virtqueue *),
+                                              const char *name)
 {
        struct vring_virtqueue *vq;
+       int err;
 
        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
                return NULL;
@@ -2202,8 +2481,8 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
        vq->vq.callback = callback;
        vq->vq.vdev = vdev;
        vq->vq.name = name;
-       vq->vq.num_free = vring.num;
        vq->vq.index = index;
+       vq->vq.reset = false;
        vq->we_own_ring = false;
        vq->notify = notify;
        vq->weak_barriers = weak_barriers;
@@ -2212,14 +2491,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
 #else
        vq->broken = false;
 #endif
-       vq->last_used_idx = 0;
-       vq->event_triggered = false;
-       vq->num_added = 0;
        vq->use_dma_api = vring_use_dma_api(vdev);
-#ifdef DEBUG
-       vq->in_use = false;
-       vq->last_add_time_valid = false;
-#endif
 
        vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
                !context;
@@ -2228,47 +2500,22 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
        if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
                vq->weak_barriers = false;
 
-       vq->split.queue_dma_addr = 0;
-       vq->split.queue_size_in_bytes = 0;
-
-       vq->split.vring = vring;
-       vq->split.avail_flags_shadow = 0;
-       vq->split.avail_idx_shadow = 0;
-
-       /* No callback?  Tell other side not to bother us. */
-       if (!callback) {
-               vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
-               if (!vq->event)
-                       vq->split.vring.avail->flags = cpu_to_virtio16(vdev,
-                                       vq->split.avail_flags_shadow);
+       err = vring_alloc_state_extra_split(vring_split);
+       if (err) {
+               kfree(vq);
+               return NULL;
        }
 
-       vq->split.desc_state = kmalloc_array(vring.num,
-                       sizeof(struct vring_desc_state_split), GFP_KERNEL);
-       if (!vq->split.desc_state)
-               goto err_state;
-
-       vq->split.desc_extra = vring_alloc_desc_extra(vq, vring.num);
-       if (!vq->split.desc_extra)
-               goto err_extra;
+       virtqueue_vring_init_split(vring_split, vq);
 
-       /* Put everything in free lists. */
-       vq->free_head = 0;
-       memset(vq->split.desc_state, 0, vring.num *
-                       sizeof(struct vring_desc_state_split));
+       virtqueue_init(vq, vring_split->vring.num);
+       virtqueue_vring_attach_split(vq, vring_split);
 
        spin_lock(&vdev->vqs_list_lock);
        list_add_tail(&vq->vq.list, &vdev->vqs);
        spin_unlock(&vdev->vqs_list_lock);
        return &vq->vq;
-
-err_extra:
-       kfree(vq->split.desc_state);
-err_state:
-       kfree(vq);
-       return NULL;
 }
-EXPORT_SYMBOL_GPL(__vring_new_virtqueue);
 
 struct virtqueue *vring_create_virtqueue(
        unsigned int index,
@@ -2294,6 +2541,75 @@ struct virtqueue *vring_create_virtqueue(
 }
 EXPORT_SYMBOL_GPL(vring_create_virtqueue);
 
+/**
+ * virtqueue_resize - resize the vring of vq
+ * @_vq: the struct virtqueue we're talking about.
+ * @num: new ring size
+ * @recycle: callback to recycle buffers that are no longer used
+ *
+ * If a new vring really needs to be created, the vq is first put into
+ * the reset state. The @recycle callback is then invoked for every
+ * buffer still queued on the old ring, and the old vring is released
+ * only after the new vring has been created successfully.
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error.
+ * 0: success.
+ * -ENOMEM: failed to allocate a new ring; the vq falls back to the
+ *  original ring size and can still work normally
+ * -EBUSY: failed to sync with the device; the vq may not work properly
+ * -ENOENT: the transport or device does not support per-vq reset
+ * -E2BIG/-EINVAL: @num is invalid (zero or larger than @_vq->num_max)
+ * -EPERM: operation not permitted (the ring is not owned by the driver)
+ */
+int virtqueue_resize(struct virtqueue *_vq, u32 num,
+                    void (*recycle)(struct virtqueue *vq, void *buf))
+{
+       struct vring_virtqueue *vq = to_vvq(_vq);
+       struct virtio_device *vdev = vq->vq.vdev;
+       void *buf;
+       int err;
+
+       if (!vq->we_own_ring)
+               return -EPERM;
+
+       if (num > vq->vq.num_max)
+               return -E2BIG;
+
+       if (!num)
+               return -EINVAL;
+
+       if ((vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num) == num)
+               return 0;
+
+       if (!vdev->config->disable_vq_and_reset)
+               return -ENOENT;
+
+       if (!vdev->config->enable_vq_after_reset)
+               return -ENOENT;
+
+       err = vdev->config->disable_vq_and_reset(_vq);
+       if (err)
+               return err;
+
+       while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
+               recycle(_vq, buf);
+
+       if (vq->packed_ring)
+               err = virtqueue_resize_packed(_vq, num);
+       else
+               err = virtqueue_resize_split(_vq, num);
+
+       if (vdev->config->enable_vq_after_reset(_vq))
+               return -EBUSY;
+
+       return err;
+}
+EXPORT_SYMBOL_GPL(virtqueue_resize);
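
As a rough usage sketch (not taken from this series): a driver that owns
plain kmalloc'd buffers could drive the new API along these lines, where
my_refill_vq() is a hypothetical helper that reposts buffers once the
resized ring is live.

static int my_refill_vq(struct virtqueue *vq);  /* hypothetical */

static void my_recycle_buf(struct virtqueue *vq, void *buf)
{
        /* Buffers detached from the old ring are simply freed. */
        kfree(buf);
}

static int my_resize_rx(struct virtqueue *vq, u32 new_num)
{
        int err;

        /* Resets the vq, recycles queued buffers, swaps in a new vring. */
        err = virtqueue_resize(vq, new_num, my_recycle_buf);
        if (err)
                return err;

        /* Hypothetical: repost receive buffers on the resized ring. */
        return my_refill_vq(vq);
}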
+
 /* Only available for split ring */
 struct virtqueue *vring_new_virtqueue(unsigned int index,
                                      unsigned int num,
@@ -2306,25 +2622,21 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
                                      void (*callback)(struct virtqueue *vq),
                                      const char *name)
 {
-       struct vring vring;
+       struct vring_virtqueue_split vring_split = {};
 
        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
                return NULL;
 
-       vring_init(&vring, num, pages, vring_align);
-       return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
-                                    notify, callback, name);
+       vring_init(&vring_split.vring, num, pages, vring_align);
+       return __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers,
+                                    context, notify, callback, name);
 }
 EXPORT_SYMBOL_GPL(vring_new_virtqueue);
 
-void vring_del_virtqueue(struct virtqueue *_vq)
+static void vring_free(struct virtqueue *_vq)
 {
        struct vring_virtqueue *vq = to_vvq(_vq);
 
-       spin_lock(&vq->vq.vdev->vqs_list_lock);
-       list_del(&_vq->list);
-       spin_unlock(&vq->vq.vdev->vqs_list_lock);
-
        if (vq->we_own_ring) {
                if (vq->packed_ring) {
                        vring_free_queue(vq->vq.vdev,
@@ -2355,6 +2667,18 @@ void vring_del_virtqueue(struct virtqueue *_vq)
                kfree(vq->split.desc_state);
                kfree(vq->split.desc_extra);
        }
+}
+
+void vring_del_virtqueue(struct virtqueue *_vq)
+{
+       struct vring_virtqueue *vq = to_vvq(_vq);
+
+       spin_lock(&vq->vq.vdev->vqs_list_lock);
+       list_del(&_vq->list);
+       spin_unlock(&vq->vq.vdev->vqs_list_lock);
+
+       vring_free(_vq);
+
        kfree(vq);
 }
 EXPORT_SYMBOL_GPL(vring_del_virtqueue);
@@ -2402,6 +2726,30 @@ unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
 
+/*
+ * This function should only be called by the core, not directly by the driver.
+ */
+void __virtqueue_break(struct virtqueue *_vq)
+{
+       struct vring_virtqueue *vq = to_vvq(_vq);
+
+       /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
+       WRITE_ONCE(vq->broken, true);
+}
+EXPORT_SYMBOL_GPL(__virtqueue_break);
+
+/*
+ * This function should only be called by the core, not directly by the driver.
+ */
+void __virtqueue_unbreak(struct virtqueue *_vq)
+{
+       struct vring_virtqueue *vq = to_vvq(_vq);
+
+       /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
+       WRITE_ONCE(vq->broken, false);
+}
+EXPORT_SYMBOL_GPL(__virtqueue_unbreak);
+
 bool virtqueue_is_broken(struct virtqueue *_vq)
 {
        struct vring_virtqueue *vq = to_vvq(_vq);
index c40f7deb6b5ac1750756a9fefb22ee37ea27a191..9bc4d110b80003ce912ba5888ffebad177b3ddff 100644 (file)
@@ -131,7 +131,7 @@ static irqreturn_t virtio_vdpa_virtqueue_cb(void *private)
 static struct virtqueue *
 virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
                     void (*callback)(struct virtqueue *vq),
-                    const char *name, bool ctx)
+                    const char *name, u32 size, bool ctx)
 {
        struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vdev);
        struct vdpa_device *vdpa = vd_get_vdpa(vdev);
@@ -168,14 +168,17 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
                goto error_new_virtqueue;
        }
 
+       if (!size || size > max_num)
+               size = max_num;
+
        if (ops->get_vq_num_min)
                min_num = ops->get_vq_num_min(vdpa);
 
-       may_reduce_num = (max_num == min_num) ? false : true;
+       may_reduce_num = (size != min_num);
 
        /* Create the vring */
        align = ops->get_vq_align(vdpa);
-       vq = vring_create_virtqueue(index, max_num, align, vdev,
+       vq = vring_create_virtqueue(index, size, align, vdev,
                                    true, may_reduce_num, ctx,
                                    virtio_vdpa_notify, callback, name);
        if (!vq) {
@@ -183,6 +186,8 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
                goto error_new_virtqueue;
        }
 
+       vq->num_max = max_num;
+
        /* Setup virtqueue callback */
        cb.callback = callback ? virtio_vdpa_virtqueue_cb : NULL;
        cb.private = info;
@@ -267,6 +272,7 @@ static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
                                struct virtqueue *vqs[],
                                vq_callback_t *callbacks[],
                                const char * const names[],
+                               u32 sizes[],
                                const bool *ctx,
                                struct irq_affinity *desc)
 {
@@ -282,9 +288,9 @@ static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
                        continue;
                }
 
-               vqs[i] = virtio_vdpa_setup_vq(vdev, queue_idx++,
-                                             callbacks[i], names[i], ctx ?
-                                             ctx[i] : false);
+               vqs[i] = virtio_vdpa_setup_vq(vdev, queue_idx++, callbacks[i],
+                                                 names[i], sizes ? sizes[i] : 0,
+                                                 ctx ? ctx[i] : false);
                if (IS_ERR(vqs[i])) {
                        err = PTR_ERR(vqs[i]);
                        goto err_setup_vq;
index 4414ed5b6ed291e4b2edebe8fb019e1efbbf1629..9becdc3fa5034b360110cd8e0eb0a30fa8098daa 100644 (file)
@@ -150,6 +150,14 @@ enum {
        MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR      = 0x3,
 };
 
+/* This indicates that the object was not created or has already
+ * been destroyed. It is safe to assume that a queue object will never
+ * have this many states.
+ */
+enum {
+       MLX5_VIRTIO_NET_Q_OBJECT_NONE = 0xffffffff
+};
+
 enum {
        MLX5_RQTC_LIST_Q_TYPE_RQ            = 0x0,
        MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q  = 0x1,
index 7c943f0a2fc40ade313616a0230408e8954ada22..aea79c77db0ff26d9dfe20cd1377b01687a641ca 100644 (file)
@@ -597,7 +597,7 @@ struct rproc_subdev {
 /**
  * struct rproc_vring - remoteproc vring state
  * @va:        virtual address
- * @len: length, in bytes
+ * @num: vring size
  * @da: device address
  * @align: vring alignment
  * @notifyid: rproc-specific unique vring index
@@ -606,7 +606,7 @@ struct rproc_subdev {
  */
 struct rproc_vring {
        void *va;
-       int len;
+       int num;
        u32 da;
        u32 align;
        int notifyid;
index 7b4a13d3bd919009e4ebae7751f3685964f7e1a6..d282f464d2f1a7168ee18e14ccd7c9d98a0ae195 100644 (file)
@@ -218,6 +218,9 @@ struct vdpa_map_file {
  * @reset:                     Reset device
  *                             @vdev: vdpa device
  *                             Returns integer: success (0) or error (< 0)
+ * @suspend:                   Suspend the device (optional)
+ *                             @vdev: vdpa device
+ *                             Returns integer: success (0) or error (< 0)
  * @get_config_size:           Get the size of the configuration space includes
  *                             fields that are conditional on feature bits.
  *                             @vdev: vdpa device
@@ -319,6 +322,7 @@ struct vdpa_config_ops {
        u8 (*get_status)(struct vdpa_device *vdev);
        void (*set_status)(struct vdpa_device *vdev, u8 status);
        int (*reset)(struct vdpa_device *vdev);
+       int (*suspend)(struct vdpa_device *vdev);
        size_t (*get_config_size)(struct vdpa_device *vdev);
        void (*get_config)(struct vdpa_device *vdev, unsigned int offset,
                           void *buf, unsigned int len);
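
A hedged sketch of how a vdpa parent driver might wire up the new
optional op; to_mydev() and mydev_stop_datapath() are hypothetical
helpers, not part of this series.

static int mydev_suspend(struct vdpa_device *vdev)
{
        struct mydev *m = to_mydev(vdev);  /* hypothetical container_of() wrapper */

        /* Stop vq processing but keep device state for a later resume. */
        return mydev_stop_datapath(m);
}

static const struct vdpa_config_ops mydev_config_ops = {
        /* mandatory ops omitted from this sketch */
        .suspend = mydev_suspend,
};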
index d8fdf170637c9a11b317e7795fff7fe0b8c12ee8..a3f73bb6733e82b78b50e3ce2f736d67f1ec3f64 100644 (file)
@@ -19,6 +19,8 @@
  * @priv: a pointer for the virtqueue implementation to use.
  * @index: the zero-based ordinal number for this queue.
  * @num_free: number of elements we expect to be able to fit.
+ * @num_max: the maximum number of elements supported by the device.
+ * @reset: whether the vq is currently in the reset state.
  *
  * A note on @num_free: with indirect buffers, each buffer needs one
  * element in the queue, otherwise a buffer will need one element per
@@ -31,7 +33,9 @@ struct virtqueue {
        struct virtio_device *vdev;
        unsigned int index;
        unsigned int num_free;
+       unsigned int num_max;
        void *priv;
+       bool reset;
 };
 
 int virtqueue_add_outbuf(struct virtqueue *vq,
@@ -89,6 +93,9 @@ dma_addr_t virtqueue_get_desc_addr(struct virtqueue *vq);
 dma_addr_t virtqueue_get_avail_addr(struct virtqueue *vq);
 dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
 
+int virtqueue_resize(struct virtqueue *vq, u32 num,
+                    void (*recycle)(struct virtqueue *vq, void *buf));
+
 /**
  * virtio_device - representation of a device using virtio
  * @index: unique position on the virtio bus
@@ -133,6 +140,9 @@ bool is_virtio_device(struct device *dev);
 void virtio_break_device(struct virtio_device *dev);
 void __virtio_unbreak_device(struct virtio_device *dev);
 
+void __virtqueue_break(struct virtqueue *_vq);
+void __virtqueue_unbreak(struct virtqueue *_vq);
+
 void virtio_config_changed(struct virtio_device *dev);
 #ifdef CONFIG_PM_SLEEP
 int virtio_device_freeze(struct virtio_device *dev);
index b47c2e7ed0ee8ca6366d96d9f9156ad036889c54..6adff09f7170abdfd9c993d450221839d0872856 100644 (file)
@@ -55,6 +55,7 @@ struct virtio_shm_region {
  *             include a NULL entry for vqs that do not need a callback
  *     names: array of virtqueue names (mainly for debugging)
  *             include a NULL entry for vqs unused by driver
+ *     sizes: array of virtqueue sizes (optional)
+ *             a NULL array or a 0 entry selects the default size
  *     Returns 0 on success or error status
  * @del_vqs: free virtqueues found by find_vqs().
  * @synchronize_cbs: synchronize with the virtqueue callbacks (optional)
@@ -78,6 +79,18 @@ struct virtio_shm_region {
  * @set_vq_affinity: set the affinity for a virtqueue (optional).
  * @get_vq_affinity: get the affinity for a virtqueue (optional).
  * @get_shm_region: get a shared memory region based on the index.
+ * @disable_vq_and_reset: reset a queue individually (optional).
+ *     vq: the virtqueue
+ *     Returns 0 on success or error status
+ *     disable_vq_and_reset will guarantee that the callbacks are disabled and
+ *     synchronized.
+ *     Apart from the callback, the caller must guarantee that no other
+ *     virtqueue function accesses the vring while it is reset.
+ * @enable_vq_after_reset: enable a reset queue
+ *     vq: the virtqueue
+ *     Returns 0 on success or error status
+ *     If disable_vq_and_reset is set, then enable_vq_after_reset must also be
+ *     set.
  */
 typedef void vq_callback_t(struct virtqueue *);
 struct virtio_config_ops {
@@ -91,7 +104,9 @@ struct virtio_config_ops {
        void (*reset)(struct virtio_device *vdev);
        int (*find_vqs)(struct virtio_device *, unsigned nvqs,
                        struct virtqueue *vqs[], vq_callback_t *callbacks[],
-                       const char * const names[], const bool *ctx,
+                       const char * const names[],
+                       u32 sizes[],
+                       const bool *ctx,
                        struct irq_affinity *desc);
        void (*del_vqs)(struct virtio_device *);
        void (*synchronize_cbs)(struct virtio_device *);
@@ -104,6 +119,8 @@ struct virtio_config_ops {
                        int index);
        bool (*get_shm_region)(struct virtio_device *vdev,
                               struct virtio_shm_region *region, u8 id);
+       int (*disable_vq_and_reset)(struct virtqueue *vq);
+       int (*enable_vq_after_reset)(struct virtqueue *vq);
 };
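
Both ops are optional, so a caller has to probe for the pair and match
every successful disable with an enable. A sketch of the contract
(mirroring what virtqueue_resize() does, not a specific in-tree caller):

static int quiesce_and_rework_vq(struct virtqueue *vq)
{
        struct virtio_device *vdev = vq->vdev;
        int err;

        if (!vdev->config->disable_vq_and_reset ||
            !vdev->config->enable_vq_after_reset)
                return -ENOENT;

        err = vdev->config->disable_vq_and_reset(vq);
        if (err)
                return err;

        /* Callbacks are now disabled and synchronized; the vring may be
         * torn down and rebuilt here, as virtqueue_resize() does.
         */

        return vdev->config->enable_vq_after_reset(vq);
}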
 
 /* If driver didn't advertise the feature, it will never appear. */
@@ -198,7 +215,7 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev,
        const char *names[] = { n };
        struct virtqueue *vq;
        int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL,
-                                        NULL);
+                                        NULL, NULL);
        if (err < 0)
                return ERR_PTR(err);
        return vq;
@@ -210,7 +227,8 @@ int virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs,
                        const char * const names[],
                        struct irq_affinity *desc)
 {
-       return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL, desc);
+       return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL,
+                                     NULL, desc);
 }
 
 static inline
@@ -219,8 +237,20 @@ int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs,
                        const char * const names[], const bool *ctx,
                        struct irq_affinity *desc)
 {
-       return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, ctx,
-                                     desc);
+       return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL,
+                                     ctx, desc);
+}
+
+static inline
+int virtio_find_vqs_ctx_size(struct virtio_device *vdev, u32 nvqs,
+                            struct virtqueue *vqs[],
+                            vq_callback_t *callbacks[],
+                            const char * const names[],
+                            u32 sizes[],
+                            const bool *ctx, struct irq_affinity *desc)
+{
+       return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, sizes,
+                                     ctx, desc);
 }
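
A hedged sketch of a driver asking for per-queue sizes through the new
helper; the callbacks are hypothetical, and a 0 entry (or a NULL sizes
array) keeps the transport's default ring size.

static void my_rx_done(struct virtqueue *vq);  /* hypothetical */
static void my_tx_done(struct virtqueue *vq);  /* hypothetical */

static int my_probe_vqs(struct virtio_device *vdev, struct virtqueue *vqs[2])
{
        vq_callback_t *cbs[] = { my_rx_done, my_tx_done };
        static const char * const names[] = { "rx", "tx" };
        u32 sizes[] = { 256, 0 };  /* 256-entry rx ring, default-sized tx */

        return virtio_find_vqs_ctx_size(vdev, 2, vqs, cbs, names,
                                        sizes, NULL, NULL);
}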
 
 /**
index eb2bd9b4077defc9a884cdb0dc1bf47111a4dd3b..c4eeb79b01398eba88977e636f248096a07a8087 100644 (file)
@@ -5,6 +5,13 @@
 #include <linux/pci.h>
 #include <linux/virtio_pci.h>
 
+struct virtio_pci_modern_common_cfg {
+       struct virtio_pci_common_cfg cfg;
+
+       __le16 queue_notify_data;       /* read-write */
+       __le16 queue_reset;             /* read-write */
+};
+
 struct virtio_pci_modern_device {
        struct pci_dev *pci_dev;
 
@@ -106,4 +113,6 @@ void __iomem * vp_modern_map_vq_notify(struct virtio_pci_modern_device *mdev,
                                       u16 index, resource_size_t *pa);
 int vp_modern_probe(struct virtio_pci_modern_device *mdev);
 void vp_modern_remove(struct virtio_pci_modern_device *mdev);
+int vp_modern_get_queue_reset(struct virtio_pci_modern_device *mdev, u16 index);
+void vp_modern_set_queue_reset(struct virtio_pci_modern_device *mdev, u16 index);
 #endif
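
A hedged sketch of how the transport side might use these helpers around a
per-queue reset (assuming the setter initiates the reset and the getter reads
back the queue_reset field; the teardown and re-setup in between is elided):

	int reset;

	vp_modern_set_queue_reset(mdev, index);	/* request the queue reset */
	/* ... tear down and re-create the vring for this queue ... */
	reset = vp_modern_get_queue_reset(mdev, index);	/* read queue_reset back */
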
index b485b13fa50bf3897f64a283c892e83e3c785c5c..8b8af1a38991a77bed3b579b8b392ead365d7a61 100644 (file)
@@ -76,16 +76,6 @@ struct virtqueue *vring_create_virtqueue(unsigned int index,
                                         void (*callback)(struct virtqueue *vq),
                                         const char *name);
 
-/* Creates a virtqueue with a custom layout. */
-struct virtqueue *__vring_new_virtqueue(unsigned int index,
-                                       struct vring vring,
-                                       struct virtio_device *vdev,
-                                       bool weak_barriers,
-                                       bool ctx,
-                                       bool (*notify)(struct virtqueue *),
-                                       void (*callback)(struct virtqueue *),
-                                       const char *name);
-
 /*
  * Creates a virtqueue with a standard layout but a caller-allocated
  * ring.
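
Out-of-tree users of the removed __vring_new_virtqueue() can convert to
vring_new_virtqueue(), which takes the ring geometry plus a caller-allocated
ring buffer instead of a pre-built struct vring; the virtio_test.c hunk at
the end of this series makes exactly that conversion. A hedged before/after
sketch (index, num, ring, notify and callback as in the old call):

	/* before (custom layout, removed): */
	vq = __vring_new_virtqueue(index, vring, vdev, true, false,
				   notify, callback, "demo");
	/* after (standard layout, caller-allocated ring): */
	vq = vring_new_virtqueue(index, num, 4096 /* align */, vdev,
				 true /* weak_barriers */, false /* ctx */,
				 ring, notify, callback, "demo");
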
index 7cfe1c1280c0f2a3fd01edeffac3dc124017829d..11bd48c72c6ccc542b4ba5647fb5299147e6e9bd 100644 (file)
@@ -210,6 +210,53 @@ struct vduse_vq_eventfd {
  */
 #define VDUSE_VQ_INJECT_IRQ    _IOW(VDUSE_BASE, 0x17, __u32)
 
+/**
+ * struct vduse_iova_umem - userspace memory configuration for one IOVA region
+ * @uaddr: start address of the userspace memory; it must be page-aligned
+ * @iova: start of the IOVA region
+ * @size: size of the IOVA region
+ * @reserved: for future use, needs to be initialized to zero
+ *
+ * Structure used by VDUSE_IOTLB_REG_UMEM and VDUSE_IOTLB_DEREG_UMEM
+ * ioctls to register/de-register userspace memory for IOVA regions
+ */
+struct vduse_iova_umem {
+       __u64 uaddr;
+       __u64 iova;
+       __u64 size;
+       __u64 reserved[3];
+};
+
+/* Register userspace memory for IOVA regions */
+#define VDUSE_IOTLB_REG_UMEM   _IOW(VDUSE_BASE, 0x18, struct vduse_iova_umem)
+
+/* De-register the userspace memory. Caller should set the iova and size fields. */
+#define VDUSE_IOTLB_DEREG_UMEM _IOW(VDUSE_BASE, 0x19, struct vduse_iova_umem)
+
+/**
+ * struct vduse_iova_info - information about one IOVA region
+ * @start: start address of the IOVA region
+ * @last: last address (inclusive) of the IOVA region
+ * @capability: capability flags of the IOVA region
+ * @reserved: for future use, needs to be initialized to zero
+ *
+ * Structure used by the VDUSE_IOTLB_GET_INFO ioctl to get information
+ * about one IOVA region.
+ */
+struct vduse_iova_info {
+       __u64 start;
+       __u64 last;
+#define VDUSE_IOVA_CAP_UMEM (1 << 0)
+       __u64 capability;
+       __u64 reserved[3];
+};
+
+/*
+ * Find the first IOVA region that overlaps with the range [start, last]
+ * and return information about it. The caller should set the start and
+ * last fields.
+ */
+#define VDUSE_IOTLB_GET_INFO   _IOWR(VDUSE_BASE, 0x1a, struct vduse_iova_info)
+
 /* The control messages definition for read(2)/write(2) on /dev/vduse/$NAME */
 
 /**
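
Putting the three new ioctls together, a hedged sketch of the expected
userspace flow (dev_fd and buf are hypothetical; buf must be page-aligned and
the region must advertise VDUSE_IOVA_CAP_UMEM):

	struct vduse_iova_info info = {
		.start = 0,
		.last = ~0ULL,		/* probe the whole IOVA space */
	};

	if (ioctl(dev_fd, VDUSE_IOTLB_GET_INFO, &info) == 0 &&
	    (info.capability & VDUSE_IOVA_CAP_UMEM)) {
		struct vduse_iova_umem umem = {
			.uaddr = (__u64)(uintptr_t)buf,	/* page-aligned */
			.iova = info.start,
			.size = info.last - info.start + 1,
		};

		ioctl(dev_fd, VDUSE_IOTLB_REG_UMEM, &umem);
		/* ... serve the region from userspace memory ... */
		ioctl(dev_fd, VDUSE_IOTLB_DEREG_UMEM, &umem);	/* same iova/size */
	}
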
index cab645d4a64555641833eba4e08d60b6163f3512..f9f115a7c75b8a3060f0678599e7662d0015b878 100644 (file)
 #define VHOST_VDPA_SET_GROUP_ASID      _IOW(VHOST_VIRTIO, 0x7C, \
                                             struct vhost_vring_state)
 
+/* Suspend a device so that it stops processing virtqueue requests.
+ *
+ * After the ioctl returns, the device must preserve all the necessary state
+ * (the virtqueue vring base plus any device-specific state) that is
+ * required for restoring it in the future. The device must not change its
+ * configuration after that point.
+ */
+#define VHOST_VDPA_SUSPEND             _IO(VHOST_VIRTIO, 0x7D)
+
 #endif
index 391331a10879a93395a37a5cd0a4fac6d9aa6843..53601ce2c20a42b1fac94d712b39c75cca071ec6 100644 (file)
@@ -161,5 +161,7 @@ struct vhost_vdpa_iova_range {
  * message
  */
 #define VHOST_BACKEND_F_IOTLB_ASID  0x3
+/* Device can be suspended */
+#define VHOST_BACKEND_F_SUSPEND  0x4
 
 #endif
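
Taken together with VHOST_VDPA_SUSPEND above, a hedged sketch of the expected
userspace sequence (fd is a hypothetical open vhost-vdpa device file;
VHOST_GET_BACKEND_FEATURES is the pre-existing feature query ioctl):

	__u64 features;

	if (ioctl(fd, VHOST_GET_BACKEND_FEATURES, &features) == 0 &&
	    (features & (1ULL << VHOST_BACKEND_F_SUSPEND))) {
		/* stop the device from processing virtqueue requests while
		 * it preserves the state needed to resume it later */
		if (ioctl(fd, VHOST_VDPA_SUSPEND))
			perror("VHOST_VDPA_SUSPEND");
	}
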
index f0fb0ae021c096d4ad338275c4d25cccbe9b2e75..3c05162bc988d504e483bcf466d93c80ae9733a9 100644 (file)
@@ -52,7 +52,7 @@
  * rest are per-device feature bits.
  */
 #define VIRTIO_TRANSPORT_F_START       28
-#define VIRTIO_TRANSPORT_F_END         38
+#define VIRTIO_TRANSPORT_F_END         41
 
 #ifndef VIRTIO_CONFIG_NO_LEGACY
 /* Do we get callbacks when the ring is completely used, even if we've
@@ -98,4 +98,9 @@
  * Does the device support Single Root I/O Virtualization?
  */
 #define VIRTIO_F_SR_IOV                        37
+
+/*
+ * This feature indicates that the driver can reset a queue individually.
+ */
+#define VIRTIO_F_RING_RESET            40
 #endif /* _UAPI_LINUX_VIRTIO_CONFIG_H */
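
A hedged sketch of how a driver might drive a per-queue reset once this bit
is negotiated, written directly against the new config ops from the
virtio_config.h hunk above (in-tree drivers are expected to go through core
helpers rather than calling the ops by hand):

	if (virtio_has_feature(vdev, VIRTIO_F_RING_RESET)) {
		err = vdev->config->disable_vq_and_reset(vq);
		if (err)
			return err;
		/* ... the vring may be re-sized or re-allocated here ... */
		err = vdev->config->enable_vq_after_reset(vq);
	}
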
index 3f55a4215f11b7014b991893f0e796547aae5bde..29ced55514d4173114550565ec752b1646920ca8 100644 (file)
@@ -56,7 +56,7 @@
 #define VIRTIO_NET_F_MQ        22      /* Device supports Receive Flow
                                         * Steering */
 #define VIRTIO_NET_F_CTRL_MAC_ADDR 23  /* Set MAC address */
-
+#define VIRTIO_NET_F_NOTF_COAL 53      /* Guest can handle notifications coalescing */
 #define VIRTIO_NET_F_HASH_REPORT  57   /* Supports hash report */
 #define VIRTIO_NET_F_RSS         60    /* Supports RSS RX steering */
 #define VIRTIO_NET_F_RSC_EXT     61    /* extended coalescing info */
@@ -355,4 +355,36 @@ struct virtio_net_hash_config {
 #define VIRTIO_NET_CTRL_GUEST_OFFLOADS   5
 #define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET        0
 
+/*
+ * Control notifications coalescing.
+ *
+ * Request the device to change the notifications coalescing parameters.
+ *
+ * Available with the VIRTIO_NET_F_NOTF_COAL feature bit.
+ */
+#define VIRTIO_NET_CTRL_NOTF_COAL              6
+/*
+ * Set the tx-usecs/tx-max-packets parameters.
+ * tx-usecs - Maximum number of usecs to delay a TX notification.
+ * tx-max-packets - Maximum number of packets to send before a TX notification.
+ */
+struct virtio_net_ctrl_coal_tx {
+       __le32 tx_max_packets;
+       __le32 tx_usecs;
+};
+
+#define VIRTIO_NET_CTRL_NOTF_COAL_TX_SET               0
+
+/*
+ * Set the rx-usecs/rx-max-packets parameters.
+ * rx-usecs - Maximum number of usecs to delay a RX notification.
+ * rx-max-packets - Maximum number of packets to receive before a RX notification.
+ */
+struct virtio_net_ctrl_coal_rx {
+       __le32 rx_max_packets;
+       __le32 rx_usecs;
+};
+
+#define VIRTIO_NET_CTRL_NOTF_COAL_RX_SET               1
+
 #endif /* _UAPI_LINUX_VIRTIO_NET_H */
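
A hedged sketch of filling the TX parameters as a driver might before sending
them on the control virtqueue (the 64/32 values are hypothetical):

	struct virtio_net_ctrl_coal_tx coal_tx = {
		.tx_max_packets = cpu_to_le32(64),	/* notify after at most 64 packets */
		.tx_usecs = cpu_to_le32(32),		/* or after 32 usecs, whichever first */
	};

	/* hand &coal_tx to a control-vq command with class
	 * VIRTIO_NET_CTRL_NOTF_COAL and command VIRTIO_NET_CTRL_NOTF_COAL_TX_SET */
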
index 3a86f36d7e3d965633594fa9210c9ab28221b6b3..f703afc7ad31ba0791101585fd95b9a10a48f3ce 100644 (file)
@@ -202,6 +202,8 @@ struct virtio_pci_cfg_cap {
 #define VIRTIO_PCI_COMMON_Q_AVAILHI    44
 #define VIRTIO_PCI_COMMON_Q_USEDLO     48
 #define VIRTIO_PCI_COMMON_Q_USEDHI     52
+#define VIRTIO_PCI_COMMON_Q_NDATA      56
+#define VIRTIO_PCI_COMMON_Q_RESET      58
 
 #endif /* VIRTIO_PCI_NO_MODERN */
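
These two offsets correspond to the queue_notify_data and queue_reset fields
appended to struct virtio_pci_modern_common_cfg in the virtio_pci_modern.h
hunk above; a compile-time check makes the correspondence explicit (a hedged
sketch, assuming both headers are in scope):

	#include <stddef.h>

	_Static_assert(offsetof(struct virtio_pci_modern_common_cfg, queue_notify_data)
		       == VIRTIO_PCI_COMMON_Q_NDATA, "queue_notify_data offset");
	_Static_assert(offsetof(struct virtio_pci_modern_common_cfg, queue_reset)
		       == VIRTIO_PCI_COMMON_Q_RESET, "queue_reset offset");
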
 
index 0b493542e61a620f8680be0527116586a534a843..21593bf9775526692c297a3ed28a0f8ea0d14cca 100644 (file)
@@ -29,7 +29,6 @@
 #define READ                    0
 #define WRITE                   1
 
-typedef unsigned long long phys_addr_t;
 typedef unsigned long long dma_addr_t;
 typedef size_t __kernel_size_t;
 typedef unsigned int __wsum;
@@ -136,6 +135,7 @@ static inline void *krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t
 #endif
 #define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
 #define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
+#define dev_warn_once(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
 
 #define min(x, y) ({                           \
        typeof(x) _min1 = (x);                  \
index 9348957be56e48dcd8a63ddd1dacd0702010f6ec..e11c6aece7341e635cd0b775f04d6692413ea596 100644 (file)
@@ -1 +1,2 @@
+#include <limits.h>
 #include "../../../include/linux/vringh.h"
index 23f142af544ad796146361cc81ce3315d303f42e..86a410ddceddec107f3dd0cd294bb2785c8deba5 100644 (file)
@@ -102,8 +102,8 @@ static void vq_reset(struct vq_info *info, int num, struct virtio_device *vdev)
 
        memset(info->ring, 0, vring_size(num, 4096));
        vring_init(&info->vring, num, info->ring, 4096);
-       info->vq = __vring_new_virtqueue(info->idx, info->vring, vdev, true,
-                                        false, vq_notify, vq_callback, "test");
+       info->vq = vring_new_virtqueue(info->idx, num, 4096, vdev, true, false,
+                                      info->ring, vq_notify, vq_callback, "test");
        assert(info->vq);
        info->vq->priv = info;
 }