2 * Intel MIC Platform Software Stack (MPSS)
4 * Copyright(c) 2013 Intel Corporation.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2, as
8 * published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
15 * The full GNU General Public License is included in this distribution in
16 * the file called "COPYING".
18 * Intel MIC Host driver.
21 #include <linux/pci.h>
22 #include <linux/sched.h>
23 #include <linux/uaccess.h>
25 #include <linux/mic_common.h>
26 #include "../common/mic_dev.h"
27 #include "mic_device.h"
29 #include "mic_virtio.h"
32 * Initiates the copies across the PCIe bus from card memory to
33 * a user space buffer.
/*
 * mic_virtio_copy_to_user() - copy len bytes from the aperture mapping to a
 * user space buffer.
 *
 * @mvdev: virtio device; supplies the aperture base (mvdev->mdev->aper.va)
 *         and the in_bytes counter.
 * @ubuf:  destination user space buffer.
 * @len:   number of bytes to copy.
 * @addr:  offset added to the aperture base to form the source pointer.
 *
 * When copy_to_user() reports uncopied bytes an error is logged through
 * dev_err(); on the other path mvdev->in_bytes is increased by len.
 *
 * NOTE(review): this listing has gaps in its embedded line numbers
 * (36 -> 39, 44 -> 46, 47 -> 50) and `err` is used without a visible
 * declaration or assignment, so the exact return values cannot be
 * confirmed from here.
 */
35 static int mic_virtio_copy_to_user(struct mic_vdev *mvdev,
36 void __user *ubuf, size_t len, u64 addr)
39 void __iomem *dbuf = mvdev->mdev->aper.va + addr;
41 * We are copying from IO below an should ideally use something
42 * like copy_to_user_fromio(..) if it existed.
44 if (copy_to_user(ubuf, dbuf, len)) {
46 dev_err(mic_dev(mvdev), "%s %d err %d\n",
47 __func__, __LINE__, err);
50 mvdev->in_bytes += len;
57 * Initiates copies across the PCIe bus from a user space
58 * buffer to card memory.
/*
 * mic_virtio_copy_from_user() - copy len bytes from a user space buffer into
 * the aperture mapping.
 *
 * @mvdev: virtio device; supplies the aperture base (mvdev->mdev->aper.va)
 *         and the out_bytes counter.
 * @ubuf:  source user space buffer.
 * @len:   number of bytes to copy.
 * @addr:  offset added to the aperture base to form the destination pointer.
 *
 * When copy_from_user() reports uncopied bytes an error is logged through
 * dev_err(); on the other path mvdev->out_bytes is increased by len.
 *
 * NOTE(review): the declaration and assignment of `err` and the return
 * statement are not visible in this listing (embedded line numbers skip
 * 62-63, 70, 73-74), so the exact return values cannot be confirmed here.
 */
60 static int mic_virtio_copy_from_user(struct mic_vdev *mvdev,
61 void __user *ubuf, size_t len, u64 addr)
64 void __iomem *dbuf = mvdev->mdev->aper.va + addr;
66 * We are copying to IO below and should ideally use something
67 * like copy_from_user_toio(..) if it existed.
69 if (copy_from_user(dbuf, ubuf, len)) {
71 dev_err(mic_dev(mvdev), "%s %d err %d\n",
72 __func__, __LINE__, err);
75 mvdev->out_bytes += len;
/*
 * Value passed as the `read` argument of mic_vringh_copy(): the read
 * descriptor pass uses MIC_VRINGH_READ and the write descriptor pass uses
 * !MIC_VRINGH_READ.
 */
81 #define MIC_VRINGH_READ true
83 /* The function to call to notify the card about added buffers */
/*
 * mic_notify() - vringh notify callback.
 *
 * @vrh: the vringh embedded in a struct mic_vringh; container_of() recovers
 *       the wrapper and, through it, the owning mic_vdev.
 *
 * Reads the doorbell number from mvdev->dc->h2c_vdev_db and raises it with
 * mdev->ops->send_intr().
 *
 * NOTE(review): embedded line numbers skip 89-90, so whether db is checked
 * (for example against -1, the value mic_init_device_ctrl() stores in
 * h2c_vdev_db) before send_intr() is called cannot be confirmed from here.
 */
84 static void mic_notify(struct vringh *vrh)
86 struct mic_vringh *mvrh = container_of(vrh, struct mic_vringh, vrh);
87 struct mic_vdev *mvdev = mvrh->mvdev;
88 s8 db = mvdev->dc->h2c_vdev_db;
91 mvdev->mdev->ops->send_intr(mvdev->mdev, db);
94 /* Determine the total number of bytes consumed in a VRINGH KIOV */
/*
 * mic_vringh_iov_consumed() - add up the bytes accounted for in a KIOV.
 *
 * @iov: the vringh_kiov to inspect.
 *
 * Starts from iov->consumed (bytes taken from the current element) and adds
 * iov_len of every element before index iov->i.
 *
 * NOTE(review): the declaration of `i` and the return statement are not
 * visible in this listing; presumably `total` is returned.
 */
95 static inline u32 mic_vringh_iov_consumed(struct vringh_kiov *iov)
98 u32 total = iov->consumed;
100 for (i = 0; i < iov->i; i++)
101 total += iov->iov[i].iov_len;
106 * Traverse the VRINGH KIOV and issue the APIs to trigger the copies.
107 * This API is heavily based on the vringh_iov_xfer(..) implementation
108 * in vringh.c. The reason we cannot reuse vringh_iov_pull_kern(..)
109 * and vringh_iov_push_kern(..) directly is because there is no
110 * way to override the VRINGH xfer(..) routines as of v3.10.
/*
 * mic_vringh_copy() - walk a KIOV and copy between it and a user buffer.
 *
 * @mvdev:   virtio device, forwarded to the copy helpers.
 * @iov:     KIOV being walked; iov->i, iov->consumed and the current
 *           element's iov_len/iov_base are updated as data is moved.
 * @ubuf:    user space buffer.
 * @len:     number of bytes still wanted.
 * @read:    direction selector (callers pass MIC_VRINGH_READ or its negation).
 * @out_len: output parameter.
 *
 * Each pass copies partlen = min(current element length, len) bytes using
 * mic_virtio_copy_to_user() or mic_virtio_copy_from_user(), passing the
 * element's iov_base cast to u64 as the address. The element is then
 * advanced by partlen; once its length reaches zero its original iov_len
 * and iov_base are restored from iov->consumed.
 *
 * NOTE(review): many lines are missing from this listing (embedded numbers
 * skip 114-115, 117, 120, 122, 124, 126, 128, 131-135, 143 onwards). The
 * branch choosing between the two helpers, the ubuf/len bookkeeping, the
 * increment of iov->i, the store to *out_len and the return value are not
 * visible and must be confirmed against the complete file.
 */
112 static int mic_vringh_copy(struct mic_vdev *mvdev, struct vringh_kiov *iov,
113 void __user *ubuf, size_t len, bool read, size_t *out_len)
116 size_t partlen, tot_len = 0;
118 while (len && iov->i < iov->used) {
119 partlen = min(iov->iov[iov->i].iov_len, len);
121 ret = mic_virtio_copy_to_user(mvdev,
123 (u64)iov->iov[iov->i].iov_base);
125 ret = mic_virtio_copy_from_user(mvdev,
127 (u64)iov->iov[iov->i].iov_base);
129 dev_err(mic_dev(mvdev), "%s %d err %d\n",
130 __func__, __LINE__, ret);
136 iov->consumed += partlen;
137 iov->iov[iov->i].iov_len -= partlen;
138 iov->iov[iov->i].iov_base += partlen;
139 if (!iov->iov[iov->i].iov_len) {
140 /* Fix up old iov element then increment. */
141 iov->iov[iov->i].iov_len = iov->consumed;
142 iov->iov[iov->i].iov_base -= iov->consumed;
153 * Use the standard VRINGH infrastructure in the kernel to fetch new
154 * descriptors, initiate the copies and update the used ring.
/*
 * _mic_virtio_copy() - service one copy request against a single vring.
 *
 * @mvdev: virtio device.
 * @copy:  request descriptor; the visible code reads iovcnt, iov, vr_idx and
 *         update_used from it and accumulates bytes moved in out_len.
 *
 * Visible steps:
 *  1. If both the read and write KIOVs are fully processed, fetch a new
 *     descriptor with vringh_getdesc_kern().
 *  2. Copy a struct iovec from user space (u_iov) into a local `iov`.
 *  3. Run mic_vringh_copy() over the read KIOV, then over the write KIOV,
 *     adding each out_len to copy->out_len.
 *  4. Stop once both KIOVs are fully processed.
 *  5. If a descriptor head is held (*head != USHRT_MAX), some data was
 *     copied and the caller set update_used: complete the descriptor with
 *     the total consumed byte count, clean up both KIOVs and publish
 *     vrh->last_avail_idx in vr->info->avail_idx.
 *
 * NOTE(review): the loop header, the declaration of `iov` and `total`, the
 * ubuf/len updates, the iovcnt/u_iov advance, the reset of *head, the action
 * taken when vringh_need_notify_kern() is positive and the return statement
 * are all missing from this listing; confirm against the complete file.
 * NOTE(review): the local pointer mvr is formed from copy->vr_idx without a
 * visible range check in this function; the visible caller,
 * mic_virtio_copy_desc(), calls mic_verify_copy_args() first.
 */
156 static int _mic_virtio_copy(struct mic_vdev *mvdev,
157 struct mic_copy_desc *copy)
159 int ret = 0, iovcnt = copy->iovcnt;
161 struct iovec __user *u_iov = copy->iov;
162 void __user *ubuf = NULL;
163 struct mic_vringh *mvr = &mvdev->mvr[copy->vr_idx];
164 struct vringh_kiov *riov = &mvr->riov;
165 struct vringh_kiov *wiov = &mvr->wiov;
166 struct vringh *vrh = &mvr->vrh;
167 u16 *head = &mvr->head;
168 struct mic_vring *vr = &mvr->vring;
169 size_t len = 0, out_len;
172 /* Fetch a new IOVEC if all previous elements have been processed */
173 if (riov->i == riov->used && wiov->i == wiov->used) {
174 ret = vringh_getdesc_kern(vrh, riov, wiov,
176 /* Check if there are available descriptors */
182 /* Copy over a new iovec from user space. */
183 ret = copy_from_user(&iov, u_iov, sizeof(*u_iov));
186 dev_err(mic_dev(mvdev), "%s %d err %d\n",
187 __func__, __LINE__, ret);
193 /* Issue all the read descriptors first */
194 ret = mic_vringh_copy(mvdev, riov, ubuf, len,
195 MIC_VRINGH_READ, &out_len);
197 dev_err(mic_dev(mvdev), "%s %d err %d\n",
198 __func__, __LINE__, ret);
203 copy->out_len += out_len;
204 /* Issue the write descriptors next */
205 ret = mic_vringh_copy(mvdev, wiov, ubuf, len,
206 !MIC_VRINGH_READ, &out_len);
208 dev_err(mic_dev(mvdev), "%s %d err %d\n",
209 __func__, __LINE__, ret);
214 copy->out_len += out_len;
216 /* One user space iovec is now completed */
220 /* Exit loop if all elements in KIOVs have been processed. */
221 if (riov->i == riov->used && wiov->i == wiov->used)
225 * Update the used ring if a descriptor was available and some data was
226 * copied in/out and the user asked for a used ring update.
228 if (*head != USHRT_MAX && copy->out_len && copy->update_used) {
231 /* Determine the total data consumed */
232 total += mic_vringh_iov_consumed(riov);
233 total += mic_vringh_iov_consumed(wiov);
234 vringh_complete_kern(vrh, *head, total);
236 if (vringh_need_notify_kern(vrh) > 0)
238 vringh_kiov_cleanup(riov);
239 vringh_kiov_cleanup(wiov);
240 /* Update avail idx for user space */
241 vr->info->avail_idx = vrh->last_avail_idx;
/*
 * mic_verify_copy_args() - validate a copy request before it is serviced.
 *
 * @mvdev: virtio device; mvdev->dd->num_vq bounds the ring index.
 * @copy:  request to validate.
 *
 * Logs -EINVAL through dev_err() when copy->vr_idx is not below the number
 * of virtqueues in the device descriptor.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * presumably -EINVAL is returned on the failing path and 0 otherwise.
 */
246 static inline int mic_verify_copy_args(struct mic_vdev *mvdev,
247 struct mic_copy_desc *copy)
249 if (copy->vr_idx >= mvdev->dd->num_vq) {
250 dev_err(mic_dev(mvdev), "%s %d err %d\n",
251 __func__, __LINE__, -EINVAL);
257 /* Copy a specified number of virtio descriptors in a chain */
/*
 * mic_virtio_copy_desc() - validated, locked entry point for a copy request.
 *
 * @mvdev: virtio device.
 * @copy:  copy request; copy->vr_idx selects the ring.
 *
 * Validates the request with mic_verify_copy_args(), takes the selected
 * ring's vr_mutex, checks mic_vdevup() and then runs _mic_virtio_copy().
 * Failures on either of the last two steps are logged through dev_err().
 * The mutex is released before returning.
 *
 * NOTE(review): mvr is computed from copy->vr_idx in its initializer, i.e.
 * before mic_verify_copy_args() runs. Only the address is formed at that
 * point; the first visible dereference is the mutex_lock() after the check.
 * NOTE(review): the error value assigned when mic_vdevup() fails, the jumps
 * to the unlock path and the return statement are not visible here.
 */
258 int mic_virtio_copy_desc(struct mic_vdev *mvdev,
259 struct mic_copy_desc *copy)
262 struct mic_vringh *mvr = &mvdev->mvr[copy->vr_idx];
264 err = mic_verify_copy_args(mvdev, copy);
268 mutex_lock(&mvr->vr_mutex);
269 if (!mic_vdevup(mvdev)) {
271 dev_err(mic_dev(mvdev), "%s %d err %d\n",
272 __func__, __LINE__, err);
275 err = _mic_virtio_copy(mvdev, copy);
277 dev_err(mic_dev(mvdev), "%s %d err %d\n",
278 __func__, __LINE__, err);
281 mutex_unlock(&mvr->vr_mutex);
/*
 * mic_virtio_init_post() - pick up the used ring addresses from the
 * virtqueue configuration.
 *
 * @mvdev: virtio device whose rings are updated.
 *
 * For every virtqueue, reads used_address (little endian) from the vqconfig
 * array. A zero value triggers a dev_warn(). The host side used ring pointer
 * (vrh.vring.used) is set to the aperture base plus used_address. Afterwards
 * dc->used_address_updated is cleared and a LINKUP debug message is emitted.
 *
 * NOTE(review): lines following the dev_warn() are missing from this listing
 * (embedded numbers skip 293-294); whether a zero used_address skips the
 * pointer update cannot be confirmed from here.
 */
285 static void mic_virtio_init_post(struct mic_vdev *mvdev)
287 struct mic_vqconfig *vqconfig = mic_vq_config(mvdev->dd);
290 for (i = 0; i < mvdev->dd->num_vq; i++) {
291 if (!le64_to_cpu(vqconfig[i].used_address)) {
292 dev_warn(mic_dev(mvdev), "used_address zero??\n");
295 mvdev->mvr[i].vrh.vring.used =
296 mvdev->mdev->aper.va +
297 le64_to_cpu(vqconfig[i].used_address);
300 mvdev->dc->used_address_updated = 0;
302 dev_dbg(mic_dev(mvdev), "%s: device type %d LINKUP\n",
303 __func__, mvdev->virtio_id);
/*
 * mic_virtio_device_reset() - return a virtio device to its reset state.
 *
 * @mvdev: virtio device to reset.
 *
 * Takes every ring's vr_mutex (lockdep subclass i + 1), then:
 *  - sets dd->status to 0, clears dc->vdev_reset and sets dc->host_ack to 1;
 *  - for each ring, zeroes vring.info->avail_idx and the vringh
 *    last_avail_idx / last_used_idx counters;
 * and finally releases all ring mutexes.
 *
 * NOTE(review): at least one statement inside the per-ring reset loop is
 * missing from this listing (embedded numbers skip 328), as is the
 * declaration of `i`.
 */
306 static inline void mic_virtio_device_reset(struct mic_vdev *mvdev)
310 dev_dbg(mic_dev(mvdev), "%s: status %d device type %d RESET\n",
311 __func__, mvdev->dd->status, mvdev->virtio_id);
313 for (i = 0; i < mvdev->dd->num_vq; i++)
315 * Avoid lockdep false positive. The + 1 is for the mic
316 * mutex which is held in the reset devices code path.
318 mutex_lock_nested(&mvdev->mvr[i].vr_mutex, i + 1);
320 /* 0 status means "reset" */
321 mvdev->dd->status = 0;
322 mvdev->dc->vdev_reset = 0;
323 mvdev->dc->host_ack = 1;
325 for (i = 0; i < mvdev->dd->num_vq; i++) {
326 struct vringh *vrh = &mvdev->mvr[i].vrh;
327 mvdev->mvr[i].vring.info->avail_idx = 0;
329 vrh->last_avail_idx = 0;
330 vrh->last_used_idx = 0;
333 for (i = 0; i < mvdev->dd->num_vq; i++)
334 mutex_unlock(&mvdev->mvr[i].vr_mutex);
/*
 * mic_virtio_reset_devices() - reset every virtio device on a MIC device.
 *
 * @mdev: MIC device whose vdev_list is walked.
 *
 * For each entry on mdev->vdev_list: runs mic_virtio_device_reset(), sets
 * poll_wake to 1 and wakes the device's waitq.
 *
 * NOTE(review): the comment inside mic_virtio_device_reset() says the mic
 * mutex is held on this path; no lock is taken in the visible lines here,
 * so presumably the caller holds it - confirm against callers.
 */
337 void mic_virtio_reset_devices(struct mic_device *mdev)
339 struct list_head *pos, *tmp;
340 struct mic_vdev *mvdev;
342 dev_dbg(mdev->sdev->parent, "%s\n", __func__);
344 list_for_each_safe(pos, tmp, &mdev->vdev_list) {
345 mvdev = list_entry(pos, struct mic_vdev, list);
346 mic_virtio_device_reset(mvdev);
347 mvdev->poll_wake = 1;
348 wake_up(&mvdev->waitq);
/*
 * mic_bh_handler() - work item run after a virtio doorbell interrupt.
 *
 * @work: work_struct embedded in a struct mic_vdev (installed with
 *        INIT_WORK() on virtio_bh_work in mic_virtio_add_device()).
 *
 * Runs mic_virtio_init_post() when dc->used_address_updated is set and
 * mic_virtio_device_reset() when dc->vdev_reset is set, then sets poll_wake
 * to 1 and wakes the device's waitq.
 *
 * NOTE(review): the member argument of container_of() is on a line missing
 * from this listing; presumably it is virtio_bh_work.
 */
352 void mic_bh_handler(struct work_struct *work)
354 struct mic_vdev *mvdev = container_of(work, struct mic_vdev,
357 if (mvdev->dc->used_address_updated)
358 mic_virtio_init_post(mvdev);
360 if (mvdev->dc->vdev_reset)
361 mic_virtio_device_reset(mvdev);
363 mvdev->poll_wake = 1;
364 wake_up(&mvdev->waitq);
/*
 * mic_virtio_intr_handler() - interrupt handler for a virtio doorbell.
 *
 * @irq:  interrupt number (not used in the visible lines).
 * @data: the struct mic_vdev registered with mic_request_irq().
 *
 * Acknowledges the interrupt through mdev->ops->ack_interrupt() and defers
 * the remaining work to mic_bh_handler() via schedule_work().
 *
 * NOTE(review): the return statement is not visible in this listing.
 */
367 static irqreturn_t mic_virtio_intr_handler(int irq, void *data)
369 struct mic_vdev *mvdev = data;
370 struct mic_device *mdev = mvdev->mdev;
372 mdev->ops->ack_interrupt(mdev);
373 schedule_work(&mvdev->virtio_bh_work);
/*
 * mic_virtio_config_change() - push a new config space to the card and wait
 * for it to be acknowledged.
 *
 * @mvdev: virtio device whose config space is replaced.
 * (A further parameter, used below as `argp`, is declared on a line missing
 * from this listing; it is the user space source of the new config bytes.)
 *
 * Visible steps:
 *  1. Take mdev->mic_mutex and then every ring's vr_mutex (lockdep subclass
 *     i + 1).
 *  2. Check the config doorbell (bootparam->h2c_config_db) and dd->type
 *     against -1.
 *  3. Copy dd->config_len bytes from argp into the descriptor's config space.
 *  4. Set dc->config_change to MIC_VIRTIO_PARAM_CONFIG_CHANGED and raise the
 *     doorbell.
 *  5. Wait for dc->guest_ack in up to `retry` (100) rounds of
 *     wait_event_timeout() with a 100 ms timeout each.
 *  6. Clear dc->config_change and dc->guest_ack, then drop all locks.
 *
 * NOTE(review): `wake` is an on-stack wait queue and nothing in the visible
 * code wakes it, so each round presumably sleeps for the full timeout unless
 * guest_ack is already set - confirm.
 * NOTE(review): the dev_dbg() prints `retry`, which is not modified in the
 * visible lines, rather than the loop counter `i`.
 * NOTE(review): the error assignments, jumps to the unlock path, loop exit
 * condition and return statement are missing from this listing.
 */
377 int mic_virtio_config_change(struct mic_vdev *mvdev,
380 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
381 int ret = 0, retry = 100, i;
382 struct mic_bootparam *bootparam = mvdev->mdev->dp;
383 s8 db = bootparam->h2c_config_db;
385 mutex_lock(&mvdev->mdev->mic_mutex);
386 for (i = 0; i < mvdev->dd->num_vq; i++)
387 mutex_lock_nested(&mvdev->mvr[i].vr_mutex, i + 1);
389 if (db == -1 || mvdev->dd->type == -1) {
394 if (copy_from_user(mic_vq_configspace(mvdev->dd),
395 argp, mvdev->dd->config_len)) {
396 dev_err(mic_dev(mvdev), "%s %d err %d\n",
397 __func__, __LINE__, -EFAULT);
401 mvdev->dc->config_change = MIC_VIRTIO_PARAM_CONFIG_CHANGED;
402 mvdev->mdev->ops->send_intr(mvdev->mdev, db);
404 for (i = retry; i--;) {
405 ret = wait_event_timeout(wake,
406 mvdev->dc->guest_ack, msecs_to_jiffies(100));
411 dev_dbg(mic_dev(mvdev),
412 "%s %d retry: %d\n", __func__, __LINE__, retry);
413 mvdev->dc->config_change = 0;
414 mvdev->dc->guest_ack = 0;
416 for (i = 0; i < mvdev->dd->num_vq; i++)
417 mutex_unlock(&mvdev->mvr[i].vr_mutex);
418 mutex_unlock(&mvdev->mdev->mic_mutex);
/*
 * mic_copy_dp_entry() - copy a user supplied device descriptor into a free
 * slot of the device page.
 *
 * @mvdev:   virtio device (used for logging and to reach the mic_device).
 * @devpage: output parameter (its store is on a line missing from this
 *           listing; presumably it receives the chosen device page slot).
 * (Two further parameters, used below as `argp` and `type`, are declared on
 * lines missing from this listing.)
 *
 * Visible steps:
 *  1. Copy the fixed size descriptor header from argp into `dd`.
 *  2. Reject descriptors whose aligned size exceeds MIC_MAX_DESC_BLK_SIZE or
 *     whose num_vq exceeds MIC_MAX_VRINGS.
 *  3. Allocate mic_desc_size(&dd) bytes and copy the complete descriptor
 *     from argp into dd_config.
 *  4. Reject any vqconfig entry whose num exceeds MIC_MAX_VRING_ENTRIES.
 *  5. Scan the device page, starting after the aligned mic_bootparam, for an
 *     entry whose type is 0 or -1.
 *  6. Store dd_config->type through `type` and memcpy the descriptor into
 *     the chosen slot.
 *
 * NOTE(review): the descriptor is fetched from user space twice (header into
 * dd, then the full copy into dd_config). The size and num_vq limits are
 * checked on dd, while the slot scan bound and the final memcpy length are
 * computed from dd_config. If user space changes the header between the two
 * fetches the validated values and the used values can differ - presumably
 * a double-fetch hazard; confirm and consider comparing the two headers.
 * NOTE(review): the assignment of devp inside the scan loop, the handling of
 * slot_found, the release of dd_config and the return statement are missing
 * from this listing.
 */
422 static int mic_copy_dp_entry(struct mic_vdev *mvdev,
425 struct mic_device_desc **devpage)
427 struct mic_device *mdev = mvdev->mdev;
428 struct mic_device_desc dd, *dd_config, *devp;
429 struct mic_vqconfig *vqconfig;
431 bool slot_found = false;
433 if (copy_from_user(&dd, argp, sizeof(dd))) {
434 dev_err(mic_dev(mvdev), "%s %d err %d\n",
435 __func__, __LINE__, -EFAULT);
439 if (mic_aligned_desc_size(&dd) > MIC_MAX_DESC_BLK_SIZE ||
440 dd.num_vq > MIC_MAX_VRINGS) {
441 dev_err(mic_dev(mvdev), "%s %d err %d\n",
442 __func__, __LINE__, -EINVAL);
446 dd_config = kmalloc(mic_desc_size(&dd), GFP_KERNEL);
447 if (dd_config == NULL) {
448 dev_err(mic_dev(mvdev), "%s %d err %d\n",
449 __func__, __LINE__, -ENOMEM);
452 if (copy_from_user(dd_config, argp, mic_desc_size(&dd))) {
454 dev_err(mic_dev(mvdev), "%s %d err %d\n",
455 __func__, __LINE__, ret);
459 vqconfig = mic_vq_config(dd_config);
460 for (i = 0; i < dd.num_vq; i++) {
461 if (le16_to_cpu(vqconfig[i].num) > MIC_MAX_VRING_ENTRIES) {
463 dev_err(mic_dev(mvdev), "%s %d err %d\n",
464 __func__, __LINE__, ret);
469 /* Find the first free device page entry */
470 for (i = mic_aligned_size(struct mic_bootparam);
471 i < MIC_DP_SIZE - mic_total_desc_size(dd_config);
472 i += mic_total_desc_size(devp)) {
474 if (devp->type == 0 || devp->type == -1) {
481 dev_err(mic_dev(mvdev), "%s %d err %d\n",
482 __func__, __LINE__, ret);
486 * Save off the type before doing the memcpy. Type will be set in the
487 * end after completing all initialization for the new device.
489 *type = dd_config->type;
491 memcpy(devp, dd_config, mic_desc_size(dd_config));
/*
 * mic_init_device_ctrl() - initialise the control block that follows a
 * device descriptor in the device page.
 *
 * @mvdev:   virtio device (not referenced in the visible lines).
 * @devpage: device descriptor; the control block is located at
 *           devpage + mic_aligned_desc_size(devpage).
 *
 * The visible lines clear config_change and used_address_updated and set
 * both doorbell fields (c2h_vdev_db, h2c_vdev_db) to -1.
 *
 * NOTE(review): embedded line numbers skip 507-509 and 513, so further field
 * initialisation (and presumably the store of dc into mvdev) is not visible
 * in this listing.
 */
499 static void mic_init_device_ctrl(struct mic_vdev *mvdev,
500 struct mic_device_desc *devpage)
502 struct mic_device_ctrl *dc;
504 dc = (void *)devpage + mic_aligned_desc_size(devpage);
506 dc->config_change = 0;
510 dc->used_address_updated = 0;
511 dc->c2h_vdev_db = -1;
512 dc->h2c_vdev_db = -1;
/*
 * mic_virtio_add_device() - create a virtio device from a user supplied
 * descriptor and announce it to the card.
 *
 * @mvdev: virtio device being set up.
 * (A further parameter, passed on below as `argp`, is declared on a line
 * missing from this listing.)
 *
 * Visible steps, all under mdev->mic_mutex:
 *  1. mic_copy_dp_entry() places the descriptor in the device page and
 *     returns its type; mic_init_device_ctrl() initialises the control block.
 *  2. Record the type in mvdev->virtio_id and set up the bottom half work
 *     item with mic_bh_handler().
 *  3. For every virtqueue: initialise vr_mutex, allocate zeroed pages sized
 *     for the vring plus a struct _mic_vring_info, fill in the info magic,
 *     map the ring with mic_map_single() and store the little endian address
 *     in vqconfig, call vring_init() and vringh_init_kern(), initialise both
 *     KIOVs, set head to USHRT_MAX and install mic_notify() as the notify
 *     callback.
 *  4. Allocate a doorbell with mic_next_db() and register
 *     mic_virtio_intr_handler() for it; publish it in dc->c2h_vdev_db.
 *  5. Add the device to mdev->vdev_list and raise the config doorbell.
 *
 * The tail of the function is the unwind path: it unmaps and frees the rings
 * with index below i and drops the mutex.
 *
 * NOTE(review): this listing is missing many lines (declarations of type,
 * num, db and irqname, the store to mvdev->dd, the page allocation target,
 * the mapping arguments, the error jumps, the write barrier and the final
 * type store, the labels and the return statements).
 * NOTE(review): the unwind loop covers only j < i. If the failure paths
 * after mic_map_single() in the per-ring loop jump there, the ring mapped in
 * the current iteration would presumably not be released - confirm.
 */
516 int mic_virtio_add_device(struct mic_vdev *mvdev,
519 struct mic_device *mdev = mvdev->mdev;
520 struct mic_device_desc *dd = NULL;
521 struct mic_vqconfig *vqconfig;
522 int vr_size, i, j, ret;
526 struct mic_bootparam *bootparam = mdev->dp;
529 mutex_lock(&mdev->mic_mutex);
531 ret = mic_copy_dp_entry(mvdev, argp, &type, &dd);
533 mutex_unlock(&mdev->mic_mutex);
537 mic_init_device_ctrl(mvdev, dd);
540 mvdev->virtio_id = type;
541 vqconfig = mic_vq_config(dd);
542 INIT_WORK(&mvdev->virtio_bh_work, mic_bh_handler);
544 for (i = 0; i < dd->num_vq; i++) {
545 struct mic_vringh *mvr = &mvdev->mvr[i];
546 struct mic_vring *vr = &mvdev->mvr[i].vring;
547 num = le16_to_cpu(vqconfig[i].num);
548 mutex_init(&mvr->vr_mutex);
549 vr_size = PAGE_ALIGN(vring_size(num, MIC_VIRTIO_RING_ALIGN) +
550 sizeof(struct _mic_vring_info));
552 __get_free_pages(GFP_KERNEL | __GFP_ZERO,
556 dev_err(mic_dev(mvdev), "%s %d err %d\n",
557 __func__, __LINE__, ret);
561 vr->info = vr->va + vring_size(num, MIC_VIRTIO_RING_ALIGN);
562 vr->info->magic = MIC_MAGIC + mvdev->virtio_id + i;
563 vqconfig[i].address = mic_map_single(mdev,
565 if (mic_map_error(vqconfig[i].address)) {
566 free_pages((unsigned long)vr->va, get_order(vr_size));
568 dev_err(mic_dev(mvdev), "%s %d err %d\n",
569 __func__, __LINE__, ret);
572 vqconfig[i].address = cpu_to_le64(vqconfig[i].address);
574 vring_init(&vr->vr, num, vr->va, MIC_VIRTIO_RING_ALIGN);
575 ret = vringh_init_kern(&mvr->vrh,
576 *(u32 *)mic_vq_features(mvdev->dd), num, false,
577 vr->vr.desc, vr->vr.avail, vr->vr.used);
579 dev_err(mic_dev(mvdev), "%s %d err %d\n",
580 __func__, __LINE__, ret);
583 vringh_kiov_init(&mvr->riov, NULL, 0);
584 vringh_kiov_init(&mvr->wiov, NULL, 0);
585 mvr->head = USHRT_MAX;
587 mvr->vrh.notify = mic_notify;
588 dev_dbg(mdev->sdev->parent,
589 "%s %d index %d va %p info %p vr_size 0x%x\n",
590 __func__, __LINE__, i, vr->va, vr->info, vr_size);
593 snprintf(irqname, sizeof(irqname), "mic%dvirtio%d", mdev->id,
595 mvdev->virtio_db = mic_next_db(mdev);
596 mvdev->virtio_cookie = mic_request_irq(mdev, mic_virtio_intr_handler,
597 irqname, mvdev, mvdev->virtio_db, MIC_INTR_DB);
598 if (IS_ERR(mvdev->virtio_cookie)) {
599 ret = PTR_ERR(mvdev->virtio_cookie);
600 dev_dbg(mdev->sdev->parent, "request irq failed\n");
604 mvdev->dc->c2h_vdev_db = mvdev->virtio_db;
606 list_add_tail(&mvdev->list, &mdev->vdev_list);
608 * Order the type update with previous stores. This write barrier
609 * is paired with the corresponding read barrier before the uncached
610 * system memory read of the type, on the card while scanning the
616 dev_dbg(mdev->sdev->parent, "Added virtio device id %d\n", dd->type);
618 db = bootparam->h2c_config_db;
620 mdev->ops->send_intr(mdev, db);
621 mutex_unlock(&mdev->mic_mutex);
624 vqconfig = mic_vq_config(dd);
625 for (j = 0; j < i; j++) {
626 struct mic_vringh *mvr = &mvdev->mvr[j];
627 mic_unmap_single(mdev, le64_to_cpu(vqconfig[j].address),
629 free_pages((unsigned long)mvr->vring.va,
630 get_order(mvr->vring.len));
632 mutex_unlock(&mdev->mic_mutex);
/*
 * mic_virtio_del_device() - hot remove a virtio device and release its
 * resources.
 *
 * @mvdev: virtio device to remove.
 *
 * Visible steps, all under mdev->mic_mutex:
 *  1. Read the config doorbell; one path jumps to skip_hot_remove.
 *  2. Otherwise set dc->config_change to MIC_VIRTIO_PARAM_DEV_REMOVE, raise
 *     the doorbell and wait for dc->guest_ack in up to `retry` (100) rounds
 *     of wait_event_timeout() with a 100 ms timeout each, then clear
 *     config_change and guest_ack.
 *  3. Free the interrupt and flush the bottom half work item.
 *  4. For every ring: clean up both KIOVs, unmap the ring and free its pages.
 *  5. Find this device on mdev->vdev_list (the removal call itself is on a
 *     line missing from this listing).
 *  6. Mark the descriptor slot free by setting dd->type to -1.
 *
 * NOTE(review): `wake` is an on-stack wait queue and nothing in the visible
 * code wakes it, so each wait round presumably runs to its timeout unless
 * guest_ack is already set - confirm.
 * NOTE(review): the condition guarding the jump to skip_hot_remove, the
 * label itself, the loop exit test, the list removal, the write barrier and
 * the closing of the function are missing from this listing.
 */
636 void mic_virtio_del_device(struct mic_vdev *mvdev)
638 struct list_head *pos, *tmp;
639 struct mic_vdev *tmp_mvdev;
640 struct mic_device *mdev = mvdev->mdev;
641 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
642 int i, ret, retry = 100;
643 struct mic_vqconfig *vqconfig;
644 struct mic_bootparam *bootparam = mdev->dp;
647 mutex_lock(&mdev->mic_mutex);
648 db = bootparam->h2c_config_db;
650 goto skip_hot_remove;
651 dev_dbg(mdev->sdev->parent,
652 "Requesting hot remove id %d\n", mvdev->virtio_id);
653 mvdev->dc->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE;
654 mdev->ops->send_intr(mdev, db);
655 for (i = retry; i--;) {
656 ret = wait_event_timeout(wake,
657 mvdev->dc->guest_ack, msecs_to_jiffies(100));
661 dev_dbg(mdev->sdev->parent,
662 "Device id %d config_change %d guest_ack %d\n",
663 mvdev->virtio_id, mvdev->dc->config_change,
664 mvdev->dc->guest_ack);
665 mvdev->dc->config_change = 0;
666 mvdev->dc->guest_ack = 0;
668 mic_free_irq(mdev, mvdev->virtio_cookie, mvdev);
669 flush_work(&mvdev->virtio_bh_work);
670 vqconfig = mic_vq_config(mvdev->dd);
671 for (i = 0; i < mvdev->dd->num_vq; i++) {
672 struct mic_vringh *mvr = &mvdev->mvr[i];
673 vringh_kiov_cleanup(&mvr->riov);
674 vringh_kiov_cleanup(&mvr->wiov);
675 mic_unmap_single(mdev, le64_to_cpu(vqconfig[i].address),
677 free_pages((unsigned long)mvr->vring.va,
678 get_order(mvr->vring.len));
681 list_for_each_safe(pos, tmp, &mdev->vdev_list) {
682 tmp_mvdev = list_entry(pos, struct mic_vdev, list);
683 if (tmp_mvdev == mvdev) {
685 dev_dbg(mdev->sdev->parent,
686 "Removing virtio device id %d\n",
692 * Order the type update with previous stores. This write barrier
693 * is paired with the corresponding read barrier before the uncached
694 * system memory read of the type, on the card while scanning the
698 mvdev->dd->type = -1;
699 mutex_unlock(&mdev->mic_mutex);