[linux-2.6-block.git] / drivers / iommu / intel-svm.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2015 Intel Corporation.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>
 */

#include <linux/intel-iommu.h>
#include <linux/mmu_notifier.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/intel-svm.h>
#include <linux/rculist.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/dmar.h>
#include <linux/interrupt.h>
#include <linux/mm_types.h>
#include <asm/page.h>

#include "intel-pasid.h"

/* Forward declaration: the page request queue handler is defined at the end
 * of this file but is registered earlier, in intel_svm_enable_prq(). */
static irqreturn_t prq_event_thread(int irq, void *d);

/*
 * Check the IOMMU capability register against two CPU features.
 *
 * Returns -EINVAL if the CPU has X86_FEATURE_GBPAGES enabled but
 * cap_fl1gp_support() is clear, or has X86_FEATURE_LA57 enabled but
 * cap_5lp_support() is clear. Returns 0 otherwise.
 */
int intel_svm_init(struct intel_iommu *iommu)
{
	int ret = 0;

	if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
	    !cap_fl1gp_support(iommu->cap))
		ret = -EINVAL;
	else if (cpu_feature_enabled(X86_FEATURE_LA57) &&
		 !cap_5lp_support(iommu->cap))
		ret = -EINVAL;

	return ret;
}

/* Page allocation order of the page request queue. Passed to alloc_pages()
 * and free_pages(), OR'ed into the value written to DMAR_PQA_REG, and used
 * to size PRQ_RING_MASK. */
#define PRQ_ORDER 0

int intel_svm_enable_prq(struct intel_iommu *iommu)
{
	struct page *pages;
	int irq, ret;

	pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
	if (!pages) {
		pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
			iommu->name);
		return -ENOMEM;
	}
	iommu->prq = page_address(pages);

	irq = dmar_alloc_hwirq(DMAR_UNITS_SUPPORTED + iommu->seq_id, iommu->node, iommu);
	if (irq <= 0) {
		pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
		       iommu->name);
		ret = -EINVAL;
	err:
		free_pages((unsigned long)iommu->prq, PRQ_ORDER);
		iommu->prq = NULL;
		return ret;
	}
	iommu->pr_irq = irq;

	snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);

	ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
				   iommu->prq_name, iommu);
	if (ret) {
		pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
		       iommu->name);
		dmar_free_hwirq(irq);
		iommu->pr_irq = 0;
		goto err;
	}
	dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);

	return 0;
}

/*
 * Undo intel_svm_enable_prq(): clear the queue registers, release the
 * interrupt if one is installed, and free the queue pages.
 * Always returns 0.
 */
int intel_svm_finish_prq(struct intel_iommu *iommu)
{
	int hwirq;

	/* Detach the hardware from the queue before freeing anything. */
	dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);

	hwirq = iommu->pr_irq;
	if (hwirq) {
		free_irq(hwirq, iommu);
		dmar_free_hwirq(hwirq);
		iommu->pr_irq = 0;
	}

	free_pages((unsigned long)iommu->prq, PRQ_ORDER);
	iommu->prq = NULL;

	return 0;
}

static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_dev *sdev,
				unsigned long address, unsigned long pages, int ih)
{
	struct qi_desc desc;

	/*
	 * Do PASID granu IOTLB invalidation if page selective capability is
	 * not available.
	 */
	if (pages == -1 || !cap_pgsel_inv(svm->iommu->cap)) {
		desc.qw0 = QI_EIOTLB_PASID(svm->pasid) |
			QI_EIOTLB_DID(sdev->did) |
			QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
			QI_EIOTLB_TYPE;
		desc.qw1 = 0;
	} else {
		int mask = ilog2(__roundup_pow_of_two(pages));

		desc.qw0 = QI_EIOTLB_PASID(svm->pasid) |
				QI_EIOTLB_DID(sdev->did) |
				QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
				QI_EIOTLB_TYPE;
		desc.qw1 = QI_EIOTLB_ADDR(address) |
				QI_EIOTLB_IH(ih) |
				QI_EIOTLB_AM(mask);
	}
	desc.qw2 = 0;
	desc.qw3 = 0;
	qi_submit_sync(&desc, svm->iommu);

	if (sdev->dev_iotlb) {
		desc.qw0 = QI_DEV_EIOTLB_PASID(svm->pasid) |
				QI_DEV_EIOTLB_SID(sdev->sid) |
				QI_DEV_EIOTLB_QDEP(sdev->qdep) |
				QI_DEIOTLB_TYPE;
		if (pages == -1) {
			desc.qw1 = QI_DEV_EIOTLB_ADDR(-1ULL >> 1) |
					QI_DEV_EIOTLB_SIZE;
		} else if (pages > 1) {
			/* The least significant zero bit indicates the size. So,
			 * for example, an "address" value of 0x12345f000 will
			 * flush from 0x123440000 to 0x12347ffff (256KiB). */
			unsigned long last = address + ((unsigned long)(pages - 1) << VTD_PAGE_SHIFT);
			unsigned long mask = __rounddown_pow_of_two(address ^ last);

			desc.qw1 = QI_DEV_EIOTLB_ADDR((address & ~mask) |
					(mask - 1)) | QI_DEV_EIOTLB_SIZE;
		} else {
			desc.qw1 = QI_DEV_EIOTLB_ADDR(address);
		}
		desc.qw2 = 0;
		desc.qw3 = 0;
		qi_submit_sync(&desc, svm->iommu);
	}
}

static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
				unsigned long pages, int ih)
{
	struct intel_svm_dev *sdev;

	rcu_read_lock();
	list_for_each_entry_rcu(sdev, &svm->devs, list)
		intel_flush_svm_range_dev(svm, sdev, address, pages, ih);
	rcu_read_unlock();
}

/*
 * mmu_notifier .invalidate_range callback.
 * By the time this runs the pages have already been freed.
 */
static void intel_invalidate_range(struct mmu_notifier *mn,
				   struct mm_struct *mm,
				   unsigned long start, unsigned long end)
{
	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
	/* Byte length rounded up by PAGE_SIZE - 1, then counted in VT-d pages. */
	unsigned long nr_pages = (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT;

	intel_flush_svm_range(svm, start, nr_pages, 0);
}

/*
 * mmu_notifier .release callback.
 *
 * This may be invoked from exit_mmap() *before* the page tables are
 * cleared, and __mmu_notifier_release() removes us from the notifier list,
 * so invalidate_range() will not fire when the tables do go away. Hardware
 * must therefore be stopped from walking those tables here.
 *
 * That is done by clearing the PASID table entry for each bound device and
 * then flushing the IOTLB and PASID table caches. This might upset
 * hardware; pointing the PASID at a dummy PGD (such as the zero page)
 * could be preferable, so that the device takes a fault it really *has* to
 * handle gracefully without affecting other processes.
 */
static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
	struct intel_svm_dev *bound;

	rcu_read_lock();
	list_for_each_entry_rcu(bound, &svm->devs, list) {
		intel_pasid_tear_down_entry(svm->iommu, bound->dev, svm->pasid);
		/* pages == -1: flush everything for this PASID on the device */
		intel_flush_svm_range_dev(svm, bound, 0, -1, 0);
	}
	rcu_read_unlock();
}

/* MMU notifier callbacks installed on every user-mode svm in intel_svm_bind_mm(). */
static const struct mmu_notifier_ops intel_mmuops = {
	.invalidate_range	= intel_invalidate_range,
	.release		= intel_mm_release,
};

/* Held across the whole of intel_svm_bind_mm(), intel_svm_unbind_mm() and
 * intel_svm_is_pasid_valid(). */
static DEFINE_MUTEX(pasid_mutex);
/* Every live struct intel_svm, linked through svm->list. Searched by
 * intel_svm_bind_mm() to reuse an existing PASID for the same mm; entries
 * are added there and removed in intel_svm_unbind_mm(). */
static LIST_HEAD(global_svm_list);

int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops)
{
	struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
	struct device_domain_info *info;
	struct intel_svm_dev *sdev;
	struct intel_svm *svm = NULL;
	struct mm_struct *mm = NULL;
	int pasid_max;
	int ret;

	if (!iommu || dmar_disabled)
		return -EINVAL;

	if (dev_is_pci(dev)) {
		pasid_max = pci_max_pasids(to_pci_dev(dev));
		if (pasid_max < 0)
			return -EINVAL;
	} else
		pasid_max = 1 << 20;

	if (flags & SVM_FLAG_SUPERVISOR_MODE) {
		if (!ecap_srs(iommu->ecap))
			return -EINVAL;
	} else if (pasid) {
		mm = get_task_mm(current);
		BUG_ON(!mm);
	}

	mutex_lock(&pasid_mutex);
	if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) {
		struct intel_svm *t;

		list_for_each_entry(t, &global_svm_list, list) {
			if (t->mm != mm || (t->flags & SVM_FLAG_PRIVATE_PASID))
				continue;

			svm = t;
			if (svm->pasid >= pasid_max) {
				dev_warn(dev,
					 "Limited PASID width. Cannot use existing PASID %d\n",
					 svm->pasid);
				ret = -ENOSPC;
				goto out;
			}

			list_for_each_entry(sdev, &svm->devs, list) {
				if (dev == sdev->dev) {
					if (sdev->ops != ops) {
						ret = -EBUSY;
						goto out;
					}
					sdev->users++;
					goto success;
				}
			}

			break;
		}
	}

	sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
	if (!sdev) {
		ret = -ENOMEM;
		goto out;
	}
	sdev->dev = dev;

	ret = intel_iommu_enable_pasid(iommu, dev);
	if (ret || !pasid) {
		/* If they don't actually want to assign a PASID, this is
		 * just an enabling check/preparation. */
		kfree(sdev);
		goto out;
	}

	info = dev->archdata.iommu;
	if (!info || !info->pasid_supported) {
		kfree(sdev);
		goto out;
	}

	sdev->did = FLPT_DEFAULT_DID;
	sdev->sid = PCI_DEVID(info->bus, info->devfn);
	if (info->ats_enabled) {
		sdev->dev_iotlb = 1;
		sdev->qdep = info->ats_qdep;
		if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
			sdev->qdep = 0;
	}

	/* Finish the setup now we know we're keeping it */
	sdev->users = 1;
	sdev->ops = ops;
	init_rcu_head(&sdev->rcu);

	if (!svm) {
		svm = kzalloc(sizeof(*svm), GFP_KERNEL);
		if (!svm) {
			ret = -ENOMEM;
			kfree(sdev);
			goto out;
		}
		svm->iommu = iommu;

		if (pasid_max > intel_pasid_max_id)
			pasid_max = intel_pasid_max_id;

		/* Do not use PASID 0 in caching mode (virtualised IOMMU) */
		ret = intel_pasid_alloc_id(svm,
					   !!cap_caching_mode(iommu->cap),
					   pasid_max - 1, GFP_KERNEL);
		if (ret < 0) {
			kfree(svm);
			kfree(sdev);
			goto out;
		}
		svm->pasid = ret;
		svm->notifier.ops = &intel_mmuops;
		svm->mm = mm;
		svm->flags = flags;
		INIT_LIST_HEAD_RCU(&svm->devs);
		INIT_LIST_HEAD(&svm->list);
		ret = -ENOMEM;
		if (mm) {
			ret = mmu_notifier_register(&svm->notifier, mm);
			if (ret) {
				intel_pasid_free_id(svm->pasid);
				kfree(svm);
				kfree(sdev);
				goto out;
			}
		}

		spin_lock(&iommu->lock);
		ret = intel_pasid_setup_first_level(iommu, dev,
				mm ? mm->pgd : init_mm.pgd,
				svm->pasid, FLPT_DEFAULT_DID,
				mm ? 0 : PASID_FLAG_SUPERVISOR_MODE);
		spin_unlock(&iommu->lock);
		if (ret) {
			if (mm)
				mmu_notifier_unregister(&svm->notifier, mm);
			intel_pasid_free_id(svm->pasid);
			kfree(svm);
			kfree(sdev);
			goto out;
		}

		list_add_tail(&svm->list, &global_svm_list);
	} else {
		/*
		 * Binding a new device with existing PASID, need to setup
		 * the PASID entry.
		 */
		spin_lock(&iommu->lock);
		ret = intel_pasid_setup_first_level(iommu, dev,
						mm ? mm->pgd : init_mm.pgd,
						svm->pasid, FLPT_DEFAULT_DID,
						mm ? 0 : PASID_FLAG_SUPERVISOR_MODE);
		spin_unlock(&iommu->lock);
		if (ret) {
			kfree(sdev);
			goto out;
		}
	}
	list_add_rcu(&sdev->list, &svm->devs);

 success:
	*pasid = svm->pasid;
	ret = 0;
 out:
	mutex_unlock(&pasid_mutex);
	if (mm)
		mmput(mm);
	return ret;
}
EXPORT_SYMBOL_GPL(intel_svm_bind_mm);

int intel_svm_unbind_mm(struct device *dev, int pasid)
{
	struct intel_svm_dev *sdev;
	struct intel_iommu *iommu;
	struct intel_svm *svm;
	int ret = -EINVAL;

	mutex_lock(&pasid_mutex);
	iommu = intel_svm_device_to_iommu(dev);
	if (!iommu)
		goto out;

	svm = intel_pasid_lookup_id(pasid);
	if (!svm)
		goto out;

	list_for_each_entry(sdev, &svm->devs, list) {
		if (dev == sdev->dev) {
			ret = 0;
			sdev->users--;
			if (!sdev->users) {
				list_del_rcu(&sdev->list);
				/* Flush the PASID cache and IOTLB for this device.
				 * Note that we do depend on the hardware *not* using
				 * the PASID any more. Just as we depend on other
				 * devices never using PASIDs that they have no right
				 * to use. We have a *shared* PASID table, because it's
				 * large and has to be physically contiguous. So it's
				 * hard to be as defensive as we might like. */
				intel_pasid_tear_down_entry(iommu, dev, svm->pasid);
				intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
				kfree_rcu(sdev, rcu);

				if (list_empty(&svm->devs)) {
					intel_pasid_free_id(svm->pasid);
					if (svm->mm)
						mmu_notifier_unregister(&svm->notifier, svm->mm);

					list_del(&svm->list);

					/* We mandate that no page faults may be outstanding
					 * for the PASID when intel_svm_unbind_mm() is called.
					 * If that is not obeyed, subtle errors will happen.
					 * Let's make them less subtle... */
					memset(svm, 0x6b, sizeof(*svm));
					kfree(svm);
				}
			}
			break;
		}
	}
 out:
	mutex_unlock(&pasid_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(intel_svm_unbind_mm);

/*
 * Report whether @pasid still refers to a usable address space.
 *
 * Returns -EINVAL if @dev has no IOMMU or @pasid is not allocated, 1 if
 * the svm has no mm (init_mm is used in that case) or its mm still has
 * users, and 0 if the mm's mm_users count is not positive.
 */
int intel_svm_is_pasid_valid(struct device *dev, int pasid)
{
	struct intel_iommu *iommu;
	struct intel_svm *svm;
	int ret = -EINVAL;

	mutex_lock(&pasid_mutex);

	iommu = intel_svm_device_to_iommu(dev);
	if (iommu) {
		svm = intel_pasid_lookup_id(pasid);
		if (svm)
			ret = !svm->mm ||
			      atomic_read(&svm->mm->mm_users) > 0;
	}

	mutex_unlock(&pasid_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(intel_svm_is_pasid_valid);

/* Page request queue descriptor: one entry of the ring at iommu->prq, as
 * consumed by prq_event_thread(). Four 64-bit words per entry. */
struct page_req_dsc {
	union {
		struct {
			u64 type:8;
			u64 pasid_present:1;	/* request carries a PASID; checked before lookup */
			u64 priv_data_present:1;	/* priv_data[] is to be echoed in the response */
			u64 rsvd:6;
			u64 rid:16;		/* matched against intel_svm_dev.sid */
			u64 pasid:20;		/* looked up with intel_pasid_lookup_id() */
			u64 exe_req:1;		/* checked against VM_EXEC */
			u64 pm_req:1;		/* reported to fault_cb as the low rwxp bit */
			u64 rsvd2:10;
		};
		u64 qw_0;
	};
	union {
		struct {
			u64 rd_req:1;		/* checked against VM_READ */
			u64 wr_req:1;		/* checked against VM_WRITE; selects FAULT_FLAG_WRITE */
			u64 lpig:1;		/* last page in group: a response must be sent */
			u64 prg_index:9;	/* echoed back via QI_PGRP_IDX() */
			u64 addr:52;		/* page number; shifted by VTD_PAGE_SHIFT for the address */
		};
		u64 qw_1;
	};
	u64 priv_data[2];
};

/* Mask applied to the values read from DMAR_PQH_REG/DMAR_PQT_REG and to the
 * software head as it advances, so byte offsets wrap at 0x1000 << PRQ_ORDER.
 * NOTE(review): the low cut-off is 0x10 while head advances by
 * sizeof(struct page_req_dsc) (four u64 words) - confirm whether 0x20 was
 * intended. */
#define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x10)

/*
 * Return true if the page request asks for an access type (execute, read
 * or write) that @vma->vm_flags does not permit.
 */
static bool access_error(struct vm_area_struct *vma, struct page_req_dsc *req)
{
	unsigned long wanted;

	wanted  = req->exe_req ? VM_EXEC : 0;
	wanted |= req->rd_req ? VM_READ : 0;
	wanted |= req->wr_req ? VM_WRITE : 0;

	/* Any requested bit that is missing from vm_flags is an error. */
	return (wanted & ~vma->vm_flags) != 0;
}

/*
 * Return true if @addr survives sign extension from bit
 * __VIRTUAL_MASK_SHIFT, i.e. all the bits above it are copies of it.
 */
static bool is_canonical_address(u64 addr)
{
	const int unused_bits = 64 - (__VIRTUAL_MASK_SHIFT + 1);
	long as_signed = (long) addr;
	long sign_extended = (as_signed << unused_bits) >> unused_bits;

	return sign_extended == as_signed;
}

/*
 * prq_event_thread() - threaded IRQ handler for the page request queue
 * @irq: interrupt number (unused)
 * @d:   the struct intel_iommu registered in intel_svm_enable_prq()
 *
 * Walks the ring from the hardware head to the tail that was current on
 * entry. For each request it looks up the svm for the PASID, tries to
 * resolve the fault with handle_mm_fault() on the bound mm, reports the
 * outcome to the device driver's fault_cb (if any) and, when the request
 * demands one, submits a page group response. The head register is
 * updated once the loop is done.
 *
 * Result codes: QI_RESP_FAILURE when the request has no PASID or the PASID
 * is unknown; QI_RESP_INVALID for supervisor bindings, non-canonical
 * addresses, a defunct mm, a missing/forbidden VMA or a fault error;
 * QI_RESP_SUCCESS otherwise.
 *
 * Returns IRQ_RETVAL(handled), where handled is set once at least one
 * request was processed.
 */
static irqreturn_t prq_event_thread(int irq, void *d)
{
	struct intel_iommu *iommu = d;
	struct intel_svm *svm = NULL;
	int head, tail, handled = 0;

	/* Clear PPR bit before reading head/tail registers, to
	 * ensure that we get a new interrupt if needed. */
	writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);

	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
	head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
	while (head != tail) {
		struct intel_svm_dev *sdev;
		struct vm_area_struct *vma;
		struct page_req_dsc *req;
		struct qi_desc resp;
		int result;
		vm_fault_t ret;
		u64 address;

		handled = 1;

		/* head is a byte offset into the ring */
		req = &iommu->prq[head / sizeof(*req)];

		result = QI_RESP_FAILURE;
		address = (u64)req->addr << VTD_PAGE_SHIFT;
		if (!req->pasid_present) {
			pr_err("%s: Page request without PASID: %08llx %08llx\n",
			       iommu->name, ((unsigned long long *)req)[0],
			       ((unsigned long long *)req)[1]);
			goto no_pasid;
		}

		if (!svm || svm->pasid != req->pasid) {
			rcu_read_lock();
			svm = intel_pasid_lookup_id(req->pasid);
			/* It *can't* go away, because the driver is not permitted
			 * to unbind the mm while any page faults are outstanding.
			 * So we only need RCU to protect the internal idr code. */
			rcu_read_unlock();

			if (!svm) {
				pr_err("%s: Page request for invalid PASID %d: %08llx %08llx\n",
				       iommu->name, req->pasid, ((unsigned long long *)req)[0],
				       ((unsigned long long *)req)[1]);
				goto no_pasid;
			}
		}

		result = QI_RESP_INVALID;
		/* Since we're using init_mm.pgd directly, we should never take
		 * any faults on kernel addresses. */
		if (!svm->mm)
			goto bad_req;

		/* If address is not canonical, return invalid response.
		 * This must be checked before taking the mm reference below:
		 * the bad_req path does not call mmput(). */
		if (!is_canonical_address(address))
			goto bad_req;

		/* If the mm is already defunct, don't handle faults. */
		if (!mmget_not_zero(svm->mm))
			goto bad_req;

		down_read(&svm->mm->mmap_sem);
		vma = find_extend_vma(svm->mm, address);
		if (!vma || address < vma->vm_start)
			goto invalid;

		if (access_error(vma, req))
			goto invalid;

		ret = handle_mm_fault(vma, address,
				      req->wr_req ? FAULT_FLAG_WRITE : 0);
		if (ret & VM_FAULT_ERROR)
			goto invalid;

		result = QI_RESP_SUCCESS;
	invalid:
		up_read(&svm->mm->mmap_sem);
		mmput(svm->mm);
	bad_req:
		/* Accounting for major/minor faults? */
		rcu_read_lock();
		list_for_each_entry_rcu(sdev, &svm->devs, list) {
			if (sdev->sid == req->rid)
				break;
		}
		/* Other devices can go away, but the drivers are not permitted
		 * to unbind while any page faults might be in flight. So it's
		 * OK to drop the 'lock' here now we have it. */
		rcu_read_unlock();

		/* The loop ran off the end: no bound device has this RID. */
		if (WARN_ON(&sdev->list == &svm->devs))
			sdev = NULL;

		if (sdev && sdev->ops && sdev->ops->fault_cb) {
			int rwxp = (req->rd_req << 3) | (req->wr_req << 2) |
				(req->exe_req << 1) | (req->pm_req);
			sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr,
					    req->priv_data, rwxp, result);
		}
		/* We get here in the error case where the PASID lookup failed,
		   and these can be NULL. Do not use them below this point! */
		sdev = NULL;
		svm = NULL;
	no_pasid:
		if (req->lpig || req->priv_data_present) {
			/*
			 * Per VT-d spec. v3.0 ch7.7, system software must
			 * respond with page group response if private data
			 * is present (PDP) or last page in group (LPIG) bit
			 * is set. This is an additional VT-d feature beyond
			 * PCI ATS spec.
			 */
			resp.qw0 = QI_PGRP_PASID(req->pasid) |
				QI_PGRP_DID(req->rid) |
				QI_PGRP_PASID_P(req->pasid_present) |
				QI_PGRP_PDP(req->priv_data_present) |
				QI_PGRP_RESP_CODE(result) |
				QI_PGRP_RESP_TYPE;
			resp.qw1 = QI_PGRP_IDX(req->prg_index) |
				QI_PGRP_LPIG(req->lpig);

			/* Zero first, then overlay the private data, so the
			 * copy is not wiped out again. */
			resp.qw2 = 0;
			resp.qw3 = 0;
			if (req->priv_data_present)
				memcpy(&resp.qw2, req->priv_data,
				       sizeof(req->priv_data));

			/* Only submit when a response was actually built;
			 * otherwise resp.qw0/qw1 are uninitialised. */
			qi_submit_sync(&resp, iommu);
		}

		head = (head + sizeof(*req)) & PRQ_RING_MASK;
	}

	dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);

	return IRQ_RETVAL(handled);
}
Commit	Line	Data
2025cf9e	1	// SPDX-License-Identifier: GPL-2.0-only
8a94ade4 DW	2	/*
	3	* Copyright © 2015 Intel Corporation.
	4	*
8a94ade4 DW	5	* Authors: David Woodhouse <dwmw2@infradead.org>
	6	*/
	7
	8	#include <linux/intel-iommu.h>
2f26e0a9 DW	9	#include <linux/mmu_notifier.h>
2f26e0a9 DW	10	#include <linux/sched.h>
6e84f315	11	#include <linux/sched/mm.h>
2f26e0a9 DW	12	#include <linux/slab.h>
	13	#include <linux/intel-svm.h>
	14	#include <linux/rculist.h>
	15	#include <linux/pci.h>
	16	#include <linux/pci-ats.h>
a222a7f0 DW	17	#include <linux/dmar.h>
a222a7f0 DW	18	#include <linux/interrupt.h>
50a7ca3c	19	#include <linux/mm_types.h>
9d8c3af3	20	#include <asm/page.h>
a222a7f0	21
af395073 LB	22	#include "intel-pasid.h"
af395073 LB	23
a222a7f0	24	static irqreturn_t prq_event_thread(int irq, void *d);
2f26e0a9	25
d9737953	26	int intel_svm_init(struct intel_iommu *iommu)
8a94ade4	27	{
59103caa SM	28	if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
	29	!cap_fl1gp_support(iommu->cap))
	30	return -EINVAL;
	31
f1ac10c2 SM	32	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
	33	!cap_5lp_support(iommu->cap))
	34	return -EINVAL;
	35
8a94ade4 DW	36	return 0;
8a94ade4 DW	37	}
2f26e0a9	38
a222a7f0 DW	39	#define PRQ_ORDER 0
	40
	41	int intel_svm_enable_prq(struct intel_iommu *iommu)
	42	{
	43	struct page *pages;
	44	int irq, ret;
	45
	46	pages = alloc_pages(GFP_KERNEL \| __GFP_ZERO, PRQ_ORDER);
	47	if (!pages) {
	48	pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
	49	iommu->name);
	50	return -ENOMEM;
	51	}
	52	iommu->prq = page_address(pages);
	53
	54	irq = dmar_alloc_hwirq(DMAR_UNITS_SUPPORTED + iommu->seq_id, iommu->node, iommu);
	55	if (irq <= 0) {
	56	pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
	57	iommu->name);
	58	ret = -EINVAL;
	59	err:
	60	free_pages((unsigned long)iommu->prq, PRQ_ORDER);
	61	iommu->prq = NULL;
	62	return ret;
	63	}
	64	iommu->pr_irq = irq;
	65
	66	snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);
	67
	68	ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
	69	iommu->prq_name, iommu);
	70	if (ret) {
	71	pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
	72	iommu->name);
	73	dmar_free_hwirq(irq);
72d54811	74	iommu->pr_irq = 0;
a222a7f0 DW	75	goto err;
	76	}
	77	dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
	78	dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
	79	dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) \| PRQ_ORDER);
	80
	81	return 0;
	82	}
	83
	84	int intel_svm_finish_prq(struct intel_iommu *iommu)
	85	{
	86	dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
	87	dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
	88	dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);
	89
72d54811 JS	90	if (iommu->pr_irq) {
	91	free_irq(iommu->pr_irq, iommu);
	92	dmar_free_hwirq(iommu->pr_irq);
	93	iommu->pr_irq = 0;
	94	}
a222a7f0 DW	95
	96	free_pages((unsigned long)iommu->prq, PRQ_ORDER);
	97	iommu->prq = NULL;
	98
	99	return 0;
	100	}
	101
2f26e0a9	102	static void intel_flush_svm_range_dev (struct intel_svm svm, struct intel_svm_dev sdev,
8744daf4	103	unsigned long address, unsigned long pages, int ih)
2f26e0a9 DW	104	{
2f26e0a9 DW	105	struct qi_desc desc;
2f26e0a9	106
8744daf4 JP	107	/*
	108	* Do PASID granu IOTLB invalidation if page selective capability is
	109	* not available.
	110	*/
	111	if (pages == -1 \|\| !cap_pgsel_inv(svm->iommu->cap)) {
	112	desc.qw0 = QI_EIOTLB_PASID(svm->pasid) \|
	113	QI_EIOTLB_DID(sdev->did) \|
	114	QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) \|
	115	QI_EIOTLB_TYPE;
5d308fc1	116	desc.qw1 = 0;
2f26e0a9	117	} else {
5d52f482 DW	118	int mask = ilog2(__roundup_pow_of_two(pages));
5d52f482 DW	119
5d308fc1 LB	120	desc.qw0 = QI_EIOTLB_PASID(svm->pasid) \|
	121	QI_EIOTLB_DID(sdev->did) \|
	122	QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) \|
	123	QI_EIOTLB_TYPE;
	124	desc.qw1 = QI_EIOTLB_ADDR(address) \|
5d308fc1 LB	125	QI_EIOTLB_IH(ih) \|
5d308fc1 LB	126	QI_EIOTLB_AM(mask);
2f26e0a9	127	}
5d308fc1 LB	128	desc.qw2 = 0;
5d308fc1 LB	129	desc.qw3 = 0;
2f26e0a9 DW	130	qi_submit_sync(&desc, svm->iommu);
	131
	132	if (sdev->dev_iotlb) {
5d308fc1 LB	133	desc.qw0 = QI_DEV_EIOTLB_PASID(svm->pasid) \|
	134	QI_DEV_EIOTLB_SID(sdev->sid) \|
	135	QI_DEV_EIOTLB_QDEP(sdev->qdep) \|
	136	QI_DEIOTLB_TYPE;
5d52f482	137	if (pages == -1) {
5d308fc1 LB	138	desc.qw1 = QI_DEV_EIOTLB_ADDR(-1ULL >> 1) \|
5d308fc1 LB	139	QI_DEV_EIOTLB_SIZE;
5d52f482 DW	140	} else if (pages > 1) {
	141	/* The least significant zero bit indicates the size. So,
	142	* for example, an "address" value of 0x12345f000 will
	143	* flush from 0x123440000 to 0x12347ffff (256KiB). */
	144	unsigned long last = address + ((unsigned long)(pages - 1) << VTD_PAGE_SHIFT);
ed7158ba	145	unsigned long mask = __rounddown_pow_of_two(address ^ last);
5d52f482	146
5d308fc1 LB	147	desc.qw1 = QI_DEV_EIOTLB_ADDR((address & ~mask) \|
5d308fc1 LB	148	(mask - 1)) \| QI_DEV_EIOTLB_SIZE;
2f26e0a9	149	} else {
5d308fc1	150	desc.qw1 = QI_DEV_EIOTLB_ADDR(address);
2f26e0a9	151	}
5d308fc1 LB	152	desc.qw2 = 0;
5d308fc1 LB	153	desc.qw3 = 0;
2f26e0a9 DW	154	qi_submit_sync(&desc, svm->iommu);
	155	}
	156	}
	157
	158	static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
8744daf4	159	unsigned long pages, int ih)
2f26e0a9 DW	160	{
	161	struct intel_svm_dev *sdev;
	162
	163	rcu_read_lock();
	164	list_for_each_entry_rcu(sdev, &svm->devs, list)
8744daf4	165	intel_flush_svm_range_dev(svm, sdev, address, pages, ih);
2f26e0a9 DW	166	rcu_read_unlock();
	167	}
	168
2f26e0a9 DW	169	/* Pages have been freed at this point */
	170	static void intel_invalidate_range(struct mmu_notifier *mn,
	171	struct mm_struct *mm,
	172	unsigned long start, unsigned long end)
	173	{
	174	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
	175
	176	intel_flush_svm_range(svm, start,
8744daf4	177	(end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0);
2f26e0a9 DW	178	}
2f26e0a9 DW	179
2f26e0a9 DW	180	static void intel_mm_release(struct mmu_notifier mn, struct mm_struct mm)
	181	{
	182	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
e57e58bd	183	struct intel_svm_dev *sdev;
2f26e0a9	184
e57e58bd DW	185	/* This might end up being called from exit_mmap(), before the page
	186	* tables are cleared. And __mmu_notifier_release() will delete us from
	187	* the list of notifiers so that our invalidate_range() callback doesn't
	188	* get called when the page tables are cleared. So we need to protect
	189	* against hardware accessing those page tables.
	190	*
	191	* We do it by clearing the entry in the PASID table and then flushing
	192	* the IOTLB and the PASID table caches. This might upset hardware;
	193	* perhaps we'll want to point the PASID to a dummy PGD (like the zero
	194	* page) so that we end up taking a fault that the hardware really
	195	* has to handle gracefully without affecting other processes.
	196	*/
e57e58bd DW	197	rcu_read_lock();
e57e58bd DW	198	list_for_each_entry_rcu(sdev, &svm->devs, list) {
1c4f88b7	199	intel_pasid_tear_down_entry(svm->iommu, sdev->dev, svm->pasid);
8744daf4	200	intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
e57e58bd DW	201	}
e57e58bd DW	202	rcu_read_unlock();
2f26e0a9	203
2f26e0a9 DW	204	}
	205
	206	static const struct mmu_notifier_ops intel_mmuops = {
	207	.release = intel_mm_release,
2f26e0a9 DW	208	.invalidate_range = intel_invalidate_range,
	209	};
	210
	211	static DEFINE_MUTEX(pasid_mutex);
51261aac	212	static LIST_HEAD(global_svm_list);
2f26e0a9	213
0204a496	214	int intel_svm_bind_mm(struct device dev, int pasid, int flags, struct svm_dev_ops *ops)
2f26e0a9 DW	215	{
2f26e0a9 DW	216	struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
d7cbc0f3	217	struct device_domain_info *info;
2f26e0a9 DW	218	struct intel_svm_dev *sdev;
2f26e0a9 DW	219	struct intel_svm *svm = NULL;
5cec7537	220	struct mm_struct *mm = NULL;
2f26e0a9 DW	221	int pasid_max;
	222	int ret;
	223
c56cba5d	224	if (!iommu \|\| dmar_disabled)
2f26e0a9 DW	225	return -EINVAL;
	226
	227	if (dev_is_pci(dev)) {
	228	pasid_max = pci_max_pasids(to_pci_dev(dev));
	229	if (pasid_max < 0)
	230	return -EINVAL;
	231	} else
	232	pasid_max = 1 << 20;
	233
bb37f7db	234	if (flags & SVM_FLAG_SUPERVISOR_MODE) {
5cec7537 DW	235	if (!ecap_srs(iommu->ecap))
	236	return -EINVAL;
	237	} else if (pasid) {
	238	mm = get_task_mm(current);
	239	BUG_ON(!mm);
	240	}
	241
2f26e0a9	242	mutex_lock(&pasid_mutex);
569e4f77	243	if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) {
51261aac	244	struct intel_svm *t;
2f26e0a9	245
51261aac LB	246	list_for_each_entry(t, &global_svm_list, list) {
51261aac LB	247	if (t->mm != mm \|\| (t->flags & SVM_FLAG_PRIVATE_PASID))
2f26e0a9 DW	248	continue;
2f26e0a9 DW	249
51261aac	250	svm = t;
2f26e0a9 DW	251	if (svm->pasid >= pasid_max) {
	252	dev_warn(dev,
	253	"Limited PASID width. Cannot use existing PASID %d\n",
	254	svm->pasid);
	255	ret = -ENOSPC;
	256	goto out;
	257	}
	258
	259	list_for_each_entry(sdev, &svm->devs, list) {
	260	if (dev == sdev->dev) {
0204a496 DW	261	if (sdev->ops != ops) {
	262	ret = -EBUSY;
	263	goto out;
	264	}
2f26e0a9 DW	265	sdev->users++;
	266	goto success;
	267	}
	268	}
	269
	270	break;
	271	}
	272	}
	273
	274	sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
	275	if (!sdev) {
	276	ret = -ENOMEM;
	277	goto out;
	278	}
	279	sdev->dev = dev;
	280
d7cbc0f3	281	ret = intel_iommu_enable_pasid(iommu, dev);
2f26e0a9 DW	282	if (ret \|\| !pasid) {
	283	/* If they don't actually want to assign a PASID, this is
	284	* just an enabling check/preparation. */
	285	kfree(sdev);
	286	goto out;
	287	}
d7cbc0f3 LB	288
	289	info = dev->archdata.iommu;
	290	if (!info \|\| !info->pasid_supported) {
	291	kfree(sdev);
	292	goto out;
	293	}
	294
	295	sdev->did = FLPT_DEFAULT_DID;
	296	sdev->sid = PCI_DEVID(info->bus, info->devfn);
	297	if (info->ats_enabled) {
	298	sdev->dev_iotlb = 1;
	299	sdev->qdep = info->ats_qdep;
	300	if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
	301	sdev->qdep = 0;
	302	}
	303
2f26e0a9 DW	304	/* Finish the setup now we know we're keeping it */
2f26e0a9 DW	305	sdev->users = 1;
0204a496	306	sdev->ops = ops;
2f26e0a9 DW	307	init_rcu_head(&sdev->rcu);
	308
	309	if (!svm) {
	310	svm = kzalloc(sizeof(*svm), GFP_KERNEL);
	311	if (!svm) {
	312	ret = -ENOMEM;
	313	kfree(sdev);
	314	goto out;
	315	}
	316	svm->iommu = iommu;
	317
4774cc52 LB	318	if (pasid_max > intel_pasid_max_id)
4774cc52 LB	319	pasid_max = intel_pasid_max_id;
2f26e0a9	320
5a10ba27	321	/* Do not use PASID 0 in caching mode (virtualised IOMMU) */
af395073 LB	322	ret = intel_pasid_alloc_id(svm,
	323	!!cap_caching_mode(iommu->cap),
	324	pasid_max - 1, GFP_KERNEL);
2f26e0a9 DW	325	if (ret < 0) {
2f26e0a9 DW	326	kfree(svm);
bbe4b3af	327	kfree(sdev);
2f26e0a9 DW	328	goto out;
	329	}
	330	svm->pasid = ret;
	331	svm->notifier.ops = &intel_mmuops;
5cec7537	332	svm->mm = mm;
569e4f77	333	svm->flags = flags;
2f26e0a9	334	INIT_LIST_HEAD_RCU(&svm->devs);
51261aac	335	INIT_LIST_HEAD(&svm->list);
2f26e0a9	336	ret = -ENOMEM;
5cec7537 DW	337	if (mm) {
	338	ret = mmu_notifier_register(&svm->notifier, mm);
	339	if (ret) {
af395073	340	intel_pasid_free_id(svm->pasid);
5cec7537 DW	341	kfree(svm);
	342	kfree(sdev);
	343	goto out;
	344	}
1c4f88b7	345	}
97140101	346
1c4f88b7 LB	347	spin_lock(&iommu->lock);
	348	ret = intel_pasid_setup_first_level(iommu, dev,
	349	mm ? mm->pgd : init_mm.pgd,
	350	svm->pasid, FLPT_DEFAULT_DID,
	351	mm ? 0 : PASID_FLAG_SUPERVISOR_MODE);
	352	spin_unlock(&iommu->lock);
	353	if (ret) {
	354	if (mm)
	355	mmu_notifier_unregister(&svm->notifier, mm);
	356	intel_pasid_free_id(svm->pasid);
	357	kfree(svm);
	358	kfree(sdev);
	359	goto out;
	360	}
51261aac LB	361
51261aac LB	362	list_add_tail(&svm->list, &global_svm_list);
d7af4d98 JP	363	} else {
	364	/*
	365	* Binding a new device with existing PASID, need to setup
	366	* the PASID entry.
	367	*/
	368	spin_lock(&iommu->lock);
	369	ret = intel_pasid_setup_first_level(iommu, dev,
	370	mm ? mm->pgd : init_mm.pgd,
	371	svm->pasid, FLPT_DEFAULT_DID,
	372	mm ? 0 : PASID_FLAG_SUPERVISOR_MODE);
	373	spin_unlock(&iommu->lock);
	374	if (ret) {
	375	kfree(sdev);
	376	goto out;
	377	}
2f26e0a9 DW	378	}
	379	list_add_rcu(&sdev->list, &svm->devs);
	380
	381	success:
	382	*pasid = svm->pasid;
	383	ret = 0;
	384	out:
	385	mutex_unlock(&pasid_mutex);
5cec7537 DW	386	if (mm)
5cec7537 DW	387	mmput(mm);
2f26e0a9 DW	388	return ret;
	389	}
	390	EXPORT_SYMBOL_GPL(intel_svm_bind_mm);
	391
	392	int intel_svm_unbind_mm(struct device *dev, int pasid)
	393	{
	394	struct intel_svm_dev *sdev;
	395	struct intel_iommu *iommu;
	396	struct intel_svm *svm;
	397	int ret = -EINVAL;
	398
	399	mutex_lock(&pasid_mutex);
	400	iommu = intel_svm_device_to_iommu(dev);
4774cc52	401	if (!iommu)
2f26e0a9 DW	402	goto out;
2f26e0a9 DW	403
af395073	404	svm = intel_pasid_lookup_id(pasid);
2f26e0a9 DW	405	if (!svm)
	406	goto out;
	407
	408	list_for_each_entry(sdev, &svm->devs, list) {
	409	if (dev == sdev->dev) {
	410	ret = 0;
	411	sdev->users--;
	412	if (!sdev->users) {
	413	list_del_rcu(&sdev->list);
	414	/* Flush the PASID cache and IOTLB for this device.
	415	* Note that we do depend on the hardware not using
	416	* the PASID any more. Just as we depend on other
	417	* devices never using PASIDs that they have no right
	418	* to use. We have a shared PASID table, because it's
	419	* large and has to be physically contiguous. So it's
	420	* hard to be as defensive as we might like. */
1c4f88b7	421	intel_pasid_tear_down_entry(iommu, dev, svm->pasid);
8744daf4	422	intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
2f26e0a9 DW	423	kfree_rcu(sdev, rcu);
	424
	425	if (list_empty(&svm->devs)) {
af395073	426	intel_pasid_free_id(svm->pasid);
5cec7537	427	if (svm->mm)
e57e58bd DW	428	mmu_notifier_unregister(&svm->notifier, svm->mm);
e57e58bd DW	429
51261aac LB	430	list_del(&svm->list);
51261aac LB	431
2f26e0a9 DW	432	/* We mandate that no page faults may be outstanding
	433	* for the PASID when intel_svm_unbind_mm() is called.
	434	* If that is not obeyed, subtle errors will happen.
	435	* Let's make them less subtle... */
	436	memset(svm, 0x6b, sizeof(*svm));
	437	kfree(svm);
	438	}
	439	}
	440	break;
	441	}
	442	}
	443	out:
	444	mutex_unlock(&pasid_mutex);
	445
	446	return ret;
	447	}
	448	EXPORT_SYMBOL_GPL(intel_svm_unbind_mm);
a222a7f0	449
15060aba CT	450	int intel_svm_is_pasid_valid(struct device *dev, int pasid)
	451	{
	452	struct intel_iommu *iommu;
	453	struct intel_svm *svm;
	454	int ret = -EINVAL;
	455
	456	mutex_lock(&pasid_mutex);
	457	iommu = intel_svm_device_to_iommu(dev);
4774cc52	458	if (!iommu)
15060aba CT	459	goto out;
15060aba CT	460
af395073	461	svm = intel_pasid_lookup_id(pasid);
15060aba CT	462	if (!svm)
	463	goto out;
	464
	465	/* init_mm is used in this case */
	466	if (!svm->mm)
	467	ret = 1;
	468	else if (atomic_read(&svm->mm->mm_users) > 0)
	469	ret = 1;
	470	else
	471	ret = 0;
	472
	473	out:
	474	mutex_unlock(&pasid_mutex);
	475
	476	return ret;
	477	}
	478	EXPORT_SYMBOL_GPL(intel_svm_is_pasid_valid);
	479
a222a7f0 DW	480	/* Page request queue descriptor */
a222a7f0 DW	481	struct page_req_dsc {
5b438f4b JP	482	union {
	483	struct {
	484	u64 type:8;
	485	u64 pasid_present:1;
	486	u64 priv_data_present:1;
	487	u64 rsvd:6;
	488	u64 rid:16;
	489	u64 pasid:20;
	490	u64 exe_req:1;
	491	u64 pm_req:1;
	492	u64 rsvd2:10;
	493	};
	494	u64 qw_0;
	495	};
	496	union {
	497	struct {
	498	u64 rd_req:1;
	499	u64 wr_req:1;
	500	u64 lpig:1;
	501	u64 prg_index:9;
	502	u64 addr:52;
	503	};
	504	u64 qw_1;
	505	};
	506	u64 priv_data[2];
a222a7f0 DW	507	};
	508
	509	#define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x10)
7f8312a3 JR	510
	511	static bool access_error(struct vm_area_struct vma, struct page_req_dsc req)
	512	{
	513	unsigned long requested = 0;
	514
	515	if (req->exe_req)
	516	requested \|= VM_EXEC;
	517
	518	if (req->rd_req)
	519	requested \|= VM_READ;
	520
	521	if (req->wr_req)
	522	requested \|= VM_WRITE;
	523
	524	return (requested & ~vma->vm_flags) != 0;
	525	}
	526
9d8c3af3 AR	527	static bool is_canonical_address(u64 addr)
	528	{
	529	int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
	530	long saddr = (long) addr;
	531
	532	return (((saddr << shift) >> shift) == saddr);
	533	}
	534
a222a7f0 DW	535	static irqreturn_t prq_event_thread(int irq, void *d)
	536	{
	537	struct intel_iommu *iommu = d;
	538	struct intel_svm *svm = NULL;
	539	int head, tail, handled = 0;
	540
46924008 DW	541	/* Clear PPR bit before reading head/tail registers, to
	542	* ensure that we get a new interrupt if needed. */
	543	writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);
	544
a222a7f0 DW	545	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
	546	head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
	547	while (head != tail) {
0204a496	548	struct intel_svm_dev *sdev;
a222a7f0 DW	549	struct vm_area_struct *vma;
	550	struct page_req_dsc *req;
	551	struct qi_desc resp;
50a7ca3c SJ	552	int result;
50a7ca3c SJ	553	vm_fault_t ret;
a222a7f0 DW	554	u64 address;
	555
	556	handled = 1;
	557
	558	req = &iommu->prq[head / sizeof(*req)];
	559
	560	result = QI_RESP_FAILURE;
7f92a2e9	561	address = (u64)req->addr << VTD_PAGE_SHIFT;
a222a7f0 DW	562	if (!req->pasid_present) {
	563	pr_err("%s: Page request without PASID: %08llx %08llx\n",
	564	iommu->name, ((unsigned long long *)req)[0],
	565	((unsigned long long *)req)[1]);
19ed3e2d	566	goto no_pasid;
a222a7f0 DW	567	}
	568
	569	if (!svm \|\| svm->pasid != req->pasid) {
	570	rcu_read_lock();
af395073	571	svm = intel_pasid_lookup_id(req->pasid);
a222a7f0 DW	572	/* It can't go away, because the driver is not permitted
	573	* to unbind the mm while any page faults are outstanding.
	574	* So we only need RCU to protect the internal idr code. */
	575	rcu_read_unlock();
	576
	577	if (!svm) {
	578	pr_err("%s: Page request for invalid PASID %d: %08llx %08llx\n",
	579	iommu->name, req->pasid, ((unsigned long long *)req)[0],
	580	((unsigned long long *)req)[1]);
26322ab5	581	goto no_pasid;
a222a7f0 DW	582	}
	583	}
	584
	585	result = QI_RESP_INVALID;
5cec7537 DW	586	/* Since we're using init_mm.pgd directly, we should never take
	587	* any faults on kernel addresses. */
	588	if (!svm->mm)
	589	goto bad_req;
e57e58bd	590	/* If the mm is already defunct, don't handle faults. */
388f7934	591	if (!mmget_not_zero(svm->mm))
e57e58bd	592	goto bad_req;
9d8c3af3 AR	593
	594	/* If address is not canonical, return invalid response */
	595	if (!is_canonical_address(address))
	596	goto bad_req;
	597
a222a7f0 DW	598	down_read(&svm->mm->mmap_sem);
	599	vma = find_extend_vma(svm->mm, address);
	600	if (!vma \|\| address < vma->vm_start)
	601	goto invalid;
	602
7f8312a3 JR	603	if (access_error(vma, req))
	604	goto invalid;
	605
dcddffd4	606	ret = handle_mm_fault(vma, address,
a222a7f0 DW	607	req->wr_req ? FAULT_FLAG_WRITE : 0);
	608	if (ret & VM_FAULT_ERROR)
	609	goto invalid;
	610
	611	result = QI_RESP_SUCCESS;
	612	invalid:
	613	up_read(&svm->mm->mmap_sem);
e57e58bd	614	mmput(svm->mm);
a222a7f0 DW	615	bad_req:
a222a7f0 DW	616	/* Accounting for major/minor faults? */
0204a496 DW	617	rcu_read_lock();
0204a496 DW	618	list_for_each_entry_rcu(sdev, &svm->devs, list) {
5b438f4b	619	if (sdev->sid == req->rid)
0204a496 DW	620	break;
	621	}
	622	/* Other devices can go away, but the drivers are not permitted
	623	* to unbind while any page faults might be in flight. So it's
	624	* OK to drop the 'lock' here now we have it. */
	625	rcu_read_unlock();
	626
	627	if (WARN_ON(&sdev->list == &svm->devs))
	628	sdev = NULL;
	629
	630	if (sdev && sdev->ops && sdev->ops->fault_cb) {
	631	int rwxp = (req->rd_req << 3) \| (req->wr_req << 2) \|
5b438f4b JP	632	(req->exe_req << 1) \| (req->pm_req);
	633	sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr,
	634	req->priv_data, rwxp, result);
0204a496	635	}
26322ab5 DW	636	/* We get here in the error case where the PASID lookup failed,
	637	and these can be NULL. Do not use them below this point! */
	638	sdev = NULL;
	639	svm = NULL;
	640	no_pasid:
5b438f4b JP	641	if (req->lpig \|\| req->priv_data_present) {
	642	/*
	643	* Per VT-d spec. v3.0 ch7.7, system software must
	644	* respond with page group response if private data
	645	* is present (PDP) or last page in group (LPIG) bit
	646	* is set. This is an additional VT-d feature beyond
	647	* PCI ATS spec.
	648	*/
5d308fc1	649	resp.qw0 = QI_PGRP_PASID(req->pasid) \|
5b438f4b	650	QI_PGRP_DID(req->rid) \|
a222a7f0	651	QI_PGRP_PASID_P(req->pasid_present) \|
5b438f4b JP	652	QI_PGRP_PDP(req->pasid_present) \|
5b438f4b JP	653	QI_PGRP_RESP_CODE(result) \|
a222a7f0	654	QI_PGRP_RESP_TYPE;
5d308fc1	655	resp.qw1 = QI_PGRP_IDX(req->prg_index) \|
5b438f4b JP	656	QI_PGRP_LPIG(req->lpig);
	657
	658	if (req->priv_data_present)
	659	memcpy(&resp.qw2, req->priv_data,
	660	sizeof(req->priv_data));
a222a7f0	661	}
5d308fc1 LB	662	resp.qw2 = 0;
	663	resp.qw3 = 0;
	664	qi_submit_sync(&resp, iommu);
a222a7f0 DW	665
	666	head = (head + sizeof(*req)) & PRQ_RING_MASK;
	667	}
	668
	669	dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);
	670
	671	return IRQ_RETVAL(handled);
	672	}