Merge tag 'iommu-updates-v5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu
author    Linus Torvalds <torvalds@linux-foundation.org>
          Fri, 2 Jul 2021 20:22:47 +0000 (13:22 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Fri, 2 Jul 2021 20:22:47 +0000 (13:22 -0700)
Pull iommu updates from Joerg Roedel:

 - SMMU Updates from Will Deacon:

     - SMMUv3:
        - Support stalling faults for platform devices
         - Decrease default sizes for the event and PRI queues
     - SMMUv2:
        - Support for a new '->probe_finalize' hook, needed by Nvidia
        - Even more Qualcomm compatible strings
        - Avoid Adreno TTBR1 quirk for DB820C platform

 - Intel VT-d updates from Lu Baolu:

     - Convert Intel IOMMU to use sva_lib helpers in iommu core
      - ftrace and debugfs support for page fault handling
     - Support asynchronous nested capabilities
     - Various misc cleanups

 - Support for the new VIOT ACPI table to make the VirtIO IOMMU
   available on x86

 - Add the amd_iommu=force_enable command line option to enable
   the IOMMU on platforms where it is known to cause problems

 - Support for version 2 of the Rockchip IOMMU

 - Various smaller fixes, cleanups and refactorings

* tag 'iommu-updates-v5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (66 commits)
  iommu/virtio: Enable x86 support
  iommu/dma: Pass address limit rather than size to iommu_setup_dma_ops()
  ACPI: Add driver for the VIOT table
  ACPI: Move IOMMU setup code out of IORT
  ACPI: arm64: Move DMA setup operations out of IORT
  iommu/vt-d: Fix dereference of pointer info before it is null checked
  iommu: Update "iommu.strict" documentation
  iommu/arm-smmu: Check smmu->impl pointer before dereferencing
  iommu/arm-smmu-v3: Remove unnecessary oom message
  iommu/arm-smmu: Fix arm_smmu_device refcount leak in address translation
  iommu/arm-smmu: Fix arm_smmu_device refcount leak when arm_smmu_rpm_get fails
  iommu/vt-d: Fix linker error on 32-bit
  iommu/vt-d: No need to typecast
  iommu/vt-d: Define counter explicitly as unsigned int
  iommu/vt-d: Remove unnecessary braces
  iommu/vt-d: Removed unused iommu_count in dmar domain
  iommu/vt-d: Use bitfields for DMAR capabilities
  iommu/vt-d: Use DEVICE_ATTR_RO macro
  iommu/vt-d: Fix out-bounds-warning in intel/svm.c
  iommu/vt-d: Add PRQ handling latency sampling
  ...

56 files changed:
Documentation/admin-guide/kernel-parameters.txt
Documentation/devicetree/bindings/iommu/iommu.txt
Documentation/devicetree/bindings/iommu/rockchip,iommu.txt [deleted file]
Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml [new file with mode: 0644]
MAINTAINERS
arch/arm64/boot/dts/qcom/msm8996.dtsi
arch/arm64/mm/dma-mapping.c
drivers/acpi/Kconfig
drivers/acpi/Makefile
drivers/acpi/arm64/Makefile
drivers/acpi/arm64/dma.c [new file with mode: 0644]
drivers/acpi/arm64/iort.c
drivers/acpi/bus.c
drivers/acpi/scan.c
drivers/acpi/viot.c [new file with mode: 0644]
drivers/iommu/Kconfig
drivers/iommu/amd/amd_iommu.h
drivers/iommu/amd/init.c
drivers/iommu/amd/iommu.c
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
drivers/iommu/arm/arm-smmu/arm-smmu.c
drivers/iommu/arm/arm-smmu/arm-smmu.h
drivers/iommu/arm/arm-smmu/qcom_iommu.c
drivers/iommu/dma-iommu.c
drivers/iommu/exynos-iommu.c
drivers/iommu/intel/Kconfig
drivers/iommu/intel/Makefile
drivers/iommu/intel/debugfs.c
drivers/iommu/intel/dmar.c
drivers/iommu/intel/iommu.c
drivers/iommu/intel/pasid.c
drivers/iommu/intel/perf.c [new file with mode: 0644]
drivers/iommu/intel/perf.h [new file with mode: 0644]
drivers/iommu/intel/svm.c
drivers/iommu/iommu.c
drivers/iommu/iova.c
drivers/iommu/ipmmu-vmsa.c
drivers/iommu/msm_iommu.c
drivers/iommu/mtk_iommu.c
drivers/iommu/mtk_iommu_v1.c
drivers/iommu/of_iommu.c
drivers/iommu/omap-iommu.c
drivers/iommu/rockchip-iommu.c
drivers/iommu/virtio-iommu.c
drivers/of/platform.c
include/acpi/acpi_bus.h
include/linux/acpi.h
include/linux/acpi_iort.h
include/linux/acpi_viot.h [new file with mode: 0644]
include/linux/dma-iommu.h
include/linux/intel-iommu.h
include/linux/of_iommu.h
include/trace/events/intel_iommu.h

index 13f13fdd47319d09c13ac98a8c985f8805b18b8c..0081d83457e8b78d37f614562bce154816c5021d 100644
                                          allowed anymore to lift isolation
                                          requirements as needed. This option
                                          does not override iommu=pt
+                       force_enable - Force-enable the IOMMU on platforms where
+                                      it is known to cause problems. Use this
+                                      option with care.
 
        amd_iommu_dump= [HW,X86-64]
                        Enable AMD IOMMU driver option to dump the ACPI table
                          forcing Dual Address Cycle for PCI cards supporting
                          greater than 32-bit addressing.
 
-       iommu.strict=   [ARM64] Configure TLB invalidation behaviour
+       iommu.strict=   [ARM64, X86] Configure TLB invalidation behaviour
                        Format: { "0" | "1" }
                        0 - Lazy mode.
                          Request that DMA unmap operations use deferred
                        1 - Strict mode (default).
                          DMA unmap operations invalidate IOMMU hardware TLBs
                          synchronously.
+                       Note: on x86, the default behaviour depends on the
+                       equivalent driver-specific parameters, but a strict
+                       mode explicitly specified by either method takes
+                       precedence.
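+                       For example, booting with "iommu.strict=1" (an
+                       illustrative command line) selects strict mode even
+                       where the driver-specific default would be lazy.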
 
        iommu.passthrough=
                        [ARM64, X86] Configure DMA to bypass the IOMMU by default.
index 3c36334e4f94214ded1ae110fa6cee242e9ed4b2..26ba9e530f1389591a043d0f1fad2d207ac06192 100644
@@ -92,6 +92,24 @@ Optional properties:
   tagging DMA transactions with an address space identifier. By default,
   this is 0, which means that the device only has one address space.
 
+- dma-can-stall: When present, the master can wait for a transaction to
+  complete for an indefinite amount of time. Upon translation fault, some
+  IOMMUs, instead of aborting the translation immediately, may first
+  notify the driver and keep the transaction in flight. This allows the OS
+  to inspect the fault and, for example, make physical pages resident
+  before updating the mappings and completing the transaction. Such an
+  IOMMU accepts a limited number of simultaneous stalled transactions
+  before having to either put back-pressure on the master or abort new
+  faulting transactions.
+
+  Firmware has to opt in to stalling, because most buses and masters don't
+  support it. In particular, it isn't compatible with PCI, where
+  transactions have to complete before a time limit. More generally, it
+  won't work in systems and masters that haven't been designed for
+  stalling. For example, the OS, in order to handle a stalled transaction,
+  may attempt to retrieve pages from secondary storage in a stalled
+  domain, leading to a deadlock.
+
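+  For illustration only, a master node opting in to stalling might look
+  like this (the device name, unit address, SMMU phandle and stream ID
+  below are hypothetical):
+
+    master@a0000 {
+        compatible = "vendor,example-accelerator";
+        reg = <0xa0000 0x1000>;
+        iommus = <&smmu 0x100>;
+        pasid-num-bits = <16>;
+        dma-can-stall;
+    };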
 
 Notes:
 ======
diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt b/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
deleted file mode 100644
index 6ecefea..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-Rockchip IOMMU
-==============
-
-A Rockchip DRM iommu translates io virtual addresses to physical addresses for
-its master device.  Each slave device is bound to a single master device, and
-shares its clocks, power domain and irq.
-
-Required properties:
-- compatible      : Should be "rockchip,iommu"
-- reg             : Address space for the configuration registers
-- interrupts      : Interrupt specifier for the IOMMU instance
-- interrupt-names : Interrupt name for the IOMMU instance
-- #iommu-cells    : Should be <0>.  This indicates the iommu is a
-                    "single-master" device, and needs no additional information
-                    to associate with its master device.  See:
-                    Documentation/devicetree/bindings/iommu/iommu.txt
-- clocks          : A list of clocks required for the IOMMU to be accessible by
-                    the host CPU.
-- clock-names     : Should contain the following:
-       "iface" - Main peripheral bus clock (PCLK/HCL) (required)
-       "aclk"  - AXI bus clock (required)
-
-Optional properties:
-- rockchip,disable-mmu-reset : Don't use the mmu reset operation.
-                              Some mmu instances may produce unexpected results
-                              when the reset operation is used.
-
-Example:
-
-       vopl_mmu: iommu@ff940300 {
-               compatible = "rockchip,iommu";
-               reg = <0xff940300 0x100>;
-               interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>;
-               interrupt-names = "vopl_mmu";
-               clocks = <&cru ACLK_VOP1>, <&cru HCLK_VOP1>;
-               clock-names = "aclk", "iface";
-               #iommu-cells = <0>;
-       };
diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml b/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml
new file mode 100644
index 0000000..d2e28a9
--- /dev/null
@@ -0,0 +1,85 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iommu/rockchip,iommu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip IOMMU
+
+maintainers:
+  - Heiko Stuebner <heiko@sntech.de>
+
+description: |+
+  A Rockchip DRM iommu translates io virtual addresses to physical addresses for
+  its master device. Each slave device is bound to a single master device and
+  shares its clocks, power domain and irq.
+
+  For information on assigning an IOMMU controller to its peripheral devices,
+  see the generic IOMMU bindings.
+
+properties:
+  compatible:
+    enum:
+      - rockchip,iommu
+      - rockchip,rk3568-iommu
+
+  reg:
+    items:
+      - description: configuration registers for MMU instance 0
+      - description: configuration registers for MMU instance 1
+    minItems: 1
+    maxItems: 2
+
+  interrupts:
+    items:
+      - description: interrupt for MMU instance 0
+      - description: interrupt for MMU instance 1
+    minItems: 1
+    maxItems: 2
+
+  clocks:
+    items:
+      - description: Core clock
+      - description: Interface clock
+
+  clock-names:
+    items:
+      - const: aclk
+      - const: iface
+
+  "#iommu-cells":
+    const: 0
+
+  power-domains:
+    maxItems: 1
+
+  rockchip,disable-mmu-reset:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description: |
+      Do not use the mmu reset operation.
+      Some mmu instances may produce unexpected results
+      when the reset operation is used.
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - "#iommu-cells"
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/rk3399-cru.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    vopl_mmu: iommu@ff940300 {
+      compatible = "rockchip,iommu";
+      reg = <0xff940300 0x100>;
+      interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>;
+      clocks = <&cru ACLK_VOP1>, <&cru HCLK_VOP1>;
+      clock-names = "aclk", "iface";
+      #iommu-cells = <0>;
+    };
index 0d9d8fbd9e92958ba22aa093ce409c37418747bb..4d42bc8c9b8bc4dccc732f15deeee55c6c164bff 100644
@@ -431,6 +431,14 @@ W: https://01.org/linux-acpi
 B:     https://bugzilla.kernel.org
 F:     drivers/acpi/acpi_video.c
 
+ACPI VIOT DRIVER
+M:     Jean-Philippe Brucker <jean-philippe@linaro.org>
+L:     linux-acpi@vger.kernel.org
+L:     iommu@lists.linux-foundation.org
+S:     Maintained
+F:     drivers/acpi/viot.c
+F:     include/linux/acpi_viot.h
+
 ACPI WMI DRIVER
 L:     platform-driver-x86@vger.kernel.org
 S:     Orphan
index ce430ba9c1183a7135e0a046eb1769103ed09e38..5d06216c5c1c8a920efb72f13b1d66972638adc1 100644
                };
 
                adreno_smmu: iommu@b40000 {
-                       compatible = "qcom,msm8996-smmu-v2", "qcom,smmu-v2";
+                       compatible = "qcom,msm8996-smmu-v2", "qcom,adreno-smmu", "qcom,smmu-v2";
                        reg = <0x00b40000 0x10000>;
 
                        #global-interrupts = <1>;
index 4bf1dd3eb041912f8cad240fa606eb9efb5df2a1..6719f9efea093f80449d81ecab6b816c58f57ec4 100644
@@ -50,7 +50,7 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 
        dev->dma_coherent = coherent;
        if (iommu)
-               iommu_setup_dma_ops(dev, dma_base, size);
+               iommu_setup_dma_ops(dev, dma_base, dma_base + size - 1);
 
 #ifdef CONFIG_XEN
        if (xen_swiotlb_detect())
index 3972de7b75653ce15f59ee7ebb5d27f6373239d1..fe0bb6277e4d08249a6edeecdcf84f145c1d35a0 100644
@@ -526,6 +526,9 @@ endif
 
 source "drivers/acpi/pmic/Kconfig"
 
+config ACPI_VIOT
+       bool
+
 endif  # ACPI
 
 config X86_PM_TIMER
index ceb1aed4b1fc9eff7bd8bbb78ec19196f31712c5..3018714e87d9c0e8fc5801896241b4780bb1ca66 100644
@@ -124,3 +124,5 @@ video-objs                  += acpi_video.o video_detect.o
 obj-y                          += dptf/
 
 obj-$(CONFIG_ARM64)            += arm64/
+
+obj-$(CONFIG_ACPI_VIOT)                += viot.o
index 6ff50f4ed947107c3a4dcee9f68fce768e64e2da..66acbe77f46e5a18cdf95f711f91ebc9c80df627 100644
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_ACPI_IORT)        += iort.o
 obj-$(CONFIG_ACPI_GTDT)        += gtdt.o
+obj-y                          += dma.o
diff --git a/drivers/acpi/arm64/dma.c b/drivers/acpi/arm64/dma.c
new file mode 100644
index 0000000..f16739a
--- /dev/null
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/acpi.h>
+#include <linux/acpi_iort.h>
+#include <linux/device.h>
+#include <linux/dma-direct.h>
+
+void acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size)
+{
+       int ret;
+       u64 end, mask;
+       u64 dmaaddr = 0, size = 0, offset = 0;
+
+       /*
+        * If @dev is expected to be DMA-capable then the bus code that created
+        * it should have initialised its dma_mask pointer by this point. For
+        * now, we'll continue the legacy behaviour of coercing it to the
+        * coherent mask if not, but we'll no longer do so quietly.
+        */
+       if (!dev->dma_mask) {
+               dev_warn(dev, "DMA mask not set\n");
+               dev->dma_mask = &dev->coherent_dma_mask;
+       }
+
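+       /*
+        * Note: mask + 1 wraps to 0 for a full 64-bit coherent mask, so
+        * max() keeps the size at the mask value in that case.
+        */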
+       if (dev->coherent_dma_mask)
+               size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1);
+       else
+               size = 1ULL << 32;
+
+       ret = acpi_dma_get_range(dev, &dmaaddr, &offset, &size);
+       if (ret == -ENODEV)
+               ret = iort_dma_get_ranges(dev, &size);
+       if (!ret) {
+               /*
+                * Limit coherent and dma mask based on size retrieved from
+                * firmware.
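+                * For example (illustrative values), dmaaddr == 0 and
+                * size == 1ULL << 32 give end == 0xffffffff, hence
+                * mask == DMA_BIT_MASK(ilog2(end) + 1) == DMA_BIT_MASK(32).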
+                */
+               end = dmaaddr + size - 1;
+               mask = DMA_BIT_MASK(ilog2(end) + 1);
+               dev->bus_dma_limit = end;
+               dev->coherent_dma_mask = min(dev->coherent_dma_mask, mask);
+               *dev->dma_mask = min(*dev->dma_mask, mask);
+       }
+
+       *dma_addr = dmaaddr;
+       *dma_size = size;
+
+       ret = dma_direct_set_offset(dev, dmaaddr + offset, dmaaddr, size);
+
+       dev_dbg(dev, "dma_offset(%#08llx)%s\n", offset, ret ? " failed!" : "");
+}
index e34937e11186a0c814bffae2b3007e721e4972cd..3b23fb775ac456cc743adba2230dff2bdb028a1c 100644
@@ -806,23 +806,6 @@ static struct acpi_iort_node *iort_get_msi_resv_iommu(struct device *dev)
        return NULL;
 }
 
-static inline const struct iommu_ops *iort_fwspec_iommu_ops(struct device *dev)
-{
-       struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
-
-       return (fwspec && fwspec->ops) ? fwspec->ops : NULL;
-}
-
-static inline int iort_add_device_replay(struct device *dev)
-{
-       int err = 0;
-
-       if (dev->bus && !device_iommu_mapped(dev))
-               err = iommu_probe_device(dev);
-
-       return err;
-}
-
 /**
  * iort_iommu_msi_get_resv_regions - Reserved region driver helper
  * @dev: Device from iommu_get_resv_regions()
@@ -900,18 +883,6 @@ static inline bool iort_iommu_driver_enabled(u8 type)
        }
 }
 
-static int arm_smmu_iort_xlate(struct device *dev, u32 streamid,
-                              struct fwnode_handle *fwnode,
-                              const struct iommu_ops *ops)
-{
-       int ret = iommu_fwspec_init(dev, fwnode, ops);
-
-       if (!ret)
-               ret = iommu_fwspec_add_ids(dev, &streamid, 1);
-
-       return ret;
-}
-
 static bool iort_pci_rc_supports_ats(struct acpi_iort_node *node)
 {
        struct acpi_iort_root_complex *pci_rc;
@@ -946,7 +917,7 @@ static int iort_iommu_xlate(struct device *dev, struct acpi_iort_node *node,
                return iort_iommu_driver_enabled(node->type) ?
                       -EPROBE_DEFER : -ENODEV;
 
-       return arm_smmu_iort_xlate(dev, streamid, iort_fwnode, ops);
+       return acpi_iommu_fwspec_init(dev, streamid, iort_fwnode, ops);
 }
 
 struct iort_pci_alias_info {
@@ -968,13 +939,15 @@ static int iort_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data)
 static void iort_named_component_init(struct device *dev,
                                      struct acpi_iort_node *node)
 {
-       struct property_entry props[2] = {};
+       struct property_entry props[3] = {};
        struct acpi_iort_named_component *nc;
 
        nc = (struct acpi_iort_named_component *)node->node_data;
        props[0] = PROPERTY_ENTRY_U32("pasid-num-bits",
                                      FIELD_GET(ACPI_IORT_NC_PASID_BITS,
                                                nc->node_flags));
+       if (nc->node_flags & ACPI_IORT_NC_STALL_SUPPORTED)
+               props[1] = PROPERTY_ENTRY_BOOL("dma-can-stall");
 
        if (device_create_managed_software_node(dev, props, NULL))
                dev_warn(dev, "Could not add device properties\n");
@@ -1020,24 +993,13 @@ static int iort_nc_iommu_map_id(struct device *dev,
  * @dev: device to configure
  * @id_in: optional input id const value pointer
  *
- * Returns: iommu_ops pointer on configuration success
- *          NULL on configuration failure
+ * Returns: 0 on success, <0 on failure
  */
-const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
-                                               const u32 *id_in)
+int iort_iommu_configure_id(struct device *dev, const u32 *id_in)
 {
        struct acpi_iort_node *node;
-       const struct iommu_ops *ops;
        int err = -ENODEV;
 
-       /*
-        * If we already translated the fwspec there
-        * is nothing left to do, return the iommu_ops.
-        */
-       ops = iort_fwspec_iommu_ops(dev);
-       if (ops)
-               return ops;
-
        if (dev_is_pci(dev)) {
                struct iommu_fwspec *fwspec;
                struct pci_bus *bus = to_pci_dev(dev)->bus;
@@ -1046,7 +1008,7 @@ const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
                node = iort_scan_node(ACPI_IORT_NODE_PCI_ROOT_COMPLEX,
                                      iort_match_node_callback, &bus->dev);
                if (!node)
-                       return NULL;
+                       return -ENODEV;
 
                info.node = node;
                err = pci_for_each_dma_alias(to_pci_dev(dev),
@@ -1059,7 +1021,7 @@ const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
                node = iort_scan_node(ACPI_IORT_NODE_NAMED_COMPONENT,
                                      iort_match_node_callback, dev);
                if (!node)
-                       return NULL;
+                       return -ENODEV;
 
                err = id_in ? iort_nc_iommu_map_id(dev, node, id_in) :
                              iort_nc_iommu_map(dev, node);
@@ -1068,32 +1030,14 @@ const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
                        iort_named_component_init(dev, node);
        }
 
-       /*
-        * If we have reason to believe the IOMMU driver missed the initial
-        * add_device callback for dev, replay it to get things in order.
-        */
-       if (!err) {
-               ops = iort_fwspec_iommu_ops(dev);
-               err = iort_add_device_replay(dev);
-       }
-
-       /* Ignore all other errors apart from EPROBE_DEFER */
-       if (err == -EPROBE_DEFER) {
-               ops = ERR_PTR(err);
-       } else if (err) {
-               dev_dbg(dev, "Adding to IOMMU failed: %d\n", err);
-               ops = NULL;
-       }
-
-       return ops;
+       return err;
 }
 
 #else
 int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
 { return 0; }
-const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
-                                               const u32 *input_id)
-{ return NULL; }
+int iort_iommu_configure_id(struct device *dev, const u32 *input_id)
+{ return -ENODEV; }
 #endif
 
 static int nc_dma_get_range(struct device *dev, u64 *size)
@@ -1144,56 +1088,18 @@ static int rc_dma_get_range(struct device *dev, u64 *size)
 }
 
 /**
- * iort_dma_setup() - Set-up device DMA parameters.
+ * iort_dma_get_ranges() - Look up DMA addressing limit for the device
+ * @dev: device to lookup
+ * @size: DMA range size result pointer
  *
- * @dev: device to configure
- * @dma_addr: device DMA address result pointer
- * @dma_size: DMA range size result pointer
+ * Return: 0 on success, an error otherwise.
  */
-void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size)
+int iort_dma_get_ranges(struct device *dev, u64 *size)
 {
-       u64 end, mask, dmaaddr = 0, size = 0, offset = 0;
-       int ret;
-
-       /*
-        * If @dev is expected to be DMA-capable then the bus code that created
-        * it should have initialised its dma_mask pointer by this point. For
-        * now, we'll continue the legacy behaviour of coercing it to the
-        * coherent mask if not, but we'll no longer do so quietly.
-        */
-       if (!dev->dma_mask) {
-               dev_warn(dev, "DMA mask not set\n");
-               dev->dma_mask = &dev->coherent_dma_mask;
-       }
-
-       if (dev->coherent_dma_mask)
-               size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1);
+       if (dev_is_pci(dev))
+               return rc_dma_get_range(dev, size);
        else
-               size = 1ULL << 32;
-
-       ret = acpi_dma_get_range(dev, &dmaaddr, &offset, &size);
-       if (ret == -ENODEV)
-               ret = dev_is_pci(dev) ? rc_dma_get_range(dev, &size)
-                                     : nc_dma_get_range(dev, &size);
-
-       if (!ret) {
-               /*
-                * Limit coherent and dma mask based on size retrieved from
-                * firmware.
-                */
-               end = dmaaddr + size - 1;
-               mask = DMA_BIT_MASK(ilog2(end) + 1);
-               dev->bus_dma_limit = end;
-               dev->coherent_dma_mask = min(dev->coherent_dma_mask, mask);
-               *dev->dma_mask = min(*dev->dma_mask, mask);
-       }
-
-       *dma_addr = dmaaddr;
-       *dma_size = size;
-
-       ret = dma_direct_set_offset(dev, dmaaddr + offset, dmaaddr, size);
-
-       dev_dbg(dev, "dma_offset(%#08llx)%s\n", offset, ret ? " failed!" : "");
+               return nc_dma_get_range(dev, size);
 }
 
 static void __init acpi_iort_register_irq(int hwirq, const char *name,
index 60fb6a843853a491dfa1e8756ceb8383c999aff5..ee24246d88fde166d27755ad1f7d4b9bca49ca07 100644
@@ -27,6 +27,7 @@
 #include <linux/dmi.h>
 #endif
 #include <linux/acpi_iort.h>
+#include <linux/acpi_viot.h>
 #include <linux/pci.h>
 #include <acpi/apei.h>
 #include <linux/suspend.h>
@@ -1335,6 +1336,7 @@ static int __init acpi_init(void)
        acpi_wakeup_device_init();
        acpi_debugger_init();
        acpi_setup_sb_notify_handler();
+       acpi_viot_init();
        return 0;
 }
 
index 0641bc20b09749138dfe530f019378d956d5373e..b24513ec3fae1886208c8590181573a241a2ed85 100644
@@ -11,6 +11,8 @@
 #include <linux/kernel.h>
 #include <linux/acpi.h>
 #include <linux/acpi_iort.h>
+#include <linux/acpi_viot.h>
+#include <linux/iommu.h>
 #include <linux/signal.h>
 #include <linux/kthread.h>
 #include <linux/dmi.h>
@@ -1526,6 +1528,78 @@ int acpi_dma_get_range(struct device *dev, u64 *dma_addr, u64 *offset,
        return ret >= 0 ? 0 : ret;
 }
 
+#ifdef CONFIG_IOMMU_API
+int acpi_iommu_fwspec_init(struct device *dev, u32 id,
+                          struct fwnode_handle *fwnode,
+                          const struct iommu_ops *ops)
+{
+       int ret = iommu_fwspec_init(dev, fwnode, ops);
+
+       if (!ret)
+               ret = iommu_fwspec_add_ids(dev, &id, 1);
+
+       return ret;
+}
+
+static inline const struct iommu_ops *acpi_iommu_fwspec_ops(struct device *dev)
+{
+       struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+
+       return fwspec ? fwspec->ops : NULL;
+}
+
+static const struct iommu_ops *acpi_iommu_configure_id(struct device *dev,
+                                                      const u32 *id_in)
+{
+       int err;
+       const struct iommu_ops *ops;
+
+       /*
+        * If we already translated the fwspec there is nothing left to do,
+        * return the iommu_ops.
+        */
+       ops = acpi_iommu_fwspec_ops(dev);
+       if (ops)
+               return ops;
+
+       err = iort_iommu_configure_id(dev, id_in);
+       if (err && err != -EPROBE_DEFER)
+               err = viot_iommu_configure(dev);
+
+       /*
+        * If we have reason to believe the IOMMU driver missed the initial
+        * iommu_probe_device() call for dev, replay it to get things in order.
+        */
+       if (!err && dev->bus && !device_iommu_mapped(dev))
+               err = iommu_probe_device(dev);
+
+       /* Ignore all other errors apart from EPROBE_DEFER */
+       if (err == -EPROBE_DEFER) {
+               return ERR_PTR(err);
+       } else if (err) {
+               dev_dbg(dev, "Adding to IOMMU failed: %d\n", err);
+               return NULL;
+       }
+       return acpi_iommu_fwspec_ops(dev);
+}
+
+#else /* !CONFIG_IOMMU_API */
+
+int acpi_iommu_fwspec_init(struct device *dev, u32 id,
+                          struct fwnode_handle *fwnode,
+                          const struct iommu_ops *ops)
+{
+       return -ENODEV;
+}
+
+static const struct iommu_ops *acpi_iommu_configure_id(struct device *dev,
+                                                      const u32 *id_in)
+{
+       return NULL;
+}
+
+#endif /* !CONFIG_IOMMU_API */
+
 /**
  * acpi_dma_configure_id - Set-up DMA configuration for the device.
  * @dev: The pointer to the device
@@ -1543,9 +1617,9 @@ int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr,
                return 0;
        }
 
-       iort_dma_setup(dev, &dma_addr, &size);
+       acpi_arch_dma_setup(dev, &dma_addr, &size);
 
-       iommu = iort_iommu_configure_id(dev, input_id);
+       iommu = acpi_iommu_configure_id(dev, input_id);
        if (PTR_ERR(iommu) == -EPROBE_DEFER)
                return -EPROBE_DEFER;
 
diff --git a/drivers/acpi/viot.c b/drivers/acpi/viot.c
new file mode 100644
index 0000000..d225632
--- /dev/null
@@ -0,0 +1,366 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Virtual I/O topology
+ *
+ * The Virtual I/O Translation Table (VIOT) describes the topology of
+ * para-virtual IOMMUs and the endpoints they manage. The OS uses it to
+ * initialize devices in the right order, preventing endpoints from issuing DMA
+ * before their IOMMU is ready.
+ *
+ * When binding a driver to a device, before calling the device driver's probe()
+ * method, the driver infrastructure calls dma_configure(). At that point the
+ * VIOT driver looks for an IOMMU associated with the device in the VIOT table.
+ * If an IOMMU exists and has been initialized, the VIOT driver initializes the
+ * device's IOMMU fwspec, allowing the DMA infrastructure to invoke the IOMMU
+ * ops when the device driver configures DMA mappings. If an IOMMU exists and
+ * hasn't yet been initialized, VIOT returns -EPROBE_DEFER to postpone probing
+ * the device until the IOMMU is available.
+ */
+#define pr_fmt(fmt) "ACPI: VIOT: " fmt
+
+#include <linux/acpi_viot.h>
+#include <linux/dma-iommu.h>
+#include <linux/fwnode.h>
+#include <linux/iommu.h>
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+
+struct viot_iommu {
+       /* Node offset within the table */
+       unsigned int                    offset;
+       struct fwnode_handle            *fwnode;
+       struct list_head                list;
+};
+
+struct viot_endpoint {
+       union {
+               /* PCI range */
+               struct {
+                       u16             segment_start;
+                       u16             segment_end;
+                       u16             bdf_start;
+                       u16             bdf_end;
+               };
+               /* MMIO */
+               u64                     address;
+       };
+       u32                             endpoint_id;
+       struct viot_iommu               *viommu;
+       struct list_head                list;
+};
+
+static struct acpi_table_viot *viot;
+static LIST_HEAD(viot_iommus);
+static LIST_HEAD(viot_pci_ranges);
+static LIST_HEAD(viot_mmio_endpoints);
+
+static int __init viot_check_bounds(const struct acpi_viot_header *hdr)
+{
+       struct acpi_viot_header *start, *end, *hdr_end;
+
+       start = ACPI_ADD_PTR(struct acpi_viot_header, viot,
+                            max_t(size_t, sizeof(*viot), viot->node_offset));
+       end = ACPI_ADD_PTR(struct acpi_viot_header, viot, viot->header.length);
+       hdr_end = ACPI_ADD_PTR(struct acpi_viot_header, hdr, sizeof(*hdr));
+
+       if (hdr < start || hdr_end > end) {
+               pr_err(FW_BUG "Node pointer overflows\n");
+               return -EOVERFLOW;
+       }
+       if (hdr->length < sizeof(*hdr)) {
+               pr_err(FW_BUG "Empty node\n");
+               return -EINVAL;
+       }
+       return 0;
+}
+
+static int __init viot_get_pci_iommu_fwnode(struct viot_iommu *viommu,
+                                           u16 segment, u16 bdf)
+{
+       struct pci_dev *pdev;
+       struct fwnode_handle *fwnode;
+
+       pdev = pci_get_domain_bus_and_slot(segment, PCI_BUS_NUM(bdf),
+                                          bdf & 0xff);
+       if (!pdev) {
+               pr_err("Could not find PCI IOMMU\n");
+               return -ENODEV;
+       }
+
+       fwnode = pdev->dev.fwnode;
+       if (!fwnode) {
+               /*
+                * PCI devices aren't necessarily described by ACPI. Create a
+                * fwnode so the IOMMU subsystem can identify this device.
+                */
+               fwnode = acpi_alloc_fwnode_static();
+               if (!fwnode) {
+                       pci_dev_put(pdev);
+                       return -ENOMEM;
+               }
+               set_primary_fwnode(&pdev->dev, fwnode);
+       }
+       viommu->fwnode = pdev->dev.fwnode;
+       pci_dev_put(pdev);
+       return 0;
+}
+
+static int __init viot_get_mmio_iommu_fwnode(struct viot_iommu *viommu,
+                                            u64 address)
+{
+       struct acpi_device *adev;
+       struct resource res = {
+               .start  = address,
+               .end    = address,
+               .flags  = IORESOURCE_MEM,
+       };
+
+       adev = acpi_resource_consumer(&res);
+       if (!adev) {
+               pr_err("Could not find MMIO IOMMU\n");
+               return -EINVAL;
+       }
+       viommu->fwnode = &adev->fwnode;
+       return 0;
+}
+
+static struct viot_iommu * __init viot_get_iommu(unsigned int offset)
+{
+       int ret;
+       struct viot_iommu *viommu;
+       struct acpi_viot_header *hdr = ACPI_ADD_PTR(struct acpi_viot_header,
+                                                   viot, offset);
+       union {
+               struct acpi_viot_virtio_iommu_pci pci;
+               struct acpi_viot_virtio_iommu_mmio mmio;
+       } *node = (void *)hdr;
+
+       list_for_each_entry(viommu, &viot_iommus, list)
+               if (viommu->offset == offset)
+                       return viommu;
+
+       if (viot_check_bounds(hdr))
+               return NULL;
+
+       viommu = kzalloc(sizeof(*viommu), GFP_KERNEL);
+       if (!viommu)
+               return NULL;
+
+       viommu->offset = offset;
+       switch (hdr->type) {
+       case ACPI_VIOT_NODE_VIRTIO_IOMMU_PCI:
+               if (hdr->length < sizeof(node->pci))
+                       goto err_free;
+
+               ret = viot_get_pci_iommu_fwnode(viommu, node->pci.segment,
+                                               node->pci.bdf);
+               break;
+       case ACPI_VIOT_NODE_VIRTIO_IOMMU_MMIO:
+               if (hdr->length < sizeof(node->mmio))
+                       goto err_free;
+
+               ret = viot_get_mmio_iommu_fwnode(viommu,
+                                                node->mmio.base_address);
+               break;
+       default:
+               ret = -EINVAL;
+       }
+       if (ret)
+               goto err_free;
+
+       list_add(&viommu->list, &viot_iommus);
+       return viommu;
+
+err_free:
+       kfree(viommu);
+       return NULL;
+}
+
+static int __init viot_parse_node(const struct acpi_viot_header *hdr)
+{
+       int ret = -EINVAL;
+       struct list_head *list;
+       struct viot_endpoint *ep;
+       union {
+               struct acpi_viot_mmio mmio;
+               struct acpi_viot_pci_range pci;
+       } *node = (void *)hdr;
+
+       if (viot_check_bounds(hdr))
+               return -EINVAL;
+
+       if (hdr->type == ACPI_VIOT_NODE_VIRTIO_IOMMU_PCI ||
+           hdr->type == ACPI_VIOT_NODE_VIRTIO_IOMMU_MMIO)
+               return 0;
+
+       ep = kzalloc(sizeof(*ep), GFP_KERNEL);
+       if (!ep)
+               return -ENOMEM;
+
+       switch (hdr->type) {
+       case ACPI_VIOT_NODE_PCI_RANGE:
+               if (hdr->length < sizeof(node->pci)) {
+                       pr_err(FW_BUG "Invalid PCI node size\n");
+                       goto err_free;
+               }
+
+               ep->segment_start = node->pci.segment_start;
+               ep->segment_end = node->pci.segment_end;
+               ep->bdf_start = node->pci.bdf_start;
+               ep->bdf_end = node->pci.bdf_end;
+               ep->endpoint_id = node->pci.endpoint_start;
+               ep->viommu = viot_get_iommu(node->pci.output_node);
+               list = &viot_pci_ranges;
+               break;
+       case ACPI_VIOT_NODE_MMIO:
+               if (hdr->length < sizeof(node->mmio)) {
+                       pr_err(FW_BUG "Invalid MMIO node size\n");
+                       goto err_free;
+               }
+
+               ep->address = node->mmio.base_address;
+               ep->endpoint_id = node->mmio.endpoint;
+               ep->viommu = viot_get_iommu(node->mmio.output_node);
+               list = &viot_mmio_endpoints;
+               break;
+       default:
+               pr_warn("Unsupported node %x\n", hdr->type);
+               ret = 0;
+               goto err_free;
+       }
+
+       if (!ep->viommu) {
+               pr_warn("No IOMMU node found\n");
+               /*
+                * A future version of the table may use the node for other
+                * purposes. Keep parsing.
+                */
+               ret = 0;
+               goto err_free;
+       }
+
+       list_add(&ep->list, list);
+       return 0;
+
+err_free:
+       kfree(ep);
+       return ret;
+}
+
+/**
+ * acpi_viot_init - Parse the VIOT table
+ *
+ * Parse the VIOT table, prepare the list of endpoints to be used during DMA
+ * setup of devices.
+ */
+void __init acpi_viot_init(void)
+{
+       int i;
+       acpi_status status;
+       struct acpi_table_header *hdr;
+       struct acpi_viot_header *node;
+
+       status = acpi_get_table(ACPI_SIG_VIOT, 0, &hdr);
+       if (ACPI_FAILURE(status)) {
+               if (status != AE_NOT_FOUND) {
+                       const char *msg = acpi_format_exception(status);
+
+                       pr_err("Failed to get table, %s\n", msg);
+               }
+               return;
+       }
+
+       viot = (void *)hdr;
+
+       node = ACPI_ADD_PTR(struct acpi_viot_header, viot, viot->node_offset);
+       for (i = 0; i < viot->node_count; i++) {
+               if (viot_parse_node(node))
+                       break;
+
+               node = ACPI_ADD_PTR(struct acpi_viot_header, node,
+                                   node->length);
+       }
+
+       acpi_put_table(hdr);
+}
+
+static int viot_dev_iommu_init(struct device *dev, struct viot_iommu *viommu,
+                              u32 epid)
+{
+       const struct iommu_ops *ops;
+
+       if (!viommu)
+               return -ENODEV;
+
+       /* We're not translating ourselves */
+       if (viommu->fwnode == dev->fwnode)
+               return -EINVAL;
+
+       ops = iommu_ops_from_fwnode(viommu->fwnode);
+       if (!ops)
+               return IS_ENABLED(CONFIG_VIRTIO_IOMMU) ?
+                       -EPROBE_DEFER : -ENODEV;
+
+       return acpi_iommu_fwspec_init(dev, epid, viommu->fwnode, ops);
+}
+
+static int viot_pci_dev_iommu_init(struct pci_dev *pdev, u16 dev_id, void *data)
+{
+       u32 epid;
+       struct viot_endpoint *ep;
+       u32 domain_nr = pci_domain_nr(pdev->bus);
+
+       list_for_each_entry(ep, &viot_pci_ranges, list) {
+               if (domain_nr >= ep->segment_start &&
+                   domain_nr <= ep->segment_end &&
+                   dev_id >= ep->bdf_start &&
+                   dev_id <= ep->bdf_end) {
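+                       /*
+                        * Worked example with illustrative values: if
+                        * segment_start == 0, bdf_start == 0x8 and
+                        * endpoint_id == 0x10, then device 0000:00:01.2
+                        * (dev_id == 0xa) gets epid == 0x10 + (0xa - 0x8)
+                        * == 0x12.
+                        */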
+                       epid = ((domain_nr - ep->segment_start) << 16) +
+                               dev_id - ep->bdf_start + ep->endpoint_id;
+
+                       /*
+                        * If we found a PCI range managed by the viommu, we're
+                        * the one that has to request ACS.
+                        */
+                       pci_request_acs();
+
+                       return viot_dev_iommu_init(&pdev->dev, ep->viommu,
+                                                  epid);
+               }
+       }
+       return -ENODEV;
+}
+
+static int viot_mmio_dev_iommu_init(struct platform_device *pdev)
+{
+       struct resource *mem;
+       struct viot_endpoint *ep;
+
+       mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!mem)
+               return -ENODEV;
+
+       list_for_each_entry(ep, &viot_mmio_endpoints, list) {
+               if (ep->address == mem->start)
+                       return viot_dev_iommu_init(&pdev->dev, ep->viommu,
+                                                  ep->endpoint_id);
+       }
+       return -ENODEV;
+}
+
+/**
+ * viot_iommu_configure - Setup IOMMU ops for an endpoint described by VIOT
+ * @dev: the endpoint
+ *
+ * Return: 0 on success, <0 on failure
+ */
+int viot_iommu_configure(struct device *dev)
+{
+       if (dev_is_pci(dev))
+               return pci_for_each_dma_alias(to_pci_dev(dev),
+                                             viot_pci_dev_iommu_init, NULL);
+       else if (dev_is_platform(dev))
+               return viot_mmio_dev_iommu_init(to_platform_device(dev));
+       return -ENODEV;
+}
index 1f111b399bcab5c7d875ad64b52724174e47454c..07b7c25cbed8a1cc6cf6c91ff8402ea63f9d5074 100644
@@ -400,9 +400,11 @@ config HYPERV_IOMMU
 config VIRTIO_IOMMU
        tristate "Virtio IOMMU driver"
        depends on VIRTIO
-       depends on ARM64
+       depends on (ARM64 || X86)
        select IOMMU_API
+       select IOMMU_DMA
        select INTERVAL_TREE
+       select ACPI_VIOT if ACPI
        help
          Para-virtualised IOMMU driver with virtio.
 
index 55dd38d814d92607908e39017d3656a9541d9f12..416815a525d671916a896297b67b28006bb03236 100644
@@ -11,8 +11,6 @@
 
 #include "amd_iommu_types.h"
 
-extern int amd_iommu_init_dma_ops(void);
-extern int amd_iommu_init_passthrough(void);
 extern irqreturn_t amd_iommu_int_thread(int irq, void *data);
 extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
 extern void amd_iommu_apply_erratum_63(u16 devid);
index d006724f4dc212267ff1bc3eec381e94fa806891..46280e6e1535b0695827d22ba2b4e7f28be64734 100644
@@ -153,7 +153,8 @@ int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
 static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
 
 static bool amd_iommu_detected;
-static bool __initdata amd_iommu_disabled;
+static bool amd_iommu_disabled __initdata;
+static bool amd_iommu_force_enable __initdata;
 static int amd_iommu_target_ivhd_type;
 
 u16 amd_iommu_last_bdf;                        /* largest PCI device id we have
@@ -231,7 +232,6 @@ enum iommu_init_state {
        IOMMU_ENABLED,
        IOMMU_PCI_INIT,
        IOMMU_INTERRUPTS_EN,
-       IOMMU_DMA_OPS,
        IOMMU_INITIALIZED,
        IOMMU_NOT_FOUND,
        IOMMU_INIT_ERROR,
@@ -1908,8 +1908,8 @@ static void print_iommu_info(void)
                pci_info(pdev, "Found IOMMU cap 0x%x\n", iommu->cap_ptr);
 
                if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
-                       pci_info(pdev, "Extended features (%#llx):",
-                                iommu->features);
+                       pr_info("Extended features (%#llx):", iommu->features);
+
                        for (i = 0; i < ARRAY_SIZE(feat_str); ++i) {
                                if (iommu_feature(iommu, (1ULL << i)))
                                        pr_cont(" %s", feat_str[i]);
@@ -2817,7 +2817,7 @@ out:
        return ret;
 }
 
-static bool detect_ivrs(void)
+static bool __init detect_ivrs(void)
 {
        struct acpi_table_header *ivrs_base;
        acpi_status status;
@@ -2834,6 +2834,9 @@ static bool detect_ivrs(void)
 
        acpi_put_table(ivrs_base);
 
+       if (amd_iommu_force_enable)
+               goto out;
+
        /* Don't use IOMMU if there is Stoney Ridge graphics */
        for (i = 0; i < 32; i++) {
                u32 pci_id;
@@ -2845,6 +2848,7 @@ static bool detect_ivrs(void)
                }
        }
 
+out:
        /* Make sure ACS will be enabled during PCI probe */
        pci_request_acs();
 
@@ -2895,10 +2899,6 @@ static int __init state_next(void)
                init_state = ret ? IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN;
                break;
        case IOMMU_INTERRUPTS_EN:
-               ret = amd_iommu_init_dma_ops();
-               init_state = ret ? IOMMU_INIT_ERROR : IOMMU_DMA_OPS;
-               break;
-       case IOMMU_DMA_OPS:
                init_state = IOMMU_INITIALIZED;
                break;
        case IOMMU_INITIALIZED:
@@ -3100,6 +3100,8 @@ static int __init parse_amd_iommu_options(char *str)
        for (; *str; ++str) {
                if (strncmp(str, "fullflush", 9) == 0)
                        amd_iommu_unmap_flush = true;
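+               /* See detect_ivrs(): force_enable skips the Stoney Ridge quirk */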
+               if (strncmp(str, "force_enable", 12) == 0)
+                       amd_iommu_force_enable = true;
                if (strncmp(str, "off", 3) == 0)
                        amd_iommu_disabled = true;
                if (strncmp(str, "force_isolation", 15) == 0)
index 3ac42bbdefc634bed6c31eaf38efdc4142fd3184..811a49a95d043ec77610e19251c2420389c75d80 100644
@@ -30,7 +30,6 @@
 #include <linux/msi.h>
 #include <linux/irqdomain.h>
 #include <linux/percpu.h>
-#include <linux/iova.h>
 #include <linux/io-pgtable.h>
 #include <asm/irq_remapping.h>
 #include <asm/io_apic.h>
@@ -1713,7 +1712,7 @@ static void amd_iommu_probe_finalize(struct device *dev)
        /* Domains are initialized for this device - have a look what we ended up with */
        domain = iommu_get_domain_for_dev(dev);
        if (domain->type == IOMMU_DOMAIN_DMA)
-               iommu_setup_dma_ops(dev, IOVA_START_PFN << PAGE_SHIFT, 0);
+               iommu_setup_dma_ops(dev, 0, U64_MAX);
        else
                set_dma_ops(dev, NULL);
 }
@@ -1773,13 +1772,22 @@ void amd_iommu_domain_update(struct protection_domain *domain)
        amd_iommu_domain_flush_complete(domain);
 }
 
+static void __init amd_iommu_init_dma_ops(void)
+{
+       swiotlb = (iommu_default_passthrough() || sme_me_mask) ? 1 : 0;
+
+       if (amd_iommu_unmap_flush)
+               pr_info("IO/TLB flush on unmap enabled\n");
+       else
+               pr_info("Lazy IO/TLB flushing enabled\n");
+       iommu_set_dma_strict(amd_iommu_unmap_flush);
+}
+
 int __init amd_iommu_init_api(void)
 {
-       int ret, err = 0;
+       int err;
 
-       ret = iova_cache_get();
-       if (ret)
-               return ret;
+       amd_iommu_init_dma_ops();
 
        err = bus_set_iommu(&pci_bus_type, &amd_iommu_ops);
        if (err)
@@ -1796,19 +1804,6 @@ int __init amd_iommu_init_api(void)
        return 0;
 }
 
-int __init amd_iommu_init_dma_ops(void)
-{
-       swiotlb        = (iommu_default_passthrough() || sme_me_mask) ? 1 : 0;
-
-       if (amd_iommu_unmap_flush)
-               pr_info("IO/TLB flush on unmap enabled\n");
-       else
-               pr_info("Lazy IO/TLB flushing enabled\n");
-       iommu_set_dma_strict(amd_iommu_unmap_flush);
-       return 0;
-
-}
-
 /*****************************************************************************
  *
  * The following functions belong to the exported interface of AMD IOMMU
index bb251cab61f332597268fbef4e9f859cc39bfaa3..ee66d1f4cb81e67d5befe77fecffe7b2f0509806 100644
@@ -435,9 +435,13 @@ bool arm_smmu_sva_supported(struct arm_smmu_device *smmu)
        return true;
 }
 
-static bool arm_smmu_iopf_supported(struct arm_smmu_master *master)
+bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master)
 {
-       return false;
+       /* We're not keeping track of SIDs in fault events */
+       if (master->num_streams != 1)
+               return false;
+
+       return master->stall_enabled;
 }
 
 bool arm_smmu_master_sva_supported(struct arm_smmu_master *master)
@@ -445,8 +449,8 @@ bool arm_smmu_master_sva_supported(struct arm_smmu_master *master)
        if (!(master->smmu->features & ARM_SMMU_FEAT_SVA))
                return false;
 
-       /* SSID and IOPF support are mandatory for the moment */
-       return master->ssid_bits && arm_smmu_iopf_supported(master);
+       /* SSID support is mandatory for the moment */
+       return master->ssid_bits;
 }
 
 bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master)
@@ -459,13 +463,55 @@ bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master)
        return enabled;
 }
 
+static int arm_smmu_master_sva_enable_iopf(struct arm_smmu_master *master)
+{
+       int ret;
+       struct device *dev = master->dev;
+
+       /*
+        * Drivers for devices supporting PRI or stall should enable IOPF first.
+        * Others have device-specific fault handlers and don't need IOPF.
+        */
+       if (!arm_smmu_master_iopf_supported(master))
+               return 0;
+
+       if (!master->iopf_enabled)
+               return -EINVAL;
+
+       ret = iopf_queue_add_device(master->smmu->evtq.iopf, dev);
+       if (ret)
+               return ret;
+
+       ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
+       if (ret) {
+               iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
+               return ret;
+       }
+       return 0;
+}
+
+static void arm_smmu_master_sva_disable_iopf(struct arm_smmu_master *master)
+{
+       struct device *dev = master->dev;
+
+       if (!master->iopf_enabled)
+               return;
+
+       iommu_unregister_device_fault_handler(dev);
+       iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
+}
+
 int arm_smmu_master_enable_sva(struct arm_smmu_master *master)
 {
+       int ret;
+
        mutex_lock(&sva_lock);
-       master->sva_enabled = true;
+       ret = arm_smmu_master_sva_enable_iopf(master);
+       if (!ret)
+               master->sva_enabled = true;
        mutex_unlock(&sva_lock);
 
-       return 0;
+       return ret;
 }
 
 int arm_smmu_master_disable_sva(struct arm_smmu_master *master)
@@ -476,6 +522,7 @@ int arm_smmu_master_disable_sva(struct arm_smmu_master *master)
                mutex_unlock(&sva_lock);
                return -EBUSY;
        }
+       arm_smmu_master_sva_disable_iopf(master);
        master->sva_enabled = false;
        mutex_unlock(&sva_lock);
 
index 54b2f27b81d43963cfd61efa50bfde17add278a3..dd20b01771c4bd2330c75f7d7b53f189c19089a7 100644
@@ -23,7 +23,6 @@
 #include <linux/msi.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_iommu.h>
 #include <linux/of_platform.h>
 #include <linux/pci.h>
 #include <linux/pci-ats.h>
@@ -32,6 +31,7 @@
 #include <linux/amba/bus.h>
 
 #include "arm-smmu-v3.h"
+#include "../../iommu-sva-lib.h"
 
 static bool disable_bypass = true;
 module_param(disable_bypass, bool, 0444);
@@ -313,6 +313,11 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
                }
                cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
                break;
+       case CMDQ_OP_RESUME:
+               cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
+               cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
+               cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
+               break;
        case CMDQ_OP_CMD_SYNC:
                if (ent->sync.msiaddr) {
                        cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
@@ -352,7 +357,7 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
 
 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
 {
-       static const char *cerror_str[] = {
+       static const char * const cerror_str[] = {
                [CMDQ_ERR_CERROR_NONE_IDX]      = "No error",
                [CMDQ_ERR_CERROR_ILL_IDX]       = "Illegal command",
                [CMDQ_ERR_CERROR_ABT_IDX]       = "Abort on command fetch",
@@ -876,6 +881,44 @@ static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
        return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
 }
 
+static int arm_smmu_page_response(struct device *dev,
+                                 struct iommu_fault_event *unused,
+                                 struct iommu_page_response *resp)
+{
+       struct arm_smmu_cmdq_ent cmd = {0};
+       struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+       int sid = master->streams[0].id;
+
+       if (master->stall_enabled) {
+               cmd.opcode              = CMDQ_OP_RESUME;
+               cmd.resume.sid          = sid;
+               cmd.resume.stag         = resp->grpid;
+               switch (resp->code) {
+               case IOMMU_PAGE_RESP_INVALID:
+               case IOMMU_PAGE_RESP_FAILURE:
+                       cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
+                       break;
+               case IOMMU_PAGE_RESP_SUCCESS:
+                       cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
+                       break;
+               default:
+                       return -EINVAL;
+               }
+       } else {
+               return -ENODEV;
+       }
+
+       arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
+       /*
+        * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
+        * RESUME consumption guarantees that the stalled transaction will be
+        * terminated... at some point in the future. PRI_RESP is fire and
+        * forget.
+        */
+
+       return 0;
+}
+
 /* Context descriptor manipulation functions */
 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
 {
@@ -986,7 +1029,6 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
        u64 val;
        bool cd_live;
        __le64 *cdptr;
-       struct arm_smmu_device *smmu = smmu_domain->smmu;
 
        if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
                return -E2BIG;
@@ -1031,8 +1073,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
                        FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
                        CTXDESC_CD_0_V;
 
-               /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
-               if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
+               if (smmu_domain->stall_enabled)
                        val |= CTXDESC_CD_0_S;
        }
 
@@ -1276,7 +1317,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
                         FIELD_PREP(STRTAB_STE_1_STRW, strw));
 
                if (smmu->features & ARM_SMMU_FEAT_STALLS &&
-                  !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
+                   !master->stall_enabled)
                        dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
 
                val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
@@ -1353,7 +1394,6 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
        return 0;
 }
 
-__maybe_unused
 static struct arm_smmu_master *
 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
 {
@@ -1377,18 +1417,118 @@ arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
 }
 
 /* IRQ and event handlers */
+static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
+{
+       int ret;
+       u32 reason;
+       u32 perm = 0;
+       struct arm_smmu_master *master;
+       bool ssid_valid = evt[0] & EVTQ_0_SSV;
+       u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
+       struct iommu_fault_event fault_evt = { };
+       struct iommu_fault *flt = &fault_evt.fault;
+
+       switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
+       case EVT_ID_TRANSLATION_FAULT:
+               reason = IOMMU_FAULT_REASON_PTE_FETCH;
+               break;
+       case EVT_ID_ADDR_SIZE_FAULT:
+               reason = IOMMU_FAULT_REASON_OOR_ADDRESS;
+               break;
+       case EVT_ID_ACCESS_FAULT:
+               reason = IOMMU_FAULT_REASON_ACCESS;
+               break;
+       case EVT_ID_PERMISSION_FAULT:
+               reason = IOMMU_FAULT_REASON_PERMISSION;
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       /* Stage-2 is always pinned at the moment */
+       if (evt[1] & EVTQ_1_S2)
+               return -EFAULT;
+
+       if (evt[1] & EVTQ_1_RnW)
+               perm |= IOMMU_FAULT_PERM_READ;
+       else
+               perm |= IOMMU_FAULT_PERM_WRITE;
+
+       if (evt[1] & EVTQ_1_InD)
+               perm |= IOMMU_FAULT_PERM_EXEC;
+
+       if (evt[1] & EVTQ_1_PnU)
+               perm |= IOMMU_FAULT_PERM_PRIV;
+
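+       /*
+        * A stalled event is reported as a recoverable page request;
+        * anything else becomes an unrecoverable fault.
+        */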
+       if (evt[1] & EVTQ_1_STALL) {
+               flt->type = IOMMU_FAULT_PAGE_REQ;
+               flt->prm = (struct iommu_fault_page_request) {
+                       .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
+                       .grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
+                       .perm = perm,
+                       .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
+               };
+
+               if (ssid_valid) {
+                       flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
+                       flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
+               }
+       } else {
+               flt->type = IOMMU_FAULT_DMA_UNRECOV;
+               flt->event = (struct iommu_fault_unrecoverable) {
+                       .reason = reason,
+                       .flags = IOMMU_FAULT_UNRECOV_ADDR_VALID,
+                       .perm = perm,
+                       .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
+               };
+
+               if (ssid_valid) {
+                       flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID;
+                       flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
+               }
+       }
+
+       mutex_lock(&smmu->streams_mutex);
+       master = arm_smmu_find_master(smmu, sid);
+       if (!master) {
+               ret = -EINVAL;
+               goto out_unlock;
+       }
+
+       ret = iommu_report_device_fault(master->dev, &fault_evt);
+       if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
+               /* Nobody cared, abort the access */
+               struct iommu_page_response resp = {
+                       .pasid          = flt->prm.pasid,
+                       .grpid          = flt->prm.grpid,
+                       .code           = IOMMU_PAGE_RESP_FAILURE,
+               };
+               arm_smmu_page_response(master->dev, &fault_evt, &resp);
+       }
+
+out_unlock:
+       mutex_unlock(&smmu->streams_mutex);
+       return ret;
+}
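
A sketch of the consumer side, for context: when SVA is enabled, the fault
reported above lands in the generic I/O page fault machinery. The following
mirrors the registration done on the SVA enable path (example_enable_iopf is
a hypothetical name; iopf_queue_add_device(), iommu_register_device_fault_handler()
and iommu_queue_iopf() are the existing iommu-sva-lib/core helpers):

	static int example_enable_iopf(struct arm_smmu_master *master)
	{
		struct device *dev = master->dev;
		int ret;

		/* The queue itself is allocated in arm_smmu_init_queues() */
		ret = iopf_queue_add_device(master->smmu->evtq.iopf, dev);
		if (ret)
			return ret;

		/*
		 * iommu_queue_iopf() resolves the fault (handle_mm_fault())
		 * and replies through ->page_response(), i.e.
		 * arm_smmu_page_response() here.
		 */
		ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
		if (ret)
			iopf_queue_remove_device(master->smmu->evtq.iopf, dev);

		return ret;
	}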
+
 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
 {
-       int i;
+       int i, ret;
        struct arm_smmu_device *smmu = dev;
        struct arm_smmu_queue *q = &smmu->evtq.q;
        struct arm_smmu_ll_queue *llq = &q->llq;
+       static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+                                     DEFAULT_RATELIMIT_BURST);
        u64 evt[EVTQ_ENT_DWORDS];
 
        do {
                while (!queue_remove_raw(q, evt)) {
                        u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
 
+                       ret = arm_smmu_handle_evt(smmu, evt);
+                       if (!ret || !__ratelimit(&rs))
+                               continue;
+
                        dev_info(smmu->dev, "event 0x%02x received:\n", id);
                        for (i = 0; i < ARRAY_SIZE(evt); ++i)
                                dev_info(smmu->dev, "\t0x%016llx\n",
@@ -1923,6 +2063,8 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
 
        cfg->s1cdmax = master->ssid_bits;
 
+       smmu_domain->stall_enabled = master->stall_enabled;
+
        ret = arm_smmu_alloc_cd_tables(smmu_domain);
        if (ret)
                goto out_free_asid;
@@ -2270,6 +2412,12 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
                        smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
                ret = -EINVAL;
                goto out_unlock;
+       } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
+                  smmu_domain->stall_enabled != master->stall_enabled) {
+               dev_err(dev, "cannot attach to stall-%s domain\n",
+                       smmu_domain->stall_enabled ? "enabled" : "disabled");
+               ret = -EINVAL;
+               goto out_unlock;
        }
 
        master->domain = smmu_domain;
@@ -2508,6 +2656,11 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev)
                master->ssid_bits = min_t(u8, master->ssid_bits,
                                          CTXDESC_LINEAR_CDMAX);
 
+       if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
+            device_property_read_bool(dev, "dma-can-stall")) ||
+           smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
+               master->stall_enabled = true;
+
        return &smmu->iommu;
 
 err_free_master:
@@ -2525,7 +2678,8 @@ static void arm_smmu_release_device(struct device *dev)
                return;
 
        master = dev_iommu_priv_get(dev);
-       WARN_ON(arm_smmu_master_sva_enabled(master));
+       if (WARN_ON(arm_smmu_master_sva_enabled(master)))
+               iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
        arm_smmu_detach_dev(master);
        arm_smmu_disable_pasid(master);
        arm_smmu_remove_master(master);
@@ -2595,6 +2749,8 @@ static bool arm_smmu_dev_has_feature(struct device *dev,
                return false;
 
        switch (feat) {
+       case IOMMU_DEV_FEAT_IOPF:
+               return arm_smmu_master_iopf_supported(master);
        case IOMMU_DEV_FEAT_SVA:
                return arm_smmu_master_sva_supported(master);
        default:
@@ -2611,6 +2767,8 @@ static bool arm_smmu_dev_feature_enabled(struct device *dev,
                return false;
 
        switch (feat) {
+       case IOMMU_DEV_FEAT_IOPF:
+               return master->iopf_enabled;
        case IOMMU_DEV_FEAT_SVA:
                return arm_smmu_master_sva_enabled(master);
        default:
@@ -2621,6 +2779,8 @@ static bool arm_smmu_dev_feature_enabled(struct device *dev,
 static int arm_smmu_dev_enable_feature(struct device *dev,
                                       enum iommu_dev_features feat)
 {
+       struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+
        if (!arm_smmu_dev_has_feature(dev, feat))
                return -ENODEV;
 
@@ -2628,8 +2788,11 @@ static int arm_smmu_dev_enable_feature(struct device *dev,
                return -EBUSY;
 
        switch (feat) {
+       case IOMMU_DEV_FEAT_IOPF:
+               master->iopf_enabled = true;
+               return 0;
        case IOMMU_DEV_FEAT_SVA:
-               return arm_smmu_master_enable_sva(dev_iommu_priv_get(dev));
+               return arm_smmu_master_enable_sva(master);
        default:
                return -EINVAL;
        }
@@ -2638,12 +2801,19 @@ static int arm_smmu_dev_enable_feature(struct device *dev,
 static int arm_smmu_dev_disable_feature(struct device *dev,
                                        enum iommu_dev_features feat)
 {
+       struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+
        if (!arm_smmu_dev_feature_enabled(dev, feat))
                return -EINVAL;
 
        switch (feat) {
+       case IOMMU_DEV_FEAT_IOPF:
+               if (master->sva_enabled)
+                       return -EBUSY;
+               master->iopf_enabled = false;
+               return 0;
        case IOMMU_DEV_FEAT_SVA:
-               return arm_smmu_master_disable_sva(dev_iommu_priv_get(dev));
+               return arm_smmu_master_disable_sva(master);
        default:
                return -EINVAL;
        }
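
The IOPF/SVA cases above imply an ordering for client drivers: enable IOPF
before SVA, and disable them in reverse, since disabling IOPF while SVA is
still enabled returns -EBUSY per the switch above. A minimal sketch using the
generic feature API (example_enable_sva is a hypothetical helper):

	static int example_enable_sva(struct device *dev)
	{
		int ret;

		/* IOPF first: the SVA enable path depends on iopf_enabled */
		ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_IOPF);
		if (ret)
			return ret;

		ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA);
		if (ret)
			iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_IOPF);
		return ret;
	}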
@@ -2673,6 +2843,7 @@ static struct iommu_ops arm_smmu_ops = {
        .sva_bind               = arm_smmu_sva_bind,
        .sva_unbind             = arm_smmu_sva_unbind,
        .sva_get_pasid          = arm_smmu_sva_get_pasid,
+       .page_response          = arm_smmu_page_response,
        .pgsize_bitmap          = -1UL, /* Restricted during device attach */
        .owner                  = THIS_MODULE,
 };
@@ -2771,6 +2942,13 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
        if (ret)
                return ret;
 
+       if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
+           (smmu->features & ARM_SMMU_FEAT_STALLS)) {
+               smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
+               if (!smmu->evtq.iopf)
+                       return -ENOMEM;
+       }
+
        /* priq */
        if (!(smmu->features & ARM_SMMU_FEAT_PRI))
                return 0;
@@ -2788,10 +2966,8 @@ static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
        void *strtab = smmu->strtab_cfg.strtab;
 
        cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
-       if (!cfg->l1_desc) {
-               dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
+       if (!cfg->l1_desc)
                return -ENOMEM;
-       }
 
        for (i = 0; i < cfg->num_l1_ents; ++i) {
                arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
@@ -3582,10 +3758,8 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
        bool bypass;
 
        smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
-       if (!smmu) {
-               dev_err(dev, "failed to allocate arm_smmu_device\n");
+       if (!smmu)
                return -ENOMEM;
-       }
        smmu->dev = dev;
 
        if (dev->of_node) {
@@ -3669,10 +3843,20 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
        ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
        if (ret) {
                dev_err(dev, "Failed to register iommu\n");
-               return ret;
+               goto err_sysfs_remove;
        }
 
-       return arm_smmu_set_bus_ops(&arm_smmu_ops);
+       ret = arm_smmu_set_bus_ops(&arm_smmu_ops);
+       if (ret)
+               goto err_unregister_device;
+
+       return 0;
+
+err_unregister_device:
+       iommu_device_unregister(&smmu->iommu);
+err_sysfs_remove:
+       iommu_device_sysfs_remove(&smmu->iommu);
+       return ret;
 }
 
 static int arm_smmu_device_remove(struct platform_device *pdev)
@@ -3683,6 +3867,7 @@ static int arm_smmu_device_remove(struct platform_device *pdev)
        iommu_device_unregister(&smmu->iommu);
        iommu_device_sysfs_remove(&smmu->iommu);
        arm_smmu_device_disable(smmu);
+       iopf_queue_free(smmu->evtq.iopf);
 
        return 0;
 }
index 46e8c49214a872e6c7c5a83e5d22b9bf3048d77a..4cb136f07914e83fbac99c109f5b946c69025f2b 100644 (file)
 #else
 #define Q_MAX_SZ_SHIFT                 (PAGE_SHIFT + MAX_ORDER - 1)
 #endif
+#define Q_MIN_SZ_SHIFT                 (PAGE_SHIFT)
 
 /*
  * Stream table.
 #define CMDQ_PRI_1_GRPID               GENMASK_ULL(8, 0)
 #define CMDQ_PRI_1_RESP                        GENMASK_ULL(13, 12)
 
+#define CMDQ_RESUME_0_RESP_TERM                0UL
+#define CMDQ_RESUME_0_RESP_RETRY       1UL
+#define CMDQ_RESUME_0_RESP_ABORT       2UL
+#define CMDQ_RESUME_0_RESP             GENMASK_ULL(13, 12)
+#define CMDQ_RESUME_0_SID              GENMASK_ULL(63, 32)
+#define CMDQ_RESUME_1_STAG             GENMASK_ULL(15, 0)
+
 #define CMDQ_SYNC_0_CS                 GENMASK_ULL(13, 12)
 #define CMDQ_SYNC_0_CS_NONE            0
 #define CMDQ_SYNC_0_CS_IRQ             1
 /* Event queue */
 #define EVTQ_ENT_SZ_SHIFT              5
 #define EVTQ_ENT_DWORDS                        ((1 << EVTQ_ENT_SZ_SHIFT) >> 3)
-#define EVTQ_MAX_SZ_SHIFT              (Q_MAX_SZ_SHIFT - EVTQ_ENT_SZ_SHIFT)
+#define EVTQ_MAX_SZ_SHIFT              (Q_MIN_SZ_SHIFT - EVTQ_ENT_SZ_SHIFT)
 
 #define EVTQ_0_ID                      GENMASK_ULL(7, 0)
 
+#define EVT_ID_TRANSLATION_FAULT       0x10
+#define EVT_ID_ADDR_SIZE_FAULT         0x11
+#define EVT_ID_ACCESS_FAULT            0x12
+#define EVT_ID_PERMISSION_FAULT                0x13
+
+#define EVTQ_0_SSV                     (1UL << 11)
+#define EVTQ_0_SSID                    GENMASK_ULL(31, 12)
+#define EVTQ_0_SID                     GENMASK_ULL(63, 32)
+#define EVTQ_1_STAG                    GENMASK_ULL(15, 0)
+#define EVTQ_1_STALL                   (1UL << 31)
+#define EVTQ_1_PnU                     (1UL << 33)
+#define EVTQ_1_InD                     (1UL << 34)
+#define EVTQ_1_RnW                     (1UL << 35)
+#define EVTQ_1_S2                      (1UL << 39)
+#define EVTQ_1_CLASS                   GENMASK_ULL(41, 40)
+#define EVTQ_1_TT_READ                 (1UL << 44)
+#define EVTQ_2_ADDR                    GENMASK_ULL(63, 0)
+#define EVTQ_3_IPA                     GENMASK_ULL(51, 12)
+
 /* PRI queue */
 #define PRIQ_ENT_SZ_SHIFT              4
 #define PRIQ_ENT_DWORDS                        ((1 << PRIQ_ENT_SZ_SHIFT) >> 3)
-#define PRIQ_MAX_SZ_SHIFT              (Q_MAX_SZ_SHIFT - PRIQ_ENT_SZ_SHIFT)
+#define PRIQ_MAX_SZ_SHIFT              (Q_MIN_SZ_SHIFT - PRIQ_ENT_SZ_SHIFT)
 
 #define PRIQ_0_SID                     GENMASK_ULL(31, 0)
 #define PRIQ_0_SSID                    GENMASK_ULL(51, 32)
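
Worked out for the common case of 4 KiB pages (PAGE_SHIFT == 12), the switch
from Q_MAX_SZ_SHIFT to Q_MIN_SZ_SHIFT above caps each queue's default at a
single page:

	EVTQ_MAX_SZ_SHIFT = 12 - 5 = 7  ->  2^7 = 128 events  * 32 bytes = 4 KiB
	PRIQ_MAX_SZ_SHIFT = 12 - 4 = 8  ->  2^8 = 256 entries * 16 bytes = 4 KiB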
@@ -462,6 +489,13 @@ struct arm_smmu_cmdq_ent {
                        enum pri_resp           resp;
                } pri;
 
+               #define CMDQ_OP_RESUME          0x44
+               struct {
+                       u32                     sid;
+                       u16                     stag;
+                       u8                      resp;
+               } resume;
+
                #define CMDQ_OP_CMD_SYNC        0x46
                struct {
                        u64                     msiaddr;
@@ -520,6 +554,7 @@ struct arm_smmu_cmdq_batch {
 
 struct arm_smmu_evtq {
        struct arm_smmu_queue           q;
+       struct iopf_queue               *iopf;
        u32                             max_stalls;
 };
 
@@ -657,7 +692,9 @@ struct arm_smmu_master {
        struct arm_smmu_stream          *streams;
        unsigned int                    num_streams;
        bool                            ats_enabled;
+       bool                            stall_enabled;
        bool                            sva_enabled;
+       bool                            iopf_enabled;
        struct list_head                bonds;
        unsigned int                    ssid_bits;
 };
@@ -675,6 +712,7 @@ struct arm_smmu_domain {
        struct mutex                    init_mutex; /* Protects smmu pointer */
 
        struct io_pgtable_ops           *pgtbl_ops;
+       bool                            stall_enabled;
        atomic_t                        nr_ats_masters;
 
        enum arm_smmu_domain_stage      stage;
@@ -716,6 +754,7 @@ bool arm_smmu_master_sva_supported(struct arm_smmu_master *master);
 bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master);
 int arm_smmu_master_enable_sva(struct arm_smmu_master *master);
 int arm_smmu_master_disable_sva(struct arm_smmu_master *master);
+bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master);
 struct iommu_sva *arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm,
                                    void *drvdata);
 void arm_smmu_sva_unbind(struct iommu_sva *handle);
@@ -747,6 +786,11 @@ static inline int arm_smmu_master_disable_sva(struct arm_smmu_master *master)
        return -ENODEV;
 }
 
+static inline bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master)
+{
+       return false;
+}
+
 static inline struct iommu_sva *
 arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
 {
index 61fc645c1325da9ba86daf449c1a7122b32bdb16..9b9d13ec5a88460ac471c8b883d950987c6501ec 100644 (file)
@@ -3,6 +3,7 @@
  * Copyright (c) 2019, The Linux Foundation. All rights reserved.
  */
 
+#include <linux/acpi.h>
 #include <linux/adreno-smmu-priv.h>
 #include <linux/of_device.h>
 #include <linux/qcom_scm.h>
@@ -177,6 +178,16 @@ static int qcom_adreno_smmu_alloc_context_bank(struct arm_smmu_domain *smmu_doma
        return __arm_smmu_alloc_bitmap(smmu->context_map, start, count);
 }
 
+static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu)
+{
+       const struct device_node *np = smmu->dev->of_node;
+
+       if (of_device_is_compatible(np, "qcom,msm8996-smmu-v2"))
+               return false;
+
+       return true;
+}
+
 static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
                struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
 {
@@ -191,7 +202,8 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
         * be AARCH64 stage 1 but double check because the arm-smmu code assumes
         * that is the case when the TTBR1 quirk is enabled
         */
-       if ((smmu_domain->stage == ARM_SMMU_DOMAIN_S1) &&
+       if (qcom_adreno_can_do_ttbr1(smmu_domain->smmu) &&
+           (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) &&
            (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64))
                pgtbl_cfg->quirks |= IO_PGTABLE_QUIRK_ARM_TTBR1;
 
@@ -216,6 +228,7 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = {
        { .compatible = "qcom,mdss" },
        { .compatible = "qcom,sc7180-mdss" },
        { .compatible = "qcom,sc7180-mss-pil" },
+       { .compatible = "qcom,sc7280-mdss" },
        { .compatible = "qcom,sc8180x-mdss" },
        { .compatible = "qcom,sdm845-mdss" },
        { .compatible = "qcom,sdm845-mss-pil" },
@@ -380,24 +393,48 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
 static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
        { .compatible = "qcom,msm8998-smmu-v2" },
        { .compatible = "qcom,sc7180-smmu-500" },
+       { .compatible = "qcom,sc7280-smmu-500" },
        { .compatible = "qcom,sc8180x-smmu-500" },
        { .compatible = "qcom,sdm630-smmu-v2" },
        { .compatible = "qcom,sdm845-smmu-500" },
+       { .compatible = "qcom,sm6125-smmu-500" },
        { .compatible = "qcom,sm8150-smmu-500" },
        { .compatible = "qcom,sm8250-smmu-500" },
        { .compatible = "qcom,sm8350-smmu-500" },
        { }
 };
 
+#ifdef CONFIG_ACPI
+static struct acpi_platform_list qcom_acpi_platlist[] = {
+       { "LENOVO", "CB-01   ", 0x8180, ACPI_SIG_IORT, equal, "QCOM SMMU" },
+       { "QCOM  ", "QCOMEDK2", 0x8180, ACPI_SIG_IORT, equal, "QCOM SMMU" },
+       { }
+};
+#endif
+
 struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu)
 {
        const struct device_node *np = smmu->dev->of_node;
 
-       if (of_match_node(qcom_smmu_impl_of_match, np))
-               return qcom_smmu_create(smmu, &qcom_smmu_impl);
+#ifdef CONFIG_ACPI
+       if (!np) {
+               /* Match platform for ACPI boot */
+               if (acpi_match_platform_list(qcom_acpi_platlist) >= 0)
+                       return qcom_smmu_create(smmu, &qcom_smmu_impl);
+       }
+#endif
 
+       /*
+        * Do not change this order: the Adreno SMMU must be matched before
+        * the APSS SMMU. Both implementations may declare "arm,mmu-500"
+        * compatibility, and matching the APSS implementation first would
+        * skip the Adreno-specific features.
+        */
        if (of_device_is_compatible(np, "qcom,adreno-smmu"))
                return qcom_smmu_create(smmu, &qcom_adreno_smmu_impl);
 
+       if (of_match_node(qcom_smmu_impl_of_match, np))
+               return qcom_smmu_create(smmu, &qcom_smmu_impl);
+
        return smmu;
 }
index b4b32d31fc069c3521b09e43fdb12d79dcceea98..f22dbeb1e51058db0e31a9fc1c98bc78a01a27d2 100644 (file)
@@ -31,7 +31,6 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_device.h>
-#include <linux/of_iommu.h>
 #include <linux/pci.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
@@ -74,7 +73,7 @@ static bool using_legacy_binding, using_generic_binding;
 static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
 {
        if (pm_runtime_enabled(smmu->dev))
-               return pm_runtime_get_sync(smmu->dev);
+               return pm_runtime_resume_and_get(smmu->dev);
 
        return 0;
 }
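
This conversion fixes a usage-count leak: pm_runtime_get_sync() increments
the usage counter even when it fails, so every error path needed a matching
put, whereas pm_runtime_resume_and_get() drops the reference itself on
failure. A sketch of the caller pattern this enables (example_poke_hw is a
hypothetical name):

	static int example_poke_hw(struct arm_smmu_device *smmu)
	{
		int ret;

		ret = arm_smmu_rpm_get(smmu);
		if (ret < 0)
			return ret;	/* usage count already dropped */

		/* ... access SMMU registers ... */

		arm_smmu_rpm_put(smmu);
		return 0;
	}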
@@ -1276,6 +1275,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
        u64 phys;
        unsigned long va, flags;
        int ret, idx = cfg->cbndx;
+       phys_addr_t addr = 0;
 
        ret = arm_smmu_rpm_get(smmu);
        if (ret < 0)
@@ -1295,6 +1295,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
                dev_err(dev,
                        "iova to phys timed out on %pad. Falling back to software table walk.\n",
                        &iova);
+               arm_smmu_rpm_put(smmu);
                return ops->iova_to_phys(ops, iova);
        }
 
@@ -1303,12 +1304,14 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
        if (phys & ARM_SMMU_CB_PAR_F) {
                dev_err(dev, "translation fault!\n");
                dev_err(dev, "PAR = 0x%llx\n", phys);
-               return 0;
+               goto out;
        }
 
+       addr = (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
+out:
        arm_smmu_rpm_put(smmu);
 
-       return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
+       return addr;
 }
 
 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
@@ -1455,6 +1458,18 @@ static void arm_smmu_release_device(struct device *dev)
        iommu_fwspec_free(dev);
 }
 
+static void arm_smmu_probe_finalize(struct device *dev)
+{
+       struct arm_smmu_master_cfg *cfg;
+       struct arm_smmu_device *smmu;
+
+       cfg = dev_iommu_priv_get(dev);
+       smmu = cfg->smmu;
+
+       if (smmu->impl && smmu->impl->probe_finalize)
+               smmu->impl->probe_finalize(smmu, dev);
+}
+
 static struct iommu_group *arm_smmu_device_group(struct device *dev)
 {
        struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
@@ -1574,6 +1589,7 @@ static struct iommu_ops arm_smmu_ops = {
        .iova_to_phys           = arm_smmu_iova_to_phys,
        .probe_device           = arm_smmu_probe_device,
        .release_device         = arm_smmu_release_device,
+       .probe_finalize         = arm_smmu_probe_finalize,
        .device_group           = arm_smmu_device_group,
        .enable_nesting         = arm_smmu_enable_nesting,
        .set_pgtable_quirks     = arm_smmu_set_pgtable_quirks,
@@ -2169,7 +2185,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
        err = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
        if (err) {
                dev_err(dev, "Failed to register iommu\n");
-               return err;
+               goto err_sysfs_remove;
        }
 
        platform_set_drvdata(pdev, smmu);
@@ -2192,10 +2208,19 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
         * any device which might need it, so we want the bus ops in place
         * ready to handle default domain setup as soon as any SMMU exists.
         */
-       if (!using_legacy_binding)
-               return arm_smmu_bus_init(&arm_smmu_ops);
+       if (!using_legacy_binding) {
+               err = arm_smmu_bus_init(&arm_smmu_ops);
+               if (err)
+                       goto err_unregister_device;
+       }
 
        return 0;
+
+err_unregister_device:
+       iommu_device_unregister(&smmu->iommu);
+err_sysfs_remove:
+       iommu_device_sysfs_remove(&smmu->iommu);
+       return err;
 }
 
 static int arm_smmu_device_remove(struct platform_device *pdev)
index 84c21c4b0691d2f24eeb2e4b79b4241b70e51474..a5027159596075aa92a8ac26f779f81113cd3054 100644 (file)
@@ -441,6 +441,7 @@ struct arm_smmu_impl {
                                  struct device *dev, int start);
        void (*write_s2cr)(struct arm_smmu_device *smmu, int idx);
        void (*write_sctlr)(struct arm_smmu_device *smmu, int idx, u32 reg);
+       void (*probe_finalize)(struct arm_smmu_device *smmu, struct device *dev);
 };
 
 #define INVALID_SMENDX                 -1
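
A sketch of how an implementation hooks the new callback (all names below are
hypothetical; the signature matches the ->probe_finalize member added above):

	static void example_probe_finalize(struct arm_smmu_device *smmu,
					   struct device *dev)
	{
		/* per-master setup that must wait until the device is probed */
	}

	static const struct arm_smmu_impl example_impl = {
		.probe_finalize	= example_probe_finalize,
	};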
index 4294abe389b22fccdecdfdc92095158748421aff..25ed444ff94d0748874ca3c9ced8769e2412574f 100644 (file)
@@ -25,7 +25,6 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_device.h>
-#include <linux/of_iommu.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
 #include <linux/pm_runtime.h>
@@ -850,10 +849,12 @@ static int qcom_iommu_device_probe(struct platform_device *pdev)
        ret = iommu_device_register(&qcom_iommu->iommu, &qcom_iommu_ops, dev);
        if (ret) {
                dev_err(dev, "Failed to register iommu\n");
-               return ret;
+               goto err_sysfs_remove;
        }
 
-       bus_set_iommu(&platform_bus_type, &qcom_iommu_ops);
+       ret = bus_set_iommu(&platform_bus_type, &qcom_iommu_ops);
+       if (ret)
+               goto err_unregister_device;
 
        if (qcom_iommu->local_base) {
                pm_runtime_get_sync(dev);
@@ -862,6 +863,13 @@ static int qcom_iommu_device_probe(struct platform_device *pdev)
        }
 
        return 0;
+
+err_unregister_device:
+       iommu_device_unregister(&qcom_iommu->iommu);
+
+err_sysfs_remove:
+       iommu_device_sysfs_remove(&qcom_iommu->iommu);
+       return ret;
 }
 
 static int qcom_iommu_device_remove(struct platform_device *pdev)
index 7bcdd12055358e98070d6deb3b04b19c797058f3..98ba927aee1a6903c7bc8619e41160618292f952 100644 (file)
@@ -243,9 +243,11 @@ resv_iova:
                        lo = iova_pfn(iovad, start);
                        hi = iova_pfn(iovad, end);
                        reserve_iova(iovad, lo, hi);
-               } else {
+               } else if (end < start) {
                        /* dma_ranges list should be sorted */
-                       dev_err(&dev->dev, "Failed to reserve IOVA\n");
+                       dev_err(&dev->dev,
+                               "Failed to reserve IOVA [%pa-%pa]\n",
+                               &start, &end);
                        return -EINVAL;
                }
 
@@ -319,16 +321,16 @@ static bool dev_is_untrusted(struct device *dev)
  * iommu_dma_init_domain - Initialise a DMA mapping domain
  * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
  * @base: IOVA at which the mappable address space starts
- * @size: Size of IOVA space
+ * @limit: Last address of the IOVA space
  * @dev: Device the domain is being initialised for
  *
- * @base and @size should be exact multiples of IOMMU page granularity to
+ * @base and @limit + 1 should be exact multiples of IOMMU page granularity to
  * avoid rounding surprises. If necessary, we reserve the page at address 0
  * to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but
  * any change which could make prior IOVAs invalid will fail.
  */
 static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
-               u64 size, struct device *dev)
+                                dma_addr_t limit, struct device *dev)
 {
        struct iommu_dma_cookie *cookie = domain->iova_cookie;
        unsigned long order, base_pfn;
@@ -346,7 +348,7 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
        /* Check the domain allows at least some access to the device... */
        if (domain->geometry.force_aperture) {
                if (base > domain->geometry.aperture_end ||
-                   base + size <= domain->geometry.aperture_start) {
+                   limit < domain->geometry.aperture_start) {
                        pr_warn("specified DMA range outside IOMMU capability\n");
                        return -EFAULT;
                }
@@ -1308,7 +1310,7 @@ static const struct dma_map_ops iommu_dma_ops = {
  * The IOMMU core code allocates the default DMA domain, which the underlying
  * IOMMU driver needs to support via the dma-iommu layer.
  */
-void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size)
+void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit)
 {
        struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
 
@@ -1320,7 +1322,7 @@ void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size)
         * underlying IOMMU driver needs to support via the dma-iommu layer.
         */
        if (domain->type == IOMMU_DOMAIN_DMA) {
-               if (iommu_dma_init_domain(domain, dma_base, size, dev))
+               if (iommu_dma_init_domain(domain, dma_base, dma_limit, dev))
                        goto out_err;
                dev->dma_ops = &iommu_dma_ops;
        }
@@ -1330,6 +1332,7 @@ out_err:
         pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
                 dev_name(dev));
 }
+EXPORT_SYMBOL_GPL(iommu_setup_dma_ops);
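
Because the last argument is now an inclusive limit rather than a size,
callers convert as in this sketch (base and size are illustrative):

	/* before: iommu_setup_dma_ops(dev, base, size); */
	iommu_setup_dma_ops(dev, base, base + size - 1);

	/* or open up the whole 64-bit space, as the VT-d driver now does: */
	iommu_setup_dma_ops(dev, 0, U64_MAX);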
 
 static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
                phys_addr_t msi_addr, struct iommu_domain *domain)
index 7623d8c371f5c7138f74c11057750393620731f8..d0fbf1d10e182f7812b6112b518372c5c46e73ac 100644 (file)
@@ -17,7 +17,6 @@
 #include <linux/kmemleak.h>
 #include <linux/list.h>
 #include <linux/of.h>
-#include <linux/of_iommu.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
index 28a3d1596c76894789ae0939d0f82e01a422a35b..43ebd8af11c510642de82d149a79389fdb676ab0 100644 (file)
@@ -3,6 +3,9 @@
 config DMAR_TABLE
        bool
 
+config DMAR_PERF
+       bool
+
 config INTEL_IOMMU
        bool "Support for Intel IOMMU using DMA Remapping Devices"
        depends on PCI_MSI && ACPI && (X86 || IA64)
@@ -14,6 +17,7 @@ config INTEL_IOMMU
        select SWIOTLB
        select IOASID
        select IOMMU_DMA
+       select PCI_ATS
        help
          Supporting DMA remapping (DMAR) devices enables independent address
          translations for Direct Memory Access (DMA) from devices.
@@ -24,6 +28,7 @@ config INTEL_IOMMU
 config INTEL_IOMMU_DEBUGFS
        bool "Export Intel IOMMU internals in Debugfs"
        depends on INTEL_IOMMU && IOMMU_DEBUGFS
+       select DMAR_PERF
        help
          !!!WARNING!!!
 
@@ -41,6 +46,7 @@ config INTEL_IOMMU_SVM
        select PCI_PRI
        select MMU_NOTIFIER
        select IOASID
+       select IOMMU_SVA_LIB
        help
          Shared Virtual Memory (SVM) provides a facility for devices
          to access DMA resources through process address space by
index ae236ec7d219ae5619aa0936c220ce4db05e2bc5..fa0dae16441cb50e712973063df5c6bde935ccf9 100644 (file)
@@ -2,6 +2,7 @@
 obj-$(CONFIG_DMAR_TABLE) += dmar.o
 obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o
 obj-$(CONFIG_DMAR_TABLE) += trace.o cap_audit.o
+obj-$(CONFIG_DMAR_PERF) += perf.o
 obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o
 obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o
 obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o
index efea7f02abd91aa60c0aec89016ff17bd58a28df..62e23ff3c987e1658fe44483716aa74b709227d8 100644 (file)
@@ -16,6 +16,7 @@
 #include <asm/irq_remapping.h>
 
 #include "pasid.h"
+#include "perf.h"
 
 struct tbl_walk {
        u16 bus;
@@ -31,6 +32,9 @@ struct iommu_regset {
        const char *regs;
 };
 
+#define DEBUG_BUFFER_SIZE      1024
+static char debug_buf[DEBUG_BUFFER_SIZE];
+
 #define IOMMU_REGSET_ENTRY(_reg_)                                      \
        { DMAR_##_reg_##_REG, __stringify(_reg_) }
 
@@ -538,6 +542,111 @@ static int ir_translation_struct_show(struct seq_file *m, void *unused)
 DEFINE_SHOW_ATTRIBUTE(ir_translation_struct);
 #endif
 
+static void latency_show_one(struct seq_file *m, struct intel_iommu *iommu,
+                            struct dmar_drhd_unit *drhd)
+{
+       int ret;
+
+       seq_printf(m, "IOMMU: %s Register Base Address: %llx\n",
+                  iommu->name, drhd->reg_base_addr);
+
+       ret = dmar_latency_snapshot(iommu, debug_buf, DEBUG_BUFFER_SIZE);
+       if (ret < 0)
+               seq_puts(m, "Failed to get latency snapshot");
+       else
+               seq_puts(m, debug_buf);
+       seq_puts(m, "\n");
+}
+
+static int latency_show(struct seq_file *m, void *v)
+{
+       struct dmar_drhd_unit *drhd;
+       struct intel_iommu *iommu;
+
+       rcu_read_lock();
+       for_each_active_iommu(iommu, drhd)
+               latency_show_one(m, iommu, drhd);
+       rcu_read_unlock();
+
+       return 0;
+}
+
+static int dmar_perf_latency_open(struct inode *inode, struct file *filp)
+{
+       return single_open(filp, latency_show, NULL);
+}
+
+static ssize_t dmar_perf_latency_write(struct file *filp,
+                                      const char __user *ubuf,
+                                      size_t cnt, loff_t *ppos)
+{
+       struct dmar_drhd_unit *drhd;
+       struct intel_iommu *iommu;
+       int counting;
+       char buf[64];
+
+       if (cnt > 63)
+               cnt = 63;
+
+       if (copy_from_user(buf, ubuf, cnt))
+               return -EFAULT;
+
+       buf[cnt] = 0;
+
+       if (kstrtoint(buf, 0, &counting))
+               return -EINVAL;
+
+       switch (counting) {
+       case 0:
+               rcu_read_lock();
+               for_each_active_iommu(iommu, drhd) {
+                       dmar_latency_disable(iommu, DMAR_LATENCY_INV_IOTLB);
+                       dmar_latency_disable(iommu, DMAR_LATENCY_INV_DEVTLB);
+                       dmar_latency_disable(iommu, DMAR_LATENCY_INV_IEC);
+                       dmar_latency_disable(iommu, DMAR_LATENCY_PRQ);
+               }
+               rcu_read_unlock();
+               break;
+       case 1:
+               rcu_read_lock();
+               for_each_active_iommu(iommu, drhd)
+                       dmar_latency_enable(iommu, DMAR_LATENCY_INV_IOTLB);
+               rcu_read_unlock();
+               break;
+       case 2:
+               rcu_read_lock();
+               for_each_active_iommu(iommu, drhd)
+                       dmar_latency_enable(iommu, DMAR_LATENCY_INV_DEVTLB);
+               rcu_read_unlock();
+               break;
+       case 3:
+               rcu_read_lock();
+               for_each_active_iommu(iommu, drhd)
+                       dmar_latency_enable(iommu, DMAR_LATENCY_INV_IEC);
+               rcu_read_unlock();
+               break;
+       case 4:
+               rcu_read_lock();
+               for_each_active_iommu(iommu, drhd)
+                       dmar_latency_enable(iommu, DMAR_LATENCY_PRQ);
+               rcu_read_unlock();
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       *ppos += cnt;
+       return cnt;
+}
+
+static const struct file_operations dmar_perf_latency_fops = {
+       .open           = dmar_perf_latency_open,
+       .write          = dmar_perf_latency_write,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
 void __init intel_iommu_debugfs_init(void)
 {
        struct dentry *intel_iommu_debug = debugfs_create_dir("intel",
@@ -556,4 +665,6 @@ void __init intel_iommu_debugfs_init(void)
        debugfs_create_file("ir_translation_struct", 0444, intel_iommu_debug,
                            NULL, &ir_translation_struct_fops);
 #endif
+       debugfs_create_file("dmar_perf_latency", 0644, intel_iommu_debug,
+                           NULL, &dmar_perf_latency_fops);
 }
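
A userspace sketch of driving the new file, assuming debugfs is mounted at
/sys/kernel/debug; the value written selects the counter per
dmar_perf_latency_write() above (0 disables all, 4 enables PRQ sampling):

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		char buf[4096];
		ssize_t n;
		int fd;

		fd = open("/sys/kernel/debug/iommu/intel/dmar_perf_latency",
			  O_RDWR);
		if (fd < 0)
			return 1;

		if (write(fd, "4", 1) != 1)	/* enable DMAR_LATENCY_PRQ */
			return 1;

		/* ... run the workload, then read the snapshot ... */
		lseek(fd, 0, SEEK_SET);
		n = read(fd, buf, sizeof(buf) - 1);
		if (n > 0) {
			buf[n] = '\0';
			fputs(buf, stdout);
		}

		close(fd);
		return 0;
	}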
index 84057cb9596cb2a62acfeecc52839e198aee50d0..d66f79acd14d0dedb231eb5076f158b9dc13b169 100644 (file)
@@ -34,6 +34,7 @@
 #include <trace/events/intel_iommu.h>
 
 #include "../irq_remapping.h"
+#include "perf.h"
 
 typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
 struct dmar_res_callback {
@@ -1342,15 +1343,33 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
                   unsigned int count, unsigned long options)
 {
        struct q_inval *qi = iommu->qi;
+       s64 devtlb_start_ktime = 0;
+       s64 iotlb_start_ktime = 0;
+       s64 iec_start_ktime = 0;
        struct qi_desc wait_desc;
        int wait_index, index;
        unsigned long flags;
        int offset, shift;
        int rc, i;
+       u64 type;
 
        if (!qi)
                return 0;
 
+       type = desc->qw0 & GENMASK_ULL(3, 0);
+
+       if ((type == QI_IOTLB_TYPE || type == QI_EIOTLB_TYPE) &&
+           dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IOTLB))
+               iotlb_start_ktime = ktime_to_ns(ktime_get());
+
+       if ((type == QI_DIOTLB_TYPE || type == QI_DEIOTLB_TYPE) &&
+           dmar_latency_enabled(iommu, DMAR_LATENCY_INV_DEVTLB))
+               devtlb_start_ktime = ktime_to_ns(ktime_get());
+
+       if (type == QI_IEC_TYPE &&
+           dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IEC))
+               iec_start_ktime = ktime_to_ns(ktime_get());
+
 restart:
        rc = 0;
 
@@ -1425,6 +1444,18 @@ restart:
        if (rc == -EAGAIN)
                goto restart;
 
+       if (iotlb_start_ktime)
+               dmar_latency_update(iommu, DMAR_LATENCY_INV_IOTLB,
+                               ktime_to_ns(ktime_get()) - iotlb_start_ktime);
+
+       if (devtlb_start_ktime)
+               dmar_latency_update(iommu, DMAR_LATENCY_INV_DEVTLB,
+                               ktime_to_ns(ktime_get()) - devtlb_start_ktime);
+
+       if (iec_start_ktime)
+               dmar_latency_update(iommu, DMAR_LATENCY_INV_IEC,
+                               ktime_to_ns(ktime_get()) - iec_start_ktime);
+
        return rc;
 }
 
@@ -1913,16 +1944,23 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
        reason = dmar_get_fault_reason(fault_reason, &fault_type);
 
        if (fault_type == INTR_REMAP)
-               pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index %llx [fault reason %02d] %s\n",
-                       source_id >> 8, PCI_SLOT(source_id & 0xFF),
-                       PCI_FUNC(source_id & 0xFF), addr >> 48,
-                       fault_reason, reason);
-       else
-               pr_err("[%s] Request device [%02x:%02x.%d] PASID %x fault addr %llx [fault reason %02d] %s\n",
+               pr_err("[INTR-REMAP] Request device [0x%02x:0x%02x.%d] fault index 0x%llx [fault reason 0x%02x] %s\n",
+                      source_id >> 8, PCI_SLOT(source_id & 0xFF),
+                      PCI_FUNC(source_id & 0xFF), addr >> 48,
+                      fault_reason, reason);
+       else if (pasid == INVALID_IOASID)
+               pr_err("[%s NO_PASID] Request device [0x%02x:0x%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
                       type ? "DMA Read" : "DMA Write",
                       source_id >> 8, PCI_SLOT(source_id & 0xFF),
-                      PCI_FUNC(source_id & 0xFF), pasid, addr,
+                      PCI_FUNC(source_id & 0xFF), addr,
                       fault_reason, reason);
+       else
+               pr_err("[%s PASID 0x%x] Request device [0x%02x:0x%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
+                      type ? "DMA Read" : "DMA Write", pasid,
+                      source_id >> 8, PCI_SLOT(source_id & 0xFF),
+                      PCI_FUNC(source_id & 0xFF), addr,
+                      fault_reason, reason);
+
        return 0;
 }
 
@@ -1989,7 +2027,7 @@ irqreturn_t dmar_fault(int irq, void *dev_id)
                if (!ratelimited)
                        /* Using pasid -1 if pasid is not present */
                        dmar_fault_do_one(iommu, type, fault_reason,
-                                         pasid_present ? pasid : -1,
+                                         pasid_present ? pasid : INVALID_IOASID,
                                          source_id, guest_addr);
 
                fault_index++;
index be35284a201600a9bf924204e735361bf75cf515..a6a07d985709f4c09792b30cdcdb777d983271d1 100644 (file)
@@ -46,6 +46,7 @@
 #include <asm/iommu.h>
 
 #include "../irq_remapping.h"
+#include "../iommu-sva-lib.h"
 #include "pasid.h"
 #include "cap_audit.h"
 
@@ -564,7 +565,7 @@ static inline int domain_pfn_supported(struct dmar_domain *domain,
 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
 {
        unsigned long sagaw;
-       int agaw = -1;
+       int agaw;
 
        sagaw = cap_sagaw(iommu->cap);
        for (agaw = width_to_agaw(max_gaw);
@@ -625,12 +626,12 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain)
        bool found = false;
        int i;
 
-       domain->iommu_coherency = 1;
+       domain->iommu_coherency = true;
 
        for_each_domain_iommu(i, domain) {
                found = true;
                if (!iommu_paging_structure_coherency(g_iommus[i])) {
-                       domain->iommu_coherency = 0;
+                       domain->iommu_coherency = false;
                        break;
                }
        }
@@ -641,18 +642,18 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain)
        rcu_read_lock();
        for_each_active_iommu(iommu, drhd) {
                if (!iommu_paging_structure_coherency(iommu)) {
-                       domain->iommu_coherency = 0;
+                       domain->iommu_coherency = false;
                        break;
                }
        }
        rcu_read_unlock();
 }
 
-static int domain_update_iommu_snooping(struct intel_iommu *skip)
+static bool domain_update_iommu_snooping(struct intel_iommu *skip)
 {
        struct dmar_drhd_unit *drhd;
        struct intel_iommu *iommu;
-       int ret = 1;
+       bool ret = true;
 
        rcu_read_lock();
        for_each_active_iommu(iommu, drhd) {
@@ -665,7 +666,7 @@ static int domain_update_iommu_snooping(struct intel_iommu *skip)
                         */
                        if (!sm_supported(iommu) &&
                            !ecap_sc_support(iommu->ecap)) {
-                               ret = 0;
+                               ret = false;
                                break;
                        }
                }
@@ -682,9 +683,8 @@ static int domain_update_iommu_superpage(struct dmar_domain *domain,
        struct intel_iommu *iommu;
        int mask = 0x3;
 
-       if (!intel_iommu_superpage) {
+       if (!intel_iommu_superpage)
                return 0;
-       }
 
        /* set iommu_superpage to the smallest common denominator */
        rcu_read_lock();
@@ -1919,7 +1919,6 @@ static int domain_attach_iommu(struct dmar_domain *domain,
        assert_spin_locked(&iommu->lock);
 
        domain->iommu_refcnt[iommu->seq_id] += 1;
-       domain->iommu_count += 1;
        if (domain->iommu_refcnt[iommu->seq_id] == 1) {
                ndomains = cap_ndoms(iommu->cap);
                num      = find_first_zero_bit(iommu->domain_ids, ndomains);
@@ -1927,7 +1926,6 @@ static int domain_attach_iommu(struct dmar_domain *domain,
                if (num >= ndomains) {
                        pr_err("%s: No free domain ids\n", iommu->name);
                        domain->iommu_refcnt[iommu->seq_id] -= 1;
-                       domain->iommu_count -= 1;
                        return -ENOSPC;
                }
 
@@ -1943,16 +1941,15 @@ static int domain_attach_iommu(struct dmar_domain *domain,
        return 0;
 }
 
-static int domain_detach_iommu(struct dmar_domain *domain,
-                              struct intel_iommu *iommu)
+static void domain_detach_iommu(struct dmar_domain *domain,
+                               struct intel_iommu *iommu)
 {
-       int num, count;
+       int num;
 
        assert_spin_locked(&device_domain_lock);
        assert_spin_locked(&iommu->lock);
 
        domain->iommu_refcnt[iommu->seq_id] -= 1;
-       count = --domain->iommu_count;
        if (domain->iommu_refcnt[iommu->seq_id] == 0) {
                num = domain->iommu_did[iommu->seq_id];
                clear_bit(num, iommu->domain_ids);
@@ -1961,8 +1958,6 @@ static int domain_detach_iommu(struct dmar_domain *domain,
                domain_update_iommu_cap(domain);
                domain->iommu_did[iommu->seq_id] = 0;
        }
-
-       return count;
 }
 
 static inline int guestwidth_to_adjustwidth(int gaw)
@@ -4138,62 +4133,56 @@ static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
        return container_of(iommu_dev, struct intel_iommu, iommu);
 }
 
-static ssize_t intel_iommu_show_version(struct device *dev,
-                                       struct device_attribute *attr,
-                                       char *buf)
+static ssize_t version_show(struct device *dev,
+                           struct device_attribute *attr, char *buf)
 {
        struct intel_iommu *iommu = dev_to_intel_iommu(dev);
        u32 ver = readl(iommu->reg + DMAR_VER_REG);
        return sprintf(buf, "%d:%d\n",
                       DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
 }
-static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
+static DEVICE_ATTR_RO(version);
 
-static ssize_t intel_iommu_show_address(struct device *dev,
-                                       struct device_attribute *attr,
-                                       char *buf)
+static ssize_t address_show(struct device *dev,
+                           struct device_attribute *attr, char *buf)
 {
        struct intel_iommu *iommu = dev_to_intel_iommu(dev);
        return sprintf(buf, "%llx\n", iommu->reg_phys);
 }
-static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
+static DEVICE_ATTR_RO(address);
 
-static ssize_t intel_iommu_show_cap(struct device *dev,
-                                   struct device_attribute *attr,
-                                   char *buf)
+static ssize_t cap_show(struct device *dev,
+                       struct device_attribute *attr, char *buf)
 {
        struct intel_iommu *iommu = dev_to_intel_iommu(dev);
        return sprintf(buf, "%llx\n", iommu->cap);
 }
-static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
+static DEVICE_ATTR_RO(cap);
 
-static ssize_t intel_iommu_show_ecap(struct device *dev,
-                                   struct device_attribute *attr,
-                                   char *buf)
+static ssize_t ecap_show(struct device *dev,
+                        struct device_attribute *attr, char *buf)
 {
        struct intel_iommu *iommu = dev_to_intel_iommu(dev);
        return sprintf(buf, "%llx\n", iommu->ecap);
 }
-static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
+static DEVICE_ATTR_RO(ecap);
 
-static ssize_t intel_iommu_show_ndoms(struct device *dev,
-                                     struct device_attribute *attr,
-                                     char *buf)
+static ssize_t domains_supported_show(struct device *dev,
+                                     struct device_attribute *attr, char *buf)
 {
        struct intel_iommu *iommu = dev_to_intel_iommu(dev);
        return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
 }
-static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
+static DEVICE_ATTR_RO(domains_supported);
 
-static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
-                                          struct device_attribute *attr,
-                                          char *buf)
+static ssize_t domains_used_show(struct device *dev,
+                                struct device_attribute *attr, char *buf)
 {
        struct intel_iommu *iommu = dev_to_intel_iommu(dev);
        return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
                                                  cap_ndoms(iommu->cap)));
 }
-static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
+static DEVICE_ATTR_RO(domains_used);
 
 static struct attribute *intel_iommu_attrs[] = {
        &dev_attr_version.attr,
@@ -4511,13 +4500,13 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
        adjust_width = guestwidth_to_adjustwidth(guest_width);
        domain->agaw = width_to_agaw(adjust_width);
 
-       domain->iommu_coherency = 0;
-       domain->iommu_snooping = 0;
+       domain->iommu_coherency = false;
+       domain->iommu_snooping = false;
        domain->iommu_superpage = 0;
        domain->max_addr = 0;
 
        /* always allocate the top pgd */
-       domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
+       domain->pgd = alloc_pgtable_page(domain->nid);
        if (!domain->pgd)
                return -ENOMEM;
        domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
@@ -4757,6 +4746,13 @@ static int prepare_domain_attach_device(struct iommu_domain *domain,
        if (!iommu)
                return -ENODEV;
 
+       if ((dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE) &&
+           !ecap_nest(iommu->ecap)) {
+               dev_err(dev, "%s: iommu does not support nested translation\n",
+                       iommu->name);
+               return -EINVAL;
+       }
+
        /* check if this iommu agaw is sufficient for max mapped address */
        addr_width = agaw_to_width(iommu->agaw);
        if (addr_width > cap_mgaw(iommu->cap))
@@ -4778,8 +4774,7 @@ static int prepare_domain_attach_device(struct iommu_domain *domain,
 
                pte = dmar_domain->pgd;
                if (dma_pte_present(pte)) {
-                       dmar_domain->pgd = (struct dma_pte *)
-                               phys_to_virt(dma_pte_addr(pte));
+                       dmar_domain->pgd = phys_to_virt(dma_pte_addr(pte));
                        free_pgtable_page(pte);
                }
                dmar_domain->agaw--;
@@ -5129,7 +5124,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
 static bool intel_iommu_capable(enum iommu_cap cap)
 {
        if (cap == IOMMU_CAP_CACHE_COHERENCY)
-               return domain_update_iommu_snooping(NULL) == 1;
+               return domain_update_iommu_snooping(NULL);
        if (cap == IOMMU_CAP_INTR_REMAP)
                return irq_remapping_enabled == 1;
 
@@ -5165,13 +5160,10 @@ static void intel_iommu_release_device(struct device *dev)
 
 static void intel_iommu_probe_finalize(struct device *dev)
 {
-       dma_addr_t base = IOVA_START_PFN << VTD_PAGE_SHIFT;
        struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
-       struct dmar_domain *dmar_domain = to_dmar_domain(domain);
 
        if (domain && domain->type == IOMMU_DOMAIN_DMA)
-               iommu_setup_dma_ops(dev, base,
-                                   __DOMAIN_MAX_ADDR(dmar_domain->gaw) - base);
+               iommu_setup_dma_ops(dev, 0, U64_MAX);
        else
                set_dma_ops(dev, NULL);
 }
@@ -5331,6 +5323,48 @@ static int intel_iommu_disable_auxd(struct device *dev)
        return 0;
 }
 
+static int intel_iommu_enable_sva(struct device *dev)
+{
+       struct device_domain_info *info = get_domain_info(dev);
+       struct intel_iommu *iommu;
+       int ret;
+
+       if (!info || dmar_disabled)
+               return -EINVAL;
+
+       iommu = info->iommu;
+       if (!iommu)
+               return -EINVAL;
+
+       if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE))
+               return -ENODEV;
+
+       if (intel_iommu_enable_pasid(iommu, dev))
+               return -ENODEV;
+
+       if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled)
+               return -EINVAL;
+
+       ret = iopf_queue_add_device(iommu->iopf_queue, dev);
+       if (!ret)
+               ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
+
+       return ret;
+}
+
+static int intel_iommu_disable_sva(struct device *dev)
+{
+       struct device_domain_info *info = get_domain_info(dev);
+       struct intel_iommu *iommu = info->iommu;
+       int ret;
+
+       ret = iommu_unregister_device_fault_handler(dev);
+       if (!ret)
+               ret = iopf_queue_remove_device(iommu->iopf_queue, dev);
+
+       return ret;
+}
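
From a driver's point of view the path above sits behind the generic SVA API;
a minimal sketch (error handling trimmed; iommu_sva_bind_device() and
iommu_sva_get_pasid() are the existing core helpers, example_bind is a
hypothetical name):

	static int example_bind(struct device *dev, struct mm_struct *mm)
	{
		struct iommu_sva *handle;
		u32 pasid;

		if (iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA))
			return -ENODEV;

		handle = iommu_sva_bind_device(dev, mm, NULL);
		if (IS_ERR(handle))
			return PTR_ERR(handle);

		pasid = iommu_sva_get_pasid(handle);
		/* program pasid into the device; unbind on teardown */
		return 0;
	}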
+
 /*
  * A PCI express designated vendor specific extended capability is defined
  * in section 3.7 of the Intel Scalable I/O Virtualization technical spec
@@ -5392,35 +5426,37 @@ intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
 static int
 intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
 {
-       if (feat == IOMMU_DEV_FEAT_AUX)
+       switch (feat) {
+       case IOMMU_DEV_FEAT_AUX:
                return intel_iommu_enable_auxd(dev);
 
-       if (feat == IOMMU_DEV_FEAT_IOPF)
+       case IOMMU_DEV_FEAT_IOPF:
                return intel_iommu_dev_has_feat(dev, feat) ? 0 : -ENODEV;
 
-       if (feat == IOMMU_DEV_FEAT_SVA) {
-               struct device_domain_info *info = get_domain_info(dev);
-
-               if (!info)
-                       return -EINVAL;
+       case IOMMU_DEV_FEAT_SVA:
+               return intel_iommu_enable_sva(dev);
 
-               if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled)
-                       return -EINVAL;
-
-               if (info->iommu->flags & VTD_FLAG_SVM_CAPABLE)
-                       return 0;
+       default:
+               return -ENODEV;
        }
-
-       return -ENODEV;
 }
 
 static int
 intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
 {
-       if (feat == IOMMU_DEV_FEAT_AUX)
+       switch (feat) {
+       case IOMMU_DEV_FEAT_AUX:
                return intel_iommu_disable_auxd(dev);
 
-       return -ENODEV;
+       case IOMMU_DEV_FEAT_IOPF:
+               return 0;
+
+       case IOMMU_DEV_FEAT_SVA:
+               return intel_iommu_disable_sva(dev);
+
+       default:
+               return -ENODEV;
+       }
 }
 
 static bool
@@ -5457,7 +5493,7 @@ intel_iommu_enable_nesting(struct iommu_domain *domain)
        int ret = -ENODEV;
 
        spin_lock_irqsave(&device_domain_lock, flags);
-       if (nested_mode_support() && list_empty(&dmar_domain->devices)) {
+       if (list_empty(&dmar_domain->devices)) {
                dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE;
                dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL;
                ret = 0;
index 72dc84821dad2de15f56c0ea26c222afb7b338a5..c6cf44a6c92305c57093feb22372a6076b0a0536 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/**
+/*
  * intel-pasid.c - PASID idr, table and entry manipulation
  *
  * Copyright (C) 2018 Intel Corporation
diff --git a/drivers/iommu/intel/perf.c b/drivers/iommu/intel/perf.c
new file mode 100644 (file)
index 0000000..73b7ec7
--- /dev/null
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * perf.c - performance monitor
+ *
+ * Copyright (C) 2021 Intel Corporation
+ *
+ * Author: Lu Baolu <baolu.lu@linux.intel.com>
+ *         Fenghua Yu <fenghua.yu@intel.com>
+ */
+
+#include <linux/spinlock.h>
+#include <linux/intel-iommu.h>
+
+#include "perf.h"
+
+static DEFINE_SPINLOCK(latency_lock);
+
+bool dmar_latency_enabled(struct intel_iommu *iommu, enum latency_type type)
+{
+       struct latency_statistic *lstat = iommu->perf_statistic;
+
+       return lstat && lstat[type].enabled;
+}
+
+int dmar_latency_enable(struct intel_iommu *iommu, enum latency_type type)
+{
+       struct latency_statistic *lstat;
+       unsigned long flags;
+       int ret = -EBUSY;
+
+       if (dmar_latency_enabled(iommu, type))
+               return 0;
+
+       spin_lock_irqsave(&latency_lock, flags);
+       if (!iommu->perf_statistic) {
+               iommu->perf_statistic = kcalloc(DMAR_LATENCY_NUM, sizeof(*lstat),
+                                               GFP_ATOMIC);
+               if (!iommu->perf_statistic) {
+                       ret = -ENOMEM;
+                       goto unlock_out;
+               }
+       }
+
+       lstat = iommu->perf_statistic;
+
+       if (!lstat[type].enabled) {
+               lstat[type].enabled = true;
+               lstat[type].counter[COUNTS_MIN] = UINT_MAX;
+               ret = 0;
+       }
+unlock_out:
+       spin_unlock_irqrestore(&latency_lock, flags);
+
+       return ret;
+}
+
+void dmar_latency_disable(struct intel_iommu *iommu, enum latency_type type)
+{
+       struct latency_statistic *lstat = iommu->perf_statistic;
+       unsigned long flags;
+
+       if (!dmar_latency_enabled(iommu, type))
+               return;
+
+       spin_lock_irqsave(&latency_lock, flags);
+       memset(&lstat[type], 0, sizeof(*lstat));
+       spin_unlock_irqrestore(&latency_lock, flags);
+}
+
+void dmar_latency_update(struct intel_iommu *iommu, enum latency_type type, u64 latency)
+{
+       struct latency_statistic *lstat = iommu->perf_statistic;
+       unsigned long flags;
+       u64 min, max;
+
+       if (!dmar_latency_enabled(iommu, type))
+               return;
+
+       spin_lock_irqsave(&latency_lock, flags);
+       if (latency < 100)
+               lstat[type].counter[COUNTS_10e2]++;
+       else if (latency < 1000)
+               lstat[type].counter[COUNTS_10e3]++;
+       else if (latency < 10000)
+               lstat[type].counter[COUNTS_10e4]++;
+       else if (latency < 100000)
+               lstat[type].counter[COUNTS_10e5]++;
+       else if (latency < 1000000)
+               lstat[type].counter[COUNTS_10e6]++;
+       else if (latency < 10000000)
+               lstat[type].counter[COUNTS_10e7]++;
+       else
+               lstat[type].counter[COUNTS_10e8_plus]++;
+
+       min = lstat[type].counter[COUNTS_MIN];
+       max = lstat[type].counter[COUNTS_MAX];
+       lstat[type].counter[COUNTS_MIN] = min_t(u64, min, latency);
+       lstat[type].counter[COUNTS_MAX] = max_t(u64, max, latency);
+       lstat[type].counter[COUNTS_SUM] += latency;
+       lstat[type].samples++;
+       spin_unlock_irqrestore(&latency_lock, flags);
+}
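
Worked example for the bucketing above, with latency = 2500 (ns):

	/*
	 * 1000 <= 2500 < 10000           ->  COUNTS_10e4++ (1us-10us bucket)
	 * min/max compared against 2500  ->  COUNTS_MIN/COUNTS_MAX updated
	 * COUNTS_SUM += 2500, samples++  ->  feeds average(us) in the snapshot
	 */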
+
+static char *latency_counter_names[] = {
+       "                  <0.1us",
+       "   0.1us-1us", "    1us-10us", "  10us-100us",
+       "   100us-1ms", "    1ms-10ms", "      >=10ms",
+       "     min(us)", "     max(us)", " average(us)"
+};
+
+static char *latency_type_names[] = {
+       "   inv_iotlb", "  inv_devtlb", "     inv_iec",
+       "     svm_prq"
+};
+
+int dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size)
+{
+       struct latency_statistic *lstat = iommu->perf_statistic;
+       unsigned long flags;
+       int bytes = 0, i, j;
+
+       memset(str, 0, size);
+
+       for (i = 0; i < COUNTS_NUM; i++)
+               bytes += snprintf(str + bytes, size - bytes,
+                                 "%s", latency_counter_names[i]);
+
+       spin_lock_irqsave(&latency_lock, flags);
+       for (i = 0; i < DMAR_LATENCY_NUM; i++) {
+               if (!dmar_latency_enabled(iommu, i))
+                       continue;
+
+               bytes += snprintf(str + bytes, size - bytes,
+                                 "\n%s", latency_type_names[i]);
+
+               for (j = 0; j < COUNTS_NUM; j++) {
+                       u64 val = lstat[i].counter[j];
+
+                       switch (j) {
+                       case COUNTS_MIN:
+                               if (val == UINT_MAX)
+                                       val = 0;
+                               else
+                                       val = div_u64(val, 1000);
+                               break;
+                       case COUNTS_MAX:
+                               val = div_u64(val, 1000);
+                               break;
+                       case COUNTS_SUM:
+                               if (lstat[i].samples)
+                                       val = div_u64(val, (lstat[i].samples * 1000));
+                               else
+                                       val = 0;
+                               break;
+                       default:
+                               break;
+                       }
+
+                       bytes += snprintf(str + bytes, size - bytes,
+                                         "%12lld", val);
+               }
+       }
+       spin_unlock_irqrestore(&latency_lock, flags);
+
+       return bytes;
+}
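
The histogram above is decimal: each bucket spans one order of magnitude of
nanoseconds, and dmar_latency_snapshot() scales min/max/average down to
microseconds. A minimal, self-contained userspace model of the same bucketing
(illustrative only; nothing below is taken from the patch):

	#include <stdint.h>
	#include <stdio.h>

	static const char *bucket_name(uint64_t ns)
	{
		if (ns < 100)       return "<0.1us";
		if (ns < 1000)      return "0.1us-1us";
		if (ns < 10000)     return "1us-10us";
		if (ns < 100000)    return "10us-100us";
		if (ns < 1000000)   return "100us-1ms";
		if (ns < 10000000)  return "1ms-10ms";
		return ">=10ms";
	}

	int main(void)
	{
		uint64_t samples[] = { 80, 950, 12000, 2500000 };
		uint64_t sum = 0;
		int i;

		for (i = 0; i < 4; i++) {
			printf("%8llu ns -> %s\n",
			       (unsigned long long)samples[i],
			       bucket_name(samples[i]));
			sum += samples[i];
		}
		/* average in microseconds, as dmar_latency_snapshot() prints it */
		printf("average: %llu us\n",
		       (unsigned long long)(sum / 4 / 1000));
		return 0;
	}
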
diff --git a/drivers/iommu/intel/perf.h b/drivers/iommu/intel/perf.h
new file mode 100644 (file)
index 0000000..fd6db80
--- /dev/null
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * perf.h - performance monitor header
+ *
+ * Copyright (C) 2021 Intel Corporation
+ *
+ * Author: Lu Baolu <baolu.lu@linux.intel.com>
+ */
+
+enum latency_type {
+       DMAR_LATENCY_INV_IOTLB = 0,
+       DMAR_LATENCY_INV_DEVTLB,
+       DMAR_LATENCY_INV_IEC,
+       DMAR_LATENCY_PRQ,
+       DMAR_LATENCY_NUM
+};
+
+enum latency_count {
+       COUNTS_10e2 = 0,        /* < 0.1us      */
+       COUNTS_10e3,            /* 0.1us ~ 1us  */
+       COUNTS_10e4,            /* 1us ~ 10us   */
+       COUNTS_10e5,            /* 10us ~ 100us */
+       COUNTS_10e6,            /* 100us ~ 1ms  */
+       COUNTS_10e7,            /* 1ms ~ 10ms   */
+       COUNTS_10e8_plus,       /* 10ms and above */
+       COUNTS_MIN,
+       COUNTS_MAX,
+       COUNTS_SUM,
+       COUNTS_NUM
+};
+
+struct latency_statistic {
+       bool enabled;
+       u64 counter[COUNTS_NUM];
+       u64 samples;
+};
+
+#ifdef CONFIG_DMAR_PERF
+int dmar_latency_enable(struct intel_iommu *iommu, enum latency_type type);
+void dmar_latency_disable(struct intel_iommu *iommu, enum latency_type type);
+bool dmar_latency_enabled(struct intel_iommu *iommu, enum latency_type type);
+void dmar_latency_update(struct intel_iommu *iommu, enum latency_type type,
+                        u64 latency);
+int dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size);
+#else
+static inline int
+dmar_latency_enable(struct intel_iommu *iommu, enum latency_type type)
+{
+       return -EINVAL;
+}
+
+static inline void
+dmar_latency_disable(struct intel_iommu *iommu, enum latency_type type)
+{
+}
+
+static inline bool
+dmar_latency_enabled(struct intel_iommu *iommu, enum latency_type type)
+{
+       return false;
+}
+
+static inline void
+dmar_latency_update(struct intel_iommu *iommu, enum latency_type type, u64 latency)
+{
+}
+
+static inline int
+dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size)
+{
+       return 0;
+}
+#endif /* CONFIG_DMAR_PERF */
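
Putting the new interface together: a caller enables sampling for a latency
type once, feeds nanosecond deltas into dmar_latency_update(), and reads the
formatted table back with dmar_latency_snapshot(). A hedged sketch of that
lifecycle (the caller and the operation being timed are assumed, not taken
from the patch):

	/* Hedged usage sketch, not from the patch. */
	static void sample_one_iotlb_inv(struct intel_iommu *iommu)
	{
		ktime_t start;

		if (dmar_latency_enable(iommu, DMAR_LATENCY_INV_IOTLB))
			return;	/* non-zero when sampling is unavailable */

		start = ktime_get();
		/* ... submit and wait for one IOTLB invalidation ... */
		dmar_latency_update(iommu, DMAR_LATENCY_INV_IOTLB,
				    ktime_to_ns(ktime_get()) -
				    ktime_to_ns(start));
	}
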
index 5165cea904211a8af9768be34750f9a49d55bca4..9b0f22bc0514e0ea5863524f2f7b41718ae05fcc 100644 (file)
 #include <linux/dmar.h>
 #include <linux/interrupt.h>
 #include <linux/mm_types.h>
+#include <linux/xarray.h>
 #include <linux/ioasid.h>
 #include <asm/page.h>
 #include <asm/fpu/api.h>
+#include <trace/events/intel_iommu.h>
 
 #include "pasid.h"
+#include "perf.h"
+#include "../iommu-sva-lib.h"
 
 static irqreturn_t prq_event_thread(int irq, void *d);
 static void intel_svm_drain_prq(struct device *dev, u32 pasid);
+#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva)
 
 #define PRQ_ORDER 0
 
+static DEFINE_XARRAY_ALLOC(pasid_private_array);
+static int pasid_private_add(ioasid_t pasid, void *priv)
+{
+       return xa_alloc(&pasid_private_array, &pasid, priv,
+                       XA_LIMIT(pasid, pasid), GFP_ATOMIC);
+}
+
+static void pasid_private_remove(ioasid_t pasid)
+{
+       xa_erase(&pasid_private_array, pasid);
+}
+
+static void *pasid_private_find(ioasid_t pasid)
+{
+       return xa_load(&pasid_private_array, pasid);
+}
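
Note the XA_LIMIT(pasid, pasid) range passed to xa_alloc() above: it pins the
allocation to exactly the requested index, so pasid_private_add() behaves as
an insert keyed by the PASID and fails with -EBUSY if the slot is occupied. A
hedged equivalent spelled with xa_insert():

	/* Hedged sketch: the same store-at-fixed-index semantics. */
	static int pasid_private_add_alt(ioasid_t pasid, void *priv)
	{
		return xa_insert(&pasid_private_array, pasid, priv, GFP_ATOMIC);
	}
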
+
+static struct intel_svm_dev *
+svm_lookup_device_by_sid(struct intel_svm *svm, u16 sid)
+{
+       struct intel_svm_dev *sdev = NULL, *t;
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(t, &svm->devs, list) {
+               if (t->sid == sid) {
+                       sdev = t;
+                       break;
+               }
+       }
+       rcu_read_unlock();
+
+       return sdev;
+}
+
+static struct intel_svm_dev *
+svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev)
+{
+       struct intel_svm_dev *sdev = NULL, *t;
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(t, &svm->devs, list) {
+               if (t->dev == dev) {
+                       sdev = t;
+                       break;
+               }
+       }
+       rcu_read_unlock();
+
+       return sdev;
+}
+
 int intel_svm_enable_prq(struct intel_iommu *iommu)
 {
+       struct iopf_queue *iopfq;
        struct page *pages;
        int irq, ret;
 
@@ -46,13 +103,20 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)
                pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
                       iommu->name);
                ret = -EINVAL;
-       err:
-               free_pages((unsigned long)iommu->prq, PRQ_ORDER);
-               iommu->prq = NULL;
-               return ret;
+               goto free_prq;
        }
        iommu->pr_irq = irq;
 
+       snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name),
+                "dmar%d-iopfq", iommu->seq_id);
+       iopfq = iopf_queue_alloc(iommu->iopfq_name);
+       if (!iopfq) {
+               pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name);
+               ret = -ENOMEM;
+               goto free_hwirq;
+       }
+       iommu->iopf_queue = iopfq;
+
        snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);
 
        ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
@@ -60,9 +124,7 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)
        if (ret) {
                pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
                       iommu->name);
-               dmar_free_hwirq(irq);
-               iommu->pr_irq = 0;
-               goto err;
+               goto free_iopfq;
        }
        dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
        dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
@@ -71,6 +133,18 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)
        init_completion(&iommu->prq_complete);
 
        return 0;
+
+free_iopfq:
+       iopf_queue_free(iommu->iopf_queue);
+       iommu->iopf_queue = NULL;
+free_hwirq:
+       dmar_free_hwirq(irq);
+       iommu->pr_irq = 0;
+free_prq:
+       free_pages((unsigned long)iommu->prq, PRQ_ORDER);
+       iommu->prq = NULL;
+
+       return ret;
 }
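
The error-path rework above trades the single shared err: label for stacked
unwind labels, the usual kernel idiom: each acquisition gets a label that
releases everything obtained after it, so a new setup step slots in without
touching earlier cleanup. Schematically (all helpers hypothetical):

	static int setup_three(void)
	{
		int ret;

		ret = acquire_a();
		if (ret)
			return ret;

		ret = acquire_b();
		if (ret)
			goto free_a;

		ret = acquire_c();
		if (ret)
			goto free_b;

		return 0;

	free_b:
		release_b();
	free_a:
		release_a();
		return ret;
	}
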
 
 int intel_svm_finish_prq(struct intel_iommu *iommu)
@@ -85,6 +159,11 @@ int intel_svm_finish_prq(struct intel_iommu *iommu)
                iommu->pr_irq = 0;
        }
 
+       if (iommu->iopf_queue) {
+               iopf_queue_free(iommu->iopf_queue);
+               iommu->iopf_queue = NULL;
+       }
+
        free_pages((unsigned long)iommu->prq, PRQ_ORDER);
        iommu->prq = NULL;
 
@@ -204,17 +283,12 @@ static const struct mmu_notifier_ops intel_mmuops = {
 };
 
 static DEFINE_MUTEX(pasid_mutex);
-static LIST_HEAD(global_svm_list);
-
-#define for_each_svm_dev(sdev, svm, d)                 \
-       list_for_each_entry((sdev), &(svm)->devs, list) \
-               if ((d) != (sdev)->dev) {} else
 
 static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
                             struct intel_svm **rsvm,
                             struct intel_svm_dev **rsdev)
 {
-       struct intel_svm_dev *d, *sdev = NULL;
+       struct intel_svm_dev *sdev = NULL;
        struct intel_svm *svm;
 
        /* The caller should hold the pasid_mutex lock */
@@ -224,7 +298,7 @@ static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
        if (pasid == INVALID_IOASID || pasid >= PASID_MAX)
                return -EINVAL;
 
-       svm = ioasid_find(NULL, pasid, NULL);
+       svm = pasid_private_find(pasid);
        if (IS_ERR(svm))
                return PTR_ERR(svm);
 
@@ -237,15 +311,7 @@ static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
         */
        if (WARN_ON(list_empty(&svm->devs)))
                return -EINVAL;
-
-       rcu_read_lock();
-       list_for_each_entry_rcu(d, &svm->devs, list) {
-               if (d->dev == dev) {
-                       sdev = d;
-                       break;
-               }
-       }
-       rcu_read_unlock();
+       sdev = svm_lookup_device_by_dev(svm, dev);
 
 out:
        *rsvm = svm;
@@ -334,7 +400,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
                        svm->gpasid = data->gpasid;
                        svm->flags |= SVM_FLAG_GUEST_PASID;
                }
-               ioasid_set_data(data->hpasid, svm);
+               pasid_private_add(data->hpasid, svm);
                INIT_LIST_HEAD_RCU(&svm->devs);
                mmput(svm->mm);
        }
@@ -388,7 +454,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
        list_add_rcu(&sdev->list, &svm->devs);
  out:
        if (!IS_ERR_OR_NULL(svm) && list_empty(&svm->devs)) {
-               ioasid_set_data(data->hpasid, NULL);
+               pasid_private_remove(data->hpasid);
                kfree(svm);
        }
 
@@ -431,7 +497,7 @@ int intel_svm_unbind_gpasid(struct device *dev, u32 pasid)
                                 * the unbind, IOMMU driver will get notified
                                 * and perform cleanup.
                                 */
-                               ioasid_set_data(pasid, NULL);
+                               pasid_private_remove(pasid);
                                kfree(svm);
                        }
                }
@@ -459,79 +525,81 @@ static void load_pasid(struct mm_struct *mm, u32 pasid)
        mutex_unlock(&mm->context.lock);
 }
 
-/* Caller must hold pasid_mutex, mm reference */
-static int
-intel_svm_bind_mm(struct device *dev, unsigned int flags,
-                 struct mm_struct *mm, struct intel_svm_dev **sd)
+static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm,
+                                unsigned int flags)
 {
-       struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
-       struct intel_svm *svm = NULL, *t;
-       struct device_domain_info *info;
-       struct intel_svm_dev *sdev;
-       unsigned long iflags;
-       int pasid_max;
-       int ret;
+       ioasid_t max_pasid = dev_is_pci(dev) ?
+                       pci_max_pasids(to_pci_dev(dev)) : intel_pasid_max_id;
 
-       if (!iommu || dmar_disabled)
-               return -EINVAL;
+       return iommu_sva_alloc_pasid(mm, PASID_MIN, max_pasid - 1);
+}
 
-       if (!intel_svm_capable(iommu))
-               return -ENOTSUPP;
+static void intel_svm_free_pasid(struct mm_struct *mm)
+{
+       iommu_sva_free_pasid(mm);
+}
 
-       if (dev_is_pci(dev)) {
-               pasid_max = pci_max_pasids(to_pci_dev(dev));
-               if (pasid_max < 0)
-                       return -EINVAL;
-       } else
-               pasid_max = 1 << 20;
+static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
+                                          struct device *dev,
+                                          struct mm_struct *mm,
+                                          unsigned int flags)
+{
+       struct device_domain_info *info = get_domain_info(dev);
+       unsigned long iflags, sflags;
+       struct intel_svm_dev *sdev;
+       struct intel_svm *svm;
+       int ret = 0;
 
-       /* Bind supervisor PASID shuld have mm = NULL */
-       if (flags & SVM_FLAG_SUPERVISOR_MODE) {
-               if (!ecap_srs(iommu->ecap) || mm) {
-                       pr_err("Supervisor PASID with user provided mm.\n");
-                       return -EINVAL;
-               }
-       }
+       svm = pasid_private_find(mm->pasid);
+       if (!svm) {
+               svm = kzalloc(sizeof(*svm), GFP_KERNEL);
+               if (!svm)
+                       return ERR_PTR(-ENOMEM);
 
-       list_for_each_entry(t, &global_svm_list, list) {
-               if (t->mm != mm)
-                       continue;
+               svm->pasid = mm->pasid;
+               svm->mm = mm;
+               svm->flags = flags;
+               INIT_LIST_HEAD_RCU(&svm->devs);
 
-               svm = t;
-               if (svm->pasid >= pasid_max) {
-                       dev_warn(dev,
-                                "Limited PASID width. Cannot use existing PASID %d\n",
-                                svm->pasid);
-                       ret = -ENOSPC;
-                       goto out;
+               if (!(flags & SVM_FLAG_SUPERVISOR_MODE)) {
+                       svm->notifier.ops = &intel_mmuops;
+                       ret = mmu_notifier_register(&svm->notifier, mm);
+                       if (ret) {
+                               kfree(svm);
+                               return ERR_PTR(ret);
+                       }
                }
 
-               /* Find the matching device in svm list */
-               for_each_svm_dev(sdev, svm, dev) {
-                       sdev->users++;
-                       goto success;
+               ret = pasid_private_add(svm->pasid, svm);
+               if (ret) {
+                       if (svm->notifier.ops)
+                               mmu_notifier_unregister(&svm->notifier, mm);
+                       kfree(svm);
+                       return ERR_PTR(ret);
                }
+       }
 
-               break;
+       /* Find the matching device in svm list */
+       sdev = svm_lookup_device_by_dev(svm, dev);
+       if (sdev) {
+               sdev->users++;
+               goto success;
        }
 
        sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
        if (!sdev) {
                ret = -ENOMEM;
-               goto out;
+               goto free_svm;
        }
+
        sdev->dev = dev;
        sdev->iommu = iommu;
-
-       ret = intel_iommu_enable_pasid(iommu, dev);
-       if (ret) {
-               kfree(sdev);
-               goto out;
-       }
-
-       info = get_domain_info(dev);
        sdev->did = FLPT_DEFAULT_DID;
        sdev->sid = PCI_DEVID(info->bus, info->devfn);
+       sdev->users = 1;
+       sdev->pasid = svm->pasid;
+       sdev->sva.dev = dev;
+       init_rcu_head(&sdev->rcu);
        if (info->ats_enabled) {
                sdev->dev_iotlb = 1;
                sdev->qdep = info->ats_qdep;
@@ -539,95 +607,37 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags,
                        sdev->qdep = 0;
        }
 
-       /* Finish the setup now we know we're keeping it */
-       sdev->users = 1;
-       init_rcu_head(&sdev->rcu);
-
-       if (!svm) {
-               svm = kzalloc(sizeof(*svm), GFP_KERNEL);
-               if (!svm) {
-                       ret = -ENOMEM;
-                       kfree(sdev);
-                       goto out;
-               }
-
-               if (pasid_max > intel_pasid_max_id)
-                       pasid_max = intel_pasid_max_id;
+       /* Set up the PASID table: */
+       sflags = (flags & SVM_FLAG_SUPERVISOR_MODE) ?
+                       PASID_FLAG_SUPERVISOR_MODE : 0;
+       sflags |= cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
+       spin_lock_irqsave(&iommu->lock, iflags);
+       ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid,
+                                           FLPT_DEFAULT_DID, sflags);
+       spin_unlock_irqrestore(&iommu->lock, iflags);
 
-               /* Do not use PASID 0, reserved for RID to PASID */
-               svm->pasid = ioasid_alloc(NULL, PASID_MIN,
-                                         pasid_max - 1, svm);
-               if (svm->pasid == INVALID_IOASID) {
-                       kfree(svm);
-                       kfree(sdev);
-                       ret = -ENOSPC;
-                       goto out;
-               }
-               svm->notifier.ops = &intel_mmuops;
-               svm->mm = mm;
-               svm->flags = flags;
-               INIT_LIST_HEAD_RCU(&svm->devs);
-               INIT_LIST_HEAD(&svm->list);
-               ret = -ENOMEM;
-               if (mm) {
-                       ret = mmu_notifier_register(&svm->notifier, mm);
-                       if (ret) {
-                               ioasid_put(svm->pasid);
-                               kfree(svm);
-                               kfree(sdev);
-                               goto out;
-                       }
-               }
+       if (ret)
+               goto free_sdev;
 
-               spin_lock_irqsave(&iommu->lock, iflags);
-               ret = intel_pasid_setup_first_level(iommu, dev,
-                               mm ? mm->pgd : init_mm.pgd,
-                               svm->pasid, FLPT_DEFAULT_DID,
-                               (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) |
-                               (cpu_feature_enabled(X86_FEATURE_LA57) ?
-                                PASID_FLAG_FL5LP : 0));
-               spin_unlock_irqrestore(&iommu->lock, iflags);
-               if (ret) {
-                       if (mm)
-                               mmu_notifier_unregister(&svm->notifier, mm);
-                       ioasid_put(svm->pasid);
-                       kfree(svm);
-                       kfree(sdev);
-                       goto out;
-               }
+       /* The newly allocated PASID is loaded into the mm. */
+       if (!(flags & SVM_FLAG_SUPERVISOR_MODE) && list_empty(&svm->devs))
+               load_pasid(mm, svm->pasid);
 
-               list_add_tail(&svm->list, &global_svm_list);
-               if (mm) {
-                       /* The newly allocated pasid is loaded to the mm. */
-                       load_pasid(mm, svm->pasid);
-               }
-       } else {
-               /*
-                * Binding a new device with existing PASID, need to setup
-                * the PASID entry.
-                */
-               spin_lock_irqsave(&iommu->lock, iflags);
-               ret = intel_pasid_setup_first_level(iommu, dev,
-                                               mm ? mm->pgd : init_mm.pgd,
-                                               svm->pasid, FLPT_DEFAULT_DID,
-                                               (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) |
-                                               (cpu_feature_enabled(X86_FEATURE_LA57) ?
-                                               PASID_FLAG_FL5LP : 0));
-               spin_unlock_irqrestore(&iommu->lock, iflags);
-               if (ret) {
-                       kfree(sdev);
-                       goto out;
-               }
-       }
        list_add_rcu(&sdev->list, &svm->devs);
 success:
-       sdev->pasid = svm->pasid;
-       sdev->sva.dev = dev;
-       if (sd)
-               *sd = sdev;
-       ret = 0;
-out:
-       return ret;
+       return &sdev->sva;
+
+free_sdev:
+       kfree(sdev);
+free_svm:
+       if (list_empty(&svm->devs)) {
+               if (svm->notifier.ops)
+                       mmu_notifier_unregister(&svm->notifier, mm);
+               pasid_private_remove(mm->pasid);
+               kfree(svm);
+       }
+
+       return ERR_PTR(ret);
 }
 
 /* Caller must hold pasid_mutex */
@@ -636,6 +646,7 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
        struct intel_svm_dev *sdev;
        struct intel_iommu *iommu;
        struct intel_svm *svm;
+       struct mm_struct *mm;
        int ret = -EINVAL;
 
        iommu = device_to_iommu(dev, NULL, NULL);
@@ -645,6 +656,7 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
        ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev);
        if (ret)
                goto out;
+       mm = svm->mm;
 
        if (sdev) {
                sdev->users--;
@@ -663,13 +675,13 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
                        kfree_rcu(sdev, rcu);
 
                        if (list_empty(&svm->devs)) {
-                               ioasid_put(svm->pasid);
-                               if (svm->mm) {
-                                       mmu_notifier_unregister(&svm->notifier, svm->mm);
+                               intel_svm_free_pasid(mm);
+                               if (svm->notifier.ops) {
+                                       mmu_notifier_unregister(&svm->notifier, mm);
                                        /* Clear mm's pasid. */
-                                       load_pasid(svm->mm, PASID_DISABLED);
+                                       load_pasid(mm, PASID_DISABLED);
                                }
-                               list_del(&svm->list);
+                               pasid_private_remove(svm->pasid);
                                /* We mandate that no page faults may be outstanding
                                 * for the PASID when intel_svm_unbind_mm() is called.
                                 * If that is not obeyed, subtle errors will happen.
@@ -714,22 +726,6 @@ struct page_req_dsc {
 
 #define PRQ_RING_MASK  ((0x1000 << PRQ_ORDER) - 0x20)
 
-static bool access_error(struct vm_area_struct *vma, struct page_req_dsc *req)
-{
-       unsigned long requested = 0;
-
-       if (req->exe_req)
-               requested |= VM_EXEC;
-
-       if (req->rd_req)
-               requested |= VM_READ;
-
-       if (req->wr_req)
-               requested |= VM_WRITE;
-
-       return (requested & ~vma->vm_flags) != 0;
-}
-
 static bool is_canonical_address(u64 addr)
 {
        int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
@@ -799,6 +795,8 @@ prq_retry:
                goto prq_retry;
        }
 
+       iopf_queue_flush_dev(dev);
+
        /*
         * Perform steps described in VT-d spec CH7.10 to drain page
         * requests and responses in hardware.
@@ -841,8 +839,8 @@ static int prq_to_iommu_prot(struct page_req_dsc *req)
        return prot;
 }
 
-static int
-intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc)
+static int intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
+                               struct page_req_dsc *desc)
 {
        struct iommu_fault_event event;
 
@@ -872,159 +870,136 @@ intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc)
                 */
                event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
                event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
-               memcpy(event.fault.prm.private_data, desc->priv_data,
-                      sizeof(desc->priv_data));
+               event.fault.prm.private_data[0] = desc->priv_data[0];
+               event.fault.prm.private_data[1] = desc->priv_data[1];
+       } else if (dmar_latency_enabled(iommu, DMAR_LATENCY_PRQ)) {
+               /*
+                * If the private data fields are not used by hardware, use them
+                * to monitor the PRQ handling latency.
+                */
+               event.fault.prm.private_data[0] = ktime_to_ns(ktime_get());
        }
 
        return iommu_report_device_fault(dev, &event);
 }
 
+static void handle_bad_prq_event(struct intel_iommu *iommu,
+                                struct page_req_dsc *req, int result)
+{
+       struct qi_desc desc;
+
+       pr_err("%s: Invalid page request: %08llx %08llx\n",
+              iommu->name, ((unsigned long long *)req)[0],
+              ((unsigned long long *)req)[1]);
+
+       /*
+        * Per VT-d spec. v3.0 ch7.7, system software must respond
+        * with a page group response if the private data present
+        * (PDP) or last page in group (LPIG) bit is set. This is
+        * an additional VT-d feature beyond the PCI ATS spec.
+        */
+       if (!req->lpig && !req->priv_data_present)
+               return;
+
+       desc.qw0 = QI_PGRP_PASID(req->pasid) |
+                       QI_PGRP_DID(req->rid) |
+                       QI_PGRP_PASID_P(req->pasid_present) |
+                       QI_PGRP_PDP(req->priv_data_present) |
+                       QI_PGRP_RESP_CODE(result) |
+                       QI_PGRP_RESP_TYPE;
+       desc.qw1 = QI_PGRP_IDX(req->prg_index) |
+                       QI_PGRP_LPIG(req->lpig);
+
+       if (req->priv_data_present) {
+               desc.qw2 = req->priv_data[0];
+               desc.qw3 = req->priv_data[1];
+       } else {
+               desc.qw2 = 0;
+               desc.qw3 = 0;
+       }
+
+       qi_submit_sync(iommu, &desc, 1, 0);
+}
+
 static irqreturn_t prq_event_thread(int irq, void *d)
 {
        struct intel_svm_dev *sdev = NULL;
        struct intel_iommu *iommu = d;
        struct intel_svm *svm = NULL;
-       int head, tail, handled = 0;
-       unsigned int flags = 0;
+       struct page_req_dsc *req;
+       int head, tail, handled;
+       u64 address;
 
-       /* Clear PPR bit before reading head/tail registers, to
-        * ensure that we get a new interrupt if needed. */
+       /*
+        * Clear PPR bit before reading head/tail registers, to ensure that
+        * we get a new interrupt if needed.
+        */
        writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);
 
        tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
        head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
+       handled = (head != tail);
        while (head != tail) {
-               struct vm_area_struct *vma;
-               struct page_req_dsc *req;
-               struct qi_desc resp;
-               int result;
-               vm_fault_t ret;
-               u64 address;
-
-               handled = 1;
                req = &iommu->prq[head / sizeof(*req)];
-               result = QI_RESP_INVALID;
                address = (u64)req->addr << VTD_PAGE_SHIFT;
-               if (!req->pasid_present) {
-                       pr_err("%s: Page request without PASID: %08llx %08llx\n",
-                              iommu->name, ((unsigned long long *)req)[0],
-                              ((unsigned long long *)req)[1]);
-                       goto no_pasid;
+
+               if (unlikely(!req->pasid_present)) {
+                       pr_err("IOMMU: %s: Page request without PASID\n",
+                              iommu->name);
+bad_req:
+                       svm = NULL;
+                       sdev = NULL;
+                       handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
+                       goto prq_advance;
                }
-               /* We shall not receive page request for supervisor SVM */
-               if (req->pm_req && (req->rd_req | req->wr_req)) {
-                       pr_err("Unexpected page request in Privilege Mode");
-                       /* No need to find the matching sdev as for bad_req */
-                       goto no_pasid;
+
+               if (unlikely(!is_canonical_address(address))) {
+                       pr_err("IOMMU: %s: Address is not canonical\n",
+                              iommu->name);
+                       goto bad_req;
                }
-               /* DMA read with exec requeset is not supported. */
-               if (req->exe_req && req->rd_req) {
-                       pr_err("Execution request not supported\n");
-                       goto no_pasid;
+
+               if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) {
+                       pr_err("IOMMU: %s: Page request in Privilege Mode\n",
+                              iommu->name);
+                       goto bad_req;
                }
+
+               if (unlikely(req->exe_req && req->rd_req)) {
+                       pr_err("IOMMU: %s: Execution request not supported\n",
+                              iommu->name);
+                       goto bad_req;
+               }
+
                if (!svm || svm->pasid != req->pasid) {
-                       rcu_read_lock();
-                       svm = ioasid_find(NULL, req->pasid, NULL);
-                       /* It *can't* go away, because the driver is not permitted
+                       /*
+                        * It can't go away, because the driver is not permitted
                         * to unbind the mm while any page faults are outstanding.
-                        * So we only need RCU to protect the internal idr code. */
-                       rcu_read_unlock();
-                       if (IS_ERR_OR_NULL(svm)) {
-                               pr_err("%s: Page request for invalid PASID %d: %08llx %08llx\n",
-                                      iommu->name, req->pasid, ((unsigned long long *)req)[0],
-                                      ((unsigned long long *)req)[1]);
-                               goto no_pasid;
-                       }
+                        */
+                       svm = pasid_private_find(req->pasid);
+                       if (IS_ERR_OR_NULL(svm) || (svm->flags & SVM_FLAG_SUPERVISOR_MODE))
+                               goto bad_req;
                }
 
                if (!sdev || sdev->sid != req->rid) {
-                       struct intel_svm_dev *t;
-
-                       sdev = NULL;
-                       rcu_read_lock();
-                       list_for_each_entry_rcu(t, &svm->devs, list) {
-                               if (t->sid == req->rid) {
-                                       sdev = t;
-                                       break;
-                               }
-                       }
-                       rcu_read_unlock();
+                       sdev = svm_lookup_device_by_sid(svm, req->rid);
+                       if (!sdev)
+                               goto bad_req;
                }
 
-               /* Since we're using init_mm.pgd directly, we should never take
-                * any faults on kernel addresses. */
-               if (!svm->mm)
-                       goto bad_req;
-
-               /* If address is not canonical, return invalid response */
-               if (!is_canonical_address(address))
-                       goto bad_req;
+               sdev->prq_seq_number++;
 
                /*
                 * If prq is to be handled outside iommu driver via receiver of
                 * the fault notifiers, we skip the page response here.
                 */
-               if (svm->flags & SVM_FLAG_GUEST_MODE) {
-                       if (sdev && !intel_svm_prq_report(sdev->dev, req))
-                               goto prq_advance;
-                       else
-                               goto bad_req;
-               }
-
-               /* If the mm is already defunct, don't handle faults. */
-               if (!mmget_not_zero(svm->mm))
-                       goto bad_req;
-
-               mmap_read_lock(svm->mm);
-               vma = find_extend_vma(svm->mm, address);
-               if (!vma || address < vma->vm_start)
-                       goto invalid;
-
-               if (access_error(vma, req))
-                       goto invalid;
+               if (intel_svm_prq_report(iommu, sdev->dev, req))
+                       handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
 
-               flags = FAULT_FLAG_USER | FAULT_FLAG_REMOTE;
-               if (req->wr_req)
-                       flags |= FAULT_FLAG_WRITE;
-
-               ret = handle_mm_fault(vma, address, flags, NULL);
-               if (ret & VM_FAULT_ERROR)
-                       goto invalid;
-
-               result = QI_RESP_SUCCESS;
-invalid:
-               mmap_read_unlock(svm->mm);
-               mmput(svm->mm);
-bad_req:
-               /* We get here in the error case where the PASID lookup failed,
-                  and these can be NULL. Do not use them below this point! */
-               sdev = NULL;
-               svm = NULL;
-no_pasid:
-               if (req->lpig || req->priv_data_present) {
-                       /*
-                        * Per VT-d spec. v3.0 ch7.7, system software must
-                        * respond with page group response if private data
-                        * is present (PDP) or last page in group (LPIG) bit
-                        * is set. This is an additional VT-d feature beyond
-                        * PCI ATS spec.
-                        */
-                       resp.qw0 = QI_PGRP_PASID(req->pasid) |
-                               QI_PGRP_DID(req->rid) |
-                               QI_PGRP_PASID_P(req->pasid_present) |
-                               QI_PGRP_PDP(req->priv_data_present) |
-                               QI_PGRP_RESP_CODE(result) |
-                               QI_PGRP_RESP_TYPE;
-                       resp.qw1 = QI_PGRP_IDX(req->prg_index) |
-                               QI_PGRP_LPIG(req->lpig);
-                       resp.qw2 = 0;
-                       resp.qw3 = 0;
-
-                       if (req->priv_data_present)
-                               memcpy(&resp.qw2, req->priv_data,
-                                      sizeof(req->priv_data));
-                       qi_submit_sync(iommu, &resp, 1, 0);
-               }
+               trace_prq_report(iommu, sdev->dev, req->qw_0, req->qw_1,
+                                req->priv_data[0], req->priv_data[1],
+                                sdev->prq_seq_number);
 prq_advance:
                head = (head + sizeof(*req)) & PRQ_RING_MASK;
        }
@@ -1041,6 +1016,7 @@ prq_advance:
                head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
                tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
                if (head == tail) {
+                       iopf_queue_discard_partial(iommu->iopf_queue);
                        writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
                        pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared",
                                            iommu->name);
@@ -1053,31 +1029,42 @@ prq_advance:
        return IRQ_RETVAL(handled);
 }
 
-#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva)
-struct iommu_sva *
-intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
+struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
 {
-       struct iommu_sva *sva = ERR_PTR(-EINVAL);
-       struct intel_svm_dev *sdev = NULL;
+       struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
        unsigned int flags = 0;
+       struct iommu_sva *sva;
        int ret;
 
-       /*
-        * TODO: Consolidate with generic iommu-sva bind after it is merged.
-        * It will require shared SVM data structures, i.e. combine io_mm
-        * and intel_svm etc.
-        */
        if (drvdata)
                flags = *(unsigned int *)drvdata;
+
+       if (flags & SVM_FLAG_SUPERVISOR_MODE) {
+               if (!ecap_srs(iommu->ecap)) {
+                       dev_err(dev, "%s: Supervisor PASID not supported\n",
+                               iommu->name);
+                       return ERR_PTR(-EOPNOTSUPP);
+               }
+
+               if (mm) {
+                       dev_err(dev, "%s: Supervisor PASID with user provided mm\n",
+                               iommu->name);
+                       return ERR_PTR(-EINVAL);
+               }
+
+               mm = &init_mm;
+       }
+
        mutex_lock(&pasid_mutex);
-       ret = intel_svm_bind_mm(dev, flags, mm, &sdev);
-       if (ret)
-               sva = ERR_PTR(ret);
-       else if (sdev)
-               sva = &sdev->sva;
-       else
-               WARN(!sdev, "SVM bind succeeded with no sdev!\n");
+       ret = intel_svm_alloc_pasid(dev, mm, flags);
+       if (ret) {
+               mutex_unlock(&pasid_mutex);
+               return ERR_PTR(ret);
+       }
 
+       sva = intel_svm_bind_mm(iommu, dev, mm, flags);
+       if (IS_ERR_OR_NULL(sva))
+               intel_svm_free_pasid(mm);
        mutex_unlock(&pasid_mutex);
 
        return sva;
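
For context, device drivers reach this path through the generic SVA API
rather than calling intel_svm_bind() directly. A hedged caller-side sketch
(device setup and error handling abbreviated):

	/* Hedged caller-side sketch using the generic SVA API. */
	static int driver_enable_sva(struct device *dev)
	{
		struct iommu_sva *handle;
		u32 pasid;

		handle = iommu_sva_bind_device(dev, current->mm, NULL);
		if (IS_ERR(handle))
			return PTR_ERR(handle);

		pasid = iommu_sva_get_pasid(handle);
		/* ... program the PASID into the device, issue DMA ... */
		iommu_sva_unbind_device(handle);
		return 0;
	}
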
@@ -1085,10 +1072,9 @@ intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
 
 void intel_svm_unbind(struct iommu_sva *sva)
 {
-       struct intel_svm_dev *sdev;
+       struct intel_svm_dev *sdev = to_intel_svm_dev(sva);
 
        mutex_lock(&pasid_mutex);
-       sdev = to_intel_svm_dev(sva);
        intel_svm_unbind_mm(sdev->dev, sdev->pasid);
        mutex_unlock(&pasid_mutex);
 }
@@ -1194,9 +1180,14 @@ int intel_svm_page_response(struct device *dev,
                desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
                desc.qw2 = 0;
                desc.qw3 = 0;
-               if (private_present)
-                       memcpy(&desc.qw2, prm->private_data,
-                              sizeof(prm->private_data));
+
+               if (private_present) {
+                       desc.qw2 = prm->private_data[0];
+                       desc.qw3 = prm->private_data[1];
+               } else if (prm->private_data[0]) {
+                       dmar_latency_update(iommu, DMAR_LATENCY_PRQ,
+                               ktime_to_ns(ktime_get()) - prm->private_data[0]);
+               }
 
                qi_submit_sync(iommu, &desc, 1, 0);
        }
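
This hunk is the second half of the PRQ latency sampling: when hardware
supplied no private data, intel_svm_prq_report() stamped private_data[0] with
the enqueue time, and the page response path subtracts it. In outline
(stitched together from the two hunks in this file, not a literal excerpt):

	/* Report side, on receiving the page request: */
	event.fault.prm.private_data[0] = ktime_to_ns(ktime_get());

	/* Response side, once the fault has been resolved: */
	dmar_latency_update(iommu, DMAR_LATENCY_PRQ,
			    ktime_to_ns(ktime_get()) - prm->private_data[0]);
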
index 808ab70d5df50f7839dafc489cfcc4519d05ec06..5419c4b9f27ada00f31f86b0a6eb3adc19f56753 100644 (file)
@@ -3059,9 +3059,6 @@ static int iommu_change_dev_def_domain(struct iommu_group *group,
        int ret, dev_def_dom;
        struct device *dev;
 
-       if (!group)
-               return -EINVAL;
-
        mutex_lock(&group->mutex);
 
        if (group->default_domain != group->domain) {
index b7ecd5b080398c21f9c4a8ea5eb8789a07c9cf80..b6cf5f16123bdb678548ed2f1fb077efd467a300 100644 (file)
@@ -412,12 +412,11 @@ private_find_iova(struct iova_domain *iovad, unsigned long pfn)
        return NULL;
 }
 
-static void private_free_iova(struct iova_domain *iovad, struct iova *iova)
+static void remove_iova(struct iova_domain *iovad, struct iova *iova)
 {
        assert_spin_locked(&iovad->iova_rbtree_lock);
        __cached_rbnode_delete_update(iovad, iova);
        rb_erase(&iova->node, &iovad->rbroot);
-       free_iova_mem(iova);
 }
 
 /**
@@ -452,8 +451,9 @@ __free_iova(struct iova_domain *iovad, struct iova *iova)
        unsigned long flags;
 
        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
-       private_free_iova(iovad, iova);
+       remove_iova(iovad, iova);
        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+       free_iova_mem(iova);
 }
 EXPORT_SYMBOL_GPL(__free_iova);
 
@@ -472,10 +472,13 @@ free_iova(struct iova_domain *iovad, unsigned long pfn)
 
        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
        iova = private_find_iova(iovad, pfn);
-       if (iova)
-               private_free_iova(iovad, iova);
+       if (!iova) {
+               spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+               return;
+       }
+       remove_iova(iovad, iova);
        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
-
+       free_iova_mem(iova);
 }
 EXPORT_SYMBOL_GPL(free_iova);
 
@@ -825,7 +828,8 @@ iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad)
                if (WARN_ON(!iova))
                        continue;
 
-               private_free_iova(iovad, iova);
+               remove_iova(iovad, iova);
+               free_iova_mem(iova);
        }
 
        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
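
The point of splitting private_free_iova() into remove_iova() plus a separate
free_iova_mem() call is to keep the irq-disabled iova_rbtree_lock critical
section down to the rbtree surgery, releasing the memory only after the lock
is dropped; all three call sites above follow that shape. Generically (names
hypothetical):

	spin_lock_irqsave(&tree_lock, flags);
	unlink_node(&tree, obj);	/* pointer surgery only */
	spin_unlock_irqrestore(&tree_lock, flags);
	free_obj(obj);			/* runs with the lock released */
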
index aaa6a4d59057a7ac9f0999719d37a3777610236e..51ea6f00db2f6ff65b45f3d3b1931d7dc9b4a1b3 100644 (file)
@@ -19,7 +19,6 @@
 #include <linux/iommu.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
-#include <linux/of_iommu.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/sizes.h>
index 7880f307cb2da8bfee65173ff7d62cc1b8c26fe5..3a38352b603f39be407edd18ccacfbee4ba95e92 100644 (file)
@@ -18,7 +18,6 @@
 #include <linux/iommu.h>
 #include <linux/clk.h>
 #include <linux/err.h>
-#include <linux/of_iommu.h>
 
 #include <asm/cacheflush.h>
 #include <linux/sizes.h>
index e06b8a0e2b56bddd73a5e70d0f53f2c17e9fd483..6f7c69688ce2ae4922a2586d6595706c9f234352 100644 (file)
@@ -19,7 +19,6 @@
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
 #include <linux/of_address.h>
-#include <linux/of_iommu.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
index 5915d7b3821110c6896668461450b5ca459c15cc..778e66f5f1aa59fb074094e005fe33401d00cd4a 100644 (file)
@@ -22,7 +22,6 @@
 #include <linux/list.h>
 #include <linux/module.h>
 #include <linux/of_address.h>
-#include <linux/of_iommu.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
index a9d2df001149920e7f5a2b45594bd7dc2a70fc02..5696314ae69e7d23b5ef7f71382bf95c5bda0bd7 100644 (file)
 
 #define NO_IOMMU       1
 
-/**
- * of_get_dma_window - Parse *dma-window property and returns 0 if found.
- *
- * @dn: device node
- * @prefix: prefix for property name if any
- * @index: index to start to parse
- * @busno: Returns busno if supported. Otherwise pass NULL
- * @addr: Returns address that DMA starts
- * @size: Returns the range that DMA can handle
- *
- * This supports different formats flexibly. "prefix" can be
- * configured if any. "busno" and "index" are optionally
- * specified. Set 0(or NULL) if not used.
- */
-int of_get_dma_window(struct device_node *dn, const char *prefix, int index,
-                     unsigned long *busno, dma_addr_t *addr, size_t *size)
-{
-       const __be32 *dma_window, *end;
-       int bytes, cur_index = 0;
-       char propname[NAME_MAX], addrname[NAME_MAX], sizename[NAME_MAX];
-
-       if (!dn || !addr || !size)
-               return -EINVAL;
-
-       if (!prefix)
-               prefix = "";
-
-       snprintf(propname, sizeof(propname), "%sdma-window", prefix);
-       snprintf(addrname, sizeof(addrname), "%s#dma-address-cells", prefix);
-       snprintf(sizename, sizeof(sizename), "%s#dma-size-cells", prefix);
-
-       dma_window = of_get_property(dn, propname, &bytes);
-       if (!dma_window)
-               return -ENODEV;
-       end = dma_window + bytes / sizeof(*dma_window);
-
-       while (dma_window < end) {
-               u32 cells;
-               const void *prop;
-
-               /* busno is one cell if supported */
-               if (busno)
-                       *busno = be32_to_cpup(dma_window++);
-
-               prop = of_get_property(dn, addrname, NULL);
-               if (!prop)
-                       prop = of_get_property(dn, "#address-cells", NULL);
-
-               cells = prop ? be32_to_cpup(prop) : of_n_addr_cells(dn);
-               if (!cells)
-                       return -EINVAL;
-               *addr = of_read_number(dma_window, cells);
-               dma_window += cells;
-
-               prop = of_get_property(dn, sizename, NULL);
-               cells = prop ? be32_to_cpup(prop) : of_n_size_cells(dn);
-               if (!cells)
-                       return -EINVAL;
-               *size = of_read_number(dma_window, cells);
-               dma_window += cells;
-
-               if (cur_index++ == index)
-                       break;
-       }
-       return 0;
-}
-EXPORT_SYMBOL_GPL(of_get_dma_window);
-
 static int of_iommu_xlate(struct device *dev,
                          struct of_phandle_args *iommu_spec)
 {
index 26e517eb0dd3a8073e2c9bd1f4c74623f09360cf..91749654fd4909d0bfc1e7536908439eeefa7d60 100644 (file)
@@ -22,7 +22,6 @@
 #include <linux/io.h>
 #include <linux/pm_runtime.h>
 #include <linux/of.h>
-#include <linux/of_iommu.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
 #include <linux/regmap.h>
index 7a2932772fdf5e09436efca861f54aee236d132f..94b9d8e5b9a40c4eb5796366a3ed539f3b3f6a83 100644 (file)
@@ -21,7 +21,6 @@
 #include <linux/mm.h>
 #include <linux/init.h>
 #include <linux/of.h>
-#include <linux/of_iommu.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
@@ -96,6 +95,15 @@ static const char * const rk_iommu_clocks[] = {
        "aclk", "iface",
 };
 
+struct rk_iommu_ops {
+       phys_addr_t (*pt_address)(u32 dte);
+       u32 (*mk_dtentries)(dma_addr_t pt_dma);
+       u32 (*mk_ptentries)(phys_addr_t page, int prot);
+       phys_addr_t (*dte_addr_phys)(u32 addr);
+       u32 (*dma_addr_dte)(dma_addr_t dt_dma);
+       u64 dma_bit_mask;
+};
+
 struct rk_iommu {
        struct device *dev;
        void __iomem **bases;
@@ -116,6 +124,7 @@ struct rk_iommudata {
 };
 
 static struct device *dma_dev;
+static const struct rk_iommu_ops *rk_ops;
 
 static inline void rk_table_flush(struct rk_iommu_domain *dom, dma_addr_t dma,
                                  unsigned int count)
@@ -179,6 +188,33 @@ static inline phys_addr_t rk_dte_pt_address(u32 dte)
        return (phys_addr_t)dte & RK_DTE_PT_ADDRESS_MASK;
 }
 
+/*
+ * In v2:
+ * 31:12 - PT address bits 31:12
+ * 11: 8 - PT address bit 35:32
+ *  7: 4 - PT address bit 39:36
+ *  3: 1 - Reserved
+ *     0 - 1 if PT @ PT address is valid
+ */
+#define RK_DTE_PT_ADDRESS_MASK_V2 GENMASK_ULL(31, 4)
+#define DTE_HI_MASK1   GENMASK(11, 8)
+#define DTE_HI_MASK2   GENMASK(7, 4)
+#define DTE_HI_SHIFT1  24 /* shift bit 8 to bit 32 */
+#define DTE_HI_SHIFT2  32 /* shift bit 4 to bit 36 */
+#define PAGE_DESC_HI_MASK1     GENMASK_ULL(39, 36)
+#define PAGE_DESC_HI_MASK2     GENMASK_ULL(35, 32)
+
+static inline phys_addr_t rk_dte_pt_address_v2(u32 dte)
+{
+       u64 dte_v2 = dte;
+
+       dte_v2 = ((dte_v2 & DTE_HI_MASK2) << DTE_HI_SHIFT2) |
+                ((dte_v2 & DTE_HI_MASK1) << DTE_HI_SHIFT1) |
+                (dte_v2 & RK_DTE_PT_ADDRESS_MASK);
+
+       return (phys_addr_t)dte_v2;
+}
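
A worked decode of the layout above as plain, runnable userspace C (the DTE
value is made up): bits 7:4 carry address bits 39:36 and bits 11:8 carry bits
35:32, exactly as rk_dte_pt_address_v2() reassembles them. The encode
direction must mirror this mapping for round-trips above 4 GiB to hold.

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t dte = 0x56789431;	/* valid bit (bit 0) set */
		uint64_t pt;

		pt = ((uint64_t)(dte & 0x000000f0) << 32) | /*  7:4  -> 39:36 */
		     ((uint64_t)(dte & 0x00000f00) << 24) | /* 11:8  -> 35:32 */
		      (dte & 0xfffff000);		    /* 31:12 -> 31:12 */

		printf("PT @ 0x%llx\n", (unsigned long long)pt); /* 0x3456789000 */
		return 0;
	}
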
+
 static inline bool rk_dte_is_pt_valid(u32 dte)
 {
        return dte & RK_DTE_PT_VALID;
@@ -189,6 +225,15 @@ static inline u32 rk_mk_dte(dma_addr_t pt_dma)
        return (pt_dma & RK_DTE_PT_ADDRESS_MASK) | RK_DTE_PT_VALID;
 }
 
+static inline u32 rk_mk_dte_v2(dma_addr_t pt_dma)
+{
+       pt_dma = (pt_dma & RK_DTE_PT_ADDRESS_MASK) |
+                ((pt_dma & PAGE_DESC_HI_MASK1) >> DTE_HI_SHIFT1) |
+                (pt_dma & PAGE_DESC_HI_MASK2) >> DTE_HI_SHIFT2;
+
+       return (pt_dma & RK_DTE_PT_ADDRESS_MASK_V2) | RK_DTE_PT_VALID;
+}
+
 /*
  * Each PTE has a Page address, some flags and a valid bit:
  * +---------------------+---+-------+-+
@@ -215,11 +260,6 @@ static inline u32 rk_mk_dte(dma_addr_t pt_dma)
 #define RK_PTE_PAGE_READABLE      BIT(1)
 #define RK_PTE_PAGE_VALID         BIT(0)
 
-static inline phys_addr_t rk_pte_page_address(u32 pte)
-{
-       return (phys_addr_t)pte & RK_PTE_PAGE_ADDRESS_MASK;
-}
-
 static inline bool rk_pte_is_page_valid(u32 pte)
 {
        return pte & RK_PTE_PAGE_VALID;
@@ -235,6 +275,29 @@ static u32 rk_mk_pte(phys_addr_t page, int prot)
        return page | flags | RK_PTE_PAGE_VALID;
 }
 
+/*
+ * In v2:
+ * 31:12 - Page address bits 31:12
+ *  11:9 - Page address bit 34:32
+ *   8:4 - Page address bit 39:35
+ *     3 - Security
+ *     2 - Readable
+ *     1 - Writable
+ *     0 - 1 if Page @ Page address is valid
+ */
+#define RK_PTE_PAGE_READABLE_V2      BIT(2)
+#define RK_PTE_PAGE_WRITABLE_V2      BIT(1)
+
+static u32 rk_mk_pte_v2(phys_addr_t page, int prot)
+{
+       u32 flags = 0;
+
+       flags |= (prot & IOMMU_READ) ? RK_PTE_PAGE_READABLE_V2 : 0;
+       flags |= (prot & IOMMU_WRITE) ? RK_PTE_PAGE_WRITABLE_V2 : 0;
+
+       return rk_mk_dte_v2(page) | flags;
+}
+
 static u32 rk_mk_pte_invalid(u32 pte)
 {
        return pte & ~RK_PTE_PAGE_VALID;
@@ -448,10 +511,10 @@ static int rk_iommu_force_reset(struct rk_iommu *iommu)
         * and verifying that upper 5 nybbles are read back.
         */
        for (i = 0; i < iommu->num_mmu; i++) {
-               rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, DTE_ADDR_DUMMY);
+               dte_addr = rk_ops->pt_address(DTE_ADDR_DUMMY);
+               rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, dte_addr);
 
-               dte_addr = rk_iommu_read(iommu->bases[i], RK_MMU_DTE_ADDR);
-               if (dte_addr != (DTE_ADDR_DUMMY & RK_DTE_PT_ADDRESS_MASK)) {
+               if (dte_addr != rk_iommu_read(iommu->bases[i], RK_MMU_DTE_ADDR)) {
                        dev_err(iommu->dev, "Error during raw reset. MMU_DTE_ADDR is not functioning\n");
                        return -EFAULT;
                }
@@ -470,6 +533,31 @@ static int rk_iommu_force_reset(struct rk_iommu *iommu)
        return 0;
 }
 
+static inline phys_addr_t rk_dte_addr_phys(u32 addr)
+{
+       return (phys_addr_t)addr;
+}
+
+static inline u32 rk_dma_addr_dte(dma_addr_t dt_dma)
+{
+       return dt_dma;
+}
+
+#define DT_HI_MASK GENMASK_ULL(39, 32)
+#define DT_SHIFT   28
+
+static inline phys_addr_t rk_dte_addr_phys_v2(u32 addr)
+{
+       return (phys_addr_t)(addr & RK_DTE_PT_ADDRESS_MASK) |
+              ((addr & DT_HI_MASK) << DT_SHIFT);
+}
+
+static inline u32 rk_dma_addr_dte_v2(dma_addr_t dt_dma)
+{
+       return (dt_dma & RK_DTE_PT_ADDRESS_MASK) |
+              ((dt_dma & DT_HI_MASK) >> DT_SHIFT);
+}
+
 static void log_iova(struct rk_iommu *iommu, int index, dma_addr_t iova)
 {
        void __iomem *base = iommu->bases[index];
@@ -489,7 +577,7 @@ static void log_iova(struct rk_iommu *iommu, int index, dma_addr_t iova)
        page_offset = rk_iova_page_offset(iova);
 
        mmu_dte_addr = rk_iommu_read(base, RK_MMU_DTE_ADDR);
-       mmu_dte_addr_phys = (phys_addr_t)mmu_dte_addr;
+       mmu_dte_addr_phys = rk_ops->dte_addr_phys(mmu_dte_addr);
 
        dte_addr_phys = mmu_dte_addr_phys + (4 * dte_index);
        dte_addr = phys_to_virt(dte_addr_phys);
@@ -498,14 +586,14 @@ static void log_iova(struct rk_iommu *iommu, int index, dma_addr_t iova)
        if (!rk_dte_is_pt_valid(dte))
                goto print_it;
 
-       pte_addr_phys = rk_dte_pt_address(dte) + (pte_index * 4);
+       pte_addr_phys = rk_ops->pt_address(dte) + (pte_index * 4);
        pte_addr = phys_to_virt(pte_addr_phys);
        pte = *pte_addr;
 
        if (!rk_pte_is_page_valid(pte))
                goto print_it;
 
-       page_addr_phys = rk_pte_page_address(pte) + page_offset;
+       page_addr_phys = rk_ops->pt_address(pte) + page_offset;
        page_flags = pte & RK_PTE_PAGE_FLAGS_MASK;
 
 print_it:
@@ -601,13 +689,13 @@ static phys_addr_t rk_iommu_iova_to_phys(struct iommu_domain *domain,
        if (!rk_dte_is_pt_valid(dte))
                goto out;
 
-       pt_phys = rk_dte_pt_address(dte);
+       pt_phys = rk_ops->pt_address(dte);
        page_table = (u32 *)phys_to_virt(pt_phys);
        pte = page_table[rk_iova_pte_index(iova)];
        if (!rk_pte_is_page_valid(pte))
                goto out;
 
-       phys = rk_pte_page_address(pte) + rk_iova_page_offset(iova);
+       phys = rk_ops->pt_address(pte) + rk_iova_page_offset(iova);
 out:
        spin_unlock_irqrestore(&rk_domain->dt_lock, flags);
 
@@ -679,14 +767,13 @@ static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain,
                return ERR_PTR(-ENOMEM);
        }
 
-       dte = rk_mk_dte(pt_dma);
+       dte = rk_ops->mk_dtentries(pt_dma);
        *dte_addr = dte;
 
-       rk_table_flush(rk_domain, pt_dma, NUM_PT_ENTRIES);
        rk_table_flush(rk_domain,
                       rk_domain->dt_dma + dte_index * sizeof(u32), 1);
 done:
-       pt_phys = rk_dte_pt_address(dte);
+       pt_phys = rk_ops->pt_address(dte);
        return (u32 *)phys_to_virt(pt_phys);
 }
 
@@ -728,7 +815,7 @@ static int rk_iommu_map_iova(struct rk_iommu_domain *rk_domain, u32 *pte_addr,
                if (rk_pte_is_page_valid(pte))
                        goto unwind;
 
-               pte_addr[pte_count] = rk_mk_pte(paddr, prot);
+               pte_addr[pte_count] = rk_ops->mk_ptentries(paddr, prot);
 
                paddr += SPAGE_SIZE;
        }
@@ -750,7 +837,7 @@ unwind:
                            pte_count * SPAGE_SIZE);
 
        iova += pte_count * SPAGE_SIZE;
-       page_phys = rk_pte_page_address(pte_addr[pte_count]);
+       page_phys = rk_ops->pt_address(pte_addr[pte_count]);
        pr_err("iova: %pad already mapped to %pa cannot remap to phys: %pa prot: %#x\n",
               &iova, &page_phys, &paddr, prot);
 
@@ -785,7 +872,8 @@ static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova,
        dte_index = rk_domain->dt[rk_iova_dte_index(iova)];
        pte_index = rk_iova_pte_index(iova);
        pte_addr = &page_table[pte_index];
-       pte_dma = rk_dte_pt_address(dte_index) + pte_index * sizeof(u32);
+
+       pte_dma = rk_ops->pt_address(dte_index) + pte_index * sizeof(u32);
        ret = rk_iommu_map_iova(rk_domain, pte_addr, pte_dma, iova,
                                paddr, size, prot);
 
@@ -821,7 +909,7 @@ static size_t rk_iommu_unmap(struct iommu_domain *domain, unsigned long _iova,
                return 0;
        }
 
-       pt_phys = rk_dte_pt_address(dte);
+       pt_phys = rk_ops->pt_address(dte);
        pte_addr = (u32 *)phys_to_virt(pt_phys) + rk_iova_pte_index(iova);
        pte_dma = pt_phys + rk_iova_pte_index(iova) * sizeof(u32);
        unmap_size = rk_iommu_unmap_iova(rk_domain, pte_addr, pte_dma, size);
@@ -879,7 +967,7 @@ static int rk_iommu_enable(struct rk_iommu *iommu)
 
        for (i = 0; i < iommu->num_mmu; i++) {
                rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR,
-                              rk_domain->dt_dma);
+                              rk_ops->dma_addr_dte(rk_domain->dt_dma));
                rk_iommu_base_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE);
                rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK, RK_MMU_IRQ_MASK);
        }
@@ -1004,8 +1092,6 @@ static struct iommu_domain *rk_iommu_domain_alloc(unsigned type)
                goto err_free_dt;
        }
 
-       rk_table_flush(rk_domain, rk_domain->dt_dma, NUM_DT_ENTRIES);
-
        spin_lock_init(&rk_domain->iommus_lock);
        spin_lock_init(&rk_domain->dt_lock);
        INIT_LIST_HEAD(&rk_domain->iommus);
@@ -1037,7 +1123,7 @@ static void rk_iommu_domain_free(struct iommu_domain *domain)
        for (i = 0; i < NUM_DT_ENTRIES; i++) {
                u32 dte = rk_domain->dt[i];
                if (rk_dte_is_pt_valid(dte)) {
-                       phys_addr_t pt_phys = rk_dte_pt_address(dte);
+                       phys_addr_t pt_phys = rk_ops->pt_address(dte);
                        u32 *page_table = phys_to_virt(pt_phys);
                        dma_unmap_single(dma_dev, pt_phys,
                                         SPAGE_SIZE, DMA_TO_DEVICE);
@@ -1127,6 +1213,7 @@ static int rk_iommu_probe(struct platform_device *pdev)
        struct device *dev = &pdev->dev;
        struct rk_iommu *iommu;
        struct resource *res;
+       const struct rk_iommu_ops *ops;
        int num_res = pdev->num_resources;
        int err, i;
 
@@ -1138,6 +1225,17 @@ static int rk_iommu_probe(struct platform_device *pdev)
        iommu->dev = dev;
        iommu->num_mmu = 0;
 
+       ops = of_device_get_match_data(dev);
+       if (!rk_ops)
+               rk_ops = ops;
+
+       /*
+        * This should not happen unless different versions of the
+        * hardware block are embedded in the same SoC.
+        */
+       if (WARN_ON(rk_ops != ops))
+               return -EINVAL;
+
        iommu->bases = devm_kcalloc(dev, num_res, sizeof(*iommu->bases),
                                    GFP_KERNEL);
        if (!iommu->bases)
@@ -1226,6 +1324,8 @@ static int rk_iommu_probe(struct platform_device *pdev)
                }
        }
 
+       dma_set_mask_and_coherent(dev, rk_ops->dma_bit_mask);
+
        return 0;
 err_remove_sysfs:
        iommu_device_sysfs_remove(&iommu->iommu);
@@ -1277,8 +1377,31 @@ static const struct dev_pm_ops rk_iommu_pm_ops = {
                                pm_runtime_force_resume)
 };
 
+static struct rk_iommu_ops iommu_data_ops_v1 = {
+       .pt_address = &rk_dte_pt_address,
+       .mk_dtentries = &rk_mk_dte,
+       .mk_ptentries = &rk_mk_pte,
+       .dte_addr_phys = &rk_dte_addr_phys,
+       .dma_addr_dte = &rk_dma_addr_dte,
+       .dma_bit_mask = DMA_BIT_MASK(32),
+};
+
+static struct rk_iommu_ops iommu_data_ops_v2 = {
+       .pt_address = &rk_dte_pt_address_v2,
+       .mk_dtentries = &rk_mk_dte_v2,
+       .mk_ptentries = &rk_mk_pte_v2,
+       .dte_addr_phys = &rk_dte_addr_phys_v2,
+       .dma_addr_dte = &rk_dma_addr_dte_v2,
+       .dma_bit_mask = DMA_BIT_MASK(40),
+};
+
 static const struct of_device_id rk_iommu_dt_ids[] = {
-       { .compatible = "rockchip,iommu" },
+       {       .compatible = "rockchip,iommu",
+               .data = &iommu_data_ops_v1,
+       },
+       {       .compatible = "rockchip,rk3568-iommu",
+               .data = &iommu_data_ops_v2,
+       },
        { /* sentinel */ }
 };
 
index c6e5ee4d9cef83e81b0896d0811c673d3be09b89..6abdcab7273be6de75cf80a732db0c75c8417d53 100644 (file)
 #include <linux/amba/bus.h>
 #include <linux/delay.h>
 #include <linux/dma-iommu.h>
+#include <linux/dma-map-ops.h>
 #include <linux/freezer.h>
 #include <linux/interval_tree.h>
 #include <linux/iommu.h>
 #include <linux/module.h>
-#include <linux/of_iommu.h>
 #include <linux/of_platform.h>
 #include <linux/pci.h>
 #include <linux/platform_device.h>
@@ -904,6 +904,15 @@ err_free_dev:
        return ERR_PTR(ret);
 }
 
+static void viommu_probe_finalize(struct device *dev)
+{
+#ifndef CONFIG_ARCH_HAS_SETUP_DMA_OPS
+       /* First clear the DMA ops in case we're switching from a DMA domain */
+       set_dma_ops(dev, NULL);
+       iommu_setup_dma_ops(dev, 0, U64_MAX);
+#endif
+}
+
 static void viommu_release_device(struct device *dev)
 {
        struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
@@ -940,6 +949,7 @@ static struct iommu_ops viommu_ops = {
        .iova_to_phys           = viommu_iova_to_phys,
        .iotlb_sync             = viommu_iotlb_sync,
        .probe_device           = viommu_probe_device,
+       .probe_finalize         = viommu_probe_finalize,
        .release_device         = viommu_release_device,
        .device_group           = viommu_device_group,
        .get_resv_regions       = viommu_get_resv_regions,
index 25d448f5af91c9d60fc4fba76bd69bfb1a691a80..74afbb7a4f5ecb5c7ee4424f84e754ad215b49ba 100644 (file)
@@ -17,7 +17,6 @@
 #include <linux/slab.h>
 #include <linux/of_address.h>
 #include <linux/of_device.h>
-#include <linux/of_iommu.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
index c6bd4f9a80ba48ffa976b069b4c46fcb74b86b8e..1ae993fee4a5d479a2d659c29bbe1dad85a1e2a1 100644 (file)
@@ -592,6 +592,9 @@ struct acpi_pci_root {
 
 bool acpi_dma_supported(const struct acpi_device *adev);
 enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev);
+int acpi_iommu_fwspec_init(struct device *dev, u32 id,
+                          struct fwnode_handle *fwnode,
+                          const struct iommu_ops *ops);
 int acpi_dma_get_range(struct device *dev, u64 *dma_addr, u64 *offset,
                       u64 *size);
 int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr,
index b338613fb536565fd2fe309f9a30355f060ce460..6bb36fd6ba31a6eb035ec263b71c330304eea4ee 100644 (file)
@@ -260,9 +260,12 @@ void acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa);
 
 #ifdef CONFIG_ARM64
 void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa);
+void acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size);
 #else
 static inline void
 acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) { }
+static inline void
+acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) { }
 #endif
 
 int acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma);
index 1a12baa58e409b05ea715704cb8e27ff6b95c195..f1f0842a2cb2beb40a6cd1dd323dcb92303171a5 100644 (file)
@@ -34,9 +34,8 @@ struct irq_domain *iort_get_device_domain(struct device *dev, u32 id,
 void acpi_configure_pmsi_domain(struct device *dev);
 int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id);
 /* IOMMU interface */
-void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *size);
-const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
-                                               const u32 *id_in);
+int iort_dma_get_ranges(struct device *dev, u64 *size);
+int iort_iommu_configure_id(struct device *dev, const u32 *id_in);
 int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head);
 phys_addr_t acpi_iort_dma_get_max_cpu_address(void);
 #else
@@ -48,11 +47,10 @@ static inline struct irq_domain *iort_get_device_domain(
 { return NULL; }
 static inline void acpi_configure_pmsi_domain(struct device *dev) { }
 /* IOMMU interface */
-static inline void iort_dma_setup(struct device *dev, u64 *dma_addr,
-                                 u64 *size) { }
-static inline const struct iommu_ops *iort_iommu_configure_id(
-                                     struct device *dev, const u32 *id_in)
-{ return NULL; }
+static inline int iort_dma_get_ranges(struct device *dev, u64 *size)
+{ return -ENODEV; }
+static inline int iort_iommu_configure_id(struct device *dev, const u32 *id_in)
+{ return -ENODEV; }
 static inline
 int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
 { return 0; }
diff --git a/include/linux/acpi_viot.h b/include/linux/acpi_viot.h
new file mode 100644 (file)
index 0000000..1eb8ee5
--- /dev/null
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __ACPI_VIOT_H__
+#define __ACPI_VIOT_H__
+
+#include <linux/acpi.h>
+
+#ifdef CONFIG_ACPI_VIOT
+void __init acpi_viot_init(void);
+int viot_iommu_configure(struct device *dev);
+#else
+static inline void acpi_viot_init(void) {}
+static inline int viot_iommu_configure(struct device *dev)
+{
+       return -ENODEV;
+}
+#endif
+
+#endif /* __ACPI_VIOT_H__ */
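
The new header follows the standard kernel stub convention: with CONFIG_ACPI_VIOT disabled, the static inlines compile to a no-op and -ENODEV, so call sites stay free of #ifdef clutter. A hedged sketch of what a caller looks like under that convention (example_iommu_setup() is illustrative, not part of this patch):

	#include <linux/acpi_viot.h>

	static int example_iommu_setup(struct device *dev)
	{
		int ret = viot_iommu_configure(dev);

		/*
		 * With CONFIG_ACPI_VIOT=n the stub returns -ENODEV; a
		 * caller would typically treat that as "no VIOT
		 * description, try the next firmware table" rather than
		 * as a hard failure.
		 */
		if (ret == -ENODEV)
			return 0;
		return ret;
	}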
index 6e75a2d689b413bd66fe806695c627d1a071e6ce..758ca4694257d8e2b8ba0febc2fb5f0f115d48f9 100644 (file)
@@ -19,7 +19,7 @@ int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base);
 void iommu_put_dma_cookie(struct iommu_domain *domain);
 
 /* Setup call for arch DMA mapping code */
-void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size);
+void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit);
 
 /* The DMA API isn't _quite_ the whole story, though... */
 /*
@@ -50,7 +50,7 @@ struct msi_msg;
 struct device;
 
 static inline void iommu_setup_dma_ops(struct device *dev, u64 dma_base,
-               u64 size)
+                                      u64 dma_limit)
 {
 }
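
Only a parameter rename is visible here, but it records a real semantic change from the dma-iommu rework in this series: callers now pass the highest usable DMA address (an inclusive limit) instead of an aperture size, which is why viommu_probe_finalize() above hands in U64_MAX to cover the whole 64-bit space. A hedged sketch of converting a size-based caller, with dma_base and size as hypothetical locals:

	u64 dma_base = 0;
	u64 size = SZ_4G;		/* from <linux/sizes.h> */

	/* Old convention passed the size as the third argument: */
	/* iommu_setup_dma_ops(dev, dma_base, size); */

	/* New convention passes the inclusive upper address limit: */
	iommu_setup_dma_ops(dev, dma_base, dma_base + size - 1);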
 
index 03faf20a6817e86e2124435420f765dd1500cc93..d0fa0b31994d0d6e60662b15046d3d7c1c6658d9 100644 (file)
@@ -537,7 +537,7 @@ struct context_entry {
 struct dmar_domain {
        int     nid;                    /* node id */
 
-       unsigned        iommu_refcnt[DMAR_UNITS_SUPPORTED];
+       unsigned int iommu_refcnt[DMAR_UNITS_SUPPORTED];
                                        /* Refcount of devices per iommu */
 
 
@@ -546,7 +546,10 @@ struct dmar_domain {
                                         * domain ids are 16 bit wide according
                                         * to VT-d spec, section 9.3 */
 
-       bool has_iotlb_device;
+       u8 has_iotlb_device: 1;
+       u8 iommu_coherency: 1;          /* indicate coherency of iommu access */
+       u8 iommu_snooping: 1;           /* indicate snooping control feature */
+
        struct list_head devices;       /* all devices' list */
        struct list_head subdevices;    /* all subdevices' list */
        struct iova_domain iovad;       /* iova's that belong to this domain */
@@ -558,10 +561,6 @@ struct dmar_domain {
        int             agaw;
 
        int             flags;          /* flags to find out type of domain */
-
-       int             iommu_coherency;/* indicate coherency of iommu access */
-       int             iommu_snooping; /* indicate snooping control feature*/
-       int             iommu_count;    /* reference count of iommu */
        int             iommu_superpage;/* Level of superpages supported:
                                           0 == 4KiB (no superpages), 1 == 2MiB,
                                           2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
@@ -606,6 +605,8 @@ struct intel_iommu {
        struct completion prq_complete;
        struct ioasid_allocator_ops pasid_allocator; /* Custom allocator for PASIDs */
 #endif
+       struct iopf_queue *iopf_queue;
+       unsigned char iopfq_name[16];
        struct q_inval  *qi;            /* Queued invalidation info */
        u32 *iommu_state; /* Store iommu states between suspend and resume.*/
 
@@ -619,6 +620,7 @@ struct intel_iommu {
        u32             flags;      /* Software defined flags */
 
        struct dmar_drhd_unit *drhd;
+       void *perf_statistic;
 };
 
 /* Per subdevice private data */
@@ -776,6 +778,7 @@ struct intel_svm_dev {
        struct device *dev;
        struct intel_iommu *iommu;
        struct iommu_sva sva;
+       unsigned long prq_seq_number;
        u32 pasid;
        int users;
        u16 did;
@@ -791,7 +794,6 @@ struct intel_svm {
        u32 pasid;
        int gpasid; /* In case that guest PASID is different from host PASID */
        struct list_head devs;
-       struct list_head list;
 };
 #else
 static inline void intel_svm_check(struct intel_iommu *iommu) {}
@@ -827,4 +829,32 @@ static inline int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
 #define intel_iommu_enabled (0)
 #endif
 
+static inline const char *decode_prq_descriptor(char *str, size_t size,
+               u64 dw0, u64 dw1, u64 dw2, u64 dw3)
+{
+       char *buf = str;
+       int bytes;
+
+       bytes = snprintf(buf, size,
+                        "rid=0x%llx addr=0x%llx %c%c%c%c%c pasid=0x%llx index=0x%llx",
+                        FIELD_GET(GENMASK_ULL(31, 16), dw0),
+                        FIELD_GET(GENMASK_ULL(63, 12), dw1),
+                        dw1 & BIT_ULL(0) ? 'r' : '-',
+                        dw1 & BIT_ULL(1) ? 'w' : '-',
+                        dw0 & BIT_ULL(52) ? 'x' : '-',
+                        dw0 & BIT_ULL(53) ? 'p' : '-',
+                        dw1 & BIT_ULL(2) ? 'l' : '-',
+                        FIELD_GET(GENMASK_ULL(51, 32), dw0),
+                        FIELD_GET(GENMASK_ULL(11, 3), dw1));
+
+       /* Private Data */
+       if (dw0 & BIT_ULL(9)) {
+               size -= bytes;
+               buf += bytes;
+               snprintf(buf, size, " private=0x%llx/0x%llx\n", dw2, dw3);
+       }
+
+       return str;
+}
+
 #endif
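
decode_prq_descriptor() renders a raw four-quadword page request into human-readable form and returns its first argument, so it can sit directly inside a printf-style call, exactly as the prq_report trace event below uses it. A hedged usage sketch, with dw0..dw3 standing in for quadwords read from the page request queue:

	char buf[256];	/* mirrors the MSG_MAX-sized buffer the trace event allocates */

	pr_info("prq: %s\n",
		decode_prq_descriptor(buf, sizeof(buf), dw0, dw1, dw2, dw3));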
index 16f4b3e87f2068627f0c57fd2d902662d10e0821..55c1eb300a86bb5af06b709be5b522db8ee1d9f1 100644 (file)
@@ -2,29 +2,18 @@
 #ifndef __OF_IOMMU_H
 #define __OF_IOMMU_H
 
-#include <linux/device.h>
-#include <linux/iommu.h>
-#include <linux/of.h>
+struct device;
+struct device_node;
+struct iommu_ops;
 
 #ifdef CONFIG_OF_IOMMU
 
-extern int of_get_dma_window(struct device_node *dn, const char *prefix,
-                            int index, unsigned long *busno, dma_addr_t *addr,
-                            size_t *size);
-
 extern const struct iommu_ops *of_iommu_configure(struct device *dev,
                                        struct device_node *master_np,
                                        const u32 *id);
 
 #else
 
-static inline int of_get_dma_window(struct device_node *dn, const char *prefix,
-                           int index, unsigned long *busno, dma_addr_t *addr,
-                           size_t *size)
-{
-       return -EINVAL;
-}
-
 static inline const struct iommu_ops *of_iommu_configure(struct device *dev,
                                         struct device_node *master_np,
                                         const u32 *id)
index d233f2916584fe61c8556ccac106d4dc78dc2c3a..e5c1ca6d16eea36e34b6ca78aaf4709267e47bb7 100644 (file)
@@ -15,6 +15,8 @@
 #include <linux/tracepoint.h>
 #include <linux/intel-iommu.h>
 
+#define MSG_MAX                256
+
 TRACE_EVENT(qi_submit,
        TP_PROTO(struct intel_iommu *iommu, u64 qw0, u64 qw1, u64 qw2, u64 qw3),
 
@@ -51,6 +53,41 @@ TRACE_EVENT(qi_submit,
                __entry->qw0, __entry->qw1, __entry->qw2, __entry->qw3
        )
 );
+
+TRACE_EVENT(prq_report,
+       TP_PROTO(struct intel_iommu *iommu, struct device *dev,
+                u64 dw0, u64 dw1, u64 dw2, u64 dw3,
+                unsigned long seq),
+
+       TP_ARGS(iommu, dev, dw0, dw1, dw2, dw3, seq),
+
+       TP_STRUCT__entry(
+               __field(u64, dw0)
+               __field(u64, dw1)
+               __field(u64, dw2)
+               __field(u64, dw3)
+               __field(unsigned long, seq)
+               __string(iommu, iommu->name)
+               __string(dev, dev_name(dev))
+               __dynamic_array(char, buff, MSG_MAX)
+       ),
+
+       TP_fast_assign(
+               __entry->dw0 = dw0;
+               __entry->dw1 = dw1;
+               __entry->dw2 = dw2;
+               __entry->dw3 = dw3;
+               __entry->seq = seq;
+               __assign_str(iommu, iommu->name);
+               __assign_str(dev, dev_name(dev));
+       ),
+
+       TP_printk("%s/%s seq# %lu: %s",
+               __get_str(iommu), __get_str(dev), __entry->seq,
+               decode_prq_descriptor(__get_str(buff), MSG_MAX, __entry->dw0,
+                                     __entry->dw1, __entry->dw2, __entry->dw3)
+       )
+);
 #endif /* _TRACE_INTEL_IOMMU_H */
 
 /* This part must be outside protection */
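
TRACE_EVENT(prq_report, ...) expands into a trace_prq_report() function for the fault path to call; note that decode_prq_descriptor() runs in TP_printk, i.e. only when the ring buffer is read, so the reporting hot path just copies the four quadwords. A hedged sketch of the emitting side, where the handler context, the desc[] array, and the sdev variable are assumptions rather than code from this patch:

	/* In the page request interrupt handler, per descriptor: */
	trace_prq_report(iommu, dev, desc[0], desc[1], desc[2], desc[3],
			 sdev->prq_seq_number++);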