Merge tag 'binfmt-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb...
[linux-2.6-block.git] / drivers / misc / cxl / pci.c
index a08fcc888a71df37d4fcf6fd37d8716d71786d71..d152e2de8c9375e2760b03cd93857d542f9a36d4 100644 (file)
@@ -55,6 +55,8 @@
        pci_read_config_byte(dev, vsec + 0xa, dest)
 #define CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val) \
        pci_write_config_byte(dev, vsec + 0xa, val)
+#define CXL_WRITE_VSEC_MODE_CONTROL_BUS(bus, devfn, vsec, val) \
+       pci_bus_write_config_byte(bus, devfn, vsec + 0xa, val) /* same vsec + 0xa byte as CXL_WRITE_VSEC_MODE_CONTROL, addressed by bus/devfn instead of a pci_dev */
 #define CXL_VSEC_PROTOCOL_MASK   0xe0
 #define CXL_VSEC_PROTOCOL_1024TB 0x80
 #define CXL_VSEC_PROTOCOL_512TB  0x40
@@ -352,13 +354,10 @@ static u64 get_capp_unit_id(struct device_node *np)
        return 0;
 }
 
-static int init_implementation_adapter_regs(struct cxl *adapter, struct pci_dev *dev)
+static int calc_capp_routing(struct pci_dev *dev, u64 *chipid, u64 *capp_unit_id)
 {
        struct device_node *np;
        const __be32 *prop;
-       u64 psl_dsnctl;
-       u64 chipid;
-       u64 capp_unit_id;
 
        if (!(np = pnv_pci_get_phb_node(dev)))
                return -ENODEV;
@@ -367,14 +366,28 @@ static int init_implementation_adapter_regs(struct cxl *adapter, struct pci_dev
                np = of_get_next_parent(np);
        if (!np)
                return -ENODEV;
-       chipid = be32_to_cpup(prop);
-       capp_unit_id = get_capp_unit_id(np);
+       *chipid = be32_to_cpup(prop);
+       *capp_unit_id = get_capp_unit_id(np);
        of_node_put(np);
-       if (!capp_unit_id) {
+       if (!*capp_unit_id) {
                pr_err("cxl: invalid capp unit id\n");
                return -ENODEV;
        }
 
+       return 0;
+}
+
+static int init_implementation_adapter_psl_regs(struct cxl *adapter, struct pci_dev *dev)
+{
+       u64 psl_dsnctl;
+       u64 chipid;
+       u64 capp_unit_id;
+       int rc;
+
+       rc = calc_capp_routing(dev, &chipid, &capp_unit_id);
+       if (rc)
+               return rc;
+
        psl_dsnctl = 0x0000900000000000ULL; /* pteupd ttype, scdone */
        psl_dsnctl |= (0x2ULL << (63-38)); /* MMIO hang pulse: 256 us */
        /* Tell PSL where to route data to */
@@ -393,8 +406,61 @@ static int init_implementation_adapter_regs(struct cxl *adapter, struct pci_dev
        return 0;
 }
 
+static int init_implementation_adapter_xsl_regs(struct cxl *adapter, struct pci_dev *dev)
+{ /* XSL flavour of sl_ops->adapter_regs_init: programs CXL_XSL_DSNCTL; returns 0 or the error from calc_capp_routing() */
+       u64 xsl_dsnctl; /* value assembled for the CXL_XSL_DSNCTL register */
+       u64 chipid;
+       u64 capp_unit_id;
+       int rc;
+
+       rc = calc_capp_routing(dev, &chipid, &capp_unit_id); /* fills chipid and capp_unit_id on success */
+       if (rc)
+               return rc;
+
+       /* Tell XSL where to route data to */
+       xsl_dsnctl = 0x0000600000000000ULL | (chipid << (63-5)); /* (63-n) shifts: presumably MSB-0 bit numbering, field ending at bit n - confirm against the XSL spec */
+       xsl_dsnctl |= (capp_unit_id << (63-13));
+       cxl_p1_write(adapter, CXL_XSL_DSNCTL, xsl_dsnctl);
+
+       return 0;
+}
+
+/* PSL & XSL */
+#define TBSYNC_CAL(n) (((u64)n & 0x7) << (63-3))
 #define TBSYNC_CNT(n) (((u64)n & 0x7) << (63-6))
-#define _2048_250MHZ_CYCLES 1
+/* For the PSL this is a multiple for 0 < n <= 7: */
+#define PSL_2048_250MHZ_CYCLES 1
+
+static void write_timebase_ctrl_psl(struct cxl *adapter)
+{ /* PSL flavour of sl_ops->write_timebase_ctrl */
+       cxl_p1_write(adapter, CXL_PSL_TB_CTLSTAT,
+                    TBSYNC_CNT(2 * PSL_2048_250MHZ_CYCLES)); /* only the sync count field is written; no TBSYNC_CAL or enable bit here */
+}
+
+/* XSL */
+#define TBSYNC_ENA (1ULL << 63)
+/* For the XSL this is 2**n * 2000 clocks for 0 < n <= 6 (NOTE(review): the names below map n = 1 to 2000, i.e. 2**(n-1) * 2000 - confirm which is right): */
+#define XSL_2000_CLOCKS 1
+#define XSL_4000_CLOCKS 2
+#define XSL_8000_CLOCKS 3
+
+static void write_timebase_ctrl_xsl(struct cxl *adapter)
+{ /* XSL flavour of sl_ops->write_timebase_ctrl */
+       cxl_p1_write(adapter, CXL_XSL_TB_CTLSTAT,
+                    TBSYNC_ENA | /* unlike the PSL variant, the enable bit is set in the same write */
+                    TBSYNC_CAL(3) | /* NOTE(review): meaning of the value 3 is not documented here - confirm against the XSL spec */
+                    TBSYNC_CNT(XSL_4000_CLOCKS));
+}
+
+static u64 timebase_read_psl(struct cxl *adapter)
+{ /* PSL flavour of sl_ops->timebase_read */
+       return cxl_p1_read(adapter, CXL_PSL_Timebase); /* raw register value, compared against mftb() by cxl_setup_psl_timebase() */
+}
+
+static u64 timebase_read_xsl(struct cxl *adapter)
+{ /* XSL flavour of sl_ops->timebase_read */
+       return cxl_p1_read(adapter, CXL_XSL_Timebase); /* raw register value, compared against mftb() by cxl_setup_psl_timebase() */
+}
 
 static void cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev)
 {
@@ -421,8 +487,7 @@ static void cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev)
         * Setup PSL Timebase Control and Status register
         * with the recommended Timebase Sync Count value
         */
-       cxl_p1_write(adapter, CXL_PSL_TB_CTLSTAT,
-                    TBSYNC_CNT(2 * _2048_250MHZ_CYCLES));
+       adapter->native->sl_ops->write_timebase_ctrl(adapter);
 
        /* Enable PSL Timebase */
        cxl_p1_write(adapter, CXL_PSL_Control, 0x0000000000000000);
@@ -435,7 +500,7 @@ static void cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev)
                        dev_info(&dev->dev, "PSL timebase can't synchronize\n");
                        return;
                }
-               psl_tb = cxl_p1_read(adapter, CXL_PSL_Timebase);
+               psl_tb = adapter->native->sl_ops->timebase_read(adapter);
                delta = mftb() - psl_tb;
                if (delta < 0)
                        delta = -delta;
@@ -445,7 +510,7 @@ static void cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev)
        return;
 }
 
-static int init_implementation_afu_regs(struct cxl_afu *afu)
+static int init_implementation_afu_psl_regs(struct cxl_afu *afu)
 {
        /* read/write masks for this slice */
        cxl_p1n_write(afu, CXL_PSL_APCALLOC_A, 0xFFFFFFFEFEFEFEFEULL);
@@ -551,36 +616,234 @@ static int setup_cxl_bars(struct pci_dev *dev)
        return 0;
 }
 
-/* pciex node: ibm,opal-m64-window = <0x3d058 0x0 0x3d058 0x0 0x8 0x0>; */
-static int switch_card_to_cxl(struct pci_dev *dev)
-{
+#ifdef CONFIG_CXL_BIMODAL
+
+struct cxl_switch_work { /* kmalloc'ed by cxl_check_and_switch_mode(), freed by switch_card_to_cxl() */
+       struct pci_dev *dev; /* reference taken by cxl_check_and_switch_mode(), dropped by switch_card_to_cxl() */
+       struct work_struct work; /* embedded so the handler can recover this struct via container_of() */
        int vsec;
+       int mode; /* requested CXL_BIMODE_* value; the handler only performs the switch for CXL_BIMODE_CXL */
+};
+
+static void switch_card_to_cxl(struct work_struct *work)
+{ /* Work handler queued by cxl_check_and_switch_mode(): removes the devices below the root port, rewrites the VSEC mode byte, resets and power-cycles the slot, then re-adds the devices. Always frees switch_work. */
+       struct cxl_switch_work *switch_work =
+               container_of(work, struct cxl_switch_work, work);
+       struct pci_dev *dev = switch_work->dev;
+       struct pci_bus *bus = dev->bus; /* captured up front: dev itself is not used again after the pci_dev_put() below */
+       struct pci_controller *hose = pci_bus_to_host(bus);
+       struct pci_dev *bridge;
+       struct pnv_php_slot *php_slot;
+       unsigned int devfn;
        u8 val;
        int rc;
 
-       dev_info(&dev->dev, "switch card to CXL\n");
+       dev_info(&bus->dev, "cxl: Preparing for mode switch...\n");
+       bridge = list_first_entry_or_null(&hose->bus->devices, struct pci_dev,
+                                         bus_list); /* first device on the host bridge's bus is taken to be the root port */
+       if (!bridge) {
+               dev_WARN(&bus->dev, "cxl: Couldn't find root port!\n");
+               goto err_dev_put;
+       }
 
-       if (!(vsec = find_cxl_vsec(dev))) {
-               dev_err(&dev->dev, "ABORTING: CXL VSEC not found!\n");
+       php_slot = pnv_php_find_slot(pci_device_to_OF_node(bridge));
+       if (!php_slot) {
+               dev_err(&bus->dev, "cxl: Failed to find slot hotplug "
+                                  "information. You may need to upgrade "
+                                  "skiboot. Aborting.\n");
+               goto err_dev_put;
+       }
+
+       rc = CXL_READ_VSEC_MODE_CONTROL(dev, switch_work->vsec, &val); /* read while dev is still held; val is modified and written back via bus/devfn later */
+       if (rc) {
+               dev_err(&bus->dev, "cxl: Failed to read CAPI mode control: %i\n", rc);
+               goto err_dev_put;
+       }
+       devfn = dev->devfn;
+
+       /* Release the reference obtained in cxl_check_and_switch_mode() */
+       pci_dev_put(dev);
+
+       dev_dbg(&bus->dev, "cxl: Removing PCI devices from kernel\n");
+       pci_lock_rescan_remove();
+       pci_hp_remove_devices(bridge->subordinate);
+       pci_unlock_rescan_remove();
+
+       /* Switch the CXL protocol on the card */
+       if (switch_work->mode == CXL_BIMODE_CXL) {
+               dev_info(&bus->dev, "cxl: Switching card to CXL mode\n");
+               val &= ~CXL_VSEC_PROTOCOL_MASK;
+               val |= CXL_VSEC_PROTOCOL_256TB | CXL_VSEC_PROTOCOL_ENABLE;
+               rc = pnv_cxl_enable_phb_kernel_api(hose, true);
+               if (rc) {
+                       dev_err(&bus->dev, "cxl: Failed to enable kernel API"
+                                          " on real PHB, aborting\n");
+                       goto err_free_work; /* NOTE(review): from here on, error paths leave the devices removed above un-added - confirm this is intended */
+               }
+       } else {
+               dev_WARN(&bus->dev, "cxl: Switching card to PCI mode not supported!\n");
+               goto err_free_work;
+       }
+
+       rc = CXL_WRITE_VSEC_MODE_CONTROL_BUS(bus, devfn, switch_work->vsec, val); /* bus/devfn form: the pci_dev has already been released and removed */
+       if (rc) {
+               dev_err(&bus->dev, "cxl: Failed to configure CXL protocol: %i\n", rc);
+               goto err_free_work;
+       }
+
+       /*
+        * The CAIA spec (v1.1, Section 10.6 Bi-modal Device Support) states
+        * we must wait 100ms after this mode switch before touching PCIe config
+        * space.
+        */
+       msleep(100);
+
+       /*
+        * Hot reset to cause the card to come back in cxl mode. A
+        * OPAL_RESET_PCI_LINK would be sufficient, but currently lacks support
+        * in skiboot, so we use a hot reset instead.
+        *
+        * We call pci_set_pcie_reset_state() on the bridge, as a CAPI card is
+        * guaranteed to sit directly under the root port, and setting the reset
+        * state on a device directly under the root port is equivalent to doing
+        * it on the root port itself.
+        */
+       dev_info(&bus->dev, "cxl: Configuration write complete, resetting card\n");
+       pci_set_pcie_reset_state(bridge, pcie_hot_reset);
+       pci_set_pcie_reset_state(bridge, pcie_deassert_reset);
+
+       dev_dbg(&bus->dev, "cxl: Offlining slot\n");
+       rc = pnv_php_set_slot_power_state(&php_slot->slot, OPAL_PCI_SLOT_OFFLINE);
+       if (rc) {
+               dev_err(&bus->dev, "cxl: OPAL offlining call failed: %i\n", rc);
+               goto err_free_work;
+       }
+
+       dev_dbg(&bus->dev, "cxl: Onlining and probing slot\n");
+       rc = pnv_php_set_slot_power_state(&php_slot->slot, OPAL_PCI_SLOT_ONLINE);
+       if (rc) {
+               dev_err(&bus->dev, "cxl: OPAL onlining call failed: %i\n", rc);
+               goto err_free_work;
+       }
+
+       pci_lock_rescan_remove();
+       pci_hp_add_devices(bridge->subordinate);
+       pci_unlock_rescan_remove();
+
+       dev_info(&bus->dev, "cxl: CAPI mode switch completed\n");
+       kfree(switch_work);
+       return;
+
+err_dev_put:
+       /* Release the reference obtained in cxl_check_and_switch_mode() */
+       pci_dev_put(dev);
+err_free_work: /* reached directly once the reference has already been dropped above */
+       kfree(switch_work);
+}
+
+int cxl_check_and_switch_mode(struct pci_dev *dev, int mode, int vsec)
+{ /* Returns 0 if the card is already in the requested mode, -EBUSY once a switch to CXL has been queued, -EIO for the unsupported switch to PCI, or another negative errno on failure */
+       struct cxl_switch_work *work;
+       u8 val;
+       int rc;
+
+       if (!cpu_has_feature(CPU_FTR_HVMODE))
                return -ENODEV;
+
+       if (!vsec) { /* callers may pass 0 to have the CXL VSEC looked up here */
+               vsec = find_cxl_vsec(dev);
+               if (!vsec) {
+                       dev_info(&dev->dev, "CXL VSEC not found\n");
+                       return -ENODEV;
+               }
        }
 
-       if ((rc = CXL_READ_VSEC_MODE_CONTROL(dev, vsec, &val))) {
-               dev_err(&dev->dev, "failed to read current mode control: %i", rc);
+       rc = CXL_READ_VSEC_MODE_CONTROL(dev, vsec, &val);
+       if (rc) {
+               dev_err(&dev->dev, "Failed to read current mode control: %i\n", rc);
                return rc;
        }
-       val &= ~CXL_VSEC_PROTOCOL_MASK;
-       val |= CXL_VSEC_PROTOCOL_256TB | CXL_VSEC_PROTOCOL_ENABLE;
-       if ((rc = CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val))) {
-               dev_err(&dev->dev, "failed to enable CXL protocol: %i", rc);
-               return rc;
+
+       if (mode == CXL_BIMODE_PCI) {
+               if (!(val & CXL_VSEC_PROTOCOL_ENABLE)) {
+                       dev_info(&dev->dev, "Card is already in PCI mode\n");
+                       return 0;
+               }
+               /*
+                * TODO: Before it's safe to switch the card back to PCI mode
+                * we need to disable the CAPP and make sure any cachelines the
+                * card holds have been flushed out. Needs skiboot support.
+                */
+               dev_WARN(&dev->dev, "CXL mode switch to PCI unsupported!\n");
+               return -EIO;
        }
+
+       if (val & CXL_VSEC_PROTOCOL_ENABLE) { /* any mode other than CXL_BIMODE_PCI is handled as a request for CXL mode */
+               dev_info(&dev->dev, "Card is already in CXL mode\n");
+               return 0;
+       }
+
+       dev_info(&dev->dev, "Card is in PCI mode, scheduling kernel thread "
+                           "to switch to CXL mode\n");
+
+       work = kmalloc(sizeof(*work), GFP_KERNEL); /* freed by switch_card_to_cxl() on every path */
+       if (!work)
+               return -ENOMEM;
+
+       pci_dev_get(dev); /* reference is dropped by switch_card_to_cxl() */
+       work->dev = dev;
+       work->vsec = vsec;
+       work->mode = mode;
+       INIT_WORK(&work->work, switch_card_to_cxl);
+
+       schedule_work(&work->work);
+
        /*
-        * The CAIA spec (v0.12 11.6 Bi-modal Device Support) states
-        * we must wait 100ms after this mode switch before touching
-        * PCIe config space.
+        * We return a failure now to abort the driver init. Once the
+        * link has been cycled and the card is in cxl mode we will
+        * come back (possibly using the generic cxl driver), but
+        * return success as the card should then be in cxl mode.
+        *
+        * TODO: What if the card comes back in PCI mode even after
+        *       the switch?  Don't want to spin endlessly.
         */
-       msleep(100);
+       return -EBUSY;
+}
+EXPORT_SYMBOL_GPL(cxl_check_and_switch_mode);
+
+#endif /* CONFIG_CXL_BIMODAL */
+
+static int setup_cxl_protocol_area(struct pci_dev *dev)
+{ /* Requires the card to already be in CAPI mode and makes sure the 256TB protocol area is selected. Returns 0, -ENODEV (no VSEC), -EIO (protocol not enabled) or the config access error. */
+       u8 val;
+       int rc;
+       int vsec = find_cxl_vsec(dev);
+
+       if (!vsec) {
+               dev_info(&dev->dev, "CXL VSEC not found\n");
+               return -ENODEV;
+       }
+
+       rc = CXL_READ_VSEC_MODE_CONTROL(dev, vsec, &val);
+       if (rc) {
+               dev_err(&dev->dev, "Failed to read current mode control: %i\n", rc);
+               return rc;
+       }
+
+       if (!(val & CXL_VSEC_PROTOCOL_ENABLE)) { /* this function never enables the protocol itself; it only checks the bit */
+               dev_err(&dev->dev, "Card not in CAPI mode!\n");
+               return -EIO;
+       }
+
+       if ((val & CXL_VSEC_PROTOCOL_MASK) != CXL_VSEC_PROTOCOL_256TB) { /* write back only when the area field actually differs */
+               val &= ~CXL_VSEC_PROTOCOL_MASK;
+               val |= CXL_VSEC_PROTOCOL_256TB;
+               rc = CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val);
+               if (rc) {
+                       dev_err(&dev->dev, "Failed to set CXL protocol area: %i\n", rc);
+                       return rc;
+               }
+       }
 
        return 0;
 }
@@ -712,6 +975,21 @@ static int cxl_afu_descriptor_looks_ok(struct cxl_afu *afu)
                }
        }
 
+       if ((afu->modes_supported & ~CXL_MODE_DEDICATED) && afu->max_procs_virtualised == 0) {
+               /*
+                * We could also check this for the dedicated process model
+                * since the architecture indicates it should be set to 1, but
+                * in that case we ignore the value and I'd rather not risk
+                * breaking any existing dedicated process AFUs that left it as
+                * 0 (not that I'm aware of any). It is clearly an error for an
+                * AFU directed AFU to set this to 0, and would have previously
+                * triggered a bug resulting in the maximum not being enforced
+                * at all since idr_alloc treats 0 as no maximum.
+                */
+               dev_err(&afu->dev, "AFU does not support any processes\n");
+               return -EINVAL;
+       }
+
        return 0;
 }
 
@@ -753,11 +1031,13 @@ static int sanitise_afu_regs(struct cxl_afu *afu)
                else
                        cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A);
        }
-       reg = cxl_p1n_read(afu, CXL_PSL_SERR_An);
-       if (reg) {
-               if (reg & ~0xffff)
-                       dev_warn(&afu->dev, "AFU had pending SERR: %#016llx\n", reg);
-               cxl_p1n_write(afu, CXL_PSL_SERR_An, reg & ~0xffff);
+       if (afu->adapter->native->sl_ops->register_serr_irq) {
+               reg = cxl_p1n_read(afu, CXL_PSL_SERR_An);
+               if (reg) {
+                       if (reg & ~0xffff)
+                               dev_warn(&afu->dev, "AFU had pending SERR: %#016llx\n", reg);
+                       cxl_p1n_write(afu, CXL_PSL_SERR_An, reg & ~0xffff);
+               }
        }
        reg = cxl_p2n_read(afu, CXL_PSL_ErrStat_An);
        if (reg) {
@@ -835,11 +1115,13 @@ static int pci_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pc
        if ((rc = cxl_afu_descriptor_looks_ok(afu)))
                goto err1;
 
-       if ((rc = init_implementation_afu_regs(afu)))
-               goto err1;
+       if (adapter->native->sl_ops->afu_regs_init)
+               if ((rc = adapter->native->sl_ops->afu_regs_init(afu)))
+                       goto err1;
 
-       if ((rc = cxl_native_register_serr_irq(afu)))
-               goto err1;
+       if (adapter->native->sl_ops->register_serr_irq)
+               if ((rc = adapter->native->sl_ops->register_serr_irq(afu)))
+                       goto err1;
 
        if ((rc = cxl_native_register_psl_irq(afu)))
                goto err2;
@@ -847,7 +1129,8 @@ static int pci_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pc
        return 0;
 
 err2:
-       cxl_native_release_serr_irq(afu);
+       if (adapter->native->sl_ops->release_serr_irq)
+               adapter->native->sl_ops->release_serr_irq(afu);
 err1:
        pci_unmap_slice_regs(afu);
        return rc;
@@ -856,7 +1139,8 @@ err1:
 static void pci_deconfigure_afu(struct cxl_afu *afu)
 {
        cxl_native_release_psl_irq(afu);
-       cxl_native_release_serr_irq(afu);
+       if (afu->adapter->native->sl_ops->release_serr_irq) /* optional op: xsl_ops leaves it unset */
+               afu->adapter->native->sl_ops->release_serr_irq(afu);
        pci_unmap_slice_regs(afu);
 }
 
@@ -1165,7 +1449,7 @@ static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev)
        if ((rc = setup_cxl_bars(dev)))
                return rc;
 
-       if ((rc = switch_card_to_cxl(dev)))
+       if ((rc = setup_cxl_protocol_area(dev)))
                return rc;
 
        if ((rc = cxl_update_image_control(adapter)))
@@ -1177,10 +1461,13 @@ static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev)
        if ((rc = sanitise_adapter_regs(adapter)))
                goto err;
 
-       if ((rc = init_implementation_adapter_regs(adapter, dev)))
+       if ((rc = adapter->native->sl_ops->adapter_regs_init(adapter, dev)))
                goto err;
 
-       if ((rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_CAPI)))
+       /* Required for devices using CAPP DMA mode, harmless for others */
+       pci_set_master(dev);
+
+       if ((rc = pnv_phb_to_cxl_mode(dev, adapter->native->sl_ops->capi_mode)))
                goto err;
 
        /* If recovery happened, the last step is to turn on snooping.
@@ -1212,6 +1499,43 @@ static void cxl_deconfigure_adapter(struct cxl *adapter)
        pci_disable_device(pdev);
 }
 
+static const struct cxl_service_layer_ops psl_ops = { /* installed by set_sl_ops() for every device it does not identify as an XSL */
+       .adapter_regs_init = init_implementation_adapter_psl_regs,
+       .afu_regs_init = init_implementation_afu_psl_regs,
+       .register_serr_irq = cxl_native_register_serr_irq,
+       .release_serr_irq = cxl_native_release_serr_irq,
+       .debugfs_add_adapter_sl_regs = cxl_debugfs_add_adapter_psl_regs,
+       .debugfs_add_afu_sl_regs = cxl_debugfs_add_afu_psl_regs,
+       .psl_irq_dump_registers = cxl_native_psl_irq_dump_regs,
+       .err_irq_dump_registers = cxl_native_err_irq_dump_regs,
+       .debugfs_stop_trace = cxl_stop_trace,
+       .write_timebase_ctrl = write_timebase_ctrl_psl,
+       .timebase_read = timebase_read_psl,
+       .capi_mode = OPAL_PHB_CAPI_MODE_CAPI, /* passed to pnv_phb_to_cxl_mode() by cxl_configure_adapter() */
+       .needs_reset_before_disable = true,
+};
+
+static const struct cxl_service_layer_ops xsl_ops = { /* afu_regs_init and the SERR irq hooks are left unset; the callers visible in this file NULL-check them before use */
+       .adapter_regs_init = init_implementation_adapter_xsl_regs,
+       .debugfs_add_adapter_sl_regs = cxl_debugfs_add_adapter_xsl_regs,
+       .write_timebase_ctrl = write_timebase_ctrl_xsl,
+       .timebase_read = timebase_read_xsl,
+       .capi_mode = OPAL_PHB_CAPI_MODE_DMA, /* passed to pnv_phb_to_cxl_mode() by cxl_configure_adapter() */
+       .min_pe = 1, /* Workaround for Mellanox CX4 HW bug */
+};
+
+static void set_sl_ops(struct cxl *adapter, struct pci_dev *dev)
+{ /* Picks the service layer ops table for this adapter from the PCI vendor/device ID; everything except the one XSL match gets psl_ops */
+       if (dev->vendor == PCI_VENDOR_ID_MELLANOX && dev->device == 0x1013) { /* 0x1013: presumably the Mellanox CX4 referred to in xsl_ops - confirm */
+               dev_info(&adapter->dev, "Device uses an XSL\n");
+               adapter->native->sl_ops = &xsl_ops;
+       } else {
+               dev_info(&adapter->dev, "Device uses a PSL\n");
+               adapter->native->sl_ops = &psl_ops;
+       }
+}
+
+
 static struct cxl *cxl_pci_init_adapter(struct pci_dev *dev)
 {
        struct cxl *adapter;
@@ -1227,6 +1551,8 @@ static struct cxl *cxl_pci_init_adapter(struct pci_dev *dev)
                goto err_release;
        }
 
+       set_sl_ops(adapter, dev);
+
        /* Set defaults for parameters which need to persist over
         * configure/reconfigure
         */
@@ -1280,6 +1606,67 @@ static void cxl_pci_remove_adapter(struct cxl *adapter)
        device_unregister(&adapter->dev);
 }
 
+#define CXL_MAX_PCIEX_PARENT 2 /* largest number of consecutive "pciex" ancestors still treated as not switched */
+
+static int cxl_slot_is_switched(struct pci_dev *dev)
+{ /* Returns 1 when the device has more than CXL_MAX_PCIEX_PARENT consecutive "pciex" ancestors in the device tree, 0 otherwise, -ENODEV without an OF node (non-zero, so truth-testing callers treat it as switched) */
+       struct device_node *np;
+       int depth = 0;
+       const __be32 *prop;
+
+       if (!(np = pci_device_to_OF_node(dev))) {
+               pr_err("cxl: np = NULL\n");
+               return -ENODEV;
+       }
+       of_node_get(np); /* extra reference, presumably consumed by the first of_get_next_parent() below - confirm */
+       while (np) {
+               np = of_get_next_parent(np); /* counting starts at the parent, not at the device's own node */
+               prop = of_get_property(np, "device_type", NULL);
+               if (!prop || strcmp((char *)prop, "pciex"))
+                       break; /* first ancestor that is not "pciex" (or has no device_type) ends the count */
+               depth++;
+       }
+       of_node_put(np);
+       return (depth > CXL_MAX_PCIEX_PARENT);
+}
+
+bool cxl_slot_is_supported(struct pci_dev *dev, int flags)
+{ /* Returns false without CPU_FTR_HVMODE, when CXL_SLOT_FLAG_DMA is requested on a CPU other than POWER8NVL, or when cxl_slot_is_switched() is non-zero; true otherwise */
+       if (!cpu_has_feature(CPU_FTR_HVMODE))
+               return false;
+
+       if ((flags & CXL_SLOT_FLAG_DMA) && (!pvr_version_is(PVR_POWER8NVL))) {
+               /*
+                * CAPP DMA mode is technically supported on regular P8, but
+                * will EEH if the card attempts to access memory < 4GB, which
+                * we cannot realistically avoid. We might be able to work
+                * around the issue, but until then return unsupported:
+                */
+               return false;
+       }
+
+       if (cxl_slot_is_switched(dev)) /* a negative error return also lands here and reports the slot as unsupported */
+               return false;
+
+       /*
+        * XXX: This gets a little tricky on regular P8 (not POWER8NVL) since
+        * the CAPP can be connected to PHB 0, 1 or 2 on a first come first
+        * served basis, which is racy to check from here. If we need to
+        * support this in future we might need to consider having this
+        * function effectively reserve it ahead of time.
+        *
+        * Currently, the only user of this API is the Mellanox CX4, which is
+        * only supported on P8NVL due to the above mentioned limitation of
+        * CAPP DMA mode and therefore does not need to worry about this. If the
+        * issue with CAPP DMA mode is later worked around on P8 we might need
+        * to revisit this.
+        */
+
+       return true;
+}
+EXPORT_SYMBOL_GPL(cxl_slot_is_supported);
+
+
 static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id)
 {
        struct cxl *adapter;
@@ -1291,6 +1678,11 @@ static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id)
                return -ENODEV;
        }
 
+       if (cxl_slot_is_switched(dev)) {
+               dev_info(&dev->dev, "Ignoring card on incompatible PCI slot\n");
+               return -ENODEV;
+       }
+
        if (cxl_verbose)
                dump_cxl_config_space(dev);
 
@@ -1311,6 +1703,9 @@ static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id)
                        dev_err(&dev->dev, "AFU %i failed to start: %i\n", slice, rc);
        }
 
+       if (pnv_pci_on_cxl_phb(dev) && adapter->slices >= 1)
+               pnv_cxl_phb_set_peer_afu(dev, adapter->afu[0]);
+
        return 0;
 }
 
@@ -1381,6 +1776,9 @@ static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev,
                 */
                for (i = 0; i < adapter->slices; i++) {
                        afu = adapter->afu[i];
+                       /* Only participate in EEH if we are on a virtual PHB */
+                       if (afu->phb == NULL)
+                               return PCI_ERS_RESULT_NONE;
                        cxl_vphb_error_detected(afu, state);
                }
                return PCI_ERS_RESULT_DISCONNECT;