IB/mlx4: Use correct subnet-prefix in QP1 mads under SR-IOV
authorJack Morgenstein <jackm@dev.mellanox.co.il>
Mon, 12 Sep 2016 16:16:20 +0000 (19:16 +0300)
committerDoug Ledford <dledford@redhat.com>
Fri, 16 Sep 2016 18:14:08 +0000 (14:14 -0400)
When sending QP1 MAD packets which use a GRH, the source GID
(which consists of the 64-bit subnet prefix, and the 64 bit port GUID)
must be included in the packet GRH.

For SR-IOV, a GID cache is used, since the source GID needs to be the
slave's source GID, and not the Hypervisor's GID. This cache also
included a subnet_prefix. Unfortunately, the subnet_prefix field in
the cache was never initialized (to the default subnet prefix 0xfe80::0).
As a result, this field remained all zeroes.  Therefore, when SR-IOV
was active, all QP1 packets which included a GRH had a source GID
subnet prefix of all-zeroes.

However, the subnet-prefix should initially be 0xfe80::0 (the default
subnet prefix). In addition, if OpenSM modifies a port's subnet prefix,
the new subnet prefix must be used in the GRH when sending QP1 packets.
To fix this we now initialize the subnet prefix in the SR-IOV GID cache
to the default subnet prefix. We update the cached value if/when OpenSM
modifies the port's subnet prefix. We take this cached value when sending
QP1 packets when SR-IOV is active.

Note that the value is stored as an atomic64. This eliminates any need
for locking when the subnet prefix is being updated.

Note also that we depend on the FW generating the "port management change"
event for tracking subnet-prefix changes performed by OpenSM. If running
early FW (before 2.9.4630), subnet prefix changes will not be tracked (but
the default subnet prefix still will be stored in the cache; therefore
users who do not modify the subnet prefix will not have a problem).
IF there is a need for such tracking also for early FW, we will add that
capability in a subsequent patch.

Fixes: 1ffeb2eb8be9 ("IB/mlx4: SR-IOV IB context objects and proxy/tunnel SQP support")
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/hw/mlx4/mad.c
drivers/infiniband/hw/mlx4/mlx4_ib.h
drivers/infiniband/hw/mlx4/qp.c

index 9c2e53d28f985740c3d22f9c13be776e58010567..0f21c3a25552d47b7305c0df2980a75b36a55970 100644 (file)
@@ -1128,6 +1128,27 @@ void handle_port_mgmt_change_event(struct work_struct *work)
 
                /* Generate GUID changed event */
                if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) {
+                       if (mlx4_is_master(dev->dev)) {
+                               union ib_gid gid;
+                               int err = 0;
+
+                               if (!eqe->event.port_mgmt_change.params.port_info.gid_prefix)
+                                       err = __mlx4_ib_query_gid(&dev->ib_dev, port, 0, &gid, 1);
+                               else
+                                       gid.global.subnet_prefix =
+                                               eqe->event.port_mgmt_change.params.port_info.gid_prefix;
+                               if (err) {
+                                       pr_warn("Could not change QP1 subnet prefix for port %d: query_gid error (%d)\n",
+                                               port, err);
+                               } else {
+                                       pr_debug("Changing QP1 subnet prefix for port %d. old=0x%llx. new=0x%llx\n",
+                                                port,
+                                                (u64)atomic64_read(&dev->sriov.demux[port - 1].subnet_prefix),
+                                                be64_to_cpu(gid.global.subnet_prefix));
+                                       atomic64_set(&dev->sriov.demux[port - 1].subnet_prefix,
+                                                    be64_to_cpu(gid.global.subnet_prefix));
+                               }
+                       }
                        mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE);
                        /*if master, notify all slaves*/
                        if (mlx4_is_master(dev->dev))
@@ -2202,6 +2223,8 @@ int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev)
                if (err)
                        goto demux_err;
                dev->sriov.demux[i].guid_cache[0] = gid.global.interface_id;
+               atomic64_set(&dev->sriov.demux[i].subnet_prefix,
+                            be64_to_cpu(gid.global.subnet_prefix));
                err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1,
                                      &dev->sriov.sqps[i]);
                if (err)
index 7c5832ede4bd0cc213139900eeb3999532192858..686ab48ff644163879049533985e81b33e4a054f 100644 (file)
@@ -448,7 +448,7 @@ struct mlx4_ib_demux_ctx {
        struct workqueue_struct *wq;
        struct workqueue_struct *ud_wq;
        spinlock_t ud_lock;
-       __be64 subnet_prefix;
+       atomic64_t subnet_prefix;
        __be64 guid_cache[128];
        struct mlx4_ib_dev *dev;
        /* the following lock protects both mcg_table and mcg_mgid0_list */
index e398c04903fa22c0ee6026695b24d901c08b3297..7fb9629bd12b9736c181d7c5e0935ff5b7524d9a 100644 (file)
@@ -2502,8 +2502,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
                                 * we must use our own cache
                                 */
                                sqp->ud_header.grh.source_gid.global.subnet_prefix =
-                                       to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
-                                                              subnet_prefix;
+                                       cpu_to_be64(atomic64_read(&(to_mdev(ib_dev)->sriov.
+                                                                   demux[sqp->qp.port - 1].
+                                                                   subnet_prefix)));
                                sqp->ud_header.grh.source_gid.global.interface_id =
                                        to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
                                                       guid_cache[ah->av.ib.gid_index];