RDMA/core: Support IB sub device with type "SMI"
authorMark Zhang <markzhang@nvidia.com>
Sun, 16 Jun 2024 16:08:36 +0000 (19:08 +0300)
committerLeon Romanovsky <leonro@nvidia.com>
Mon, 1 Jul 2024 12:38:04 +0000 (15:38 +0300)
This patch adds 2 APIs, as well as driver operations to support adding
and deleting an IB sub device, which provides part of functionalities
of it's parent.

A sub device has a type; for a sub device with type "SMI", it provides
the smi capability through umad for its parent, meaning uverb is not
supported.

A sub device cannot live without a parent. So when a parent is
released, all it's sub devices are released as well.

Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Link: https://lore.kernel.org/r/44253f7508b21eb2caefea3980c2bc072869116c.1718553901.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
drivers/infiniband/core/device.c
drivers/infiniband/core/uverbs_main.c
include/rdma/ib_verbs.h
include/uapi/rdma/rdma_netlink.h

index e0cff28bb0ef183ed3784ed74fa92bc3107ac523..7b418c717f296b2ad09975aeef4b427704b8e70c 100644 (file)
@@ -503,6 +503,7 @@ static void ib_device_release(struct device *device)
                          rcu_head);
        }
 
+       mutex_destroy(&dev->subdev_lock);
        mutex_destroy(&dev->unregistration_lock);
        mutex_destroy(&dev->compat_devs_mutex);
 
@@ -641,6 +642,11 @@ struct ib_device *_ib_alloc_device(size_t size)
                BIT_ULL(IB_USER_VERBS_CMD_REG_MR) |
                BIT_ULL(IB_USER_VERBS_CMD_REREG_MR) |
                BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ);
+
+       mutex_init(&device->subdev_lock);
+       INIT_LIST_HEAD(&device->subdev_list_head);
+       INIT_LIST_HEAD(&device->subdev_list);
+
        return device;
 }
 EXPORT_SYMBOL(_ib_alloc_device);
@@ -1461,6 +1467,18 @@ EXPORT_SYMBOL(ib_register_device);
 /* Callers must hold a get on the device. */
 static void __ib_unregister_device(struct ib_device *ib_dev)
 {
+       struct ib_device *sub, *tmp;
+
+       mutex_lock(&ib_dev->subdev_lock);
+       list_for_each_entry_safe_reverse(sub, tmp,
+                                        &ib_dev->subdev_list_head,
+                                        subdev_list) {
+               list_del(&sub->subdev_list);
+               ib_dev->ops.del_sub_dev(sub);
+               ib_device_put(ib_dev);
+       }
+       mutex_unlock(&ib_dev->subdev_lock);
+
        /*
         * We have a registration lock so that all the calls to unregister are
         * fully fenced, once any unregister returns the device is truely
@@ -2597,6 +2615,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
                ops->uverbs_no_driver_id_binding;
 
        SET_DEVICE_OP(dev_ops, add_gid);
+       SET_DEVICE_OP(dev_ops, add_sub_dev);
        SET_DEVICE_OP(dev_ops, advise_mr);
        SET_DEVICE_OP(dev_ops, alloc_dm);
        SET_DEVICE_OP(dev_ops, alloc_hw_device_stats);
@@ -2631,6 +2650,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
        SET_DEVICE_OP(dev_ops, dealloc_ucontext);
        SET_DEVICE_OP(dev_ops, dealloc_xrcd);
        SET_DEVICE_OP(dev_ops, del_gid);
+       SET_DEVICE_OP(dev_ops, del_sub_dev);
        SET_DEVICE_OP(dev_ops, dereg_mr);
        SET_DEVICE_OP(dev_ops, destroy_ah);
        SET_DEVICE_OP(dev_ops, destroy_counters);
@@ -2727,6 +2747,55 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
 }
 EXPORT_SYMBOL(ib_set_device_ops);
 
+int ib_add_sub_device(struct ib_device *parent,
+                     enum rdma_nl_dev_type type,
+                     const char *name)
+{
+       struct ib_device *sub;
+       int ret = 0;
+
+       if (!parent->ops.add_sub_dev || !parent->ops.del_sub_dev)
+               return -EOPNOTSUPP;
+
+       if (!ib_device_try_get(parent))
+               return -EINVAL;
+
+       sub = parent->ops.add_sub_dev(parent, type, name);
+       if (IS_ERR(sub)) {
+               ib_device_put(parent);
+               return PTR_ERR(sub);
+       }
+
+       sub->type = type;
+       sub->parent = parent;
+
+       mutex_lock(&parent->subdev_lock);
+       list_add_tail(&parent->subdev_list_head, &sub->subdev_list);
+       mutex_unlock(&parent->subdev_lock);
+
+       return ret;
+}
+EXPORT_SYMBOL(ib_add_sub_device);
+
+int ib_del_sub_device_and_put(struct ib_device *sub)
+{
+       struct ib_device *parent = sub->parent;
+
+       if (!parent)
+               return -EOPNOTSUPP;
+
+       mutex_lock(&parent->subdev_lock);
+       list_del(&sub->subdev_list);
+       mutex_unlock(&parent->subdev_lock);
+
+       ib_device_put(sub);
+       parent->ops.del_sub_dev(sub);
+       ib_device_put(parent);
+
+       return 0;
+}
+EXPORT_SYMBOL(ib_del_sub_device_and_put);
+
 #ifdef CONFIG_INFINIBAND_VIRT_DMA
 int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents)
 {
index 495d5a5d0373f1e91db030de98bded5af8d44884..bc099287de9a6082f0ae9d81a3a382e4c26743af 100644 (file)
@@ -1114,7 +1114,8 @@ static int ib_uverbs_add_one(struct ib_device *device)
        struct ib_uverbs_device *uverbs_dev;
        int ret;
 
-       if (!device->ops.alloc_ucontext)
+       if (!device->ops.alloc_ucontext ||
+           device->type == RDMA_DEVICE_TYPE_SMI)
                return -EOPNOTSUPP;
 
        uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL);
index 825043512b2d1a142eb7eac5fb4a5ffeb10bc5bf..d4128958908f7546e9fdb109394fd78f2a959eb9 100644 (file)
@@ -2663,6 +2663,18 @@ struct ib_device_ops {
         */
        int (*get_numa_node)(struct ib_device *dev);
 
+       /**
+        * add_sub_dev - Add a sub IB device
+        */
+       struct ib_device *(*add_sub_dev)(struct ib_device *parent,
+                                        enum rdma_nl_dev_type type,
+                                        const char *name);
+
+       /**
+        * del_sub_dev - Delete a sub IB device
+        */
+       void (*del_sub_dev)(struct ib_device *sub_dev);
+
        DECLARE_RDMA_OBJ_SIZE(ib_ah);
        DECLARE_RDMA_OBJ_SIZE(ib_counters);
        DECLARE_RDMA_OBJ_SIZE(ib_cq);
@@ -2773,6 +2785,15 @@ struct ib_device {
        char iw_ifname[IFNAMSIZ];
        u32 iw_driver_flags;
        u32 lag_flags;
+
+       /* A parent device has a list of sub-devices */
+       struct mutex subdev_lock;
+       struct list_head subdev_list_head;
+
+       /* A sub device has a type and a parent */
+       enum rdma_nl_dev_type type;
+       struct ib_device *parent;
+       struct list_head subdev_list;
 };
 
 static inline void *rdma_zalloc_obj(struct ib_device *dev, size_t size,
@@ -4822,4 +4843,26 @@ static inline u16 rdma_get_udp_sport(u32 fl, u32 lqpn, u32 rqpn)
 
 const struct ib_port_immutable*
 ib_port_immutable_read(struct ib_device *dev, unsigned int port);
+
+/** ib_add_sub_device - Add a sub IB device on an existing one
+ *
+ * @parent: The IB device that needs to add a sub device
+ * @type: The type of the new sub device
+ * @name: The name of the new sub device
+ *
+ *
+ * Return 0 on success, an error code otherwise
+ */
+int ib_add_sub_device(struct ib_device *parent,
+                     enum rdma_nl_dev_type type,
+                     const char *name);
+
+
+/** ib_del_sub_device_and_put - Delect an IB sub device while holding a 'get'
+ *
+ * @sub: The sub device that is going to be deleted
+ *
+ * Return 0 on success, an error code otherwise
+ */
+int ib_del_sub_device_and_put(struct ib_device *sub);
 #endif /* IB_VERBS_H */
index a214fc259f28c27f7cb0a24ac512525637139d2d..d15ee16be722f07141e1e85834349968051e4bac 100644 (file)
@@ -602,4 +602,9 @@ enum rdma_nl_counter_mask {
        RDMA_COUNTER_MASK_QP_TYPE = 1,
        RDMA_COUNTER_MASK_PID = 1 << 1,
 };
+
+/* Supported rdma device types. */
+enum rdma_nl_dev_type {
+       RDMA_DEVICE_TYPE_SMI = 1,
+};
 #endif /* _UAPI_RDMA_NETLINK_H */