Merge branches 'core', 'cxgb3', 'cxgb4', 'iser', 'iwpm', 'misc', 'mlx4', 'mlx5',...
author Roland Dreier <roland@purestorage.com>
Tue, 10 Jun 2014 17:12:14 +0000 (10:12 -0700)
committer Roland Dreier <roland@purestorage.com>
Tue, 10 Jun 2014 17:12:14 +0000 (10:12 -0700)
74 files changed:
drivers/infiniband/Makefile
drivers/infiniband/core/Makefile
drivers/infiniband/core/cma.c
drivers/infiniband/core/iwpm_msg.c [new file with mode: 0644]
drivers/infiniband/core/iwpm_util.c [new file with mode: 0644]
drivers/infiniband/core/iwpm_util.h [new file with mode: 0644]
drivers/infiniband/core/netlink.c
drivers/infiniband/core/sa_query.c
drivers/infiniband/core/sysfs.c
drivers/infiniband/core/user_mad.c
drivers/infiniband/core/verbs.c
drivers/infiniband/hw/Makefile [new file with mode: 0644]
drivers/infiniband/hw/cxgb4/cm.c
drivers/infiniband/hw/cxgb4/cq.c
drivers/infiniband/hw/cxgb4/device.c
drivers/infiniband/hw/cxgb4/iw_cxgb4.h
drivers/infiniband/hw/cxgb4/provider.c
drivers/infiniband/hw/cxgb4/user.h
drivers/infiniband/hw/ipath/ipath_diag.c
drivers/infiniband/hw/ipath/ipath_intr.c
drivers/infiniband/hw/ipath/ipath_sdma.c
drivers/infiniband/hw/mlx4/ah.c
drivers/infiniband/hw/mlx4/cq.c
drivers/infiniband/hw/mlx4/mad.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx4/mlx4_ib.h
drivers/infiniband/hw/mlx4/qp.c
drivers/infiniband/hw/mlx4/srq.c
drivers/infiniband/hw/mlx4/sysfs.c
drivers/infiniband/hw/mlx5/cq.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/hw/mlx5/srq.c
drivers/infiniband/hw/mlx5/user.h
drivers/infiniband/hw/nes/nes.c
drivers/infiniband/hw/nes/nes.h
drivers/infiniband/hw/nes/nes_cm.c
drivers/infiniband/hw/nes/nes_cm.h
drivers/infiniband/hw/ocrdma/ocrdma_stats.c
drivers/infiniband/hw/qib/qib_init.c
drivers/infiniband/hw/qib/qib_mad.c
drivers/infiniband/hw/qib/qib_qp.c
drivers/infiniband/hw/usnic/usnic_ib_verbs.c
drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c
drivers/infiniband/ulp/Makefile [new file with mode: 0644]
drivers/infiniband/ulp/ipoib/ipoib_cm.c
drivers/infiniband/ulp/iser/iscsi_iser.c
drivers/infiniband/ulp/iser/iscsi_iser.h
drivers/infiniband/ulp/iser/iser_verbs.c
drivers/infiniband/ulp/srp/ib_srp.c
drivers/infiniband/ulp/srp/ib_srp.h
drivers/net/ethernet/mellanox/mlx4/alloc.c
drivers/net/ethernet/mellanox/mlx4/cmd.c
drivers/net/ethernet/mellanox/mlx4/cq.c
drivers/net/ethernet/mellanox/mlx4/en_rx.c
drivers/net/ethernet/mellanox/mlx4/en_tx.c
drivers/net/ethernet/mellanox/mlx4/fw.c
drivers/net/ethernet/mellanox/mlx4/fw.h
drivers/net/ethernet/mellanox/mlx4/icm.c
drivers/net/ethernet/mellanox/mlx4/icm.h
drivers/net/ethernet/mellanox/mlx4/main.c
drivers/net/ethernet/mellanox/mlx4/mlx4.h
drivers/net/ethernet/mellanox/mlx4/mr.c
drivers/net/ethernet/mellanox/mlx4/qp.c
drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
drivers/net/ethernet/mellanox/mlx4/srq.c
drivers/net/ethernet/mellanox/mlx5/core/mr.c
include/linux/mlx4/device.h
include/linux/mlx5/driver.h
include/rdma/ib_verbs.h
include/rdma/iw_portmap.h [new file with mode: 0644]
include/rdma/rdma_netlink.h
include/uapi/rdma/rdma_netlink.h

index bf508b5550c452f32997a888badb4c885900cec9..dc21836b5a8d11d25622a55443d5dbfae12de3bb 100644 (file)
@@ -1,18 +1,3 @@
 obj-$(CONFIG_INFINIBAND)               += core/
-obj-$(CONFIG_INFINIBAND_MTHCA)         += hw/mthca/
-obj-$(CONFIG_INFINIBAND_IPATH)         += hw/ipath/
-obj-$(CONFIG_INFINIBAND_QIB)           += hw/qib/
-obj-$(CONFIG_INFINIBAND_EHCA)          += hw/ehca/
-obj-$(CONFIG_INFINIBAND_AMSO1100)      += hw/amso1100/
-obj-$(CONFIG_INFINIBAND_CXGB3)         += hw/cxgb3/
-obj-$(CONFIG_INFINIBAND_CXGB4)         += hw/cxgb4/
-obj-$(CONFIG_MLX4_INFINIBAND)          += hw/mlx4/
-obj-$(CONFIG_MLX5_INFINIBAND)          += hw/mlx5/
-obj-$(CONFIG_INFINIBAND_NES)           += hw/nes/
-obj-$(CONFIG_INFINIBAND_OCRDMA)                += hw/ocrdma/
-obj-$(CONFIG_INFINIBAND_USNIC)         += hw/usnic/
-obj-$(CONFIG_INFINIBAND_IPOIB)         += ulp/ipoib/
-obj-$(CONFIG_INFINIBAND_SRP)           += ulp/srp/
-obj-$(CONFIG_INFINIBAND_SRPT)          += ulp/srpt/
-obj-$(CONFIG_INFINIBAND_ISER)          += ulp/iser/
-obj-$(CONFIG_INFINIBAND_ISERT)         += ulp/isert/
+obj-$(CONFIG_INFINIBAND)               += hw/
+obj-$(CONFIG_INFINIBAND)               += ulp/
index 3ab3865544bb33eeac18659e4c19b2f1a41c7e76..ffd0af6734af63120b78aa8402f2f48a1ec6fc20 100644 (file)
@@ -18,7 +18,7 @@ ib_sa-y :=                    sa_query.o multicast.o
 
 ib_cm-y :=                     cm.o
 
-iw_cm-y :=                     iwcm.o
+iw_cm-y :=                     iwcm.o iwpm_util.o iwpm_msg.o
 
 rdma_cm-y :=                   cma.o
 
index 42c3058e6e9cdbaa406cf682c4c6bb2b064591bd..d570030d899c0c662d2b208ce30dd2cd20bc3f78 100644 (file)
@@ -3607,7 +3607,8 @@ static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
 
                        id_stats = ibnl_put_msg(skb, &nlh, cb->nlh->nlmsg_seq,
                                                sizeof *id_stats, RDMA_NL_RDMA_CM,
-                                               RDMA_NL_RDMA_CM_ID_STATS);
+                                               RDMA_NL_RDMA_CM_ID_STATS,
+                                               NLM_F_MULTI);
                        if (!id_stats)
                                goto out;
 
diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c
new file mode 100644 (file)
index 0000000..b85ddbc
--- /dev/null
@@ -0,0 +1,685 @@
+/*
+ * Copyright (c) 2014 Intel Corporation. All rights reserved.
+ * Copyright (c) 2014 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "iwpm_util.h"
+
+/*
+ * Name and version the user-space port mapper library must report; both are
+ * compared against received values in iwpm_register_pid_cb() and
+ * iwpm_mapping_info_cb().
+ *
+ * The array is sized to IWPM_ULIBNAME_SIZE because iwpm_register_pid() puts
+ * exactly IWPM_ULIBNAME_SIZE bytes of it into the netlink message.  An array
+ * sized only by its initializer (20 bytes) could be shorter than that and be
+ * read past its end.  The unused tail is zero-filled, so the string stays
+ * NUL-terminated (assumes IWPM_ULIBNAME_SIZE exceeds the 19-character name -
+ * confirm against iwpm_util.h).
+ */
+static const char iwpm_ulib_name[IWPM_ULIBNAME_SIZE] = "iWarpPortMapperUser";
+static int iwpm_ulib_version = 3;
+/* netlink pid of the user-space port mapper; set in iwpm_register_pid_cb() */
+static int iwpm_user_pid = IWPM_PID_UNDEFINED;
+/* nlmsg_seq of the last accepted port mapper message, echoed in requests */
+static atomic_t echo_nlmsg_seq;
+
+/*
+ * iwpm_valid_pid - Tell whether a usable port mapper pid is recorded
+ *
+ * Returns 1 when iwpm_user_pid holds a positive value, 0 otherwise.
+ */
+int iwpm_valid_pid(void)
+{
+       if (iwpm_user_pid <= 0)
+               return 0;
+       return 1;
+}
+EXPORT_SYMBOL(iwpm_valid_pid);
+
+/*
+ * iwpm_register_pid - Send a netlink query to user space
+ *                     for the iwarp port mapper pid
+ * @pm_msg: device and interface names placed in the query; also stored as
+ *          the request buffer that iwpm_register_pid_cb() checks the reply
+ *          against
+ * @nl_client: port mapper client index
+ *
+ * nlmsg attributes:
+ *     [IWPM_NLA_REG_PID_SEQ]
+ *     [IWPM_NLA_REG_IF_NAME]
+ *     [IWPM_NLA_REG_IBDEV_NAME]
+ *     [IWPM_NLA_REG_ULIB_NAME]
+ *
+ * Returns 0 if the client is already registered.  Otherwise returns the
+ * result of iwpm_wait_complete_req(), or a negative error: -EINVAL for an
+ * invalid client, -ENOMEM when the message or the request tracking entry
+ * cannot be allocated, or the error reported by ibnl_put_attr() /
+ * ibnl_multicast().
+ */
+int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client)
+{
+       struct sk_buff *skb = NULL;
+       struct iwpm_nlmsg_request *nlmsg_request = NULL;
+       struct nlmsghdr *nlh;
+       u32 msg_seq;
+       const char *err_str = "";
+       int ret = -EINVAL;
+
+       if (!iwpm_valid_client(nl_client)) {
+               err_str = "Invalid port mapper client";
+               goto pid_query_error;
+       }
+       if (iwpm_registered_client(nl_client))
+               return 0;
+
+       /*
+        * The next two failures are allocation failures; report them as
+        * -ENOMEM, as iwpm_add_and_query_mapping() and iwpm_remove_mapping()
+        * do, instead of the misleading -EINVAL.
+        */
+       ret = -ENOMEM;
+       skb = iwpm_create_nlmsg(RDMA_NL_IWPM_REG_PID, &nlh, nl_client);
+       if (!skb) {
+               err_str = "Unable to create a nlmsg";
+               goto pid_query_error;
+       }
+       nlh->nlmsg_seq = iwpm_get_nlmsg_seq();
+       nlmsg_request = iwpm_get_nlmsg_request(nlh->nlmsg_seq, nl_client, GFP_KERNEL);
+       if (!nlmsg_request) {
+               err_str = "Unable to allocate netlink request";
+               goto pid_query_error;
+       }
+       msg_seq = atomic_read(&echo_nlmsg_seq);
+
+       /* fill in the pid request message */
+       err_str = "Unable to put attribute of the nlmsg";
+       ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq, IWPM_NLA_REG_PID_SEQ);
+       if (ret)
+               goto pid_query_error;
+       ret = ibnl_put_attr(skb, nlh, IWPM_IFNAME_SIZE,
+                               pm_msg->if_name, IWPM_NLA_REG_IF_NAME);
+       if (ret)
+               goto pid_query_error;
+       ret = ibnl_put_attr(skb, nlh, IWPM_DEVNAME_SIZE,
+                               pm_msg->dev_name, IWPM_NLA_REG_IBDEV_NAME);
+       if (ret)
+               goto pid_query_error;
+       ret = ibnl_put_attr(skb, nlh, IWPM_ULIBNAME_SIZE,
+                               (char *)iwpm_ulib_name, IWPM_NLA_REG_ULIB_NAME);
+       if (ret)
+               goto pid_query_error;
+
+       pr_debug("%s: Multicasting a nlmsg (dev = %s ifname = %s iwpm = %s)\n",
+               __func__, pm_msg->dev_name, pm_msg->if_name, iwpm_ulib_name);
+
+       ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_IWPM, GFP_KERNEL);
+       if (ret) {
+               skb = NULL; /* skb is freed in the netlink send-op handling */
+               /*
+                * Nobody received the query: mark the client registered and
+                * record the pid as unavailable, so this query is not
+                * repeated for the client (see the registered check above).
+                */
+               iwpm_set_registered(nl_client, 1);
+               iwpm_user_pid = IWPM_PID_UNAVAILABLE;
+               err_str = "Unable to send a nlmsg";
+               goto pid_query_error;
+       }
+       nlmsg_request->req_buffer = pm_msg;
+       ret = iwpm_wait_complete_req(nlmsg_request);
+       return ret;
+pid_query_error:
+       pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
+       if (skb)
+               dev_kfree_skb(skb);
+       if (nlmsg_request)
+               iwpm_free_nlmsg_request(&nlmsg_request->kref);
+       return ret;
+}
+EXPORT_SYMBOL(iwpm_register_pid);
+
+/*
+ * iwpm_add_mapping - Send a netlink add mapping message
+ *                    to the port mapper
+ * @pm_msg: carries the local address to map (loc_addr); also stored as the
+ *          request buffer that iwpm_add_mapping_cb() fills in
+ * @nl_client: port mapper client index
+ *
+ * nlmsg attributes:
+ *     [IWPM_NLA_MANAGE_MAPPING_SEQ]
+ *     [IWPM_NLA_MANAGE_ADDR]
+ *
+ * Returns 0 without sending anything when no valid port mapper pid is
+ * recorded.  Otherwise returns the result of iwpm_wait_complete_req(), or a
+ * negative error: -EINVAL for an invalid or unregistered client, -ENOMEM
+ * when the message or the request tracking entry cannot be allocated, or
+ * the error reported by ibnl_put_attr() / ibnl_unicast().
+ */
+int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
+{
+       struct sk_buff *skb = NULL;
+       struct iwpm_nlmsg_request *nlmsg_request = NULL;
+       struct nlmsghdr *nlh;
+       u32 msg_seq;
+       const char *err_str = "";
+       int ret = -EINVAL;
+
+       if (!iwpm_valid_client(nl_client)) {
+               err_str = "Invalid port mapper client";
+               goto add_mapping_error;
+       }
+       if (!iwpm_registered_client(nl_client)) {
+               err_str = "Unregistered port mapper client";
+               goto add_mapping_error;
+       }
+       if (!iwpm_valid_pid())
+               return 0;
+
+       /*
+        * The next two failures are allocation failures; report them as
+        * -ENOMEM, as iwpm_add_and_query_mapping() and iwpm_remove_mapping()
+        * do, instead of the misleading -EINVAL.
+        */
+       ret = -ENOMEM;
+       skb = iwpm_create_nlmsg(RDMA_NL_IWPM_ADD_MAPPING, &nlh, nl_client);
+       if (!skb) {
+               err_str = "Unable to create a nlmsg";
+               goto add_mapping_error;
+       }
+       nlh->nlmsg_seq = iwpm_get_nlmsg_seq();
+       nlmsg_request = iwpm_get_nlmsg_request(nlh->nlmsg_seq, nl_client, GFP_KERNEL);
+       if (!nlmsg_request) {
+               err_str = "Unable to allocate netlink request";
+               goto add_mapping_error;
+       }
+       msg_seq = atomic_read(&echo_nlmsg_seq);
+       /* fill in the add mapping message */
+       err_str = "Unable to put attribute of the nlmsg";
+       ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq,
+                               IWPM_NLA_MANAGE_MAPPING_SEQ);
+       if (ret)
+               goto add_mapping_error;
+       ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage),
+                               &pm_msg->loc_addr, IWPM_NLA_MANAGE_ADDR);
+       if (ret)
+               goto add_mapping_error;
+       nlmsg_request->req_buffer = pm_msg;
+
+       ret = ibnl_unicast(skb, nlh, iwpm_user_pid);
+       if (ret) {
+               skb = NULL; /* skb is freed in the netlink send-op handling */
+               /* the recorded pid did not accept the message; forget it */
+               iwpm_user_pid = IWPM_PID_UNDEFINED;
+               err_str = "Unable to send a nlmsg";
+               goto add_mapping_error;
+       }
+       ret = iwpm_wait_complete_req(nlmsg_request);
+       return ret;
+add_mapping_error:
+       pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
+       if (skb)
+               dev_kfree_skb(skb);
+       if (nlmsg_request)
+               iwpm_free_nlmsg_request(&nlmsg_request->kref);
+       return ret;
+}
+EXPORT_SYMBOL(iwpm_add_mapping);
+
+/*
+ * iwpm_add_and_query_mapping - Send a netlink add and query
+ *                              mapping message to the port mapper
+ * @pm_msg: carries the local and remote addresses (loc_addr, rem_addr);
+ *          also stored as the request buffer for the response callback
+ * @nl_client: port mapper client index
+ *
+ * nlmsg attributes:
+ *     [IWPM_NLA_QUERY_MAPPING_SEQ]
+ *     [IWPM_NLA_QUERY_LOCAL_ADDR]
+ *     [IWPM_NLA_QUERY_REMOTE_ADDR]
+ *
+ * Returns 0 without sending anything when no valid port mapper pid is
+ * recorded, otherwise the result of iwpm_wait_complete_req() or a negative
+ * error from one of the preparation steps.
+ */
+int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
+{
+       const char *reason = "";
+       struct iwpm_nlmsg_request *req = NULL;
+       struct sk_buff *msg = NULL;
+       struct nlmsghdr *hdr;
+       u32 echo_seq;
+       int rc = -EINVAL;
+
+       if (!iwpm_valid_client(nl_client)) {
+               reason = "Invalid port mapper client";
+               goto fail;
+       }
+       if (!iwpm_registered_client(nl_client)) {
+               reason = "Unregistered port mapper client";
+               goto fail;
+       }
+       if (!iwpm_valid_pid())
+               return 0;
+
+       /* both allocations below fail with -ENOMEM */
+       rc = -ENOMEM;
+       msg = iwpm_create_nlmsg(RDMA_NL_IWPM_QUERY_MAPPING, &hdr, nl_client);
+       if (!msg) {
+               reason = "Unable to create a nlmsg";
+               goto fail;
+       }
+       hdr->nlmsg_seq = iwpm_get_nlmsg_seq();
+       req = iwpm_get_nlmsg_request(hdr->nlmsg_seq, nl_client, GFP_KERNEL);
+       if (!req) {
+               reason = "Unable to allocate netlink request";
+               goto fail;
+       }
+       echo_seq = atomic_read(&echo_nlmsg_seq);
+
+       /* fill in the query message; stop at the first attribute that fails */
+       reason = "Unable to put attribute of the nlmsg";
+       rc = ibnl_put_attr(msg, hdr, sizeof(u32), &echo_seq,
+                               IWPM_NLA_QUERY_MAPPING_SEQ);
+       if (!rc)
+               rc = ibnl_put_attr(msg, hdr, sizeof(struct sockaddr_storage),
+                               &pm_msg->loc_addr, IWPM_NLA_QUERY_LOCAL_ADDR);
+       if (!rc)
+               rc = ibnl_put_attr(msg, hdr, sizeof(struct sockaddr_storage),
+                               &pm_msg->rem_addr, IWPM_NLA_QUERY_REMOTE_ADDR);
+       if (rc)
+               goto fail;
+       req->req_buffer = pm_msg;
+
+       rc = ibnl_unicast(msg, hdr, iwpm_user_pid);
+       if (rc) {
+               msg = NULL; /* skb is freed in the netlink send-op handling */
+               reason = "Unable to send a nlmsg";
+               goto fail;
+       }
+       return iwpm_wait_complete_req(req);
+fail:
+       pr_info("%s: %s (client = %d)\n", __func__, reason, nl_client);
+       if (msg)
+               dev_kfree_skb(msg);
+       if (req)
+               iwpm_free_nlmsg_request(&req->kref);
+       return rc;
+}
+EXPORT_SYMBOL(iwpm_add_and_query_mapping);
+
+/*
+ * iwpm_remove_mapping - Send a netlink remove mapping message
+ *                       to the port mapper
+ * @local_addr: local address whose mapping is to be removed
+ * @nl_client: port mapper client index
+ *
+ * nlmsg attributes:
+ *     [IWPM_NLA_MANAGE_MAPPING_SEQ]
+ *     [IWPM_NLA_MANAGE_ADDR]
+ *
+ * The message is sent without waiting for a reply.  Returns 0 when it was
+ * sent, or when no valid port mapper pid is recorded; otherwise a negative
+ * error.
+ */
+int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client)
+{
+       const char *reason = "";
+       struct sk_buff *msg = NULL;
+       struct nlmsghdr *hdr;
+       u32 echo_seq;
+       int rc = -EINVAL;
+
+       if (!iwpm_valid_client(nl_client)) {
+               reason = "Invalid port mapper client";
+               goto fail;
+       }
+       if (!iwpm_registered_client(nl_client)) {
+               reason = "Unregistered port mapper client";
+               goto fail;
+       }
+       if (!iwpm_valid_pid())
+               return 0;
+
+       msg = iwpm_create_nlmsg(RDMA_NL_IWPM_REMOVE_MAPPING, &hdr, nl_client);
+       if (!msg) {
+               rc = -ENOMEM;
+               reason = "Unable to create a nlmsg";
+               goto fail;
+       }
+       echo_seq = atomic_read(&echo_nlmsg_seq);
+       hdr->nlmsg_seq = iwpm_get_nlmsg_seq();
+
+       /* fill in the remove message; stop at the first attribute that fails */
+       reason = "Unable to put attribute of the nlmsg";
+       rc = ibnl_put_attr(msg, hdr, sizeof(u32), &echo_seq,
+                               IWPM_NLA_MANAGE_MAPPING_SEQ);
+       if (!rc)
+               rc = ibnl_put_attr(msg, hdr, sizeof(struct sockaddr_storage),
+                               local_addr, IWPM_NLA_MANAGE_ADDR);
+       if (rc)
+               goto fail;
+
+       rc = ibnl_unicast(msg, hdr, iwpm_user_pid);
+       if (rc) {
+               msg = NULL; /* skb is freed in the netlink send-op handling */
+               iwpm_user_pid = IWPM_PID_UNDEFINED;
+               reason = "Unable to send a nlmsg";
+               goto fail;
+       }
+       iwpm_print_sockaddr(local_addr,
+                       "remove_mapping: Local sockaddr:");
+       return 0;
+fail:
+       pr_info("%s: %s (client = %d)\n", __func__, reason, nl_client);
+       if (msg)
+               dev_kfree_skb_any(msg);
+       return rc;
+}
+EXPORT_SYMBOL(iwpm_remove_mapping);
+
+/* netlink attribute policy for the received response to register pid request */
+static const struct nla_policy resp_reg_policy[IWPM_NLA_RREG_PID_MAX] = {
+       [IWPM_NLA_RREG_PID_SEQ]     = { .type = NLA_U32 },
+       [IWPM_NLA_RREG_IBDEV_NAME]  = { .type = NLA_STRING,
+                                       .len = IWPM_DEVNAME_SIZE - 1 },
+       [IWPM_NLA_RREG_ULIB_NAME]   = { .type = NLA_STRING,
+                                       .len = IWPM_ULIBNAME_SIZE - 1 },
+       [IWPM_NLA_RREG_ULIB_VER]    = { .type = NLA_U16 },
+       [IWPM_NLA_RREG_PID_ERR]     = { .type = NLA_U16 }
+};
+
+/*
+ * iwpm_register_pid_cb - Process a port mapper response to
+ *                        iwpm_register_pid()
+ * @skb: received buffer (not used here)
+ * @cb: netlink callback; cb->nlh is the header of the received message
+ *
+ * Looks up the pending request by the echoed sequence number and checks the
+ * reported device name, library name and library version.  On a match the
+ * sender's netlink pid is recorded as the port mapper pid and the client is
+ * marked registered; on a mismatch the request gets IWPM_USER_LIB_INFO_ERR.
+ * Either way the request is marked done and its waiter is woken.
+ *
+ * Returns 0 once a matching request was found, -EINVAL when the message
+ * cannot be parsed or no request matches.
+ */
+int iwpm_register_pid_cb(struct sk_buff *skb, struct netlink_callback *cb)
+{
+       struct iwpm_nlmsg_request *nlmsg_request = NULL;
+       struct nlattr *nltb[IWPM_NLA_RREG_PID_MAX];
+       struct iwpm_dev_data *pm_msg;
+       char *dev_name, *iwpm_name;
+       u32 msg_seq;
+       u8 nl_client;
+       u16 iwpm_version;
+       const char *msg_type = "Register Pid response";
+
+       if (iwpm_parse_nlmsg(cb, IWPM_NLA_RREG_PID_MAX,
+                               resp_reg_policy, nltb, msg_type))
+               return -EINVAL;
+
+       msg_seq = nla_get_u32(nltb[IWPM_NLA_RREG_PID_SEQ]);
+       nlmsg_request = iwpm_find_nlmsg_request(msg_seq);
+       if (!nlmsg_request) {
+               pr_info("%s: Could not find a matching request (seq = %u)\n",
+                                __func__, msg_seq);
+               return -EINVAL;
+       }
+       pm_msg = nlmsg_request->req_buffer;
+       nl_client = nlmsg_request->nl_client;
+       /*
+        * NOTE(review): an NLA_STRING policy alone does not guarantee NUL
+        * termination; confirm the sender or iwpm_parse_nlmsg() ensures it
+        * before these are used with strcmp() and %s below.
+        */
+       dev_name = (char *)nla_data(nltb[IWPM_NLA_RREG_IBDEV_NAME]);
+       iwpm_name = (char *)nla_data(nltb[IWPM_NLA_RREG_ULIB_NAME]);
+       iwpm_version = nla_get_u16(nltb[IWPM_NLA_RREG_ULIB_VER]);
+
+       /* check device name, ulib name and version */
+       if (strcmp(pm_msg->dev_name, dev_name) ||
+                       strcmp(iwpm_ulib_name, iwpm_name) ||
+                       iwpm_version != iwpm_ulib_version) {
+
+               pr_info("%s: Incorrect info (dev = %s name = %s version = %d)\n",
+                               __func__, dev_name, iwpm_name, iwpm_version);
+               nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
+               goto register_pid_response_exit;
+       }
+       iwpm_user_pid = cb->nlh->nlmsg_pid;
+       atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+       pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n",
+                       __func__, iwpm_user_pid);
+       if (iwpm_valid_client(nl_client))
+               iwpm_set_registered(nl_client, 1);
+register_pid_response_exit:
+       nlmsg_request->request_done = 1;
+       /* keep the compiler from moving the flag store past the wake-up */
+       barrier();
+       /*
+        * Wake the waiter before dropping the reference: after kref_put()
+        * this function no longer holds a reference of its own, so it must
+        * not touch nlmsg_request (including its waitq) any more.
+        */
+       wake_up(&nlmsg_request->waitq);
+       /* always for found nlmsg_request */
+       kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
+       return 0;
+}
+EXPORT_SYMBOL(iwpm_register_pid_cb);
+
+/* netlink attribute policy for the received response to add mapping request */
+static const struct nla_policy resp_add_policy[IWPM_NLA_RMANAGE_MAPPING_MAX] = {
+       [IWPM_NLA_MANAGE_MAPPING_SEQ]     = { .type = NLA_U32 },
+       [IWPM_NLA_MANAGE_ADDR]            = { .len = sizeof(struct sockaddr_storage) },
+       [IWPM_NLA_MANAGE_MAPPED_LOC_ADDR] = { .len = sizeof(struct sockaddr_storage) },
+       [IWPM_NLA_RMANAGE_MAPPING_ERR]    = { .type = NLA_U16 }
+};
+
+/*
+ * iwpm_add_mapping_cb - Process a port mapper response to
+ *                       iwpm_add_mapping()
+ * @skb: received buffer (not used here)
+ * @cb: netlink callback; cb->nlh is the header of the received message
+ *
+ * Looks up the pending request by the echoed sequence number.  The response
+ * is accepted only if its local address compares equal to the requested one
+ * and the mapped address has the same address family; the mapped address is
+ * then copied into the request buffer.  Otherwise the request gets
+ * IWPM_USER_LIB_INFO_ERR.  Either way the request is marked done and its
+ * waiter is woken.
+ *
+ * Returns 0 once a matching request was found, -EINVAL when the message
+ * cannot be parsed or no request matches.
+ */
+int iwpm_add_mapping_cb(struct sk_buff *skb, struct netlink_callback *cb)
+{
+       struct iwpm_sa_data *pm_msg;
+       struct iwpm_nlmsg_request *nlmsg_request = NULL;
+       struct nlattr *nltb[IWPM_NLA_RMANAGE_MAPPING_MAX];
+       struct sockaddr_storage *local_sockaddr;
+       struct sockaddr_storage *mapped_sockaddr;
+       const char *msg_type;
+       u32 msg_seq;
+
+       msg_type = "Add Mapping response";
+       if (iwpm_parse_nlmsg(cb, IWPM_NLA_RMANAGE_MAPPING_MAX,
+                               resp_add_policy, nltb, msg_type))
+               return -EINVAL;
+
+       atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+
+       msg_seq = nla_get_u32(nltb[IWPM_NLA_MANAGE_MAPPING_SEQ]);
+       nlmsg_request = iwpm_find_nlmsg_request(msg_seq);
+       if (!nlmsg_request) {
+               pr_info("%s: Could not find a matching request (seq = %u)\n",
+                                __func__, msg_seq);
+               return -EINVAL;
+       }
+       pm_msg = nlmsg_request->req_buffer;
+       local_sockaddr = (struct sockaddr_storage *)
+                       nla_data(nltb[IWPM_NLA_MANAGE_ADDR]);
+       mapped_sockaddr = (struct sockaddr_storage *)
+                       nla_data(nltb[IWPM_NLA_MANAGE_MAPPED_LOC_ADDR]);
+
+       /* a non-zero result is treated as "addresses differ" */
+       if (iwpm_compare_sockaddr(local_sockaddr, &pm_msg->loc_addr)) {
+               nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
+               goto add_mapping_response_exit;
+       }
+       if (mapped_sockaddr->ss_family != local_sockaddr->ss_family) {
+               pr_info("%s: Sockaddr family doesn't match the requested one\n",
+                               __func__);
+               nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
+               goto add_mapping_response_exit;
+       }
+       memcpy(&pm_msg->mapped_loc_addr, mapped_sockaddr,
+                       sizeof(*mapped_sockaddr));
+       iwpm_print_sockaddr(&pm_msg->loc_addr,
+                       "add_mapping: Local sockaddr:");
+       iwpm_print_sockaddr(&pm_msg->mapped_loc_addr,
+                       "add_mapping: Mapped local sockaddr:");
+
+add_mapping_response_exit:
+       nlmsg_request->request_done = 1;
+       /* keep the compiler from moving the flag store past the wake-up */
+       barrier();
+       /*
+        * Wake the waiter before dropping the reference: after kref_put()
+        * this function no longer holds a reference of its own, so it must
+        * not touch nlmsg_request (including its waitq) any more.
+        */
+       wake_up(&nlmsg_request->waitq);
+       /* always for found request */
+       kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
+       return 0;
+}
+EXPORT_SYMBOL(iwpm_add_mapping_cb);
+
+/* netlink attribute policy for the response to add and query mapping request */
+static const struct nla_policy resp_query_policy[IWPM_NLA_RQUERY_MAPPING_MAX] = {
+       [IWPM_NLA_QUERY_MAPPING_SEQ]      = { .type = NLA_U32 },
+       [IWPM_NLA_QUERY_LOCAL_ADDR]       = { .len = sizeof(struct sockaddr_storage) },
+       [IWPM_NLA_QUERY_REMOTE_ADDR]      = { .len = sizeof(struct sockaddr_storage) },
+       [IWPM_NLA_RQUERY_MAPPED_LOC_ADDR] = { .len = sizeof(struct sockaddr_storage) },
+       [IWPM_NLA_RQUERY_MAPPED_REM_ADDR] = { .len = sizeof(struct sockaddr_storage) },
+       [IWPM_NLA_RQUERY_MAPPING_ERR]     = { .type = NLA_U16 }
+};
+
+/*
+ * iwpm_add_and_query_mapping_cb - Process a port mapper response to
+ *                                 iwpm_add_and_query_mapping()
+ * @skb: received buffer (not used here)
+ * @cb: netlink callback; cb->nlh is the header of the received message
+ *
+ * Looks up the pending request by the echoed sequence number.  The response
+ * is accepted only if its local and remote addresses compare equal to the
+ * requested ones and each mapped address has the same family as its
+ * unmapped counterpart; both mapped addresses are then copied into the
+ * request buffer.  Otherwise the request gets IWPM_USER_LIB_INFO_ERR.  A
+ * reported IWPM_REMOTE_QUERY_REJECT is recorded in the request, but the
+ * checks and the copy above are still performed.  Either way the request is
+ * marked done and its waiter is woken.
+ *
+ * Returns 0 once a matching request was found, -EINVAL when the message
+ * cannot be parsed or no request matches.
+ */
+int iwpm_add_and_query_mapping_cb(struct sk_buff *skb,
+                               struct netlink_callback *cb)
+{
+       struct iwpm_sa_data *pm_msg;
+       struct iwpm_nlmsg_request *nlmsg_request = NULL;
+       struct nlattr *nltb[IWPM_NLA_RQUERY_MAPPING_MAX];
+       struct sockaddr_storage *local_sockaddr, *remote_sockaddr;
+       struct sockaddr_storage *mapped_loc_sockaddr, *mapped_rem_sockaddr;
+       const char *msg_type;
+       u32 msg_seq;
+       u16 err_code;
+
+       msg_type = "Query Mapping response";
+       if (iwpm_parse_nlmsg(cb, IWPM_NLA_RQUERY_MAPPING_MAX,
+                               resp_query_policy, nltb, msg_type))
+               return -EINVAL;
+       atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+
+       msg_seq = nla_get_u32(nltb[IWPM_NLA_QUERY_MAPPING_SEQ]);
+       nlmsg_request = iwpm_find_nlmsg_request(msg_seq);
+       if (!nlmsg_request) {
+               pr_info("%s: Could not find a matching request (seq = %u)\n",
+                                __func__, msg_seq);
+               return -EINVAL;
+       }
+       pm_msg = nlmsg_request->req_buffer;
+       local_sockaddr = (struct sockaddr_storage *)
+                       nla_data(nltb[IWPM_NLA_QUERY_LOCAL_ADDR]);
+       remote_sockaddr = (struct sockaddr_storage *)
+                       nla_data(nltb[IWPM_NLA_QUERY_REMOTE_ADDR]);
+       mapped_loc_sockaddr = (struct sockaddr_storage *)
+                       nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_LOC_ADDR]);
+       mapped_rem_sockaddr = (struct sockaddr_storage *)
+                       nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_REM_ADDR]);
+
+       err_code = nla_get_u16(nltb[IWPM_NLA_RQUERY_MAPPING_ERR]);
+       if (err_code == IWPM_REMOTE_QUERY_REJECT) {
+               pr_info("%s: Received a Reject (pid = %u, echo seq = %u)\n",
+                       __func__, cb->nlh->nlmsg_pid, msg_seq);
+               nlmsg_request->err_code = IWPM_REMOTE_QUERY_REJECT;
+       }
+       /* a non-zero result is treated as "addresses differ" */
+       if (iwpm_compare_sockaddr(local_sockaddr, &pm_msg->loc_addr) ||
+               iwpm_compare_sockaddr(remote_sockaddr, &pm_msg->rem_addr)) {
+               pr_info("%s: Incorrect local sockaddr\n", __func__);
+               nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
+               goto query_mapping_response_exit;
+       }
+       if (mapped_loc_sockaddr->ss_family != local_sockaddr->ss_family ||
+               mapped_rem_sockaddr->ss_family != remote_sockaddr->ss_family) {
+               pr_info("%s: Sockaddr family doesn't match the requested one\n",
+                               __func__);
+               nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
+               goto query_mapping_response_exit;
+       }
+       memcpy(&pm_msg->mapped_loc_addr, mapped_loc_sockaddr,
+                       sizeof(*mapped_loc_sockaddr));
+       memcpy(&pm_msg->mapped_rem_addr, mapped_rem_sockaddr,
+                       sizeof(*mapped_rem_sockaddr));
+
+       iwpm_print_sockaddr(&pm_msg->loc_addr,
+                       "query_mapping: Local sockaddr:");
+       iwpm_print_sockaddr(&pm_msg->mapped_loc_addr,
+                       "query_mapping: Mapped local sockaddr:");
+       iwpm_print_sockaddr(&pm_msg->rem_addr,
+                       "query_mapping: Remote sockaddr:");
+       iwpm_print_sockaddr(&pm_msg->mapped_rem_addr,
+                       "query_mapping: Mapped remote sockaddr:");
+query_mapping_response_exit:
+       nlmsg_request->request_done = 1;
+       /* keep the compiler from moving the flag store past the wake-up */
+       barrier();
+       /*
+        * Wake the waiter before dropping the reference: after kref_put()
+        * this function no longer holds a reference of its own, so it must
+        * not touch nlmsg_request (including its waitq) any more.
+        */
+       wake_up(&nlmsg_request->waitq);
+       /* always for found request */
+       kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
+       return 0;
+}
+EXPORT_SYMBOL(iwpm_add_and_query_mapping_cb);
+
+/* netlink attribute policy for the received request for mapping info */
+static const struct nla_policy resp_mapinfo_policy[IWPM_NLA_MAPINFO_REQ_MAX] = {
+       [IWPM_NLA_MAPINFO_ULIB_NAME] = { .type = NLA_STRING,
+                                       .len = IWPM_ULIBNAME_SIZE - 1 },
+       [IWPM_NLA_MAPINFO_ULIB_VER]  = { .type = NLA_U16 }
+};
+
+/*
+ * iwpm_mapping_info_cb - Process a port mapper request for mapping info
+ * @skb: received buffer (not used here)
+ * @cb: netlink callback; cb->nlh is the header of the received message
+ *
+ * Checks the library name and version reported by the sender, marks the
+ * client taken from the message type as not registered, and, if mapping
+ * info is available, sends it to the sender's netlink pid.
+ *
+ * Returns -EINVAL for an unparsable message, a name/version mismatch or an
+ * invalid client; 0 when there is no mapping info to send; otherwise the
+ * result of iwpm_send_mapinfo().
+ */
+int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
+{
+       const char *msg_type = "Mapping Info response";
+       struct nlattr *attrs[IWPM_NLA_MAPINFO_REQ_MAX];
+       char *ulib_name;
+       u16 ulib_ver;
+       u8 client;
+       int mapper_pid;
+
+       if (iwpm_parse_nlmsg(cb, IWPM_NLA_MAPINFO_REQ_MAX,
+                               resp_mapinfo_policy, attrs, msg_type)) {
+               pr_info("%s: Unable to parse nlmsg\n", __func__);
+               return -EINVAL;
+       }
+
+       ulib_name = nla_data(attrs[IWPM_NLA_MAPINFO_ULIB_NAME]);
+       ulib_ver = nla_get_u16(attrs[IWPM_NLA_MAPINFO_ULIB_VER]);
+       if (strcmp(iwpm_ulib_name, ulib_name) != 0 ||
+                       ulib_ver != iwpm_ulib_version) {
+               pr_info("%s: Invalid port mapper name = %s version = %d\n",
+                               __func__, ulib_name, ulib_ver);
+               return -EINVAL;
+       }
+
+       client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type);
+       if (!iwpm_valid_client(client)) {
+               pr_info("%s: Invalid port mapper client = %d\n",
+                               __func__, client);
+               return -EINVAL;
+       }
+
+       iwpm_set_registered(client, 0);
+       atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+       if (!iwpm_mapinfo_available())
+               return 0;
+
+       mapper_pid = cb->nlh->nlmsg_pid;
+       pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n",
+                __func__, mapper_pid);
+       return iwpm_send_mapinfo(client, mapper_pid);
+}
+EXPORT_SYMBOL(iwpm_mapping_info_cb);
+
+/* netlink attribute policy for the received mapping info ack */
+static const struct nla_policy ack_mapinfo_policy[IWPM_NLA_MAPINFO_NUM_MAX] = {
+       [IWPM_NLA_MAPINFO_SEQ]    =   { .type = NLA_U32 },
+       [IWPM_NLA_MAPINFO_SEND_NUM] = { .type = NLA_U32 },
+       [IWPM_NLA_MAPINFO_ACK_NUM] =  { .type = NLA_U32 }
+};
+
+/*
+ * iwpm_ack_mapping_info_cb - Process a port mapper ack for
+ *                            the provided mapping info records
+ * @skb: received buffer (not used here)
+ * @cb: netlink callback; cb->nlh is the header of the received message
+ *
+ * A difference between the sent and the acknowledged record counts is only
+ * logged.  Returns 0 for any parsable message, -EINVAL otherwise.
+ */
+int iwpm_ack_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
+{
+       const char *msg_type = "Mapping Info Ack";
+       struct nlattr *attrs[IWPM_NLA_MAPINFO_NUM_MAX];
+       u32 sent, acked;
+
+       if (iwpm_parse_nlmsg(cb, IWPM_NLA_MAPINFO_NUM_MAX,
+                               ack_mapinfo_policy, attrs, msg_type))
+               return -EINVAL;
+
+       sent = nla_get_u32(attrs[IWPM_NLA_MAPINFO_SEND_NUM]);
+       acked = nla_get_u32(attrs[IWPM_NLA_MAPINFO_ACK_NUM]);
+       if (acked != sent)
+               pr_info("%s: Invalid mapinfo number (sent = %u ack-ed = %u)\n",
+                       __func__, sent, acked);
+
+       /* remember the mapper's sequence number for the next request */
+       atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+       return 0;
+}
+EXPORT_SYMBOL(iwpm_ack_mapping_info_cb);
+
+/* netlink attribute policy for the received port mapper error message */
+static const struct nla_policy map_error_policy[IWPM_NLA_ERR_MAX] = {
+       [IWPM_NLA_ERR_SEQ]        = { .type = NLA_U32 },
+       [IWPM_NLA_ERR_CODE]       = { .type = NLA_U16 },
+};
+
+/*
+ * iwpm_mapping_error_cb - Process a port mapper error message
+ * @skb: received buffer (not used here)
+ * @cb: netlink callback; cb->nlh is the header of the received message
+ *
+ * Logs the reported sequence number and error code.  If a pending request
+ * matches the sequence number, the error code is stored in it, the request
+ * is marked done and its waiter is woken.
+ *
+ * Returns -EINVAL when the message cannot be parsed, 0 otherwise (also when
+ * no request matches).
+ */
+int iwpm_mapping_error_cb(struct sk_buff *skb, struct netlink_callback *cb)
+{
+       struct iwpm_nlmsg_request *nlmsg_request = NULL;
+       int nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type);
+       struct nlattr *nltb[IWPM_NLA_ERR_MAX];
+       u32 msg_seq;
+       u16 err_code;
+       const char *msg_type = "Mapping Error Msg";
+
+       if (iwpm_parse_nlmsg(cb, IWPM_NLA_ERR_MAX,
+                               map_error_policy, nltb, msg_type))
+               return -EINVAL;
+
+       msg_seq = nla_get_u32(nltb[IWPM_NLA_ERR_SEQ]);
+       err_code = nla_get_u16(nltb[IWPM_NLA_ERR_CODE]);
+       pr_info("%s: Received msg seq = %u err code = %u client = %d\n",
+                               __func__, msg_seq, err_code, nl_client);
+       /* look for nlmsg_request */
+       nlmsg_request = iwpm_find_nlmsg_request(msg_seq);
+       if (!nlmsg_request) {
+               /* not all errors have associated requests */
+               pr_debug("Could not find matching req (seq = %u)\n", msg_seq);
+               return 0;
+       }
+       atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+       nlmsg_request->err_code = err_code;
+       nlmsg_request->request_done = 1;
+       /* keep the compiler from moving the flag store past the wake-up */
+       barrier();
+       /*
+        * Wake the waiter before dropping the reference: after kref_put()
+        * this function no longer holds a reference of its own, so it must
+        * not touch nlmsg_request (including its waitq) any more.
+        */
+       wake_up(&nlmsg_request->waitq);
+       /* always for found request */
+       kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
+       return 0;
+}
+EXPORT_SYMBOL(iwpm_mapping_error_cb);
diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
new file mode 100644 (file)
index 0000000..69e9f84
--- /dev/null
@@ -0,0 +1,607 @@
+/*
+ * Copyright (c) 2014 Chelsio, Inc. All rights reserved.
+ * Copyright (c) 2014 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer in the documentation and/or other materials
+ *       provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "iwpm_util.h"
+
+#define IWPM_HASH_BUCKET_SIZE  512
+#define IWPM_HASH_BUCKET_MASK  (IWPM_HASH_BUCKET_SIZE - 1)
+
+static LIST_HEAD(iwpm_nlmsg_req_list);
+static DEFINE_SPINLOCK(iwpm_nlmsg_req_lock);
+
+static struct hlist_head *iwpm_hash_bucket;
+static DEFINE_SPINLOCK(iwpm_mapinfo_lock);
+
+static DEFINE_MUTEX(iwpm_admin_lock);
+static struct iwpm_admin_data iwpm_admin;
+
+/*
+ * iwpm_init - Take a reference on the port mapper state for a client
+ * @nl_client: The index of the netlink client
+ *
+ * The first reference allocates the mapping info hash table. On success
+ * the client is marked valid.
+ *
+ * Returns 0 on success, -EINVAL if the client index is out of range or
+ * the client is already valid, -ENOMEM if the hash table cannot be
+ * allocated.
+ */
+int iwpm_init(u8 nl_client)
+{
+       /*
+        * An out-of-range index is never reported as valid and can never be
+        * marked valid, so reject it here: otherwise a reference would be
+        * taken that iwpm_exit() refuses to drop.
+        */
+       if (nl_client >= RDMA_NL_NUM_CLIENTS)
+               return -EINVAL;
+       if (iwpm_valid_client(nl_client))
+               return -EINVAL;
+       mutex_lock(&iwpm_admin_lock);
+       if (atomic_read(&iwpm_admin.refcount) == 0) {
+               /* kcalloc checks the count * size multiplication */
+               iwpm_hash_bucket = kcalloc(IWPM_HASH_BUCKET_SIZE,
+                                       sizeof(struct hlist_head), GFP_KERNEL);
+               if (!iwpm_hash_bucket) {
+                       mutex_unlock(&iwpm_admin_lock);
+                       pr_err("%s Unable to create mapinfo hash table\n", __func__);
+                       return -ENOMEM;
+               }
+       }
+       atomic_inc(&iwpm_admin.refcount);
+       mutex_unlock(&iwpm_admin_lock);
+       iwpm_set_valid(nl_client, 1);
+       return 0;
+}
+EXPORT_SYMBOL(iwpm_init);
+
+static void free_hash_bucket(void);
+
+/*
+ * iwpm_exit - Drop a client's reference on the port mapper state
+ * @nl_client: The index of the netlink client
+ *
+ * Dropping the last reference destroys the mapping info hash table.
+ * On success the client is marked invalid.
+ *
+ * Returns 0 on success, -EINVAL if the client is not valid or the
+ * reference count is already zero.
+ */
+int iwpm_exit(u8 nl_client)
+{
+       int underflow;
+
+       if (!iwpm_valid_client(nl_client))
+               return -EINVAL;
+
+       mutex_lock(&iwpm_admin_lock);
+       underflow = (atomic_read(&iwpm_admin.refcount) == 0);
+       if (!underflow && atomic_dec_and_test(&iwpm_admin.refcount)) {
+               free_hash_bucket();
+               pr_debug("%s: Mapinfo hash table is destroyed\n", __func__);
+       }
+       mutex_unlock(&iwpm_admin_lock);
+
+       if (underflow) {
+               /* reported after the mutex is released */
+               pr_err("%s Incorrect usage - negative refcount\n", __func__);
+               return -EINVAL;
+       }
+       iwpm_set_valid(nl_client, 0);
+       return 0;
+}
+EXPORT_SYMBOL(iwpm_exit);
+
+static struct hlist_head *get_hash_bucket_head(struct sockaddr_storage *,
+                                              struct sockaddr_storage *);
+
+/*
+ * iwpm_create_mapinfo - Store a local/mapped address pair in the hash table
+ * @local_sockaddr: Local address, copied into the new record
+ * @mapped_sockaddr: Mapped address, copied into the new record
+ * @nl_client: The index of the netlink client owning the record
+ *
+ * Returns 0 if the record was added, -EINVAL if the client is not valid,
+ * the hash table does not exist or the address family is not supported,
+ * -ENOMEM if the record cannot be allocated.
+ */
+int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
+                       struct sockaddr_storage *mapped_sockaddr,
+                       u8 nl_client)
+{
+       struct hlist_head *hash_bucket_head;
+       struct iwpm_mapping_info *map_info;
+       unsigned long flags;
+       int ret = -EINVAL;
+
+       if (!iwpm_valid_client(nl_client))
+               return -EINVAL;
+       map_info = kzalloc(sizeof(struct iwpm_mapping_info), GFP_KERNEL);
+       if (!map_info) {
+               pr_err("%s: Unable to allocate a mapping info\n", __func__);
+               return -ENOMEM;
+       }
+       memcpy(&map_info->local_sockaddr, local_sockaddr,
+              sizeof(struct sockaddr_storage));
+       memcpy(&map_info->mapped_sockaddr, mapped_sockaddr,
+              sizeof(struct sockaddr_storage));
+       map_info->nl_client = nl_client;
+
+       spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
+       if (iwpm_hash_bucket) {
+               hash_bucket_head = get_hash_bucket_head(
+                                       &map_info->local_sockaddr,
+                                       &map_info->mapped_sockaddr);
+               /* NULL is returned for an unsupported address family */
+               if (hash_bucket_head) {
+                       hlist_add_head(&map_info->hlist_node,
+                                      hash_bucket_head);
+                       ret = 0;
+               }
+       }
+       spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
+       /* the record was not linked anywhere, so nobody else can free it */
+       if (ret)
+               kfree(map_info);
+       return ret;
+}
+EXPORT_SYMBOL(iwpm_create_mapinfo);
+
+/*
+ * iwpm_remove_mapinfo - Remove and free one mapping record
+ * @local_sockaddr: Local address, used to select the hash bucket
+ * @mapped_local_addr: Mapped address, used to select the hash bucket and
+ *                     to identify the record inside it
+ *
+ * Only the first record of the bucket whose mapped address compares equal
+ * is removed.
+ *
+ * Returns 0 if a record was removed, otherwise -EINVAL (no hash table,
+ * unsupported address family or no matching record).
+ */
+int iwpm_remove_mapinfo(struct sockaddr_storage *local_sockaddr,
+                       struct sockaddr_storage *mapped_local_addr)
+{
+       struct hlist_node *tmp_hlist_node;
+       struct hlist_head *hash_bucket_head;
+       struct iwpm_mapping_info *map_info = NULL;
+       unsigned long flags;
+       int ret = -EINVAL;
+
+       spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
+       if (iwpm_hash_bucket) {
+               hash_bucket_head = get_hash_bucket_head(
+                                       local_sockaddr,
+                                       mapped_local_addr);
+               /* NULL is returned for an unsupported address family */
+               if (!hash_bucket_head)
+                       goto remove_mapinfo_exit;
+
+               hlist_for_each_entry_safe(map_info, tmp_hlist_node,
+                                       hash_bucket_head, hlist_node) {
+
+                       if (!iwpm_compare_sockaddr(&map_info->mapped_sockaddr,
+                                               mapped_local_addr)) {
+
+                               hlist_del_init(&map_info->hlist_node);
+                               kfree(map_info);
+                               ret = 0;
+                               break;
+                       }
+               }
+       }
+remove_mapinfo_exit:
+       spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
+       return ret;
+}
+EXPORT_SYMBOL(iwpm_remove_mapinfo);
+
+/*
+ * Free every mapping record and then the hash table itself.
+ * The table pointer is reset to NULL under the mapinfo lock.
+ */
+static void free_hash_bucket(void)
+{
+       struct iwpm_mapping_info *entry;
+       struct hlist_node *next;
+       unsigned long flags;
+       int bucket = 0;
+
+       spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
+       /* unlink and release the records, one bucket at a time */
+       while (bucket < IWPM_HASH_BUCKET_SIZE) {
+               hlist_for_each_entry_safe(entry, next,
+                               &iwpm_hash_bucket[bucket], hlist_node) {
+                       hlist_del_init(&entry->hlist_node);
+                       kfree(entry);
+               }
+               bucket++;
+       }
+       /* release the bucket array */
+       kfree(iwpm_hash_bucket);
+       iwpm_hash_bucket = NULL;
+       spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
+}
+
+/*
+ * iwpm_get_nlmsg_request - Allocate, initialize and enlist a netlink request
+ * @nlmsg_seq: Sequence number of the netlink message
+ * @nl_client: The index of the netlink client
+ * @gfp: Allocation flags for the request
+ *
+ * The request is returned with a reference count of two (kref_init() plus
+ * kref_get()).
+ *
+ * Returns the new request, or NULL if the allocation fails.
+ */
+struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq,
+                                       u8 nl_client, gfp_t gfp)
+{
+       struct iwpm_nlmsg_request *nlmsg_request = NULL;
+       unsigned long flags;
+
+       nlmsg_request = kzalloc(sizeof(struct iwpm_nlmsg_request), gfp);
+       if (!nlmsg_request) {
+               pr_err("%s Unable to allocate a nlmsg_request\n", __func__);
+               return NULL;
+       }
+       /*
+        * Initialize every field before the request becomes visible on the
+        * list: iwpm_find_nlmsg_request() compares nlmsg_seq and takes a
+        * reference on anything it finds there, and the response handlers
+        * wake up waitq.
+        */
+       kref_init(&nlmsg_request->kref);
+       kref_get(&nlmsg_request->kref);
+       init_waitqueue_head(&nlmsg_request->waitq);
+       nlmsg_request->nlmsg_seq = nlmsg_seq;
+       nlmsg_request->nl_client = nl_client;
+       nlmsg_request->request_done = 0;
+       nlmsg_request->err_code = 0;
+
+       spin_lock_irqsave(&iwpm_nlmsg_req_lock, flags);
+       list_add_tail(&nlmsg_request->inprocess_list, &iwpm_nlmsg_req_list);
+       spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, flags);
+
+       return nlmsg_request;
+}
+
+/*
+ * kref release function: take the request off the in-process list and
+ * free it. A request that was never marked done is reported via pr_debug.
+ */
+void iwpm_free_nlmsg_request(struct kref *kref)
+{
+       struct iwpm_nlmsg_request *req =
+               container_of(kref, struct iwpm_nlmsg_request, kref);
+       unsigned long irq_flags;
+
+       spin_lock_irqsave(&iwpm_nlmsg_req_lock, irq_flags);
+       list_del_init(&req->inprocess_list);
+       spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, irq_flags);
+
+       if (req->request_done == 0)
+               pr_debug("%s Freeing incomplete nlmsg request (seq = %u).\n",
+                       __func__, req->nlmsg_seq);
+       kfree(req);
+}
+
+/*
+ * Look up the in-process request with the given sequence number.
+ * A reference is taken on the request that is returned; NULL is
+ * returned when no request matches.
+ */
+struct iwpm_nlmsg_request *iwpm_find_nlmsg_request(__u32 echo_seq)
+{
+       struct iwpm_nlmsg_request *cursor;
+       struct iwpm_nlmsg_request *match = NULL;
+       unsigned long irq_flags;
+
+       spin_lock_irqsave(&iwpm_nlmsg_req_lock, irq_flags);
+       list_for_each_entry(cursor, &iwpm_nlmsg_req_list, inprocess_list) {
+               if (cursor->nlmsg_seq != echo_seq)
+                       continue;
+               /* first match wins */
+               kref_get(&cursor->kref);
+               match = cursor;
+               break;
+       }
+       spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, irq_flags);
+       return match;
+}
+
+/*
+ * Wait until the request is marked done or IWPM_NL_TIMEOUT expires, then
+ * drop one reference on it.
+ *
+ * Returns the request's err_code when it completed in time, -EINVAL on
+ * timeout.
+ *
+ * NOTE(review): the wait queue head is initialized here, i.e. after the
+ * request may already be visible to the response handlers - confirm that
+ * no handler can run wake_up() before this point.
+ */
+int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request)
+{
+       int status;
+
+       init_waitqueue_head(&nlmsg_request->waitq);
+
+       if (wait_event_timeout(nlmsg_request->waitq,
+                       (nlmsg_request->request_done != 0), IWPM_NL_TIMEOUT)) {
+               status = nlmsg_request->err_code;
+       } else {
+               status = -EINVAL;
+               pr_info("%s: Timeout %d sec for netlink request (seq = %u)\n",
+                       __func__, (IWPM_NL_TIMEOUT/HZ), nlmsg_request->nlmsg_seq);
+       }
+       kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
+       return status;
+}
+
+/* Atomically advance the shared sequence counter and return its new value. */
+int iwpm_get_nlmsg_seq(void)
+{
+       int next_seq = atomic_inc_return(&iwpm_admin.nlmsg_seq);
+
+       return next_seq;
+}
+
+/* Return the client's valid flag, or 0 for an out-of-range client index. */
+int iwpm_valid_client(u8 nl_client)
+{
+       return (nl_client < RDMA_NL_NUM_CLIENTS) ?
+               iwpm_admin.client_list[nl_client] : 0;
+}
+
+/* Store the client's valid flag; an out-of-range index is ignored. */
+void iwpm_set_valid(u8 nl_client, int valid)
+{
+       if (nl_client < RDMA_NL_NUM_CLIENTS)
+               iwpm_admin.client_list[nl_client] = valid;
+}
+
+/*
+ * Return the client's registered flag. Callers are expected to pass a
+ * valid client; an out-of-range index is reported as not registered
+ * instead of reading past the end of reg_list.
+ */
+int iwpm_registered_client(u8 nl_client)
+{
+       if (nl_client >= RDMA_NL_NUM_CLIENTS)
+               return 0;
+       return iwpm_admin.reg_list[nl_client];
+}
+
+/*
+ * Store the client's registered flag. Callers are expected to pass a
+ * valid client; an out-of-range index is ignored instead of writing past
+ * the end of reg_list.
+ */
+void iwpm_set_registered(u8 nl_client, int reg)
+{
+       if (nl_client >= RDMA_NL_NUM_CLIENTS)
+               return;
+       iwpm_admin.reg_list[nl_client] = reg;
+}
+
+/*
+ * Compare the address and port of two socket addresses.
+ * Returns 0 when family, address and port are all equal, 1 otherwise
+ * (an unsupported family is logged and also yields 1).
+ */
+int iwpm_compare_sockaddr(struct sockaddr_storage *a_sockaddr,
+                               struct sockaddr_storage *b_sockaddr)
+{
+       const struct sockaddr_in *a4, *b4;
+       const struct sockaddr_in6 *a6, *b6;
+
+       if (a_sockaddr->ss_family != b_sockaddr->ss_family)
+               return 1;
+
+       switch (a_sockaddr->ss_family) {
+       case AF_INET:
+               a4 = (const struct sockaddr_in *)a_sockaddr;
+               b4 = (const struct sockaddr_in *)b_sockaddr;
+               if (memcmp(&a4->sin_addr, &b4->sin_addr,
+                          sizeof(struct in_addr)))
+                       break;
+               if (a4->sin_port != b4->sin_port)
+                       break;
+               return 0;
+       case AF_INET6:
+               a6 = (const struct sockaddr_in6 *)a_sockaddr;
+               b6 = (const struct sockaddr_in6 *)b_sockaddr;
+               if (memcmp(&a6->sin6_addr, &b6->sin6_addr,
+                          sizeof(struct in6_addr)))
+                       break;
+               if (a6->sin6_port != b6->sin6_port)
+                       break;
+               return 0;
+       default:
+               pr_err("%s: Invalid sockaddr family\n", __func__);
+               break;
+       }
+       /* anything that did not match exactly is "different" */
+       return 1;
+}
+
+/*
+ * Allocate an skb of NLMSG_GOODSIZE and put a request header for the given
+ * client/opcode into it. Returns the skb, or NULL if the allocation or the
+ * header placement fails (the skb is freed in the latter case).
+ */
+struct sk_buff *iwpm_create_nlmsg(u32 nl_op, struct nlmsghdr **nlh,
+                                               int nl_client)
+{
+       struct sk_buff *skb = dev_alloc_skb(NLMSG_GOODSIZE);
+
+       if (!skb) {
+               pr_err("%s Unable to allocate skb\n", __func__);
+               return NULL;
+       }
+       if (ibnl_put_msg(skb, nlh, 0, 0, nl_client, nl_op, NLM_F_REQUEST))
+               return skb;
+
+       pr_warn("%s: Unable to put the nlmsg header\n", __func__);
+       dev_kfree_skb(skb);
+       return NULL;
+}
+
+/*
+ * Validate the received message against the policy, parse its attributes
+ * into nltb and make sure attributes 1..policy_max-1 are all present.
+ * Returns 0 on success; on failure the first failing step's error is
+ * logged and returned.
+ */
+int iwpm_parse_nlmsg(struct netlink_callback *cb, int policy_max,
+                                  const struct nla_policy *nlmsg_policy,
+                                  struct nlattr *nltb[], const char *msg_type)
+{
+       const char *err_str = "";
+       int hdr_len = 0;
+       int ret;
+
+       ret = nlmsg_validate(cb->nlh, hdr_len, policy_max-1, nlmsg_policy);
+       if (ret) {
+               err_str = "Invalid attribute";
+       } else {
+               ret = nlmsg_parse(cb->nlh, hdr_len, nltb, policy_max-1,
+                                 nlmsg_policy);
+               if (ret) {
+                       err_str = "Unable to parse the nlmsg";
+               } else {
+                       ret = iwpm_validate_nlmsg_attr(nltb, policy_max);
+                       if (ret)
+                               err_str = "Invalid NULL attribute";
+               }
+       }
+       if (!ret)
+               return 0;
+
+       pr_warn("%s: %s (msg type %s ret = %d)\n",
+                       __func__, err_str, msg_type, ret);
+       return ret;
+}
+
+/*
+ * Debug-print the address and port (decimal and hex, host byte order) of
+ * an IPv4 or IPv6 socket address, prefixed by msg. Other address families
+ * print nothing.
+ */
+void iwpm_print_sockaddr(struct sockaddr_storage *sockaddr, char *msg)
+{
+       if (sockaddr->ss_family == AF_INET) {
+               struct sockaddr_in *v4 = (struct sockaddr_in *)sockaddr;
+
+               pr_debug("%s IPV4 %pI4: %u(0x%04X)\n",
+                       msg, &v4->sin_addr,
+                       ntohs(v4->sin_port),
+                       ntohs(v4->sin_port));
+       } else if (sockaddr->ss_family == AF_INET6) {
+               struct sockaddr_in6 *v6 = (struct sockaddr_in6 *)sockaddr;
+
+               pr_debug("%s IPV6 %pI6: %u(0x%04X)\n",
+                       msg, &v6->sin6_addr,
+                       ntohs(v6->sin6_port),
+                       ntohs(v6->sin6_port));
+       }
+}
+
+/* Hash an IPv6 address, then mix the port into the result. */
+static u32 iwpm_ipv6_jhash(struct sockaddr_in6 *ipv6_sockaddr)
+{
+       u32 addr_hash;
+
+       addr_hash = jhash(&ipv6_sockaddr->sin6_addr, sizeof(struct in6_addr), 0);
+       return jhash_2words(addr_hash, (__force u32) ipv6_sockaddr->sin6_port, 0);
+}
+
+/* Hash an IPv4 address, then mix the port into the result. */
+static u32 iwpm_ipv4_jhash(struct sockaddr_in *ipv4_sockaddr)
+{
+       u32 addr_hash;
+
+       addr_hash = jhash(&ipv4_sockaddr->sin_addr, sizeof(struct in_addr), 0);
+       return jhash_2words(addr_hash, (__force u32) ipv4_sockaddr->sin_port, 0);
+}
+
+/*
+ * Select the hash bucket for a local/mapped address pair. The family of
+ * the local address decides how both addresses are hashed. Returns NULL
+ * (after logging) when that family is neither AF_INET nor AF_INET6.
+ */
+static struct hlist_head *get_hash_bucket_head(struct sockaddr_storage
+                                              *local_sockaddr,
+                                              struct sockaddr_storage
+                                              *mapped_sockaddr)
+{
+       u32 lhash, mhash;
+
+       switch (local_sockaddr->ss_family) {
+       case AF_INET:
+               lhash = iwpm_ipv4_jhash((struct sockaddr_in *) local_sockaddr);
+               mhash = iwpm_ipv4_jhash((struct sockaddr_in *) mapped_sockaddr);
+               break;
+       case AF_INET6:
+               lhash = iwpm_ipv6_jhash((struct sockaddr_in6 *) local_sockaddr);
+               mhash = iwpm_ipv6_jhash((struct sockaddr_in6 *) mapped_sockaddr);
+               break;
+       default:
+               pr_err("%s: Invalid sockaddr family\n", __func__);
+               return NULL;
+       }
+
+       /*
+        * Equal hashes are used as they are (the original comment ties this
+        * case to the port mapper not being available); otherwise the two
+        * hashes are combined.
+        */
+       if (lhash != mhash)
+               lhash = jhash_2words(lhash, mhash, 0);
+
+       return &iwpm_hash_bucket[lhash & IWPM_HASH_BUCKET_MASK];
+}
+
+/*
+ * Send a RDMA_NL_IWPM_MAPINFO_NUM message carrying a zero sequence
+ * attribute and the number of mapping records to the given pid.
+ * Returns 0 on success, -EINVAL if the message cannot be created, or the
+ * error of the failing attribute/unicast call.
+ */
+static int send_mapinfo_num(u32 mapping_num, u8 nl_client, int iwpm_pid)
+{
+       struct nlmsghdr *nlh;
+       struct sk_buff *skb;
+       u32 msg_seq = 0;
+       int ret;
+
+       skb = iwpm_create_nlmsg(RDMA_NL_IWPM_MAPINFO_NUM, &nlh, nl_client);
+       if (!skb) {
+               pr_info("%s: %s\n", __func__, "Unable to create a nlmsg");
+               return -EINVAL;
+       }
+       nlh->nlmsg_seq = iwpm_get_nlmsg_seq();
+
+       ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq, IWPM_NLA_MAPINFO_SEQ);
+       if (!ret)
+               ret = ibnl_put_attr(skb, nlh, sizeof(u32),
+                               &mapping_num, IWPM_NLA_MAPINFO_SEND_NUM);
+       if (ret) {
+               pr_info("%s: %s\n", __func__,
+                       "Unable to put attribute of mapinfo number nlmsg");
+               dev_kfree_skb(skb);
+               return ret;
+       }
+
+       ret = ibnl_unicast(skb, nlh, iwpm_pid);
+       if (ret) {
+               /*
+                * The skb is deliberately not freed after a failed send -
+                * presumably the netlink send path has already released it.
+                */
+               pr_info("%s: %s\n", __func__, "Unable to send a nlmsg");
+               return ret;
+       }
+       pr_debug("%s: Sent mapping number = %d\n", __func__, mapping_num);
+       return 0;
+}
+
+/*
+ * Append an NLMSG_DONE header to the skb and unicast the skb to the given
+ * pid. The skb is always consumed: both call sites drop their pointer
+ * after calling this function, so it is freed here when the header cannot
+ * be added.
+ *
+ * Returns 0 on success or when skb is NULL, -ENOMEM if the header does not
+ * fit, otherwise the error returned by ibnl_unicast().
+ */
+static int send_nlmsg_done(struct sk_buff *skb, u8 nl_client, int iwpm_pid)
+{
+       struct nlmsghdr *nlh = NULL;
+       int ret = 0;
+
+       if (!skb)
+               return ret;
+       if (!(ibnl_put_msg(skb, &nlh, 0, 0, nl_client,
+                          RDMA_NL_IWPM_MAPINFO, NLM_F_MULTI))) {
+               pr_warn("%s Unable to put NLMSG_DONE\n", __func__);
+               /* nothing was sent, so the skb is still ours to release */
+               dev_kfree_skb(skb);
+               return -ENOMEM;
+       }
+       nlh->nlmsg_type = NLMSG_DONE;
+       ret = ibnl_unicast(skb, (struct nlmsghdr *)skb->data, iwpm_pid);
+       if (ret)
+               pr_warn("%s Unable to send a nlmsg\n", __func__);
+       return ret;
+}
+
+/*
+ * iwpm_send_mapinfo - Send a client's mapping records to the port mapper
+ * @nl_client: The index of the netlink client
+ * @iwpm_pid: The pid of the user space port mapper
+ *
+ * Every record of the client is packed as one RDMA_NL_IWPM_MAPINFO message
+ * (local and mapped address attributes) into an skb. When an skb is nearly
+ * full it is terminated with NLMSG_DONE, sent, and a new one is started;
+ * at most IWPM_MAPINFO_SKB_COUNT skbs are used. Finally the number of
+ * records is sent with send_mapinfo_num().
+ *
+ * Returns the result of send_mapinfo_num() (0 on success), or an error
+ * code if building or sending the records fails.
+ */
+int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid)
+{
+       struct iwpm_mapping_info *map_info;
+       struct sk_buff *skb = NULL;
+       struct nlmsghdr *nlh;
+       int skb_num = 0, mapping_num = 0;
+       int i = 0, nlmsg_bytes = 0;
+       unsigned long flags;
+       const char *err_str = "";
+       /* must start at 0: it is tested below even if no record matched */
+       int ret = 0;
+
+       skb = dev_alloc_skb(NLMSG_GOODSIZE);
+       if (!skb) {
+               ret = -ENOMEM;
+               err_str = "Unable to allocate skb";
+               goto send_mapping_info_exit;
+       }
+       skb_num++;
+       spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
+       /* without a hash table there are no records to report */
+       if (!iwpm_hash_bucket)
+               goto send_mapping_info_unlock;
+       for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) {
+               hlist_for_each_entry(map_info, &iwpm_hash_bucket[i],
+                                    hlist_node) {
+                       if (map_info->nl_client != nl_client)
+                               continue;
+                       nlh = NULL;
+                       if (!(ibnl_put_msg(skb, &nlh, 0, 0, nl_client,
+                                       RDMA_NL_IWPM_MAPINFO, NLM_F_MULTI))) {
+                               ret = -ENOMEM;
+                               err_str = "Unable to put the nlmsg header";
+                               goto send_mapping_info_unlock;
+                       }
+                       err_str = "Unable to put attribute of the nlmsg";
+                       ret = ibnl_put_attr(skb, nlh,
+                                       sizeof(struct sockaddr_storage),
+                                       &map_info->local_sockaddr,
+                                       IWPM_NLA_MAPINFO_LOCAL_ADDR);
+                       if (ret)
+                               goto send_mapping_info_unlock;
+
+                       ret = ibnl_put_attr(skb, nlh,
+                                       sizeof(struct sockaddr_storage),
+                                       &map_info->mapped_sockaddr,
+                                       IWPM_NLA_MAPINFO_MAPPED_ADDR);
+                       if (ret)
+                               goto send_mapping_info_unlock;
+
+                       iwpm_print_sockaddr(&map_info->local_sockaddr,
+                               "send_mapping_info: Local sockaddr:");
+                       iwpm_print_sockaddr(&map_info->mapped_sockaddr,
+                               "send_mapping_info: Mapped local sockaddr:");
+                       mapping_num++;
+                       nlmsg_bytes += nlh->nlmsg_len;
+
+                       /* check if all mappings can fit in one skb */
+                       if (NLMSG_GOODSIZE - nlmsg_bytes < nlh->nlmsg_len * 2) {
+                               /* and leave room for NLMSG_DONE */
+                               nlmsg_bytes = 0;
+                               skb_num++;
+                               /*
+                                * NOTE(review): the lock is dropped in the
+                                * middle of the bucket walk; confirm that
+                                * map_info and the hash table cannot be
+                                * freed before the lock is taken again.
+                                */
+                               spin_unlock_irqrestore(&iwpm_mapinfo_lock,
+                                                      flags);
+                               /* send the skb */
+                               ret = send_nlmsg_done(skb, nl_client, iwpm_pid);
+                               skb = NULL;
+                               if (ret) {
+                                       err_str = "Unable to send map info";
+                                       goto send_mapping_info_exit;
+                               }
+                               if (skb_num == IWPM_MAPINFO_SKB_COUNT) {
+                                       ret = -ENOMEM;
+                                       err_str = "Insufficient skbs for map info";
+                                       goto send_mapping_info_exit;
+                               }
+                               skb = dev_alloc_skb(NLMSG_GOODSIZE);
+                               if (!skb) {
+                                       ret = -ENOMEM;
+                                       err_str = "Unable to allocate skb";
+                                       goto send_mapping_info_exit;
+                               }
+                               spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
+                       }
+               }
+       }
+send_mapping_info_unlock:
+       spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
+send_mapping_info_exit:
+       if (ret) {
+               pr_warn("%s: %s (ret = %d)\n", __func__, err_str, ret);
+               if (skb)
+                       dev_kfree_skb(skb);
+               return ret;
+       }
+       /* the result of the final NLMSG_DONE send is not checked */
+       send_nlmsg_done(skb, nl_client, iwpm_pid);
+       return send_mapinfo_num(mapping_num, nl_client, iwpm_pid);
+}
+
+/*
+ * Report whether the hash table holds at least one mapping record.
+ * Returns 1 if a non-empty bucket exists, 0 otherwise (including when
+ * the hash table has not been created).
+ */
+int iwpm_mapinfo_available(void)
+{
+       unsigned long irq_flags;
+       int bucket, found = 0;
+
+       spin_lock_irqsave(&iwpm_mapinfo_lock, irq_flags);
+       /* stop at the first non-empty bucket */
+       for (bucket = 0;
+            iwpm_hash_bucket && !found && bucket < IWPM_HASH_BUCKET_SIZE;
+            bucket++)
+               found = !hlist_empty(&iwpm_hash_bucket[bucket]);
+       spin_unlock_irqrestore(&iwpm_mapinfo_lock, irq_flags);
+       return found;
+}
diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h
new file mode 100644 (file)
index 0000000..9777c86
--- /dev/null
@@ -0,0 +1,238 @@
+/*
+ * Copyright (c) 2014 Intel Corporation. All rights reserved.
+ * Copyright (c) 2014 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer in the documentation and/or other materials
+ *       provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _IWPM_UTIL_H
+#define _IWPM_UTIL_H
+
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/delay.h>
+#include <linux/workqueue.h>
+#include <linux/mutex.h>
+#include <linux/jhash.h>
+#include <linux/kref.h>
+#include <net/netlink.h>
+#include <linux/errno.h>
+#include <rdma/iw_portmap.h>
+#include <rdma/rdma_netlink.h>
+
+
+#define IWPM_NL_RETRANS                3
+/* how long iwpm_wait_complete_req() waits for a response, in jiffies */
+#define IWPM_NL_TIMEOUT                (10*HZ)
+/* upper bound on the skbs iwpm_send_mapinfo() uses for one transfer */
+#define IWPM_MAPINFO_SKB_COUNT 20
+
+/* negative values are parenthesized so they expand safely in expressions */
+#define IWPM_PID_UNDEFINED     (-1)
+#define IWPM_PID_UNAVAILABLE   (-2)
+
+/*
+ * One outstanding netlink request to the port mapper.
+ *
+ * inprocess_list links the request into the in-process request list,
+ * nlmsg_seq is the key used by iwpm_find_nlmsg_request(), request_done is
+ * the condition iwpm_wait_complete_req() sleeps on via waitq, err_code is
+ * the value that function returns once the request is done, and kref
+ * counts the references (released through iwpm_free_nlmsg_request()).
+ */
+struct iwpm_nlmsg_request {
+       struct list_head    inprocess_list;
+       __u32               nlmsg_seq;
+       void                *req_buffer;
+       u8                  nl_client;
+       u8                  request_done;
+       u16                 err_code;
+       wait_queue_head_t   waitq;
+       struct kref         kref;
+};
+
+/*
+ * One record of the mapping info hash table: a local address, the address
+ * it is mapped to, and the netlink client that created the record.
+ */
+struct iwpm_mapping_info {
+       struct hlist_node hlist_node;
+       struct sockaddr_storage local_sockaddr;
+       struct sockaddr_storage mapped_sockaddr;
+       u8     nl_client;
+};
+
+/*
+ * Global port mapper bookkeeping: refcount counts iwpm_init() users,
+ * nlmsg_seq is the source of netlink sequence numbers, client_list and
+ * reg_list hold the per-client valid and registered flags.
+ */
+struct iwpm_admin_data {
+       atomic_t refcount;
+       atomic_t nlmsg_seq;
+       int      client_list[RDMA_NL_NUM_CLIENTS];
+       int      reg_list[RDMA_NL_NUM_CLIENTS];
+};
+
+/**
+ * iwpm_get_nlmsg_request - Allocate and initialize netlink message request
+ * @nlmsg_seq: Sequence number of the netlink message
+ * @nl_client: The index of the netlink client
+ * @gfp: Indicates how the memory for the request should be allocated
+ *
+ * Returns the newly allocated netlink request object if successful,
+ * otherwise returns NULL
+ */
+struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq,
+                                               u8 nl_client, gfp_t gfp);
+
+/**
+ * iwpm_free_nlmsg_request - Deallocate netlink message request
+ * @kref: Holds reference of netlink message request
+ */
+void iwpm_free_nlmsg_request(struct kref *kref);
+
+/**
+ * iwpm_find_nlmsg_request - Find netlink message request in the request list
+ * @echo_seq: Sequence number of the netlink request to find
+ *
+ * Returns the found netlink message request,
+ * if not found, returns NULL
+ */
+struct iwpm_nlmsg_request *iwpm_find_nlmsg_request(__u32 echo_seq);
+
+/**
+ * iwpm_wait_complete_req - Block while servicing the netlink request
+ * @nlmsg_request: Netlink message request to service
+ *
+ * Wakes up, after the request is completed or expired
+ * Returns 0 if the request is complete without error
+ */
+int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request);
+
+/**
+ * iwpm_get_nlmsg_seq - Get the sequence number for a netlink
+ *                     message to send to the port mapper
+ *
+ * Returns the sequence number for the netlink message.
+ */
+int iwpm_get_nlmsg_seq(void);
+
+/**
+ * iwpm_valid_client - Check if the port mapper client is valid
+ * @nl_client: The index of the netlink client
+ *
+ * Valid clients need to call iwpm_init() before using
+ * the port mapper
+ */
+int iwpm_valid_client(u8 nl_client);
+
+/**
+ * iwpm_set_valid - Set the port mapper client to valid or not
+ * @nl_client: The index of the netlink client
+ * @valid: 1 if valid or 0 if invalid
+ */
+void iwpm_set_valid(u8 nl_client, int valid);
+
+/**
+ * iwpm_registered_client - Check if the port mapper client is registered
+ * @nl_client: The index of the netlink client
+ *
+ * Call iwpm_register_pid() to register a client
+ */
+int iwpm_registered_client(u8 nl_client);
+
+/**
+ * iwpm_set_registered - Set the port mapper client to registered or not
+ * @nl_client: The index of the netlink client
+ * @reg: 1 if registered or 0 if not
+ */
+void iwpm_set_registered(u8 nl_client, int reg);
+
+/**
+ * iwpm_send_mapinfo - Send local and mapped IPv4/IPv6 address info of
+ *                     a client to the user space port mapper
+ * @nl_client: The index of the netlink client
+ * @iwpm_pid: The pid of the user space port mapper
+ *
+ * Returns 0 if the mapping info records and their count were sent,
+ * otherwise returns an error code
+ */
+int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid);
+
+/**
+ * iwpm_mapinfo_available - Check if any mapping info records are available
+ *                         in the hash table
+ *
+ * Returns 1 if mapping information is available, otherwise returns 0
+ */
+int iwpm_mapinfo_available(void);
+
+/**
+ * iwpm_compare_sockaddr - Compare two sockaddr storage structs
+ *
+ * Returns 0 if they are holding the same ip/tcp address info,
+ * otherwise returns 1
+ */
+int iwpm_compare_sockaddr(struct sockaddr_storage *a_sockaddr,
+                       struct sockaddr_storage *b_sockaddr);
+
+/**
+ * iwpm_validate_nlmsg_attr - Check for NULL netlink attributes
+ * @nltb: Table of parsed netlink attribute pointers
+ * @nla_count: Number of entries in the table
+ *
+ * Entry 0 is not inspected. Returns -EINVAL if any of the entries
+ * 1 .. nla_count - 1 is NULL, otherwise returns 0
+ */
+static inline int iwpm_validate_nlmsg_attr(struct nlattr *nltb[],
+                                          int nla_count)
+{
+       int idx = 1;
+
+       while (idx < nla_count) {
+               if (nltb[idx] == NULL)
+                       return -EINVAL;
+               idx++;
+       }
+       return 0;
+}
+
+/**
+ * iwpm_create_nlmsg - Allocate skb and form a netlink message
+ * @nl_op: Netlink message opcode
+ * @nlh: Holds address of the netlink message header in skb
+ * @nl_client: The index of the netlink client
+ *
+ * Returns the newly allocated skb, or NULL if the skb cannot be allocated
+ * or its tailroom is insufficient to store the message header and payload
+ */
+struct sk_buff *iwpm_create_nlmsg(u32 nl_op, struct nlmsghdr **nlh,
+                                       int nl_client);
+
+/**
+ * iwpm_parse_nlmsg - Validate and parse the received netlink message
+ * @cb: Netlink callback structure
+ * @policy_max: Maximum attribute type to be expected
+ * @nlmsg_policy: Validation policy
+ * @nltb: Array to store policy_max parsed elements
+ * @msg_type: Type of netlink message
+ *
+ * Returns 0 on success or a negative error code
+ */
+int iwpm_parse_nlmsg(struct netlink_callback *cb, int policy_max,
+                               const struct nla_policy *nlmsg_policy,
+                               struct nlattr *nltb[], const char *msg_type);
+
+/**
+ * iwpm_print_sockaddr - Print IPv4/IPv6 address and TCP port
+ * @sockaddr: Socket address to print
+ * @msg: Message to print
+ */
+void iwpm_print_sockaddr(struct sockaddr_storage *sockaddr, char *msg);
+#endif
index a1e9cba849446b1cd5ac840b66b18355451377b4..23dd5a5c7597122de45eddf8211c736bedae553e 100644 (file)
@@ -103,13 +103,13 @@ int ibnl_remove_client(int index)
 EXPORT_SYMBOL(ibnl_remove_client);
 
 void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq,
-                  int len, int client, int op)
+                  int len, int client, int op, int flags)
 {
        unsigned char *prev_tail;
 
        prev_tail = skb_tail_pointer(skb);
        *nlh = nlmsg_put(skb, 0, seq, RDMA_NL_GET_TYPE(client, op),
-                        len, NLM_F_MULTI);
+                        len, flags);
        if (!*nlh)
                goto out_nlmsg_trim;
        (*nlh)->nlmsg_len = skb_tail_pointer(skb) - prev_tail;
@@ -172,6 +172,20 @@ static void ibnl_rcv(struct sk_buff *skb)
        mutex_unlock(&ibnl_mutex);
 }
 
+int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh,
+                       __u32 pid)
+{
+       return nlmsg_unicast(nls, skb, pid);
+}
+EXPORT_SYMBOL(ibnl_unicast);
+
+int ibnl_multicast(struct sk_buff *skb, struct nlmsghdr *nlh,
+                       unsigned int group, gfp_t flags)
+{
+       return nlmsg_multicast(nls, skb, 0, group, flags);
+}
+EXPORT_SYMBOL(ibnl_multicast);
+
 int __init ibnl_init(void)
 {
        struct netlink_kernel_cfg cfg = {
index f820958e4047fbf47cb0e51934aa7d36cb314356..233eaf541f55121a3213f55f6771cda15f7e40d2 100644 (file)
@@ -618,7 +618,7 @@ static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
 
 static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
 {
-       bool preload = gfp_mask & __GFP_WAIT;
+       bool preload = !!(gfp_mask & __GFP_WAIT);
        unsigned long flags;
        int ret, id;
 
index 7d3292c7b4b4fa53cf8dbb928998ecddb7bd1094..cbd0383f622e0311bd4f56652648609604cb94c4 100644 (file)
@@ -429,15 +429,19 @@ static void ib_port_release(struct kobject *kobj)
        struct attribute *a;
        int i;
 
-       for (i = 0; (a = p->gid_group.attrs[i]); ++i)
-               kfree(a);
+       if (p->gid_group.attrs) {
+               for (i = 0; (a = p->gid_group.attrs[i]); ++i)
+                       kfree(a);
 
-       kfree(p->gid_group.attrs);
+               kfree(p->gid_group.attrs);
+       }
 
-       for (i = 0; (a = p->pkey_group.attrs[i]); ++i)
-               kfree(a);
+       if (p->pkey_group.attrs) {
+               for (i = 0; (a = p->pkey_group.attrs[i]); ++i)
+                       kfree(a);
 
-       kfree(p->pkey_group.attrs);
+               kfree(p->pkey_group.attrs);
+       }
 
        kfree(p);
 }
@@ -534,10 +538,12 @@ static int add_port(struct ib_device *device, int port_num,
        p->port_num   = port_num;
 
        ret = kobject_init_and_add(&p->kobj, &port_type,
-                                  kobject_get(device->ports_parent),
+                                  device->ports_parent,
                                   "%d", port_num);
-       if (ret)
-               goto err_put;
+       if (ret) {
+               kfree(p);
+               return ret;
+       }
 
        ret = sysfs_create_group(&p->kobj, &pma_group);
        if (ret)
@@ -585,6 +591,7 @@ err_free_pkey:
                kfree(p->pkey_group.attrs[i]);
 
        kfree(p->pkey_group.attrs);
+       p->pkey_group.attrs = NULL;
 
 err_remove_gid:
        sysfs_remove_group(&p->kobj, &p->gid_group);
@@ -594,13 +601,13 @@ err_free_gid:
                kfree(p->gid_group.attrs[i]);
 
        kfree(p->gid_group.attrs);
+       p->gid_group.attrs = NULL;
 
 err_remove_pma:
        sysfs_remove_group(&p->kobj, &pma_group);
 
 err_put:
-       kobject_put(device->ports_parent);
-       kfree(p);
+       kobject_put(&p->kobj);
        return ret;
 }
 
@@ -809,6 +816,22 @@ static struct attribute_group iw_stats_group = {
        .attrs  = iw_proto_stats_attrs,
 };
 
+static void free_port_list_attributes(struct ib_device *device)
+{
+       struct kobject *p, *t;
+
+       list_for_each_entry_safe(p, t, &device->port_list, entry) {
+               struct ib_port *port = container_of(p, struct ib_port, kobj);
+               list_del(&p->entry);
+               sysfs_remove_group(p, &pma_group);
+               sysfs_remove_group(p, &port->pkey_group);
+               sysfs_remove_group(p, &port->gid_group);
+               kobject_put(p);
+       }
+
+       kobject_put(device->ports_parent);
+}
+
 int ib_device_register_sysfs(struct ib_device *device,
                             int (*port_callback)(struct ib_device *,
                                                  u8, struct kobject *))
@@ -835,7 +858,7 @@ int ib_device_register_sysfs(struct ib_device *device,
        }
 
        device->ports_parent = kobject_create_and_add("ports",
-                                       kobject_get(&class_dev->kobj));
+                                                     &class_dev->kobj);
        if (!device->ports_parent) {
                ret = -ENOMEM;
                goto err_put;
@@ -862,21 +885,7 @@ int ib_device_register_sysfs(struct ib_device *device,
        return 0;
 
 err_put:
-       {
-               struct kobject *p, *t;
-               struct ib_port *port;
-
-               list_for_each_entry_safe(p, t, &device->port_list, entry) {
-                       list_del(&p->entry);
-                       port = container_of(p, struct ib_port, kobj);
-                       sysfs_remove_group(p, &pma_group);
-                       sysfs_remove_group(p, &port->pkey_group);
-                       sysfs_remove_group(p, &port->gid_group);
-                       kobject_put(p);
-               }
-       }
-
-       kobject_put(&class_dev->kobj);
+       free_port_list_attributes(device);
 
 err_unregister:
        device_unregister(class_dev);
@@ -887,22 +896,18 @@ err:
 
 void ib_device_unregister_sysfs(struct ib_device *device)
 {
-       struct kobject *p, *t;
-       struct ib_port *port;
-
        /* Hold kobject until ib_dealloc_device() */
-       kobject_get(&device->dev.kobj);
+       struct kobject *kobj_dev = kobject_get(&device->dev.kobj);
+       int i;
 
-       list_for_each_entry_safe(p, t, &device->port_list, entry) {
-               list_del(&p->entry);
-               port = container_of(p, struct ib_port, kobj);
-               sysfs_remove_group(p, &pma_group);
-               sysfs_remove_group(p, &port->pkey_group);
-               sysfs_remove_group(p, &port->gid_group);
-               kobject_put(p);
-       }
+       if (device->node_type == RDMA_NODE_RNIC && device->get_protocol_stats)
+               sysfs_remove_group(kobj_dev, &iw_stats_group);
+
+       free_port_list_attributes(device);
+
+       for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i)
+               device_remove_file(&device->dev, ib_class_attributes[i]);
 
-       kobject_put(device->ports_parent);
        device_unregister(&device->dev);
 }
 
index f0d588f8859ef5fd2f7679504351003ac77c2589..1acb99100556944ef499b9d3cdc26da30d2f17c5 100644 (file)
@@ -98,7 +98,7 @@ struct ib_umad_port {
 
 struct ib_umad_device {
        int                  start_port, end_port;
-       struct kref          ref;
+       struct kobject       kobj;
        struct ib_umad_port  port[0];
 };
 
@@ -134,14 +134,18 @@ static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS);
 static void ib_umad_add_one(struct ib_device *device);
 static void ib_umad_remove_one(struct ib_device *device);
 
-static void ib_umad_release_dev(struct kref *ref)
+static void ib_umad_release_dev(struct kobject *kobj)
 {
        struct ib_umad_device *dev =
-               container_of(ref, struct ib_umad_device, ref);
+               container_of(kobj, struct ib_umad_device, kobj);
 
        kfree(dev);
 }
 
+static struct kobj_type ib_umad_dev_ktype = {
+       .release = ib_umad_release_dev,
+};
+
 static int hdr_size(struct ib_umad_file *file)
 {
        return file->use_pkey_index ? sizeof (struct ib_user_mad_hdr) :
@@ -780,27 +784,19 @@ static int ib_umad_open(struct inode *inode, struct file *filp)
 {
        struct ib_umad_port *port;
        struct ib_umad_file *file;
-       int ret;
+       int ret = -ENXIO;
 
        port = container_of(inode->i_cdev, struct ib_umad_port, cdev);
-       if (port)
-               kref_get(&port->umad_dev->ref);
-       else
-               return -ENXIO;
 
        mutex_lock(&port->file_mutex);
 
-       if (!port->ib_dev) {
-               ret = -ENXIO;
+       if (!port->ib_dev)
                goto out;
-       }
 
+       ret = -ENOMEM;
        file = kzalloc(sizeof *file, GFP_KERNEL);
-       if (!file) {
-               kref_put(&port->umad_dev->ref, ib_umad_release_dev);
-               ret = -ENOMEM;
+       if (!file)
                goto out;
-       }
 
        mutex_init(&file->mutex);
        spin_lock_init(&file->send_lock);
@@ -814,6 +810,13 @@ static int ib_umad_open(struct inode *inode, struct file *filp)
        list_add_tail(&file->port_list, &port->file_list);
 
        ret = nonseekable_open(inode, filp);
+       if (ret) {
+               list_del(&file->port_list);
+               kfree(file);
+               goto out;
+       }
+
+       kobject_get(&port->umad_dev->kobj);
 
 out:
        mutex_unlock(&port->file_mutex);
@@ -852,7 +855,7 @@ static int ib_umad_close(struct inode *inode, struct file *filp)
        mutex_unlock(&file->port->file_mutex);
 
        kfree(file);
-       kref_put(&dev->ref, ib_umad_release_dev);
+       kobject_put(&dev->kobj);
 
        return 0;
 }
@@ -880,10 +883,6 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp)
        int ret;
 
        port = container_of(inode->i_cdev, struct ib_umad_port, sm_cdev);
-       if (port)
-               kref_get(&port->umad_dev->ref);
-       else
-               return -ENXIO;
 
        if (filp->f_flags & O_NONBLOCK) {
                if (down_trylock(&port->sm_sem)) {
@@ -898,17 +897,27 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp)
        }
 
        ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props);
-       if (ret) {
-               up(&port->sm_sem);
-               goto fail;
-       }
+       if (ret)
+               goto err_up_sem;
 
        filp->private_data = port;
 
-       return nonseekable_open(inode, filp);
+       ret = nonseekable_open(inode, filp);
+       if (ret)
+               goto err_clr_sm_cap;
+
+       kobject_get(&port->umad_dev->kobj);
+
+       return 0;
+
+err_clr_sm_cap:
+       swap(props.set_port_cap_mask, props.clr_port_cap_mask);
+       ib_modify_port(port->ib_dev, port->port_num, 0, &props);
+
+err_up_sem:
+       up(&port->sm_sem);
 
 fail:
-       kref_put(&port->umad_dev->ref, ib_umad_release_dev);
        return ret;
 }
 
@@ -927,7 +936,7 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp)
 
        up(&port->sm_sem);
 
-       kref_put(&port->umad_dev->ref, ib_umad_release_dev);
+       kobject_put(&port->umad_dev->kobj);
 
        return ret;
 }
@@ -995,6 +1004,7 @@ static int find_overflow_devnum(void)
 }
 
 static int ib_umad_init_port(struct ib_device *device, int port_num,
+                            struct ib_umad_device *umad_dev,
                             struct ib_umad_port *port)
 {
        int devnum;
@@ -1027,6 +1037,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
 
        cdev_init(&port->cdev, &umad_fops);
        port->cdev.owner = THIS_MODULE;
+       port->cdev.kobj.parent = &umad_dev->kobj;
        kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num);
        if (cdev_add(&port->cdev, base, 1))
                goto err_cdev;
@@ -1045,6 +1056,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
        base += IB_UMAD_MAX_PORTS;
        cdev_init(&port->sm_cdev, &umad_sm_fops);
        port->sm_cdev.owner = THIS_MODULE;
+       port->sm_cdev.kobj.parent = &umad_dev->kobj;
        kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num);
        if (cdev_add(&port->sm_cdev, base, 1))
                goto err_sm_cdev;
@@ -1138,7 +1150,7 @@ static void ib_umad_add_one(struct ib_device *device)
        if (!umad_dev)
                return;
 
-       kref_init(&umad_dev->ref);
+       kobject_init(&umad_dev->kobj, &ib_umad_dev_ktype);
 
        umad_dev->start_port = s;
        umad_dev->end_port   = e;
@@ -1146,7 +1158,8 @@ static void ib_umad_add_one(struct ib_device *device)
        for (i = s; i <= e; ++i) {
                umad_dev->port[i - s].umad_dev = umad_dev;
 
-               if (ib_umad_init_port(device, i, &umad_dev->port[i - s]))
+               if (ib_umad_init_port(device, i, umad_dev,
+                                     &umad_dev->port[i - s]))
                        goto err;
        }
 
@@ -1158,7 +1171,7 @@ err:
        while (--i >= s)
                ib_umad_kill_port(&umad_dev->port[i - s]);
 
-       kref_put(&umad_dev->ref, ib_umad_release_dev);
+       kobject_put(&umad_dev->kobj);
 }
 
 static void ib_umad_remove_one(struct ib_device *device)
@@ -1172,7 +1185,7 @@ static void ib_umad_remove_one(struct ib_device *device)
        for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i)
                ib_umad_kill_port(&umad_dev->port[i]);
 
-       kref_put(&umad_dev->ref, ib_umad_release_dev);
+       kobject_put(&umad_dev->kobj);
 }
 
 static char *umad_devnode(struct device *dev, umode_t *mode)
index 92525f855d82b561156a49f168436695e4760531..c2b89cc5dbcad5153ef3e3d072ea0c5e3d384caa 100644 (file)
@@ -48,7 +48,7 @@
 
 #include "core_priv.h"
 
-int ib_rate_to_mult(enum ib_rate rate)
+__attribute_const__ int ib_rate_to_mult(enum ib_rate rate)
 {
        switch (rate) {
        case IB_RATE_2_5_GBPS: return  1;
@@ -65,7 +65,7 @@ int ib_rate_to_mult(enum ib_rate rate)
 }
 EXPORT_SYMBOL(ib_rate_to_mult);
 
-enum ib_rate mult_to_ib_rate(int mult)
+__attribute_const__ enum ib_rate mult_to_ib_rate(int mult)
 {
        switch (mult) {
        case 1:  return IB_RATE_2_5_GBPS;
@@ -82,7 +82,7 @@ enum ib_rate mult_to_ib_rate(int mult)
 }
 EXPORT_SYMBOL(mult_to_ib_rate);
 
-int ib_rate_to_mbps(enum ib_rate rate)
+__attribute_const__ int ib_rate_to_mbps(enum ib_rate rate)
 {
        switch (rate) {
        case IB_RATE_2_5_GBPS: return 2500;
@@ -107,7 +107,7 @@ int ib_rate_to_mbps(enum ib_rate rate)
 }
 EXPORT_SYMBOL(ib_rate_to_mbps);
 
-enum rdma_transport_type
+__attribute_const__ enum rdma_transport_type
 rdma_node_get_transport(enum rdma_node_type node_type)
 {
        switch (node_type) {
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
new file mode 100644 (file)
index 0000000..e900b03
--- /dev/null
@@ -0,0 +1,12 @@
+obj-$(CONFIG_INFINIBAND_MTHCA)         += mthca/
+obj-$(CONFIG_INFINIBAND_IPATH)         += ipath/
+obj-$(CONFIG_INFINIBAND_QIB)           += qib/
+obj-$(CONFIG_INFINIBAND_EHCA)          += ehca/
+obj-$(CONFIG_INFINIBAND_AMSO1100)      += amso1100/
+obj-$(CONFIG_INFINIBAND_CXGB3)         += cxgb3/
+obj-$(CONFIG_INFINIBAND_CXGB4)         += cxgb4/
+obj-$(CONFIG_MLX4_INFINIBAND)          += mlx4/
+obj-$(CONFIG_MLX5_INFINIBAND)          += mlx5/
+obj-$(CONFIG_INFINIBAND_NES)           += nes/
+obj-$(CONFIG_INFINIBAND_OCRDMA)                += ocrdma/
+obj-$(CONFIG_INFINIBAND_USNIC)         += usnic/
index 1f863a96a480fd1ab087989acea029a781c7c23e..96d7131ab974bbb1baffcef9be2a4521890c82e9 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
+ * Copyright (c) 2009-2014 Chelsio, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -47,6 +47,8 @@
 #include <net/ip6_route.h>
 #include <net/addrconf.h>
 
+#include <rdma/ib_addr.h>
+
 #include "iw_cxgb4.h"
 
 static char *states[] = {
@@ -294,6 +296,12 @@ void _c4iw_free_ep(struct kref *kref)
                dst_release(ep->dst);
                cxgb4_l2t_release(ep->l2t);
        }
+       if (test_bit(RELEASE_MAPINFO, &ep->com.flags)) {
+               print_addr(&ep->com, __func__, "remove_mapinfo/mapping");
+               iwpm_remove_mapinfo(&ep->com.local_addr,
+                                   &ep->com.mapped_local_addr);
+               iwpm_remove_mapping(&ep->com.local_addr, RDMA_NL_C4IW);
+       }
        kfree(ep);
 }
 
@@ -341,10 +349,7 @@ static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp)
 
 static struct net_device *get_real_dev(struct net_device *egress_dev)
 {
-       struct net_device *phys_dev = egress_dev;
-       if (egress_dev->priv_flags & IFF_802_1Q_VLAN)
-               phys_dev = vlan_dev_real_dev(egress_dev);
-       return phys_dev;
+       return rdma_vlan_dev_real_dev(egress_dev) ? : egress_dev;
 }
 
 static int our_interface(struct c4iw_dev *dev, struct net_device *egress_dev)
@@ -528,6 +533,38 @@ static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
        return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
 }
 
+/*
+ * c4iw_form_pm_msg - Form a port mapper message with mapping info
+ */
+static void c4iw_form_pm_msg(struct c4iw_ep *ep,
+                               struct iwpm_sa_data *pm_msg)
+{
+       memcpy(&pm_msg->loc_addr, &ep->com.local_addr,
+               sizeof(ep->com.local_addr));
+       memcpy(&pm_msg->rem_addr, &ep->com.remote_addr,
+               sizeof(ep->com.remote_addr));
+}
+
+/*
+ * c4iw_form_reg_msg - Form a port mapper message with dev info
+ */
+static void c4iw_form_reg_msg(struct c4iw_dev *dev,
+                               struct iwpm_dev_data *pm_msg)
+{
+       memcpy(pm_msg->dev_name, dev->ibdev.name, IWPM_DEVNAME_SIZE);
+       memcpy(pm_msg->if_name, dev->rdev.lldi.ports[0]->name,
+                               IWPM_IFNAME_SIZE);
+}
+
+static void c4iw_record_pm_msg(struct c4iw_ep *ep,
+                       struct iwpm_sa_data *pm_msg)
+{
+       memcpy(&ep->com.mapped_local_addr, &pm_msg->mapped_loc_addr,
+               sizeof(ep->com.mapped_local_addr));
+       memcpy(&ep->com.mapped_remote_addr, &pm_msg->mapped_rem_addr,
+               sizeof(ep->com.mapped_remote_addr));
+}
+
 static int send_connect(struct c4iw_ep *ep)
 {
        struct cpl_act_open_req *req;
@@ -546,10 +583,14 @@ static int send_connect(struct c4iw_ep *ep)
        int sizev6 = is_t4(ep->com.dev->rdev.lldi.adapter_type) ?
                                sizeof(struct cpl_act_open_req6) :
                                sizeof(struct cpl_t5_act_open_req6);
-       struct sockaddr_in *la = (struct sockaddr_in *)&ep->com.local_addr;
-       struct sockaddr_in *ra = (struct sockaddr_in *)&ep->com.remote_addr;
-       struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&ep->com.local_addr;
-       struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr;
+       struct sockaddr_in *la = (struct sockaddr_in *)
+                                &ep->com.mapped_local_addr;
+       struct sockaddr_in *ra = (struct sockaddr_in *)
+                                &ep->com.mapped_remote_addr;
+       struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)
+                                  &ep->com.mapped_local_addr;
+       struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)
+                                  &ep->com.mapped_remote_addr;
 
        wrlen = (ep->com.remote_addr.ss_family == AF_INET) ?
                        roundup(sizev4, 16) :
@@ -1627,10 +1668,10 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
        req->le.filter = cpu_to_be32(cxgb4_select_ntuple(
                                     ep->com.dev->rdev.lldi.ports[0],
                                     ep->l2t));
-       sin = (struct sockaddr_in *)&ep->com.local_addr;
+       sin = (struct sockaddr_in *)&ep->com.mapped_local_addr;
        req->le.lport = sin->sin_port;
        req->le.u.ipv4.lip = sin->sin_addr.s_addr;
-       sin = (struct sockaddr_in *)&ep->com.remote_addr;
+       sin = (struct sockaddr_in *)&ep->com.mapped_remote_addr;
        req->le.pport = sin->sin_port;
        req->le.u.ipv4.pip = sin->sin_addr.s_addr;
        req->tcb.t_state_to_astid =
@@ -1746,16 +1787,16 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
                if (!ep->l2t)
                        goto out;
                ep->mtu = dst_mtu(dst);
-               ep->tx_chan = cxgb4_port_chan(n->dev);
-               ep->smac_idx = (cxgb4_port_viid(n->dev) & 0x7F) << 1;
+               ep->tx_chan = cxgb4_port_chan(pdev);
+               ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
                step = cdev->rdev.lldi.ntxq /
                        cdev->rdev.lldi.nchan;
-               ep->txq_idx = cxgb4_port_idx(n->dev) * step;
-               ep->ctrlq_idx = cxgb4_port_idx(n->dev);
+               ep->txq_idx = cxgb4_port_idx(pdev) * step;
+               ep->ctrlq_idx = cxgb4_port_idx(pdev);
                step = cdev->rdev.lldi.nrxq /
                        cdev->rdev.lldi.nchan;
                ep->rss_qid = cdev->rdev.lldi.rxq_ids[
-                       cxgb4_port_idx(n->dev) * step];
+                       cxgb4_port_idx(pdev) * step];
 
                if (clear_mpa_v1) {
                        ep->retry_with_mpa_v1 = 0;
@@ -1870,10 +1911,10 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
        struct sockaddr_in6 *ra6;
 
        ep = lookup_atid(t, atid);
-       la = (struct sockaddr_in *)&ep->com.local_addr;
-       ra = (struct sockaddr_in *)&ep->com.remote_addr;
-       la6 = (struct sockaddr_in6 *)&ep->com.local_addr;
-       ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr;
+       la = (struct sockaddr_in *)&ep->com.mapped_local_addr;
+       ra = (struct sockaddr_in *)&ep->com.mapped_remote_addr;
+       la6 = (struct sockaddr_in6 *)&ep->com.mapped_local_addr;
+       ra6 = (struct sockaddr_in6 *)&ep->com.mapped_remote_addr;
 
        PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid,
             status, status2errno(status));
@@ -2730,13 +2771,15 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
        struct c4iw_ep *ep;
        int err = 0;
-       struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
-       struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
-       struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *)&cm_id->local_addr;
-       struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)
-                                     &cm_id->remote_addr;
+       struct sockaddr_in *laddr;
+       struct sockaddr_in *raddr;
+       struct sockaddr_in6 *laddr6;
+       struct sockaddr_in6 *raddr6;
+       struct iwpm_dev_data pm_reg_msg;
+       struct iwpm_sa_data pm_msg;
        __u8 *ra;
        int iptype;
+       int iwpm_err = 0;
 
        if ((conn_param->ord > c4iw_max_read_depth) ||
            (conn_param->ird > c4iw_max_read_depth)) {
@@ -2767,7 +2810,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        if (!ep->com.qp) {
                PDBG("%s qpn 0x%x not found!\n", __func__, conn_param->qpn);
                err = -EINVAL;
-               goto fail2;
+               goto fail1;
        }
        ref_qp(ep);
        PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
@@ -2780,10 +2823,50 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        if (ep->atid == -1) {
                printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
                err = -ENOMEM;
-               goto fail2;
+               goto fail1;
        }
        insert_handle(dev, &dev->atid_idr, ep, ep->atid);
 
+       memcpy(&ep->com.local_addr, &cm_id->local_addr,
+              sizeof(ep->com.local_addr));
+       memcpy(&ep->com.remote_addr, &cm_id->remote_addr,
+              sizeof(ep->com.remote_addr));
+
+       /* No port mapper available, go with the specified peer information */
+       memcpy(&ep->com.mapped_local_addr, &cm_id->local_addr,
+              sizeof(ep->com.mapped_local_addr));
+       memcpy(&ep->com.mapped_remote_addr, &cm_id->remote_addr,
+              sizeof(ep->com.mapped_remote_addr));
+
+       c4iw_form_reg_msg(dev, &pm_reg_msg);
+       iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_C4IW);
+       if (iwpm_err) {
+               PDBG("%s: Port Mapper reg pid fail (err = %d).\n",
+                       __func__, iwpm_err);
+       }
+       if (iwpm_valid_pid() && !iwpm_err) {
+               c4iw_form_pm_msg(ep, &pm_msg);
+               iwpm_err = iwpm_add_and_query_mapping(&pm_msg, RDMA_NL_C4IW);
+               if (iwpm_err)
+                       PDBG("%s: Port Mapper query fail (err = %d).\n",
+                               __func__, iwpm_err);
+               else
+                       c4iw_record_pm_msg(ep, &pm_msg);
+       }
+       if (iwpm_create_mapinfo(&ep->com.local_addr,
+                               &ep->com.mapped_local_addr, RDMA_NL_C4IW)) {
+               iwpm_remove_mapping(&ep->com.local_addr, RDMA_NL_C4IW);
+               err = -ENOMEM;
+               goto fail1;
+       }
+       print_addr(&ep->com, __func__, "add_query/create_mapinfo");
+       set_bit(RELEASE_MAPINFO, &ep->com.flags);
+
+       laddr = (struct sockaddr_in *)&ep->com.mapped_local_addr;
+       raddr = (struct sockaddr_in *)&ep->com.mapped_remote_addr;
+       laddr6 = (struct sockaddr_in6 *)&ep->com.mapped_local_addr;
+       raddr6 = (struct sockaddr_in6 *) &ep->com.mapped_remote_addr;
+
        if (cm_id->remote_addr.ss_family == AF_INET) {
                iptype = 4;
                ra = (__u8 *)&raddr->sin_addr;
@@ -2794,7 +2877,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
                if ((__force int)raddr->sin_addr.s_addr == INADDR_ANY) {
                        err = pick_local_ipaddrs(dev, cm_id);
                        if (err)
-                               goto fail2;
+                               goto fail1;
                }
 
                /* find a route */
@@ -2814,7 +2897,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
                if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) {
                        err = pick_local_ip6addrs(dev, cm_id);
                        if (err)
-                               goto fail2;
+                               goto fail1;
                }
 
                /* find a route */
@@ -2830,13 +2913,13 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        if (!ep->dst) {
                printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
                err = -EHOSTUNREACH;
-               goto fail3;
+               goto fail2;
        }
 
        err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true);
        if (err) {
                printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
-               goto fail4;
+               goto fail3;
        }
 
        PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
@@ -2845,10 +2928,6 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 
        state_set(&ep->com, CONNECTING);
        ep->tos = 0;
-       memcpy(&ep->com.local_addr, &cm_id->local_addr,
-              sizeof(ep->com.local_addr));
-       memcpy(&ep->com.remote_addr, &cm_id->remote_addr,
-              sizeof(ep->com.remote_addr));
 
        /* send connect request to rnic */
        err = send_connect(ep);
@@ -2856,12 +2935,12 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
                goto out;
 
        cxgb4_l2t_release(ep->l2t);
-fail4:
-       dst_release(ep->dst);
 fail3:
+       dst_release(ep->dst);
+fail2:
        remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
        cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
-fail2:
+fail1:
        cm_id->rem_ref(cm_id);
        c4iw_put_ep(&ep->com);
 out:
@@ -2871,7 +2950,8 @@ out:
 static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
 {
        int err;
-       struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&ep->com.local_addr;
+       struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
+                                   &ep->com.mapped_local_addr;
 
        c4iw_init_wr_wait(&ep->com.wr_wait);
        err = cxgb4_create_server6(ep->com.dev->rdev.lldi.ports[0],
@@ -2892,7 +2972,8 @@ static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
 static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
 {
        int err;
-       struct sockaddr_in *sin = (struct sockaddr_in *)&ep->com.local_addr;
+       struct sockaddr_in *sin = (struct sockaddr_in *)
+                                 &ep->com.mapped_local_addr;
 
        if (dev->rdev.lldi.enable_fw_ofld_conn) {
                do {
@@ -2927,6 +3008,9 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
        int err = 0;
        struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
        struct c4iw_listen_ep *ep;
+       struct iwpm_dev_data pm_reg_msg;
+       struct iwpm_sa_data pm_msg;
+       int iwpm_err = 0;
 
        might_sleep();
 
@@ -2961,6 +3045,37 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
                goto fail2;
        }
        insert_handle(dev, &dev->stid_idr, ep, ep->stid);
+
+       /* No port mapper available, go with the specified info */
+       memcpy(&ep->com.mapped_local_addr, &cm_id->local_addr,
+              sizeof(ep->com.mapped_local_addr));
+
+       c4iw_form_reg_msg(dev, &pm_reg_msg);
+       iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_C4IW);
+       if (iwpm_err) {
+               PDBG("%s: Port Mapper reg pid fail (err = %d).\n",
+                       __func__, iwpm_err);
+       }
+       if (iwpm_valid_pid() && !iwpm_err) {
+               memcpy(&pm_msg.loc_addr, &ep->com.local_addr,
+                               sizeof(ep->com.local_addr));
+               iwpm_err = iwpm_add_mapping(&pm_msg, RDMA_NL_C4IW);
+               if (iwpm_err)
+                       PDBG("%s: Port Mapper query fail (err = %d).\n",
+                               __func__, iwpm_err);
+               else
+                       memcpy(&ep->com.mapped_local_addr,
+                               &pm_msg.mapped_loc_addr,
+                               sizeof(ep->com.mapped_local_addr));
+       }
+       if (iwpm_create_mapinfo(&ep->com.local_addr,
+                               &ep->com.mapped_local_addr, RDMA_NL_C4IW)) {
+               err = -ENOMEM;
+               goto fail3;
+       }
+       print_addr(&ep->com, __func__, "add_mapping/create_mapinfo");
+
+       set_bit(RELEASE_MAPINFO, &ep->com.flags);
        state_set(&ep->com, LISTEN);
        if (ep->com.local_addr.ss_family == AF_INET)
                err = create_server4(dev, ep);
@@ -2970,6 +3085,8 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
                cm_id->provider_data = ep;
                goto out;
        }
+
+fail3:
        cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
                        ep->com.local_addr.ss_family);
 fail2:
index cfaa56ada18927e1580b4695511a146f995be395..7151a02b4ebb3e3f48de3c270046684065d0e0d5 100644 (file)
@@ -940,7 +940,6 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
                if (!mm2)
                        goto err4;
 
-               memset(&uresp, 0, sizeof(uresp));
                uresp.qid_mask = rhp->rdev.cqmask;
                uresp.cqid = chp->cq.cqid;
                uresp.size = chp->cq.size;
@@ -951,7 +950,8 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
                uresp.gts_key = ucontext->key;
                ucontext->key += PAGE_SIZE;
                spin_unlock(&ucontext->mmap_lock);
-               ret = ib_copy_to_udata(udata, &uresp, sizeof uresp);
+               ret = ib_copy_to_udata(udata, &uresp,
+                                      sizeof(uresp) - sizeof(uresp.reserved));
                if (ret)
                        goto err5;
 
index f4fa50a609e21f5ef1fe0ee3660bc921fdded72e..dd93aadc996e1ca15e0739a1d60e9c4930b449b8 100644 (file)
@@ -77,6 +77,16 @@ struct c4iw_debugfs_data {
        int pos;
 };
 
+/* registered cxgb4 netlink callbacks */
+static struct ibnl_client_cbs c4iw_nl_cb_table[] = {
+       [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
+       [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
+       [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb},
+       [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb},
+       [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb},
+       [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}
+};
+
 static int count_idrs(int id, void *p, void *data)
 {
        int *countp = data;
@@ -113,35 +123,49 @@ static int dump_qp(int id, void *p, void *data)
                                &qp->ep->com.local_addr;
                        struct sockaddr_in *rsin = (struct sockaddr_in *)
                                &qp->ep->com.remote_addr;
+                       struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
+                               &qp->ep->com.mapped_local_addr;
+                       struct sockaddr_in *mapped_rsin = (struct sockaddr_in *)
+                               &qp->ep->com.mapped_remote_addr;
 
                        cc = snprintf(qpd->buf + qpd->pos, space,
                                      "rc qp sq id %u rq id %u state %u "
                                      "onchip %u ep tid %u state %u "
-                                     "%pI4:%u->%pI4:%u\n",
+                                     "%pI4:%u/%u->%pI4:%u/%u\n",
                                      qp->wq.sq.qid, qp->wq.rq.qid,
                                      (int)qp->attr.state,
                                      qp->wq.sq.flags & T4_SQ_ONCHIP,
                                      qp->ep->hwtid, (int)qp->ep->com.state,
                                      &lsin->sin_addr, ntohs(lsin->sin_port),
-                                     &rsin->sin_addr, ntohs(rsin->sin_port));
+                                     ntohs(mapped_lsin->sin_port),
+                                     &rsin->sin_addr, ntohs(rsin->sin_port),
+                                     ntohs(mapped_rsin->sin_port));
                } else {
                        struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
                                &qp->ep->com.local_addr;
                        struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)
                                &qp->ep->com.remote_addr;
+                       struct sockaddr_in6 *mapped_lsin6 =
+                               (struct sockaddr_in6 *)
+                               &qp->ep->com.mapped_local_addr;
+                       struct sockaddr_in6 *mapped_rsin6 =
+                               (struct sockaddr_in6 *)
+                               &qp->ep->com.mapped_remote_addr;
 
                        cc = snprintf(qpd->buf + qpd->pos, space,
                                      "rc qp sq id %u rq id %u state %u "
                                      "onchip %u ep tid %u state %u "
-                                     "%pI6:%u->%pI6:%u\n",
+                                     "%pI6:%u/%u->%pI6:%u/%u\n",
                                      qp->wq.sq.qid, qp->wq.rq.qid,
                                      (int)qp->attr.state,
                                      qp->wq.sq.flags & T4_SQ_ONCHIP,
                                      qp->ep->hwtid, (int)qp->ep->com.state,
                                      &lsin6->sin6_addr,
                                      ntohs(lsin6->sin6_port),
+                                     ntohs(mapped_lsin6->sin6_port),
                                      &rsin6->sin6_addr,
-                                     ntohs(rsin6->sin6_port));
+                                     ntohs(rsin6->sin6_port),
+                                     ntohs(mapped_rsin6->sin6_port));
                }
        } else
                cc = snprintf(qpd->buf + qpd->pos, space,
@@ -386,31 +410,43 @@ static int dump_ep(int id, void *p, void *data)
                        &ep->com.local_addr;
                struct sockaddr_in *rsin = (struct sockaddr_in *)
                        &ep->com.remote_addr;
+               struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
+                       &ep->com.mapped_local_addr;
+               struct sockaddr_in *mapped_rsin = (struct sockaddr_in *)
+                       &ep->com.mapped_remote_addr;
 
                cc = snprintf(epd->buf + epd->pos, space,
                              "ep %p cm_id %p qp %p state %d flags 0x%lx "
                              "history 0x%lx hwtid %d atid %d "
-                             "%pI4:%d <-> %pI4:%d\n",
+                             "%pI4:%d/%d <-> %pI4:%d/%d\n",
                              ep, ep->com.cm_id, ep->com.qp,
                              (int)ep->com.state, ep->com.flags,
                              ep->com.history, ep->hwtid, ep->atid,
                              &lsin->sin_addr, ntohs(lsin->sin_port),
-                             &rsin->sin_addr, ntohs(rsin->sin_port));
+                             ntohs(mapped_lsin->sin_port),
+                             &rsin->sin_addr, ntohs(rsin->sin_port),
+                             ntohs(mapped_rsin->sin_port));
        } else {
                struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
                        &ep->com.local_addr;
                struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)
                        &ep->com.remote_addr;
+               struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *)
+                       &ep->com.mapped_local_addr;
+               struct sockaddr_in6 *mapped_rsin6 = (struct sockaddr_in6 *)
+                       &ep->com.mapped_remote_addr;
 
                cc = snprintf(epd->buf + epd->pos, space,
                              "ep %p cm_id %p qp %p state %d flags 0x%lx "
                              "history 0x%lx hwtid %d atid %d "
-                             "%pI6:%d <-> %pI6:%d\n",
+                             "%pI6:%d/%d <-> %pI6:%d/%d\n",
                              ep, ep->com.cm_id, ep->com.qp,
                              (int)ep->com.state, ep->com.flags,
                              ep->com.history, ep->hwtid, ep->atid,
                              &lsin6->sin6_addr, ntohs(lsin6->sin6_port),
-                             &rsin6->sin6_addr, ntohs(rsin6->sin6_port));
+                             ntohs(mapped_lsin6->sin6_port),
+                             &rsin6->sin6_addr, ntohs(rsin6->sin6_port),
+                             ntohs(mapped_rsin6->sin6_port));
        }
        if (cc < space)
                epd->pos += cc;
@@ -431,23 +467,29 @@ static int dump_listen_ep(int id, void *p, void *data)
        if (ep->com.local_addr.ss_family == AF_INET) {
                struct sockaddr_in *lsin = (struct sockaddr_in *)
                        &ep->com.local_addr;
+               struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
+                       &ep->com.mapped_local_addr;
 
                cc = snprintf(epd->buf + epd->pos, space,
                              "ep %p cm_id %p state %d flags 0x%lx stid %d "
-                             "backlog %d %pI4:%d\n",
+                             "backlog %d %pI4:%d/%d\n",
                              ep, ep->com.cm_id, (int)ep->com.state,
                              ep->com.flags, ep->stid, ep->backlog,
-                             &lsin->sin_addr, ntohs(lsin->sin_port));
+                             &lsin->sin_addr, ntohs(lsin->sin_port),
+                             ntohs(mapped_lsin->sin_port));
        } else {
                struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
                        &ep->com.local_addr;
+               struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *)
+                       &ep->com.mapped_local_addr;
 
                cc = snprintf(epd->buf + epd->pos, space,
                              "ep %p cm_id %p state %d flags 0x%lx stid %d "
-                             "backlog %d %pI6:%d\n",
+                             "backlog %d %pI6:%d/%d\n",
                              ep, ep->com.cm_id, (int)ep->com.state,
                              ep->com.flags, ep->stid, ep->backlog,
-                             &lsin6->sin6_addr, ntohs(lsin6->sin6_port));
+                             &lsin6->sin6_addr, ntohs(lsin6->sin6_port),
+                             ntohs(mapped_lsin6->sin6_port));
        }
        if (cc < space)
                epd->pos += cc;
@@ -687,6 +729,7 @@ static void c4iw_dealloc(struct uld_ctx *ctx)
        if (ctx->dev->rdev.oc_mw_kva)
                iounmap(ctx->dev->rdev.oc_mw_kva);
        ib_dealloc_device(&ctx->dev->ibdev);
+       iwpm_exit(RDMA_NL_C4IW);
        ctx->dev = NULL;
 }
 
@@ -736,6 +779,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
                        pci_resource_len(devp->rdev.lldi.pdev, 2));
                if (!devp->rdev.bar2_kva) {
                        pr_err(MOD "Unable to ioremap BAR2\n");
+                       ib_dealloc_device(&devp->ibdev);
                        return ERR_PTR(-EINVAL);
                }
        } else if (ocqp_supported(infop)) {
@@ -747,6 +791,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
                        devp->rdev.lldi.vr->ocq.size);
                if (!devp->rdev.oc_mw_kva) {
                        pr_err(MOD "Unable to ioremap onchip mem\n");
+                       ib_dealloc_device(&devp->ibdev);
                        return ERR_PTR(-EINVAL);
                }
        }
@@ -780,6 +825,14 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
                                        c4iw_debugfs_root);
                setup_debugfs(devp);
        }
+
+       ret = iwpm_init(RDMA_NL_C4IW);
+       if (ret) {
+               pr_err("port mapper initialization failed with %d\n", ret);
+               ib_dealloc_device(&devp->ibdev);
+               return ERR_PTR(ret);
+       }
+
        return devp;
 }
 
@@ -1274,6 +1327,11 @@ static int __init c4iw_init_module(void)
                printk(KERN_WARNING MOD
                       "could not create debugfs entry, continuing\n");
 
+       if (ibnl_add_client(RDMA_NL_C4IW, RDMA_NL_IWPM_NUM_OPS,
+                           c4iw_nl_cb_table))
+               pr_err("%s[%u]: Failed to add netlink callback\n"
+                      , __func__, __LINE__);
+
        cxgb4_register_uld(CXGB4_ULD_RDMA, &c4iw_uld_info);
 
        return 0;
@@ -1291,6 +1349,7 @@ static void __exit c4iw_exit_module(void)
        }
        mutex_unlock(&dev_mutex);
        cxgb4_unregister_uld(CXGB4_ULD_RDMA);
+       ibnl_remove_client(RDMA_NL_C4IW);
        c4iw_cm_term();
        debugfs_remove_recursive(c4iw_debugfs_root);
 }
index 7474b490760a413f9f13d9e04ead79319a6fd55e..6f533fbcc4b3d89fe1f4c9da2ae3fd6220720819 100644 (file)
@@ -52,6 +52,8 @@
 
 #include <rdma/ib_verbs.h>
 #include <rdma/iw_cm.h>
+#include <rdma/rdma_netlink.h>
+#include <rdma/iw_portmap.h>
 
 #include "cxgb4.h"
 #include "cxgb4_uld.h"
@@ -728,6 +730,7 @@ enum c4iw_ep_flags {
        CLOSE_SENT              = 3,
        TIMEOUT                 = 4,
        QP_REFERENCED           = 5,
+       RELEASE_MAPINFO         = 6,
 };
 
 enum c4iw_ep_history {
@@ -764,6 +767,8 @@ struct c4iw_ep_common {
        struct mutex mutex;
        struct sockaddr_storage local_addr;
        struct sockaddr_storage remote_addr;
+       struct sockaddr_storage mapped_local_addr;
+       struct sockaddr_storage mapped_remote_addr;
        struct c4iw_wr_wait wr_wait;
        unsigned long flags;
        unsigned long history;
@@ -807,6 +812,45 @@ struct c4iw_ep {
        unsigned int retry_count;
 };
 
+static inline void print_addr(struct c4iw_ep_common *epc, const char *func,
+                             const char *msg)
+{
+       /* Debug trace: logs func, msg and epc's local <-> remote address as addr:port/mapped-port. */
+#define SINA(a) (&(((struct sockaddr_in *)(a))->sin_addr.s_addr)) /* pointer to IPv4 address, for %pI4 */
+#define SINP(a) ntohs(((struct sockaddr_in *)(a))->sin_port) /* IPv4 port, converted with ntohs() */
+#define SIN6A(a) (&(((struct sockaddr_in6 *)(a))->sin6_addr)) /* pointer to IPv6 address, for %pI6 */
+#define SIN6P(a) ntohs(((struct sockaddr_in6 *)(a))->sin6_port) /* IPv6 port, converted with ntohs() */
+       /* Prints only when c4iw_debug is non-zero; local_addr's family selects the format used for both ends. */
+       if (c4iw_debug) {
+               switch (epc->local_addr.ss_family) {
+               case AF_INET:
+                       PDBG("%s %s %pI4:%u/%u <-> %pI4:%u/%u\n",
+                            func, msg, SINA(&epc->local_addr),
+                            SINP(&epc->local_addr),
+                            SINP(&epc->mapped_local_addr),
+                            SINA(&epc->remote_addr),
+                            SINP(&epc->remote_addr),
+                            SINP(&epc->mapped_remote_addr));
+                       break;
+               case AF_INET6:
+                       PDBG("%s %s %pI6:%u/%u <-> %pI6:%u/%u\n",
+                            func, msg, SIN6A(&epc->local_addr),
+                            SIN6P(&epc->local_addr),
+                            SIN6P(&epc->mapped_local_addr),
+                            SIN6A(&epc->remote_addr),
+                            SIN6P(&epc->remote_addr),
+                            SIN6P(&epc->mapped_remote_addr));
+                       break;
+               default: /* any other address family: print nothing */
+                       break;
+               }
+       }
+#undef SINA /* the four helper macros are removed again so they stay local to this function */
+#undef SINP
+#undef SIN6A
+#undef SIN6P
+}
+
 static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id)
 {
        return cm_id->provider_data;
index a94a3e12c349b273d472a42b6b2bba88dcc0779a..c777e22bd8d538800fb8c616c8f20f987b21d7d7 100644 (file)
@@ -122,7 +122,7 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev,
        INIT_LIST_HEAD(&context->mmaps);
        spin_lock_init(&context->mmap_lock);
 
-       if (udata->outlen < sizeof(uresp)) {
+       if (udata->outlen < sizeof(uresp) - sizeof(uresp.reserved)) {
                if (!warned++)
                        pr_err(MOD "Warning - downlevel libcxgb4 (non-fatal), device status page disabled.");
                rhp->rdev.flags |= T4_STATUS_PAGE_DISABLED;
@@ -140,7 +140,8 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev,
                context->key += PAGE_SIZE;
                spin_unlock(&context->mmap_lock);
 
-               ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+               ret = ib_copy_to_udata(udata, &uresp,
+                                      sizeof(uresp) - sizeof(uresp.reserved));
                if (ret)
                        goto err_mm;
 
index 11ccd276e5d9c334d2cb3aa445ddaac1401483b7..cbd0ce1707282a63fe1acb10c223c4af22e28ad1 100644 (file)
@@ -48,6 +48,7 @@ struct c4iw_create_cq_resp {
        __u32 cqid;
        __u32 size;
        __u32 qid_mask;
+       __u32 reserved; /* explicit padding (optional for i386) */
 };
 
 
@@ -74,5 +75,6 @@ struct c4iw_create_qp_resp {
 struct c4iw_alloc_ucontext_resp { /* NOTE(review): presumably the uresp that c4iw_alloc_ucontext() copies to udata - confirm */
        __u64 status_page_key;
        __u32 status_page_size;
+       __u32 reserved; /* explicit padding: 8 + 4 + 4 bytes, so no implicit tail padding is needed (optional for i386) */
 };
 #endif
index e2f9a51f4a38697aa6cccb261e7ee1c4ab346ad6..45802e97332ee830354e9bbbfe206031236629e6 100644 (file)
@@ -346,6 +346,10 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
                        ret = -EFAULT;
                        goto bail;
                }
+               dp.len = odp.len;
+               dp.unit = odp.unit;
+               dp.data = odp.data;
+               dp.pbc_wd = 0;
        } else {
                ret = -EINVAL;
                goto bail;
index 26dfbc8ee0f1cc8423b046d5ce8d2f5789a063b5..01ba792791a0afbed1c030f20001271f6af63aa4 100644 (file)
@@ -70,7 +70,7 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
        if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
                int i;
                if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG) &&
-                       dd->ipath_lastcancel > jiffies) {
+                       time_after(dd->ipath_lastcancel, jiffies)) {
                        __IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG,
                                          "SendbufErrs %lx %lx", sbuf[0],
                                          sbuf[1]);
@@ -755,7 +755,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
 
        /* likely due to cancel; so suppress message unless verbose */
        if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) &&
-               dd->ipath_lastcancel > jiffies) {
+               time_after(dd->ipath_lastcancel, jiffies)) {
                /* armlaunch takes precedence; it often causes both. */
                ipath_cdbg(VERBOSE,
                        "Suppressed %s error (%llx) after sendbuf cancel\n",
index 98ac18ec977e4099f9ee154e8a53353d6206751d..17a517766ad27b822ba1b77780601a44f7219a7e 100644 (file)
@@ -247,7 +247,7 @@ static void sdma_abort_task(unsigned long opaque)
 
        /* ipath_sdma_abort() is done, waiting for interrupt */
        if (status == IPATH_SDMA_ABORT_DISARMED) {
-               if (jiffies < dd->ipath_sdma_abort_intr_timeout)
+               if (time_before(jiffies, dd->ipath_sdma_abort_intr_timeout))
                        goto resched_noprint;
                /* give up, intr got lost somewhere */
                ipath_dbg("give up waiting for SDMADISABLED intr\n");
@@ -341,7 +341,7 @@ resched:
         * JAG - this is bad to just have default be a loop without
         * state change
         */
-       if (jiffies > dd->ipath_sdma_abort_jiffies) {
+       if (time_after(jiffies, dd->ipath_sdma_abort_jiffies)) {
                ipath_dbg("looping with status 0x%08lx\n",
                          dd->ipath_sdma_status);
                dd->ipath_sdma_abort_jiffies = jiffies + 5 * HZ;
index 170dca6080423145a5cb31dd11be245de156bbcf..2d8c3397774f6ba84c207bd35a64a203f2916620 100644 (file)
@@ -73,7 +73,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
 {
        struct mlx4_ib_dev *ibdev = to_mdev(pd->device);
        struct mlx4_dev *dev = ibdev->dev;
-       int is_mcast;
+       int is_mcast = 0;
        struct in6_addr in6;
        u16 vlan_tag;
 
index 5f640814cc81763e61fad3fb2bcdc50993a57a51..1066eec854a9a07bfa1d4ad771fe2c6b979ee7ae 100644 (file)
@@ -102,7 +102,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
        int err;
 
        err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size,
-                            PAGE_SIZE * 2, &buf->buf);
+                            PAGE_SIZE * 2, &buf->buf, GFP_KERNEL);
 
        if (err)
                goto out;
@@ -113,7 +113,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
        if (err)
                goto err_buf;
 
-       err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf);
+       err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf, GFP_KERNEL);
        if (err)
                goto err_mtt;
 
@@ -209,7 +209,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
 
                uar = &to_mucontext(context)->uar;
        } else {
-               err = mlx4_db_alloc(dev->dev, &cq->db, 1);
+               err = mlx4_db_alloc(dev->dev, &cq->db, 1, GFP_KERNEL);
                if (err)
                        goto err_cq;
 
index fd36ec67263208745170b28fce61e0ffec88844d..287ad0564acdfa5444c1a05d8dec2d9edd91416e 100644 (file)
@@ -478,10 +478,6 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
        if (!tun_ctx || tun_ctx->state != DEMUX_PV_STATE_ACTIVE)
                return -EAGAIN;
 
-       /* QP0 forwarding only for Dom0 */
-       if (!dest_qpt && (mlx4_master_func_num(dev->dev) != slave))
-               return -EINVAL;
-
        if (!dest_qpt)
                tun_qp = &tun_ctx->qp[0];
        else
@@ -667,6 +663,21 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
        }
        /* Class-specific handling */
        switch (mad->mad_hdr.mgmt_class) {
+       case IB_MGMT_CLASS_SUBN_LID_ROUTED:
+       case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
+               /* 255 indicates the dom0 */
+               if (slave != 255 && slave != mlx4_master_func_num(dev->dev)) {
+                       if (!mlx4_vf_smi_enabled(dev->dev, slave, port))
+                               return -EPERM;
+                       /* for a VF. drop unsolicited MADs */
+                       if (!(mad->mad_hdr.method & IB_MGMT_METHOD_RESP)) {
+                               mlx4_ib_warn(ibdev, "demux QP0. rejecting unsolicited mad for slave %d class 0x%x, method 0x%x\n",
+                                            slave, mad->mad_hdr.mgmt_class,
+                                            mad->mad_hdr.method);
+                               return -EINVAL;
+                       }
+               }
+               break;
        case IB_MGMT_CLASS_SUBN_ADM:
                if (mlx4_ib_demux_sa_handler(ibdev, port, slave,
                                             (struct ib_sa_mad *) mad))
@@ -1165,10 +1176,6 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
        if (!sqp_ctx || sqp_ctx->state != DEMUX_PV_STATE_ACTIVE)
                return -EAGAIN;
 
-       /* QP0 forwarding only for Dom0 */
-       if (dest_qpt == IB_QPT_SMI && (mlx4_master_func_num(dev->dev) != slave))
-               return -EINVAL;
-
        if (dest_qpt == IB_QPT_SMI) {
                src_qpnum = 0;
                sqp = &sqp_ctx->qp[0];
@@ -1285,11 +1292,6 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
                             "belongs to another slave\n", wc->src_qp);
                return;
        }
-       if (slave != mlx4_master_func_num(dev->dev) && !(wc->src_qp & 0x2)) {
-               mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: "
-                            "non-master trying to send QP0 packets\n", wc->src_qp);
-               return;
-       }
 
        /* Map transaction ID */
        ib_dma_sync_single_for_cpu(ctx->ib_dev, tun_qp->ring[wr_ix].map,
@@ -1317,6 +1319,12 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
 
        /* Class-specific handling */
        switch (tunnel->mad.mad_hdr.mgmt_class) {
+       case IB_MGMT_CLASS_SUBN_LID_ROUTED:
+       case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
+               if (slave != mlx4_master_func_num(dev->dev) &&
+                   !mlx4_vf_smi_enabled(dev->dev, slave, ctx->port))
+                       return;
+               break;
        case IB_MGMT_CLASS_SUBN_ADM:
                if (mlx4_ib_multiplex_sa_handler(ctx->ib_dev, ctx->port, slave,
                              (struct ib_sa_mad *) &tunnel->mad))
@@ -1749,9 +1757,9 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
                return -EEXIST;
 
        ctx->state = DEMUX_PV_STATE_STARTING;
-       /* have QP0 only on port owner, and only if link layer is IB */
-       if (ctx->slave == mlx4_master_func_num(to_mdev(ctx->ib_dev)->dev) &&
-           rdma_port_get_link_layer(ibdev, ctx->port) == IB_LINK_LAYER_INFINIBAND)
+       /* have QP0 only if link layer is IB */
+       if (rdma_port_get_link_layer(ibdev, ctx->port) ==
+           IB_LINK_LAYER_INFINIBAND)
                ctx->has_smi = 1;
 
        if (ctx->has_smi) {
index 1b6dbe156a3708692a743cd58fc3351d4b01e533..3c3806aff712864661280456cd40d76603673591 100644 (file)
@@ -544,12 +544,11 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
        return 0;
 }
 
-static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
-                        u32 cap_mask)
+static int mlx4_ib_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
+                           u32 cap_mask)
 {
        struct mlx4_cmd_mailbox *mailbox;
        int err;
-       u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
 
        mailbox = mlx4_alloc_cmd_mailbox(dev->dev);
        if (IS_ERR(mailbox))
@@ -563,8 +562,8 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
                ((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask);
        }
 
-       err = mlx4_cmd(dev->dev, mailbox->dma, port, is_eth, MLX4_CMD_SET_PORT,
-                      MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+       err = mlx4_cmd(dev->dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT,
+                      MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
 
        mlx4_free_cmd_mailbox(dev->dev, mailbox);
        return err;
@@ -573,11 +572,20 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
 static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
                               struct ib_port_modify *props)
 {
+       struct mlx4_ib_dev *mdev = to_mdev(ibdev);
+       u8 is_eth = mdev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
        struct ib_port_attr attr;
        u32 cap_mask;
        int err;
 
-       mutex_lock(&to_mdev(ibdev)->cap_mask_mutex);
+       /* return OK if this is RoCE. CM calls ib_modify_port() regardless
+        * of whether port link layer is ETH or IB. For ETH ports, qkey
+        * violations and port capabilities are not meaningful.
+        */
+       if (is_eth)
+               return 0;
+
+       mutex_lock(&mdev->cap_mask_mutex);
 
        err = mlx4_ib_query_port(ibdev, port, &attr);
        if (err)
@@ -586,9 +594,9 @@ static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
        cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
                ~props->clr_port_cap_mask;
 
-       err = mlx4_SET_PORT(to_mdev(ibdev), port,
-                           !!(mask & IB_PORT_RESET_QKEY_CNTR),
-                           cap_mask);
+       err = mlx4_ib_SET_PORT(mdev, port,
+                              !!(mask & IB_PORT_RESET_QKEY_CNTR),
+                              cap_mask);
 
 out:
        mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
index f589522fddfd9efa4e32fdd0a7e8f63e49a54927..bb8c9dd442ae907e121fc6ec3266c1ed039f497d 100644 (file)
@@ -156,6 +156,7 @@ enum mlx4_ib_qp_flags {
        MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
        MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
        MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP,
+       MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO,
        MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30,
        MLX4_IB_SRIOV_SQP = 1 << 31,
 };
index 41308af4163c3dc852adc23f983bdd86d279374e..5b0cb8e2d807ed16d1742d3f6e6d1722510158f7 100644 (file)
@@ -608,9 +608,20 @@ static int qp_has_rq(struct ib_qp_init_attr *attr)
        return !attr->srq;
 }
 
+static int qp0_enabled_vf(struct mlx4_dev *dev, int qpn)
+{      /* Returns 1 when qpn equals some port's qp0_proxy entry and that port's qp0_qkey is non-zero, else 0. */
+       int i;
+       for (i = 0; i < dev->caps.num_ports; i++) { /* scan the per-port QP0 proxy numbers */
+               if (qpn == dev->caps.qp0_proxy[i])
+                       return !!dev->caps.qp0_qkey[i]; /* NOTE(review): presumably a non-zero qkey marks QP0 as enabled for this VF - confirm */
+       }
+       return 0; /* qpn is not a QP0 proxy QP on any port */
+}
+
 static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
                            struct ib_qp_init_attr *init_attr,
-                           struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp)
+                           struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp,
+                           gfp_t gfp)
 {
        int qpn;
        int err;
@@ -625,10 +636,13 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
                     !(init_attr->create_flags & MLX4_IB_SRIOV_SQP))) {
                        if (init_attr->qp_type == IB_QPT_GSI)
                                qp_type = MLX4_IB_QPT_PROXY_GSI;
-                       else if (mlx4_is_master(dev->dev))
-                               qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER;
-                       else
-                               qp_type = MLX4_IB_QPT_PROXY_SMI;
+                       else {
+                               if (mlx4_is_master(dev->dev) ||
+                                   qp0_enabled_vf(dev->dev, sqpn))
+                                       qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER;
+                               else
+                                       qp_type = MLX4_IB_QPT_PROXY_SMI;
+                       }
                }
                qpn = sqpn;
                /* add extra sg entry for tunneling */
@@ -643,7 +657,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
                        return -EINVAL;
                if (tnl_init->proxy_qp_type == IB_QPT_GSI)
                        qp_type = MLX4_IB_QPT_TUN_GSI;
-               else if (tnl_init->slave == mlx4_master_func_num(dev->dev))
+               else if (tnl_init->slave == mlx4_master_func_num(dev->dev) ||
+                        mlx4_vf_smi_enabled(dev->dev, tnl_init->slave,
+                                            tnl_init->port))
                        qp_type = MLX4_IB_QPT_TUN_SMI_OWNER;
                else
                        qp_type = MLX4_IB_QPT_TUN_SMI;
@@ -658,14 +674,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
                if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI ||
                    (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER |
                                MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) {
-                       sqp = kzalloc(sizeof (struct mlx4_ib_sqp), GFP_KERNEL);
+                       sqp = kzalloc(sizeof (struct mlx4_ib_sqp), gfp);
                        if (!sqp)
                                return -ENOMEM;
                        qp = &sqp->qp;
                        qp->pri.vid = 0xFFFF;
                        qp->alt.vid = 0xFFFF;
                } else {
-                       qp = kzalloc(sizeof (struct mlx4_ib_qp), GFP_KERNEL);
+                       qp = kzalloc(sizeof (struct mlx4_ib_qp), gfp);
                        if (!qp)
                                return -ENOMEM;
                        qp->pri.vid = 0xFFFF;
@@ -748,14 +764,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
                        goto err;
 
                if (qp_has_rq(init_attr)) {
-                       err = mlx4_db_alloc(dev->dev, &qp->db, 0);
+                       err = mlx4_db_alloc(dev->dev, &qp->db, 0, gfp);
                        if (err)
                                goto err;
 
                        *qp->db.db = 0;
                }
 
-               if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) {
+               if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf, gfp)) {
                        err = -ENOMEM;
                        goto err_db;
                }
@@ -765,13 +781,12 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
                if (err)
                        goto err_buf;
 
-               err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);
+               err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf, gfp);
                if (err)
                        goto err_mtt;
 
-               qp->sq.wrid  = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL);
-               qp->rq.wrid  = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL);
-
+               qp->sq.wrid  = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp);
+               qp->rq.wrid  = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp);
                if (!qp->sq.wrid || !qp->rq.wrid) {
                        err = -ENOMEM;
                        goto err_wrid;
@@ -801,7 +816,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
                        goto err_proxy;
        }
 
-       err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp);
+       err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp, gfp);
        if (err)
                goto err_qpn;
 
@@ -1040,7 +1055,10 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
        struct mlx4_ib_qp *qp = NULL;
        int err;
        u16 xrcdn = 0;
+       gfp_t gfp;
 
+       gfp = (init_attr->create_flags & MLX4_IB_QP_CREATE_USE_GFP_NOIO) ?
+               GFP_NOIO : GFP_KERNEL;
        /*
         * We only support LSO, vendor flag1, and multicast loopback blocking,
         * and only for kernel UD QPs.
@@ -1049,7 +1067,8 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
                                        MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK |
                                        MLX4_IB_SRIOV_TUNNEL_QP |
                                        MLX4_IB_SRIOV_SQP |
-                                       MLX4_IB_QP_NETIF))
+                                       MLX4_IB_QP_NETIF |
+                                       MLX4_IB_QP_CREATE_USE_GFP_NOIO))
                return ERR_PTR(-EINVAL);
 
        if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
@@ -1059,7 +1078,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
 
        if (init_attr->create_flags &&
            (udata ||
-            ((init_attr->create_flags & ~MLX4_IB_SRIOV_SQP) &&
+            ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP | MLX4_IB_QP_CREATE_USE_GFP_NOIO)) &&
              init_attr->qp_type != IB_QPT_UD) ||
             ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) &&
              init_attr->qp_type > IB_QPT_GSI)))
@@ -1079,7 +1098,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
        case IB_QPT_RC:
        case IB_QPT_UC:
        case IB_QPT_RAW_PACKET:
-               qp = kzalloc(sizeof *qp, GFP_KERNEL);
+               qp = kzalloc(sizeof *qp, gfp);
                if (!qp)
                        return ERR_PTR(-ENOMEM);
                qp->pri.vid = 0xFFFF;
@@ -1088,7 +1107,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
        case IB_QPT_UD:
        {
                err = create_qp_common(to_mdev(pd->device), pd, init_attr,
-                                      udata, 0, &qp);
+                                      udata, 0, &qp, gfp);
                if (err)
                        return ERR_PTR(err);
 
@@ -1106,7 +1125,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
 
                err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
                                       get_sqp_num(to_mdev(pd->device), init_attr),
-                                      &qp);
+                                      &qp, gfp);
                if (err)
                        return ERR_PTR(err);
 
@@ -1930,6 +1949,19 @@ out:
        return err;
 }
 
+/*
+ * Look up the QP0 qkey that belongs to a proxy or tunnel QP number.
+ *
+ * Scans the per-port capability arrays of @dev.  On the first port whose
+ * qp0_proxy or qp0_tunnel entry equals @qpn, that port's qp0_qkey is stored
+ * in *@qkey and 0 is returned.  Returns -EINVAL, leaving *@qkey untouched,
+ * when no port matches.
+ */
+static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey)
+{
+       int port_idx;
+
+       for (port_idx = 0; port_idx < dev->caps.num_ports; port_idx++) {
+               /* Not this port's QP0 proxy or tunnel: keep looking. */
+               if (qpn != dev->caps.qp0_proxy[port_idx] &&
+                   qpn != dev->caps.qp0_tunnel[port_idx])
+                       continue;
+
+               *qkey = dev->caps.qp0_qkey[port_idx];
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
 static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
                                  struct ib_send_wr *wr,
                                  void *wqe, unsigned *mlx_seg_len)
@@ -1987,8 +2019,13 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
                        cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]);
 
        sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
-       if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
-               return -EINVAL;
+       if (mlx4_is_master(mdev->dev)) {
+               if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
+                       return -EINVAL;
+       } else {
+               if (vf_get_qp0_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
+                       return -EINVAL;
+       }
        sqp->ud_header.deth.qkey = cpu_to_be32(qkey);
        sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn);
 
@@ -2370,7 +2407,8 @@ static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
 
 static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
                                    struct mlx4_wqe_datagram_seg *dseg,
-                                   struct ib_send_wr *wr, enum ib_qp_type qpt)
+                                   struct ib_send_wr *wr,
+                                   enum mlx4_ib_qp_type qpt)
 {
        union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av;
        struct mlx4_av sqp_av = {0};
@@ -2383,8 +2421,10 @@ static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
                        cpu_to_be32(0xf0000000);
 
        memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av));
-       /* This function used only for sending on QP1 proxies */
-       dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]);
+       if (qpt == MLX4_IB_QPT_PROXY_GSI)
+               dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]);
+       else
+               dseg->dqpn = cpu_to_be32(dev->dev->caps.qp0_tunnel[port - 1]);
        /* Use QKEY from the QP context, which is set by master */
        dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY);
 }
@@ -2679,11 +2719,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        break;
 
                case MLX4_IB_QPT_PROXY_SMI_OWNER:
-                       if (unlikely(!mlx4_is_master(to_mdev(ibqp->device)->dev))) {
-                               err = -ENOSYS;
-                               *bad_wr = wr;
-                               goto out;
-                       }
                        err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
                        if (unlikely(err)) {
                                *bad_wr = wr;
@@ -2700,16 +2735,13 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        size += seglen / 16;
                        break;
                case MLX4_IB_QPT_PROXY_SMI:
-                       /* don't allow QP0 sends on guests */
-                       err = -ENOSYS;
-                       *bad_wr = wr;
-                       goto out;
                case MLX4_IB_QPT_PROXY_GSI:
                        /* If we are tunneling special qps, this is a UD qp.
                         * In this case we first add a UD segment targeting
                         * the tunnel qp, and then add a header with address
                         * information */
-                       set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr, ibqp->qp_type);
+                       set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr,
+                                               qp->mlx4_ib_qp_type);
                        wqe  += sizeof (struct mlx4_wqe_datagram_seg);
                        size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
                        build_tunnel_header(wr, wqe, &seglen);
index 60c5fb025fc7e857d247e2664b35fab264558be5..62d9285300af09c64af29e8b19cf33395767a6f8 100644 (file)
@@ -134,13 +134,14 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
                if (err)
                        goto err_mtt;
        } else {
-               err = mlx4_db_alloc(dev->dev, &srq->db, 0);
+               err = mlx4_db_alloc(dev->dev, &srq->db, 0, GFP_KERNEL);
                if (err)
                        goto err_srq;
 
                *srq->db.db = 0;
 
-               if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf)) {
+               if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf,
+                                  GFP_KERNEL)) {
                        err = -ENOMEM;
                        goto err_db;
                }
@@ -165,7 +166,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
                if (err)
                        goto err_buf;
 
-               err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf);
+               err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf, GFP_KERNEL);
                if (err)
                        goto err_mtt;
 
index 5a38e43eca650c6cb736a5cee6e290e357aeb9ed..cb4c66e723b59d915b8e7f83221dcd437d180a8e 100644 (file)
@@ -389,8 +389,10 @@ struct mlx4_port {
        struct mlx4_ib_dev    *dev;
        struct attribute_group pkey_group;
        struct attribute_group gid_group;
-       u8                     port_num;
+       struct device_attribute enable_smi_admin;
+       struct device_attribute smi_enabled;
        int                    slave;
+       u8                     port_num;
 };
 
 
@@ -558,6 +560,101 @@ err:
        return NULL;
 }
 
+/*
+ * sysfs show handler for the per-port "smi_enabled" attribute.
+ *
+ * Recovers the owning mlx4_port from @attr and prints "1\n" to @buf when
+ * mlx4_vf_smi_enabled() returns non-zero for that slave/port, "0\n"
+ * otherwise.  Returns the number of bytes sprintf() wrote.
+ */
+static ssize_t sysfs_show_smi_enabled(struct device *dev,
+                                     struct device_attribute *attr, char *buf)
+{
+       struct mlx4_port *port =
+               container_of(attr, struct mlx4_port, smi_enabled);
+       int enabled = mlx4_vf_smi_enabled(port->dev->dev, port->slave,
+                                         port->port_num) ? 1 : 0;
+
+       return sprintf(buf, "%d\n", enabled);
+}
+
+/*
+ * sysfs show handler for the per-port "enable_smi_admin" attribute.
+ *
+ * Recovers the owning mlx4_port from @attr and prints "1\n" to @buf when
+ * mlx4_vf_get_enable_smi_admin() returns non-zero for that slave/port,
+ * "0\n" otherwise.  Returns the number of bytes sprintf() wrote.
+ */
+static ssize_t sysfs_show_enable_smi_admin(struct device *dev,
+                                          struct device_attribute *attr,
+                                          char *buf)
+{
+       struct mlx4_port *port =
+               container_of(attr, struct mlx4_port, enable_smi_admin);
+       int admin_on = mlx4_vf_get_enable_smi_admin(port->dev->dev,
+                                                   port->slave,
+                                                   port->port_num) ? 1 : 0;
+
+       return sprintf(buf, "%d\n", admin_on);
+}
+
+/*
+ * sysfs store handler for the per-port "enable_smi_admin" attribute.
+ *
+ * Parses one integer from @buf with sscanf("%i"); only 0 and 1 are
+ * accepted.  The value is passed to mlx4_vf_set_enable_smi_admin() for the
+ * slave/port owning @attr.  Returns @count on success and -EINVAL on a
+ * parse failure, an out-of-range value, or a non-zero result from the
+ * setter.
+ */
+static ssize_t sysfs_store_enable_smi_admin(struct device *dev,
+                                           struct device_attribute *attr,
+                                           const char *buf, size_t count)
+{
+       struct mlx4_port *port =
+               container_of(attr, struct mlx4_port, enable_smi_admin);
+       int value;
+
+       if (sscanf(buf, "%i", &value) != 1)
+               return -EINVAL;
+       if (value != 0 && value != 1)
+               return -EINVAL;
+
+       if (mlx4_vf_set_enable_smi_admin(port->dev->dev, port->slave,
+                                        port->port_num, value))
+               return -EINVAL;
+
+       return count;
+}
+
+/*
+ * Create the "smi_enabled" (0444, show only) and "enable_smi_admin"
+ * (0644, show + store) sysfs files under the port kobject of @p.
+ *
+ * Nothing is created, and 0 is returned, when the port's link layer is
+ * Ethernet or when @p belongs to the master function.  If the second file
+ * cannot be created the first one is removed again.  Returns 0 on success
+ * or the error from sysfs_create_file().
+ */
+static int add_vf_smi_entries(struct mlx4_port *p)
+{
+       int err;
+
+       /* do not display entries if eth transport, or if master */
+       if (rdma_port_get_link_layer(&p->dev->ib_dev, p->port_num) ==
+           IB_LINK_LAYER_ETHERNET)
+               return 0;
+       if (p->slave == mlx4_master_func_num(p->dev->dev))
+               return 0;
+
+       sysfs_attr_init(&p->smi_enabled.attr);
+       p->smi_enabled.attr.name = "smi_enabled";
+       p->smi_enabled.attr.mode = 0444;
+       p->smi_enabled.show = sysfs_show_smi_enabled;
+       p->smi_enabled.store = NULL;
+       err = sysfs_create_file(&p->kobj, &p->smi_enabled.attr);
+       if (err) {
+               pr_err("failed to create smi_enabled\n");
+               return err;
+       }
+
+       sysfs_attr_init(&p->enable_smi_admin.attr);
+       p->enable_smi_admin.attr.name = "enable_smi_admin";
+       p->enable_smi_admin.attr.mode = 0644;
+       p->enable_smi_admin.show = sysfs_show_enable_smi_admin;
+       p->enable_smi_admin.store = sysfs_store_enable_smi_admin;
+       err = sysfs_create_file(&p->kobj, &p->enable_smi_admin.attr);
+       if (!err)
+               return 0;
+
+       /* Second file failed: undo the first so nothing is left half set up. */
+       pr_err("failed to create enable_smi_admin\n");
+       sysfs_remove_file(&p->kobj, &p->smi_enabled.attr);
+       return err;
+}
+
+/*
+ * Remove the "smi_enabled" and "enable_smi_admin" sysfs files of @p.
+ *
+ * Applies the same skip conditions as add_vf_smi_entries(): nothing is
+ * removed for an Ethernet link layer or for the master function, since no
+ * files were created for them.
+ */
+static void remove_vf_smi_entries(struct mlx4_port *p)
+{
+       if (rdma_port_get_link_layer(&p->dev->ib_dev, p->port_num) ==
+           IB_LINK_LAYER_ETHERNET)
+               return;
+       if (p->slave == mlx4_master_func_num(p->dev->dev))
+               return;
+
+       sysfs_remove_file(&p->kobj, &p->smi_enabled.attr);
+       sysfs_remove_file(&p->kobj, &p->enable_smi_admin.attr);
+}
+
 static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave)
 {
        struct mlx4_port *p;
@@ -602,6 +699,10 @@ static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave)
        if (ret)
                goto err_free_gid;
 
+       ret = add_vf_smi_entries(p);
+       if (ret)
+               goto err_free_gid;
+
        list_add_tail(&p->kobj.entry, &dev->pkeys.pkey_port_list[slave]);
        return 0;
 
@@ -669,6 +770,7 @@ err_add:
                mport = container_of(p, struct mlx4_port, kobj);
                sysfs_remove_group(p, &mport->pkey_group);
                sysfs_remove_group(p, &mport->gid_group);
+               remove_vf_smi_entries(mport);
                kobject_put(p);
        }
        kobject_put(dev->dev_ports_parent[slave]);
@@ -713,6 +815,7 @@ static void unregister_pkey_tree(struct mlx4_ib_dev *device)
                        port = container_of(p, struct mlx4_port, kobj);
                        sysfs_remove_group(p, &port->pkey_group);
                        sysfs_remove_group(p, &port->gid_group);
+                       remove_vf_smi_entries(port);
                        kobject_put(p);
                        kobject_put(device->dev_ports_parent[slave]);
                }
index 62bb6b49dc1d56debdeb6ecea2d8b431fd11c2bb..8ae4f896cb4140485fb53e3d3b53c360dfe57ac6 100644 (file)
@@ -32,6 +32,7 @@
 
 #include <linux/kref.h>
 #include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
 #include "mlx5_ib.h"
 #include "user.h"
 
@@ -602,14 +603,24 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
                          int *cqe_size, int *index, int *inlen)
 {
        struct mlx5_ib_create_cq ucmd;
+       size_t ucmdlen;
        int page_shift;
        int npages;
        int ncont;
        int err;
 
-       if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
+       ucmdlen =
+               (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) <
+                sizeof(ucmd)) ? (sizeof(ucmd) -
+                                 sizeof(ucmd.reserved)) : sizeof(ucmd);
+
+       if (ib_copy_from_udata(&ucmd, udata, ucmdlen))
                return -EFAULT;
 
+       if (ucmdlen == sizeof(ucmd) &&
+           ucmd.reserved != 0)
+               return -EINVAL;
+
        if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128)
                return -EINVAL;
 
index 50541586e0a6171df4ef6fe2a1d5e8d20a223f11..f2ccf1a5a2910f92418751f2b2b4a6101da00105 100644 (file)
@@ -264,8 +264,6 @@ struct mlx5_ib_mr {
        __be64                  *pas;
        dma_addr_t              dma;
        int                     npages;
-       struct completion       done;
-       enum ib_wc_status       status;
        struct mlx5_ib_dev     *dev;
        struct mlx5_create_mkey_mbox_out out;
        struct mlx5_core_sig_ctx    *sig;
@@ -277,6 +275,17 @@ struct mlx5_ib_fast_reg_page_list {
        dma_addr_t                      map;
 };
 
+struct mlx5_ib_umr_context {   /* per-request UMR completion state; posters pass its address in wr_id */
+       enum ib_wc_status       status; /* written from wc.status by mlx5_umr_cq_handler() */
+       struct completion       done;   /* completed by mlx5_umr_cq_handler(); the poster waits on it */
+};
+
+/*
+ * Prepare @context for one UMR work request: arm the completion the poster
+ * will wait on and preset status to -1 (presumably so a context that the CQ
+ * handler never updated cannot compare equal to IB_WC_SUCCESS - confirm
+ * against the enum values).
+ */
+static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
+{
+       init_completion(&context->done);
+       context->status = -1;
+}
+
 struct umr_common {
        struct ib_pd    *pd;
        struct ib_cq    *cq;
index 81392b26d078abfd50b819d9a6dd3589a21fa522..afa873bd028ed5ad76f2348bbe9789b0b225b476 100644 (file)
@@ -73,6 +73,8 @@ static void reg_mr_callback(int status, void *context)
        struct mlx5_cache_ent *ent = &cache->ent[c];
        u8 key;
        unsigned long flags;
+       struct mlx5_mr_table *table = &dev->mdev.priv.mr_table;
+       int err;
 
        spin_lock_irqsave(&ent->lock, flags);
        ent->pending--;
@@ -107,6 +109,13 @@ static void reg_mr_callback(int status, void *context)
        ent->cur++;
        ent->size++;
        spin_unlock_irqrestore(&ent->lock, flags);
+
+       write_lock_irqsave(&table->lock, flags);
+       err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmr.key),
+                               &mr->mmr);
+       if (err)
+               pr_err("Error inserting to mr tree. 0x%x\n", -err);
+       write_unlock_irqrestore(&table->lock, flags);
 }
 
 static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
@@ -699,7 +708,7 @@ static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
 
 void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
 {
-       struct mlx5_ib_mr *mr;
+       struct mlx5_ib_umr_context *context;
        struct ib_wc wc;
        int err;
 
@@ -712,9 +721,9 @@ void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
                if (err == 0)
                        break;
 
-               mr = (struct mlx5_ib_mr *)(unsigned long)wc.wr_id;
-               mr->status = wc.status;
-               complete(&mr->done);
+               context = (struct mlx5_ib_umr_context *) (unsigned long) wc.wr_id;
+               context->status = wc.status;
+               complete(&context->done);
        }
        ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
 }
@@ -726,11 +735,12 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct device *ddev = dev->ib_dev.dma_device;
        struct umr_common *umrc = &dev->umrc;
+       struct mlx5_ib_umr_context umr_context;
        struct ib_send_wr wr, *bad;
        struct mlx5_ib_mr *mr;
        struct ib_sge sg;
        int size = sizeof(u64) * npages;
-       int err;
+       int err = 0;
        int i;
 
        for (i = 0; i < 1; i++) {
@@ -751,7 +761,7 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
        mr->pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
        if (!mr->pas) {
                err = -ENOMEM;
-               goto error;
+               goto free_mr;
        }
 
        mlx5_ib_populate_pas(dev, umem, page_shift,
@@ -760,44 +770,46 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
        mr->dma = dma_map_single(ddev, mr_align(mr->pas, MLX5_UMR_ALIGN), size,
                                 DMA_TO_DEVICE);
        if (dma_mapping_error(ddev, mr->dma)) {
-               kfree(mr->pas);
                err = -ENOMEM;
-               goto error;
+               goto free_pas;
        }
 
        memset(&wr, 0, sizeof(wr));
-       wr.wr_id = (u64)(unsigned long)mr;
+       wr.wr_id = (u64)(unsigned long)&umr_context;
        prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key, page_shift, virt_addr, len, access_flags);
 
-       /* We serialize polls so one process does not kidnap another's
-        * completion. This is not a problem since wr is completed in
-        * around 1 usec
-        */
+       mlx5_ib_init_umr_context(&umr_context);
        down(&umrc->sem);
-       init_completion(&mr->done);
        err = ib_post_send(umrc->qp, &wr, &bad);
        if (err) {
                mlx5_ib_warn(dev, "post send failed, err %d\n", err);
-               up(&umrc->sem);
-               goto error;
+               goto unmap_dma;
+       } else {
+               wait_for_completion(&umr_context.done);
+               if (umr_context.status != IB_WC_SUCCESS) {
+                       mlx5_ib_warn(dev, "reg umr failed\n");
+                       err = -EFAULT;
+               }
        }
-       wait_for_completion(&mr->done);
-       up(&umrc->sem);
 
+       mr->mmr.iova = virt_addr;
+       mr->mmr.size = len;
+       mr->mmr.pd = to_mpd(pd)->pdn;
+
+unmap_dma:
+       up(&umrc->sem);
        dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
+
+free_pas:
        kfree(mr->pas);
 
-       if (mr->status != IB_WC_SUCCESS) {
-               mlx5_ib_warn(dev, "reg umr failed\n");
-               err = -EFAULT;
-               goto error;
+free_mr:
+       if (err) {
+               free_cached_mr(dev, mr);
+               return ERR_PTR(err);
        }
 
        return mr;
-
-error:
-       free_cached_mr(dev, mr);
-       return ERR_PTR(err);
 }
 
 static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
@@ -926,24 +938,26 @@ error:
 static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
        struct umr_common *umrc = &dev->umrc;
+       struct mlx5_ib_umr_context umr_context;
        struct ib_send_wr wr, *bad;
        int err;
 
        memset(&wr, 0, sizeof(wr));
-       wr.wr_id = (u64)(unsigned long)mr;
+       wr.wr_id = (u64)(unsigned long)&umr_context;
        prep_umr_unreg_wqe(dev, &wr, mr->mmr.key);
 
+       mlx5_ib_init_umr_context(&umr_context);
        down(&umrc->sem);
-       init_completion(&mr->done);
        err = ib_post_send(umrc->qp, &wr, &bad);
        if (err) {
                up(&umrc->sem);
                mlx5_ib_dbg(dev, "err %d\n", err);
                goto error;
+       } else {
+               wait_for_completion(&umr_context.done);
+               up(&umrc->sem);
        }
-       wait_for_completion(&mr->done);
-       up(&umrc->sem);
-       if (mr->status != IB_WC_SUCCESS) {
+       if (umr_context.status != IB_WC_SUCCESS) {
                mlx5_ib_warn(dev, "unreg umr failed\n");
                err = -EFAULT;
                goto error;
index dc930ed21eca66c17114ce27a79f2a6b7d7e0170..d13ddf1c0033385f9b2d6b5dadac29d87aaeaf45 100644 (file)
@@ -574,6 +574,10 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
        uar_index = uuarn_to_uar_index(&context->uuari, uuarn);
        mlx5_ib_dbg(dev, "uuarn 0x%x, uar_index 0x%x\n", uuarn, uar_index);
 
+       qp->rq.offset = 0;
+       qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
+       qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
+
        err = set_user_buf_size(dev, qp, &ucmd);
        if (err)
                goto err_uuar;
@@ -2078,6 +2082,7 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr,
        struct ib_sig_domain *wire = &sig_attrs->wire;
        int ret, selector;
 
+       memset(bsf, 0, sizeof(*bsf));
        switch (sig_attrs->mem.sig_type) {
        case IB_SIG_TYPE_T10_DIF:
                if (sig_attrs->wire.sig_type != IB_SIG_TYPE_T10_DIF)
@@ -2090,9 +2095,11 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr,
                        /* Same block structure */
                        basic->bsf_size_sbs = 1 << 4;
                        if (mem->sig.dif.bg_type == wire->sig.dif.bg_type)
-                               basic->wire.copy_byte_mask = 0xff;
-                       else
-                               basic->wire.copy_byte_mask = 0x3f;
+                               basic->wire.copy_byte_mask |= 0xc0;
+                       if (mem->sig.dif.app_tag == wire->sig.dif.app_tag)
+                               basic->wire.copy_byte_mask |= 0x30;
+                       if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag)
+                               basic->wire.copy_byte_mask |= 0x0f;
                } else
                        basic->wire.bs_selector = bs_selector(wire->sig.dif.pi_interval);
 
@@ -2131,9 +2138,13 @@ static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
        int ret;
        int wqe_size;
 
-       if (!wr->wr.sig_handover.prot) {
+       if (!wr->wr.sig_handover.prot ||
+           (data_key == wr->wr.sig_handover.prot->lkey &&
+            data_va == wr->wr.sig_handover.prot->addr &&
+            data_len == wr->wr.sig_handover.prot->length)) {
                /**
                 * Source domain doesn't contain signature information
+                * or data and protection are interleaved in memory.
                 * So need construct:
                 *                  ------------------
                 *                 |     data_klm     |
@@ -2187,23 +2198,13 @@ static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
                data_sentry->bcount = cpu_to_be16(block_size);
                data_sentry->key = cpu_to_be32(data_key);
                data_sentry->va = cpu_to_be64(data_va);
+               data_sentry->stride = cpu_to_be16(block_size);
+
                prot_sentry->bcount = cpu_to_be16(prot_size);
                prot_sentry->key = cpu_to_be32(prot_key);
+               prot_sentry->va = cpu_to_be64(prot_va);
+               prot_sentry->stride = cpu_to_be16(prot_size);
 
-               if (prot_key == data_key && prot_va == data_va) {
-                       /**
-                        * The data and protection are interleaved
-                        * in a single memory region
-                        **/
-                       prot_sentry->va = cpu_to_be64(data_va + block_size);
-                       prot_sentry->stride = cpu_to_be16(block_size + prot_size);
-                       data_sentry->stride = prot_sentry->stride;
-               } else {
-                       /* The data and protection are two different buffers */
-                       prot_sentry->va = cpu_to_be64(prot_va);
-                       data_sentry->stride = cpu_to_be16(block_size);
-                       prot_sentry->stride = cpu_to_be16(prot_size);
-               }
                wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) +
                                 sizeof(*prot_sentry), 64);
        }
@@ -2275,7 +2276,10 @@ static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
 
        /* length of the protected region, data + protection */
        region_len = wr->sg_list->length;
-       if (wr->wr.sig_handover.prot)
+       if (wr->wr.sig_handover.prot &&
+           (wr->wr.sig_handover.prot->lkey != wr->sg_list->lkey  ||
+            wr->wr.sig_handover.prot->addr != wr->sg_list->addr  ||
+            wr->wr.sig_handover.prot->length != wr->sg_list->length))
                region_len += wr->wr.sig_handover.prot->length;
 
        /**
index 210b3eaf188aa25fcf11f657cefe907c8f77f197..384af6dec5eb207176a6998d4f92f4ea69f4acde 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/mlx5/srq.h>
 #include <linux/slab.h>
 #include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
 
 #include "mlx5_ib.h"
 #include "user.h"
@@ -78,16 +79,27 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
 {
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct mlx5_ib_create_srq ucmd;
+       size_t ucmdlen;
        int err;
        int npages;
        int page_shift;
        int ncont;
        u32 offset;
 
-       if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
+       ucmdlen =
+               (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) <
+                sizeof(ucmd)) ? (sizeof(ucmd) -
+                                 sizeof(ucmd.reserved)) : sizeof(ucmd);
+
+       if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) {
                mlx5_ib_dbg(dev, "failed copy udata\n");
                return -EFAULT;
        }
+
+       if (ucmdlen == sizeof(ucmd) &&
+           ucmd.reserved != 0)
+               return -EINVAL;
+
        srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
 
        srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size,
index 0f4f8e42a17fe5f067d68782f21598980451c48f..d0ba264ac1ed259ab4b58473d3ba12c2d499f4b0 100644 (file)
@@ -91,6 +91,7 @@ struct mlx5_ib_create_cq {
        __u64   buf_addr;
        __u64   db_addr;
        __u32   cqe_size;
+       __u32   reserved; /* explicit padding (optional on i386) */
 };
 
 struct mlx5_ib_create_cq_resp {
@@ -109,6 +110,7 @@ struct mlx5_ib_create_srq {
        __u64   buf_addr;
        __u64   db_addr;
        __u32   flags;
+       __u32   reserved; /* explicit padding (optional on i386) */
 };
 
 struct mlx5_ib_create_srq_resp {
index 353c7b05a90a102db8fc22be79187918004334cc..3b2a6dc8ea99d734645a24cef66a78867f66a2cd 100644 (file)
@@ -68,7 +68,6 @@ MODULE_VERSION(DRV_VERSION);
 int max_mtu = 9000;
 int interrupt_mod_interval = 0;
 
-
 /* Interoperability */
 int mpa_version = 1;
 module_param(mpa_version, int, 0644);
@@ -112,6 +111,16 @@ static struct pci_device_id nes_pci_table[] = {
 
 MODULE_DEVICE_TABLE(pci, nes_pci_table);
 
+/*
+ * registered nes netlink callbacks
+ *
+ * Indexed by RDMA_NL_IWPM_* op; each listed op installs its iwpm_*_cb
+ * handler in the .dump slot.  nes_probe() passes this table to
+ * ibnl_add_client() for the RDMA_NL_NES client together with
+ * RDMA_NL_IWPM_NUM_OPS.
+ */
+static struct ibnl_client_cbs nes_nl_cb_table[] = {
+       [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
+       [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
+       [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb},
+       [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb},
+       [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb},
+       [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}
+};
+
 static int nes_inetaddr_event(struct notifier_block *, unsigned long, void *);
 static int nes_net_event(struct notifier_block *, unsigned long, void *);
 static int nes_notifiers_registered;
@@ -672,6 +681,17 @@ static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
        }
        nes_notifiers_registered++;
 
+       if (ibnl_add_client(RDMA_NL_NES, RDMA_NL_IWPM_NUM_OPS, nes_nl_cb_table))
+               printk(KERN_ERR PFX "%s[%u]: Failed to add netlink callback\n",
+                       __func__, __LINE__);
+
+       ret = iwpm_init(RDMA_NL_NES);
+       if (ret) {
+               printk(KERN_ERR PFX "%s: port mapper initialization failed\n",
+                               pci_name(pcidev));
+               goto bail7;
+       }
+
        INIT_DELAYED_WORK(&nesdev->work, nes_recheck_link_status);
 
        /* Initialize network devices */
@@ -710,6 +730,7 @@ static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
 
        nes_debug(NES_DBG_INIT, "netdev_count=%d, nesadapter->netdev_count=%d\n",
                        nesdev->netdev_count, nesdev->nesadapter->netdev_count);
+       ibnl_remove_client(RDMA_NL_NES);
 
        nes_notifiers_registered--;
        if (nes_notifiers_registered == 0) {
@@ -773,6 +794,8 @@ static void nes_remove(struct pci_dev *pcidev)
                                nesdev->nesadapter->netdev_count--;
                        }
                }
+       ibnl_remove_client(RDMA_NL_NES);
+       iwpm_exit(RDMA_NL_NES);
 
        nes_notifiers_registered--;
        if (nes_notifiers_registered == 0) {
index 33cc58941a3ea32923b3d6ecfbe3480b617e308d..bd9d132f11c7c48c6134ac13ccf7662c93cdd53c 100644 (file)
@@ -51,6 +51,8 @@
 #include <rdma/ib_pack.h>
 #include <rdma/rdma_cm.h>
 #include <rdma/iw_cm.h>
+#include <rdma/rdma_netlink.h>
+#include <rdma/iw_portmap.h>
 
 #define NES_SEND_FIRST_WRITE
 
 #define NES_DBG_IW_TX       0x00040000
 #define NES_DBG_SHUTDOWN    0x00080000
 #define NES_DBG_PAU         0x00100000
+#define NES_DBG_NLMSG       0x00200000
 #define NES_DBG_RSVD1       0x10000000
 #define NES_DBG_RSVD2       0x20000000
 #define NES_DBG_RSVD3       0x40000000
index dfa9df484505e6ecdc5362f758ade1627b98d0c7..6f09a72e78d7d8ec9690413924e079af764aeaf3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 - 2011 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2006 - 2014 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -59,6 +59,7 @@
 #include <net/route.h>
 #include <net/ip_fib.h>
 #include <net/tcp.h>
+#include <linux/fcntl.h>
 
 #include "nes.h"
 
@@ -166,7 +167,6 @@ int nes_rem_ref_cm_node(struct nes_cm_node *cm_node)
 {
        return rem_ref_cm_node(cm_node->cm_core, cm_node);
 }
-
 /**
  * create_event
  */
@@ -482,11 +482,11 @@ static void form_cm_frame(struct sk_buff *skb,
        iph->ttl = 0x40;
        iph->protocol = 0x06;   /* IPPROTO_TCP */
 
-       iph->saddr = htonl(cm_node->loc_addr);
-       iph->daddr = htonl(cm_node->rem_addr);
+       iph->saddr = htonl(cm_node->mapped_loc_addr);
+       iph->daddr = htonl(cm_node->mapped_rem_addr);
 
-       tcph->source = htons(cm_node->loc_port);
-       tcph->dest = htons(cm_node->rem_port);
+       tcph->source = htons(cm_node->mapped_loc_port);
+       tcph->dest = htons(cm_node->mapped_rem_port);
        tcph->seq = htonl(cm_node->tcp_cntxt.loc_seq_num);
 
        if (flags & SET_ACK) {
@@ -525,6 +525,100 @@ static void form_cm_frame(struct sk_buff *skb,
        cm_packets_created++;
 }
 
+/*
+ * nes_create_sockaddr - Record ip addr and tcp port in a sockaddr struct
+ *
+ * @ip_addr: IPv4 address, stored exactly as passed (callers apply htonl())
+ * @port:    TCP port, stored exactly as passed (callers apply htons())
+ * @addr:    storage to fill; it is written through a struct sockaddr_in view
+ *
+ * The whole storage is cleared before the three sockaddr_in fields are set.
+ * The callers in this file pass either an uninitialized on-stack
+ * sockaddr_storage or a member of an on-stack iwpm_sa_data, so without the
+ * memset every byte outside sin_family/sin_addr/sin_port would still hold
+ * stale stack contents when the struct is handed on to the iwpm_* helpers.
+ */
+static void nes_create_sockaddr(__be32 ip_addr, __be16 port,
+                               struct sockaddr_storage *addr)
+{
+       struct sockaddr_in *nes_sockaddr = (struct sockaddr_in *)addr;
+
+       memset(addr, 0, sizeof(*addr));
+       nes_sockaddr->sin_family = AF_INET;
+       nes_sockaddr->sin_addr.s_addr = ip_addr;
+       nes_sockaddr->sin_port = port;
+}
+
+/*
+ * nes_create_mapinfo - Create a mapinfo object in the port mapper data base
+ *
+ * Builds two AF_INET sockaddrs from cm_info - one from loc_addr/loc_port and
+ * one from mapped_loc_addr/mapped_loc_port (both converted with
+ * htonl()/htons()) - and passes the pair to iwpm_create_mapinfo() for the
+ * RDMA_NL_NES client.
+ *
+ * Returns the value of iwpm_create_mapinfo(); the callers in this file treat
+ * any non-zero result as failure.
+ */
+static int nes_create_mapinfo(struct nes_cm_info *cm_info)
+{
+       struct sockaddr_storage orig_sa, mapped_sa;
+       __be32 orig_ip = htonl(cm_info->loc_addr);
+       __be16 orig_port = htons(cm_info->loc_port);
+       __be32 mapped_ip = htonl(cm_info->mapped_loc_addr);
+       __be16 mapped_port = htons(cm_info->mapped_loc_port);
+
+       nes_create_sockaddr(orig_ip, orig_port, &orig_sa);
+       nes_create_sockaddr(mapped_ip, mapped_port, &mapped_sa);
+
+       return iwpm_create_mapinfo(&orig_sa, &mapped_sa, RDMA_NL_NES);
+}
+
+/*
+ * nes_remove_mapinfo - Remove a mapinfo object from the port mapper data base
+ *                      and send a remove mapping op message to
+ *                      the userspace port mapper
+ *
+ * All four arguments are converted with htonl()/htons() before being stored
+ * in the sockaddrs. The result of iwpm_remove_mapinfo() is not checked; the
+ * function returns the value of iwpm_remove_mapping() for the local address
+ * and the RDMA_NL_NES client.
+ */
+static int nes_remove_mapinfo(u32 loc_addr, u16 loc_port,
+                       u32 mapped_loc_addr, u16 mapped_loc_port)
+{
+       struct sockaddr_storage orig_sa, mapped_sa;
+       int ret;
+
+       nes_create_sockaddr(htonl(loc_addr), htons(loc_port), &orig_sa);
+       nes_create_sockaddr(htonl(mapped_loc_addr), htons(mapped_loc_port),
+                               &mapped_sa);
+
+       /* drop the data base entry first, then report the remove-mapping op */
+       iwpm_remove_mapinfo(&orig_sa, &mapped_sa);
+
+       ret = iwpm_remove_mapping(&orig_sa, RDMA_NL_NES);
+       return ret;
+}
+
+/*
+ * nes_form_pm_msg - Form a port mapper message with mapping info
+ *
+ * Fills pm_msg->loc_addr and pm_msg->rem_addr with AF_INET sockaddrs built
+ * from the (unmapped) local and remote address/port of cm_info, converted
+ * with htonl()/htons(). No other member of pm_msg is written here.
+ */
+static void nes_form_pm_msg(struct nes_cm_info *cm_info,
+                               struct iwpm_sa_data *pm_msg)
+{
+       const __be32 loc_ip = htonl(cm_info->loc_addr);
+       const __be16 loc_tcp = htons(cm_info->loc_port);
+       const __be32 rem_ip = htonl(cm_info->rem_addr);
+       const __be16 rem_tcp = htons(cm_info->rem_port);
+
+       nes_create_sockaddr(loc_ip, loc_tcp, &pm_msg->loc_addr);
+       nes_create_sockaddr(rem_ip, rem_tcp, &pm_msg->rem_addr);
+}
+
+/*
+ * nes_form_reg_msg - Form a port mapper message with dev info
+ *
+ * Copies the ib device name of nesvnic into pm_msg->dev_name and its netdev
+ * name into pm_msg->if_name. Both copies are fixed-length memcpy()s of
+ * IWPM_DEVNAME_SIZE and IWPM_IFNAME_SIZE bytes respectively, independent of
+ * the actual string lengths.
+ */
+static void nes_form_reg_msg(struct nes_vnic *nesvnic,
+                       struct iwpm_dev_data *pm_msg)
+{
+       const char *ibdev_name = nesvnic->nesibdev->ibdev.name;
+       const char *netdev_name = nesvnic->netdev->name;
+
+       memcpy(pm_msg->dev_name, ibdev_name, IWPM_DEVNAME_SIZE);
+       memcpy(pm_msg->if_name, netdev_name, IWPM_IFNAME_SIZE);
+}
+
+/*
+ * nes_record_pm_msg - Save the received mapping info
+ *
+ * Reads pm_msg->mapped_loc_addr and pm_msg->mapped_rem_addr as sockaddr_in.
+ * For each one whose family is AF_INET, the address and port are converted
+ * with ntohl()/ntohs() and stored in the matching mapped_* members of
+ * cm_info. A side with any other family leaves its cm_info members as they
+ * were.
+ */
+static void nes_record_pm_msg(struct nes_cm_info *cm_info,
+                       struct iwpm_sa_data *pm_msg)
+{
+       struct sockaddr_in *loc_sin =
+                       (struct sockaddr_in *)&pm_msg->mapped_loc_addr;
+       struct sockaddr_in *rem_sin =
+                       (struct sockaddr_in *)&pm_msg->mapped_rem_addr;
+
+       /* local side of the mapping */
+       if (loc_sin->sin_family == AF_INET) {
+               cm_info->mapped_loc_addr = ntohl(loc_sin->sin_addr.s_addr);
+               cm_info->mapped_loc_port = ntohs(loc_sin->sin_port);
+       }
+
+       /* remote side of the mapping */
+       if (rem_sin->sin_family == AF_INET) {
+               cm_info->mapped_rem_addr = ntohl(rem_sin->sin_addr.s_addr);
+               cm_info->mapped_rem_port = ntohs(rem_sin->sin_port);
+       }
+}
+
 /**
  * print_core - dump a cm core
  */
@@ -1147,8 +1241,11 @@ static struct nes_cm_node *find_node(struct nes_cm_core *cm_core,
                          loc_addr, loc_port,
                          cm_node->rem_addr, cm_node->rem_port,
                          rem_addr, rem_port);
-               if ((cm_node->loc_addr == loc_addr) && (cm_node->loc_port == loc_port) &&
-                   (cm_node->rem_addr == rem_addr) && (cm_node->rem_port == rem_port)) {
+               if ((cm_node->mapped_loc_addr == loc_addr) &&
+                       (cm_node->mapped_loc_port == loc_port) &&
+                       (cm_node->mapped_rem_addr == rem_addr) &&
+                       (cm_node->mapped_rem_port == rem_port)) {
+
                        add_ref_cm_node(cm_node);
                        spin_unlock_irqrestore(&cm_core->ht_lock, flags);
                        return cm_node;
@@ -1165,18 +1262,28 @@ static struct nes_cm_node *find_node(struct nes_cm_core *cm_core,
  * find_listener - find a cm node listening on this addr-port pair
  */
 static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core,
-                                            nes_addr_t dst_addr, u16 dst_port, enum nes_cm_listener_state listener_state)
+                                       nes_addr_t dst_addr, u16 dst_port,
+                                       enum nes_cm_listener_state listener_state, int local)
 {
        unsigned long flags;
        struct nes_cm_listener *listen_node;
+       nes_addr_t listen_addr;
+       u16 listen_port;
 
        /* walk list and find cm_node associated with this session ID */
        spin_lock_irqsave(&cm_core->listen_list_lock, flags);
        list_for_each_entry(listen_node, &cm_core->listen_list.list, list) {
+               if (local) {
+                       listen_addr = listen_node->loc_addr;
+                       listen_port = listen_node->loc_port;
+               } else {
+                       listen_addr = listen_node->mapped_loc_addr;
+                       listen_port = listen_node->mapped_loc_port;
+               }
                /* compare node pair, return node handle if a match */
-               if (((listen_node->loc_addr == dst_addr) ||
-                    listen_node->loc_addr == 0x00000000) &&
-                   (listen_node->loc_port == dst_port) &&
+               if (((listen_addr == dst_addr) ||
+                    listen_addr == 0x00000000) &&
+                   (listen_port == dst_port) &&
                    (listener_state & listen_node->listener_state)) {
                        atomic_inc(&listen_node->ref_count);
                        spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
@@ -1189,7 +1296,6 @@ static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core,
        return NULL;
 }
 
-
 /**
  * add_hte_node - add a cm node to the hash table
  */
@@ -1310,9 +1416,20 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
 
                spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
 
-               if (listener->nesvnic)
-                       nes_manage_apbvt(listener->nesvnic, listener->loc_port,
-                                        PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL);
+               if (listener->nesvnic) {
+                       nes_manage_apbvt(listener->nesvnic,
+                               listener->mapped_loc_port,
+                               PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn),
+                               NES_MANAGE_APBVT_DEL);
+
+                       nes_remove_mapinfo(listener->loc_addr,
+                                       listener->loc_port,
+                                       listener->mapped_loc_addr,
+                                       listener->mapped_loc_port);
+                       nes_debug(NES_DBG_NLMSG,
+                                       "Delete APBVT mapped_loc_port = %04X\n",
+                                       listener->mapped_loc_port);
+               }
 
                nes_debug(NES_DBG_CM, "destroying listener (%p)\n", listener);
 
@@ -1454,6 +1571,11 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
        cm_node->loc_port = cm_info->loc_port;
        cm_node->rem_port = cm_info->rem_port;
 
+       cm_node->mapped_loc_addr = cm_info->mapped_loc_addr;
+       cm_node->mapped_rem_addr = cm_info->mapped_rem_addr;
+       cm_node->mapped_loc_port = cm_info->mapped_loc_port;
+       cm_node->mapped_rem_port = cm_info->mapped_rem_port;
+
        cm_node->mpa_frame_rev = mpa_version;
        cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
        cm_node->mpav2_ird_ord = 0;
@@ -1500,8 +1622,10 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
        cm_node->loopbackpartner = NULL;
 
        /* get the mac addr for the remote node */
-       oldarpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE);
-       arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr, oldarpindex);
+       oldarpindex = nes_arp_table(nesdev, cm_node->mapped_rem_addr,
+                               NULL, NES_ARP_RESOLVE);
+       arpindex = nes_addr_resolve_neigh(nesvnic,
+                               cm_node->mapped_rem_addr, oldarpindex);
        if (arpindex < 0) {
                kfree(cm_node);
                return NULL;
@@ -1563,11 +1687,14 @@ static int rem_ref_cm_node(struct nes_cm_core *cm_core,
                mini_cm_dec_refcnt_listen(cm_core, cm_node->listener, 0);
        } else {
                if (cm_node->apbvt_set && cm_node->nesvnic) {
-                       nes_manage_apbvt(cm_node->nesvnic, cm_node->loc_port,
-                                        PCI_FUNC(
-                                                cm_node->nesvnic->nesdev->pcidev->devfn),
+                       nes_manage_apbvt(cm_node->nesvnic, cm_node->mapped_loc_port,
+                                        PCI_FUNC(cm_node->nesvnic->nesdev->pcidev->devfn),
                                         NES_MANAGE_APBVT_DEL);
                }
+               nes_debug(NES_DBG_NLMSG, "Delete APBVT mapped_loc_port = %04X\n",
+                                       cm_node->mapped_loc_port);
+               nes_remove_mapinfo(cm_node->loc_addr, cm_node->loc_port,
+                       cm_node->mapped_loc_addr, cm_node->mapped_loc_port);
        }
 
        atomic_dec(&cm_core->node_cnt);
@@ -2235,17 +2362,21 @@ static void process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb,
  * mini_cm_listen - create a listen node with params
  */
 static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
-                                             struct nes_vnic *nesvnic, struct nes_cm_info *cm_info)
+                       struct nes_vnic *nesvnic, struct nes_cm_info *cm_info)
 {
        struct nes_cm_listener *listener;
+       struct iwpm_dev_data pm_reg_msg;
+       struct iwpm_sa_data pm_msg;
        unsigned long flags;
+       int iwpm_err = 0;
 
        nes_debug(NES_DBG_CM, "Search for 0x%08x : 0x%04x\n",
                  cm_info->loc_addr, cm_info->loc_port);
 
        /* cannot have multiple matching listeners */
-       listener = find_listener(cm_core, htonl(cm_info->loc_addr),
-                                htons(cm_info->loc_port), NES_CM_LISTENER_EITHER_STATE);
+       listener = find_listener(cm_core, cm_info->loc_addr, cm_info->loc_port,
+                               NES_CM_LISTENER_EITHER_STATE, 1);
+
        if (listener && listener->listener_state == NES_CM_LISTENER_ACTIVE_STATE) {
                /* find automatically incs ref count ??? */
                atomic_dec(&listener->ref_count);
@@ -2254,6 +2385,22 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
        }
 
        if (!listener) {
+               nes_form_reg_msg(nesvnic, &pm_reg_msg);
+               iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_NES);
+               if (iwpm_err) {
+                       nes_debug(NES_DBG_NLMSG,
+                       "Port Mapper reg pid fail (err = %d).\n", iwpm_err);
+               }
+               if (iwpm_valid_pid() && !iwpm_err) {
+                       nes_form_pm_msg(cm_info, &pm_msg);
+                       iwpm_err = iwpm_add_mapping(&pm_msg, RDMA_NL_NES);
+                       if (iwpm_err)
+                               nes_debug(NES_DBG_NLMSG,
+                               "Port Mapper query fail (err = %d).\n", iwpm_err);
+                       else
+                               nes_record_pm_msg(cm_info, &pm_msg);
+               }
+
                /* create a CM listen node (1/2 node to compare incoming traffic to) */
                listener = kzalloc(sizeof(*listener), GFP_ATOMIC);
                if (!listener) {
@@ -2261,8 +2408,10 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
                        return NULL;
                }
 
-               listener->loc_addr = htonl(cm_info->loc_addr);
-               listener->loc_port = htons(cm_info->loc_port);
+               listener->loc_addr = cm_info->loc_addr;
+               listener->loc_port = cm_info->loc_port;
+               listener->mapped_loc_addr = cm_info->mapped_loc_addr;
+               listener->mapped_loc_port = cm_info->mapped_loc_port;
                listener->reused_node = 0;
 
                atomic_set(&listener->ref_count, 1);
@@ -2324,14 +2473,18 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
 
        if (cm_info->loc_addr == cm_info->rem_addr) {
                loopbackremotelistener = find_listener(cm_core,
-                                                      ntohl(nesvnic->local_ipaddr), cm_node->rem_port,
-                                                      NES_CM_LISTENER_ACTIVE_STATE);
+                       cm_node->mapped_loc_addr, cm_node->mapped_rem_port,
+                       NES_CM_LISTENER_ACTIVE_STATE, 0);
                if (loopbackremotelistener == NULL) {
                        create_event(cm_node, NES_CM_EVENT_ABORTED);
                } else {
                        loopback_cm_info = *cm_info;
                        loopback_cm_info.loc_port = cm_info->rem_port;
                        loopback_cm_info.rem_port = cm_info->loc_port;
+                       loopback_cm_info.mapped_loc_port =
+                               cm_info->mapped_rem_port;
+                       loopback_cm_info.mapped_rem_port =
+                               cm_info->mapped_loc_port;
                        loopback_cm_info.cm_id = loopbackremotelistener->cm_id;
                        loopbackremotenode = make_cm_node(cm_core, nesvnic,
                                                          &loopback_cm_info, loopbackremotelistener);
@@ -2560,6 +2713,12 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core,
        nfo.rem_addr = ntohl(iph->saddr);
        nfo.rem_port = ntohs(tcph->source);
 
+       /* If port mapper is available these should be mapped address info */
+       nfo.mapped_loc_addr = ntohl(iph->daddr);
+       nfo.mapped_loc_port = ntohs(tcph->dest);
+       nfo.mapped_rem_addr = ntohl(iph->saddr);
+       nfo.mapped_rem_port = ntohs(tcph->source);
+
        tmp_daddr = cpu_to_be32(iph->daddr);
        tmp_saddr = cpu_to_be32(iph->saddr);
 
@@ -2568,8 +2727,8 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core,
 
        do {
                cm_node = find_node(cm_core,
-                                   nfo.rem_port, nfo.rem_addr,
-                                   nfo.loc_port, nfo.loc_addr);
+                                   nfo.mapped_rem_port, nfo.mapped_rem_addr,
+                                   nfo.mapped_loc_port, nfo.mapped_loc_addr);
 
                if (!cm_node) {
                        /* Only type of packet accepted are for */
@@ -2578,9 +2737,9 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core,
                                skb_handled = 0;
                                break;
                        }
-                       listener = find_listener(cm_core, nfo.loc_addr,
-                                                nfo.loc_port,
-                                                NES_CM_LISTENER_ACTIVE_STATE);
+                       listener = find_listener(cm_core, nfo.mapped_loc_addr,
+                                       nfo.mapped_loc_port,
+                                       NES_CM_LISTENER_ACTIVE_STATE, 0);
                        if (!listener) {
                                nfo.cm_id = NULL;
                                nfo.conn_type = 0;
@@ -3184,10 +3343,12 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 
        nes_cm_init_tsa_conn(nesqp, cm_node);
 
-       nesqp->nesqp_context->tcpPorts[0] = cpu_to_le16(ntohs(laddr->sin_port));
-       nesqp->nesqp_context->tcpPorts[1] = cpu_to_le16(ntohs(raddr->sin_port));
+       nesqp->nesqp_context->tcpPorts[0] =
+                               cpu_to_le16(cm_node->mapped_loc_port);
+       nesqp->nesqp_context->tcpPorts[1] =
+                               cpu_to_le16(cm_node->mapped_rem_port);
 
-       nesqp->nesqp_context->ip0 = cpu_to_le32(ntohl(raddr->sin_addr.s_addr));
+       nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->mapped_rem_addr);
 
        nesqp->nesqp_context->misc2 |= cpu_to_le32(
                (u32)PCI_FUNC(nesdev->pcidev->devfn) <<
@@ -3211,9 +3372,9 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        memset(&nes_quad, 0, sizeof(nes_quad));
        nes_quad.DstIpAdrIndex =
                cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
-       nes_quad.SrcIpadr = raddr->sin_addr.s_addr;
-       nes_quad.TcpPorts[0] = raddr->sin_port;
-       nes_quad.TcpPorts[1] = laddr->sin_port;
+       nes_quad.SrcIpadr = htonl(cm_node->mapped_rem_addr);
+       nes_quad.TcpPorts[0] = htons(cm_node->mapped_rem_port);
+       nes_quad.TcpPorts[1] = htons(cm_node->mapped_loc_port);
 
        /* Produce hash key */
        crc_value = get_crc_value(&nes_quad);
@@ -3315,6 +3476,9 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        int apbvt_set = 0;
        struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
        struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
+       struct iwpm_dev_data pm_reg_msg;
+       struct iwpm_sa_data pm_msg;
+       int iwpm_err = 0;
 
        if (cm_id->remote_addr.ss_family != AF_INET)
                return -ENOSYS;
@@ -3352,20 +3516,44 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        nes_debug(NES_DBG_CM, "mpa private data len =%u\n",
                  conn_param->private_data_len);
 
+       /* set up the connection params for the node */
+       cm_info.loc_addr = ntohl(laddr->sin_addr.s_addr);
+       cm_info.loc_port = ntohs(laddr->sin_port);
+       cm_info.rem_addr = ntohl(raddr->sin_addr.s_addr);
+       cm_info.rem_port = ntohs(raddr->sin_port);
+       cm_info.cm_id = cm_id;
+       cm_info.conn_type = NES_CM_IWARP_CONN_TYPE;
+
+       /* No port mapper available, go with the specified peer information */
+       cm_info.mapped_loc_addr = cm_info.loc_addr;
+       cm_info.mapped_loc_port = cm_info.loc_port;
+       cm_info.mapped_rem_addr = cm_info.rem_addr;
+       cm_info.mapped_rem_port = cm_info.rem_port;
+
+       nes_form_reg_msg(nesvnic, &pm_reg_msg);
+       iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_NES);
+       if (iwpm_err) {
+               nes_debug(NES_DBG_NLMSG,
+                       "Port Mapper reg pid fail (err = %d).\n", iwpm_err);
+       }
+       if (iwpm_valid_pid() && !iwpm_err) {
+               nes_form_pm_msg(&cm_info, &pm_msg);
+               iwpm_err = iwpm_add_and_query_mapping(&pm_msg, RDMA_NL_NES);
+               if (iwpm_err)
+                       nes_debug(NES_DBG_NLMSG,
+                       "Port Mapper query fail (err = %d).\n", iwpm_err);
+               else
+                       nes_record_pm_msg(&cm_info, &pm_msg);
+       }
+
        if (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr) {
-               nes_manage_apbvt(nesvnic, ntohs(laddr->sin_port),
-                                PCI_FUNC(nesdev->pcidev->devfn),
-                                NES_MANAGE_APBVT_ADD);
+               nes_manage_apbvt(nesvnic, cm_info.mapped_loc_port,
+                       PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD);
                apbvt_set = 1;
        }
 
-       /* set up the connection params for the node */
-       cm_info.loc_addr = htonl(laddr->sin_addr.s_addr);
-       cm_info.loc_port = htons(laddr->sin_port);
-       cm_info.rem_addr = htonl(raddr->sin_addr.s_addr);
-       cm_info.rem_port = htons(raddr->sin_port);
-       cm_info.cm_id = cm_id;
-       cm_info.conn_type = NES_CM_IWARP_CONN_TYPE;
+       if (nes_create_mapinfo(&cm_info))
+               return -ENOMEM;
 
        cm_id->add_ref(cm_id);
 
@@ -3375,10 +3563,14 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
                                          &cm_info);
        if (!cm_node) {
                if (apbvt_set)
-                       nes_manage_apbvt(nesvnic, ntohs(laddr->sin_port),
+                       nes_manage_apbvt(nesvnic, cm_info.mapped_loc_port,
                                         PCI_FUNC(nesdev->pcidev->devfn),
                                         NES_MANAGE_APBVT_DEL);
 
+               nes_debug(NES_DBG_NLMSG, "Delete mapped_loc_port = %04X\n",
+                               cm_info.mapped_loc_port);
+               nes_remove_mapinfo(cm_info.loc_addr, cm_info.loc_port,
+                       cm_info.mapped_loc_addr, cm_info.mapped_loc_port);
                cm_id->rem_ref(cm_id);
                return -ENOMEM;
        }
@@ -3424,13 +3616,16 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog)
                        nesvnic->local_ipaddr, laddr->sin_addr.s_addr);
 
        /* setup listen params in our api call struct */
-       cm_info.loc_addr = nesvnic->local_ipaddr;
-       cm_info.loc_port = laddr->sin_port;
+       cm_info.loc_addr = ntohl(nesvnic->local_ipaddr);
+       cm_info.loc_port = ntohs(laddr->sin_port);
        cm_info.backlog = backlog;
        cm_info.cm_id = cm_id;
 
        cm_info.conn_type = NES_CM_IWARP_CONN_TYPE;
 
+       /* No port mapper available, go with the specified info */
+       cm_info.mapped_loc_addr = cm_info.loc_addr;
+       cm_info.mapped_loc_port = cm_info.loc_port;
 
        cm_node = g_cm_core->api->listen(g_cm_core, nesvnic, &cm_info);
        if (!cm_node) {
@@ -3442,7 +3637,10 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog)
        cm_id->provider_data = cm_node;
 
        if (!cm_node->reused_node) {
-               err = nes_manage_apbvt(nesvnic, ntohs(laddr->sin_port),
+               if (nes_create_mapinfo(&cm_info))
+                       return -ENOMEM;
+
+               err = nes_manage_apbvt(nesvnic, cm_node->mapped_loc_port,
                                       PCI_FUNC(nesvnic->nesdev->pcidev->devfn),
                                       NES_MANAGE_APBVT_ADD);
                if (err) {
@@ -3567,9 +3765,11 @@ static void cm_event_connected(struct nes_cm_event *event)
        nes_cm_init_tsa_conn(nesqp, cm_node);
 
        /* set the QP tsa context */
-       nesqp->nesqp_context->tcpPorts[0] = cpu_to_le16(ntohs(laddr->sin_port));
-       nesqp->nesqp_context->tcpPorts[1] = cpu_to_le16(ntohs(raddr->sin_port));
-       nesqp->nesqp_context->ip0 = cpu_to_le32(ntohl(raddr->sin_addr.s_addr));
+       nesqp->nesqp_context->tcpPorts[0] =
+                       cpu_to_le16(cm_node->mapped_loc_port);
+       nesqp->nesqp_context->tcpPorts[1] =
+                       cpu_to_le16(cm_node->mapped_rem_port);
+       nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->mapped_rem_addr);
 
        nesqp->nesqp_context->misc2 |= cpu_to_le32(
                        (u32)PCI_FUNC(nesdev->pcidev->devfn) <<
@@ -3599,9 +3799,9 @@ static void cm_event_connected(struct nes_cm_event *event)
 
        nes_quad.DstIpAdrIndex =
                cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
-       nes_quad.SrcIpadr = raddr->sin_addr.s_addr;
-       nes_quad.TcpPorts[0] = raddr->sin_port;
-       nes_quad.TcpPorts[1] = laddr->sin_port;
+       nes_quad.SrcIpadr = htonl(cm_node->mapped_rem_addr);
+       nes_quad.TcpPorts[0] = htons(cm_node->mapped_rem_port);
+       nes_quad.TcpPorts[1] = htons(cm_node->mapped_loc_port);
 
        /* Produce hash key */
        crc_value = get_crc_value(&nes_quad);
@@ -3629,7 +3829,7 @@ static void cm_event_connected(struct nes_cm_event *event)
        cm_event.ird = cm_node->ird_size;
        cm_event.ord = cm_node->ord_size;
 
-       cm_event_laddr->sin_addr.s_addr = event->cm_info.rem_addr;
+       cm_event_laddr->sin_addr.s_addr = htonl(event->cm_info.rem_addr);
        ret = cm_id->event_handler(cm_id, &cm_event);
        nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
 
index 522c99cd07c4339a89f6eafb3073fcc0f1577bb6..f522cf6397893c916f44c1a3bc0c297b5dde6264 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 - 2011 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2006 - 2014 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -293,8 +293,8 @@ struct nes_cm_listener {
        struct list_head           list;
        struct nes_cm_core         *cm_core;
        u8                         loc_mac[ETH_ALEN];
-       nes_addr_t                 loc_addr;
-       u16                        loc_port;
+       nes_addr_t                 loc_addr, mapped_loc_addr;
+       u16                        loc_port, mapped_loc_port;
        struct iw_cm_id            *cm_id;
        enum nes_cm_conn_type      conn_type;
        atomic_t                   ref_count;
@@ -308,7 +308,9 @@ struct nes_cm_listener {
 /* per connection node and node state information */
 struct nes_cm_node {
        nes_addr_t                loc_addr, rem_addr;
+       nes_addr_t                mapped_loc_addr, mapped_rem_addr;
        u16                       loc_port, rem_port;
+       u16                       mapped_loc_port, mapped_rem_port;
 
        u8                        loc_mac[ETH_ALEN];
        u8                        rem_mac[ETH_ALEN];
@@ -364,6 +366,10 @@ struct nes_cm_info {
        u16 rem_port;
        nes_addr_t loc_addr;
        nes_addr_t rem_addr;
+       u16 mapped_loc_port;
+       u16 mapped_rem_port;
+       nes_addr_t mapped_loc_addr;
+       nes_addr_t mapped_rem_addr;
 
        enum nes_cm_conn_type  conn_type;
        int backlog;
index 6c54106f5e64119cfd9c50f60fd3ab58ade5af84..41a9aec9998d103f81dc325129cdab47bd20e2ff 100644 (file)
@@ -510,16 +510,9 @@ exit:
        return status;
 }
 
-static int ocrdma_debugfs_open(struct inode *inode, struct file *file)
-{
-       if (inode->i_private)
-               file->private_data = inode->i_private;
-       return 0;
-}
-
 static const struct file_operations ocrdma_dbg_ops = {
        .owner = THIS_MODULE,
-       .open = ocrdma_debugfs_open,
+       .open = simple_open,
        .read = ocrdma_dbgfs_ops_read,
 };
 
index 5b7aeb224a30ab54d56ba2566aaa8f3d4507de9d..8d3c78ddc906af620bd03cc812bcac95fb35cf94 100644 (file)
@@ -1272,7 +1272,7 @@ static int qib_notify_dca(struct notifier_block *nb, unsigned long event,
  * Do all the generic driver unit- and chip-independent memory
  * allocation and initialization.
  */
-static int __init qlogic_ib_init(void)
+static int __init qib_ib_init(void)
 {
        int ret;
 
@@ -1316,12 +1316,12 @@ bail:
        return ret;
 }
 
-module_init(qlogic_ib_init);
+module_init(qib_ib_init);
 
 /*
  * Do the non-unit driver cleanup, memory free, etc. at unload.
  */
-static void __exit qlogic_ib_cleanup(void)
+static void __exit qib_ib_cleanup(void)
 {
        int ret;
 
@@ -1346,7 +1346,7 @@ static void __exit qlogic_ib_cleanup(void)
        qib_dev_cleanup();
 }
 
-module_exit(qlogic_ib_cleanup);
+module_exit(qib_ib_cleanup);
 
 /* this can only be called after a successful initialization */
 static void cleanup_device_data(struct qib_devdata *dd)
index edad991d60ed5dd73a4638c71c59411fa112763a..22c720e5740d900d3d4f1a6de4913d95708a8f54 100644 (file)
@@ -1028,7 +1028,7 @@ static int set_pkeys(struct qib_devdata *dd, u8 port, u16 *pkeys)
 
                event.event = IB_EVENT_PKEY_CHANGE;
                event.device = &dd->verbs_dev.ibdev;
-               event.element.port_num = 1;
+               event.element.port_num = port;
                ib_dispatch_event(&event);
        }
        return 0;
index 0cad0c40d742638a3f5756a0c51d0b5974e98477..7fcc150d603c4d341034d6c4b9e9847588dfb69f 100644 (file)
@@ -985,7 +985,8 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
        struct ib_qp *ret;
 
        if (init_attr->cap.max_send_sge > ib_qib_max_sges ||
-           init_attr->cap.max_send_wr > ib_qib_max_qp_wrs) {
+           init_attr->cap.max_send_wr > ib_qib_max_qp_wrs ||
+           init_attr->create_flags) {
                ret = ERR_PTR(-EINVAL);
                goto bail;
        }
index d48d2c0a2e3ca10dcf7272da71ce44110f957346..53bd6a2d9cdbbae4545a70d1d070b17eafb18154 100644 (file)
@@ -466,6 +466,9 @@ struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
        ucontext = to_uucontext(pd->uobject->context);
        us_ibdev = to_usdev(pd->device);
 
+       if (init_attr->create_flags)
+               return ERR_PTR(-EINVAL);
+
        err = ib_copy_from_udata(&cmd, udata, sizeof(cmd));
        if (err) {
                usnic_err("%s: cannot copy udata for create_qp\n",
index d135ad90d9144e3f9bb20311b8fa309b3950690d..3a4288e0fbace4621df7166e7e64ca7a97a9d283 100644 (file)
@@ -1,3 +1,21 @@
+/*
+ * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/slab.h>
diff --git a/drivers/infiniband/ulp/Makefile b/drivers/infiniband/ulp/Makefile
new file mode 100644 (file)
index 0000000..f3c7dcf
--- /dev/null
@@ -0,0 +1,5 @@
+obj-$(CONFIG_INFINIBAND_IPOIB)         += ipoib/
+obj-$(CONFIG_INFINIBAND_SRP)           += srp/
+obj-$(CONFIG_INFINIBAND_SRPT)          += srpt/
+obj-$(CONFIG_INFINIBAND_ISER)          += iser/
+obj-$(CONFIG_INFINIBAND_ISERT)         += isert/
index 1377f85911c2490dc62764452856bcf680af7959..933efcea0d03f11b4da3967b8eedc137da21e08a 100644 (file)
@@ -1030,10 +1030,20 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
                .cap.max_send_sge       = 1,
                .sq_sig_type            = IB_SIGNAL_ALL_WR,
                .qp_type                = IB_QPT_RC,
-               .qp_context             = tx
+               .qp_context             = tx,
+               .create_flags           = IB_QP_CREATE_USE_GFP_NOIO
        };
 
-       return ib_create_qp(priv->pd, &attr);
+       struct ib_qp *tx_qp;
+
+       tx_qp = ib_create_qp(priv->pd, &attr);
+       if (PTR_ERR(tx_qp) == -EINVAL) {
+               ipoib_warn(priv, "can't use GFP_NOIO for QPs on device %s, using GFP_KERNEL\n",
+                          priv->ca->name);
+               attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO;
+               tx_qp = ib_create_qp(priv->pd, &attr);
+       }
+       return tx_qp;
 }
 
 static int ipoib_cm_send_req(struct net_device *dev,
@@ -1104,12 +1114,14 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
        struct ipoib_dev_priv *priv = netdev_priv(p->dev);
        int ret;
 
-       p->tx_ring = vzalloc(ipoib_sendq_size * sizeof *p->tx_ring);
+       p->tx_ring = __vmalloc(ipoib_sendq_size * sizeof *p->tx_ring,
+                              GFP_NOIO, PAGE_KERNEL);
        if (!p->tx_ring) {
                ipoib_warn(priv, "failed to allocate tx ring\n");
                ret = -ENOMEM;
                goto err_tx;
        }
+       memset(p->tx_ring, 0, ipoib_sendq_size * sizeof *p->tx_ring);
 
        p->qp = ipoib_cm_create_tx_qp(p->dev, p);
        if (IS_ERR(p->qp)) {
index 25f195ef44b02b09d3f34dec9b452cc7bb2a512c..eb7973957a6ea35585a16086adc40e29df151cb9 100644 (file)
@@ -99,6 +99,7 @@ MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support (default:disabled)");
 module_param_named(pi_guard, iser_pi_guard, int, 0644);
 MODULE_PARM_DESC(pi_guard, "T10-PI guard_type, 0:CRC|1:IP_CSUM (default:CRC)");
 
+static struct workqueue_struct *release_wq;
 struct iser_global ig;
 
 void
@@ -337,24 +338,6 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
        return cls_conn;
 }
 
-static void
-iscsi_iser_conn_destroy(struct iscsi_cls_conn *cls_conn)
-{
-       struct iscsi_conn *conn = cls_conn->dd_data;
-       struct iser_conn *ib_conn = conn->dd_data;
-
-       iscsi_conn_teardown(cls_conn);
-       /*
-        * Userspace will normally call the stop callback and
-        * already have freed the ib_conn, but if it goofed up then
-        * we free it here.
-        */
-       if (ib_conn) {
-               ib_conn->iscsi_conn = NULL;
-               iser_conn_put(ib_conn, 1); /* deref iscsi/ib conn unbinding */
-       }
-}
-
 static int
 iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
                     struct iscsi_cls_conn *cls_conn, uint64_t transport_eph,
@@ -392,29 +375,39 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
        conn->dd_data = ib_conn;
        ib_conn->iscsi_conn = conn;
 
-       iser_conn_get(ib_conn); /* ref iscsi/ib conn binding */
        return 0;
 }
 
+static int
+iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn)
+{ /* .start_conn hook: re-arm stop_completion, then start the connection */
+       struct iscsi_conn *iscsi_conn;
+       struct iser_conn *ib_conn;
+
+       iscsi_conn = cls_conn->dd_data;
+       ib_conn = iscsi_conn->dd_data; /* used below without a NULL check */
+       reinit_completion(&ib_conn->stop_completion); /* completed again by iscsi_iser_conn_stop() */
+
+       return iscsi_conn_start(cls_conn); /* its result is returned unchanged */
+}
+
 static void
 iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
 {
        struct iscsi_conn *conn = cls_conn->dd_data;
        struct iser_conn *ib_conn = conn->dd_data;
 
+       iser_dbg("stopping iscsi_conn: %p, ib_conn: %p\n", conn, ib_conn);
+       iscsi_conn_stop(cls_conn, flag); /* now runs whether or not an ib_conn is bound */
+
        /*
         * Userspace may have goofed up and not bound the connection or
         * might have only partially setup the connection.
         */
        if (ib_conn) {
-               iscsi_conn_stop(cls_conn, flag);
-               /*
-                * There is no unbind event so the stop callback
-                * must release the ref from the bind.
-                */
-               iser_conn_put(ib_conn, 1); /* deref iscsi/ib conn unbinding */
+               conn->dd_data = NULL; /* drop the iscsi_conn -> ib_conn link */
+               complete(&ib_conn->stop_completion); /* wakes the wait in iser_release_work() */
        }
-       conn->dd_data = NULL;
 }
 
 static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session)
@@ -515,28 +508,28 @@ iscsi_iser_set_param(struct iscsi_cls_conn *cls_conn,
        case ISCSI_PARAM_HDRDGST_EN:
                sscanf(buf, "%d", &value);
                if (value) {
-                       iser_err("DataDigest wasn't negotiated to None");
+                       iser_err("DataDigest wasn't negotiated to None\n");
                        return -EPROTO;
                }
                break;
        case ISCSI_PARAM_DATADGST_EN:
                sscanf(buf, "%d", &value);
                if (value) {
-                       iser_err("DataDigest wasn't negotiated to None");
+                       iser_err("DataDigest wasn't negotiated to None\n");
                        return -EPROTO;
                }
                break;
        case ISCSI_PARAM_IFMARKER_EN:
                sscanf(buf, "%d", &value);
                if (value) {
-                       iser_err("IFMarker wasn't negotiated to No");
+                       iser_err("IFMarker wasn't negotiated to No\n");
                        return -EPROTO;
                }
                break;
        case ISCSI_PARAM_OFMARKER_EN:
                sscanf(buf, "%d", &value);
                if (value) {
-                       iser_err("OFMarker wasn't negotiated to No");
+                       iser_err("OFMarker wasn't negotiated to No\n");
                        return -EPROTO;
                }
                break;
@@ -652,19 +645,20 @@ iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep)
        struct iser_conn *ib_conn;
 
        ib_conn = ep->dd_data;
-       if (ib_conn->iscsi_conn)
-               /*
-                * Must suspend xmit path if the ep is bound to the
-                * iscsi_conn, so we know we are not accessing the ib_conn
-                * when we free it.
-                *
-                * This may not be bound if the ep poll failed.
-                */
-               iscsi_suspend_tx(ib_conn->iscsi_conn);
-
-
-       iser_info("ib conn %p state %d\n", ib_conn, ib_conn->state);
+       iser_info("ep %p ib conn %p state %d\n", ep, ib_conn, ib_conn->state);
        iser_conn_terminate(ib_conn);
+
+       /*
+        * If iser_conn and iscsi_conn are bound, we must wait for the
+        * iscsi_conn_stop call and the ISER_CONN_DOWN state before freeing
+        * the iser resources. Otherwise it is safe to free them immediately.
+        */
+       if (ib_conn->iscsi_conn) {
+               INIT_WORK(&ib_conn->release_work, iser_release_work);
+               queue_work(release_wq, &ib_conn->release_work);
+       } else {
+               iser_conn_release(ib_conn);
+       }
 }
 
 static umode_t iser_attr_is_visible(int param_type, int param)
@@ -748,13 +742,13 @@ static struct iscsi_transport iscsi_iser_transport = {
        /* connection management */
        .create_conn            = iscsi_iser_conn_create,
        .bind_conn              = iscsi_iser_conn_bind,
-       .destroy_conn           = iscsi_iser_conn_destroy,
+       .destroy_conn           = iscsi_conn_teardown,
        .attr_is_visible        = iser_attr_is_visible,
        .set_param              = iscsi_iser_set_param,
        .get_conn_param         = iscsi_conn_get_param,
        .get_ep_param           = iscsi_iser_get_ep_param,
        .get_session_param      = iscsi_session_get_param,
-       .start_conn             = iscsi_conn_start,
+       .start_conn             = iscsi_iser_conn_start,
        .stop_conn              = iscsi_iser_conn_stop,
        /* iscsi host params */
        .get_host_param         = iscsi_host_get_param,
@@ -801,6 +795,12 @@ static int __init iser_init(void)
        mutex_init(&ig.connlist_mutex);
        INIT_LIST_HEAD(&ig.connlist);
 
+       release_wq = alloc_workqueue("release workqueue", 0, 0);
+       if (!release_wq) {
+               iser_err("failed to allocate release workqueue\n");
+               return -ENOMEM;
+       }
+
        iscsi_iser_scsi_transport = iscsi_register_transport(
                                                        &iscsi_iser_transport);
        if (!iscsi_iser_scsi_transport) {
@@ -819,7 +819,24 @@ register_transport_failure:
 
 static void __exit iser_exit(void)
 {
+       struct iser_conn *ib_conn, *n; /* n: lookahead cursor for the _safe list walk */
+       int connlist_empty;
+
        iser_dbg("Removing iSER datamover...\n");
+       destroy_workqueue(release_wq); /* NOTE(review): expected to finish queued iser_release_work() items first - confirm */
+
+       mutex_lock(&ig.connlist_mutex);
+       connlist_empty = list_empty(&ig.connlist); /* sampled under the lock, acted on after unlock */
+       mutex_unlock(&ig.connlist_mutex);
+
+       if (!connlist_empty) {
+               iser_err("Error cleanup stage completed but we still have iser "
+                        "connections, destroying them anyway.\n");
+               list_for_each_entry_safe(ib_conn, n, &ig.connlist, conn_list) { /* walked unlocked: iser_conn_release() takes connlist_mutex itself */
+                       iser_conn_release(ib_conn); /* unlinks ib_conn from ig.connlist, hence the _safe walk */
+               }
+       }
+
        iscsi_unregister_transport(&iscsi_iser_transport);
        kmem_cache_destroy(ig.desc_cache);
 }
index 324129f80d40b23d5e36874b081e87e21c9abebc..97cd385bf7f72c6d0fdc664e30a949b343564888 100644 (file)
@@ -69,7 +69,7 @@
 
 #define DRV_NAME       "iser"
 #define PFX            DRV_NAME ": "
-#define DRV_VER                "1.3"
+#define DRV_VER                "1.4"
 
 #define iser_dbg(fmt, arg...)                          \
        do {                                            \
@@ -333,6 +333,8 @@ struct iser_conn {
        int                          post_recv_buf_count; /* posted rx count  */
        atomic_t                     post_send_buf_count; /* posted tx count   */
        char                         name[ISER_OBJECT_NAME_SIZE];
+       struct work_struct           release_work;
+       struct completion            stop_completion;
        struct list_head             conn_list;       /* entry in ig conn list */
 
        char                         *login_buf;
@@ -417,12 +419,12 @@ void iscsi_iser_recv(struct iscsi_conn *conn,
 
 void iser_conn_init(struct iser_conn *ib_conn);
 
-void iser_conn_get(struct iser_conn *ib_conn);
-
-int iser_conn_put(struct iser_conn *ib_conn, int destroy_cma_id_allowed);
+void iser_conn_release(struct iser_conn *ib_conn);
 
 void iser_conn_terminate(struct iser_conn *ib_conn);
 
+void iser_release_work(struct work_struct *work);
+
 void iser_rcv_completion(struct iser_rx_desc *desc,
                         unsigned long    dto_xfer_len,
                        struct iser_conn *ib_conn);
index 32849f2becde9e6fd4882bf0e3b63545351069c9..ea01075f9f9b81b180ecfe85f02d4b18a77a494f 100644 (file)
@@ -581,14 +581,30 @@ static int iser_conn_state_comp_exch(struct iser_conn *ib_conn,
        return ret;
 }
 
+void iser_release_work(struct work_struct *work)
+{
+       struct iser_conn *ib_conn;
+
+       ib_conn = container_of(work, struct iser_conn, release_work);
+
+       /*
+        * Deferred release of a connection: this work item is queued on
+        * release_wq by iscsi_iser_ep_disconnect() when the iser_conn is
+        * bound to an iscsi_conn.
+        *
+        * First wait for the .stop_conn callback; iscsi_iser_conn_stop()
+        * completes stop_completion once it has run.
+        */
+       wait_for_completion(&ib_conn->stop_completion);
+
+       /*
+        * Then wait until the connection state is ISER_CONN_DOWN.
+        * NOTE(review): this state is presumably reached once the qp's
+        * post send and post receive buffers have emptied - confirm.
+        * The return value of the interruptible wait is not checked, so an
+        * interrupted wait falls through to the release below.
+        */
+       wait_event_interruptible(ib_conn->wait,
+                                ib_conn->state == ISER_CONN_DOWN);
+
+       iser_conn_release(ib_conn);
+}
+
 /**
  * Frees all conn objects and deallocs conn descriptor
  */
-static void iser_conn_release(struct iser_conn *ib_conn, int can_destroy_id)
+void iser_conn_release(struct iser_conn *ib_conn)
 {
        struct iser_device  *device = ib_conn->device;
 
-       BUG_ON(ib_conn->state != ISER_CONN_DOWN);
+       BUG_ON(ib_conn->state == ISER_CONN_UP);
 
        mutex_lock(&ig.connlist_mutex);
        list_del(&ib_conn->conn_list);
@@ -600,27 +616,13 @@ static void iser_conn_release(struct iser_conn *ib_conn, int can_destroy_id)
        if (device != NULL)
                iser_device_try_release(device);
        /* if cma handler context, the caller actually destroy the id */
-       if (ib_conn->cma_id != NULL && can_destroy_id) {
+       if (ib_conn->cma_id != NULL) {
                rdma_destroy_id(ib_conn->cma_id);
                ib_conn->cma_id = NULL;
        }
        iscsi_destroy_endpoint(ib_conn->ep);
 }
 
-void iser_conn_get(struct iser_conn *ib_conn)
-{
-       atomic_inc(&ib_conn->refcount);
-}
-
-int iser_conn_put(struct iser_conn *ib_conn, int can_destroy_id)
-{
-       if (atomic_dec_and_test(&ib_conn->refcount)) {
-               iser_conn_release(ib_conn, can_destroy_id);
-               return 1;
-       }
-       return 0;
-}
-
 /**
  * triggers start of the disconnect procedures and wait for them to be done
  */
@@ -638,24 +640,19 @@ void iser_conn_terminate(struct iser_conn *ib_conn)
        if (err)
                iser_err("Failed to disconnect, conn: 0x%p err %d\n",
                         ib_conn,err);
-
-       wait_event_interruptible(ib_conn->wait,
-                                ib_conn->state == ISER_CONN_DOWN);
-
-       iser_conn_put(ib_conn, 1); /* deref ib conn deallocate */
 }
 
-static int iser_connect_error(struct rdma_cm_id *cma_id)
+static void iser_connect_error(struct rdma_cm_id *cma_id)
 {
        struct iser_conn *ib_conn;
+
        ib_conn = (struct iser_conn *)cma_id->context;
 
        ib_conn->state = ISER_CONN_DOWN;
        wake_up_interruptible(&ib_conn->wait);
-       return iser_conn_put(ib_conn, 0); /* deref ib conn's cma id */
 }
 
-static int iser_addr_handler(struct rdma_cm_id *cma_id)
+static void iser_addr_handler(struct rdma_cm_id *cma_id)
 {
        struct iser_device *device;
        struct iser_conn   *ib_conn;
@@ -664,7 +661,8 @@ static int iser_addr_handler(struct rdma_cm_id *cma_id)
        device = iser_device_find_by_ib_device(cma_id);
        if (!device) {
                iser_err("device lookup/creation failed\n");
-               return iser_connect_error(cma_id);
+               iser_connect_error(cma_id);
+               return;
        }
 
        ib_conn = (struct iser_conn *)cma_id->context;
@@ -686,13 +684,12 @@ static int iser_addr_handler(struct rdma_cm_id *cma_id)
        ret = rdma_resolve_route(cma_id, 1000);
        if (ret) {
                iser_err("resolve route failed: %d\n", ret);
-               return iser_connect_error(cma_id);
+               iser_connect_error(cma_id);
+               return;
        }
-
-       return 0;
 }
 
-static int iser_route_handler(struct rdma_cm_id *cma_id)
+static void iser_route_handler(struct rdma_cm_id *cma_id)
 {
        struct rdma_conn_param conn_param;
        int    ret;
@@ -720,9 +717,9 @@ static int iser_route_handler(struct rdma_cm_id *cma_id)
                goto failure;
        }
 
-       return 0;
+       return;
 failure:
-       return iser_connect_error(cma_id);
+       iser_connect_error(cma_id);
 }
 
 static void iser_connected_handler(struct rdma_cm_id *cma_id)
@@ -735,14 +732,13 @@ static void iser_connected_handler(struct rdma_cm_id *cma_id)
        iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num);
 
        ib_conn = (struct iser_conn *)cma_id->context;
-       ib_conn->state = ISER_CONN_UP;
-       wake_up_interruptible(&ib_conn->wait);
+       if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_PENDING, ISER_CONN_UP))
+               wake_up_interruptible(&ib_conn->wait);
 }
 
-static int iser_disconnected_handler(struct rdma_cm_id *cma_id)
+static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
 {
        struct iser_conn *ib_conn;
-       int ret;
 
        ib_conn = (struct iser_conn *)cma_id->context;
 
@@ -762,24 +758,19 @@ static int iser_disconnected_handler(struct rdma_cm_id *cma_id)
                ib_conn->state = ISER_CONN_DOWN;
                wake_up_interruptible(&ib_conn->wait);
        }
-
-       ret = iser_conn_put(ib_conn, 0); /* deref ib conn's cma id */
-       return ret;
 }
 
 static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
 {
-       int ret = 0;
-
        iser_info("event %d status %d conn %p id %p\n",
                  event->event, event->status, cma_id->context, cma_id);
 
        switch (event->event) {
        case RDMA_CM_EVENT_ADDR_RESOLVED:
-               ret = iser_addr_handler(cma_id);
+               iser_addr_handler(cma_id);
                break;
        case RDMA_CM_EVENT_ROUTE_RESOLVED:
-               ret = iser_route_handler(cma_id);
+               iser_route_handler(cma_id);
                break;
        case RDMA_CM_EVENT_ESTABLISHED:
                iser_connected_handler(cma_id);
@@ -789,18 +780,18 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
        case RDMA_CM_EVENT_CONNECT_ERROR:
        case RDMA_CM_EVENT_UNREACHABLE:
        case RDMA_CM_EVENT_REJECTED:
-               ret = iser_connect_error(cma_id);
+               iser_connect_error(cma_id);
                break;
        case RDMA_CM_EVENT_DISCONNECTED:
        case RDMA_CM_EVENT_DEVICE_REMOVAL:
        case RDMA_CM_EVENT_ADDR_CHANGE:
-               ret = iser_disconnected_handler(cma_id);
+               iser_disconnected_handler(cma_id);
                break;
        default:
                iser_err("Unexpected RDMA CM event (%d)\n", event->event);
                break;
        }
-       return ret;
+       return 0;
 }
 
 void iser_conn_init(struct iser_conn *ib_conn)
@@ -809,7 +800,7 @@ void iser_conn_init(struct iser_conn *ib_conn)
        init_waitqueue_head(&ib_conn->wait);
        ib_conn->post_recv_buf_count = 0;
        atomic_set(&ib_conn->post_send_buf_count, 0);
-       atomic_set(&ib_conn->refcount, 1); /* ref ib conn allocation */
+       init_completion(&ib_conn->stop_completion);
        INIT_LIST_HEAD(&ib_conn->conn_list);
        spin_lock_init(&ib_conn->lock);
 }
@@ -837,7 +828,6 @@ int iser_connect(struct iser_conn   *ib_conn,
 
        ib_conn->state = ISER_CONN_PENDING;
 
-       iser_conn_get(ib_conn); /* ref ib conn's cma id */
        ib_conn->cma_id = rdma_create_id(iser_cma_handler,
                                             (void *)ib_conn,
                                             RDMA_PS_TCP, IB_QPT_RC);
@@ -874,9 +864,8 @@ id_failure:
        ib_conn->cma_id = NULL;
 addr_failure:
        ib_conn->state = ISER_CONN_DOWN;
-       iser_conn_put(ib_conn, 1); /* deref ib conn's cma id */
 connect_failure:
-       iser_conn_put(ib_conn, 1); /* deref ib conn deallocate */
+       iser_conn_release(ib_conn);
        return err;
 }
 
index 66a908bf3fb9e1d43654d07af1d346cca12a6223..e3c2c5b4297f69d033629717a87dea1f1fb6fdf9 100644 (file)
@@ -30,7 +30,7 @@
  * SOFTWARE.
  */
 
-#define pr_fmt(fmt) PFX fmt
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/module.h>
 #include <linux/init.h>
@@ -66,6 +66,8 @@ static unsigned int srp_sg_tablesize;
 static unsigned int cmd_sg_entries;
 static unsigned int indirect_sg_entries;
 static bool allow_ext_sg;
+static bool prefer_fr;
+static bool register_always;
 static int topspin_workarounds = 1;
 
 module_param(srp_sg_tablesize, uint, 0444);
@@ -87,6 +89,14 @@ module_param(topspin_workarounds, int, 0444);
 MODULE_PARM_DESC(topspin_workarounds,
                 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
 
+module_param(prefer_fr, bool, 0444);
+MODULE_PARM_DESC(prefer_fr,
+"Whether to use fast registration if both FMR and fast registration are supported");
+
+module_param(register_always, bool, 0444);
+MODULE_PARM_DESC(register_always,
+                "Use memory registration even for contiguous memory regions");
+
 static struct kernel_param_ops srp_tmo_ops;
 
 static int srp_reconnect_delay = 10;
@@ -288,28 +298,174 @@ static int srp_new_cm_id(struct srp_target_port *target)
        return 0;
 }
 
+static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
+{ /* Create an FMR pool for @target on its SRP device's protection domain */
+       struct srp_device *dev = target->srp_host->srp_dev;
+       struct ib_fmr_pool_param fmr_param;
+
+       memset(&fmr_param, 0, sizeof(fmr_param)); /* fields not set below stay zero */
+       fmr_param.pool_size         = target->scsi_host->can_queue; /* pool sized to the SCSI host's can_queue */
+       fmr_param.dirty_watermark   = fmr_param.pool_size / 4; /* a quarter of the pool size */
+       fmr_param.cache             = 1;
+       fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
+       fmr_param.page_shift        = ilog2(dev->mr_page_size); /* shift derived from the MR page size */
+       fmr_param.access            = (IB_ACCESS_LOCAL_WRITE |
+                                      IB_ACCESS_REMOTE_WRITE |
+                                      IB_ACCESS_REMOTE_READ);
+
+       return ib_create_fmr_pool(dev->pd, &fmr_param); /* returned as-is; srp_create_target_ib() tests it with IS_ERR() */
+}
+
+/**
+ * srp_destroy_fr_pool() - free the resources owned by a pool
+ * @pool: Fast registration pool to be destroyed. May be NULL, in which case
+ *        nothing is done.
+ *
+ * Walks all pool->size descriptors, frees each descriptor's page list and
+ * deregisters its memory region where those pointers are set, then frees
+ * the pool itself. Descriptors whose frpl or mr pointer is still NULL are
+ * skipped, so a partially populated pool may be passed in;
+ * srp_create_fr_pool() relies on this in its error path.
+ */
+static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
+{
+       int i;
+       struct srp_fr_desc *d;
+
+       if (!pool)
+               return;
+
+       for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
+               if (d->frpl)
+                       ib_free_fast_reg_page_list(d->frpl);
+               if (d->mr)
+                       ib_dereg_mr(d->mr);
+       }
+       kfree(pool); /* the descriptors were allocated together with the pool */
+}
+
+/**
+ * srp_create_fr_pool() - allocate and initialize a pool for fast registration
+ * @device:            IB device to allocate fast registration descriptors for.
+ * @pd:                Protection domain associated with the FR descriptors.
+ * @pool_size:         Number of descriptors to allocate.
+ * @max_page_list_len: Maximum fast registration work request page list length.
+ *
+ * The pool header and its @pool_size descriptors come from a single zeroed
+ * allocation. Each descriptor is then given a fast registration MR and a
+ * page list and is appended to the pool's free list.
+ *
+ * Return: the new pool on success, otherwise an ERR_PTR() value: -EINVAL if
+ * @pool_size is not positive, -ENOMEM if the pool allocation fails, or the
+ * error returned by ib_alloc_fast_reg_mr() or ib_alloc_fast_reg_page_list().
+ * If a descriptor cannot be set up, the partially built pool is destroyed
+ * before returning.
+ */
+static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
+                                             struct ib_pd *pd, int pool_size,
+                                             int max_page_list_len)
+{
+       struct srp_fr_pool *pool;
+       struct srp_fr_desc *d;
+       struct ib_mr *mr;
+       struct ib_fast_reg_page_list *frpl;
+       int i, ret = -EINVAL;
+
+       if (pool_size <= 0)
+               goto err;
+       ret = -ENOMEM;
+       pool = kzalloc(sizeof(struct srp_fr_pool) +
+                      pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
+       if (!pool)
+               goto err;
+       pool->size = pool_size;
+       pool->max_page_list_len = max_page_list_len;
+       spin_lock_init(&pool->lock);
+       INIT_LIST_HEAD(&pool->free_list);
+
+       for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
+               mr = ib_alloc_fast_reg_mr(pd, max_page_list_len);
+               if (IS_ERR(mr)) {
+                       ret = PTR_ERR(mr);
+                       goto destroy_pool;
+               }
+               d->mr = mr; /* stored before the next allocation so destroy_pool can release it */
+               frpl = ib_alloc_fast_reg_page_list(device, max_page_list_len);
+               if (IS_ERR(frpl)) {
+                       ret = PTR_ERR(frpl);
+                       goto destroy_pool;
+               }
+               d->frpl = frpl;
+               list_add_tail(&d->entry, &pool->free_list);
+       }
+
+out:
+       return pool;
+
+destroy_pool:
+       srp_destroy_fr_pool(pool); /* copes with descriptors that were never filled in */
+
+err:
+       pool = ERR_PTR(ret); /* error exits jump back up to the single return */
+       goto out;
+}
+
+/**
+ * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
+ * @pool: Pool to obtain descriptor from.
+ *
+ * The free list is inspected and modified under pool->lock, taken with
+ * spin_lock_irqsave().
+ *
+ * Return: the first descriptor of the pool's free list, unlinked from that
+ * list, or NULL if the free list is empty.
+ */
+static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
+{
+       struct srp_fr_desc *d = NULL;
+       unsigned long flags;
+
+       spin_lock_irqsave(&pool->lock, flags);
+       if (!list_empty(&pool->free_list)) {
+               d = list_first_entry(&pool->free_list, typeof(*d), entry);
+               list_del(&d->entry);
+       }
+       spin_unlock_irqrestore(&pool->lock, flags);
+
+       return d;
+}
+
+/**
+ * srp_fr_pool_put() - put an FR descriptor back in the free list
+ * @pool: Pool the descriptor was allocated from.
+ * @desc: Pointer to an array of fast registration descriptor pointers.
+ * @n:    Number of descriptors to put back.
+ *
+ * All @n descriptors are re-added with list_add(), i.e. at the head of the
+ * free list, inside a single pool->lock critical section. Nothing is done
+ * to the descriptors themselves.
+ *
+ * Note: The caller must already have queued an invalidation request for
+ * desc->mr->rkey before calling this function.
+ */
+static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
+                           int n)
+{
+       unsigned long flags;
+       int i;
+
+       spin_lock_irqsave(&pool->lock, flags);
+       for (i = 0; i < n; i++)
+               list_add(&desc[i]->entry, &pool->free_list);
+       spin_unlock_irqrestore(&pool->lock, flags);
+}
+
+static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
+{ /* Create the fast registration pool for @target via srp_create_fr_pool() */
+       struct srp_device *dev = target->srp_host->srp_dev;
+
+       return srp_create_fr_pool(dev->dev, dev->pd,
+                                 target->scsi_host->can_queue, /* pool size */
+                                 dev->max_pages_per_mr); /* max page list length */
+}
+
 static int srp_create_target_ib(struct srp_target_port *target)
 {
+       struct srp_device *dev = target->srp_host->srp_dev;
        struct ib_qp_init_attr *init_attr;
        struct ib_cq *recv_cq, *send_cq;
        struct ib_qp *qp;
+       struct ib_fmr_pool *fmr_pool = NULL;
+       struct srp_fr_pool *fr_pool = NULL;
+       const int m = 1 + dev->use_fast_reg;
        int ret;
 
        init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
        if (!init_attr)
                return -ENOMEM;
 
-       recv_cq = ib_create_cq(target->srp_host->srp_dev->dev,
-                              srp_recv_completion, NULL, target,
+       recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, target,
                               target->queue_size, target->comp_vector);
        if (IS_ERR(recv_cq)) {
                ret = PTR_ERR(recv_cq);
                goto err;
        }
 
-       send_cq = ib_create_cq(target->srp_host->srp_dev->dev,
-                              srp_send_completion, NULL, target,
-                              target->queue_size, target->comp_vector);
+       send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, target,
+                              m * target->queue_size, target->comp_vector);
        if (IS_ERR(send_cq)) {
                ret = PTR_ERR(send_cq);
                goto err_recv_cq;
@@ -318,16 +474,16 @@ static int srp_create_target_ib(struct srp_target_port *target)
        ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
 
        init_attr->event_handler       = srp_qp_event;
-       init_attr->cap.max_send_wr     = target->queue_size;
+       init_attr->cap.max_send_wr     = m * target->queue_size;
        init_attr->cap.max_recv_wr     = target->queue_size;
        init_attr->cap.max_recv_sge    = 1;
        init_attr->cap.max_send_sge    = 1;
-       init_attr->sq_sig_type         = IB_SIGNAL_ALL_WR;
+       init_attr->sq_sig_type         = IB_SIGNAL_REQ_WR;
        init_attr->qp_type             = IB_QPT_RC;
        init_attr->send_cq             = send_cq;
        init_attr->recv_cq             = recv_cq;
 
-       qp = ib_create_qp(target->srp_host->srp_dev->pd, init_attr);
+       qp = ib_create_qp(dev->pd, init_attr);
        if (IS_ERR(qp)) {
                ret = PTR_ERR(qp);
                goto err_send_cq;
@@ -337,6 +493,30 @@ static int srp_create_target_ib(struct srp_target_port *target)
        if (ret)
                goto err_qp;
 
+       if (dev->use_fast_reg && dev->has_fr) {
+               fr_pool = srp_alloc_fr_pool(target);
+               if (IS_ERR(fr_pool)) {
+                       ret = PTR_ERR(fr_pool);
+                       shost_printk(KERN_WARNING, target->scsi_host, PFX
+                                    "FR pool allocation failed (%d)\n", ret);
+                       goto err_qp;
+               }
+               if (target->fr_pool)
+                       srp_destroy_fr_pool(target->fr_pool);
+               target->fr_pool = fr_pool;
+       } else if (!dev->use_fast_reg && dev->has_fmr) {
+               fmr_pool = srp_alloc_fmr_pool(target);
+               if (IS_ERR(fmr_pool)) {
+                       ret = PTR_ERR(fmr_pool);
+                       shost_printk(KERN_WARNING, target->scsi_host, PFX
+                                    "FMR pool allocation failed (%d)\n", ret);
+                       goto err_qp;
+               }
+               if (target->fmr_pool)
+                       ib_destroy_fmr_pool(target->fmr_pool);
+               target->fmr_pool = fmr_pool;
+       }
+
        if (target->qp)
                ib_destroy_qp(target->qp);
        if (target->recv_cq)
@@ -371,8 +551,16 @@ err:
  */
 static void srp_free_target_ib(struct srp_target_port *target)
 {
+       struct srp_device *dev = target->srp_host->srp_dev;
        int i;
 
+       if (dev->use_fast_reg) {
+               if (target->fr_pool)
+                       srp_destroy_fr_pool(target->fr_pool);
+       } else {
+               if (target->fmr_pool)
+                       ib_destroy_fmr_pool(target->fmr_pool);
+       }
        ib_destroy_qp(target->qp);
        ib_destroy_cq(target->send_cq);
        ib_destroy_cq(target->recv_cq);
@@ -577,7 +765,8 @@ static void srp_disconnect_target(struct srp_target_port *target)
 
 static void srp_free_req_data(struct srp_target_port *target)
 {
-       struct ib_device *ibdev = target->srp_host->srp_dev->dev;
+       struct srp_device *dev = target->srp_host->srp_dev;
+       struct ib_device *ibdev = dev->dev;
        struct srp_request *req;
        int i;
 
@@ -586,7 +775,10 @@ static void srp_free_req_data(struct srp_target_port *target)
 
        for (i = 0; i < target->req_ring_size; ++i) {
                req = &target->req_ring[i];
-               kfree(req->fmr_list);
+               if (dev->use_fast_reg)
+                       kfree(req->fr_list);
+               else
+                       kfree(req->fmr_list);
                kfree(req->map_page);
                if (req->indirect_dma_addr) {
                        ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
@@ -605,6 +797,7 @@ static int srp_alloc_req_data(struct srp_target_port *target)
        struct srp_device *srp_dev = target->srp_host->srp_dev;
        struct ib_device *ibdev = srp_dev->dev;
        struct srp_request *req;
+       void *mr_list;
        dma_addr_t dma_addr;
        int i, ret = -ENOMEM;
 
@@ -617,12 +810,20 @@ static int srp_alloc_req_data(struct srp_target_port *target)
 
        for (i = 0; i < target->req_ring_size; ++i) {
                req = &target->req_ring[i];
-               req->fmr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
-                                       GFP_KERNEL);
-               req->map_page = kmalloc(SRP_FMR_SIZE * sizeof(void *),
-                                       GFP_KERNEL);
+               mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
+                                 GFP_KERNEL);
+               if (!mr_list)
+                       goto out;
+               if (srp_dev->use_fast_reg)
+                       req->fr_list = mr_list;
+               else
+                       req->fmr_list = mr_list;
+               req->map_page = kmalloc(srp_dev->max_pages_per_mr *
+                                       sizeof(void *), GFP_KERNEL);
+               if (!req->map_page)
+                       goto out;
                req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
-               if (!req->fmr_list || !req->map_page || !req->indirect_desc)
+               if (!req->indirect_desc)
                        goto out;
 
                dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
@@ -759,21 +960,56 @@ static int srp_connect_target(struct srp_target_port *target)
        }
 }
 
+static int srp_inv_rkey(struct srp_target_port *target, u32 rkey)
+{ /* Post an IB_WR_LOCAL_INV work request for @rkey on target->qp */
+       struct ib_send_wr *bad_wr; /* passed to ib_post_send(); not inspected here */
+       struct ib_send_wr wr = {
+               .opcode             = IB_WR_LOCAL_INV,
+               .wr_id              = LOCAL_INV_WR_ID_MASK, /* NOTE(review): presumably lets the completion path recognise invalidate WRs - confirm */
+               .next               = NULL,
+               .num_sge            = 0,
+               .send_flags         = 0, /* no send flags are set */
+               .ex.invalidate_rkey = rkey,
+       };
+
+       return ib_post_send(target->qp, &wr, &bad_wr); /* result returned as-is; srp_unmap_data() treats < 0 as failure */
+}
+
 static void srp_unmap_data(struct scsi_cmnd *scmnd,
                           struct srp_target_port *target,
                           struct srp_request *req)
 {
-       struct ib_device *ibdev = target->srp_host->srp_dev->dev;
-       struct ib_pool_fmr **pfmr;
+       struct srp_device *dev = target->srp_host->srp_dev;
+       struct ib_device *ibdev = dev->dev;
+       int i, res;
 
        if (!scsi_sglist(scmnd) ||
            (scmnd->sc_data_direction != DMA_TO_DEVICE &&
             scmnd->sc_data_direction != DMA_FROM_DEVICE))
                return;
 
-       pfmr = req->fmr_list;
-       while (req->nfmr--)
-               ib_fmr_pool_unmap(*pfmr++);
+       if (dev->use_fast_reg) {
+               struct srp_fr_desc **pfr;
+
+               for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
+                       res = srp_inv_rkey(target, (*pfr)->mr->rkey);
+                       if (res < 0) {
+                               shost_printk(KERN_ERR, target->scsi_host, PFX
+                                 "Queueing INV WR for rkey %#x failed (%d)\n",
+                                 (*pfr)->mr->rkey, res);
+                               queue_work(system_long_wq,
+                                          &target->tl_err_work);
+                       }
+               }
+               if (req->nmdesc)
+                       srp_fr_pool_put(target->fr_pool, req->fr_list,
+                                       req->nmdesc);
+       } else {
+               struct ib_pool_fmr **pfmr;
+
+               for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
+                       ib_fmr_pool_unmap(*pfmr);
+       }
 
        ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
                        scmnd->sc_data_direction);
@@ -813,6 +1049,10 @@ static struct scsi_cmnd *srp_claim_req(struct srp_target_port *target,
 
 /**
  * srp_free_req() - Unmap data and add request to the free request list.
+ * @target: SRP target port.
+ * @req:    Request to be freed.
+ * @scmnd:  SCSI command associated with @req.
+ * @req_lim_delta: Amount to be added to @target->req_lim.
  */
 static void srp_free_req(struct srp_target_port *target,
                         struct srp_request *req, struct scsi_cmnd *scmnd,
@@ -882,21 +1122,19 @@ static int srp_rport_reconnect(struct srp_rport *rport)
         * callbacks will have finished before a new QP is allocated.
         */
        ret = srp_new_cm_id(target);
-       /*
-        * Whether or not creating a new CM ID succeeded, create a new
-        * QP. This guarantees that all completion callback function
-        * invocations have finished before request resetting starts.
-        */
-       if (ret == 0)
-               ret = srp_create_target_ib(target);
-       else
-               srp_create_target_ib(target);
 
        for (i = 0; i < target->req_ring_size; ++i) {
                struct srp_request *req = &target->req_ring[i];
                srp_finish_req(target, req, NULL, DID_RESET << 16);
        }
 
+       /*
+        * Whether or not creating a new CM ID succeeded, create a new
+        * QP. This guarantees that all callback functions for the old QP have
+        * finished before any send requests are posted on the new QP.
+        */
+       ret += srp_create_target_ib(target);
+
        INIT_LIST_HEAD(&target->free_tx);
        for (i = 0; i < target->queue_size; ++i)
                list_add(&target->tx_ring[i]->list, &target->free_tx);
@@ -928,33 +1166,87 @@ static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
 static int srp_map_finish_fmr(struct srp_map_state *state,
                              struct srp_target_port *target)
 {
-       struct srp_device *dev = target->srp_host->srp_dev;
        struct ib_pool_fmr *fmr;
        u64 io_addr = 0;
 
-       if (!state->npages)
-               return 0;
-
-       if (state->npages == 1) {
-               srp_map_desc(state, state->base_dma_addr, state->fmr_len,
-                            target->rkey);
-               state->npages = state->fmr_len = 0;
-               return 0;
-       }
-
-       fmr = ib_fmr_pool_map_phys(dev->fmr_pool, state->pages,
+       fmr = ib_fmr_pool_map_phys(target->fmr_pool, state->pages,
                                   state->npages, io_addr);
        if (IS_ERR(fmr))
                return PTR_ERR(fmr);
 
        *state->next_fmr++ = fmr;
-       state->nfmr++;
+       state->nmdesc++;
+
+       srp_map_desc(state, 0, state->dma_len, fmr->fmr->rkey);
 
-       srp_map_desc(state, 0, state->fmr_len, fmr->fmr->rkey);
-       state->npages = state->fmr_len = 0;
        return 0;
 }
 
+static int srp_map_finish_fr(struct srp_map_state *state,
+                            struct srp_target_port *target)
+{
+       struct srp_device *dev = target->srp_host->srp_dev;
+       struct ib_send_wr *bad_wr;
+       struct ib_send_wr wr;
+       struct srp_fr_desc *desc;
+       u32 rkey;
+
+       desc = srp_fr_pool_get(target->fr_pool);
+       if (!desc)
+               return -ENOMEM;
+
+       rkey = ib_inc_rkey(desc->mr->rkey);
+       ib_update_fast_reg_key(desc->mr, rkey);
+
+       memcpy(desc->frpl->page_list, state->pages,
+              sizeof(state->pages[0]) * state->npages);
+
+       memset(&wr, 0, sizeof(wr));
+       wr.opcode = IB_WR_FAST_REG_MR;
+       wr.wr_id = FAST_REG_WR_ID_MASK;
+       wr.wr.fast_reg.iova_start = state->base_dma_addr;
+       wr.wr.fast_reg.page_list = desc->frpl;
+       wr.wr.fast_reg.page_list_len = state->npages;
+       wr.wr.fast_reg.page_shift = ilog2(dev->mr_page_size);
+       wr.wr.fast_reg.length = state->dma_len;
+       wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE |
+                                      IB_ACCESS_REMOTE_READ |
+                                      IB_ACCESS_REMOTE_WRITE);
+       wr.wr.fast_reg.rkey = desc->mr->lkey;
+
+       *state->next_fr++ = desc;
+       state->nmdesc++;
+
+       srp_map_desc(state, state->base_dma_addr, state->dma_len,
+                    desc->mr->rkey);
+
+       return ib_post_send(target->qp, &wr, &bad_wr);
+}
+
+static int srp_finish_mapping(struct srp_map_state *state,
+                             struct srp_target_port *target)
+{
+       int ret = 0;
+
+       if (state->npages == 0)
+               return 0;
+
+       if (state->npages == 1 && !register_always)
+               srp_map_desc(state, state->base_dma_addr, state->dma_len,
+                            target->rkey);
+       else
+               ret = target->srp_host->srp_dev->use_fast_reg ?
+                       srp_map_finish_fr(state, target) :
+                       srp_map_finish_fmr(state, target);
+
+       if (ret == 0) {
+               state->npages = 0;
+               state->dma_len = 0;
+       }
+
+       return ret;
+}
+
 static void srp_map_update_start(struct srp_map_state *state,
                                 struct scatterlist *sg, int sg_index,
                                 dma_addr_t dma_addr)
@@ -967,7 +1259,7 @@ static void srp_map_update_start(struct srp_map_state *state,
 static int srp_map_sg_entry(struct srp_map_state *state,
                            struct srp_target_port *target,
                            struct scatterlist *sg, int sg_index,
-                           int use_fmr)
+                           bool use_mr)
 {
        struct srp_device *dev = target->srp_host->srp_dev;
        struct ib_device *ibdev = dev->dev;
@@ -979,23 +1271,25 @@ static int srp_map_sg_entry(struct srp_map_state *state,
        if (!dma_len)
                return 0;
 
-       if (use_fmr == SRP_MAP_NO_FMR) {
-               /* Once we're in direct map mode for a request, we don't
-                * go back to FMR mode, so no need to update anything
+       if (!use_mr) {
+               /*
+                * Once we're in direct map mode for a request, we don't
+                * go back to FMR or FR mode, so no need to update anything
                 * other than the descriptor.
                 */
                srp_map_desc(state, dma_addr, dma_len, target->rkey);
                return 0;
        }
 
-       /* If we start at an offset into the FMR page, don't merge into
-        * the current FMR. Finish it out, and use the kernel's MR for this
-        * sg entry. This is to avoid potential bugs on some SRP targets
-        * that were never quite defined, but went away when the initiator
-        * avoided using FMR on such page fragments.
+       /*
+        * Since not all RDMA HW drivers support non-zero page offsets for
+        * FMR, if we start at an offset into a page, don't merge into the
+        * current FMR mapping. Finish it out, and use the kernel's MR for
+        * this sg entry.
         */
-       if (dma_addr & ~dev->fmr_page_mask || dma_len > dev->fmr_max_size) {
-               ret = srp_map_finish_fmr(state, target);
+       if ((!dev->use_fast_reg && dma_addr & ~dev->mr_page_mask) ||
+           dma_len > dev->mr_max_size) {
+               ret = srp_finish_mapping(state, target);
                if (ret)
                        return ret;
 
@@ -1004,52 +1298,106 @@ static int srp_map_sg_entry(struct srp_map_state *state,
                return 0;
        }
 
-       /* If this is the first sg to go into the FMR, save our position.
-        * We need to know the first unmapped entry, its index, and the
-        * first unmapped address within that entry to be able to restart
-        * mapping after an error.
+       /*
+        * If this is the first sg that will be mapped via FMR or via FR, save
+        * our position. We need to know the first unmapped entry, its index,
+        * and the first unmapped address within that entry to be able to
+        * restart mapping after an error.
         */
        if (!state->unmapped_sg)
                srp_map_update_start(state, sg, sg_index, dma_addr);
 
        while (dma_len) {
-               if (state->npages == SRP_FMR_SIZE) {
-                       ret = srp_map_finish_fmr(state, target);
+               unsigned offset = dma_addr & ~dev->mr_page_mask;
+               if (state->npages == dev->max_pages_per_mr || offset != 0) {
+                       ret = srp_finish_mapping(state, target);
                        if (ret)
                                return ret;
 
                        srp_map_update_start(state, sg, sg_index, dma_addr);
                }
 
-               len = min_t(unsigned int, dma_len, dev->fmr_page_size);
+               len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
 
                if (!state->npages)
                        state->base_dma_addr = dma_addr;
-               state->pages[state->npages++] = dma_addr;
-               state->fmr_len += len;
+               state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
+               state->dma_len += len;
                dma_addr += len;
                dma_len -= len;
        }
 
-       /* If the last entry of the FMR wasn't a full page, then we need to
+       /*
+        * If the last entry of the MR wasn't a full page, then we need to
         * close it out and start a new one -- we can only merge at page
         * boundries.
         */
        ret = 0;
-       if (len != dev->fmr_page_size) {
-               ret = srp_map_finish_fmr(state, target);
+       if (len != dev->mr_page_size) {
+               ret = srp_finish_mapping(state, target);
                if (!ret)
                        srp_map_update_start(state, NULL, 0, 0);
        }
        return ret;
 }
 
+static int srp_map_sg(struct srp_map_state *state,
+                     struct srp_target_port *target, struct srp_request *req,
+                     struct scatterlist *scat, int count)
+{
+       struct srp_device *dev = target->srp_host->srp_dev;
+       struct ib_device *ibdev = dev->dev;
+       struct scatterlist *sg;
+       int i;
+       bool use_mr;
+
+       state->desc     = req->indirect_desc;
+       state->pages    = req->map_page;
+       if (dev->use_fast_reg) {
+               state->next_fr = req->fr_list;
+               use_mr = !!target->fr_pool;
+       } else {
+               state->next_fmr = req->fmr_list;
+               use_mr = !!target->fmr_pool;
+       }
+
+       for_each_sg(scat, sg, count, i) {
+               if (srp_map_sg_entry(state, target, sg, i, use_mr)) {
+                       /*
+                        * Memory registration failed, so backtrack to the
+                        * first unmapped entry and continue on without using
+                        * memory registration.
+                        */
+                       dma_addr_t dma_addr;
+                       unsigned int dma_len;
+
+backtrack:
+                       sg = state->unmapped_sg;
+                       i = state->unmapped_index;
+
+                       dma_addr = ib_sg_dma_address(ibdev, sg);
+                       dma_len = ib_sg_dma_len(ibdev, sg);
+                       dma_len -= (state->unmapped_addr - dma_addr);
+                       dma_addr = state->unmapped_addr;
+                       use_mr = false;
+                       srp_map_desc(state, dma_addr, dma_len, target->rkey);
+               }
+       }
+
+       if (use_mr && srp_finish_mapping(state, target))
+               goto backtrack;
+
+       req->nmdesc = state->nmdesc;
+
+       return 0;
+}
+
 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
                        struct srp_request *req)
 {
-       struct scatterlist *scat, *sg;
+       struct scatterlist *scat;
        struct srp_cmd *cmd = req->cmd->buf;
-       int i, len, nents, count, use_fmr;
+       int len, nents, count;
        struct srp_device *dev;
        struct ib_device *ibdev;
        struct srp_map_state state;
@@ -1081,7 +1429,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
        fmt = SRP_DATA_DESC_DIRECT;
        len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);
 
-       if (count == 1) {
+       if (count == 1 && !register_always) {
                /*
                 * The midlayer only generated a single gather/scatter
                 * entry, or DMA mapping coalesced everything to a
@@ -1094,13 +1442,13 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
                buf->key = cpu_to_be32(target->rkey);
                buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
 
-               req->nfmr = 0;
+               req->nmdesc = 0;
                goto map_complete;
        }
 
-       /* We have more than one scatter/gather entry, so build our indirect
-        * descriptor table, trying to merge as many entries with FMR as we
-        * can.
+       /*
+        * We have more than one scatter/gather entry, so build our indirect
+        * descriptor table, trying to merge as many entries as we can.
         */
        indirect_hdr = (void *) cmd->add_data;
 
@@ -1108,35 +1456,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
                                   target->indirect_size, DMA_TO_DEVICE);
 
        memset(&state, 0, sizeof(state));
-       state.desc      = req->indirect_desc;
-       state.pages     = req->map_page;
-       state.next_fmr  = req->fmr_list;
-
-       use_fmr = dev->fmr_pool ? SRP_MAP_ALLOW_FMR : SRP_MAP_NO_FMR;
-
-       for_each_sg(scat, sg, count, i) {
-               if (srp_map_sg_entry(&state, target, sg, i, use_fmr)) {
-                       /* FMR mapping failed, so backtrack to the first
-                        * unmapped entry and continue on without using FMR.
-                        */
-                       dma_addr_t dma_addr;
-                       unsigned int dma_len;
-
-backtrack:
-                       sg = state.unmapped_sg;
-                       i = state.unmapped_index;
-
-                       dma_addr = ib_sg_dma_address(ibdev, sg);
-                       dma_len = ib_sg_dma_len(ibdev, sg);
-                       dma_len -= (state.unmapped_addr - dma_addr);
-                       dma_addr = state.unmapped_addr;
-                       use_fmr = SRP_MAP_NO_FMR;
-                       srp_map_desc(&state, dma_addr, dma_len, target->rkey);
-               }
-       }
-
-       if (use_fmr == SRP_MAP_ALLOW_FMR && srp_map_finish_fmr(&state, target))
-               goto backtrack;
+       srp_map_sg(&state, target, req, scat, count);
 
        /* We've mapped the request, now pull as much of the indirect
         * descriptor table as we can into the command buffer. If this
@@ -1144,9 +1464,9 @@ backtrack:
         * guaranteed to fit into the command, as the SCSI layer won't
         * give us more S/G entries than we allow.
         */
-       req->nfmr = state.nfmr;
        if (state.ndesc == 1) {
-               /* FMR mapping was able to collapse this to one entry,
+               /*
+                * Memory registration collapsed the sg-list into one entry,
                 * so use a direct descriptor.
                 */
                struct srp_direct_buf *buf = (void *) cmd->add_data;
@@ -1455,6 +1775,7 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)
 
 /**
  * srp_tl_err_work() - handle a transport layer error
+ * @work: Work structure embedded in an SRP target port.
  *
  * Note: This function may get invoked before the rport has been created,
  * hence the target->rport test.
@@ -1468,14 +1789,24 @@ static void srp_tl_err_work(struct work_struct *work)
                srp_start_tl_fail_timers(target->rport);
 }
 
-static void srp_handle_qp_err(enum ib_wc_status wc_status, bool send_err,
-                             struct srp_target_port *target)
+static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status,
+                             bool send_err, struct srp_target_port *target)
 {
        if (target->connected && !target->qp_in_error) {
-               shost_printk(KERN_ERR, target->scsi_host,
-                            PFX "failed %s status %d\n",
-                            send_err ? "send" : "receive",
-                            wc_status);
+               if (wr_id & LOCAL_INV_WR_ID_MASK) {
+                       shost_printk(KERN_ERR, target->scsi_host, PFX
+                                    "LOCAL_INV failed with status %d\n",
+                                    wc_status);
+               } else if (wr_id & FAST_REG_WR_ID_MASK) {
+                       shost_printk(KERN_ERR, target->scsi_host, PFX
+                                    "FAST_REG_MR failed status %d\n",
+                                    wc_status);
+               } else {
+                       shost_printk(KERN_ERR, target->scsi_host,
+                                    PFX "failed %s status %d for iu %p\n",
+                                    send_err ? "send" : "receive",
+                                    wc_status, (void *)(uintptr_t)wr_id);
+               }
                queue_work(system_long_wq, &target->tl_err_work);
        }
        target->qp_in_error = true;
@@ -1491,7 +1822,7 @@ static void srp_recv_completion(struct ib_cq *cq, void *target_ptr)
                if (likely(wc.status == IB_WC_SUCCESS)) {
                        srp_handle_recv(target, &wc);
                } else {
-                       srp_handle_qp_err(wc.status, false, target);
+                       srp_handle_qp_err(wc.wr_id, wc.status, false, target);
                }
        }
 }
@@ -1507,7 +1838,7 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr)
                        iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
                        list_add(&iu->list, &target->free_tx);
                } else {
-                       srp_handle_qp_err(wc.status, true, target);
+                       srp_handle_qp_err(wc.wr_id, wc.status, true, target);
                }
        }
 }
@@ -1521,7 +1852,7 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
        struct srp_cmd *cmd;
        struct ib_device *dev;
        unsigned long flags;
-       int len, result;
+       int len, ret;
        const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
 
        /*
@@ -1533,12 +1864,9 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
        if (in_scsi_eh)
                mutex_lock(&rport->mutex);
 
-       result = srp_chkready(target->rport);
-       if (unlikely(result)) {
-               scmnd->result = result;
-               scmnd->scsi_done(scmnd);
-               goto unlock_rport;
-       }
+       scmnd->result = srp_chkready(target->rport);
+       if (unlikely(scmnd->result))
+               goto err;
 
        spin_lock_irqsave(&target->lock, flags);
        iu = __srp_get_tx_iu(target, SRP_IU_CMD);
@@ -1553,7 +1881,6 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
        ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
                                   DMA_TO_DEVICE);
 
-       scmnd->result        = 0;
        scmnd->host_scribble = (void *) req;
 
        cmd = iu->buf;
@@ -1570,7 +1897,15 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
        len = srp_map_data(scmnd, target, req);
        if (len < 0) {
                shost_printk(KERN_ERR, target->scsi_host,
-                            PFX "Failed to map data\n");
+                            PFX "Failed to map data (%d)\n", len);
+               /*
+                * If we ran out of memory descriptors (-ENOMEM) because an
+                * application is queuing many requests with more than
+                * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
+                * to reduce queue depth temporarily.
+                */
+               scmnd->result = len == -ENOMEM ?
+                       DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
                goto err_iu;
        }
 
@@ -1582,11 +1917,13 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
                goto err_unmap;
        }
 
+       ret = 0;
+
 unlock_rport:
        if (in_scsi_eh)
                mutex_unlock(&rport->mutex);
 
-       return 0;
+       return ret;
 
 err_unmap:
        srp_unmap_data(scmnd, target, req);
@@ -1594,16 +1931,27 @@ err_unmap:
 err_iu:
        srp_put_tx_iu(target, iu, SRP_IU_CMD);
 
+       /*
+        * Avoid that the loops that iterate over the request ring can
+        * encounter a dangling SCSI command pointer.
+        */
+       req->scmnd = NULL;
+
        spin_lock_irqsave(&target->lock, flags);
        list_add(&req->list, &target->free_reqs);
 
 err_unlock:
        spin_unlock_irqrestore(&target->lock, flags);
 
-       if (in_scsi_eh)
-               mutex_unlock(&rport->mutex);
+err:
+       if (scmnd->result) {
+               scmnd->scsi_done(scmnd);
+               ret = 0;
+       } else {
+               ret = SCSI_MLQUEUE_HOST_BUSY;
+       }
 
-       return SCSI_MLQUEUE_HOST_BUSY;
+       goto unlock_rport;
 }
 
 /*
@@ -2310,6 +2658,8 @@ static struct class srp_class = {
 
 /**
  * srp_conn_unique() - check whether the connection to a target is unique
+ * @host:   SRP host.
+ * @target: SRP target port.
  */
 static bool srp_conn_unique(struct srp_host *host,
                            struct srp_target_port *target)
@@ -2605,7 +2955,8 @@ static ssize_t srp_create_target(struct device *dev,
                container_of(dev, struct srp_host, dev);
        struct Scsi_Host *target_host;
        struct srp_target_port *target;
-       struct ib_device *ibdev = host->srp_dev->dev;
+       struct srp_device *srp_dev = host->srp_dev;
+       struct ib_device *ibdev = srp_dev->dev;
        int ret;
 
        target_host = scsi_host_alloc(&srp_template,
@@ -2650,9 +3001,9 @@ static ssize_t srp_create_target(struct device *dev,
                goto err;
        }
 
-       if (!host->srp_dev->fmr_pool && !target->allow_ext_sg &&
-                               target->cmd_sg_cnt < target->sg_tablesize) {
-               pr_warn("No FMR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
+       if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
+           target->cmd_sg_cnt < target->sg_tablesize) {
+               pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
                target->sg_tablesize = target->cmd_sg_cnt;
        }
 
@@ -2790,9 +3141,9 @@ static void srp_add_one(struct ib_device *device)
 {
        struct srp_device *srp_dev;
        struct ib_device_attr *dev_attr;
-       struct ib_fmr_pool_param fmr_param;
        struct srp_host *host;
-       int max_pages_per_fmr, fmr_page_shift, s, e, p;
+       int mr_page_shift, s, e, p;
+       u64 max_pages_per_mr;
 
        dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
        if (!dev_attr)
@@ -2807,15 +3158,39 @@ static void srp_add_one(struct ib_device *device)
        if (!srp_dev)
                goto free_attr;
 
+       srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
+                           device->map_phys_fmr && device->unmap_fmr);
+       srp_dev->has_fr = (dev_attr->device_cap_flags &
+                          IB_DEVICE_MEM_MGT_EXTENSIONS);
+       if (!srp_dev->has_fmr && !srp_dev->has_fr)
+               dev_warn(&device->dev, "neither FMR nor FR is supported\n");
+
+       srp_dev->use_fast_reg = (srp_dev->has_fr &&
+                                (!srp_dev->has_fmr || prefer_fr));
+
        /*
         * Use the smallest page size supported by the HCA, down to a
         * minimum of 4096 bytes. We're unlikely to build large sglists
         * out of smaller entries.
         */
-       fmr_page_shift          = max(12, ffs(dev_attr->page_size_cap) - 1);
-       srp_dev->fmr_page_size  = 1 << fmr_page_shift;
-       srp_dev->fmr_page_mask  = ~((u64) srp_dev->fmr_page_size - 1);
-       srp_dev->fmr_max_size   = srp_dev->fmr_page_size * SRP_FMR_SIZE;
+       mr_page_shift           = max(12, ffs(dev_attr->page_size_cap) - 1);
+       srp_dev->mr_page_size   = 1 << mr_page_shift;
+       srp_dev->mr_page_mask   = ~((u64) srp_dev->mr_page_size - 1);
+       max_pages_per_mr        = dev_attr->max_mr_size;
+       do_div(max_pages_per_mr, srp_dev->mr_page_size);
+       srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
+                                         max_pages_per_mr);
+       if (srp_dev->use_fast_reg) {
+               srp_dev->max_pages_per_mr =
+                       min_t(u32, srp_dev->max_pages_per_mr,
+                             dev_attr->max_fast_reg_page_list_len);
+       }
+       srp_dev->mr_max_size    = srp_dev->mr_page_size *
+                                  srp_dev->max_pages_per_mr;
+       pr_debug("%s: mr_page_shift = %d, dev_attr->max_mr_size = %#llx, dev_attr->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
+                device->name, mr_page_shift, dev_attr->max_mr_size,
+                dev_attr->max_fast_reg_page_list_len,
+                srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
 
        INIT_LIST_HEAD(&srp_dev->dev_list);
 
@@ -2831,27 +3206,6 @@ static void srp_add_one(struct ib_device *device)
        if (IS_ERR(srp_dev->mr))
                goto err_pd;
 
-       for (max_pages_per_fmr = SRP_FMR_SIZE;
-                       max_pages_per_fmr >= SRP_FMR_MIN_SIZE;
-                       max_pages_per_fmr /= 2, srp_dev->fmr_max_size /= 2) {
-               memset(&fmr_param, 0, sizeof fmr_param);
-               fmr_param.pool_size         = SRP_FMR_POOL_SIZE;
-               fmr_param.dirty_watermark   = SRP_FMR_DIRTY_SIZE;
-               fmr_param.cache             = 1;
-               fmr_param.max_pages_per_fmr = max_pages_per_fmr;
-               fmr_param.page_shift        = fmr_page_shift;
-               fmr_param.access            = (IB_ACCESS_LOCAL_WRITE |
-                                              IB_ACCESS_REMOTE_WRITE |
-                                              IB_ACCESS_REMOTE_READ);
-
-               srp_dev->fmr_pool = ib_create_fmr_pool(srp_dev->pd, &fmr_param);
-               if (!IS_ERR(srp_dev->fmr_pool))
-                       break;
-       }
-
-       if (IS_ERR(srp_dev->fmr_pool))
-               srp_dev->fmr_pool = NULL;
-
        if (device->node_type == RDMA_NODE_IB_SWITCH) {
                s = 0;
                e = 0;
@@ -2914,8 +3268,6 @@ static void srp_remove_one(struct ib_device *device)
                kfree(host);
        }
 
-       if (srp_dev->fmr_pool)
-               ib_destroy_fmr_pool(srp_dev->fmr_pool);
        ib_dereg_mr(srp_dev->mr);
        ib_dealloc_pd(srp_dev->pd);
 
index aad27b7b4a4624bc040ef06e266d61bd537211d7..e46ecb15aa0df5cb5d992fac271bb047774a3676 100644 (file)
@@ -66,13 +66,10 @@ enum {
        SRP_TAG_NO_REQ          = ~0U,
        SRP_TAG_TSK_MGMT        = 1U << 31,
 
-       SRP_FMR_SIZE            = 512,
-       SRP_FMR_MIN_SIZE        = 128,
-       SRP_FMR_POOL_SIZE       = 1024,
-       SRP_FMR_DIRTY_SIZE      = SRP_FMR_POOL_SIZE / 4,
+       SRP_MAX_PAGES_PER_MR    = 512,
 
-       SRP_MAP_ALLOW_FMR       = 0,
-       SRP_MAP_NO_FMR          = 1,
+       LOCAL_INV_WR_ID_MASK    = 1,
+       FAST_REG_WR_ID_MASK     = 2,
 };
 
 enum srp_target_state {
@@ -86,15 +83,24 @@ enum srp_iu_type {
        SRP_IU_RSP,
 };
 
+/*
+ * @mr_page_mask: HCA memory registration page mask.
+ * @mr_page_size: HCA memory registration page size.
+ * @mr_max_size: Maximum size in bytes of a single FMR / FR registration
+ *   request.
+ */
 struct srp_device {
        struct list_head        dev_list;
        struct ib_device       *dev;
        struct ib_pd           *pd;
        struct ib_mr           *mr;
-       struct ib_fmr_pool     *fmr_pool;
-       u64                     fmr_page_mask;
-       int                     fmr_page_size;
-       int                     fmr_max_size;
+       u64                     mr_page_mask;
+       int                     mr_page_size;
+       int                     mr_max_size;
+       int                     max_pages_per_mr;
+       bool                    has_fmr;
+       bool                    has_fr;
+       bool                    use_fast_reg;
 };
 
 struct srp_host {
@@ -112,11 +118,14 @@ struct srp_request {
        struct list_head        list;
        struct scsi_cmnd       *scmnd;
        struct srp_iu          *cmd;
-       struct ib_pool_fmr    **fmr_list;
+       union {
+               struct ib_pool_fmr **fmr_list;
+               struct srp_fr_desc **fr_list;
+       };
        u64                    *map_page;
        struct srp_direct_buf  *indirect_desc;
        dma_addr_t              indirect_dma_addr;
-       short                   nfmr;
+       short                   nmdesc;
        short                   index;
 };
 
@@ -131,6 +140,10 @@ struct srp_target_port {
        struct ib_cq           *send_cq ____cacheline_aligned_in_smp;
        struct ib_cq           *recv_cq;
        struct ib_qp           *qp;
+       union {
+               struct ib_fmr_pool     *fmr_pool;
+               struct srp_fr_pool     *fr_pool;
+       };
        u32                     lkey;
        u32                     rkey;
        enum srp_target_state   state;
@@ -197,15 +210,66 @@ struct srp_iu {
        enum dma_data_direction direction;
 };
 
+/**
+ * struct srp_fr_desc - fast registration work request arguments
+ * @entry: Entry in srp_fr_pool.free_list.
+ * @mr:    Memory region.
+ * @frpl:  Fast registration page list.
+ */
+struct srp_fr_desc {
+       struct list_head                entry;
+       struct ib_mr                    *mr;
+       struct ib_fast_reg_page_list    *frpl;
+};
+
+/**
+ * struct srp_fr_pool - pool of fast registration descriptors
+ *
+ * An entry is available for allocation if and only if it occurs in @free_list.
+ *
+ * @size:      Number of descriptors in this pool.
+ * @max_page_list_len: Maximum fast registration work request page list length.
+ * @lock:      Protects free_list.
+ * @free_list: List of free descriptors.
+ * @desc:      Fast registration descriptor pool.
+ */
+struct srp_fr_pool {
+       int                     size;
+       int                     max_page_list_len;
+       spinlock_t              lock;
+       struct list_head        free_list;
+       struct srp_fr_desc      desc[0];
+};
+
+/**
+ * struct srp_map_state - per-request DMA memory mapping state
+ * @desc:          Pointer to the element of the SRP buffer descriptor array
+ *                 that is being filled in.
+ * @pages:         Array with DMA addresses of pages being considered for
+ *                 memory registration.
+ * @base_dma_addr:  DMA address of the first page that has not yet been mapped.
+ * @dma_len:       Number of bytes that will be registered with the next
+ *                 FMR or FR memory registration call.
+ * @total_len:     Total number of bytes in the sg-list being mapped.
+ * @npages:        Number of page addresses in the pages[] array.
+ * @nmdesc:        Number of FMR or FR memory descriptors used for mapping.
+ * @ndesc:         Number of SRP buffer descriptors that have been filled in.
+ * @unmapped_sg:    First element of the sg-list that is mapped via FMR or FR.
+ * @unmapped_index: Index of the first element mapped via FMR or FR.
+ * @unmapped_addr:  DMA address of the first element mapped via FMR or FR.
+ */
 struct srp_map_state {
-       struct ib_pool_fmr    **next_fmr;
+       union {
+               struct ib_pool_fmr **next_fmr;
+               struct srp_fr_desc **next_fr;
+       };
        struct srp_direct_buf  *desc;
        u64                    *pages;
        dma_addr_t              base_dma_addr;
-       u32                     fmr_len;
+       u32                     dma_len;
        u32                     total_len;
        unsigned int            npages;
-       unsigned int            nfmr;
+       unsigned int            nmdesc;
        unsigned int            ndesc;
        struct scatterlist     *unmapped_sg;
        int                     unmapped_index;
index c3ad464d0627b3291b100ad692d18406ad79d1c7..b0297da50304e95d83bb3ab5912172b9dc9752f9 100644 (file)
@@ -171,7 +171,7 @@ void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap)
  */
 
 int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
-                  struct mlx4_buf *buf)
+                  struct mlx4_buf *buf, gfp_t gfp)
 {
        dma_addr_t t;
 
@@ -180,7 +180,7 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
                buf->npages       = 1;
                buf->page_shift   = get_order(size) + PAGE_SHIFT;
                buf->direct.buf   = dma_alloc_coherent(&dev->pdev->dev,
-                                                      size, &t, GFP_KERNEL);
+                                                      size, &t, gfp);
                if (!buf->direct.buf)
                        return -ENOMEM;
 
@@ -200,14 +200,14 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
                buf->npages      = buf->nbufs;
                buf->page_shift  = PAGE_SHIFT;
                buf->page_list   = kcalloc(buf->nbufs, sizeof(*buf->page_list),
-                                          GFP_KERNEL);
+                                          gfp);
                if (!buf->page_list)
                        return -ENOMEM;
 
                for (i = 0; i < buf->nbufs; ++i) {
                        buf->page_list[i].buf =
                                dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
-                                                  &t, GFP_KERNEL);
+                                                  &t, gfp);
                        if (!buf->page_list[i].buf)
                                goto err_free;
 
@@ -218,7 +218,7 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
 
                if (BITS_PER_LONG == 64) {
                        struct page **pages;
-                       pages = kmalloc(sizeof *pages * buf->nbufs, GFP_KERNEL);
+                       pages = kmalloc(sizeof *pages * buf->nbufs, gfp);
                        if (!pages)
                                goto err_free;
                        for (i = 0; i < buf->nbufs; ++i)
@@ -260,11 +260,12 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf)
 }
 EXPORT_SYMBOL_GPL(mlx4_buf_free);
 
-static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device)
+static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device,
+                                                gfp_t gfp)
 {
        struct mlx4_db_pgdir *pgdir;
 
-       pgdir = kzalloc(sizeof *pgdir, GFP_KERNEL);
+       pgdir = kzalloc(sizeof *pgdir, gfp);
        if (!pgdir)
                return NULL;
 
@@ -272,7 +273,7 @@ static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device)
        pgdir->bits[0] = pgdir->order0;
        pgdir->bits[1] = pgdir->order1;
        pgdir->db_page = dma_alloc_coherent(dma_device, PAGE_SIZE,
-                                           &pgdir->db_dma, GFP_KERNEL);
+                                           &pgdir->db_dma, gfp);
        if (!pgdir->db_page) {
                kfree(pgdir);
                return NULL;
@@ -312,7 +313,7 @@ found:
        return 0;
 }
 
-int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order)
+int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order, gfp_t gfp)
 {
        struct mlx4_priv *priv = mlx4_priv(dev);
        struct mlx4_db_pgdir *pgdir;
@@ -324,7 +325,7 @@ int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order)
                if (!mlx4_alloc_db_from_pgdir(pgdir, db, order))
                        goto out;
 
-       pgdir = mlx4_alloc_db_pgdir(&(dev->pdev->dev));
+       pgdir = mlx4_alloc_db_pgdir(&(dev->pdev->dev), gfp);
        if (!pgdir) {
                ret = -ENOMEM;
                goto out;
@@ -376,13 +377,13 @@ int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
 {
        int err;
 
-       err = mlx4_db_alloc(dev, &wqres->db, 1);
+       err = mlx4_db_alloc(dev, &wqres->db, 1, GFP_KERNEL);
        if (err)
                return err;
 
        *wqres->db.db = 0;
 
-       err = mlx4_buf_alloc(dev, size, max_direct, &wqres->buf);
+       err = mlx4_buf_alloc(dev, size, max_direct, &wqres->buf, GFP_KERNEL);
        if (err)
                goto err_db;
 
@@ -391,7 +392,7 @@ int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
        if (err)
                goto err_buf;
 
-       err = mlx4_buf_write_mtt(dev, &wqres->mtt, &wqres->buf);
+       err = mlx4_buf_write_mtt(dev, &wqres->mtt, &wqres->buf, GFP_KERNEL);
        if (err)
                goto err_mtt;
 
index 78099eab767374319c7e258bfa1f0d6df4c64fa3..3370ecb8c3d2cb5713dff18384831892bdb098fa 100644 (file)
@@ -705,20 +705,28 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
        struct ib_smp *smp = inbox->buf;
        u32 index;
        u8 port;
+       u8 opcode_modifier;
        u16 *table;
        int err;
        int vidx, pidx;
+       int network_view;
        struct mlx4_priv *priv = mlx4_priv(dev);
        struct ib_smp *outsmp = outbox->buf;
        __be16 *outtab = (__be16 *)(outsmp->data);
        __be32 slave_cap_mask;
        __be64 slave_node_guid;
+
        port = vhcr->in_modifier;
 
+       /* network-view bit is for driver use only, and should not be passed to FW */
+       opcode_modifier = vhcr->op_modifier & ~0x8; /* clear netw view bit */
+       network_view = !!(vhcr->op_modifier & 0x8);
+
        if (smp->base_version == 1 &&
            smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED &&
            smp->class_version == 1) {
-               if (smp->method == IB_MGMT_METHOD_GET) {
+               /* host view is paravirtualized */
+               if (!network_view && smp->method == IB_MGMT_METHOD_GET) {
                        if (smp->attr_id == IB_SMP_ATTR_PKEY_TABLE) {
                                index = be32_to_cpu(smp->attr_mod);
                                if (port < 1 || port > dev->caps.num_ports)
@@ -743,7 +751,7 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
                                /*get the slave specific caps:*/
                                /*do the command */
                                err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
-                                           vhcr->in_modifier, vhcr->op_modifier,
+                                           vhcr->in_modifier, opcode_modifier,
                                            vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
                                /* modify the response for slaves */
                                if (!err && slave != mlx4_master_func_num(dev)) {
@@ -760,7 +768,7 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
                                smp->attr_mod = cpu_to_be32(slave / 8);
                                /* execute cmd */
                                err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
-                                            vhcr->in_modifier, vhcr->op_modifier,
+                                            vhcr->in_modifier, opcode_modifier,
                                             vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
                                if (!err) {
                                        /* if needed, move slave gid to index 0 */
@@ -774,7 +782,7 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
                        }
                        if (smp->attr_id == IB_SMP_ATTR_NODE_INFO) {
                                err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
-                                            vhcr->in_modifier, vhcr->op_modifier,
+                                            vhcr->in_modifier, opcode_modifier,
                                             vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
                                if (!err) {
                                        slave_node_guid =  mlx4_get_slave_node_guid(dev, slave);
@@ -784,19 +792,24 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
                        }
                }
        }
+
+       /* Non-privileged VFs are only allowed "host" view LID-routed 'Get' MADs.
+        * These are the MADs used by ib verbs (such as ib_query_gids).
+        */
        if (slave != mlx4_master_func_num(dev) &&
-           ((smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) ||
-            (smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED &&
-             smp->method == IB_MGMT_METHOD_SET))) {
-               mlx4_err(dev, "slave %d is trying to execute a Subnet MGMT MAD, "
-                        "class 0x%x, method 0x%x for attr 0x%x. Rejecting\n",
-                        slave, smp->method, smp->mgmt_class,
-                        be16_to_cpu(smp->attr_id));
-               return -EPERM;
+           !mlx4_vf_smi_enabled(dev, slave, port)) {
+               if (!(smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED &&
+                     smp->method == IB_MGMT_METHOD_GET) || network_view) {
+                       mlx4_err(dev, "Unprivileged slave %d is trying to execute a Subnet MGMT MAD, class 0x%x, method 0x%x, view=%s for attr 0x%x. Rejecting\n",
+                                slave, smp->method, smp->mgmt_class,
+                                network_view ? "Network" : "Host",
+                                be16_to_cpu(smp->attr_id));
+                       return -EPERM;
+               }
        }
-       /*default:*/
+
        return mlx4_cmd_box(dev, inbox->dma, outbox->dma,
-                                   vhcr->in_modifier, vhcr->op_modifier,
+                                   vhcr->in_modifier, opcode_modifier,
                                    vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
 }
 
@@ -1653,6 +1666,8 @@ static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave)
        for (port = min_port; port <= max_port; port++) {
                if (!test_bit(port - 1, actv_ports.ports))
                        continue;
+               priv->mfunc.master.vf_oper[slave].smi_enabled[port] =
+                       priv->mfunc.master.vf_admin[slave].enable_smi[port];
                vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
                vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port];
                vp_oper->state = *vp_admin;
@@ -1704,6 +1719,8 @@ static void mlx4_master_deactivate_admin_state(struct mlx4_priv *priv, int slave
        for (port = min_port; port <= max_port; port++) {
                if (!test_bit(port - 1, actv_ports.ports))
                        continue;
+               priv->mfunc.master.vf_oper[slave].smi_enabled[port] =
+                       MLX4_VF_SMI_DISABLED;
                vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
                if (NO_INDX != vp_oper->vlan_idx) {
                        __mlx4_unregister_vlan(&priv->dev,
@@ -2537,3 +2554,50 @@ int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_stat
        return 0;
 }
 EXPORT_SYMBOL_GPL(mlx4_set_vf_link_state);
+/* 1 if SMI is operationally enabled for slave/port; 0 otherwise or if out of range */
+int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+
+       if (slave < 1 || slave >= dev->num_slaves ||
+           port < 1 || port > MLX4_MAX_PORTS)
+               return 0;
+
+       return priv->mfunc.master.vf_oper[slave].smi_enabled[port] ==
+               MLX4_VF_SMI_ENABLED;
+}
+EXPORT_SYMBOL_GPL(mlx4_vf_smi_enabled);
+/* 1 for the master function or if admin enable_smi is set for slave/port; else 0 */
+int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+
+       if (slave == mlx4_master_func_num(dev))
+               return 1;
+
+       if (slave < 1 || slave >= dev->num_slaves ||
+           port < 1 || port > MLX4_MAX_PORTS)
+               return 0;
+
+       return priv->mfunc.master.vf_admin[slave].enable_smi[port] ==
+               MLX4_VF_SMI_ENABLED;
+}
+EXPORT_SYMBOL_GPL(mlx4_vf_get_enable_smi_admin);
+/* Store admin enable_smi (0 or 1) for slave/port; no-op for master, -EINVAL if invalid */
+int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port,
+                                int enabled)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+
+       if (slave == mlx4_master_func_num(dev))
+               return 0;
+
+       if (slave < 1 || slave >= dev->num_slaves ||
+           port < 1 || port > MLX4_MAX_PORTS ||
+           enabled < 0 || enabled > 1)
+               return -EINVAL;
+
+       priv->mfunc.master.vf_admin[slave].enable_smi[port] = enabled;
+       return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_vf_set_enable_smi_admin);
index 0487121e4a0fe495d4252f01b24d16bdb2fefb06..c90cde5b4aeee5cd91239a1cdea479490c388170 100644 (file)
@@ -173,11 +173,11 @@ int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn)
        if (*cqn == -1)
                return -ENOMEM;
 
-       err = mlx4_table_get(dev, &cq_table->table, *cqn);
+       err = mlx4_table_get(dev, &cq_table->table, *cqn, GFP_KERNEL);
        if (err)
                goto err_out;
 
-       err = mlx4_table_get(dev, &cq_table->cmpt_table, *cqn);
+       err = mlx4_table_get(dev, &cq_table->cmpt_table, *cqn, GFP_KERNEL);
        if (err)
                goto err_put;
        return 0;
index ba049ae88749dac986a0712d281bbd649152acdd..87857a6463ebb286f0110d7fc7a831d25743d27b 100644 (file)
@@ -972,7 +972,7 @@ static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn,
        if (!context)
                return -ENOMEM;
 
-       err = mlx4_qp_alloc(mdev->dev, qpn, qp);
+       err = mlx4_qp_alloc(mdev->dev, qpn, qp, GFP_KERNEL);
        if (err) {
                en_err(priv, "Failed to allocate qp #%x\n", qpn);
                goto out;
@@ -1012,7 +1012,7 @@ int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv)
                en_err(priv, "Failed reserving drop qpn\n");
                return err;
        }
-       err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp);
+       err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp, GFP_KERNEL);
        if (err) {
                en_err(priv, "Failed allocating drop qp\n");
                mlx4_qp_release_range(priv->mdev->dev, qpn, 1);
@@ -1071,7 +1071,7 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
        }
 
        /* Configure RSS indirection qp */
-       err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp);
+       err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp, GFP_KERNEL);
        if (err) {
                en_err(priv, "Failed to allocate RSS indirection QP\n");
                goto rss_err;
index dd1f6d346459808dfe95690ce5fcf0af31e99231..bc0cc1eb214daf0029bae922f513751be071b0bb 100644 (file)
@@ -113,7 +113,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
               ring->buf_size, (unsigned long long) ring->wqres.buf.direct.map);
 
        ring->qpn = qpn;
-       err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp);
+       err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp, GFP_KERNEL);
        if (err) {
                en_err(priv, "Failed allocating qp %d\n", ring->qpn);
                goto err_map;
index d16a4d11890342167a2f2c8605e3b5e4e9d25198..01e6dd61ee3c0ea6e3356c18a29227d98ca83582 100644 (file)
@@ -178,8 +178,8 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
                                struct mlx4_cmd_info *cmd)
 {
        struct mlx4_priv *priv = mlx4_priv(dev);
-       u8      field;
-       u32     size;
+       u8      field, port;
+       u32     size, proxy_qp, qkey;
        int     err = 0;
 
 #define QUERY_FUNC_CAP_FLAGS_OFFSET            0x0
@@ -209,6 +209,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 
 /* when opcode modifier = 1 */
 #define QUERY_FUNC_CAP_PHYS_PORT_OFFSET                0x3
+#define QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET     0x4
 #define QUERY_FUNC_CAP_FLAGS0_OFFSET           0x8
 #define QUERY_FUNC_CAP_FLAGS1_OFFSET           0xc
 
@@ -221,6 +222,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 #define QUERY_FUNC_CAP_FLAGS1_FORCE_MAC                0x40
 #define QUERY_FUNC_CAP_FLAGS1_FORCE_VLAN       0x80
 #define QUERY_FUNC_CAP_FLAGS1_NIC_INFO                 0x10
+#define QUERY_FUNC_CAP_VF_ENABLE_QP0           0x08
 
 #define QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID 0x80
 
@@ -234,28 +236,35 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
                        return -EINVAL;
 
                vhcr->in_modifier = converted_port;
-               /* Set nic_info bit to mark new fields support */
-               field  = QUERY_FUNC_CAP_FLAGS1_NIC_INFO;
-               MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS1_OFFSET);
-
                /* phys-port = logical-port */
                field = vhcr->in_modifier -
                        find_first_bit(actv_ports.ports, dev->caps.num_ports);
                MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_PHYS_PORT_OFFSET);
 
-               field = vhcr->in_modifier;
+               port = vhcr->in_modifier;
+               proxy_qp = dev->phys_caps.base_proxy_sqpn + 8 * slave + port - 1;
+
+               /* Set nic_info bit to mark new fields support */
+               field  = QUERY_FUNC_CAP_FLAGS1_NIC_INFO;
+
+               if (mlx4_vf_smi_enabled(dev, slave, port) &&
+                   !mlx4_get_parav_qkey(dev, proxy_qp, &qkey)) {
+                       field |= QUERY_FUNC_CAP_VF_ENABLE_QP0;
+                       MLX4_PUT(outbox->buf, qkey,
+                                QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET);
+               }
+               MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS1_OFFSET);
+
                /* size is now the QP number */
-               size = dev->phys_caps.base_tunnel_sqpn + 8 * slave + field - 1;
+               size = dev->phys_caps.base_tunnel_sqpn + 8 * slave + port - 1;
                MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_TUNNEL);
 
                size += 2;
                MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_TUNNEL);
 
-               size = dev->phys_caps.base_proxy_sqpn + 8 * slave + field - 1;
-               MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_PROXY);
-
-               size += 2;
-               MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_PROXY);
+               MLX4_PUT(outbox->buf, proxy_qp, QUERY_FUNC_CAP_QP0_PROXY);
+               proxy_qp += 2;
+               MLX4_PUT(outbox->buf, proxy_qp, QUERY_FUNC_CAP_QP1_PROXY);
 
                MLX4_PUT(outbox->buf, dev->caps.phys_port_id[vhcr->in_modifier],
                         QUERY_FUNC_CAP_PHYS_PORT_ID);
@@ -326,7 +335,7 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port,
        struct mlx4_cmd_mailbox *mailbox;
        u32                     *outbox;
        u8                      field, op_modifier;
-       u32                     size;
+       u32                     size, qkey;
        int                     err = 0, quotas = 0;
 
        op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */
@@ -414,7 +423,7 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port,
 
        MLX4_GET(func_cap->flags1, outbox, QUERY_FUNC_CAP_FLAGS1_OFFSET);
        if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_ETH) {
-               if (func_cap->flags1 & QUERY_FUNC_CAP_FLAGS1_OFFSET) {
+               if (func_cap->flags1 & QUERY_FUNC_CAP_FLAGS1_FORCE_VLAN) {
                        mlx4_err(dev, "VLAN is enforced on this port\n");
                        err = -EPROTONOSUPPORT;
                        goto out;
@@ -442,6 +451,13 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port,
                goto out;
        }
 
+       if (func_cap->flags1 & QUERY_FUNC_CAP_VF_ENABLE_QP0) {
+               MLX4_GET(qkey, outbox, QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET);
+               func_cap->qp0_qkey = qkey;
+       } else {
+               func_cap->qp0_qkey = 0;
+       }
+
        MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_TUNNEL);
        func_cap->qp0_tunnel_qpn = size & 0xFFFFFF;
 
index 6811ee00ba7c6e3d20499cb9da8fa9052913b378..1fce03ebe5c4a27903256642abe6c3cf8fae0eb8 100644 (file)
@@ -134,6 +134,7 @@ struct mlx4_func_cap {
        int     max_eq;
        int     reserved_eq;
        int     mcg_quota;
+       u32     qp0_qkey;
        u32     qp0_tunnel_qpn;
        u32     qp0_proxy_qpn;
        u32     qp1_tunnel_qpn;
index 5fbf4924c2729e0dd3666a59ab64386ddf76b4c9..97c9b1db1d275ee15d7fd1a98937032a329db03b 100644 (file)
@@ -245,7 +245,8 @@ int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev)
                        MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
 }
 
-int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj)
+int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj,
+                  gfp_t gfp)
 {
        u32 i = (obj & (table->num_obj - 1)) /
                        (MLX4_TABLE_CHUNK_SIZE / table->obj_size);
@@ -259,7 +260,7 @@ int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj)
        }
 
        table->icm[i] = mlx4_alloc_icm(dev, MLX4_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
-                                      (table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
+                                      (table->lowmem ? gfp : GFP_HIGHUSER) |
                                       __GFP_NOWARN, table->coherent);
        if (!table->icm[i]) {
                ret = -ENOMEM;
@@ -356,7 +357,7 @@ int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
        u32 i;
 
        for (i = start; i <= end; i += inc) {
-               err = mlx4_table_get(dev, table, i);
+               err = mlx4_table_get(dev, table, i, GFP_KERNEL);
                if (err)
                        goto fail;
        }
index dee67fa39107f890508e158e56b680af6496d74a..0c73645501509d752f8316da7c72fefe8deb03f9 100644 (file)
@@ -71,7 +71,8 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
                                gfp_t gfp_mask, int coherent);
 void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent);
 
-int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj);
+int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj,
+                  gfp_t gfp);
 void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj);
 int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
                         u32 start, u32 end);
index 7cf9dadcb471bbfee9ef32761123ece4ceac131d..908326876ab5de176fff9f13d84b891c1486f33d 100644 (file)
@@ -666,13 +666,15 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
                return -ENODEV;
        }
 
+       dev->caps.qp0_qkey = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
        dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
        dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
        dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
        dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
 
        if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy ||
-           !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) {
+           !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy ||
+           !dev->caps.qp0_qkey) {
                err = -ENOMEM;
                goto err_mem;
        }
@@ -684,6 +686,7 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
                                 " port %d, aborting (%d).\n", i, err);
                        goto err_mem;
                }
+               dev->caps.qp0_qkey[i - 1] = func_cap.qp0_qkey;
                dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn;
                dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn;
                dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn;
@@ -729,12 +732,16 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
        return 0;
 
 err_mem:
+       kfree(dev->caps.qp0_qkey);
        kfree(dev->caps.qp0_tunnel);
        kfree(dev->caps.qp0_proxy);
        kfree(dev->caps.qp1_tunnel);
        kfree(dev->caps.qp1_proxy);
-       dev->caps.qp0_tunnel = dev->caps.qp0_proxy =
-               dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL;
+       dev->caps.qp0_qkey = NULL;
+       dev->caps.qp0_tunnel = NULL;
+       dev->caps.qp0_proxy = NULL;
+       dev->caps.qp1_tunnel = NULL;
+       dev->caps.qp1_proxy = NULL;
 
        return err;
 }
@@ -1696,6 +1703,14 @@ unmap_bf:
        unmap_internal_clock(dev);
        unmap_bf_area(dev);
 
+       if (mlx4_is_slave(dev)) {
+               kfree(dev->caps.qp0_qkey);
+               kfree(dev->caps.qp0_tunnel);
+               kfree(dev->caps.qp0_proxy);
+               kfree(dev->caps.qp1_tunnel);
+               kfree(dev->caps.qp1_proxy);
+       }
+
 err_close:
        if (mlx4_is_slave(dev))
                mlx4_slave_exit(dev);
@@ -2565,6 +2580,14 @@ err_master_mfunc:
        if (mlx4_is_master(dev))
                mlx4_multi_func_cleanup(dev);
 
+       if (mlx4_is_slave(dev)) {
+               kfree(dev->caps.qp0_qkey);
+               kfree(dev->caps.qp0_tunnel);
+               kfree(dev->caps.qp0_proxy);
+               kfree(dev->caps.qp1_tunnel);
+               kfree(dev->caps.qp1_proxy);
+       }
+
 err_close:
        if (dev->flags & MLX4_FLAG_MSI_X)
                pci_disable_msix(pdev);
@@ -2688,6 +2711,7 @@ static void __mlx4_remove_one(struct pci_dev *pdev)
        if (!mlx4_is_slave(dev))
                mlx4_free_ownership(dev);
 
+       kfree(dev->caps.qp0_qkey);
        kfree(dev->caps.qp0_tunnel);
        kfree(dev->caps.qp0_proxy);
        kfree(dev->caps.qp1_tunnel);
index f9c46510196341a6089b0a23d7b53455dad69ae5..7d39cb30c883b90afa7529bac2ffedf8da134ebc 100644 (file)
@@ -133,6 +133,11 @@ enum {
        MLX4_COMM_CMD_FLR = 254
 };
 
+enum {
+       MLX4_VF_SMI_DISABLED,   /* 0: written to vf_oper smi_enabled on deactivation */
+       MLX4_VF_SMI_ENABLED     /* 1: value the SMI getters compare the stored state to */
+};
+
 /*The flag indicates that the slave should delay the RESET cmd*/
 #define MLX4_DELAY_RESET_SLAVE 0xbbbbbbb
 /*indicates how many retries will be done if we are in the middle of FLR*/
@@ -488,6 +493,7 @@ struct mlx4_vport_state {
 
 struct mlx4_vf_admin_state {
        struct mlx4_vport_state vport[MLX4_MAX_PORTS + 1];
+       u8 enable_smi[MLX4_MAX_PORTS + 1];
 };
 
 struct mlx4_vport_oper_state {
@@ -495,8 +501,10 @@ struct mlx4_vport_oper_state {
        int mac_idx;
        int vlan_idx;
 };
+
 struct mlx4_vf_oper_state {
        struct mlx4_vport_oper_state vport[MLX4_MAX_PORTS + 1];
+       u8 smi_enabled[MLX4_MAX_PORTS + 1];
 };
 
 struct slave_list {
@@ -888,7 +896,7 @@ void mlx4_cleanup_cq_table(struct mlx4_dev *dev);
 void mlx4_cleanup_qp_table(struct mlx4_dev *dev);
 void mlx4_cleanup_srq_table(struct mlx4_dev *dev);
 void mlx4_cleanup_mcg_table(struct mlx4_dev *dev);
-int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn);
+int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp);
 void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn);
 int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn);
 void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn);
@@ -896,7 +904,7 @@ int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn);
 void __mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn);
 int __mlx4_mpt_reserve(struct mlx4_dev *dev);
 void __mlx4_mpt_release(struct mlx4_dev *dev, u32 index);
-int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index);
+int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp);
 void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index);
 u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order);
 void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 first_seg, int order);
index 24835853b7533ec7bf9f73c05e8a7cd713414956..4c71dafad2175a11d3da7dfa3b554da2e9964e7c 100644 (file)
@@ -364,14 +364,14 @@ static void mlx4_mpt_release(struct mlx4_dev *dev, u32 index)
        __mlx4_mpt_release(dev, index);
 }
 
-int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index)
+int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp)
 {
        struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
 
-       return mlx4_table_get(dev, &mr_table->dmpt_table, index);
+       return mlx4_table_get(dev, &mr_table->dmpt_table, index, gfp);
 }
 
-static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index)
+static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp)
 {
        u64 param = 0;
 
@@ -382,7 +382,7 @@ static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index)
                                                        MLX4_CMD_TIME_CLASS_A,
                                                        MLX4_CMD_WRAPPED);
        }
-       return __mlx4_mpt_alloc_icm(dev, index);
+       return __mlx4_mpt_alloc_icm(dev, index, gfp);
 }
 
 void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index)
@@ -469,7 +469,7 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
        struct mlx4_mpt_entry *mpt_entry;
        int err;
 
-       err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mr->key));
+       err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mr->key), GFP_KERNEL);
        if (err)
                return err;
 
@@ -627,13 +627,14 @@ int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 EXPORT_SYMBOL_GPL(mlx4_write_mtt);
 
 int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
-                      struct mlx4_buf *buf)
+                      struct mlx4_buf *buf, gfp_t gfp)
 {
        u64 *page_list;
        int err;
        int i;
 
-       page_list = kmalloc(buf->npages * sizeof *page_list, GFP_KERNEL);
+       page_list = kmalloc(buf->npages * sizeof *page_list,
+                           gfp);
        if (!page_list)
                return -ENOMEM;
 
@@ -680,7 +681,7 @@ int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw)
        struct mlx4_mpt_entry *mpt_entry;
        int err;
 
-       err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key));
+       err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key), GFP_KERNEL);
        if (err)
                return err;
 
index 61d64ebffd56e64b0fa8bf2d0fb69308e3d02c49..07198cacbb20d509d1a002ddd8096364c0c10e16 100644 (file)
@@ -272,29 +272,29 @@ void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt)
 }
 EXPORT_SYMBOL_GPL(mlx4_qp_release_range);
 
-int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn)
+int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp)
 {
        struct mlx4_priv *priv = mlx4_priv(dev);
        struct mlx4_qp_table *qp_table = &priv->qp_table;
        int err;
 
-       err = mlx4_table_get(dev, &qp_table->qp_table, qpn);
+       err = mlx4_table_get(dev, &qp_table->qp_table, qpn, gfp);
        if (err)
                goto err_out;
 
-       err = mlx4_table_get(dev, &qp_table->auxc_table, qpn);
+       err = mlx4_table_get(dev, &qp_table->auxc_table, qpn, gfp);
        if (err)
                goto err_put_qp;
 
-       err = mlx4_table_get(dev, &qp_table->altc_table, qpn);
+       err = mlx4_table_get(dev, &qp_table->altc_table, qpn, gfp);
        if (err)
                goto err_put_auxc;
 
-       err = mlx4_table_get(dev, &qp_table->rdmarc_table, qpn);
+       err = mlx4_table_get(dev, &qp_table->rdmarc_table, qpn, gfp);
        if (err)
                goto err_put_altc;
 
-       err = mlx4_table_get(dev, &qp_table->cmpt_table, qpn);
+       err = mlx4_table_get(dev, &qp_table->cmpt_table, qpn, gfp);
        if (err)
                goto err_put_rdmarc;
 
@@ -316,7 +316,7 @@ err_out:
        return err;
 }
 
-static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn)
+static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp)
 {
        u64 param = 0;
 
@@ -326,7 +326,7 @@ static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn)
                                    MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A,
                                    MLX4_CMD_WRAPPED);
        }
-       return __mlx4_qp_alloc_icm(dev, qpn);
+       return __mlx4_qp_alloc_icm(dev, qpn, gfp);
 }
 
 void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn)
@@ -355,7 +355,7 @@ static void mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn)
                __mlx4_qp_free_icm(dev, qpn);
 }
 
-int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp)
+int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp, gfp_t gfp)
 {
        struct mlx4_priv *priv = mlx4_priv(dev);
        struct mlx4_qp_table *qp_table = &priv->qp_table;
@@ -366,7 +366,7 @@ int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp)
 
        qp->qpn = qpn;
 
-       err = mlx4_qp_alloc_icm(dev, qpn);
+       err = mlx4_qp_alloc_icm(dev, qpn, gfp);
        if (err)
                return err;
 
index 1c3fdd4a1f7df3fe84847ae7ff278d652db13463..abdb000bba302c24660c94984e9a472f9ee7968c 100644 (file)
@@ -1532,7 +1532,7 @@ static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
                        return err;
 
                if (!fw_reserved(dev, qpn)) {
-                       err = __mlx4_qp_alloc_icm(dev, qpn);
+                       err = __mlx4_qp_alloc_icm(dev, qpn, GFP_KERNEL);
                        if (err) {
                                res_abort_move(dev, slave, RES_QP, qpn);
                                return err;
@@ -1619,7 +1619,7 @@ static int mpt_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
                if (err)
                        return err;
 
-               err = __mlx4_mpt_alloc_icm(dev, mpt->key);
+               err = __mlx4_mpt_alloc_icm(dev, mpt->key, GFP_KERNEL);
                if (err) {
                        res_abort_move(dev, slave, RES_MPT, id);
                        return err;
@@ -2827,10 +2827,12 @@ static int get_containing_mtt(struct mlx4_dev *dev, int slave, int start,
 }
 
 static int verify_qp_parameters(struct mlx4_dev *dev,
+                               struct mlx4_vhcr *vhcr,
                                struct mlx4_cmd_mailbox *inbox,
                                enum qp_transition transition, u8 slave)
 {
        u32                     qp_type;
+       u32                     qpn;
        struct mlx4_qp_context  *qp_ctx;
        enum mlx4_qp_optpar     optpar;
        int port;
@@ -2873,8 +2875,22 @@ static int verify_qp_parameters(struct mlx4_dev *dev,
                default:
                        break;
                }
+               break;
 
+       case MLX4_QP_ST_MLX:
+               qpn = vhcr->in_modifier & 0x7fffff;
+               port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1;
+               if (transition == QP_TRANS_INIT2RTR &&
+                   slave != mlx4_master_func_num(dev) &&
+                   mlx4_is_qp_reserved(dev, qpn) &&
+                   !mlx4_vf_smi_enabled(dev, slave, port)) {
+                       /* only enabled VFs may create MLX proxy QPs */
+                       mlx4_err(dev, "%s: unprivileged slave %d attempting to create an MLX proxy special QP on port %d\n",
+                                __func__, slave, port);
+                       return -EPERM;
+               }
                break;
+
        default:
                break;
        }
@@ -3454,7 +3470,7 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave,
        err = adjust_qp_sched_queue(dev, slave, qpc, inbox);
        if (err)
                return err;
-       err = verify_qp_parameters(dev, inbox, QP_TRANS_INIT2RTR, slave);
+       err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_INIT2RTR, slave);
        if (err)
                return err;
 
@@ -3508,7 +3524,7 @@ int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
        err = adjust_qp_sched_queue(dev, slave, context, inbox);
        if (err)
                return err;
-       err = verify_qp_parameters(dev, inbox, QP_TRANS_RTR2RTS, slave);
+       err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_RTR2RTS, slave);
        if (err)
                return err;
 
@@ -3530,7 +3546,7 @@ int mlx4_RTS2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
        err = adjust_qp_sched_queue(dev, slave, context, inbox);
        if (err)
                return err;
-       err = verify_qp_parameters(dev, inbox, QP_TRANS_RTS2RTS, slave);
+       err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_RTS2RTS, slave);
        if (err)
                return err;
 
@@ -3567,7 +3583,7 @@ int mlx4_SQD2SQD_QP_wrapper(struct mlx4_dev *dev, int slave,
        err = adjust_qp_sched_queue(dev, slave, context, inbox);
        if (err)
                return err;
-       err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2SQD, slave);
+       err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_SQD2SQD, slave);
        if (err)
                return err;
 
@@ -3589,7 +3605,7 @@ int mlx4_SQD2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
        err = adjust_qp_sched_queue(dev, slave, context, inbox);
        if (err)
                return err;
-       err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2RTS, slave);
+       err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_SQD2RTS, slave);
        if (err)
                return err;
 
index 98faf870b0b084d7ecf06a09ee7a14afefd82c3d..67146624eb58b7946665026ee29ce57a84316888 100644 (file)
@@ -103,11 +103,11 @@ int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn)
        if (*srqn == -1)
                return -ENOMEM;
 
-       err = mlx4_table_get(dev, &srq_table->table, *srqn);
+       err = mlx4_table_get(dev, &srq_table->table, *srqn, GFP_KERNEL);
        if (err)
                goto err_out;
 
-       err = mlx4_table_get(dev, &srq_table->cmpt_table, *srqn);
+       err = mlx4_table_get(dev, &srq_table->cmpt_table, *srqn, GFP_KERNEL);
        if (err)
                goto err_put;
        return 0;
index 4cc92764940477c4f9622fc0cbc4238a08d81283..ac52a0fe2d3af12ccda01d858a4f78d9eb7e2343 100644 (file)
@@ -82,7 +82,11 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
                return mlx5_cmd_status_to_err(&lout.hdr);
        }
 
+       mr->iova = be64_to_cpu(in->seg.start_addr);
+       mr->size = be64_to_cpu(in->seg.len);
        mr->key = mlx5_idx_to_mkey(be32_to_cpu(lout.mkey) & 0xffffff) | key;
+       mr->pd = be32_to_cpu(in->seg.flags_pd) & 0xffffff;
+
        mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n",
                      be32_to_cpu(lout.mkey), key, mr->key);
 
index ba87bd21295a533c8d6941bc4c11d29bceba0112..3447bead962015d1dce95a694d85c2d4ba7d4fd2 100644 (file)
@@ -401,6 +401,7 @@ struct mlx4_caps {
        int                     max_rq_desc_sz;
        int                     max_qp_init_rdma;
        int                     max_qp_dest_rdma;
+       u32                     *qp0_qkey;
        u32                     *qp0_proxy;
        u32                     *qp1_proxy;
        u32                     *qp0_tunnel;
@@ -837,7 +838,7 @@ static inline int mlx4_is_slave(struct mlx4_dev *dev)
 }
 
 int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
-                  struct mlx4_buf *buf);
+                  struct mlx4_buf *buf, gfp_t gfp);
 void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf);
 static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset)
 {
@@ -874,9 +875,10 @@ int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw);
 int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
                   int start_index, int npages, u64 *page_list);
 int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
-                      struct mlx4_buf *buf);
+                      struct mlx4_buf *buf, gfp_t gfp);
 
-int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order);
+int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order,
+                 gfp_t gfp);
 void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db);
 
 int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
@@ -892,7 +894,8 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq);
 int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base);
 void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt);
 
-int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp);
+int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp,
+                 gfp_t gfp);
 void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp);
 
 int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcdn,
@@ -1234,4 +1237,8 @@ int mlx4_phys_to_slave_port(struct mlx4_dev *dev, int slave, int port);
 int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port);
 
 int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port);
+int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port);
+int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port);
+int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port,
+                                int enable);
 #endif /* MLX4_DEVICE_H */
index 93cef6313e72d2a63e2eb1c8e5c6347ae5664cad..2bce4aad257094ae16398c3cdd946405f4f2427d 100644 (file)
@@ -427,7 +427,6 @@ struct mlx5_core_mr {
        u64                     size;
        u32                     key;
        u32                     pd;
-       u32                     access;
 };
 
 struct mlx5_core_srq {
index acd825182977ca73ec0eb161af9d768a0dc365b9..7ccef342f72420a1c54a14341793a99ff59bd047 100644 (file)
@@ -80,8 +80,8 @@ enum rdma_transport_type {
        RDMA_TRANSPORT_USNIC_UDP
 };
 
-enum rdma_transport_type
-rdma_node_get_transport(enum rdma_node_type node_type) __attribute_const__;
+__attribute_const__ enum rdma_transport_type
+rdma_node_get_transport(enum rdma_node_type node_type);
 
 enum rdma_link_layer {
        IB_LINK_LAYER_UNSPECIFIED,
@@ -466,14 +466,14 @@ enum ib_rate {
  * converted to 2, since 5 Gbit/sec is 2 * 2.5 Gbit/sec.
  * @rate: rate to convert.
  */
-int ib_rate_to_mult(enum ib_rate rate) __attribute_const__;
+__attribute_const__ int ib_rate_to_mult(enum ib_rate rate);
 
 /**
  * ib_rate_to_mbps - Convert the IB rate enum to Mbps.
  * For example, IB_RATE_2_5_GBPS will be converted to 2500.
  * @rate: rate to convert.
  */
-int ib_rate_to_mbps(enum ib_rate rate) __attribute_const__;
+__attribute_const__ int ib_rate_to_mbps(enum ib_rate rate);
 
 enum ib_mr_create_flags {
        IB_MR_SIGNATURE_EN = 1,
@@ -604,7 +604,7 @@ struct ib_mr_status {
  * enum.
  * @mult: multiple to convert.
  */
-enum ib_rate mult_to_ib_rate(int mult) __attribute_const__;
+__attribute_const__ enum ib_rate mult_to_ib_rate(int mult);
 
 struct ib_ah_attr {
        struct ib_global_route  grh;
@@ -783,6 +783,7 @@ enum ib_qp_create_flags {
        IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK   = 1 << 1,
        IB_QP_CREATE_NETIF_QP                   = 1 << 5,
        IB_QP_CREATE_SIGNATURE_EN               = 1 << 6,
+       IB_QP_CREATE_USE_GFP_NOIO               = 1 << 7,
        /* reserve bits 26-31 for low level drivers' internal use */
        IB_QP_CREATE_RESERVED_START             = 1 << 26,
        IB_QP_CREATE_RESERVED_END               = 1 << 31,
diff --git a/include/rdma/iw_portmap.h b/include/rdma/iw_portmap.h
new file mode 100644 (file)
index 0000000..928b277
--- /dev/null
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2014 Intel Corporation. All rights reserved.
+ * Copyright (c) 2014 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer in the documentation and/or other materials
+ *       provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _IW_PORTMAP_H
+#define _IW_PORTMAP_H
+
+#define IWPM_ULIBNAME_SIZE     32
+#define IWPM_DEVNAME_SIZE      32
+#define IWPM_IFNAME_SIZE       16
+#define IWPM_IPADDR_SIZE       16
+
+enum {
+       IWPM_INVALID_NLMSG_ERR = 10,
+       IWPM_CREATE_MAPPING_ERR,
+       IWPM_DUPLICATE_MAPPING_ERR,
+       IWPM_UNKNOWN_MAPPING_ERR,
+       IWPM_CLIENT_DEV_INFO_ERR,
+       IWPM_USER_LIB_INFO_ERR,
+       IWPM_REMOTE_QUERY_REJECT
+};
+
+struct iwpm_dev_data {
+       char dev_name[IWPM_DEVNAME_SIZE];
+       char if_name[IWPM_IFNAME_SIZE];
+};
+
+struct iwpm_sa_data {
+       struct sockaddr_storage loc_addr;
+       struct sockaddr_storage mapped_loc_addr;
+       struct sockaddr_storage rem_addr;
+       struct sockaddr_storage mapped_rem_addr;
+};
+
+/**
+ * iwpm_init - Allocate resources for the iwarp port mapper
+ *
+ * Should be called when network interface goes up.
+ */
+int iwpm_init(u8);
+
+/**
+ * iwpm_exit - Deallocate resources for the iwarp port mapper
+ *
+ * Should be called when network interface goes down.
+ */
+int iwpm_exit(u8);
+
+/**
+ * iwpm_valid_pid - Check if the userspace iwarp port mapper pid is valid
+ *
+ * Returns true if the pid is greater than zero, otherwise returns false
+ */
+int iwpm_valid_pid(void);
+
+/**
+ * iwpm_register_pid - Send a netlink query to userspace
+ *                     to get the iwarp port mapper pid
+ * @pm_msg: Contains driver info to send to the userspace port mapper
+ * @nl_client: The index of the netlink client
+ */
+int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client);
+
+/**
+ * iwpm_add_mapping - Send a netlink add mapping request to
+ *                    the userspace port mapper
+ * @pm_msg: Contains the local ip/tcp address info to send
+ * @nl_client: The index of the netlink client
+ *
+ * If the request is successful, the pm_msg stores
+ * the port mapper response (mapped address info)
+ */
+int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client);
+
+/**
+ * iwpm_add_and_query_mapping - Send a netlink add and query mapping request
+ *                              to the userspace port mapper
+ * @pm_msg: Contains the local and remote ip/tcp address info to send
+ * @nl_client: The index of the netlink client
+ *
+ * If the request is successful, the pm_msg stores the
+ * port mapper response (mapped local and remote address info)
+ */
+int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client);
+
+/**
+ * iwpm_remove_mapping - Send a netlink remove mapping request
+ *                       to the userspace port mapper
+ *
+ * @local_addr: Local ip/tcp address to remove
+ * @nl_client: The index of the netlink client
+ */
+int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client);
+
+/**
+ * iwpm_register_pid_cb - Process the port mapper response to
+ *                        iwpm_register_pid query
+ * @skb: The socket buffer
+ * @cb: Contains the received message (payload and netlink header)
+ *
+ * If successful, the function receives the userspace port mapper pid
+ * which is used in future communication with the port mapper
+ */
+int iwpm_register_pid_cb(struct sk_buff *, struct netlink_callback *);
+
+/**
+ * iwpm_add_mapping_cb - Process the port mapper response to
+ *                       iwpm_add_mapping request
+ * @skb: The socket buffer
+ * @cb: Contains the received message (payload and netlink header)
+ */
+int iwpm_add_mapping_cb(struct sk_buff *, struct netlink_callback *);
+
+/**
+ * iwpm_add_and_query_mapping_cb - Process the port mapper response to
+ *                                 iwpm_add_and_query_mapping request
+ * @skb: The socket buffer
+ * @cb: Contains the received message (payload and netlink header)
+ */
+int iwpm_add_and_query_mapping_cb(struct sk_buff *, struct netlink_callback *);
+
+/**
+ * iwpm_mapping_error_cb - Process port mapper notification for error
+ *
+ * @skb: The socket buffer
+ * @cb: Contains the received message (payload and netlink header)
+ */
+int iwpm_mapping_error_cb(struct sk_buff *, struct netlink_callback *);
+
+/**
+ * iwpm_mapping_info_cb - Process a notification that the userspace
+ *                        port mapper daemon is started
+ * @skb: The socket buffer
+ * @cb: Contains the received message (payload and netlink header)
+ *
+ * Using the received port mapper pid, send all the local mapping
+ * info records to the userspace port mapper
+ */
+int iwpm_mapping_info_cb(struct sk_buff *, struct netlink_callback *);
+
+/**
+ * iwpm_ack_mapping_info_cb - Process the port mapper ack for
+ *                            the provided local mapping info records
+ * @skb: The socket buffer
+ * @cb: Contains the received message (payload and netlink header)
+ */
+int iwpm_ack_mapping_info_cb(struct sk_buff *, struct netlink_callback *);
+
+/**
+ * iwpm_create_mapinfo - Store local and mapped IPv4/IPv6 address
+ *                       info in a hash table
+ * @local_addr: Local ip/tcp address
+ * @mapped_addr: Mapped local ip/tcp address
+ * @nl_client: The index of the netlink client
+ */
+int iwpm_create_mapinfo(struct sockaddr_storage *local_addr,
+                       struct sockaddr_storage *mapped_addr, u8 nl_client);
+
+/**
+ * iwpm_remove_mapinfo - Remove local and mapped IPv4/IPv6 address
+ *                       info from the hash table
+ * @local_addr: Local ip/tcp address
+ * @mapped_addr: Mapped local ip/tcp address
+ *
+ * Returns err code if mapping info is not found in the hash table,
+ * otherwise returns 0
+ */
+int iwpm_remove_mapinfo(struct sockaddr_storage *local_addr,
+                       struct sockaddr_storage *mapped_addr);
+
+#endif /* _IW_PORTMAP_H */
index e38de79eeb48f9070a65d806a38eff84a3f92914..0790882e0c9b32443a37c44a26b3357224245d82 100644 (file)
@@ -43,7 +43,7 @@ int ibnl_remove_client(int index);
  * Returns the allocated buffer on success and NULL on failure.
  */
 void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq,
-                  int len, int client, int op);
+                  int len, int client, int op, int flags);
 /**
  * Put a new attribute in a supplied skb.
  * @skb: The netlink skb.
@@ -56,4 +56,25 @@ void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq,
 int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh,
                  int len, void *data, int type);
 
+/**
+ * Send the supplied skb to a specific userspace PID.
+ * @skb: The netlink skb
+ * @nlh: Header of the netlink message to send
+ * @pid: Userspace netlink process ID
+ * Returns 0 on success or a negative error code.
+ */
+int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh,
+                       __u32 pid);
+
+/**
+ * Send the supplied skb to a netlink group.
+ * @skb: The netlink skb
+ * @nlh: Header of the netlink message to send
+ * @group: Netlink group ID
+ * @flags: allocation flags
+ * Returns 0 on success or a negative error code.
+ */
+int ibnl_multicast(struct sk_buff *skb, struct nlmsghdr *nlh,
+                       unsigned int group, gfp_t flags);
+
 #endif /* _RDMA_NETLINK_H */
index 8297285b62886a5177a1d90108a4fbd1b2ebc630..de69170a30ce525378632523417c881985f6143a 100644 (file)
@@ -4,7 +4,16 @@
 #include <linux/types.h>
 
 enum {
-       RDMA_NL_RDMA_CM = 1
+       RDMA_NL_RDMA_CM = 1,
+       RDMA_NL_NES,
+       RDMA_NL_C4IW,
+       RDMA_NL_NUM_CLIENTS
+};
+
+enum {
+       RDMA_NL_GROUP_CM = 1,
+       RDMA_NL_GROUP_IWPM,
+       RDMA_NL_NUM_GROUPS
 };
 
 #define RDMA_NL_GET_CLIENT(type) ((type & (((1 << 6) - 1) << 10)) >> 10)
@@ -22,6 +31,18 @@ enum {
        RDMA_NL_RDMA_CM_NUM_ATTR,
 };
 
+/* iwarp port mapper op-codes */
+enum {
+       RDMA_NL_IWPM_REG_PID = 0,
+       RDMA_NL_IWPM_ADD_MAPPING,
+       RDMA_NL_IWPM_QUERY_MAPPING,
+       RDMA_NL_IWPM_REMOVE_MAPPING,
+       RDMA_NL_IWPM_HANDLE_ERR,
+       RDMA_NL_IWPM_MAPINFO,
+       RDMA_NL_IWPM_MAPINFO_NUM,
+       RDMA_NL_IWPM_NUM_OPS
+};
+
 struct rdma_cm_id_stats {
        __u32   qp_num;
        __u32   bound_dev_if;
@@ -33,5 +54,78 @@ struct rdma_cm_id_stats {
        __u8    qp_type;
 };
 
+enum {
+       IWPM_NLA_REG_PID_UNSPEC = 0,
+       IWPM_NLA_REG_PID_SEQ,
+       IWPM_NLA_REG_IF_NAME,
+       IWPM_NLA_REG_IBDEV_NAME,
+       IWPM_NLA_REG_ULIB_NAME,
+       IWPM_NLA_REG_PID_MAX
+};
+
+enum {
+       IWPM_NLA_RREG_PID_UNSPEC = 0,
+       IWPM_NLA_RREG_PID_SEQ,
+       IWPM_NLA_RREG_IBDEV_NAME,
+       IWPM_NLA_RREG_ULIB_NAME,
+       IWPM_NLA_RREG_ULIB_VER,
+       IWPM_NLA_RREG_PID_ERR,
+       IWPM_NLA_RREG_PID_MAX
+
+};
+
+enum {
+       IWPM_NLA_MANAGE_MAPPING_UNSPEC = 0,
+       IWPM_NLA_MANAGE_MAPPING_SEQ,
+       IWPM_NLA_MANAGE_ADDR,
+       IWPM_NLA_MANAGE_MAPPED_LOC_ADDR,
+       IWPM_NLA_RMANAGE_MAPPING_ERR,
+       IWPM_NLA_RMANAGE_MAPPING_MAX
+};
+
+#define IWPM_NLA_MANAGE_MAPPING_MAX 3
+#define IWPM_NLA_QUERY_MAPPING_MAX  4
+#define IWPM_NLA_MAPINFO_SEND_MAX   3
+
+enum {
+       IWPM_NLA_QUERY_MAPPING_UNSPEC = 0,
+       IWPM_NLA_QUERY_MAPPING_SEQ,
+       IWPM_NLA_QUERY_LOCAL_ADDR,
+       IWPM_NLA_QUERY_REMOTE_ADDR,
+       IWPM_NLA_RQUERY_MAPPED_LOC_ADDR,
+       IWPM_NLA_RQUERY_MAPPED_REM_ADDR,
+       IWPM_NLA_RQUERY_MAPPING_ERR,
+       IWPM_NLA_RQUERY_MAPPING_MAX
+};
+
+enum {
+       IWPM_NLA_MAPINFO_REQ_UNSPEC = 0,
+       IWPM_NLA_MAPINFO_ULIB_NAME,
+       IWPM_NLA_MAPINFO_ULIB_VER,
+       IWPM_NLA_MAPINFO_REQ_MAX
+};
+
+enum {
+       IWPM_NLA_MAPINFO_UNSPEC = 0,
+       IWPM_NLA_MAPINFO_LOCAL_ADDR,
+       IWPM_NLA_MAPINFO_MAPPED_ADDR,
+       IWPM_NLA_MAPINFO_MAX
+};
+
+enum {
+       IWPM_NLA_MAPINFO_NUM_UNSPEC = 0,
+       IWPM_NLA_MAPINFO_SEQ,
+       IWPM_NLA_MAPINFO_SEND_NUM,
+       IWPM_NLA_MAPINFO_ACK_NUM,
+       IWPM_NLA_MAPINFO_NUM_MAX
+};
+
+enum {
+       IWPM_NLA_ERR_UNSPEC = 0,
+       IWPM_NLA_ERR_SEQ,
+       IWPM_NLA_ERR_CODE,
+       IWPM_NLA_ERR_MAX
+};
+
 
 #endif /* _UAPI_RDMA_NETLINK_H */