#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/cgroup_rdma.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include <rdma/ib_verbs.h>
#include <rdma/opa_addr.h>
struct list_head qp_list;
};
+/**
+ * struct rdma_dev_net - rdma net namespace metadata for a net
+ * @nl_sock: Pointer to netlink socket
+ * @net: Pointer to owner net namespace
+ * @id: xarray id to identify the net namespace.
+ */
+struct rdma_dev_net {
+ struct sock *nl_sock;
+ possible_net_t net;
+ u32 id;
+};
+
extern const struct attribute_group ib_dev_attr_group;
extern bool ib_devices_shared_netns;
+extern unsigned int rdma_dev_net_id;
+
+static inline struct rdma_dev_net *rdma_net_to_dev_net(struct net *net)
+{
+ return net_generic(net, rdma_dev_net_id);
+}
int ib_device_register_sysfs(struct ib_device *device);
void ib_device_unregister_sysfs(struct ib_device *device);
int ib_sa_init(void);
void ib_sa_cleanup(void);
-int rdma_nl_init(void);
void rdma_nl_exit(void);
int ib_nl_handle_resolve_resp(struct sk_buff *skb,
struct ib_udata *udata,
struct ib_uobject *uobj)
{
+ enum ib_qp_type qp_type = attr->qp_type;
struct ib_qp *qp;
+ bool is_xrc;
if (!dev->ops.create_qp)
return ERR_PTR(-EOPNOTSUPP);
* and more importantly they are created internaly by driver,
* see mlx5 create_dev_resources() as an example.
*/
- if (attr->qp_type < IB_QPT_XRC_INI) {
+ is_xrc = qp_type == IB_QPT_XRC_INI || qp_type == IB_QPT_XRC_TGT;
+ if ((qp_type < IB_QPT_MAX && !is_xrc) || qp_type == IB_QPT_DRIVER) {
qp->res.type = RDMA_RESTRACK_QP;
if (uobj)
rdma_restrack_uadd(&qp->res);
int ib_device_set_netns_put(struct sk_buff *skb,
struct ib_device *dev, u32 ns_fd);
+
+int rdma_nl_net_init(struct rdma_dev_net *rnet);
+void rdma_nl_net_exit(struct rdma_dev_net *rnet);
#endif /* _CORE_PRIV_H */
int ret;
port_counter = &dev->port_data[port].port_counter;
+ if (!port_counter->hstats)
+ return -EOPNOTSUPP;
+
mutex_lock(&port_counter->lock);
if (on) {
ret = __counter_set_mode(&port_counter->mode,
if (!rdma_is_port_valid(dev, port))
return -EINVAL;
+ if (!dev->port_data[port].port_counter.hstats)
+ return -EOPNOTSUPP;
+
qp = rdma_counter_get_qp(dev, qp_num);
if (!qp)
return -ENOENT;
void rdma_counter_init(struct ib_device *dev)
{
struct rdma_port_counter *port_counter;
- u32 port;
+ u32 port, i;
if (!dev->port_data)
return;
return;
fail:
- rdma_for_each_port(dev, port) {
+ for (i = port; i >= rdma_start_port(dev); i--) {
port_counter = &dev->port_data[port].port_counter;
kfree(port_counter->hstats);
port_counter->hstats = NULL;
+ mutex_destroy(&port_counter->lock);
}
-
- return;
}
void rdma_counter_release(struct ib_device *dev)
rdma_for_each_port(dev, port) {
port_counter = &dev->port_data[port].port_counter;
kfree(port_counter->hstats);
+ mutex_destroy(&port_counter->lock);
}
}
#include <linux/init.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
-#include <net/netns/generic.h>
#include <linux/security.h>
#include <linux/notifier.h>
#include <linux/hashtable.h>
static DECLARE_RWSEM(devices_rwsem);
#define DEVICE_REGISTERED XA_MARK_1
- static LIST_HEAD(client_list);
+ static u32 highest_client_id;
#define CLIENT_REGISTERED XA_MARK_1
static DEFINE_XARRAY_FLAGS(clients, XA_FLAGS_ALLOC);
static DECLARE_RWSEM(clients_rwsem);
+ static void ib_client_put(struct ib_client *client)
+ {
+ if (refcount_dec_and_test(&client->uses))
+ complete(&client->uses_zero);
+ }
+
/*
* If client_data is registered then the corresponding client must also still
* be registered.
*/
#define CLIENT_DATA_REGISTERED XA_MARK_1
-/**
- * struct rdma_dev_net - rdma net namespace metadata for a net
- * @net: Pointer to owner net namespace
- * @id: xarray id to identify the net namespace.
- */
-struct rdma_dev_net {
- possible_net_t net;
- u32 id;
-};
-
-static unsigned int rdma_dev_net_id;
+unsigned int rdma_dev_net_id;
/*
* A list of net namespaces is maintained in an xarray. This is necessary
rcu_head);
}
+ mutex_destroy(&dev->unregistration_lock);
+ mutex_destroy(&dev->compat_devs_mutex);
+
xa_destroy(&dev->compat_devs);
xa_destroy(&dev->client_data);
kfree_rcu(dev, rcu_head);
return 0;
down_write(&device->client_data_rwsem);
+ /*
+ * So long as the client is registered hold both the client and device
+ * unregistration locks.
+ */
+ if (!refcount_inc_not_zero(&client->uses))
+ goto out_unlock;
+ refcount_inc(&device->refcount);
+
/*
* Another caller to add_client_context got here first and has already
* completely initialized context.
return 0;
out:
+ ib_device_put(device);
+ ib_client_put(client);
+ out_unlock:
up_write(&device->client_data_rwsem);
return ret;
}
client_data = xa_load(&device->client_data, client_id);
xa_clear_mark(&device->client_data, client_id, CLIENT_DATA_REGISTERED);
client = xa_load(&clients, client_id);
- downgrade_write(&device->client_data_rwsem);
+ up_write(&device->client_data_rwsem);
/*
* Notice we cannot be holding any exclusive locks when calling the
*
* For this reason clients and drivers should not call the
* unregistration functions will holdling any locks.
- *
- * It tempting to drop the client_data_rwsem too, but this is required
- * to ensure that unregister_client does not return until all clients
- * are completely unregistered, which is required to avoid module
- * unloading races.
*/
if (client->remove)
client->remove(device, client_data);
xa_erase(&device->client_data, client_id);
- up_read(&device->client_data_rwsem);
+ ib_device_put(device);
+ ib_client_put(client);
}
static int alloc_port_data(struct ib_device *device)
static void rdma_dev_exit_net(struct net *net)
{
- struct rdma_dev_net *rnet = net_generic(net, rdma_dev_net_id);
+ struct rdma_dev_net *rnet = rdma_net_to_dev_net(net);
struct ib_device *dev;
unsigned long index;
int ret;
}
up_read(&devices_rwsem);
+ rdma_nl_net_exit(rnet);
xa_erase(&rdma_nets, rnet->id);
}
static __net_init int rdma_dev_init_net(struct net *net)
{
- struct rdma_dev_net *rnet = net_generic(net, rdma_dev_net_id);
+ struct rdma_dev_net *rnet = rdma_net_to_dev_net(net);
unsigned long index;
struct ib_device *dev;
int ret;
+ write_pnet(&rnet->net, net);
+
+ ret = rdma_nl_net_init(rnet);
+ if (ret)
+ return ret;
+
/* No need to create any compat devices in default init_net. */
if (net_eq(net, &init_net))
return 0;
- write_pnet(&rnet->net, net);
-
ret = xa_alloc(&rdma_nets, &rnet->id, rnet, xa_limit_32b, GFP_KERNEL);
- if (ret)
+ if (ret) {
+ rdma_nl_net_exit(rnet);
return ret;
+ }
down_read(&devices_rwsem);
xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
static void disable_device(struct ib_device *device)
{
- struct ib_client *client;
+ u32 cid;
WARN_ON(!refcount_read(&device->refcount));
xa_clear_mark(&devices, device->index, DEVICE_REGISTERED);
up_write(&devices_rwsem);
+ /*
+ * Remove clients in LIFO order, see assign_client_id. This could be
+ * more efficient if xarray learns to reverse iterate. Since no new
+ * clients can be added to this ib_device past this point we only need
+ * the maximum possible client_id value here.
+ */
down_read(&clients_rwsem);
- list_for_each_entry_reverse(client, &client_list, list)
- remove_client_context(device, client->client_id);
+ cid = highest_client_id;
up_read(&clients_rwsem);
+ while (cid) {
+ cid--;
+ remove_client_context(device, cid);
+ }
/* Pairs with refcount_set in enable_device */
ib_device_put(device);
/*
* The add/remove callbacks must be called in FIFO/LIFO order. To
* achieve this we assign client_ids so they are sorted in
- * registration order, and retain a linked list we can reverse iterate
- * to get the LIFO order. The extra linked list can go away if xarray
- * learns to reverse iterate.
+ * registration order.
*/
- if (list_empty(&client_list)) {
- client->client_id = 0;
- } else {
- struct ib_client *last;
-
- last = list_last_entry(&client_list, struct ib_client, list);
- client->client_id = last->client_id + 1;
- }
+ client->client_id = highest_client_id;
ret = xa_insert(&clients, client->client_id, client, GFP_KERNEL);
if (ret)
goto out;
+ highest_client_id++;
xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED);
- list_add_tail(&client->list, &client_list);
out:
up_write(&clients_rwsem);
return ret;
}
+ static void remove_client_id(struct ib_client *client)
+ {
+ down_write(&clients_rwsem);
+ xa_erase(&clients, client->client_id);
+ for (; highest_client_id; highest_client_id--)
+ if (xa_load(&clients, highest_client_id - 1))
+ break;
+ up_write(&clients_rwsem);
+ }
+
/**
* ib_register_client - Register an IB client
* @client:Client to register
unsigned long index;
int ret;
+ refcount_set(&client->uses, 1);
+ init_completion(&client->uses_zero);
ret = assign_client_id(client);
if (ret)
return ret;
unsigned long index;
down_write(&clients_rwsem);
+ ib_client_put(client);
xa_clear_mark(&clients, client->client_id, CLIENT_REGISTERED);
up_write(&clients_rwsem);
- /*
- * Every device still known must be serialized to make sure we are
- * done with the client callbacks before we return.
- */
- down_read(&devices_rwsem);
- xa_for_each (&devices, index, device)
+
+ /* We do not want to have locks while calling client->remove() */
+ rcu_read_lock();
+ xa_for_each (&devices, index, device) {
+ if (!ib_device_try_get(device))
+ continue;
+ rcu_read_unlock();
+
remove_client_context(device, client->client_id);
- up_read(&devices_rwsem);
- down_write(&clients_rwsem);
- list_del(&client->list);
- xa_erase(&clients, client->client_id);
- up_write(&clients_rwsem);
+ ib_device_put(device);
+ rcu_read_lock();
+ }
+ rcu_read_unlock();
+
+ /*
+ * remove_client_context() is not a fence, it can return even though a
+ * removal is ongoing. Wait until all removals are completed.
+ */
+ wait_for_completion(&client->uses_zero);
+ remove_client_id(client);
}
EXPORT_SYMBOL(ib_unregister_client);
}
EXPORT_SYMBOL(ib_dispatch_event);
-/**
- * ib_query_port - Query IB port attributes
- * @device:Device to query
- * @port_num:Port number to query
- * @port_attr:Port attributes
- *
- * ib_query_port() returns the attributes of a port through the
- * @port_attr pointer.
- */
-int ib_query_port(struct ib_device *device,
- u8 port_num,
- struct ib_port_attr *port_attr)
+static int iw_query_port(struct ib_device *device,
+ u8 port_num,
+ struct ib_port_attr *port_attr)
{
- union ib_gid gid;
+ struct in_device *inetdev;
+ struct net_device *netdev;
int err;
- if (!rdma_is_port_valid(device, port_num))
- return -EINVAL;
+ memset(port_attr, 0, sizeof(*port_attr));
+
+ netdev = ib_device_get_netdev(device, port_num);
+ if (!netdev)
+ return -ENODEV;
+
+ dev_put(netdev);
+
+ port_attr->max_mtu = IB_MTU_4096;
+ port_attr->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
+
+ if (!netif_carrier_ok(netdev)) {
+ port_attr->state = IB_PORT_DOWN;
+ port_attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
+ } else {
+ inetdev = in_dev_get(netdev);
+
+ if (inetdev && inetdev->ifa_list) {
+ port_attr->state = IB_PORT_ACTIVE;
+ port_attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
+ in_dev_put(inetdev);
+ } else {
+ port_attr->state = IB_PORT_INIT;
+ port_attr->phys_state =
+ IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING;
+ }
+ }
+
+ err = device->ops.query_port(device, port_num, port_attr);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int __ib_query_port(struct ib_device *device,
+ u8 port_num,
+ struct ib_port_attr *port_attr)
+{
+ union ib_gid gid = {};
+ int err;
memset(port_attr, 0, sizeof(*port_attr));
+
err = device->ops.query_port(device, port_num, port_attr);
if (err || port_attr->subnet_prefix)
return err;
- if (rdma_port_get_link_layer(device, port_num) != IB_LINK_LAYER_INFINIBAND)
+ if (rdma_port_get_link_layer(device, port_num) !=
+ IB_LINK_LAYER_INFINIBAND)
return 0;
err = device->ops.query_gid(device, port_num, 0, &gid);
port_attr->subnet_prefix = be64_to_cpu(gid.global.subnet_prefix);
return 0;
}
+
+/**
+ * ib_query_port - Query IB port attributes
+ * @device:Device to query
+ * @port_num:Port number to query
+ * @port_attr:Port attributes
+ *
+ * ib_query_port() returns the attributes of a port through the
+ * @port_attr pointer.
+ */
+int ib_query_port(struct ib_device *device,
+ u8 port_num,
+ struct ib_port_attr *port_attr)
+{
+ if (!rdma_is_port_valid(device, port_num))
+ return -EINVAL;
+
+ if (rdma_protocol_iwarp(device, port_num))
+ return iw_query_port(device, port_num, port_attr);
+ else
+ return __ib_query_port(device, port_num, port_attr);
+}
EXPORT_SYMBOL(ib_query_port);
static void add_ndev_hash(struct ib_port_data *pdata)
SET_DEVICE_OP(dev_ops, get_vf_config);
SET_DEVICE_OP(dev_ops, get_vf_stats);
SET_DEVICE_OP(dev_ops, init_port);
+ SET_DEVICE_OP(dev_ops, invalidate_range);
SET_DEVICE_OP(dev_ops, iw_accept);
SET_DEVICE_OP(dev_ops, iw_add_ref);
SET_DEVICE_OP(dev_ops, iw_connect);
goto err_comp_unbound;
}
- ret = rdma_nl_init();
- if (ret) {
- pr_warn("Couldn't init IB netlink interface: err %d\n", ret);
- goto err_sysfs;
- }
-
ret = addr_init();
if (ret) {
pr_warn("Could't init IB address resolution\n");
err_addr:
addr_cleanup();
err_ibnl:
- rdma_nl_exit();
-err_sysfs:
class_unregister(&ib_class);
err_comp_unbound:
destroy_workqueue(ib_comp_unbound_wq);
nlmsg_end(msg, nlh);
ib_device_put(device);
- return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
err_free:
nlmsg_free(msg);
nlmsg_end(msg, nlh);
ib_device_put(device);
- return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
err_free:
nlmsg_free(msg);
nlmsg_end(msg, nlh);
ib_device_put(device);
- return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
err_free:
nlmsg_free(msg);
nlmsg_end(msg, nlh);
ib_device_put(device);
- return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
err_free:
nlmsg_free(msg);
put_device(data.cdev);
if (ibdev)
ib_device_put(ibdev);
- return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
out_data:
put_device(data.cdev);
return err;
}
nlmsg_end(msg, nlh);
- return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
}
static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
nlmsg_end(msg, nlh);
ib_device_put(device);
- return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
err_fill:
rdma_counter_unbind_qpn(device, port, qpn, cntn);
nlmsg_end(msg, nlh);
ib_device_put(device);
- return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
err_fill:
rdma_counter_bind_qpn(device, port, qpn, cntn);
mutex_unlock(&stats->lock);
nlmsg_end(msg, nlh);
ib_device_put(device);
- return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
err_table:
nla_nest_cancel(msg, table_attr);
if (fill_nldev_handle(msg, device) ||
nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
- nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode))
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode)) {
+ ret = -EMSGSIZE;
goto err_msg;
+ }
if ((mode == RDMA_COUNTER_MODE_AUTO) &&
- nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask))
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask)) {
+ ret = -EMSGSIZE;
goto err_msg;
+ }
nlmsg_end(msg, nlh);
ib_device_put(device);
- return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
err_msg:
nlmsg_free(msg);
#include <linux/sched.h>
#include <linux/semaphore.h>
#include <linux/slab.h>
+ #include <linux/nospec.h>
#include <linux/uaccess.h>
if (get_user(id, arg))
return -EFAULT;
+ if (id >= IB_UMAD_MAX_AGENTS)
+ return -EINVAL;
mutex_lock(&file->port->file_mutex);
mutex_lock(&file->mutex);
- if (id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) {
+ id = array_index_nospec(id, IB_UMAD_MAX_AGENTS);
+ if (!__get_agent(file, id)) {
ret = -EINVAL;
goto out;
}
ib_unregister_mad_agent(file->agent[i]);
mutex_unlock(&file->port->file_mutex);
-
+ mutex_destroy(&file->mutex);
kfree(file);
return 0;
}
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <rdma/opa_addr.h>
+ #include <linux/nospec.h>
#include "hfi.h"
#include "common.h"
else
pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
- if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode)))
- pbc = hfi1_fault_tx(qp, ps->opcode, pbc);
pbc = create_pbc(ppd,
pbc,
qp->srate_mbps,
vl,
plen);
- /* Update HCRC based on packet opcode */
- pbc = update_hcrc(ps->opcode, pbc);
+ if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode)))
+ pbc = hfi1_fault_tx(qp, ps->opcode, pbc);
+ else
+ /* Update HCRC based on packet opcode */
+ pbc = update_hcrc(ps->opcode, pbc);
}
tx->wqe = qp->s_wqe;
ret = build_verbs_tx_desc(tx->sde, len, tx, ahg_info, pbc);
else
pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
+ pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode)))
pbc = hfi1_fault_tx(qp, ps->opcode, pbc);
- pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
-
- /* Update HCRC based on packet opcode */
- pbc = update_hcrc(ps->opcode, pbc);
+ else
+ /* Update HCRC based on packet opcode */
+ pbc = update_hcrc(ps->opcode, pbc);
}
if (cb)
iowait_pio_inc(&priv->s_iowait);
sl = rdma_ah_get_sl(ah_attr);
if (sl >= ARRAY_SIZE(ibp->sl_to_sc))
return -EINVAL;
+ sl = array_index_nospec(sl, ARRAY_SIZE(ibp->sl_to_sc));
sc5 = ibp->sl_to_sc[sl];
if (sc_to_vlt(dd, sc5) > num_vls && sc_to_vlt(dd, sc5) != 0xf)
atomic_set(&free_mr->mr_free_cq->ib_cq.usecnt, 0);
pd = rdma_zalloc_drv_obj(ibdev, ib_pd);
- if (!pd)
+ if (!pd) {
+ ret = -ENOMEM;
goto alloc_mem_failed;
+ }
pd->device = ibdev;
ret = hns_roce_alloc_pd(pd, NULL);
struct hns_roce_free_mr *free_mr;
struct hns_roce_v1_priv *priv;
struct completion comp;
- unsigned long end = HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS;
+ long end = HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS;
priv = (struct hns_roce_v1_priv *)hr_dev->priv;
free_mr = &priv->free_mr;
queue_work(free_mr->free_mr_wq, &(lp_qp_work->work));
- while (end) {
+ while (end > 0) {
if (try_wait_for_completion(&comp))
return 0;
msleep(HNS_ROCE_V1_RECREATE_LP_QP_WAIT_VALUE);
struct hns_roce_free_mr *free_mr;
struct hns_roce_v1_priv *priv;
struct completion comp;
- unsigned long end = HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS;
+ long end = HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS;
unsigned long start = jiffies;
int npages;
int ret = 0;
queue_work(free_mr->free_mr_wq, &(mr_work->work));
- while (end) {
+ while (end > 0) {
if (try_wait_for_completion(&comp))
goto free_mr;
msleep(HNS_ROCE_V1_FREE_MR_WAIT_VALUE);
{
struct device *dev = &hr_dev->pdev->dev;
struct hns_roce_v1_priv *priv;
- unsigned long end = 0, flags = 0;
+ unsigned long flags = 0;
+ long end = HW_SYNC_TIMEOUT_MSECS;
__le32 bt_cmd_val[2] = {0};
void __iomem *bt_cmd;
u64 bt_ba = 0;
bt_cmd = hr_dev->reg_base + ROCEE_BT_CMD_H_REG;
- end = HW_SYNC_TIMEOUT_MSECS;
while (1) {
if (readl(bt_cmd) >> BT_CMD_SYNC_SHIFT) {
if (!end) {
/* fetch the interrupt numbers */
for (i = 0; i < HNS_ROCE_V1_MAX_IRQ_NUM; i++) {
hr_dev->irq[i] = platform_get_irq(hr_dev->pdev, i);
- if (hr_dev->irq[i] <= 0) {
- dev_err(dev, "platform get of irq[=%d] failed!\n", i);
+ if (hr_dev->irq[i] <= 0)
return -EINVAL;
- }
}
return 0;
case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
case MLX5_EVENT_TYPE_DCT_DRAINED:
case MLX5_EVENT_TYPE_COMP:
+ case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
+ case MLX5_EVENT_TYPE_XRQ_ERROR:
return true;
default:
return false;
case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
return eqe->data.qp_srq.type;
case MLX5_EVENT_TYPE_CQ_ERROR:
+ case MLX5_EVENT_TYPE_XRQ_ERROR:
return 0;
case MLX5_EVENT_TYPE_DCT_DRAINED:
+ case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
return MLX5_EVENT_QUEUE_TYPE_DCT;
default:
return MLX5_GET(affiliated_event_header, &eqe->data, obj_type);
break;
case MLX5_CMD_OP_ARM_XRQ:
case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
+ case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
+ case MLX5_CMD_OP_MODIFY_XRQ:
obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
MLX5_GET(arm_xrq_in, in, xrqn));
break;
return true;
return false;
}
+ case MLX5_CMD_OP_CREATE_PSV:
+ {
+ u8 num_psv = MLX5_GET(create_psv_in, in, num_psv);
+
+ if (num_psv == 1)
+ return true;
+ return false;
+ }
default:
return false;
}
case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
case MLX5_CMD_OP_ARM_XRQ:
case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
+ case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
+ case MLX5_CMD_OP_MODIFY_XRQ:
return true;
case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
{
case MLX5_CMD_OP_QUERY_CONG_STATUS:
case MLX5_CMD_OP_QUERY_CONG_PARAMS:
case MLX5_CMD_OP_QUERY_CONG_STATISTICS:
+ case MLX5_CMD_OP_QUERY_LAG:
return true;
default:
return false;
case MLX5_CMD_OP_ALLOC_XRCD:
MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
break;
+ case MLX5_CMD_OP_CREATE_PSV:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_CMD_OP_DESTROY_PSV);
+ MLX5_SET(destroy_psv_in, din, psvn,
+ MLX5_GET(create_psv_out, out, psv0_index));
+ break;
default:
/* The entry must match to one of the devx_is_obj_create_cmd */
WARN_ON(true);
event_sub->eventfd =
eventfd_ctx_fdget(redirect_fd);
- if (IS_ERR(event_sub)) {
+ if (IS_ERR(event_sub->eventfd)) {
err = PTR_ERR(event_sub->eventfd);
event_sub->eventfd = NULL;
goto err;
case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
obj_id = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
break;
+ case MLX5_EVENT_TYPE_XRQ_ERROR:
+ obj_id = be32_to_cpu(eqe->data.xrq_err.type_xrqn) & 0xffffff;
+ break;
case MLX5_EVENT_TYPE_DCT_DRAINED:
+ case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
obj_id = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
break;
case MLX5_EVENT_TYPE_CQ_ERROR:
struct devx_async_event_file *ev_file = filp->private_data;
struct devx_event_subscription *event_sub, *event_sub_tmp;
struct devx_async_event_data *entry, *tmp;
+ struct mlx5_ib_dev *dev = ev_file->dev;
- mutex_lock(&ev_file->dev->devx_event_table.event_xa_lock);
+ mutex_lock(&dev->devx_event_table.event_xa_lock);
/* delete the subscriptions which are related to this FD */
list_for_each_entry_safe(event_sub, event_sub_tmp,
&ev_file->subscribed_events_list, file_list) {
- devx_cleanup_subscription(ev_file->dev, event_sub);
+ devx_cleanup_subscription(dev, event_sub);
if (event_sub->eventfd)
eventfd_ctx_put(event_sub->eventfd);
kfree_rcu(event_sub, rcu);
}
- mutex_unlock(&ev_file->dev->devx_event_table.event_xa_lock);
+ mutex_unlock(&dev->devx_event_table.event_xa_lock);
/* free the pending events allocation */
if (!ev_file->omit_data) {
}
uverbs_close_fd(filp);
- put_device(&ev_file->dev->ib_dev.dev);
+ put_device(&dev->ib_dev.dev);
return 0;
}
props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg);
props->pkey_tbl_len = 1;
props->state = IB_PORT_DOWN;
- props->phys_state = 3;
+ props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr);
props->qkey_viol_cntr = qkey_viol_cntr;
if (netif_running(ndev) && netif_carrier_ok(ndev)) {
props->state = IB_PORT_ACTIVE;
- props->phys_state = 5;
+ props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
}
ndev_ib_mtu = iboe_get_mtu(ndev->mtu);
if (err)
goto out_sys_pages;
- if (ibdev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)
- context->ibucontext.invalidate_range =
- &mlx5_ib_invalidate_range;
-
if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
err = mlx5_ib_devx_create(dev, true);
if (err < 0)
INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
};
+static bool is_mdev_switchdev_mode(const struct mlx5_core_dev *mdev)
+{
+ return MLX5_ESWITCH_MANAGER(mdev) &&
+ mlx5_ib_eswitch_mode(mdev->priv.eswitch) ==
+ MLX5_ESWITCH_OFFLOADS;
+}
+
static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
{
+ int num_cnt_ports;
int i;
- for (i = 0; i < dev->num_ports; i++) {
+ num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
+
+ for (i = 0; i < num_cnt_ports; i++) {
if (dev->port[i].cnts.set_id_valid)
mlx5_core_dealloc_q_counter(dev->mdev,
dev->port[i].cnts.set_id);
static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
{
+ int num_cnt_ports;
int err = 0;
int i;
bool is_shared;
is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
+ num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
- for (i = 0; i < dev->num_ports; i++) {
+ for (i = 0; i < num_cnt_ports; i++) {
err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts);
if (err)
goto err_alloc;
}
dev->port[i].cnts.set_id_valid = true;
}
-
return 0;
err_alloc:
return err;
}
+static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
+ u8 port_num)
+{
+ return is_mdev_switchdev_mode(dev->mdev) ? &dev->port[0].cnts :
+ &dev->port[port_num].cnts;
+}
+
+/**
+ * mlx5_ib_get_counters_id - Returns counters id to use for device+port
+ * @dev: Pointer to mlx5 IB device
+ * @port_num: Zero based port number
+ *
+ * mlx5_ib_get_counters_id() Returns counters set id to use for given
+ * device port combination in switchdev and non switchdev mode of the
+ * parent device.
+ */
+u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num)
+{
+ const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
+
+ return cnts->set_id;
+}
+
static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
u8 port_num)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_ib_port *port = &dev->port[port_num - 1];
+ const struct mlx5_ib_counters *cnts;
+ bool is_switchdev = is_mdev_switchdev_mode(dev->mdev);
- /* We support only per port stats */
- if (port_num == 0)
+ if ((is_switchdev && port_num) || (!is_switchdev && !port_num))
return NULL;
- return rdma_alloc_hw_stats_struct(port->cnts.names,
- port->cnts.num_q_counters +
- port->cnts.num_cong_counters +
- port->cnts.num_ext_ppcnt_counters,
+ cnts = get_counters(dev, port_num - 1);
+
+ return rdma_alloc_hw_stats_struct(cnts->names,
+ cnts->num_q_counters +
+ cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters,
RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
- struct mlx5_ib_port *port,
+ const struct mlx5_ib_counters *cnts,
struct rdma_hw_stats *stats,
u16 set_id)
{
if (ret)
goto free;
- for (i = 0; i < port->cnts.num_q_counters; i++) {
- val = *(__be32 *)(out + port->cnts.offsets[i]);
+ for (i = 0; i < cnts->num_q_counters; i++) {
+ val = *(__be32 *)(out + cnts->offsets[i]);
stats->value[i] = (u64)be32_to_cpu(val);
}
}
static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
- struct mlx5_ib_port *port,
- struct rdma_hw_stats *stats)
+ const struct mlx5_ib_counters *cnts,
+ struct rdma_hw_stats *stats)
{
- int offset = port->cnts.num_q_counters + port->cnts.num_cong_counters;
+ int offset = cnts->num_q_counters + cnts->num_cong_counters;
int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
int ret, i;
void *out;
if (ret)
goto free;
- for (i = 0; i < port->cnts.num_ext_ppcnt_counters; i++) {
+ for (i = 0; i < cnts->num_ext_ppcnt_counters; i++)
stats->value[i + offset] =
be64_to_cpup((__be64 *)(out +
- port->cnts.offsets[i + offset]));
- }
-
+ cnts->offsets[i + offset]));
free:
kvfree(out);
return ret;
u8 port_num, int index)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_ib_port *port = &dev->port[port_num - 1];
+ const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1);
struct mlx5_core_dev *mdev;
int ret, num_counters;
u8 mdev_port_num;
if (!stats)
return -EINVAL;
- num_counters = port->cnts.num_q_counters +
- port->cnts.num_cong_counters +
- port->cnts.num_ext_ppcnt_counters;
+ num_counters = cnts->num_q_counters +
+ cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters;
/* q_counters are per IB device, query the master mdev */
- ret = mlx5_ib_query_q_counters(dev->mdev, port, stats,
- port->cnts.set_id);
+ ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats, cnts->set_id);
if (ret)
return ret;
if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
- ret = mlx5_ib_query_ext_ppcnt_counters(dev, port, stats);
+ ret = mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats);
if (ret)
return ret;
}
}
ret = mlx5_lag_query_cong_counters(dev->mdev,
stats->value +
- port->cnts.num_q_counters,
- port->cnts.num_cong_counters,
- port->cnts.offsets +
- port->cnts.num_q_counters);
+ cnts->num_q_counters,
+ cnts->num_cong_counters,
+ cnts->offsets +
+ cnts->num_q_counters);
mlx5_ib_put_native_port_mdev(dev, port_num);
if (ret)
mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
{
struct mlx5_ib_dev *dev = to_mdev(counter->device);
- struct mlx5_ib_port *port = &dev->port[counter->port - 1];
+ const struct mlx5_ib_counters *cnts =
+ get_counters(dev, counter->port - 1);
/* Q counters are in the beginning of all counters */
- return rdma_alloc_hw_stats_struct(port->cnts.names,
- port->cnts.num_q_counters,
+ return rdma_alloc_hw_stats_struct(cnts->names,
+ cnts->num_q_counters,
RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
{
struct mlx5_ib_dev *dev = to_mdev(counter->device);
- struct mlx5_ib_port *port = &dev->port[counter->port - 1];
+ const struct mlx5_ib_counters *cnts =
+ get_counters(dev, counter->port - 1);
- return mlx5_ib_query_q_counters(dev->mdev, port,
+ return mlx5_ib_query_q_counters(dev->mdev, cnts,
counter->stats, counter->id);
}
mlx5_ib_warn(dev, "Failed to init delay drop debugfs\n");
}
-/* The mlx5_ib_multiport_mutex should be held when calling this function */
static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
struct mlx5_ib_multiport_info *mpi)
{
int err;
int i;
+ lockdep_assert_held(&mlx5_ib_multiport_mutex);
+
mlx5_ib_cleanup_cong_debugfs(ibdev, port_num);
spin_lock(&port->mp.mpi_lock);
return;
}
- if (mpi->mdev_events.notifier_call)
- mlx5_notifier_unregister(mpi->mdev, &mpi->mdev_events);
- mpi->mdev_events.notifier_call = NULL;
-
mpi->ibdev = NULL;
spin_unlock(&port->mp.mpi_lock);
+ if (mpi->mdev_events.notifier_call)
+ mlx5_notifier_unregister(mpi->mdev, &mpi->mdev_events);
+ mpi->mdev_events.notifier_call = NULL;
mlx5_remove_netdev_notifier(ibdev, port_num);
spin_lock(&port->mp.mpi_lock);
ibdev->port[port_num].roce.last_port_state = IB_PORT_DOWN;
}
-/* The mlx5_ib_multiport_mutex should be held when calling this function */
static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
struct mlx5_ib_multiport_info *mpi)
{
u8 port_num = mlx5_core_native_port_num(mpi->mdev) - 1;
int err;
+ lockdep_assert_held(&mlx5_ib_multiport_mutex);
+
spin_lock(&ibdev->port[port_num].mp.mpi_lock);
if (ibdev->port[port_num].mp.mpi) {
mlx5_ib_dbg(ibdev, "port %d already affiliated.\n",
dev->port = kcalloc(num_ports, sizeof(*dev->port),
GFP_KERNEL);
if (!dev->port) {
- ib_dealloc_device((struct ib_device *)dev);
+ ib_dealloc_device(&dev->ib_dev);
return NULL;
}
for (i = 0; i < nentries; i++, pklm++) {
pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
va = (offset + i) * MLX5_IMR_MTT_SIZE;
- if (odp && odp->umem.address == va) {
+ if (odp && ib_umem_start(odp) == va) {
struct mlx5_ib_mr *mtt = odp->private;
pklm->key = cpu_to_be32(mtt->ibmr.lkey);
mr->parent = NULL;
synchronize_srcu(&mr->dev->mr_srcu);
- ib_umem_release(&odp->umem);
+ ib_umem_odp_release(odp);
if (imr->live)
mlx5_ib_update_xlt(imr, idx, 1, 0,
MLX5_IB_UPD_XLT_INDIRECT |
}
static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
- struct ib_umem *umem,
+ struct ib_umem_odp *umem_odp,
bool ksm, int access_flags)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
mr->dev = dev;
mr->access_flags = access_flags;
mr->mmkey.iova = 0;
- mr->umem = umem;
+ mr->umem = &umem_odp->umem;
if (ksm) {
err = mlx5_ib_update_xlt(mr, 0,
if (nentries)
nentries++;
} else {
- odp = ib_alloc_odp_umem(odp_mr, addr,
- MLX5_IMR_MTT_SIZE);
+ odp = ib_umem_odp_alloc_child(odp_mr, addr, MLX5_IMR_MTT_SIZE);
if (IS_ERR(odp)) {
mutex_unlock(&odp_mr->umem_mutex);
return ERR_CAST(odp);
}
- mtt = implicit_mr_alloc(mr->ibmr.pd, &odp->umem, 0,
+ mtt = implicit_mr_alloc(mr->ibmr.pd, odp, 0,
mr->access_flags);
if (IS_ERR(mtt)) {
mutex_unlock(&odp_mr->umem_mutex);
- ib_umem_release(&odp->umem);
+ ib_umem_odp_release(odp);
return ERR_CAST(mtt);
}
addr += MLX5_IMR_MTT_SIZE;
if (unlikely(addr < io_virt + bcnt)) {
odp = odp_next(odp);
- if (odp && odp->umem.address != addr)
+ if (odp && ib_umem_start(odp) != addr)
odp = NULL;
goto next_mr;
}
int access_flags)
{
struct mlx5_ib_mr *imr;
- struct ib_umem *umem;
+ struct ib_umem_odp *umem_odp;
- umem = ib_umem_get(udata, 0, 0, access_flags, 0);
- if (IS_ERR(umem))
- return ERR_CAST(umem);
+ umem_odp = ib_umem_odp_alloc_implicit(udata, access_flags);
+ if (IS_ERR(umem_odp))
+ return ERR_CAST(umem_odp);
- imr = implicit_mr_alloc(&pd->ibpd, umem, 1, access_flags);
+ imr = implicit_mr_alloc(&pd->ibpd, umem_odp, 1, access_flags);
if (IS_ERR(imr)) {
- ib_umem_release(umem);
+ ib_umem_odp_release(umem_odp);
return ERR_CAST(imr);
}
- imr->umem = umem;
+ imr->umem = &umem_odp->umem;
init_waitqueue_head(&imr->q_leaf_free);
atomic_set(&imr->num_leaf_free, 0);
atomic_set(&imr->num_pending_prefetch, 0);
return imr;
}
- static int mr_leaf_free(struct ib_umem_odp *umem_odp, u64 start, u64 end,
- void *cookie)
+ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
{
- struct mlx5_ib_mr *mr = umem_odp->private, *imr = cookie;
-
- if (mr->parent != imr)
- return 0;
-
- ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
- ib_umem_end(umem_odp));
+ struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(imr);
+ struct rb_node *node;
- if (umem_odp->dying)
- return 0;
+ down_read(&per_mm->umem_rwsem);
+ for (node = rb_first_cached(&per_mm->umem_tree); node;
+ node = rb_next(node)) {
+ struct ib_umem_odp *umem_odp =
+ rb_entry(node, struct ib_umem_odp, interval_tree.rb);
+ struct mlx5_ib_mr *mr = umem_odp->private;
- WRITE_ONCE(umem_odp->dying, 1);
- atomic_inc(&imr->num_leaf_free);
- schedule_work(&umem_odp->work);
+ if (mr->parent != imr)
+ continue;
- return 0;
- }
+ ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
+ ib_umem_end(umem_odp));
- void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
- {
- struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(imr);
+ if (umem_odp->dying)
+ continue;
- down_read(&per_mm->umem_rwsem);
- rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, 0, ULLONG_MAX,
- mr_leaf_free, true, imr);
+ WRITE_ONCE(umem_odp->dying, 1);
+ atomic_inc(&imr->num_leaf_free);
+ schedule_work(&umem_odp->work);
+ }
up_read(&per_mm->umem_rwsem);
wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free));
u32 flags)
{
int npages = 0, current_seq, page_shift, ret, np;
- bool implicit = false;
struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem);
bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE;
bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH;
struct ib_umem_odp *odp;
size_t size;
- if (!odp_mr->page_list) {
+ if (odp_mr->is_implicit_odp) {
odp = implicit_mr_get_data(mr, io_virt, bcnt);
if (IS_ERR(odp))
return PTR_ERR(odp);
mr = odp->private;
- implicit = true;
} else {
odp = odp_mr;
}
start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;
access_mask = ODP_READ_ALLOWED_BIT;
- if (prefetch && !downgrade && !mr->umem->writable) {
+ if (prefetch && !downgrade && !odp->umem.writable) {
/* prefetch with write-access must
* be supported by the MR
*/
goto out;
}
- if (mr->umem->writable && !downgrade)
+ if (odp->umem.writable && !downgrade)
access_mask |= ODP_WRITE_ALLOWED_BIT;
current_seq = READ_ONCE(odp->notifiers_seq);
*/
smp_rmb();
- ret = ib_umem_odp_map_dma_pages(to_ib_umem_odp(mr->umem), io_virt, size,
- access_mask, current_seq);
+ ret = ib_umem_odp_map_dma_pages(odp, io_virt, size, access_mask,
+ current_seq);
if (ret < 0)
goto out;
np = ret;
mutex_lock(&odp->umem_mutex);
- if (!ib_umem_mmu_notifier_retry(to_ib_umem_odp(mr->umem),
- current_seq)) {
+ if (!ib_umem_mmu_notifier_retry(odp, current_seq)) {
/*
* No need to check whether the MTTs really belong to
* this MR, since ib_umem_odp_map_dma_pages already
io_virt += size;
next = odp_next(odp);
- if (unlikely(!next || next->umem.address != io_virt)) {
+ if (unlikely(!next || ib_umem_start(next) != io_virt)) {
mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n",
io_virt, next);
return -EAGAIN;
out:
if (ret == -EAGAIN) {
- if (implicit || !odp->dying) {
- unsigned long timeout =
- msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);
-
- if (!wait_for_completion_timeout(
- &odp->notifier_completion,
- timeout)) {
- mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d. notifiers_count=%d\n",
- current_seq, odp->notifiers_seq, odp->notifiers_count);
- }
- } else {
- /* The MR is being killed, kill the QP as well. */
- ret = -EFAULT;
+ unsigned long timeout = msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);
+
+ if (!wait_for_completion_timeout(&odp->notifier_completion,
+ timeout)) {
+ mlx5_ib_warn(
+ dev,
+ "timeout waiting for mmu notifier. seq %d against %d. notifiers_count=%d\n",
+ current_seq, odp->notifiers_seq,
+ odp->notifiers_count);
}
}
u32 transport_caps;
struct mlx5_base_av *av;
unsigned ds, opcode;
-#if defined(DEBUG)
- u32 ctrl_wqe_index, ctrl_qpn;
-#endif
u32 qpn = qp->trans_qp.base.mqp.qpn;
ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
return -EFAULT;
}
-#if defined(DEBUG)
- ctrl_wqe_index = (be32_to_cpu(ctrl->opmod_idx_opcode) &
- MLX5_WQE_CTRL_WQE_INDEX_MASK) >>
- MLX5_WQE_CTRL_WQE_INDEX_SHIFT;
- if (wqe_index != ctrl_wqe_index) {
- mlx5_ib_err(dev, "Got WQE with invalid wqe_index. wqe_index=0x%x, qpn=0x%x ctrl->wqe_index=0x%x\n",
- wqe_index, qpn,
- ctrl_wqe_index);
- return -EFAULT;
- }
-
- ctrl_qpn = (be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_QPN_MASK) >>
- MLX5_WQE_CTRL_QPN_SHIFT;
- if (qpn != ctrl_qpn) {
- mlx5_ib_err(dev, "Got WQE with incorrect QP number. wqe_index=0x%x, qpn=0x%x ctrl->qpn=0x%x\n",
- wqe_index, qpn,
- ctrl_qpn);
- return -EFAULT;
- }
-#endif /* DEBUG */
-
*wqe_end = *wqe + ds * MLX5_WQE_DS_UNITS;
*wqe += sizeof(*ctrl);
static const struct ib_device_ops mlx5_ib_dev_odp_ops = {
.advise_mr = mlx5_ib_advise_mr,
+ .invalidate_range = mlx5_ib_invalidate_range,
};
int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
attr->gid_tbl_len = 1;
attr->max_msg_sz = -1;
attr->max_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
- attr->phys_state = sdev->state == IB_PORT_ACTIVE ? 5 : 3;
+ attr->phys_state = sdev->state == IB_PORT_ACTIVE ?
+ IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED;
attr->pkey_tbl_len = 1;
attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
attr->state = sdev->state;
spin_lock_init(&cq->lock);
- cq->notify = &((struct siw_cq_ctrl *)&cq->queue[size])->notify;
+ cq->notify = (struct siw_cq_ctrl *)&cq->queue[size];
if (udata) {
struct siw_uresp_create_cq uresp = {};
siw_dbg_cq(cq, "flags: 0x%02x\n", flags);
if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
- /* CQ event for next solicited completion */
- smp_store_mb(*cq->notify, SIW_NOTIFY_SOLICITED);
+ /*
+ * Enable CQ event for next solicited completion.
+ * and make it visible to all associated producers.
+ */
+ smp_store_mb(cq->notify->flags, SIW_NOTIFY_SOLICITED);
else
- /* CQ event for any signalled completion */
- smp_store_mb(*cq->notify, SIW_NOTIFY_ALL);
+ /*
+ * Enable CQ event for any signalled completion.
+ * and make it visible to all associated producers.
+ */
+ smp_store_mb(cq->notify->flags, SIW_NOTIFY_ALL);
if (flags & IB_CQ_REPORT_MISSED_EVENTS)
return cq->cq_put - cq->cq_get;
* access to counter list:
* - create (user context)
* - mlx5_fc_create() only adds to an addlist to be used by
- * mlx5_fc_stats_query_work(). addlist is a lockless single linked list
+ * mlx5_fc_stats_work(). addlist is a lockless single linked list
* that doesn't require any additional synchronization when adding single
* node.
* - spawn thread to do the actual destroy
spin_unlock(&fc_stats->counters_idr_lock);
}
-/* The function returns the last counter that was queried so the caller
- * function can continue calling it till all counters are queried.
- */
-static struct mlx5_fc *mlx5_fc_stats_query(struct mlx5_core_dev *dev,
- struct mlx5_fc *first,
- u32 last_id)
+static int get_max_bulk_query_len(struct mlx5_core_dev *dev)
{
- struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
- struct mlx5_fc *counter = NULL;
- struct mlx5_cmd_fc_bulk *b;
- bool more = false;
- u32 afirst_id;
- int num;
- int err;
+ return min_t(int, MLX5_SW_MAX_COUNTERS_BULK,
+ (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk)));
+}
- int max_bulk = min_t(int, MLX5_SW_MAX_COUNTERS_BULK,
- (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk)));
+static void update_counter_cache(int index, u32 *bulk_raw_data,
+ struct mlx5_fc_cache *cache)
+{
+ void *stats = MLX5_ADDR_OF(query_flow_counter_out, bulk_raw_data,
+ flow_statistics[index]);
+ u64 packets = MLX5_GET64(traffic_counter, stats, packets);
+ u64 bytes = MLX5_GET64(traffic_counter, stats, octets);
- /* first id must be aligned to 4 when using bulk query */
- afirst_id = first->id & ~0x3;
+ if (cache->packets == packets)
+ return;
- /* number of counters to query inc. the last counter */
- num = ALIGN(last_id - afirst_id + 1, 4);
- if (num > max_bulk) {
- num = max_bulk;
- last_id = afirst_id + num - 1;
- }
+ cache->packets = packets;
+ cache->bytes = bytes;
+ cache->lastuse = jiffies;
+}
- b = mlx5_cmd_fc_bulk_alloc(dev, afirst_id, num);
- if (!b) {
- mlx5_core_err(dev, "Error allocating resources for bulk query\n");
- return NULL;
- }
+static void mlx5_fc_stats_query_counter_range(struct mlx5_core_dev *dev,
+ struct mlx5_fc *first,
+ u32 last_id)
+{
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+ bool query_more_counters = (first->id <= last_id);
+ int max_bulk_len = get_max_bulk_query_len(dev);
+ u32 *data = fc_stats->bulk_query_out;
+ struct mlx5_fc *counter = first;
+ u32 bulk_base_id;
+ int bulk_len;
+ int err;
- err = mlx5_cmd_fc_bulk_query(dev, b);
- if (err) {
- mlx5_core_err(dev, "Error doing bulk query: %d\n", err);
- goto out;
- }
+ while (query_more_counters) {
+ /* first id must be aligned to 4 when using bulk query */
+ bulk_base_id = counter->id & ~0x3;
- counter = first;
- list_for_each_entry_from(counter, &fc_stats->counters, list) {
- struct mlx5_fc_cache *c = &counter->cache;
- u64 packets;
- u64 bytes;
+ /* number of counters to query inc. the last counter */
+ bulk_len = min_t(int, max_bulk_len,
+ ALIGN(last_id - bulk_base_id + 1, 4));
- if (counter->id > last_id) {
- more = true;
- break;
+ err = mlx5_cmd_fc_bulk_query(dev, bulk_base_id, bulk_len,
+ data);
+ if (err) {
+ mlx5_core_err(dev, "Error doing bulk query: %d\n", err);
+ return;
}
+ query_more_counters = false;
- mlx5_cmd_fc_bulk_get(dev, b,
- counter->id, &packets, &bytes);
+ list_for_each_entry_from(counter, &fc_stats->counters, list) {
+ int counter_index = counter->id - bulk_base_id;
+ struct mlx5_fc_cache *cache = &counter->cache;
- if (c->packets == packets)
- continue;
+ if (counter->id >= bulk_base_id + bulk_len) {
+ query_more_counters = true;
+ break;
+ }
- c->packets = packets;
- c->bytes = bytes;
- c->lastuse = jiffies;
+ update_counter_cache(counter_index, data, cache);
+ }
}
-
-out:
- mlx5_cmd_fc_bulk_free(b);
-
- return more ? counter : NULL;
}
static void mlx5_free_fc(struct mlx5_core_dev *dev,
counter = list_first_entry(&fc_stats->counters, struct mlx5_fc,
list);
- while (counter)
- counter = mlx5_fc_stats_query(dev, counter, last->id);
+ if (counter)
+ mlx5_fc_stats_query_counter_range(dev, counter, last->id);
fc_stats->next_query = now + fc_stats->sampling_interval;
}
int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
{
struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+ int max_bulk_len;
+ int max_out_len;
spin_lock_init(&fc_stats->counters_idr_lock);
idr_init(&fc_stats->counters_idr);
init_llist_head(&fc_stats->addlist);
init_llist_head(&fc_stats->dellist);
+ max_bulk_len = get_max_bulk_query_len(dev);
+ max_out_len = mlx5_cmd_fc_get_bulk_query_out_len(max_bulk_len);
+ fc_stats->bulk_query_out = kzalloc(max_out_len, GFP_KERNEL);
+ if (!fc_stats->bulk_query_out)
+ return -ENOMEM;
+
fc_stats->wq = create_singlethread_workqueue("mlx5_fc");
if (!fc_stats->wq)
- return -ENOMEM;
+ goto err_wq_create;
fc_stats->sampling_interval = MLX5_FC_STATS_PERIOD;
INIT_DELAYED_WORK(&fc_stats->work, mlx5_fc_stats_work);
return 0;
+
+err_wq_create:
+ kfree(fc_stats->bulk_query_out);
+ return -ENOMEM;
}
void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev)
destroy_workqueue(dev->priv.fc_stats.wq);
dev->priv.fc_stats.wq = NULL;
+ kfree(fc_stats->bulk_query_out);
+
idr_destroy(&fc_stats->counters_idr);
tmplist = llist_del_all(&fc_stats->addlist);
}
EXPORT_SYMBOL(mlx5_fc_query);
+ u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter)
+ {
+ return counter->cache.lastuse;
+ }
+
void mlx5_fc_query_cached(struct mlx5_fc *counter,
u64 *bytes, u64 *packets, u64 *lastuse)
{
/* Vendor specific information */
dev->vendor_id = cdev->vendor_id;
dev->vendor_part_id = cdev->device_id;
- dev->hw_ver = 0;
+ dev->hw_ver = cdev->chip_rev;
dev->fw_ver = (FW_MAJOR_VERSION << 24) | (FW_MINOR_VERSION << 16) |
(FW_REVISION_VERSION << 8) | (FW_ENGINEERING_VERSION);
SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_LOCAL_INV_FENCE, 1);
/* Check atomic operations support in PCI configuration space. */
- pci_read_config_dword(cdev->pdev,
- cdev->pdev->pcie_cap + PCI_EXP_DEVCTL2,
- &pci_status_control);
+ pcie_capability_read_dword(cdev->pdev, PCI_EXP_DEVCTL2,
+ &pci_status_control);
if (pci_status_control & PCI_EXP_DEVCTL2_LTR_EN)
SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_ATOMIC_OP, 1);
/* Calculate the corresponding DPI address */
dpi_start_offset = p_hwfn->dpi_start_offset;
- out_params->dpi_addr = (u64)((u8 __iomem *)p_hwfn->doorbells +
- dpi_start_offset +
- ((out_params->dpi) * p_hwfn->dpi_size));
+ out_params->dpi_addr = p_hwfn->doorbells + dpi_start_offset +
+ out_params->dpi * p_hwfn->dpi_size;
out_params->dpi_phys_addr = p_hwfn->db_phys_addr +
dpi_start_offset +
MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY = 0x725,
MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY = 0x726,
MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS = 0x727,
+ MLX5_CMD_OP_RELEASE_XRQ_ERROR = 0x729,
+ MLX5_CMD_OP_MODIFY_XRQ = 0x72a,
MLX5_CMD_OP_QUERY_ESW_FUNCTIONS = 0x740,
MLX5_CMD_OP_QUERY_VPORT_STATE = 0x750,
MLX5_CMD_OP_MODIFY_VPORT_STATE = 0x751,
MLX5_UCTX_CAP_INTERNAL_DEV_RES = 1UL << 1,
};
+#define MLX5_FC_BULK_SIZE_FACTOR 128
+
+enum mlx5_fc_bulk_alloc_bitmask {
+ MLX5_FC_BULK_128 = (1 << 0),
+ MLX5_FC_BULK_256 = (1 << 1),
+ MLX5_FC_BULK_512 = (1 << 2),
+ MLX5_FC_BULK_1024 = (1 << 3),
+ MLX5_FC_BULK_2048 = (1 << 4),
+ MLX5_FC_BULK_4096 = (1 << 5),
+ MLX5_FC_BULK_8192 = (1 << 6),
+ MLX5_FC_BULK_16384 = (1 << 7),
+};
+
+#define MLX5_FC_BULK_NUM_FCS(fc_enum) (MLX5_FC_BULK_SIZE_FACTOR * (fc_enum))
+
struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_0[0x30];
u8 vhca_id[0x10];
u8 reserved_at_2e0[0x7];
u8 max_qp_mcg[0x19];
- u8 reserved_at_300[0x18];
+ u8 reserved_at_300[0x10];
+ u8 flow_counter_bulk_alloc[0x8];
u8 log_max_mcg[0x8];
u8 reserved_at_320[0x3];
struct mlx5_ifc_tisc_bits {
u8 strict_lag_tx_port_affinity[0x1];
u8 tls_en[0x1];
- u8 reserved_at_1[0x2];
+ u8 reserved_at_2[0x2];
u8 lag_tx_port_affinity[0x04];
u8 reserved_at_8[0x4];
SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC = 0x3,
};
+enum {
+ ELEMENT_TYPE_CAP_MASK_TASR = 1 << 0,
+ ELEMENT_TYPE_CAP_MASK_VPORT = 1 << 1,
+ ELEMENT_TYPE_CAP_MASK_VPORT_TC = 1 << 2,
+ ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC = 1 << 3,
+};
+
struct mlx5_ifc_scheduling_context_bits {
u8 element_type[0x8];
u8 reserved_at_8[0x18];
struct mlx5_ifc_cqc_bits cq_context;
- u8 reserved_at_280[0x40];
+ u8 reserved_at_280[0x60];
u8 cq_umem_valid[0x1];
- u8 reserved_at_2c1[0x5bf];
+ u8 reserved_at_2e1[0x1f];
+
+ u8 reserved_at_300[0x580];
u8 pas[0][0x40];
};
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
- u8 reserved_at_40[0x40];
+ u8 reserved_at_40[0x38];
+ u8 flow_counter_bulk[0x8];
};
struct mlx5_ifc_add_vxlan_udp_dport_out_bits {
u8 syndrome[0x20];
- u8 reserved_at_40[0x40];
-
struct mlx5_ifc_lagc_bits ctx;
};
#if defined(CONFIG_DYNAMIC_DEBUG)
#define ibdev_dbg(__dev, format, args...) \
dynamic_ibdev_dbg(__dev, format, ##args)
-#elif defined(DEBUG)
-#define ibdev_dbg(__dev, format, args...) \
- ibdev_printk(KERN_DEBUG, __dev, format, ##args)
#else
__printf(2, 3) __cold
static inline
void ibdev_dbg(const struct ib_device *ibdev, const char *format, ...) {}
#endif
+#define ibdev_level_ratelimited(ibdev_level, ibdev, fmt, ...) \
+do { \
+ static DEFINE_RATELIMIT_STATE(_rs, \
+ DEFAULT_RATELIMIT_INTERVAL, \
+ DEFAULT_RATELIMIT_BURST); \
+ if (__ratelimit(&_rs)) \
+ ibdev_level(ibdev, fmt, ##__VA_ARGS__); \
+} while (0)
+
+#define ibdev_emerg_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_emerg, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_alert_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_alert, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_crit_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_crit, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_err_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_err, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_warn_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_warn, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_notice_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_notice, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_info_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_info, ibdev, fmt, ##__VA_ARGS__)
+
+#if defined(CONFIG_DYNAMIC_DEBUG)
+/* descriptor check is first to prevent flooding with "callbacks suppressed" */
+#define ibdev_dbg_ratelimited(ibdev, fmt, ...) \
+do { \
+ static DEFINE_RATELIMIT_STATE(_rs, \
+ DEFAULT_RATELIMIT_INTERVAL, \
+ DEFAULT_RATELIMIT_BURST); \
+ DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \
+ if (DYNAMIC_DEBUG_BRANCH(descriptor) && __ratelimit(&_rs)) \
+ __dynamic_ibdev_dbg(&descriptor, ibdev, fmt, \
+ ##__VA_ARGS__); \
+} while (0)
+#else
+__printf(2, 3) __cold
+static inline
+void ibdev_dbg_ratelimited(const struct ib_device *ibdev, const char *format, ...) {}
+#endif
+
union ib_gid {
u8 raw[16];
struct {
IB_PORT_ACTIVE_DEFER = 5
};
+enum ib_port_phys_state {
+ IB_PORT_PHYS_STATE_SLEEP = 1,
+ IB_PORT_PHYS_STATE_POLLING = 2,
+ IB_PORT_PHYS_STATE_DISABLED = 3,
+ IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING = 4,
+ IB_PORT_PHYS_STATE_LINK_UP = 5,
+ IB_PORT_PHYS_STATE_LINK_ERROR_RECOVERY = 6,
+ IB_PORT_PHYS_STATE_PHY_TEST = 7,
+};
+
enum ib_port_width {
IB_WIDTH_1X = 1,
IB_WIDTH_2X = 16,
bool cleanup_retryable;
- void (*invalidate_range)(struct ib_umem_odp *umem_odp,
- unsigned long start, unsigned long end);
struct mutex per_mm_list_lock;
struct list_head per_mm_list;
u64 iova);
int (*unmap_fmr)(struct list_head *fmr_list);
int (*dealloc_fmr)(struct ib_fmr *fmr);
+ void (*invalidate_range)(struct ib_umem_odp *umem_odp,
+ unsigned long start, unsigned long end);
int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
struct ib_xrcd *(*alloc_xrcd)(struct ib_device *device,
const union ib_gid *gid,
const struct sockaddr *addr,
void *client_data);
- struct list_head list;
+
+ refcount_t uses;
+ struct completion uses_zero;
u32 client_id;
/* kverbs are not required by the client */
NULL);
}
+struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private,
+ int nr_cqe, enum ib_poll_context poll_ctx,
+ const char *caller);
+
+/**
+ * ib_alloc_cq_any: Allocate kernel CQ
+ * @dev: The IB device
+ * @private: Private data attached to the CQE
+ * @nr_cqe: Number of CQEs in the CQ
+ * @poll_ctx: Context used for polling the CQ
+ */
+static inline struct ib_cq *ib_alloc_cq_any(struct ib_device *dev,
+ void *private, int nr_cqe,
+ enum ib_poll_context poll_ctx)
+{
+ return __ib_alloc_cq_any(dev, private, nr_cqe, poll_ctx,
+ KBUILD_MODNAME);
+}
+
/**
* ib_free_cq_user - Free kernel/user CQ
* @cq: The CQ to free