Merge branches 'cma', 'ehca', 'ipath', 'iser', 'mlx4' and 'nes' into for-next
author Roland Dreier <rolandd@cisco.com>
Thu, 25 Dec 2008 04:35:42 +0000 (20:35 -0800)
committer Roland Dreier <rolandd@cisco.com>
Thu, 25 Dec 2008 04:35:42 +0000 (20:35 -0800)
39 files changed:
drivers/infiniband/core/addr.c
drivers/infiniband/core/cma.c
drivers/infiniband/hw/ehca/ehca_classes.h
drivers/infiniband/hw/ehca/ehca_eq.c
drivers/infiniband/hw/ehca/ehca_main.c
drivers/infiniband/hw/ehca/ehca_qp.c
drivers/infiniband/hw/ehca/ehca_reqs.c
drivers/infiniband/hw/ipath/ipath_driver.c
drivers/infiniband/hw/ipath/ipath_file_ops.c
drivers/infiniband/hw/ipath/ipath_fs.c
drivers/infiniband/hw/ipath/ipath_iba6120.c
drivers/infiniband/hw/ipath/ipath_iba7220.c
drivers/infiniband/hw/ipath/ipath_init_chip.c
drivers/infiniband/hw/ipath/ipath_kernel.h
drivers/infiniband/hw/ipath/ipath_keys.c
drivers/infiniband/hw/ipath/ipath_mad.c
drivers/infiniband/hw/ipath/ipath_qp.c
drivers/infiniband/hw/ipath/ipath_rc.c
drivers/infiniband/hw/ipath/ipath_sdma.c
drivers/infiniband/hw/ipath/ipath_stats.c
drivers/infiniband/hw/ipath/ipath_ud.c
drivers/infiniband/hw/ipath/ipath_verbs.c
drivers/infiniband/hw/ipath/ipath_verbs.h
drivers/infiniband/hw/mlx4/cq.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/nes/nes.h
drivers/infiniband/hw/nes/nes_cm.c
drivers/infiniband/hw/nes/nes_cm.h
drivers/infiniband/hw/nes/nes_hw.c
drivers/infiniband/hw/nes/nes_utils.c
drivers/infiniband/hw/nes/nes_verbs.c
drivers/net/mlx4/cq.c
drivers/net/mlx4/en_cq.c
drivers/net/mlx4/en_main.c
drivers/net/mlx4/eq.c
drivers/net/mlx4/main.c
drivers/net/mlx4/mlx4.h
drivers/net/mlx4/profile.c
include/linux/mlx4/device.h

index 09a2bec7fd3207b65abac3f93339b7cd46a66c8e..d98b05b28262776d954818b5989a49f5207a7f85 100644 (file)
@@ -41,6 +41,8 @@
 #include <net/neighbour.h>
 #include <net/route.h>
 #include <net/netevent.h>
+#include <net/addrconf.h>
+#include <net/ip6_route.h>
 #include <rdma/ib_addr.h>
 
 MODULE_AUTHOR("Sean Hefty");
@@ -49,8 +51,8 @@ MODULE_LICENSE("Dual BSD/GPL");
 
 struct addr_req {
        struct list_head list;
-       struct sockaddr src_addr;
-       struct sockaddr dst_addr;
+       struct sockaddr_storage src_addr;
+       struct sockaddr_storage dst_addr;
        struct rdma_dev_addr *addr;
        struct rdma_addr_client *client;
        void *context;
@@ -113,15 +115,32 @@ EXPORT_SYMBOL(rdma_copy_addr);
 /*
  * Fill dev_addr from the local network device that owns the IP address
  * in addr.
  *
  * AF_INET:  the device is looked up with ip_dev_find() in init_net; the
  *           reference it returns is dropped with dev_put() after the copy.
  * AF_INET6: every device in init_net is tested with ipv6_chk_addr() and
  *           the first match is used.
  * Any other address family leaves the result at -EADDRNOTAVAIL.
  *
  * Returns the result of rdma_copy_addr() for the matching device, or
  * -EADDRNOTAVAIL when no device owns the address.
  *
  * NOTE(review): the IPv6 device walk takes no lock in this function;
  * confirm that callers provide whatever protection for_each_netdev()
  * needs.
  */
 int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
 {
        struct net_device *dev;
-       __be32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
-       int ret;
+       int ret = -EADDRNOTAVAIL;
 
-       dev = ip_dev_find(&init_net, ip);
-       if (!dev)
-               return -EADDRNOTAVAIL;
+       switch (addr->sa_family) {
+       case AF_INET:
+               dev = ip_dev_find(&init_net,
+                       ((struct sockaddr_in *) addr)->sin_addr.s_addr);
+
+               if (!dev)
+                       return ret;
 
-       ret = rdma_copy_addr(dev_addr, dev, NULL);
-       dev_put(dev);
+               ret = rdma_copy_addr(dev_addr, dev, NULL);
+               dev_put(dev);
+               break;
+       case AF_INET6:
+               for_each_netdev(&init_net, dev) {
+                       if (ipv6_chk_addr(&init_net,
+                                         &((struct sockaddr_in6 *) addr)->sin6_addr,
+                                         dev, 1)) {
+                               ret = rdma_copy_addr(dev_addr, dev, NULL);
+                               break;
+                       }
+               }
+               break;
+       default:
+               break;
+       }
        return ret;
 }
 EXPORT_SYMBOL(rdma_translate_ip);
@@ -156,22 +175,37 @@ static void queue_req(struct addr_req *req)
        mutex_unlock(&lock);
 }
 
-static void addr_send_arp(struct sockaddr_in *dst_in)
+static void addr_send_arp(struct sockaddr *dst_in)
 {
        /*
         * Look up a route to dst_in in init_net and call neigh_event_send()
         * on that route's neighbour entry, then drop the route reference.
         * Presumably this is what starts ARP / neighbour discovery for the
         * destination -- confirm against neigh_event_send().
         *
         * AF_INET uses ip_route_output_key(); every other address family
         * is treated as IPv6 and uses ip6_route_output().  A failed route
         * lookup returns silently.
         *
         * NOTE(review): neither branch checks the neighbour pointer before
         * passing it to neigh_event_send(); confirm it cannot be NULL.
         */
        struct rtable *rt;
        struct flowi fl;
-       __be32 dst_ip = dst_in->sin_addr.s_addr;
+       struct dst_entry *dst;
 
        memset(&fl, 0, sizeof fl);
-       fl.nl_u.ip4_u.daddr = dst_ip;
-       if (ip_route_output_key(&init_net, &rt, &fl))
-               return;
+       if (dst_in->sa_family == AF_INET)  {
+               fl.nl_u.ip4_u.daddr =
+                       ((struct sockaddr_in *) dst_in)->sin_addr.s_addr;
 
-       neigh_event_send(rt->u.dst.neighbour, NULL);
-       ip_rt_put(rt);
+               if (ip_route_output_key(&init_net, &rt, &fl))
+                       return;
+
+               neigh_event_send(rt->u.dst.neighbour, NULL);
+               ip_rt_put(rt);
+
+       } else {
+               fl.nl_u.ip6_u.daddr =
+                       ((struct sockaddr_in6 *) dst_in)->sin6_addr;
+
+               dst = ip6_route_output(&init_net, NULL, &fl);
+               if (!dst)
+                       return;
+
+               neigh_event_send(dst->neighbour, NULL);
+               dst_release(dst);
+       }
 }
 
-static int addr_resolve_remote(struct sockaddr_in *src_in,
+static int addr4_resolve_remote(struct sockaddr_in *src_in,
                               struct sockaddr_in *dst_in,
                               struct rdma_dev_addr *addr)
 {
@@ -220,10 +254,51 @@ out:
        return ret;
 }
 
+static int addr6_resolve_remote(struct sockaddr_in6 *src_in,
+                              struct sockaddr_in6 *dst_in,
+                              struct rdma_dev_addr *addr)
+{      /*
+        * IPv6 counterpart of addr4_resolve_remote(): route from src_in to
+        * dst_in in init_net and fill addr from the outgoing device.
+        *
+        * If the device has IFF_NOARP set, addr is filled from the device
+        * alone (no destination hardware address).  Otherwise the route's
+        * neighbour entry is used, but only when it exists and its state
+        * has a NUD_VALID bit set.
+        *
+        * Returns the result of rdma_copy_addr(), or -ENODATA when the
+        * route lookup returns NULL or no valid neighbour entry is
+        * available yet.
+        */
+       struct flowi fl;
+       struct neighbour *neigh;
+       struct dst_entry *dst;
+       int ret = -ENODATA;
+
+       memset(&fl, 0, sizeof fl);
+       fl.nl_u.ip6_u.daddr = dst_in->sin6_addr;
+       fl.nl_u.ip6_u.saddr = src_in->sin6_addr;
+
+       dst = ip6_route_output(&init_net, NULL, &fl);
+       if (!dst)
+               return ret;
+
+       if (dst->dev->flags & IFF_NOARP) {
+               ret = rdma_copy_addr(addr, dst->dev, NULL);
+       } else {
+               neigh = dst->neighbour;
+               if (neigh && (neigh->nud_state & NUD_VALID))
+                       ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
+       }
+
+       dst_release(dst);
+       return ret;
+}
+
+static int addr_resolve_remote(struct sockaddr *src_in,
+                               struct sockaddr *dst_in,
+                               struct rdma_dev_addr *addr)
+{      /*
+        * Dispatch remote address resolution by address family.  The
+        * family is taken from src_in only: AF_INET goes to
+        * addr4_resolve_remote(), anything else to addr6_resolve_remote().
+        * Both pointers are cast to the same sockaddr type, so dst_in is
+        * assumed to be of the same family as src_in -- confirm with
+        * callers.  Returns whatever the chosen resolver returns.
+        */
+       if (src_in->sa_family == AF_INET) {
+               return addr4_resolve_remote((struct sockaddr_in *) src_in,
+                       (struct sockaddr_in *) dst_in, addr);
+       } else
+               return addr6_resolve_remote((struct sockaddr_in6 *) src_in,
+                       (struct sockaddr_in6 *) dst_in, addr);
+}
+
 static void process_req(struct work_struct *work)
 {
        struct addr_req *req, *temp_req;
-       struct sockaddr_in *src_in, *dst_in;
+       struct sockaddr *src_in, *dst_in;
        struct list_head done_list;
 
        INIT_LIST_HEAD(&done_list);
@@ -231,8 +306,8 @@ static void process_req(struct work_struct *work)
        mutex_lock(&lock);
        list_for_each_entry_safe(req, temp_req, &req_list, list) {
                if (req->status == -ENODATA) {
-                       src_in = (struct sockaddr_in *) &req->src_addr;
-                       dst_in = (struct sockaddr_in *) &req->dst_addr;
+                       src_in = (struct sockaddr *) &req->src_addr;
+                       dst_in = (struct sockaddr *) &req->dst_addr;
                        req->status = addr_resolve_remote(src_in, dst_in,
                                                          req->addr);
                        if (req->status && time_after_eq(jiffies, req->timeout))
@@ -251,41 +326,72 @@ static void process_req(struct work_struct *work)
 
        list_for_each_entry_safe(req, temp_req, &done_list, list) {
                list_del(&req->list);
-               req->callback(req->status, &req->src_addr, req->addr,
-                             req->context);
+               req->callback(req->status, (struct sockaddr *) &req->src_addr,
+                       req->addr, req->context);
                put_client(req->client);
                kfree(req);
        }
 }
 
-static int addr_resolve_local(struct sockaddr_in *src_in,
-                             struct sockaddr_in *dst_in,
+static int addr_resolve_local(struct sockaddr *src_in,
+                             struct sockaddr *dst_in,
                              struct rdma_dev_addr *addr)
 {
        struct net_device *dev;
-       __be32 src_ip = src_in->sin_addr.s_addr;
-       __be32 dst_ip = dst_in->sin_addr.s_addr;
        int ret;
 
-       dev = ip_dev_find(&init_net, dst_ip);
-       if (!dev)
-               return -EADDRNOTAVAIL;
-
-       if (ipv4_is_zeronet(src_ip)) {
-               src_in->sin_family = dst_in->sin_family;
-               src_in->sin_addr.s_addr = dst_ip;
-               ret = rdma_copy_addr(addr, dev, dev->dev_addr);
-       } else if (ipv4_is_loopback(src_ip)) {
-               ret = rdma_translate_ip((struct sockaddr *)dst_in, addr);
-               if (!ret)
-                       memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
+       if (dst_in->sa_family == AF_INET) {
+               __be32 src_ip = ((struct sockaddr_in *) src_in)->sin_addr.s_addr;
+               __be32 dst_ip = ((struct sockaddr_in *) dst_in)->sin_addr.s_addr;
+
+               dev = ip_dev_find(&init_net, dst_ip);
+               if (!dev)
+                       return -EADDRNOTAVAIL;
+
+               if (ipv4_is_zeronet(src_ip)) {
+                       src_in->sa_family = dst_in->sa_family;
+                       ((struct sockaddr_in *) src_in)->sin_addr.s_addr = dst_ip;
+                       ret = rdma_copy_addr(addr, dev, dev->dev_addr);
+               } else if (ipv4_is_loopback(src_ip)) {
+                       ret = rdma_translate_ip(dst_in, addr);
+                       if (!ret)
+                               memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
+               } else {
+                       ret = rdma_translate_ip(src_in, addr);
+                       if (!ret)
+                               memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
+               }
+               dev_put(dev);
        } else {
-               ret = rdma_translate_ip((struct sockaddr *)src_in, addr);
-               if (!ret)
-                       memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
+               struct in6_addr *a;
+
+               for_each_netdev(&init_net, dev)
+                       if (ipv6_chk_addr(&init_net,
+                                         &((struct sockaddr_in6 *) addr)->sin6_addr,
+                                         dev, 1))
+                               break;
+
+               if (!dev)
+                       return -EADDRNOTAVAIL;
+
+               a = &((struct sockaddr_in6 *) src_in)->sin6_addr;
+
+               if (ipv6_addr_any(a)) {
+                       src_in->sa_family = dst_in->sa_family;
+                       ((struct sockaddr_in6 *) src_in)->sin6_addr =
+                               ((struct sockaddr_in6 *) dst_in)->sin6_addr;
+                       ret = rdma_copy_addr(addr, dev, dev->dev_addr);
+               } else if (ipv6_addr_loopback(a)) {
+                       ret = rdma_translate_ip(dst_in, addr);
+                       if (!ret)
+                               memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
+               } else  {
+                       ret = rdma_translate_ip(src_in, addr);
+                       if (!ret)
+                               memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
+               }
        }
 
-       dev_put(dev);
        return ret;
 }
 
@@ -296,7 +402,7 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
                                     struct rdma_dev_addr *addr, void *context),
                    void *context)
 {
-       struct sockaddr_in *src_in, *dst_in;
+       struct sockaddr *src_in, *dst_in;
        struct addr_req *req;
        int ret = 0;
 
@@ -313,8 +419,8 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
        req->client = client;
        atomic_inc(&client->refcount);
 
-       src_in = (struct sockaddr_in *) &req->src_addr;
-       dst_in = (struct sockaddr_in *) &req->dst_addr;
+       src_in = (struct sockaddr *) &req->src_addr;
+       dst_in = (struct sockaddr *) &req->dst_addr;
 
        req->status = addr_resolve_local(src_in, dst_in, addr);
        if (req->status == -EADDRNOTAVAIL)
index d951896ff7fc9f6023db24c6280d22870f98f946..2a2e50871b4063beb3c3ab8c4ae73ead5797e060 100644 (file)
@@ -42,6 +42,7 @@
 #include <linux/inetdevice.h>
 
 #include <net/tcp.h>
+#include <net/ipv6.h>
 
 #include <rdma/rdma_cm.h>
 #include <rdma/rdma_cm_ib.h>
@@ -636,7 +637,12 @@ static inline int cma_zero_addr(struct sockaddr *addr)
 
 /*
  * Report whether addr is a loopback address.  AF_INET addresses are
  * tested with ipv4_is_loopback(); every other address family is treated
  * as IPv6 and tested with ipv6_addr_loopback().  Returns the result of
  * the chosen helper.
  */
 static inline int cma_loopback_addr(struct sockaddr *addr)
 {
-       return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr);
+       if (addr->sa_family == AF_INET)
+               return ipv4_is_loopback(
+                       ((struct sockaddr_in *) addr)->sin_addr.s_addr);
+       else
+               return ipv6_addr_loopback(
+                       &((struct sockaddr_in6 *) addr)->sin6_addr);
 }
 
 static inline int cma_any_addr(struct sockaddr *addr)
@@ -1467,10 +1473,10 @@ static void cma_listen_on_all(struct rdma_id_private *id_priv)
 
 /*
  * Bind id to an all-zero address of family af: the address storage is
  * zeroed, only the family field is set, and the result is handed to
  * rdma_bind_addr().  sockaddr_storage is used so the buffer is large
  * enough for any family passed in.  Returns what rdma_bind_addr()
  * returns.
  */
 static int cma_bind_any(struct rdma_cm_id *id, sa_family_t af)
 {
-       struct sockaddr_in addr_in;
+       struct sockaddr_storage addr_in;
 
        memset(&addr_in, 0, sizeof addr_in);
-       addr_in.sin_family = af;
+       addr_in.ss_family = af;
        return rdma_bind_addr(id, (struct sockaddr *) &addr_in);
 }
 
@@ -2073,7 +2079,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
        struct rdma_id_private *id_priv;
        int ret;
 
-       if (addr->sa_family != AF_INET)
+       if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
                return -EAFNOSUPPORT;
 
        id_priv = container_of(id, struct rdma_id_private, id);
@@ -2113,31 +2119,59 @@ EXPORT_SYMBOL(rdma_bind_addr);
 /*
  * Fill the connection header at hdr from the source and destination
  * addresses in route.
  *
  * The address family is taken from route->addr.src_addr: AF_INET writes
  * IP version 4 and the ip4 address fields; any other family is treated
  * as IPv6 and writes IP version 6 and the ip6 address fields.
  *
  * For RDMA_PS_SDP, hdr is treated as a struct sdp_hh whose major version
  * must already equal SDP_MAJ_VERSION, otherwise -EINVAL is returned.
  * For every other port space, hdr is treated as a struct cma_hdr and its
  * cma_version is set to CMA_VERSION.  In both layouts the port field is
  * set to the source port.
  *
  * Returns 0 on success, -EINVAL on an SDP version mismatch.
  */
 static int cma_format_hdr(void *hdr, enum rdma_port_space ps,
                          struct rdma_route *route)
 {
-       struct sockaddr_in *src4, *dst4;
        struct cma_hdr *cma_hdr;
        struct sdp_hh *sdp_hdr;
 
-       src4 = (struct sockaddr_in *) &route->addr.src_addr;
-       dst4 = (struct sockaddr_in *) &route->addr.dst_addr;
-
-       switch (ps) {
-       case RDMA_PS_SDP:
-               sdp_hdr = hdr;
-               if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
-                       return -EINVAL;
-               sdp_set_ip_ver(sdp_hdr, 4);
-               sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
-               sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
-               sdp_hdr->port = src4->sin_port;
-               break;
-       default:
-               cma_hdr = hdr;
-               cma_hdr->cma_version = CMA_VERSION;
-               cma_set_ip_ver(cma_hdr, 4);
-               cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
-               cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
-               cma_hdr->port = src4->sin_port;
-               break;
+       if (route->addr.src_addr.ss_family == AF_INET) {
+               struct sockaddr_in *src4, *dst4;
+
+               src4 = (struct sockaddr_in *) &route->addr.src_addr;
+               dst4 = (struct sockaddr_in *) &route->addr.dst_addr;
+
+               switch (ps) {
+               case RDMA_PS_SDP:
+                       sdp_hdr = hdr;
+                       if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
+                               return -EINVAL;
+                       sdp_set_ip_ver(sdp_hdr, 4);
+                       sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
+                       sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
+                       sdp_hdr->port = src4->sin_port;
+                       break;
+               default:
+                       cma_hdr = hdr;
+                       cma_hdr->cma_version = CMA_VERSION;
+                       cma_set_ip_ver(cma_hdr, 4);
+                       cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
+                       cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
+                       cma_hdr->port = src4->sin_port;
+                       break;
+               }
+       } else {
+               struct sockaddr_in6 *src6, *dst6;
+
+               src6 = (struct sockaddr_in6 *) &route->addr.src_addr;
+               dst6 = (struct sockaddr_in6 *) &route->addr.dst_addr;
+
+               switch (ps) {
+               case RDMA_PS_SDP:
+                       sdp_hdr = hdr;
+                       if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
+                               return -EINVAL;
+                       sdp_set_ip_ver(sdp_hdr, 6);
+                       sdp_hdr->src_addr.ip6 = src6->sin6_addr;
+                       sdp_hdr->dst_addr.ip6 = dst6->sin6_addr;
+                       sdp_hdr->port = src6->sin6_port;
+                       break;
+               default:
+                       cma_hdr = hdr;
+                       cma_hdr->cma_version = CMA_VERSION;
+                       cma_set_ip_ver(cma_hdr, 6);
+                       cma_hdr->src_addr.ip6 = src6->sin6_addr;
+                       cma_hdr->dst_addr.ip6 = dst6->sin6_addr;
+                       cma_hdr->port = src6->sin6_port;
+                       break;
+               }
        }
        return 0;
 }
index 7fc35cf0cddf35285cd91586e6d1aa02145a6a00..c825142a2fb752c7514e3af7d95d13c6d2d60f8b 100644 (file)
@@ -175,6 +175,13 @@ struct ehca_queue_map {
        unsigned int next_wqe_idx;   /* Idx to first wqe to be flushed */
 };
 
+/*
+ * Return the qmap index that follows cur_index in a ring of 'limit'
+ * entries: cur_index + 1, or 0 when cur_index + 1 equals limit.
+ * Only the exact value 'limit' wraps; a cur_index already at or past
+ * limit is simply incremented, so callers must pass an in-range index.
+ */
+static inline unsigned int next_index(unsigned int cur_index, unsigned int limit)
+{
+       unsigned int temp = cur_index + 1;
+       return (temp == limit) ? 0 : temp;
+}
+
 struct ehca_qp {
        union {
                struct ib_qp ib_qp;
index 49660dfa186785f8d73b16b84e0ddcc141967cac..523e733c630e560acf6585ed1f35e772378e9649 100644 (file)
@@ -113,7 +113,7 @@ int ehca_create_eq(struct ehca_shca *shca,
                        if (h_ret != H_SUCCESS || vpage)
                                goto create_eq_exit2;
                } else {
-                       if (h_ret != H_PAGE_REGISTERED || !vpage)
+                       if (h_ret != H_PAGE_REGISTERED)
                                goto create_eq_exit2;
                }
        }
index bec7e0249358f6cb876729d9070c51d94f6e29e7..3b77b674cbf61a4caba20fc0fb42daa49f55eda9 100644 (file)
@@ -717,6 +717,7 @@ static int __devinit ehca_probe(struct of_device *dev,
        const u64 *handle;
        struct ib_pd *ibpd;
        int ret, i, eq_size;
+       unsigned long flags;
 
        handle = of_get_property(dev->node, "ibm,hca-handle", NULL);
        if (!handle) {
@@ -830,9 +831,9 @@ static int __devinit ehca_probe(struct of_device *dev,
                ehca_err(&shca->ib_device,
                         "Cannot create device attributes  ret=%d", ret);
 
-       spin_lock(&shca_list_lock);
+       spin_lock_irqsave(&shca_list_lock, flags);
        list_add(&shca->shca_list, &shca_list);
-       spin_unlock(&shca_list_lock);
+       spin_unlock_irqrestore(&shca_list_lock, flags);
 
        return 0;
 
@@ -878,6 +879,7 @@ probe1:
 static int __devexit ehca_remove(struct of_device *dev)
 {
        struct ehca_shca *shca = dev->dev.driver_data;
+       unsigned long flags;
        int ret;
 
        sysfs_remove_group(&dev->dev.kobj, &ehca_dev_attr_grp);
@@ -915,9 +917,9 @@ static int __devexit ehca_remove(struct of_device *dev)
 
        ib_dealloc_device(&shca->ib_device);
 
-       spin_lock(&shca_list_lock);
+       spin_lock_irqsave(&shca_list_lock, flags);
        list_del(&shca->shca_list);
-       spin_unlock(&shca_list_lock);
+       spin_unlock_irqrestore(&shca_list_lock, flags);
 
        return ret;
 }
@@ -975,6 +977,7 @@ static int ehca_mem_notifier(struct notifier_block *nb,
                             unsigned long action, void *data)
 {
        static unsigned long ehca_dmem_warn_time;
+       unsigned long flags;
 
        switch (action) {
        case MEM_CANCEL_OFFLINE:
@@ -985,12 +988,12 @@ static int ehca_mem_notifier(struct notifier_block *nb,
        case MEM_GOING_ONLINE:
        case MEM_GOING_OFFLINE:
                /* only ok if no hca is attached to the lpar */
-               spin_lock(&shca_list_lock);
+               spin_lock_irqsave(&shca_list_lock, flags);
                if (list_empty(&shca_list)) {
-                       spin_unlock(&shca_list_lock);
+                       spin_unlock_irqrestore(&shca_list_lock, flags);
                        return NOTIFY_OK;
                } else {
-                       spin_unlock(&shca_list_lock);
+                       spin_unlock_irqrestore(&shca_list_lock, flags);
                        if (printk_timed_ratelimit(&ehca_dmem_warn_time,
                                                   30 * 1000))
                                ehca_gen_err("DMEM operations are not allowed"
index cadbf0cdd910e996ddca5029e6a66dcbe27382a9..f161cf173dbe3585e6cbf87275e71ef435ad018e 100644 (file)
@@ -1138,14 +1138,14 @@ static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue,
                return -EFAULT;
        }
 
-       tail_idx = (qmap->tail + 1) % qmap->entries;
+       tail_idx = next_index(qmap->tail, qmap->entries);
        wqe_idx = q_ofs / ipz_queue->qe_size;
 
        /* check all processed wqes, whether a cqe is requested or not */
        while (tail_idx != wqe_idx) {
                if (qmap->map[tail_idx].cqe_req)
                        qmap->left_to_poll++;
-               tail_idx = (tail_idx + 1) % qmap->entries;
+               tail_idx = next_index(tail_idx, qmap->entries);
        }
        /* save index in queue, where we have to start flushing */
        qmap->next_wqe_idx = wqe_idx;
@@ -1195,14 +1195,14 @@ static int check_for_left_cqes(struct ehca_qp *my_qp, struct ehca_shca *shca)
        } else {
                spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
                my_qp->sq_map.left_to_poll = 0;
-               my_qp->sq_map.next_wqe_idx = (my_qp->sq_map.tail + 1) %
-                                               my_qp->sq_map.entries;
+               my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail,
+                                                       my_qp->sq_map.entries);
                spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
 
                spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
                my_qp->rq_map.left_to_poll = 0;
-               my_qp->rq_map.next_wqe_idx = (my_qp->rq_map.tail + 1) %
-                                               my_qp->rq_map.entries;
+               my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail,
+                                                       my_qp->rq_map.entries);
                spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
        }
 
index 00a648f4316c66b0f4585cfbf63db7792fee2c3d..c7112686782faeda1fb38e4a2a882598c0744856 100644 (file)
@@ -726,13 +726,13 @@ repoll:
                 * set left_to_poll to 0 because in error state, we will not
                 * get any additional CQEs
                 */
-               my_qp->sq_map.next_wqe_idx = (my_qp->sq_map.tail + 1) %
-                                               my_qp->sq_map.entries;
+               my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail,
+                                                       my_qp->sq_map.entries);
                my_qp->sq_map.left_to_poll = 0;
                ehca_add_to_err_list(my_qp, 1);
 
-               my_qp->rq_map.next_wqe_idx = (my_qp->rq_map.tail + 1) %
-                                               my_qp->rq_map.entries;
+               my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail,
+                                                       my_qp->rq_map.entries);
                my_qp->rq_map.left_to_poll = 0;
                if (HAS_RQ(my_qp))
                        ehca_add_to_err_list(my_qp, 0);
@@ -860,9 +860,8 @@ static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq,
 
                /* mark as reported and advance next_wqe pointer */
                qmap_entry->reported = 1;
-               qmap->next_wqe_idx++;
-               if (qmap->next_wqe_idx == qmap->entries)
-                       qmap->next_wqe_idx = 0;
+               qmap->next_wqe_idx = next_index(qmap->next_wqe_idx,
+                                               qmap->entries);
                qmap_entry = &qmap->map[qmap->next_wqe_idx];
 
                wc++; nr++;
index ad0aab60b051225deb5020623d02c7d09d56ef37..69c0ce321b4e7cecf4b475bd230cc5bda2614245 100644 (file)
@@ -661,6 +661,8 @@ bail:
 static void __devexit cleanup_device(struct ipath_devdata *dd)
 {
        int port;
+       struct ipath_portdata **tmp;
+       unsigned long flags;
 
        if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
                /* can't do anything more with chip; needs re-init */
@@ -742,20 +744,21 @@ static void __devexit cleanup_device(struct ipath_devdata *dd)
 
        /*
         * free any resources still in use (usually just kernel ports)
-        * at unload; we do for portcnt, not cfgports, because cfgports
-        * could have changed while we were loaded.
+        * at unload; we do for portcnt, because that's what we allocate.
+        * We acquire lock to be really paranoid that ipath_pd isn't being
+        * accessed from some interrupt-related code (that should not happen,
+        * but best to be sure).
         */
+       spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
+       tmp = dd->ipath_pd;
+       dd->ipath_pd = NULL;
+       spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
        for (port = 0; port < dd->ipath_portcnt; port++) {
-               struct ipath_portdata *pd = dd->ipath_pd[port];
-               dd->ipath_pd[port] = NULL;
+               struct ipath_portdata *pd = tmp[port];
+               tmp[port] = NULL; /* debugging paranoia */
                ipath_free_pddata(dd, pd);
        }
-       kfree(dd->ipath_pd);
-       /*
-        * debuggability, in case some cleanup path tries to use it
-        * after this
-        */
-       dd->ipath_pd = NULL;
+       kfree(tmp);
 }
 
 static void __devexit ipath_remove_one(struct pci_dev *pdev)
@@ -2586,6 +2589,7 @@ int ipath_reset_device(int unit)
 {
        int ret, i;
        struct ipath_devdata *dd = ipath_lookup(unit);
+       unsigned long flags;
 
        if (!dd) {
                ret = -ENODEV;
@@ -2611,18 +2615,21 @@ int ipath_reset_device(int unit)
                goto bail;
        }
 
+       spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
        if (dd->ipath_pd)
                for (i = 1; i < dd->ipath_cfgports; i++) {
-                       if (dd->ipath_pd[i] && dd->ipath_pd[i]->port_cnt) {
-                               ipath_dbg("unit %u port %d is in use "
-                                         "(PID %u cmd %s), can't reset\n",
-                                         unit, i,
-                                         pid_nr(dd->ipath_pd[i]->port_pid),
-                                         dd->ipath_pd[i]->port_comm);
-                               ret = -EBUSY;
-                               goto bail;
-                       }
+                       if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)
+                               continue;
+                       spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
+                       ipath_dbg("unit %u port %d is in use "
+                                 "(PID %u cmd %s), can't reset\n",
+                                 unit, i,
+                                 pid_nr(dd->ipath_pd[i]->port_pid),
+                                 dd->ipath_pd[i]->port_comm);
+                       ret = -EBUSY;
+                       goto bail;
                }
+       spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
 
        if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
                teardown_sdma(dd);
@@ -2656,9 +2663,12 @@ static int ipath_signal_procs(struct ipath_devdata *dd, int sig)
 {
        int i, sub, any = 0;
        struct pid *pid;
+       unsigned long flags;
 
        if (!dd->ipath_pd)
                return 0;
+
+       spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
        for (i = 1; i < dd->ipath_cfgports; i++) {
                if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)
                        continue;
@@ -2682,6 +2692,7 @@ static int ipath_signal_procs(struct ipath_devdata *dd, int sig)
                        any++;
                }
        }
+       spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
        return any;
 }
 
index 1af1f3a907c6eb25a7930a6fd2e22ca36ad7135b..239d4e8068ac14e5ce39c52218726f1ed1d732da 100644 (file)
@@ -223,8 +223,13 @@ static int ipath_get_base_info(struct file *fp,
                        (unsigned long long) kinfo->spi_subport_rcvhdr_base);
        }
 
-       kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->ipath_piobufbase) /
-               dd->ipath_palign;
+       /*
+        * All user buffers are 2KB buffers.  If we ever support
+        * giving 4KB buffers to user processes, this will need some
+        * work.
+        */
+       kinfo->spi_pioindex = (kinfo->spi_piobufbase -
+               (dd->ipath_piobufbase & 0xffffffff)) / dd->ipath_palign;
        kinfo->spi_pioalign = dd->ipath_palign;
 
        kinfo->spi_qpair = IPATH_KD_QP;
@@ -2041,7 +2046,9 @@ static int ipath_close(struct inode *in, struct file *fp)
        struct ipath_filedata *fd;
        struct ipath_portdata *pd;
        struct ipath_devdata *dd;
+       unsigned long flags;
        unsigned port;
+       struct pid *pid;
 
        ipath_cdbg(VERBOSE, "close on dev %lx, private data %p\n",
                   (long)in->i_rdev, fp->private_data);
@@ -2074,14 +2081,13 @@ static int ipath_close(struct inode *in, struct file *fp)
                mutex_unlock(&ipath_mutex);
                goto bail;
        }
+       /* early; no interrupt users after this */
+       spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
        port = pd->port_port;
-
-       if (pd->port_hdrqfull) {
-               ipath_cdbg(PROC, "%s[%u] had %u rcvhdrqfull errors "
-                          "during run\n", pd->port_comm, pid_nr(pd->port_pid),
-                          pd->port_hdrqfull);
-               pd->port_hdrqfull = 0;
-       }
+       dd->ipath_pd[port] = NULL;
+       pid = pd->port_pid;
+       pd->port_pid = NULL;
+       spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
 
        if (pd->port_rcvwait_to || pd->port_piowait_to
            || pd->port_rcvnowait || pd->port_pionowait) {
@@ -2138,13 +2144,11 @@ static int ipath_close(struct inode *in, struct file *fp)
                        unlock_expected_tids(pd);
                ipath_stats.sps_ports--;
                ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n",
-                          pd->port_comm, pid_nr(pd->port_pid),
+                          pd->port_comm, pid_nr(pid),
                           dd->ipath_unit, port);
        }
 
-       put_pid(pd->port_pid);
-       pd->port_pid = NULL;
-       dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */
+       put_pid(pid);
        mutex_unlock(&ipath_mutex);
        ipath_free_pddata(dd, pd); /* after releasing the mutex */
 
index 8bb5170b4e416b1b660f333561ffa7c8bb4c7360..53912c327bfea25dc4bab902d788042973441140 100644 (file)
@@ -86,7 +86,7 @@ static int create_file(const char *name, mode_t mode,
        *dentry = NULL;
        mutex_lock(&parent->d_inode->i_mutex);
        *dentry = lookup_one_len(name, parent, strlen(name));
-       if (!IS_ERR(dentry))
+       if (!IS_ERR(*dentry))
                error = ipathfs_mknod(parent->d_inode, *dentry,
                                      mode, fops, data);
        else
index 421cc2af891f06984095f5797863705c5b795121..fbf8c5379ea844de6ac31b0513c55156a6d8455b 100644 (file)
@@ -721,6 +721,12 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
                                 INFINIPATH_HWE_SERDESPLLFAILED);
        }
 
+       dd->ibdeltainprog = 1;
+       dd->ibsymsnap =
+            ipath_read_creg32(dd, dd->ipath_cregs->cr_ibsymbolerrcnt);
+       dd->iblnkerrsnap =
+            ipath_read_creg32(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt);
+
        val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
        config1 = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig1);
 
@@ -810,6 +816,36 @@ static void ipath_pe_quiet_serdes(struct ipath_devdata *dd)
 {
        u64 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
 
+       if (dd->ibsymdelta || dd->iblnkerrdelta ||
+           dd->ibdeltainprog) {
+               u64 diagc;
+               /* enable counter writes */
+               diagc = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwdiagctrl);
+               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwdiagctrl,
+                                diagc | INFINIPATH_DC_COUNTERWREN);
+
+               if (dd->ibsymdelta || dd->ibdeltainprog) {
+                       val = ipath_read_creg32(dd,
+                                       dd->ipath_cregs->cr_ibsymbolerrcnt);
+                       if (dd->ibdeltainprog)
+                               val -= val - dd->ibsymsnap;
+                       val -= dd->ibsymdelta;
+                       ipath_write_creg(dd,
+                                 dd->ipath_cregs->cr_ibsymbolerrcnt, val);
+               }
+               if (dd->iblnkerrdelta || dd->ibdeltainprog) {
+                       val = ipath_read_creg32(dd,
+                                       dd->ipath_cregs->cr_iblinkerrrecovcnt);
+                       if (dd->ibdeltainprog)
+                               val -= val - dd->iblnkerrsnap;
+                       val -= dd->iblnkerrdelta;
+                       ipath_write_creg(dd,
+                                  dd->ipath_cregs->cr_iblinkerrrecovcnt, val);
+            }
+
+            /* and disable counter writes */
+            ipath_write_kreg(dd, dd->ipath_kregs->kr_hwdiagctrl, diagc);
+       }
        val |= INFINIPATH_SERDC0_TXIDLE;
        ipath_dbg("Setting TxIdleEn on serdes (config0 = %llx)\n",
                  (unsigned long long) val);
@@ -1749,6 +1785,31 @@ static void ipath_pe_config_jint(struct ipath_devdata *dd, u16 a, u16 b)
 
 static int ipath_pe_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs)
 {
+       if (ibup) {
+               if (dd->ibdeltainprog) {
+                       dd->ibdeltainprog = 0;
+                       dd->ibsymdelta +=
+                               ipath_read_creg32(dd,
+                                 dd->ipath_cregs->cr_ibsymbolerrcnt) -
+                               dd->ibsymsnap;
+                       dd->iblnkerrdelta +=
+                               ipath_read_creg32(dd,
+                                 dd->ipath_cregs->cr_iblinkerrrecovcnt) -
+                               dd->iblnkerrsnap;
+               }
+       } else {
+               dd->ipath_lli_counter = 0;
+               if (!dd->ibdeltainprog) {
+                       dd->ibdeltainprog = 1;
+                       dd->ibsymsnap =
+                               ipath_read_creg32(dd,
+                                 dd->ipath_cregs->cr_ibsymbolerrcnt);
+                       dd->iblnkerrsnap =
+                               ipath_read_creg32(dd,
+                                 dd->ipath_cregs->cr_iblinkerrrecovcnt);
+               }
+       }
+
        ipath_setup_pe_setextled(dd, ipath_ib_linkstate(dd, ibcs),
                ipath_ib_linktrstate(dd, ibcs));
        return 0;
index 9839e20119bcfc64f483f68cf2b18d24e1786f77..b2a9d4c155d14fbe44006156ed76a8ea21d54592 100644 (file)
@@ -951,6 +951,12 @@ static int ipath_7220_bringup_serdes(struct ipath_devdata *dd)
                                 INFINIPATH_HWE_SERDESPLLFAILED);
        }
 
+       dd->ibdeltainprog = 1;
+       dd->ibsymsnap =
+            ipath_read_creg32(dd, dd->ipath_cregs->cr_ibsymbolerrcnt);
+       dd->iblnkerrsnap =
+            ipath_read_creg32(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt);
+
        if (!dd->ipath_ibcddrctrl) {
                /* not on re-init after reset */
                dd->ipath_ibcddrctrl =
@@ -1084,6 +1090,37 @@ static void ipath_7220_config_jint(struct ipath_devdata *dd,
 static void ipath_7220_quiet_serdes(struct ipath_devdata *dd)
 {
        u64 val;
+       if (dd->ibsymdelta || dd->iblnkerrdelta ||
+           dd->ibdeltainprog) {
+               u64 diagc;
+               /* enable counter writes */
+               diagc = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwdiagctrl);
+               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwdiagctrl,
+                                diagc | INFINIPATH_DC_COUNTERWREN);
+
+               if (dd->ibsymdelta || dd->ibdeltainprog) {
+                       val = ipath_read_creg32(dd,
+                                       dd->ipath_cregs->cr_ibsymbolerrcnt);
+                       if (dd->ibdeltainprog)
+                               val -= val - dd->ibsymsnap;
+                       val -= dd->ibsymdelta;
+                       ipath_write_creg(dd,
+                                 dd->ipath_cregs->cr_ibsymbolerrcnt, val);
+               }
+               if (dd->iblnkerrdelta || dd->ibdeltainprog) {
+                       val = ipath_read_creg32(dd,
+                                       dd->ipath_cregs->cr_iblinkerrrecovcnt);
+                       if (dd->ibdeltainprog)
+                               val -= val - dd->iblnkerrsnap;
+                       val -= dd->iblnkerrdelta;
+                       ipath_write_creg(dd,
+                                  dd->ipath_cregs->cr_iblinkerrrecovcnt, val);
+            }
+
+            /* and disable counter writes */
+            ipath_write_kreg(dd, dd->ipath_kregs->kr_hwdiagctrl, diagc);
+       }
+
        dd->ipath_flags &= ~IPATH_IB_AUTONEG_INPROG;
        wake_up(&dd->ipath_autoneg_wait);
        cancel_delayed_work(&dd->ipath_autoneg_work);
@@ -2325,7 +2362,7 @@ static void try_auto_neg(struct ipath_devdata *dd)
 
 static int ipath_7220_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs)
 {
-       int ret = 0;
+       int ret = 0, symadj = 0;
        u32 ltstate = ipath_ib_linkstate(dd, ibcs);
 
        dd->ipath_link_width_active =
@@ -2368,6 +2405,13 @@ static int ipath_7220_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs)
                        ipath_dbg("DDR negotiation try, %u/%u\n",
                                dd->ipath_autoneg_tries,
                                IPATH_AUTONEG_TRIES);
+                       if (!dd->ibdeltainprog) {
+                               dd->ibdeltainprog = 1;
+                               dd->ibsymsnap = ipath_read_creg32(dd,
+                                       dd->ipath_cregs->cr_ibsymbolerrcnt);
+                               dd->iblnkerrsnap = ipath_read_creg32(dd,
+                                       dd->ipath_cregs->cr_iblinkerrrecovcnt);
+                       }
                        try_auto_neg(dd);
                        ret = 1; /* no other IB status change processing */
                } else if ((dd->ipath_flags & IPATH_IB_AUTONEG_INPROG)
@@ -2388,6 +2432,7 @@ static int ipath_7220_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs)
                                set_speed_fast(dd,
                                        dd->ipath_link_speed_enabled);
                                wake_up(&dd->ipath_autoneg_wait);
+                               symadj = 1;
                        } else if (dd->ipath_flags & IPATH_IB_AUTONEG_FAILED) {
                                /*
                                 * clear autoneg failure flag, and do setup
@@ -2403,22 +2448,28 @@ static int ipath_7220_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs)
                                        IBA7220_IBC_IBTA_1_2_MASK;
                                ipath_write_kreg(dd,
                                        IPATH_KREG_OFFSET(IBNCModeCtrl), 0);
+                               symadj = 1;
                        }
                }
                /*
-                * if we are in 1X, and are in autoneg width, it
-                * could be due to an xgxs problem, so if we haven't
+                * if we are in 1X on rev1 only, and are in autoneg width,
+                * it could be due to an xgxs problem, so if we haven't
                 * already tried, try twice to get to 4X; if we
                 * tried, and couldn't, report it, since it will
                 * probably not be what is desired.
                 */
-               if ((dd->ipath_link_width_enabled & (IB_WIDTH_1X |
+               if (dd->ipath_minrev == 1 &&
+                   (dd->ipath_link_width_enabled & (IB_WIDTH_1X |
                        IB_WIDTH_4X)) == (IB_WIDTH_1X | IB_WIDTH_4X)
                        && dd->ipath_link_width_active == IB_WIDTH_1X
                        && dd->ipath_x1_fix_tries < 3) {
-                       if (++dd->ipath_x1_fix_tries == 3)
+                    if (++dd->ipath_x1_fix_tries == 3) {
                                dev_info(&dd->pcidev->dev,
                                        "IB link is in 1X mode\n");
+                               if (!(dd->ipath_flags &
+                                     IPATH_IB_AUTONEG_INPROG))
+                                       symadj = 1;
+                    }
                        else {
                                ipath_cdbg(VERBOSE, "IB 1X in "
                                        "auto-width, try %u to be "
@@ -2429,7 +2480,8 @@ static int ipath_7220_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs)
                                dd->ipath_f_xgxs_reset(dd);
                                ret = 1; /* skip other processing */
                        }
-               }
+               } else if (!(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG))
+                       symadj = 1;
 
                if (!ret) {
                        dd->delay_mult = rate_to_delay
@@ -2440,6 +2492,25 @@ static int ipath_7220_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs)
                }
        }
 
+       if (symadj) {
+               if (dd->ibdeltainprog) {
+                       dd->ibdeltainprog = 0;
+                       dd->ibsymdelta += ipath_read_creg32(dd,
+                               dd->ipath_cregs->cr_ibsymbolerrcnt) -
+                               dd->ibsymsnap;
+                       dd->iblnkerrdelta += ipath_read_creg32(dd,
+                               dd->ipath_cregs->cr_iblinkerrrecovcnt) -
+                               dd->iblnkerrsnap;
+               }
+       } else if (!ibup && !dd->ibdeltainprog
+                  && !(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG)) {
+               dd->ibdeltainprog = 1;
+               dd->ibsymsnap = ipath_read_creg32(dd,
+                                    dd->ipath_cregs->cr_ibsymbolerrcnt);
+               dd->iblnkerrsnap = ipath_read_creg32(dd,
+                                    dd->ipath_cregs->cr_iblinkerrrecovcnt);
+       }
+
        if (!ret)
                ipath_setup_7220_setextled(dd, ipath_ib_linkstate(dd, ibcs),
                        ltstate);
index 3e5baa43fc822468b232819f62716a2aba1d8c1d..64aeefbd2a5d74c09ad5debc4f0cfe0177ae7263 100644 (file)
@@ -229,6 +229,7 @@ static int init_chip_first(struct ipath_devdata *dd)
        spin_lock_init(&dd->ipath_kernel_tid_lock);
        spin_lock_init(&dd->ipath_user_tid_lock);
        spin_lock_init(&dd->ipath_sendctrl_lock);
+       spin_lock_init(&dd->ipath_uctxt_lock);
        spin_lock_init(&dd->ipath_sdma_lock);
        spin_lock_init(&dd->ipath_gpio_lock);
        spin_lock_init(&dd->ipath_eep_st_lock);
index 0bd8bcb184a18cf38c44cc33ae980210711dd44c..6ba4861dd6ac683e4637ea86a14bac9101af7164 100644 (file)
@@ -355,6 +355,19 @@ struct ipath_devdata {
        /* errors masked because they occur too fast */
        ipath_err_t ipath_maskederrs;
        u64 ipath_lastlinkrecov; /* link recoveries at last ACTIVE */
+       /* these 5 fields are used to establish deltas for IB Symbol
+        * errors and linkrecovery errors. They can be reported on
+        * some chips during link negotiation prior to INIT, and with
+        * DDR when faking DDR negotiations with non-IBTA switches.
+        * The chip counters are adjusted at driver unload if there is
+        * a non-zero delta.
+        */
+       u64 ibdeltainprog;
+       u64 ibsymdelta;
+       u64 ibsymsnap;
+       u64 iblnkerrdelta;
+       u64 iblnkerrsnap;
+
        /* time in jiffies at which to re-enable maskederrs */
        unsigned long ipath_unmasktime;
        /* count of egrfull errors, combined for all ports */
@@ -464,6 +477,8 @@ struct ipath_devdata {
        spinlock_t ipath_kernel_tid_lock;
        spinlock_t ipath_user_tid_lock;
        spinlock_t ipath_sendctrl_lock;
+       /* around ipath_pd and (user ports) port_cnt use (intr vs free) */
+       spinlock_t ipath_uctxt_lock;
 
        /*
         * IPATH_STATUS_*,
index 8f32b17a5eed019d304a4b3c4283019274e69170..c0e933fec2187d37c1d40b59f78ad4feea90c367 100644 (file)
@@ -132,6 +132,7 @@ int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
         * (see ipath_get_dma_mr and ipath_dma.c).
         */
        if (sge->lkey == 0) {
+               /* always a kernel port, no locking needed */
                struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
 
                if (pd->user) {
@@ -211,6 +212,7 @@ int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
         * (see ipath_get_dma_mr and ipath_dma.c).
         */
        if (rkey == 0) {
+               /* always a kernel port, no locking needed */
                struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
 
                if (pd->user) {
index be4fc9ada8e7f1e872b740686a74404ae53525fc..17a123197477324bb78f6fc10b42f444b6cabbcb 100644 (file)
@@ -348,6 +348,7 @@ bail:
  */
 static int get_pkeys(struct ipath_devdata *dd, u16 * pkeys)
 {
+       /* always a kernel port, no locking needed */
        struct ipath_portdata *pd = dd->ipath_pd[0];
 
        memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys));
@@ -730,6 +731,7 @@ static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys)
        int i;
        int changed = 0;
 
+       /* always a kernel port, no locking needed */
        pd = dd->ipath_pd[0];
 
        for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
index 4715911101e4fff8363c0e748929302a213d05bc..3a5a89b609c4e99bc761e28193b24bd01600250c 100644 (file)
@@ -745,6 +745,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
        struct ipath_swqe *swq = NULL;
        struct ipath_ibdev *dev;
        size_t sz;
+       size_t sg_list_sz;
        struct ib_qp *ret;
 
        if (init_attr->create_flags) {
@@ -789,19 +790,31 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
                        goto bail;
                }
                sz = sizeof(*qp);
+               sg_list_sz = 0;
                if (init_attr->srq) {
                        struct ipath_srq *srq = to_isrq(init_attr->srq);
 
-                       sz += sizeof(*qp->r_sg_list) *
-                               srq->rq.max_sge;
-               } else
-                       sz += sizeof(*qp->r_sg_list) *
-                               init_attr->cap.max_recv_sge;
-               qp = kmalloc(sz, GFP_KERNEL);
+                       if (srq->rq.max_sge > 1)
+                               sg_list_sz = sizeof(*qp->r_sg_list) *
+                                       (srq->rq.max_sge - 1);
+               } else if (init_attr->cap.max_recv_sge > 1)
+                       sg_list_sz = sizeof(*qp->r_sg_list) *
+                               (init_attr->cap.max_recv_sge - 1);
+               qp = kmalloc(sz + sg_list_sz, GFP_KERNEL);
                if (!qp) {
                        ret = ERR_PTR(-ENOMEM);
                        goto bail_swq;
                }
+               if (sg_list_sz && (init_attr->qp_type == IB_QPT_UD ||
+                   init_attr->qp_type == IB_QPT_SMI ||
+                   init_attr->qp_type == IB_QPT_GSI)) {
+                       qp->r_ud_sg_list = kmalloc(sg_list_sz, GFP_KERNEL);
+                       if (!qp->r_ud_sg_list) {
+                               ret = ERR_PTR(-ENOMEM);
+                               goto bail_qp;
+                       }
+               } else
+                       qp->r_ud_sg_list = NULL;
                if (init_attr->srq) {
                        sz = 0;
                        qp->r_rq.size = 0;
@@ -818,7 +831,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
                                              qp->r_rq.size * sz);
                        if (!qp->r_rq.wq) {
                                ret = ERR_PTR(-ENOMEM);
-                               goto bail_qp;
+                               goto bail_sg_list;
                        }
                }
 
@@ -848,7 +861,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
                if (err) {
                        ret = ERR_PTR(err);
                        vfree(qp->r_rq.wq);
-                       goto bail_qp;
+                       goto bail_sg_list;
                }
                qp->ip = NULL;
                qp->s_tx = NULL;
@@ -925,6 +938,8 @@ bail_ip:
                vfree(qp->r_rq.wq);
        ipath_free_qp(&dev->qp_table, qp);
        free_qpn(&dev->qp_table, qp->ibqp.qp_num);
+bail_sg_list:
+       kfree(qp->r_ud_sg_list);
 bail_qp:
        kfree(qp);
 bail_swq:
@@ -989,6 +1004,7 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
                kref_put(&qp->ip->ref, ipath_release_mmap_info);
        else
                vfree(qp->r_rq.wq);
+       kfree(qp->r_ud_sg_list);
        vfree(qp->s_wq);
        kfree(qp);
        return 0;
index 7b93cda1a4bdcf9681c652fa525058078b48f570..9170710b950ddfafe16fffe2a7b7a157d8d0d97b 100644 (file)
@@ -573,9 +573,8 @@ int ipath_make_rc_req(struct ipath_qp *qp)
                ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len);
                qp->s_state = OP(RDMA_READ_REQUEST);
                hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
-               bth2 = qp->s_psn++ & IPATH_PSN_MASK;
-               if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
-                       qp->s_next_psn = qp->s_psn;
+               bth2 = qp->s_psn & IPATH_PSN_MASK;
+               qp->s_psn = wqe->lpsn + 1;
                ss = NULL;
                len = 0;
                qp->s_cur++;
index 284c9bca517e3bc7f55bd7661c3cb2a0ab7bfa7e..8e255adf5d9bf8b400712fdaf11b3ca8eb5a9d40 100644 (file)
@@ -698,10 +698,8 @@ retry:
 
        addr = dma_map_single(&dd->pcidev->dev, tx->txreq.map_addr,
                              tx->map_len, DMA_TO_DEVICE);
-       if (dma_mapping_error(&dd->pcidev->dev, addr)) {
-               ret = -EIO;
-               goto unlock;
-       }
+       if (dma_mapping_error(&dd->pcidev->dev, addr))
+               goto ioerr;
 
        dwoffset = tx->map_len >> 2;
        make_sdma_desc(dd, sdmadesc, (u64) addr, dwoffset, 0);
@@ -741,6 +739,8 @@ retry:
                dw = (len + 3) >> 2;
                addr = dma_map_single(&dd->pcidev->dev, sge->vaddr, dw << 2,
                                      DMA_TO_DEVICE);
+               if (dma_mapping_error(&dd->pcidev->dev, addr))
+                       goto unmap;
                make_sdma_desc(dd, sdmadesc, (u64) addr, dw, dwoffset);
                /* SDmaUseLargeBuf has to be set in every descriptor */
                if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF)
@@ -798,7 +798,18 @@ retry:
        list_add_tail(&tx->txreq.list, &dd->ipath_sdma_activelist);
        if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_VL15)
                vl15_watchdog_enq(dd);
-
+       goto unlock;
+
+unmap:
+       while (tail != dd->ipath_sdma_descq_tail) {
+               if (!tail)
+                       tail = dd->ipath_sdma_descq_cnt - 1;
+               else
+                       tail--;
+               unmap_desc(dd, tail);
+       }
+ioerr:
+       ret = -EIO;
 unlock:
        spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
 fail:
index c8e3d65f0de80fa2d0b2e0556cb66091c126dbf5..f63e143e3292130cceea8426442e6e263b797d53 100644 (file)
@@ -112,6 +112,14 @@ u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
                        dd->ipath_lastrpkts = val;
                }
                val64 = dd->ipath_rpkts;
+       } else if (creg == dd->ipath_cregs->cr_ibsymbolerrcnt) {
+               if (dd->ibdeltainprog)
+                       val64 -= val64 - dd->ibsymsnap;
+               val64 -= dd->ibsymdelta;
+       } else if (creg == dd->ipath_cregs->cr_iblinkerrrecovcnt) {
+               if (dd->ibdeltainprog)
+                       val64 -= val64 - dd->iblnkerrsnap;
+               val64 -= dd->iblnkerrdelta;
        } else
                val64 = (u64) val;
 
index 729446f56aab580d2ffcd7200a124d22ea6e7c3e..91c74cc797ae78a75030a01553ae3b39fb0510b0 100644 (file)
@@ -70,8 +70,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
                goto done;
        }
 
-       rsge.sg_list = NULL;
-
        /*
         * Check that the qkey matches (except for QP0, see 9.6.1.4.1).
         * Qkeys with the high order bit set mean use the
@@ -115,21 +113,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
                rq = &qp->r_rq;
        }
 
-       if (rq->max_sge > 1) {
-               /*
-                * XXX We could use GFP_KERNEL if ipath_do_send()
-                * was always called from the tasklet instead of
-                * from ipath_post_send().
-                */
-               rsge.sg_list = kmalloc((rq->max_sge - 1) *
-                                       sizeof(struct ipath_sge),
-                                      GFP_ATOMIC);
-               if (!rsge.sg_list) {
-                       dev->n_pkt_drops++;
-                       goto drop;
-               }
-       }
-
        /*
         * Get the next work request entry to find where to put the data.
         * Note that it is safe to drop the lock after changing rq->tail
@@ -147,6 +130,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
                goto drop;
        }
        wqe = get_rwqe_ptr(rq, tail);
+       rsge.sg_list = qp->r_ud_sg_list;
        if (!ipath_init_sge(qp, wqe, &rlen, &rsge)) {
                spin_unlock_irqrestore(&rq->lock, flags);
                dev->n_pkt_drops++;
@@ -242,7 +226,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
        ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
                       swqe->wr.send_flags & IB_SEND_SOLICITED);
 drop:
-       kfree(rsge.sg_list);
        if (atomic_dec_and_test(&qp->refcount))
                wake_up(&qp->wait);
 done:;
index eabc4247860b429db0d6603bba4e16c943d0af7c..cdf0e6abd34d53dabeec4548e74168a171462620 100644 (file)
@@ -1852,7 +1852,7 @@ unsigned ipath_get_npkeys(struct ipath_devdata *dd)
 }
 
 /**
- * ipath_get_pkey - return the indexed PKEY from the port PKEY table
+ * ipath_get_pkey - return the indexed PKEY from the port PKEY table
  * @dd: the infinipath device
  * @index: the PKEY index
  */
@@ -1860,6 +1860,7 @@ unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index)
 {
        unsigned ret;
 
+       /* always a kernel port, no locking needed */
        if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
                ret = 0;
        else
index 9d12ae8a778eba131e5936e9e37e22a3390bfe10..11e3f613df939e69f6abda658a642fdc75918e06 100644 (file)
@@ -431,6 +431,7 @@ struct ipath_qp {
        u32 s_lsn;              /* limit sequence number (credit) */
        struct ipath_swqe *s_wq;        /* send work queue */
        struct ipath_swqe *s_wqe;
+       struct ipath_sge *r_ud_sg_list;
        struct ipath_rq r_rq;           /* receive work queue */
        struct ipath_sge r_sg_list[0];  /* verified SGEs */
 };
index 18308494a195bb0d4af1a068ddfd319a3f7b3488..8415ecce5c4c0ba5e646bea4e943f12ddcb94bdb 100644 (file)
@@ -222,7 +222,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
        }
 
        err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
-                           cq->db.dma, &cq->mcq, 0);
+                           cq->db.dma, &cq->mcq, vector, 0);
        if (err)
                goto err_dbmap;
 
@@ -325,15 +325,17 @@ static int mlx4_ib_get_outstanding_cqes(struct mlx4_ib_cq *cq)
 
 static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq)
 {
-       struct mlx4_cqe *cqe;
+       struct mlx4_cqe *cqe, *new_cqe;
        int i;
 
        i = cq->mcq.cons_index;
        cqe = get_cqe(cq, i & cq->ibcq.cqe);
        while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) {
-               memcpy(get_cqe_from_buf(&cq->resize_buf->buf,
-                                       (i + 1) & cq->resize_buf->cqe),
-                       get_cqe(cq, i & cq->ibcq.cqe), sizeof(struct mlx4_cqe));
+               new_cqe = get_cqe_from_buf(&cq->resize_buf->buf,
+                                          (i + 1) & cq->resize_buf->cqe);
+               memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), sizeof(struct mlx4_cqe));
+               new_cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) |
+                       (((i + 1) & (cq->resize_buf->cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0);
                cqe = get_cqe(cq, ++i & cq->ibcq.cqe);
        }
        ++cq->mcq.cons_index;
index 2e80f8f47b02ad1fbc0fc1d3fe9ae3bf24313f5a..dcefe1fceb5ca9feb00f64aba343204e96e1bf14 100644 (file)
@@ -578,7 +578,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
        mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
                ibdev->num_ports++;
        ibdev->ib_dev.phys_port_cnt     = ibdev->num_ports;
-       ibdev->ib_dev.num_comp_vectors  = 1;
+       ibdev->ib_dev.num_comp_vectors  = dev->caps.num_comp_vectors;
        ibdev->ib_dev.dma_device        = &dev->pdev->dev;
 
        ibdev->ib_dev.uverbs_abi_ver    = MLX4_IB_UVERBS_ABI_VERSION;
index 1595dc7bba9dcd6f58d1ff9134f6002ebf9b31b7..13a5bb1a7bcf5895ce829e686eae14dc2ddd4980 100644 (file)
 
 #ifdef CONFIG_INFINIBAND_NES_DEBUG
 #define nes_debug(level, fmt, args...) \
+do { \
        if (level & nes_debug_level) \
-               printk(KERN_ERR PFX "%s[%u]: " fmt, __func__, __LINE__, ##args)
-
-#define assert(expr)                                                \
-if (!(expr)) {                                                       \
-       printk(KERN_ERR PFX "Assertion failed! %s, %s, %s, line %d\n",  \
-                  #expr, __FILE__, __func__, __LINE__);                \
-}
+               printk(KERN_ERR PFX "%s[%u]: " fmt, __func__, __LINE__, ##args); \
+} while (0)
+
+#define assert(expr) \
+do { \
+       if (!(expr)) { \
+               printk(KERN_ERR PFX "Assertion failed! %s, %s, %s, line %d\n", \
+                          #expr, __FILE__, __func__, __LINE__); \
+       } \
+} while (0)
 
 #define NES_EVENT_TIMEOUT   1200000
 #else
index 2caf9da81ad50d6db5db8bce1c24cb21c2bf46d5..cb48041bed694857a8bc4757f1ad854e93deed0f 100644 (file)
@@ -86,15 +86,14 @@ static int mini_cm_accept(struct nes_cm_core *, struct ietf_mpa_frame *,
        struct nes_cm_node *);
 static int mini_cm_reject(struct nes_cm_core *, struct ietf_mpa_frame *,
        struct nes_cm_node *);
-static void mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *,
+static int mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *,
        struct sk_buff *);
 static int mini_cm_dealloc_core(struct nes_cm_core *);
 static int mini_cm_get(struct nes_cm_core *);
 static int mini_cm_set(struct nes_cm_core *, u32, u32);
 
-static struct sk_buff *form_cm_frame(struct sk_buff *, struct nes_cm_node *,
+static void form_cm_frame(struct sk_buff *, struct nes_cm_node *,
        void *, u32, void *, u32, u8);
-static struct sk_buff *get_free_pkt(struct nes_cm_node *cm_node);
 static int add_ref_cm_node(struct nes_cm_node *);
 static int rem_ref_cm_node(struct nes_cm_core *, struct nes_cm_node *);
 
@@ -251,7 +250,7 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 len)
  * form_cm_frame - get a free packet and build empty frame Use
  * node info to build.
  */
-static struct sk_buff *form_cm_frame(struct sk_buff *skb,
+static void form_cm_frame(struct sk_buff *skb,
        struct nes_cm_node *cm_node, void *options, u32 optionsize,
        void *data, u32 datasize, u8 flags)
 {
@@ -339,7 +338,6 @@ static struct sk_buff *form_cm_frame(struct sk_buff *skb,
        skb_shinfo(skb)->nr_frags = 0;
        cm_packets_created++;
 
-       return skb;
 }
 
 
@@ -356,7 +354,6 @@ static void print_core(struct nes_cm_core *core)
 
        nes_debug(NES_DBG_CM, "State         : %u \n",  core->state);
 
-       nes_debug(NES_DBG_CM, "Tx Free cnt   : %u \n", skb_queue_len(&core->tx_free_list));
        nes_debug(NES_DBG_CM, "Listen Nodes  : %u \n", atomic_read(&core->listen_node_cnt));
        nes_debug(NES_DBG_CM, "Active Nodes  : %u \n", atomic_read(&core->node_cnt));
 
@@ -381,8 +378,6 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb,
        int ret = 0;
        u32 was_timer_set;
 
-       if (!cm_node)
-               return -EINVAL;
        new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC);
        if (!new_send)
                return -1;
@@ -459,13 +454,23 @@ static void nes_cm_timer_tick(unsigned long pass)
        int ret = NETDEV_TX_OK;
        enum nes_cm_node_state last_state;
 
+       struct list_head timer_list;
+       INIT_LIST_HEAD(&timer_list);
        spin_lock_irqsave(&cm_core->ht_lock, flags);
 
        list_for_each_safe(list_node, list_core_temp,
-               &cm_core->connected_nodes) {
+                               &cm_core->connected_nodes) {
                cm_node = container_of(list_node, struct nes_cm_node, list);
-               add_ref_cm_node(cm_node);
-               spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+               if (!list_empty(&cm_node->recv_list) || (cm_node->send_entry)) {
+                       add_ref_cm_node(cm_node);
+                       list_add(&cm_node->timer_entry, &timer_list);
+               }
+       }
+       spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+       list_for_each_safe(list_node, list_core_temp, &timer_list) {
+               cm_node = container_of(list_node, struct nes_cm_node,
+                                       timer_entry);
                spin_lock_irqsave(&cm_node->recv_list_lock, flags);
                list_for_each_safe(list_core, list_node_temp,
                        &cm_node->recv_list) {
@@ -519,7 +524,7 @@ static void nes_cm_timer_tick(unsigned long pass)
                do {
                        send_entry = cm_node->send_entry;
                        if (!send_entry)
-                               continue;
+                               break;
                        if (time_after(send_entry->timetosend, jiffies)) {
                                if (cm_node->state != NES_CM_STATE_TSA) {
                                        if ((nexttimeout >
@@ -528,18 +533,18 @@ static void nes_cm_timer_tick(unsigned long pass)
                                                nexttimeout =
                                                        send_entry->timetosend;
                                                settimer = 1;
-                                               continue;
+                                               break;
                                        }
                                } else {
                                        free_retrans_entry(cm_node);
-                                       continue;
+                                       break;
                                }
                        }
 
                        if ((cm_node->state == NES_CM_STATE_TSA) ||
                                (cm_node->state == NES_CM_STATE_CLOSED)) {
                                free_retrans_entry(cm_node);
-                               continue;
+                               break;
                        }
 
                        if (!send_entry->retranscount ||
@@ -557,7 +562,7 @@ static void nes_cm_timer_tick(unsigned long pass)
                                                NES_CM_EVENT_ABORTED);
                                spin_lock_irqsave(&cm_node->retrans_list_lock,
                                        flags);
-                               continue;
+                               break;
                        }
                        atomic_inc(&send_entry->skb->users);
                        cm_packets_retrans++;
@@ -583,7 +588,7 @@ static void nes_cm_timer_tick(unsigned long pass)
                                send_entry->retrycount--;
                                nexttimeout = jiffies + NES_SHORT_TIME;
                                settimer = 1;
-                               continue;
+                               break;
                        } else {
                                cm_packets_sent++;
                        }
@@ -615,14 +620,12 @@ static void nes_cm_timer_tick(unsigned long pass)
 
                spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
                rem_ref_cm_node(cm_node->cm_core, cm_node);
-               spin_lock_irqsave(&cm_core->ht_lock, flags);
                if (ret != NETDEV_TX_OK) {
                        nes_debug(NES_DBG_CM, "rexmit failed for cm_node=%p\n",
                                cm_node);
                        break;
                }
        }
-       spin_unlock_irqrestore(&cm_core->ht_lock, flags);
 
        if (settimer) {
                if (!timer_pending(&cm_core->tcp_timer)) {
@@ -683,7 +686,7 @@ static int send_syn(struct nes_cm_node *cm_node, u32 sendack,
        optionssize += 1;
 
        if (!skb)
-               skb = get_free_pkt(cm_node);
+               skb = dev_alloc_skb(MAX_CM_BUFFER);
        if (!skb) {
                nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
                return -1;
@@ -708,7 +711,7 @@ static int send_reset(struct nes_cm_node *cm_node, struct sk_buff *skb)
        int flags = SET_RST | SET_ACK;
 
        if (!skb)
-               skb = get_free_pkt(cm_node);
+               skb = dev_alloc_skb(MAX_CM_BUFFER);
        if (!skb) {
                nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
                return -1;
@@ -729,7 +732,7 @@ static int send_ack(struct nes_cm_node *cm_node, struct sk_buff *skb)
        int ret;
 
        if (!skb)
-               skb = get_free_pkt(cm_node);
+               skb = dev_alloc_skb(MAX_CM_BUFFER);
 
        if (!skb) {
                nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
@@ -752,7 +755,7 @@ static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb)
 
        /* if we didn't get a frame get one */
        if (!skb)
-               skb = get_free_pkt(cm_node);
+               skb = dev_alloc_skb(MAX_CM_BUFFER);
 
        if (!skb) {
                nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
@@ -766,46 +769,6 @@ static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb)
 }
 
 
-/**
- * get_free_pkt
- */
-static struct sk_buff *get_free_pkt(struct nes_cm_node *cm_node)
-{
-       struct sk_buff *skb, *new_skb;
-
-       /* check to see if we need to repopulate the free tx pkt queue */
-       if (skb_queue_len(&cm_node->cm_core->tx_free_list) < NES_CM_FREE_PKT_LO_WATERMARK) {
-               while (skb_queue_len(&cm_node->cm_core->tx_free_list) <
-                               cm_node->cm_core->free_tx_pkt_max) {
-                       /* replace the frame we took, we won't get it back */
-                       new_skb = dev_alloc_skb(cm_node->cm_core->mtu);
-                       BUG_ON(!new_skb);
-                       /* add a replacement frame to the free tx list head */
-                       skb_queue_head(&cm_node->cm_core->tx_free_list, new_skb);
-               }
-       }
-
-       skb = skb_dequeue(&cm_node->cm_core->tx_free_list);
-
-       return skb;
-}
-
-
-/**
- * make_hashkey - generate hash key from node tuple
- */
-static inline int make_hashkey(u16 loc_port, nes_addr_t loc_addr, u16 rem_port,
-               nes_addr_t rem_addr)
-{
-       u32 hashkey = 0;
-
-       hashkey = loc_addr + rem_addr + loc_port + rem_port;
-       hashkey = (hashkey % NES_CM_HASHTABLE_SIZE);
-
-       return hashkey;
-}
-
-
 /**
  * find_node - find a cm node that matches the reference cm node
  */
@@ -813,13 +776,9 @@ static struct nes_cm_node *find_node(struct nes_cm_core *cm_core,
                u16 rem_port, nes_addr_t rem_addr, u16 loc_port, nes_addr_t loc_addr)
 {
        unsigned long flags;
-       u32 hashkey;
        struct list_head *hte;
        struct nes_cm_node *cm_node;
 
-       /* make a hash index key for this packet */
-       hashkey = make_hashkey(loc_port, loc_addr, rem_port, rem_addr);
-
        /* get a handle on the hte */
        hte = &cm_core->connected_nodes;
 
@@ -887,7 +846,6 @@ static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core,
 static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node)
 {
        unsigned long flags;
-       u32 hashkey;
        struct list_head *hte;
 
        if (!cm_node || !cm_core)
@@ -896,11 +854,6 @@ static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node
        nes_debug(NES_DBG_CM, "Adding Node %p to Active Connection HT\n",
                cm_node);
 
-       /* first, make an index into our hash table */
-       hashkey = make_hashkey(cm_node->loc_port, cm_node->loc_addr,
-                       cm_node->rem_port, cm_node->rem_addr);
-       cm_node->hashkey = hashkey;
-
        spin_lock_irqsave(&cm_core->ht_lock, flags);
 
        /* get a handle on the hash table element (list head for this slot) */
@@ -925,28 +878,36 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
        struct list_head *list_pos = NULL;
        struct list_head *list_temp = NULL;
        struct nes_cm_node *cm_node = NULL;
+       struct list_head reset_list;
 
        nes_debug(NES_DBG_CM, "attempting listener= %p free_nodes= %d, "
                "refcnt=%d\n", listener, free_hanging_nodes,
                atomic_read(&listener->ref_count));
        /* free non-accelerated child nodes for this listener */
+       INIT_LIST_HEAD(&reset_list);
        if (free_hanging_nodes) {
                spin_lock_irqsave(&cm_core->ht_lock, flags);
                list_for_each_safe(list_pos, list_temp,
-                       &g_cm_core->connected_nodes) {
+                                  &g_cm_core->connected_nodes) {
                        cm_node = container_of(list_pos, struct nes_cm_node,
                                list);
                        if ((cm_node->listener == listener) &&
-                               (!cm_node->accelerated)) {
-                               cleanup_retrans_entry(cm_node);
-                               spin_unlock_irqrestore(&cm_core->ht_lock,
-                                       flags);
-                               send_reset(cm_node, NULL);
-                               spin_lock_irqsave(&cm_core->ht_lock, flags);
+                           (!cm_node->accelerated)) {
+                               add_ref_cm_node(cm_node);
+                               list_add(&cm_node->reset_entry, &reset_list);
                        }
                }
                spin_unlock_irqrestore(&cm_core->ht_lock, flags);
        }
+
+       list_for_each_safe(list_pos, list_temp, &reset_list) {
+               cm_node = container_of(list_pos, struct nes_cm_node,
+                                       reset_entry);
+               cleanup_retrans_entry(cm_node);
+               send_reset(cm_node, NULL);
+               rem_ref_cm_node(cm_node->cm_core, cm_node);
+       }
+
        spin_lock_irqsave(&cm_core->listen_list_lock, flags);
        if (!atomic_dec_return(&listener->ref_count)) {
                list_del(&listener->list);
@@ -1126,7 +1087,10 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
 
        cm_node->loopbackpartner = NULL;
        /* get the mac addr for the remote node */
-       arpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE);
+       if (ipv4_is_loopback(htonl(cm_node->rem_addr)))
+               arpindex = nes_arp_table(nesdev, ntohl(nesvnic->local_ipaddr), NULL, NES_ARP_RESOLVE);
+       else
+               arpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE);
        if (arpindex < 0) {
                arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr);
                if (arpindex < 0) {
@@ -1306,7 +1270,6 @@ static void drop_packet(struct sk_buff *skb)
 static void handle_fin_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
        struct tcphdr *tcph)
 {
-       atomic_inc(&cm_resets_recvd);
        nes_debug(NES_DBG_CM, "Received FIN, cm_node = %p, state = %u. "
                "refcnt=%d\n", cm_node, cm_node->state,
                atomic_read(&cm_node->ref_count));
@@ -1344,6 +1307,7 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
 {
 
        int     reset = 0;      /* whether to send reset in case of err.. */
+       int     passive_state;
        atomic_inc(&cm_resets_recvd);
        nes_debug(NES_DBG_CM, "Received Reset, cm_node = %p, state = %u."
                        " refcnt=%d\n", cm_node, cm_node->state,
@@ -1357,7 +1321,14 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
                        cm_node->listener, cm_node->state);
                active_open_err(cm_node, skb, reset);
                break;
-       /* For PASSIVE open states, remove the cm_node event */
+       case NES_CM_STATE_MPAREQ_RCVD:
+               passive_state = atomic_add_return(1, &cm_node->passive_state);
+               if (passive_state ==  NES_SEND_RESET_EVENT)
+                       create_event(cm_node, NES_CM_EVENT_RESET);
+               cleanup_retrans_entry(cm_node);
+               cm_node->state = NES_CM_STATE_CLOSED;
+               dev_kfree_skb_any(skb);
+               break;
        case NES_CM_STATE_ESTABLISHED:
        case NES_CM_STATE_SYN_RCVD:
        case NES_CM_STATE_LISTENING:
@@ -1365,7 +1336,14 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
                passive_open_err(cm_node, skb, reset);
                break;
        case NES_CM_STATE_TSA:
+               active_open_err(cm_node, skb, reset);
+               break;
+       case NES_CM_STATE_CLOSED:
+               cleanup_retrans_entry(cm_node);
+               drop_packet(skb);
+               break;
        default:
+               drop_packet(skb);
                break;
        }
 }
@@ -1394,6 +1372,9 @@ static void handle_rcv_mpa(struct nes_cm_node *cm_node, struct sk_buff *skb,
                dev_kfree_skb_any(skb);
                if (type == NES_CM_EVENT_CONNECTED)
                        cm_node->state = NES_CM_STATE_TSA;
+               else
+                       atomic_set(&cm_node->passive_state,
+                                       NES_PASSIVE_STATE_INDICATED);
                create_event(cm_node, type);
 
        }
@@ -1474,7 +1455,7 @@ static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
        int optionsize;
 
        optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
-       skb_pull(skb, tcph->doff << 2);
+       skb_trim(skb, 0);
        inc_sequence = ntohl(tcph->seq);
 
        switch (cm_node->state) {
@@ -1507,6 +1488,10 @@ static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
                cm_node->state = NES_CM_STATE_SYN_RCVD;
                send_syn(cm_node, 1, skb);
                break;
+       case NES_CM_STATE_CLOSED:
+               cleanup_retrans_entry(cm_node);
+               send_reset(cm_node, skb);
+               break;
        case NES_CM_STATE_TSA:
        case NES_CM_STATE_ESTABLISHED:
        case NES_CM_STATE_FIN_WAIT1:
@@ -1515,7 +1500,6 @@ static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
        case NES_CM_STATE_LAST_ACK:
        case NES_CM_STATE_CLOSING:
        case NES_CM_STATE_UNKNOWN:
-       case NES_CM_STATE_CLOSED:
        default:
                drop_packet(skb);
                break;
@@ -1531,7 +1515,7 @@ static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
        int optionsize;
 
        optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
-       skb_pull(skb, tcph->doff << 2);
+       skb_trim(skb, 0);
        inc_sequence = ntohl(tcph->seq);
        switch (cm_node->state) {
        case NES_CM_STATE_SYN_SENT:
@@ -1555,6 +1539,12 @@ static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
                /* passive open, so should not be here */
                passive_open_err(cm_node, skb, 1);
                break;
+       case NES_CM_STATE_LISTENING:
+       case NES_CM_STATE_CLOSED:
+               cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
+               cleanup_retrans_entry(cm_node);
+               send_reset(cm_node, skb);
+               break;
        case NES_CM_STATE_ESTABLISHED:
        case NES_CM_STATE_FIN_WAIT1:
        case NES_CM_STATE_FIN_WAIT2:
@@ -1562,7 +1552,6 @@ static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
        case NES_CM_STATE_TSA:
        case NES_CM_STATE_CLOSING:
        case NES_CM_STATE_UNKNOWN:
-       case NES_CM_STATE_CLOSED:
        case NES_CM_STATE_MPAREQ_SENT:
        default:
                drop_packet(skb);
@@ -1577,6 +1566,13 @@ static void handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
        u32 inc_sequence;
        u32 rem_seq_ack;
        u32 rem_seq;
+       int ret;
+       int optionsize;
+       u32 temp_seq = cm_node->tcp_cntxt.loc_seq_num;
+
+       optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
+       cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
+
        if (check_seq(cm_node, tcph, skb))
                return;
 
@@ -1589,7 +1585,18 @@ static void handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
        switch (cm_node->state) {
        case NES_CM_STATE_SYN_RCVD:
                /* Passive OPEN */
+               ret = handle_tcp_options(cm_node, tcph, skb, optionsize, 1);
+               if (ret)
+                       break;
                cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
+               cm_node->tcp_cntxt.loc_seq_num = temp_seq;
+               if (cm_node->tcp_cntxt.rem_ack_num !=
+                   cm_node->tcp_cntxt.loc_seq_num) {
+                       nes_debug(NES_DBG_CM, "rem_ack_num != loc_seq_num\n");
+                       cleanup_retrans_entry(cm_node);
+                       send_reset(cm_node, skb);
+                       return;
+               }
                cm_node->state = NES_CM_STATE_ESTABLISHED;
                if (datasize) {
                        cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
@@ -1621,11 +1628,15 @@ static void handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
                        dev_kfree_skb_any(skb);
                }
                break;
+       case NES_CM_STATE_LISTENING:
+       case NES_CM_STATE_CLOSED:
+               cleanup_retrans_entry(cm_node);
+               send_reset(cm_node, skb);
+               break;
        case NES_CM_STATE_FIN_WAIT1:
        case NES_CM_STATE_SYN_SENT:
        case NES_CM_STATE_FIN_WAIT2:
        case NES_CM_STATE_TSA:
-       case NES_CM_STATE_CLOSED:
        case NES_CM_STATE_MPAREQ_RCVD:
        case NES_CM_STATE_LAST_ACK:
        case NES_CM_STATE_CLOSING:
@@ -1648,9 +1659,9 @@ static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph,
                        nes_debug(NES_DBG_CM, "%s: Node %p, Sending RESET\n",
                                __func__, cm_node);
                        if (passive)
-                               passive_open_err(cm_node, skb, 0);
+                               passive_open_err(cm_node, skb, 1);
                        else
-                               active_open_err(cm_node, skb, 0);
+                               active_open_err(cm_node, skb, 1);
                        return 1;
                }
        }
@@ -1970,6 +1981,7 @@ static int mini_cm_reject(struct nes_cm_core *cm_core,
        struct ietf_mpa_frame *mpa_frame, struct nes_cm_node *cm_node)
 {
        int ret = 0;
+       int passive_state;
 
        nes_debug(NES_DBG_CM, "%s cm_node=%p type=%d state=%d\n",
                __func__, cm_node, cm_node->tcp_cntxt.client, cm_node->state);
@@ -1977,9 +1989,13 @@ static int mini_cm_reject(struct nes_cm_core *cm_core,
        if (cm_node->tcp_cntxt.client)
                return ret;
        cleanup_retrans_entry(cm_node);
-       cm_node->state = NES_CM_STATE_CLOSED;
 
-       ret = send_reset(cm_node, NULL);
+       passive_state = atomic_add_return(1, &cm_node->passive_state);
+       cm_node->state = NES_CM_STATE_CLOSED;
+       if (passive_state == NES_SEND_RESET_EVENT)
+               rem_ref_cm_node(cm_core, cm_node);
+       else
+               ret = send_reset(cm_node, NULL);
        return ret;
 }
 
@@ -2037,7 +2053,7 @@ static int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_nod
  * recv_pkt - recv an ETHERNET packet, and process it through CM
  * node state machine
  */
-static void mini_cm_recv_pkt(struct nes_cm_core *cm_core,
+static int mini_cm_recv_pkt(struct nes_cm_core *cm_core,
        struct nes_vnic *nesvnic, struct sk_buff *skb)
 {
        struct nes_cm_node *cm_node = NULL;
@@ -2045,23 +2061,16 @@ static void mini_cm_recv_pkt(struct nes_cm_core *cm_core,
        struct iphdr *iph;
        struct tcphdr *tcph;
        struct nes_cm_info nfo;
+       int skb_handled = 1;
 
        if (!skb)
-               return;
+               return 0;
        if (skb->len < sizeof(struct iphdr) + sizeof(struct tcphdr)) {
-               dev_kfree_skb_any(skb);
-               return;
+               return 0;
        }
 
        iph = (struct iphdr *)skb->data;
        tcph = (struct tcphdr *)(skb->data + sizeof(struct iphdr));
-       skb_reset_network_header(skb);
-       skb_set_transport_header(skb, sizeof(*tcph));
-       if (!tcph) {
-               dev_kfree_skb_any(skb);
-               return;
-       }
-       skb->len = ntohs(iph->tot_len);
 
        nfo.loc_addr = ntohl(iph->daddr);
        nfo.loc_port = ntohs(tcph->dest);
@@ -2082,23 +2091,21 @@ static void mini_cm_recv_pkt(struct nes_cm_core *cm_core,
                        /* Only type of packet accepted are for */
                        /* the PASSIVE open (syn only) */
                        if ((!tcph->syn) || (tcph->ack)) {
-                               cm_packets_dropped++;
+                               skb_handled = 0;
                                break;
                        }
                        listener = find_listener(cm_core, nfo.loc_addr,
                                nfo.loc_port,
                                NES_CM_LISTENER_ACTIVE_STATE);
-                       if (listener) {
-                               nfo.cm_id = listener->cm_id;
-                               nfo.conn_type = listener->conn_type;
-                       } else {
-                               nes_debug(NES_DBG_CM, "Unable to find listener "
-                                       "for the pkt\n");
-                               cm_packets_dropped++;
-                               dev_kfree_skb_any(skb);
+                       if (!listener) {
+                               nfo.cm_id = NULL;
+                               nfo.conn_type = 0;
+                               nes_debug(NES_DBG_CM, "Unable to find listener for the pkt\n");
+                               skb_handled = 0;
                                break;
                        }
-
+                       nfo.cm_id = listener->cm_id;
+                       nfo.conn_type = listener->conn_type;
                        cm_node = make_cm_node(cm_core, nesvnic, &nfo,
                                listener);
                        if (!cm_node) {
@@ -2124,9 +2131,13 @@ static void mini_cm_recv_pkt(struct nes_cm_core *cm_core,
                        dev_kfree_skb_any(skb);
                        break;
                }
+               skb_reset_network_header(skb);
+               skb_set_transport_header(skb, sizeof(*tcph));
+               skb->len = ntohs(iph->tot_len);
                process_packet(cm_node, skb, cm_core);
                rem_ref_cm_node(cm_core, cm_node);
        } while (0);
+       return skb_handled;
 }
 
 
@@ -2135,10 +2146,7 @@ static void mini_cm_recv_pkt(struct nes_cm_core *cm_core,
  */
 static struct nes_cm_core *nes_cm_alloc_core(void)
 {
-       int i;
-
        struct nes_cm_core *cm_core;
-       struct sk_buff *skb = NULL;
 
        /* setup the CM core */
        /* alloc top level core control structure */
@@ -2156,19 +2164,6 @@ static struct nes_cm_core *nes_cm_alloc_core(void)
 
        atomic_set(&cm_core->events_posted, 0);
 
-       /* init the packet lists */
-       skb_queue_head_init(&cm_core->tx_free_list);
-
-       for (i = 0; i < NES_CM_DEFAULT_FRAME_CNT; i++) {
-               skb = dev_alloc_skb(cm_core->mtu);
-               if (!skb) {
-                       kfree(cm_core);
-                       return NULL;
-               }
-               /* add 'raw' skb to free frame list */
-               skb_queue_head(&cm_core->tx_free_list, skb);
-       }
-
        cm_core->api = &nes_cm_api;
 
        spin_lock_init(&cm_core->ht_lock);
@@ -2397,7 +2392,6 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp)
                        atomic_inc(&cm_disconnects);
                        cm_event.event = IW_CM_EVENT_DISCONNECT;
                        if (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET) {
-                               issued_disconnect_reset = 1;
                                cm_event.status = IW_CM_EVENT_STATUS_RESET;
                                nes_debug(NES_DBG_CM, "Generating a CM "
                                        "Disconnect Event (status reset) for "
@@ -2547,6 +2541,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        struct nes_v4_quad nes_quad;
        u32 crc_value;
        int ret;
+       int passive_state;
 
        ibqp = nes_get_qp(cm_id->device, conn_param->qpn);
        if (!ibqp)
@@ -2714,8 +2709,6 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
                        conn_param->private_data_len +
                        sizeof(struct ietf_mpa_frame));
 
-       attr.qp_state = IB_QPS_RTS;
-       nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL);
 
        /* notify OF layer that accept event was successfull */
        cm_id->add_ref(cm_id);
@@ -2728,6 +2721,8 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        cm_event.private_data = NULL;
        cm_event.private_data_len = 0;
        ret = cm_id->event_handler(cm_id, &cm_event);
+       attr.qp_state = IB_QPS_RTS;
+       nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL);
        if (cm_node->loopbackpartner) {
                cm_node->loopbackpartner->mpa_frame_size =
                        nesqp->private_data_len;
@@ -2740,6 +2735,9 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
                printk(KERN_ERR "%s[%u] OFA CM event_handler returned, "
                        "ret=%d\n", __func__, __LINE__, ret);
 
+       passive_state = atomic_add_return(1, &cm_node->passive_state);
+       if (passive_state == NES_SEND_RESET_EVENT)
+               create_event(cm_node, NES_CM_EVENT_RESET);
        return 0;
 }
 
@@ -2943,15 +2941,16 @@ int nes_destroy_listen(struct iw_cm_id *cm_id)
  */
 int nes_cm_recv(struct sk_buff *skb, struct net_device *netdevice)
 {
+       int rc = 0;
        cm_packets_received++;
        if ((g_cm_core) && (g_cm_core->api)) {
-               g_cm_core->api->recv_pkt(g_cm_core, netdev_priv(netdevice), skb);
+               rc = g_cm_core->api->recv_pkt(g_cm_core, netdev_priv(netdevice), skb);
        } else {
                nes_debug(NES_DBG_CM, "Unable to process packet for CM,"
                                " cm is not setup properly.\n");
        }
 
-       return 0;
+       return rc;
 }
 
 
@@ -3222,6 +3221,18 @@ static void cm_event_reset(struct nes_cm_event *event)
        cm_event.private_data_len = 0;
 
        ret = cm_id->event_handler(cm_id, &cm_event);
+       cm_id->add_ref(cm_id);
+       atomic_inc(&cm_closes);
+       cm_event.event = IW_CM_EVENT_CLOSE;
+       cm_event.status = IW_CM_EVENT_STATUS_OK;
+       cm_event.provider_data = cm_id->provider_data;
+       cm_event.local_addr = cm_id->local_addr;
+       cm_event.remote_addr = cm_id->remote_addr;
+       cm_event.private_data = NULL;
+       cm_event.private_data_len = 0;
+       nes_debug(NES_DBG_CM, "NODE %p Generating CLOSE\n", event->cm_node);
+       ret = cm_id->event_handler(cm_id, &cm_event);
+
        nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
 
 
index 367b3d29014074252424c798f110821b5ce0c7c4..fafa35042ebdcc46d56446acfb40b395aad9c81b 100644 (file)
@@ -76,6 +76,10 @@ enum nes_timer_type {
        NES_TIMER_TYPE_CLOSE,
 };
 
+#define NES_PASSIVE_STATE_INDICATED    0
+#define NES_DO_NOT_SEND_RESET_EVENT    1
+#define NES_SEND_RESET_EVENT           2
+
 #define MAX_NES_IFS 4
 
 #define SET_ACK 1
@@ -161,6 +165,8 @@ struct nes_timer_entry {
 
 #define NES_CM_DEF_SEQ2      0x18ed5740
 #define NES_CM_DEF_LOCAL_ID2 0xb807
+#define        MAX_CM_BUFFER   512
+
 
 typedef u32 nes_addr_t;
 
@@ -254,8 +260,6 @@ struct nes_cm_listener {
 
 /* per connection node and node state information */
 struct nes_cm_node {
-       u32                       hashkey;
-
        nes_addr_t                loc_addr, rem_addr;
        u16                       loc_port, rem_port;
 
@@ -292,7 +296,10 @@ struct nes_cm_node {
        int                       apbvt_set;
        int                       accept_pend;
        int                     freed;
+       struct list_head        timer_entry;
+       struct list_head        reset_entry;
        struct nes_qp           *nesqp;
+       atomic_t                passive_state;
 };
 
 /* structure for client or CM to fill when making CM api calls. */
@@ -350,7 +357,6 @@ struct nes_cm_core {
        u32                     mtu;
        u32                     free_tx_pkt_max;
        u32                     rx_pkt_posted;
-       struct sk_buff_head     tx_free_list;
        atomic_t                ht_node_cnt;
        struct list_head        connected_nodes;
        /* struct list_head hashtable[NES_CM_HASHTABLE_SIZE]; */
@@ -390,7 +396,7 @@ struct nes_cm_ops {
                        struct nes_cm_node *);
        int (*reject)(struct nes_cm_core *, struct ietf_mpa_frame *,
                        struct nes_cm_node *);
-       void (*recv_pkt)(struct nes_cm_core *, struct nes_vnic *,
+       int (*recv_pkt)(struct nes_cm_core *, struct nes_vnic *,
                        struct sk_buff *);
        int (*destroy_cm_core)(struct nes_cm_core *);
        int (*get)(struct nes_cm_core *);
index 7c49cc882d75c580c0c31e20b987cf177d04fc9c..8f70ff2dcc5855b3d8e69e3772f6f9bac260d2fd 100644 (file)
@@ -2700,27 +2700,33 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
                                                        pkt_type, (pkt_type & NES_PKT_TYPE_APBVT_MASK)); */
 
                                if ((pkt_type & NES_PKT_TYPE_APBVT_MASK) == NES_PKT_TYPE_APBVT_BITS) {
-                                       nes_cm_recv(rx_skb, nesvnic->netdev);
+                                       if (nes_cm_recv(rx_skb, nesvnic->netdev))
+                                               rx_skb = NULL;
+                               }
+                               if (rx_skb == NULL)
+                                       goto skip_rx_indicate0;
+
+
+                               if ((cqe_misc & NES_NIC_CQE_TAG_VALID) &&
+                                   (nesvnic->vlan_grp != NULL)) {
+                                       vlan_tag = (u16)(le32_to_cpu(
+                                                       cq->cq_vbase[head].cqe_words[NES_NIC_CQE_TAG_PKT_TYPE_IDX])
+                                                       >> 16);
+                                       nes_debug(NES_DBG_CQ, "%s: Reporting stripped VLAN packet. Tag = 0x%04X\n",
+                                                       nesvnic->netdev->name, vlan_tag);
+                                       if (nes_use_lro)
+                                               lro_vlan_hwaccel_receive_skb(&nesvnic->lro_mgr, rx_skb,
+                                                               nesvnic->vlan_grp, vlan_tag, NULL);
+                                       else
+                                               nes_vlan_rx(rx_skb, nesvnic->vlan_grp, vlan_tag);
                                } else {
-                                       if ((cqe_misc & NES_NIC_CQE_TAG_VALID) && (nesvnic->vlan_grp != NULL)) {
-                                               vlan_tag = (u16)(le32_to_cpu(
-                                                               cq->cq_vbase[head].cqe_words[NES_NIC_CQE_TAG_PKT_TYPE_IDX])
-                                                               >> 16);
-                                               nes_debug(NES_DBG_CQ, "%s: Reporting stripped VLAN packet. Tag = 0x%04X\n",
-                                                               nesvnic->netdev->name, vlan_tag);
-                                               if (nes_use_lro)
-                                                       lro_vlan_hwaccel_receive_skb(&nesvnic->lro_mgr, rx_skb,
-                                                                       nesvnic->vlan_grp, vlan_tag, NULL);
-                                               else
-                                                       nes_vlan_rx(rx_skb, nesvnic->vlan_grp, vlan_tag);
-                                       } else {
-                                               if (nes_use_lro)
-                                                       lro_receive_skb(&nesvnic->lro_mgr, rx_skb, NULL);
-                                               else
-                                                       nes_netif_rx(rx_skb);
-                                       }
+                                       if (nes_use_lro)
+                                               lro_receive_skb(&nesvnic->lro_mgr, rx_skb, NULL);
+                                       else
+                                               nes_netif_rx(rx_skb);
                                }
 
+skip_rx_indicate0:
                                nesvnic->netdev->last_rx = jiffies;
                                /* nesvnic->netstats.rx_packets++; */
                                /* nesvnic->netstats.rx_bytes += rx_pkt_size; */
index fb8cbd71a2ef86b17b98539a471e0c4d7bf7b409..5611a73d5831395f61e57559cd972f9949b726c0 100644 (file)
@@ -540,11 +540,14 @@ struct nes_cqp_request *nes_get_cqp_request(struct nes_device *nesdev)
 
        if (!list_empty(&nesdev->cqp_avail_reqs)) {
                spin_lock_irqsave(&nesdev->cqp.lock, flags);
-               cqp_request = list_entry(nesdev->cqp_avail_reqs.next,
+               if (!list_empty(&nesdev->cqp_avail_reqs)) {
+                       cqp_request = list_entry(nesdev->cqp_avail_reqs.next,
                                struct nes_cqp_request, list);
-               list_del_init(&cqp_request->list);
+                       list_del_init(&cqp_request->list);
+               }
                spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
-       } else {
+       }
+       if (cqp_request == NULL) {
                cqp_request = kzalloc(sizeof(struct nes_cqp_request), GFP_KERNEL);
                if (cqp_request) {
                        cqp_request->dynamic = 1;
index d36c9a0bf1bb823619a04a8ce53902281b437cf7..4fdb72454f94f493045e56ce4a289fe01ad45266 100644 (file)
@@ -1695,13 +1695,8 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries,
                        /* use 4k pbl */
                        nes_debug(NES_DBG_CQ, "pbl_entries=%u, use a 4k PBL\n", pbl_entries);
                        if (nesadapter->free_4kpbl == 0) {
-                               if (cqp_request->dynamic) {
-                                       spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-                                       kfree(cqp_request);
-                               } else {
-                                       list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
-                                       spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-                               }
+                               spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+                               nes_free_cqp_request(nesdev, cqp_request);
                                if (!context)
                                        pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem,
                                                        nescq->hw_cq.cq_pbase);
@@ -1717,13 +1712,8 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries,
                        /* use 256 byte pbl */
                        nes_debug(NES_DBG_CQ, "pbl_entries=%u, use a 256 byte PBL\n", pbl_entries);
                        if (nesadapter->free_256pbl == 0) {
-                               if (cqp_request->dynamic) {
-                                       spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-                                       kfree(cqp_request);
-                               } else {
-                                       list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
-                                       spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-                               }
+                               spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+                               nes_free_cqp_request(nesdev, cqp_request);
                                if (!context)
                                        pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem,
                                                        nescq->hw_cq.cq_pbase);
@@ -1928,13 +1918,8 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
                        /* Two level PBL */
                        if ((pbl_count+1) > nesadapter->free_4kpbl) {
                                nes_debug(NES_DBG_MR, "Out of 4KB Pbls for two level request.\n");
-                               if (cqp_request->dynamic) {
-                                       spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-                                       kfree(cqp_request);
-                               } else {
-                                       list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
-                                       spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-                               }
+                               spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+                               nes_free_cqp_request(nesdev, cqp_request);
                                return -ENOMEM;
                        } else {
                                nesadapter->free_4kpbl -= pbl_count+1;
@@ -1942,13 +1927,8 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
                } else if (residual_page_count > 32) {
                        if (pbl_count > nesadapter->free_4kpbl) {
                                nes_debug(NES_DBG_MR, "Out of 4KB Pbls.\n");
-                               if (cqp_request->dynamic) {
-                                       spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-                                       kfree(cqp_request);
-                               } else {
-                                       list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
-                                       spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-                               }
+                               spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+                               nes_free_cqp_request(nesdev, cqp_request);
                                return -ENOMEM;
                        } else {
                                nesadapter->free_4kpbl -= pbl_count;
@@ -1956,13 +1936,8 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
                } else {
                        if (pbl_count > nesadapter->free_256pbl) {
                                nes_debug(NES_DBG_MR, "Out of 256B Pbls.\n");
-                               if (cqp_request->dynamic) {
-                                       spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-                                       kfree(cqp_request);
-                               } else {
-                                       list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
-                                       spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-                               }
+                               spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+                               nes_free_cqp_request(nesdev, cqp_request);
                                return -ENOMEM;
                        } else {
                                nesadapter->free_256pbl -= pbl_count;
index b7ad2829d67ecbe608422658b16f79ac890fd526..ac57b6a42c6ee8791fcca7f385a2c703b4e96799 100644 (file)
@@ -189,7 +189,7 @@ EXPORT_SYMBOL_GPL(mlx4_cq_resize);
 
 int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
                  struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq,
-                 int collapsed)
+                 unsigned vector, int collapsed)
 {
        struct mlx4_priv *priv = mlx4_priv(dev);
        struct mlx4_cq_table *cq_table = &priv->cq_table;
@@ -198,6 +198,11 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
        u64 mtt_addr;
        int err;
 
+       if (vector >= dev->caps.num_comp_vectors)
+               return -EINVAL;
+
+       cq->vector = vector;
+
        cq->cqn = mlx4_bitmap_alloc(&cq_table->bitmap);
        if (cq->cqn == -1)
                return -ENOMEM;
@@ -227,7 +232,7 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
 
        cq_context->flags           = cpu_to_be32(!!collapsed << 18);
        cq_context->logsize_usrpage = cpu_to_be32((ilog2(nent) << 24) | uar->index);
-       cq_context->comp_eqn        = priv->eq_table.eq[MLX4_EQ_COMP].eqn;
+       cq_context->comp_eqn        = priv->eq_table.eq[vector].eqn;
        cq_context->log_page_size   = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
 
        mtt_addr = mlx4_mtt_addr(dev, mtt);
@@ -276,7 +281,7 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq)
        if (err)
                mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn);
 
-       synchronize_irq(priv->eq_table.eq[MLX4_EQ_COMP].irq);
+       synchronize_irq(priv->eq_table.eq[cq->vector].irq);
 
        spin_lock_irq(&cq_table->lock);
        radix_tree_delete(&cq_table->tree, cq->cqn);
index 1368a8010af4e98169923388dd216ea29c5e0673..674f836e225b98ede4b6c1a4412b894722eb7064 100644 (file)
@@ -51,10 +51,13 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv,
        int err;
 
        cq->size = entries;
-       if (mode == RX)
+       if (mode == RX) {
                cq->buf_size = cq->size * sizeof(struct mlx4_cqe);
-       else
+               cq->vector   = ring % mdev->dev->caps.num_comp_vectors;
+       } else {
                cq->buf_size = sizeof(struct mlx4_cqe);
+               cq->vector   = 0;
+       }
 
        cq->ring = ring;
        cq->is_tx = mode;
@@ -86,7 +89,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
        memset(cq->buf, 0, cq->buf_size);
 
        err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt, &mdev->priv_uar,
-                           cq->wqres.db.dma, &cq->mcq, cq->is_tx);
+                           cq->wqres.db.dma, &cq->mcq, cq->vector, cq->is_tx);
        if (err)
                return err;
 
index 4b9794e97a79fdcfac53ace0654305bd43381d7c..c1c05852a95ebf34e357f86a6b0465758e5458f9 100644 (file)
@@ -170,9 +170,9 @@ static void *mlx4_en_add(struct mlx4_dev *dev)
                mlx4_info(mdev, "Using %d tx rings for port:%d\n",
                          mdev->profile.prof[i].tx_ring_num, i);
                if (!mdev->profile.prof[i].rx_ring_num) {
-                       mdev->profile.prof[i].rx_ring_num = 1;
+                       mdev->profile.prof[i].rx_ring_num = dev->caps.num_comp_vectors;
                        mlx4_info(mdev, "Defaulting to %d rx rings for port:%d\n",
-                                 1, i);
+                                 mdev->profile.prof[i].rx_ring_num, i);
                } else
                        mlx4_info(mdev, "Using %d rx rings for port:%d\n",
                                  mdev->profile.prof[i].rx_ring_num, i);
index de169338cd901a448fffc668ea083b2c8d5498e7..2c19bff7cbaba33efe3c10776c476594158c4b13 100644 (file)
@@ -243,10 +243,6 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
                 * least that often.
                 */
                if (unlikely(set_ci >= MLX4_NUM_SPARE_EQE)) {
-                       /*
-                        * Conditional on hca_type is OK here because
-                        * this is a rare case, not the fast path.
-                        */
                        eq_set_ci(eq, 0);
                        set_ci = 0;
                }
@@ -266,7 +262,7 @@ static irqreturn_t mlx4_interrupt(int irq, void *dev_ptr)
 
        writel(priv->eq_table.clr_mask, priv->eq_table.clr_int);
 
-       for (i = 0; i < MLX4_NUM_EQ; ++i)
+       for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
                work |= mlx4_eq_int(dev, &priv->eq_table.eq[i]);
 
        return IRQ_RETVAL(work);
@@ -304,6 +300,17 @@ static int mlx4_HW2SW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
                            MLX4_CMD_TIME_CLASS_A);
 }
 
+static int mlx4_num_eq_uar(struct mlx4_dev *dev)
+{
+       /*
+        * Each UAR holds 4 EQ doorbells.  We use num_comp_vectors + 1
+        * EQs (completion EQs plus the async EQ) past the reserved ones;
+        * map (highest UAR index - lowest UAR index + 1) UARs for them.
+        */
+       return (dev->caps.num_comp_vectors + 1 + dev->caps.reserved_eqs) / 4 -
+               dev->caps.reserved_eqs / 4 + 1;
+}
+
 static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq)
 {
        struct mlx4_priv *priv = mlx4_priv(dev);
@@ -483,9 +490,11 @@ static void mlx4_free_irqs(struct mlx4_dev *dev)
 
        if (eq_table->have_irq)
                free_irq(dev->pdev->irq, dev);
-       for (i = 0; i < MLX4_NUM_EQ; ++i)
+       for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
                if (eq_table->eq[i].have_irq)
                        free_irq(eq_table->eq[i].irq, eq_table->eq + i);
+
+       kfree(eq_table->irq_names);
 }
 
 static int mlx4_map_clr_int(struct mlx4_dev *dev)
@@ -551,57 +560,93 @@ void mlx4_unmap_eq_icm(struct mlx4_dev *dev)
        __free_page(priv->eq_table.icm_page);
 }
 
+/* Allocate one zeroed mlx4_eq per non-reserved EQ; returns 0 or -ENOMEM. */
+int mlx4_alloc_eq_table(struct mlx4_dev *dev)
+{
+       struct mlx4_eq_table *eq_table = &mlx4_priv(dev)->eq_table;
+       int num_eq = dev->caps.num_eqs - dev->caps.reserved_eqs;
+
+       eq_table->eq = kcalloc(num_eq, sizeof *eq_table->eq, GFP_KERNEL);
+       if (eq_table->eq == NULL)
+               return -ENOMEM;
+       return 0;
+}
+
+void mlx4_free_eq_table(struct mlx4_dev *dev)
+{
+       kfree(mlx4_priv(dev)->eq_table.eq);     /* array from mlx4_alloc_eq_table() */
+}
+
 int mlx4_init_eq_table(struct mlx4_dev *dev)
 {
        struct mlx4_priv *priv = mlx4_priv(dev);
        int err;
        int i;
 
+       priv->eq_table.uar_map = kcalloc(sizeof *priv->eq_table.uar_map,
+                                        mlx4_num_eq_uar(dev), GFP_KERNEL);
+       if (!priv->eq_table.uar_map) {
+               err = -ENOMEM;
+               goto err_out_free;
+       }
+
        err = mlx4_bitmap_init(&priv->eq_table.bitmap, dev->caps.num_eqs,
                               dev->caps.num_eqs - 1, dev->caps.reserved_eqs, 0);
        if (err)
-               return err;
+               goto err_out_free;
 
-       for (i = 0; i < ARRAY_SIZE(priv->eq_table.uar_map); ++i)
+       for (i = 0; i < mlx4_num_eq_uar(dev); ++i)
                priv->eq_table.uar_map[i] = NULL;
 
        err = mlx4_map_clr_int(dev);
        if (err)
-               goto err_out_free;
+               goto err_out_bitmap;
 
        priv->eq_table.clr_mask =
                swab32(1 << (priv->eq_table.inta_pin & 31));
        priv->eq_table.clr_int  = priv->clr_base +
                (priv->eq_table.inta_pin < 32 ? 4 : 0);
 
-       err = mlx4_create_eq(dev, dev->caps.num_cqs + MLX4_NUM_SPARE_EQE,
-                            (dev->flags & MLX4_FLAG_MSI_X) ? MLX4_EQ_COMP : 0,
-                            &priv->eq_table.eq[MLX4_EQ_COMP]);
-       if (err)
-               goto err_out_unmap;
+       priv->eq_table.irq_names = kmalloc(16 * dev->caps.num_comp_vectors, GFP_KERNEL);
+       if (!priv->eq_table.irq_names) {
+               err = -ENOMEM;
+               goto err_out_bitmap;
+       }
+
+       for (i = 0; i < dev->caps.num_comp_vectors; ++i) {
+               err = mlx4_create_eq(dev, dev->caps.num_cqs + MLX4_NUM_SPARE_EQE,
+                                    (dev->flags & MLX4_FLAG_MSI_X) ? i : 0,
+                                    &priv->eq_table.eq[i]);
+               if (err)
+                       goto err_out_unmap;
+       }
 
        err = mlx4_create_eq(dev, MLX4_NUM_ASYNC_EQE + MLX4_NUM_SPARE_EQE,
-                            (dev->flags & MLX4_FLAG_MSI_X) ? MLX4_EQ_ASYNC : 0,
-                            &priv->eq_table.eq[MLX4_EQ_ASYNC]);
+                            (dev->flags & MLX4_FLAG_MSI_X) ? dev->caps.num_comp_vectors : 0,
+                            &priv->eq_table.eq[dev->caps.num_comp_vectors]);
        if (err)
                goto err_out_comp;
 
        if (dev->flags & MLX4_FLAG_MSI_X) {
-               static const char *eq_name[] = {
-                       [MLX4_EQ_COMP]  = DRV_NAME " (comp)",
-                       [MLX4_EQ_ASYNC] = DRV_NAME " (async)"
-               };
+               static const char async_eq_name[] = "mlx4-async";
+               const char *eq_name;
+
+               for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) {
+                       if (i < dev->caps.num_comp_vectors) {
+                               snprintf(priv->eq_table.irq_names + i * 16, 16,
+                                        "mlx4-comp-%d", i);
+                               eq_name = priv->eq_table.irq_names + i * 16;
+                       } else
+                               eq_name = async_eq_name;
 
-               for (i = 0; i < MLX4_NUM_EQ; ++i) {
                        err = request_irq(priv->eq_table.eq[i].irq,
-                                         mlx4_msi_x_interrupt,
-                                         0, eq_name[i], priv->eq_table.eq + i);
+                                         mlx4_msi_x_interrupt, 0, eq_name,
+                                         priv->eq_table.eq + i);
                        if (err)
                                goto err_out_async;
 
                        priv->eq_table.eq[i].have_irq = 1;
                }
-
        } else {
                err = request_irq(dev->pdev->irq, mlx4_interrupt,
                                  IRQF_SHARED, DRV_NAME, dev);
@@ -612,28 +657,36 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)
        }
 
        err = mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 0,
-                         priv->eq_table.eq[MLX4_EQ_ASYNC].eqn);
+                         priv->eq_table.eq[dev->caps.num_comp_vectors].eqn);
        if (err)
                mlx4_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n",
-                          priv->eq_table.eq[MLX4_EQ_ASYNC].eqn, err);
+                          priv->eq_table.eq[dev->caps.num_comp_vectors].eqn, err);
 
-       for (i = 0; i < MLX4_NUM_EQ; ++i)
+       for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
                eq_set_ci(&priv->eq_table.eq[i], 1);
 
        return 0;
 
 err_out_async:
-       mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_ASYNC]);
+       mlx4_free_eq(dev, &priv->eq_table.eq[dev->caps.num_comp_vectors]);
 
 err_out_comp:
-       mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_COMP]);
+       i = dev->caps.num_comp_vectors - 1;
 
 err_out_unmap:
+       while (i >= 0) {
+               mlx4_free_eq(dev, &priv->eq_table.eq[i]);
+               --i;
+       }
        mlx4_unmap_clr_int(dev);
        mlx4_free_irqs(dev);
 
-err_out_free:
+err_out_bitmap:
        mlx4_bitmap_cleanup(&priv->eq_table.bitmap);
+
+err_out_free:
+       kfree(priv->eq_table.uar_map);
+
        return err;
 }
 
@@ -643,18 +696,20 @@ void mlx4_cleanup_eq_table(struct mlx4_dev *dev)
        int i;
 
        mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 1,
-                   priv->eq_table.eq[MLX4_EQ_ASYNC].eqn);
+                   priv->eq_table.eq[dev->caps.num_comp_vectors].eqn);
 
        mlx4_free_irqs(dev);
 
-       for (i = 0; i < MLX4_NUM_EQ; ++i)
+       for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
                mlx4_free_eq(dev, &priv->eq_table.eq[i]);
 
        mlx4_unmap_clr_int(dev);
 
-       for (i = 0; i < ARRAY_SIZE(priv->eq_table.uar_map); ++i)
+       for (i = 0; i < mlx4_num_eq_uar(dev); ++i)
                if (priv->eq_table.uar_map[i])
                        iounmap(priv->eq_table.uar_map[i]);
 
        mlx4_bitmap_cleanup(&priv->eq_table.bitmap);
+
+       kfree(priv->eq_table.uar_map);
 }
index 90a0281d15ea22d96ce788107dee88d0deb77aff..710c79e7a2db244b567b485db51abf0978f04253 100644 (file)
@@ -421,9 +421,7 @@ static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
                                  ((u64) (MLX4_CMPT_TYPE_EQ *
                                          cmpt_entry_sz) << MLX4_CMPT_SHIFT),
                                  cmpt_entry_sz,
-                                 roundup_pow_of_two(MLX4_NUM_EQ +
-                                                    dev->caps.reserved_eqs),
-                                 MLX4_NUM_EQ + dev->caps.reserved_eqs, 0, 0);
+                                 dev->caps.num_eqs, dev->caps.num_eqs, 0, 0);
        if (err)
                goto err_cq;
 
@@ -810,12 +808,12 @@ static int mlx4_setup_hca(struct mlx4_dev *dev)
                if (dev->flags & MLX4_FLAG_MSI_X) {
                        mlx4_warn(dev, "NOP command failed to generate MSI-X "
                                  "interrupt IRQ %d).\n",
-                                 priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
+                                 priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
                        mlx4_warn(dev, "Trying again without MSI-X.\n");
                } else {
                        mlx4_err(dev, "NOP command failed to generate interrupt "
                                 "(IRQ %d), aborting.\n",
-                                priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
+                                priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
                        mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n");
                }
 
@@ -908,31 +906,50 @@ err_uar_table_free:
 static void mlx4_enable_msi_x(struct mlx4_dev *dev)
 {
        struct mlx4_priv *priv = mlx4_priv(dev);
-       struct msix_entry entries[MLX4_NUM_EQ];
+       struct msix_entry *entries;
+       int nreq;
        int err;
        int i;
 
        if (msi_x) {
-               for (i = 0; i < MLX4_NUM_EQ; ++i)
+               nreq = min(dev->caps.num_eqs - dev->caps.reserved_eqs,
+                          num_possible_cpus() + 1);
+               entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL);
+               if (!entries)
+                       goto no_msi;
+
+               for (i = 0; i < nreq; ++i)
                        entries[i].entry = i;
 
-               err = pci_enable_msix(dev->pdev, entries, ARRAY_SIZE(entries));
+       retry:
+               err = pci_enable_msix(dev->pdev, entries, nreq);
                if (err) {
-                       if (err > 0)
-                               mlx4_info(dev, "Only %d MSI-X vectors available, "
-                                         "not using MSI-X\n", err);
+                       /* Try again if at least 2 vectors are available */
+                       if (err > 1) {
+                               mlx4_info(dev, "Requested %d vectors, "
+                                         "but only %d MSI-X vectors available, "
+                                         "trying again\n", nreq, err);
+                               nreq = err;
+                               goto retry;
+                       }
+
                        goto no_msi;
                }
 
-               for (i = 0; i < MLX4_NUM_EQ; ++i)
+               dev->caps.num_comp_vectors = nreq - 1;
+               for (i = 0; i < nreq; ++i)
                        priv->eq_table.eq[i].irq = entries[i].vector;
 
                dev->flags |= MLX4_FLAG_MSI_X;
+
+               kfree(entries);
                return;
        }
 
 no_msi:
-       for (i = 0; i < MLX4_NUM_EQ; ++i)
+       dev->caps.num_comp_vectors = 1;
+
+       for (i = 0; i < 2; ++i)
                priv->eq_table.eq[i].irq = dev->pdev->irq;
 }
 
@@ -1074,6 +1091,10 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
        if (err)
                goto err_cmd;
 
+       err = mlx4_alloc_eq_table(dev);
+       if (err)
+               goto err_close;
+
        mlx4_enable_msi_x(dev);
 
        err = mlx4_setup_hca(dev);
@@ -1084,7 +1105,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
        }
 
        if (err)
-               goto err_close;
+               goto err_free_eq;
 
        for (port = 1; port <= dev->caps.num_ports; port++) {
                err = mlx4_init_port_info(dev, port);
@@ -1114,6 +1135,9 @@ err_port:
        mlx4_cleanup_pd_table(dev);
        mlx4_cleanup_uar_table(dev);
 
+err_free_eq:
+       mlx4_free_eq_table(dev);
+
 err_close:
        if (dev->flags & MLX4_FLAG_MSI_X)
                pci_disable_msix(pdev);
@@ -1177,6 +1201,7 @@ static void mlx4_remove_one(struct pci_dev *pdev)
                iounmap(priv->kar);
                mlx4_uar_free(dev, &priv->driver_uar);
                mlx4_cleanup_uar_table(dev);
+               mlx4_free_eq_table(dev);
                mlx4_close_hca(dev);
                mlx4_cmd_cleanup(dev);
 
index 34c909deaff325e9e90573065fa4d4efb45044b8..e0213bad61c7c6a7be36bcf8d93cda3d7878be37 100644 (file)
@@ -62,12 +62,6 @@ enum {
        MLX4_MTT_ENTRY_PER_SEG  = 8
 };
 
-enum {
-       MLX4_EQ_ASYNC,
-       MLX4_EQ_COMP,
-       MLX4_NUM_EQ
-};
-
 enum {
        MLX4_NUM_PDS            = 1 << 15
 };
@@ -205,10 +199,11 @@ struct mlx4_cq_table {
 
 struct mlx4_eq_table {
        struct mlx4_bitmap      bitmap;
+       char                   *irq_names;
        void __iomem           *clr_int;
-       void __iomem           *uar_map[(MLX4_NUM_EQ + 6) / 4];
+       void __iomem          **uar_map;
        u32                     clr_mask;
-       struct mlx4_eq          eq[MLX4_NUM_EQ];
+       struct mlx4_eq         *eq;
        u64                     icm_virt;
        struct page            *icm_page;
        dma_addr_t              icm_dma;
@@ -328,6 +323,9 @@ void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap);
 
 int mlx4_reset(struct mlx4_dev *dev);
 
+int mlx4_alloc_eq_table(struct mlx4_dev *dev);
+void mlx4_free_eq_table(struct mlx4_dev *dev);
+
 int mlx4_init_pd_table(struct mlx4_dev *dev);
 int mlx4_init_uar_table(struct mlx4_dev *dev);
 int mlx4_init_mr_table(struct mlx4_dev *dev);
index 9ca42b213d54edf7adae086d578abd3b7ea913b8..919fb9eb1b624af90e696b8a0c67418eec3c43f0 100644 (file)
@@ -107,7 +107,9 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
        profile[MLX4_RES_AUXC].num    = request->num_qp;
        profile[MLX4_RES_SRQ].num     = request->num_srq;
        profile[MLX4_RES_CQ].num      = request->num_cq;
-       profile[MLX4_RES_EQ].num      = MLX4_NUM_EQ + dev_cap->reserved_eqs;
+       profile[MLX4_RES_EQ].num      = min(dev_cap->max_eqs,
+                                           dev_cap->reserved_eqs +
+                                           num_possible_cpus() + 1);
        profile[MLX4_RES_DMPT].num    = request->num_mpt;
        profile[MLX4_RES_CMPT].num    = MLX4_NUM_CMPTS;
        profile[MLX4_RES_MTT].num     = request->num_mtt;
index 371086fd946f3f13a7cbeecab10354caae5cb15f..8f659cc2996026eb26c9c1649330be152388f2c7 100644 (file)
@@ -206,6 +206,7 @@ struct mlx4_caps {
        int                     reserved_cqs;
        int                     num_eqs;
        int                     reserved_eqs;
+       int                     num_comp_vectors;
        int                     num_mpts;
        int                     num_mtt_segs;
        int                     fmr_reserved_mtts;
@@ -328,6 +329,7 @@ struct mlx4_cq {
        int                     arm_sn;
 
        int                     cqn;
+       unsigned                vector;
 
        atomic_t                refcount;
        struct completion       free;
@@ -437,7 +439,7 @@ void mlx4_free_hwq_res(struct mlx4_dev *mdev, struct mlx4_hwq_resources *wqres,
 
 int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
                  struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq,
-                 int collapsed);
+                 unsigned vector, int collapsed);
 void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq);
 
 int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base);