drivers/infiniband/hw/hfi1/driver.c
1 /*
2  * Copyright(c) 2015, 2016 Intel Corporation.
3  *
4  * This file is provided under a dual BSD/GPLv2 license.  When using or
5  * redistributing this file, you may do so under either license.
6  *
7  * GPL LICENSE SUMMARY
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of version 2 of the GNU General Public License as
11  * published by the Free Software Foundation.
12  *
13  * This program is distributed in the hope that it will be useful, but
14  * WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * General Public License for more details.
17  *
18  * BSD LICENSE
19  *
20  * Redistribution and use in source and binary forms, with or without
21  * modification, are permitted provided that the following conditions
22  * are met:
23  *
24  *  - Redistributions of source code must retain the above copyright
25  *    notice, this list of conditions and the following disclaimer.
26  *  - Redistributions in binary form must reproduce the above copyright
27  *    notice, this list of conditions and the following disclaimer in
28  *    the documentation and/or other materials provided with the
29  *    distribution.
30  *  - Neither the name of Intel Corporation nor the names of its
31  *    contributors may be used to endorse or promote products derived
32  *    from this software without specific prior written permission.
33  *
34  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
36  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
38  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
39  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
40  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
42  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
43  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45  *
46  */
47
48 #include <linux/spinlock.h>
49 #include <linux/pci.h>
50 #include <linux/io.h>
51 #include <linux/delay.h>
52 #include <linux/netdevice.h>
53 #include <linux/vmalloc.h>
54 #include <linux/module.h>
55 #include <linux/prefetch.h>
56 #include <rdma/ib_verbs.h>
57
58 #include "hfi.h"
59 #include "trace.h"
60 #include "qp.h"
61 #include "sdma.h"
62
63 #undef pr_fmt
64 #define pr_fmt(fmt) DRIVER_NAME ": " fmt
65
66 /*
67  * The size has to be longer than this string, so we can append
68  * board/chip information to it in the initialization code.
69  */
70 const char ib_hfi1_version[] = HFI1_DRIVER_VERSION "\n";
71
72 DEFINE_SPINLOCK(hfi1_devs_lock);
73 LIST_HEAD(hfi1_dev_list);
74 DEFINE_MUTEX(hfi1_mutex);       /* general driver use */
75
76 unsigned int hfi1_max_mtu = HFI1_DEFAULT_MAX_MTU;
77 module_param_named(max_mtu, hfi1_max_mtu, uint, S_IRUGO);
78 MODULE_PARM_DESC(max_mtu, "Set max MTU bytes, default is " __stringify(
79                  HFI1_DEFAULT_MAX_MTU));
80
81 unsigned int hfi1_cu = 1;
82 module_param_named(cu, hfi1_cu, uint, S_IRUGO);
83 MODULE_PARM_DESC(cu, "Credit return units");
84
85 unsigned long hfi1_cap_mask = HFI1_CAP_MASK_DEFAULT;
86 static int hfi1_caps_set(const char *, const struct kernel_param *);
87 static int hfi1_caps_get(char *, const struct kernel_param *);
88 static const struct kernel_param_ops cap_ops = {
89         .set = hfi1_caps_set,
90         .get = hfi1_caps_get
91 };
92 module_param_cb(cap_mask, &cap_ops, &hfi1_cap_mask, S_IWUSR | S_IRUGO);
93 MODULE_PARM_DESC(cap_mask, "Bit mask of enabled/disabled HW features");
94
95 MODULE_LICENSE("Dual BSD/GPL");
96 MODULE_DESCRIPTION("Intel Omni-Path Architecture driver");
97 MODULE_VERSION(HFI1_DRIVER_VERSION);
98
99 /*
100  * MAX_PKT_RECV is the max # of packets processed per receive interrupt.
101  */
102 #define MAX_PKT_RECV 64
103 /*
104  * MAX_PKT_RECV_THREAD is the max # of packets processed before
105  * the qp_wait_list queue is flushed.
106  */
107 #define MAX_PKT_RECV_THREAD (MAX_PKT_RECV * 4)
108 #define EGR_HEAD_UPDATE_THRESHOLD 16
109
110 struct hfi1_ib_stats hfi1_stats;
111
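/*
 * Module parameter handler for 'cap_mask': parse the new value, drop any
 * bits that may not change once the capability mask is locked, mask off
 * reserved bits, and enforce that user capabilities requiring kernel
 * support (HFI1_CAP_MUST_HAVE_KERN) are only set together with their
 * kernel counterparts.
 */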
112 static int hfi1_caps_set(const char *val, const struct kernel_param *kp)
113 {
114         int ret = 0;
115         unsigned long *cap_mask_ptr = (unsigned long *)kp->arg,
116                 cap_mask = *cap_mask_ptr, value, diff,
117                 write_mask = ((HFI1_CAP_WRITABLE_MASK << HFI1_CAP_USER_SHIFT) |
118                               HFI1_CAP_WRITABLE_MASK);
119
120         ret = kstrtoul(val, 0, &value);
121         if (ret) {
122                 pr_warn("Invalid module parameter value for 'cap_mask'\n");
123                 goto done;
124         }
125         /* Get the changed bits (except the locked bit) */
126         diff = value ^ (cap_mask & ~HFI1_CAP_LOCKED_SMASK);
127
128         /* Remove any bits that are not allowed to change after driver load */
129         if (HFI1_CAP_LOCKED() && (diff & ~write_mask)) {
130                 pr_warn("Ignoring non-writable capability bits %#lx\n",
131                         diff & ~write_mask);
132                 diff &= write_mask;
133         }
134
135         /* Mask off any reserved bits */
136         diff &= ~HFI1_CAP_RESERVED_MASK;
137         /* Clear any previously set and changing bits */
138         cap_mask &= ~diff;
139         /* Update the bits with the new capability */
140         cap_mask |= (value & diff);
141         /* Check for any kernel/user restrictions */
142         diff = (cap_mask & (HFI1_CAP_MUST_HAVE_KERN << HFI1_CAP_USER_SHIFT)) ^
143                 ((cap_mask & HFI1_CAP_MUST_HAVE_KERN) << HFI1_CAP_USER_SHIFT);
144         cap_mask &= ~diff;
145         /* Set the bitmask to the final set */
146         *cap_mask_ptr = cap_mask;
147 done:
148         return ret;
149 }
150
151 static int hfi1_caps_get(char *buffer, const struct kernel_param *kp)
152 {
153         unsigned long cap_mask = *(unsigned long *)kp->arg;
154
155         cap_mask &= ~HFI1_CAP_LOCKED_SMASK;
156         cap_mask |= ((cap_mask & HFI1_CAP_K2U) << HFI1_CAP_USER_SHIFT);
157
158         return scnprintf(buffer, PAGE_SIZE, "0x%lx", cap_mask);
159 }
160
161 const char *get_unit_name(int unit)
162 {
163         static char iname[16];
164
165         snprintf(iname, sizeof(iname), DRIVER_NAME "_%u", unit);
166         return iname;
167 }
168
169 const char *get_card_name(struct rvt_dev_info *rdi)
170 {
171         struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev, rdi);
172         struct hfi1_devdata *dd = container_of(ibdev,
173                                                struct hfi1_devdata, verbs_dev);
174         return get_unit_name(dd->unit);
175 }
176
177 struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi)
178 {
179         struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev, rdi);
180         struct hfi1_devdata *dd = container_of(ibdev,
181                                                struct hfi1_devdata, verbs_dev);
182         return dd->pcidev;
183 }
184
185 /*
186  * Return count of units with at least one port ACTIVE.
187  */
188 int hfi1_count_active_units(void)
189 {
190         struct hfi1_devdata *dd;
191         struct hfi1_pportdata *ppd;
192         unsigned long flags;
193         int pidx, nunits_active = 0;
194
195         spin_lock_irqsave(&hfi1_devs_lock, flags);
196         list_for_each_entry(dd, &hfi1_dev_list, list) {
197                 if (!(dd->flags & HFI1_PRESENT) || !dd->kregbase)
198                         continue;
199                 for (pidx = 0; pidx < dd->num_pports; ++pidx) {
200                         ppd = dd->pport + pidx;
201                         if (ppd->lid && ppd->linkup) {
202                                 nunits_active++;
203                                 break;
204                         }
205                 }
206         }
207         spin_unlock_irqrestore(&hfi1_devs_lock, flags);
208         return nunits_active;
209 }
210
211 /*
212  * Return count of all units, optionally return in arguments
213  * the number of usable (present) units, and the number of
214  * ports that are up.
215  */
216 int hfi1_count_units(int *npresentp, int *nupp)
217 {
218         int nunits = 0, npresent = 0, nup = 0;
219         struct hfi1_devdata *dd;
220         unsigned long flags;
221         int pidx;
222         struct hfi1_pportdata *ppd;
223
224         spin_lock_irqsave(&hfi1_devs_lock, flags);
225
226         list_for_each_entry(dd, &hfi1_dev_list, list) {
227                 nunits++;
228                 if ((dd->flags & HFI1_PRESENT) && dd->kregbase)
229                         npresent++;
230                 for (pidx = 0; pidx < dd->num_pports; ++pidx) {
231                         ppd = dd->pport + pidx;
232                         if (ppd->lid && ppd->linkup)
233                                 nup++;
234                 }
235         }
236
237         spin_unlock_irqrestore(&hfi1_devs_lock, flags);
238
239         if (npresentp)
240                 *npresentp = npresent;
241         if (nupp)
242                 *nupp = nup;
243
244         return nunits;
245 }
246
247 /*
248  * Get address of eager buffer from its index (allocated in chunks, not
249  * contiguous).
250  */
251 static inline void *get_egrbuf(const struct hfi1_ctxtdata *rcd, u64 rhf,
252                                u8 *update)
253 {
254         u32 idx = rhf_egr_index(rhf), offset = rhf_egr_buf_offset(rhf);
255
256         *update |= !(idx & (rcd->egrbufs.threshold - 1)) && !offset;
257         return (void *)(((u64)(rcd->egrbufs.rcvtids[idx].addr)) +
258                         (offset * RCV_BUF_BLOCK_SIZE));
259 }
260
261 /*
262  * Validate and encode a given RcvArray Buffer size.
263  * The function will check whether the given size falls within
264  * allowed size ranges for the respective type and, optionally,
265  * return the proper encoding.
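 *
 * For example (assuming a 4 KiB PAGE_SIZE), a page-aligned 64 KiB eager
 * buffer within the allowed range is encoded as ilog2(16) + 1 = 5.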
266  */
267 int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encoded)
268 {
269         if (unlikely(!PAGE_ALIGNED(size)))
270                 return 0;
271         if (unlikely(size < MIN_EAGER_BUFFER))
272                 return 0;
273         if (size >
274             (type == PT_EAGER ? MAX_EAGER_BUFFER : MAX_EXPECTED_BUFFER))
275                 return 0;
276         if (encoded)
277                 *encoded = ilog2(size / PAGE_SIZE) + 1;
278         return 1;
279 }
280
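/*
 * Handle a packet whose RHF reports a receive header error.  VCRC/ICRC
 * errors are ignored here.  For TID errors on unicast packets destined
 * to an RC QP, the header is handed to hfi1_rc_hdrerr() so a NAK can be
 * scheduled.  For opcode errors carrying a CNP (pre-B0 hardware), the
 * BECN is processed and the receive type error flag is cleared.
 */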
281 static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
282                        struct hfi1_packet *packet)
283 {
284         struct ib_header *rhdr = packet->hdr;
285         u32 rte = rhf_rcv_type_err(packet->rhf);
286         int lnh = be16_to_cpu(rhdr->lrh[0]) & 3;
287         struct hfi1_ibport *ibp = rcd_to_iport(rcd);
288         struct hfi1_devdata *dd = ppd->dd;
289         struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
290
291         if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR))
292                 return;
293
294         if (packet->rhf & RHF_TID_ERR) {
295                 /* For TIDERR and RC QPs preemptively schedule a NAK */
296                 struct ib_other_headers *ohdr = NULL;
297                 u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */
298                 u16 lid  = be16_to_cpu(rhdr->lrh[1]);
299                 u32 qp_num;
300                 u32 rcv_flags = 0;
301
302                 /* Sanity check packet */
303                 if (tlen < 24)
304                         goto drop;
305
306                 /* Check for GRH */
307                 if (lnh == HFI1_LRH_BTH) {
308                         ohdr = &rhdr->u.oth;
309                 } else if (lnh == HFI1_LRH_GRH) {
310                         u32 vtf;
311
312                         ohdr = &rhdr->u.l.oth;
313                         if (rhdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
314                                 goto drop;
315                         vtf = be32_to_cpu(rhdr->u.l.grh.version_tclass_flow);
316                         if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
317                                 goto drop;
318                         rcv_flags |= HFI1_HAS_GRH;
319                 } else {
320                         goto drop;
321                 }
322                 /* Get the destination QP number. */
323                 qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
324                 if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) {
325                         struct rvt_qp *qp;
326                         unsigned long flags;
327
328                         rcu_read_lock();
329                         qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
330                         if (!qp) {
331                                 rcu_read_unlock();
332                                 goto drop;
333                         }
334
335                         /*
336                          * Handle only RC QPs - for other QP types drop error
337                          * packet.
338                          */
339                         spin_lock_irqsave(&qp->r_lock, flags);
340
341                         /* Check for valid receive state. */
342                         if (!(ib_rvt_state_ops[qp->state] &
343                               RVT_PROCESS_RECV_OK)) {
344                                 ibp->rvp.n_pkt_drops++;
345                         }
346
347                         switch (qp->ibqp.qp_type) {
348                         case IB_QPT_RC:
349                                 hfi1_rc_hdrerr(
350                                         rcd,
351                                         rhdr,
352                                         rcv_flags,
353                                         qp);
354                                 break;
355                         default:
356                                 /* For now don't handle any other QP types */
357                                 break;
358                         }
359
360                         spin_unlock_irqrestore(&qp->r_lock, flags);
361                         rcu_read_unlock();
362                 } /* Unicast QP */
363         } /* Valid packet with TIDErr */
364
365         /* handle "RcvTypeErr" flags */
366         switch (rte) {
367         case RHF_RTE_ERROR_OP_CODE_ERR:
368         {
369                 u32 opcode;
370                 void *ebuf = NULL;
371                 __be32 *bth = NULL;
372
373                 if (rhf_use_egr_bfr(packet->rhf))
374                         ebuf = packet->ebuf;
375
376                 if (!ebuf)
377                         goto drop; /* this should never happen */
378
379                 if (lnh == HFI1_LRH_BTH)
380                         bth = (__be32 *)ebuf;
381                 else if (lnh == HFI1_LRH_GRH)
382                         bth = (__be32 *)((char *)ebuf + sizeof(struct ib_grh));
383                 else
384                         goto drop;
385
386                 opcode = be32_to_cpu(bth[0]) >> 24;
387                 opcode &= 0xff;
388
389                 if (opcode == IB_OPCODE_CNP) {
390                         /*
391                          * Only in pre-B0 h/w is the CNP_OPCODE handled
392                          * via this code path.
393                          */
394                         struct rvt_qp *qp = NULL;
395                         u32 lqpn, rqpn;
396                         u16 rlid;
397                         u8 svc_type, sl, sc5;
398
399                         sc5 = hdr2sc(rhdr, packet->rhf);
400                         sl = ibp->sc_to_sl[sc5];
401
402                         lqpn = be32_to_cpu(bth[1]) & RVT_QPN_MASK;
403                         rcu_read_lock();
404                         qp = rvt_lookup_qpn(rdi, &ibp->rvp, lqpn);
405                         if (!qp) {
406                                 rcu_read_unlock();
407                                 goto drop;
408                         }
409
410                         switch (qp->ibqp.qp_type) {
411                         case IB_QPT_UD:
412                                 rlid = 0;
413                                 rqpn = 0;
414                                 svc_type = IB_CC_SVCTYPE_UD;
415                                 break;
416                         case IB_QPT_UC:
417                                 rlid = be16_to_cpu(rhdr->lrh[3]);
418                                 rqpn = qp->remote_qpn;
419                                 svc_type = IB_CC_SVCTYPE_UC;
420                                 break;
421                         default:
422                                 goto drop;
423                         }
424
425                         process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type);
426                         rcu_read_unlock();
427                 }
428
429                 packet->rhf &= ~RHF_RCV_TYPE_ERR_SMASK;
430                 break;
431         }
432         default:
433                 break;
434         }
435
436 drop:
437         return;
438 }
439
440 static inline void init_packet(struct hfi1_ctxtdata *rcd,
441                                struct hfi1_packet *packet)
442 {
443         packet->rsize = rcd->rcvhdrqentsize; /* words */
444         packet->maxcnt = rcd->rcvhdrq_cnt * packet->rsize; /* words */
445         packet->rcd = rcd;
446         packet->updegr = 0;
447         packet->etail = -1;
448         packet->rhf_addr = get_rhf_addr(rcd);
449         packet->rhf = rhf_to_cpu(packet->rhf_addr);
450         packet->rhqoff = rcd->head;
451         packet->numpkt = 0;
452         packet->rcv_flags = 0;
453 }
454
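/*
 * Slow-path ECN handling: if the BTH carries a FECN and do_cnp is set,
 * send a CNP back to the source; if it carries a BECN (and the packet is
 * not multicast), notify the congestion control agent via process_becn().
 */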
455 void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
456                                bool do_cnp)
457 {
458         struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
459         struct ib_header *hdr = pkt->hdr;
460         struct ib_other_headers *ohdr = pkt->ohdr;
461         struct ib_grh *grh = NULL;
462         u32 rqpn = 0, bth1;
463         u16 rlid, dlid = be16_to_cpu(hdr->lrh[1]);
464         u8 sc, svc_type;
465         bool is_mcast = false;
466
467         if (pkt->rcv_flags & HFI1_HAS_GRH)
468                 grh = &hdr->u.l.grh;
469
470         switch (qp->ibqp.qp_type) {
471         case IB_QPT_SMI:
472         case IB_QPT_GSI:
473         case IB_QPT_UD:
474                 rlid = be16_to_cpu(hdr->lrh[3]);
475                 rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
476                 svc_type = IB_CC_SVCTYPE_UD;
477                 is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
478                         (dlid != be16_to_cpu(IB_LID_PERMISSIVE));
479                 break;
480         case IB_QPT_UC:
481                 rlid = qp->remote_ah_attr.dlid;
482                 rqpn = qp->remote_qpn;
483                 svc_type = IB_CC_SVCTYPE_UC;
484                 break;
485         case IB_QPT_RC:
486                 rlid = qp->remote_ah_attr.dlid;
487                 rqpn = qp->remote_qpn;
488                 svc_type = IB_CC_SVCTYPE_RC;
489                 break;
490         default:
491                 return;
492         }
493
494         sc = hdr2sc(hdr, pkt->rhf);
495
496         bth1 = be32_to_cpu(ohdr->bth[1]);
497         if (do_cnp && (bth1 & HFI1_FECN_SMASK)) {
498                 u16 pkey = (u16)be32_to_cpu(ohdr->bth[0]);
499
500                 return_cnp(ibp, qp, rqpn, pkey, dlid, rlid, sc, grh);
501         }
502
503         if (!is_mcast && (bth1 & HFI1_BECN_SMASK)) {
504                 struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
505                 u32 lqpn = bth1 & RVT_QPN_MASK;
506                 u8 sl = ibp->sc_to_sl[sc];
507
508                 process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type);
509         }
510
511 }
512
513 struct ps_mdata {
514         struct hfi1_ctxtdata *rcd;
515         u32 rsize;
516         u32 maxcnt;
517         u32 ps_head;
518         u32 ps_tail;
519         u32 ps_seq;
520 };
521
522 static inline void init_ps_mdata(struct ps_mdata *mdata,
523                                  struct hfi1_packet *packet)
524 {
525         struct hfi1_ctxtdata *rcd = packet->rcd;
526
527         mdata->rcd = rcd;
528         mdata->rsize = packet->rsize;
529         mdata->maxcnt = packet->maxcnt;
530         mdata->ps_head = packet->rhqoff;
531
532         if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
533                 mdata->ps_tail = get_rcvhdrtail(rcd);
534                 if (rcd->ctxt == HFI1_CTRL_CTXT)
535                         mdata->ps_seq = rcd->seq_cnt;
536                 else
537                         mdata->ps_seq = 0; /* not used with DMA_RTAIL */
538         } else {
539                 mdata->ps_tail = 0; /* used only with DMA_RTAIL */
540                 mdata->ps_seq = rcd->seq_cnt;
541         }
542 }
543
544 static inline int ps_done(struct ps_mdata *mdata, u64 rhf,
545                           struct hfi1_ctxtdata *rcd)
546 {
547         if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL))
548                 return mdata->ps_head == mdata->ps_tail;
549         return mdata->ps_seq != rhf_rcv_seq(rhf);
550 }
551
552 static inline int ps_skip(struct ps_mdata *mdata, u64 rhf,
553                           struct hfi1_ctxtdata *rcd)
554 {
555         /*
556          * Control context can potentially receive an invalid rhf.
557          * Drop such packets.
558          */
559         if ((rcd->ctxt == HFI1_CTRL_CTXT) && (mdata->ps_head != mdata->ps_tail))
560                 return mdata->ps_seq != rhf_rcv_seq(rhf);
561
562         return 0;
563 }
564
565 static inline void update_ps_mdata(struct ps_mdata *mdata,
566                                    struct hfi1_ctxtdata *rcd)
567 {
568         mdata->ps_head += mdata->rsize;
569         if (mdata->ps_head >= mdata->maxcnt)
570                 mdata->ps_head = 0;
571
572         /* Control context must do seq counting */
573         if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ||
574             (rcd->ctxt == HFI1_CTRL_CTXT)) {
575                 if (++mdata->ps_seq > 13)
576                         mdata->ps_seq = 1;
577         }
578 }
579
580 /*
581  * prescan_rxq - search through the receive queue looking for packets
582  * containing Explicit Congestion Notifications (FECNs or BECNs).
583  * When an ECN is found, process the Congestion Notification, and toggle
584  * it off.
585  * This is declared as a macro to allow quick checking of the port to avoid
586  * the overhead of a function call if not enabled.
587  */
588 #define prescan_rxq(rcd, packet) \
589         do { \
590                 if (rcd->ppd->cc_prescan) \
591                         __prescan_rxq(packet); \
592         } while (0)
593 static void __prescan_rxq(struct hfi1_packet *packet)
594 {
595         struct hfi1_ctxtdata *rcd = packet->rcd;
596         struct ps_mdata mdata;
597
598         init_ps_mdata(&mdata, packet);
599
600         while (1) {
601                 struct hfi1_devdata *dd = rcd->dd;
602                 struct hfi1_ibport *ibp = rcd_to_iport(rcd);
603                 __le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head +
604                                          dd->rhf_offset;
605                 struct rvt_qp *qp;
606                 struct ib_header *hdr;
607                 struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
608                 u64 rhf = rhf_to_cpu(rhf_addr);
609                 u32 etype = rhf_rcv_type(rhf), qpn, bth1;
610                 int is_ecn = 0;
611                 u8 lnh;
612
613                 if (ps_done(&mdata, rhf, rcd))
614                         break;
615
616                 if (ps_skip(&mdata, rhf, rcd))
617                         goto next;
618
619                 if (etype != RHF_RCV_TYPE_IB)
620                         goto next;
621
622                 packet->hdr = hfi1_get_msgheader(dd, rhf_addr);
623                 hdr = packet->hdr;
624
625                 lnh = be16_to_cpu(hdr->lrh[0]) & 3;
626
627                 if (lnh == HFI1_LRH_BTH) {
628                         packet->ohdr = &hdr->u.oth;
629                 } else if (lnh == HFI1_LRH_GRH) {
630                         packet->ohdr = &hdr->u.l.oth;
631                         packet->rcv_flags |= HFI1_HAS_GRH;
632                 } else {
633                         goto next; /* just in case */
634                 }
635
636                 bth1 = be32_to_cpu(packet->ohdr->bth[1]);
637                 is_ecn = !!(bth1 & (HFI1_FECN_SMASK | HFI1_BECN_SMASK));
638
639                 if (!is_ecn)
640                         goto next;
641
642                 qpn = bth1 & RVT_QPN_MASK;
643                 rcu_read_lock();
644                 qp = rvt_lookup_qpn(rdi, &ibp->rvp, qpn);
645
646                 if (!qp) {
647                         rcu_read_unlock();
648                         goto next;
649                 }
650
651                 process_ecn(qp, packet, true);
652                 rcu_read_unlock();
653
654                 /* turn off BECN, FECN */
655                 bth1 &= ~(HFI1_FECN_SMASK | HFI1_BECN_SMASK);
656                 packet->ohdr->bth[1] = cpu_to_be32(bth1);
657 next:
658                 update_ps_mdata(&mdata, rcd);
659         }
660 }
661
662 static void process_rcv_qp_work(struct hfi1_ctxtdata *rcd)
663 {
664         struct rvt_qp *qp, *nqp;
665
666         /*
667          * Iterate over all QPs waiting to respond.
668          * The list won't change since the IRQ is only run on one CPU.
669          */
670         list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
671                 list_del_init(&qp->rspwait);
672                 if (qp->r_flags & RVT_R_RSP_NAK) {
673                         qp->r_flags &= ~RVT_R_RSP_NAK;
674                         hfi1_send_rc_ack(rcd, qp, 0);
675                 }
676                 if (qp->r_flags & RVT_R_RSP_SEND) {
677                         unsigned long flags;
678
679                         qp->r_flags &= ~RVT_R_RSP_SEND;
680                         spin_lock_irqsave(&qp->s_lock, flags);
681                         if (ib_rvt_state_ops[qp->state] &
682                                         RVT_PROCESS_OR_FLUSH_SEND)
683                                 hfi1_schedule_send(qp);
684                         spin_unlock_irqrestore(&qp->s_lock, flags);
685                 }
686                 rvt_put_qp(qp);
687         }
688 }
689
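/*
 * Called when a multiple of MAX_PKT_RECV packets has been processed.
 * In thread context, periodically flush the qp_wait_list, yield the CPU,
 * and keep going; in interrupt context, bump the rcv_limit counter and
 * return RCV_PKT_LIMIT so the receive loop stops and the caller can
 * bound the time spent in hard interrupt context.
 */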
690 static noinline int max_packet_exceeded(struct hfi1_packet *packet, int thread)
691 {
692         if (thread) {
693                 if ((packet->numpkt & (MAX_PKT_RECV_THREAD - 1)) == 0)
694                         /* allow deferred processing */
695                         process_rcv_qp_work(packet->rcd);
696                 cond_resched();
697                 return RCV_PKT_OK;
698         } else {
699                 this_cpu_inc(*packet->rcd->dd->rcv_limit);
700                 return RCV_PKT_LIMIT;
701         }
702 }
703
704 static inline int check_max_packet(struct hfi1_packet *packet, int thread)
705 {
706         int ret = RCV_PKT_OK;
707
708         if (unlikely((packet->numpkt & (MAX_PKT_RECV - 1)) == 0))
709                 ret = max_packet_exceeded(packet, thread);
710         return ret;
711 }
712
713 static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
714 {
715         int ret;
716
717         /* Set up for the next packet */
718         packet->rhqoff += packet->rsize;
719         if (packet->rhqoff >= packet->maxcnt)
720                 packet->rhqoff = 0;
721
722         packet->numpkt++;
723         ret = check_max_packet(packet, thread);
724
725         packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
726                                      packet->rcd->dd->rhf_offset;
727         packet->rhf = rhf_to_cpu(packet->rhf_addr);
728
729         return ret;
730 }
731
732 static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
733 {
734         int ret;
735
736         packet->hdr = hfi1_get_msgheader(packet->rcd->dd,
737                                          packet->rhf_addr);
738         packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr;
739         packet->etype = rhf_rcv_type(packet->rhf);
740         /* total length */
741         packet->tlen = rhf_pkt_len(packet->rhf); /* in bytes */
742         /* retrieve eager buffer details */
743         packet->ebuf = NULL;
744         if (rhf_use_egr_bfr(packet->rhf)) {
745                 packet->etail = rhf_egr_index(packet->rhf);
746                 packet->ebuf = get_egrbuf(packet->rcd, packet->rhf,
747                                  &packet->updegr);
748                 /*
749                  * Prefetch the contents of the eager buffer.  It is
750                  * OK to send a negative length to prefetch_range().
751                  * The +2 is the size of the RHF.
752                  */
753                 prefetch_range(packet->ebuf,
754                                packet->tlen - ((packet->rcd->rcvhdrqentsize -
755                                                (rhf_hdrq_offset(packet->rhf)
756                                                 + 2)) * 4));
757         }
758
759         /*
760          * Call a type specific handler for the packet. We
761          * should be able to trust that etype won't be beyond
762          * the range of valid indexes. If so something is really
763          * wrong and we can probably just let things come
764          * crashing down. There is no need to eat another
765          * comparison in this performance critical code.
766          */
767         packet->rcd->dd->rhf_rcv_function_map[packet->etype](packet);
768         packet->numpkt++;
769
770         /* Set up for the next packet */
771         packet->rhqoff += packet->rsize;
772         if (packet->rhqoff >= packet->maxcnt)
773                 packet->rhqoff = 0;
774
775         ret = check_max_packet(packet, thread);
776
777         packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
778                                       packet->rcd->dd->rhf_offset;
779         packet->rhf = rhf_to_cpu(packet->rhf_addr);
780
781         return ret;
782 }
783
784 static inline void process_rcv_update(int last, struct hfi1_packet *packet)
785 {
786         /*
787          * Update head regs etc. every 16 packets, if not the last pkt,
788          * to help prevent rcvhdrq overflows when many packets
789          * are processed and the queue is nearly full.
790          * Don't request an interrupt for intermediate updates.
791          */
792         if (!last && !(packet->numpkt & 0xf)) {
793                 update_usrhead(packet->rcd, packet->rhqoff, packet->updegr,
794                                packet->etail, 0, 0);
795                 packet->updegr = 0;
796         }
797         packet->rcv_flags = 0;
798 }
799
800 static inline void finish_packet(struct hfi1_packet *packet)
801 {
802         /*
803          * Nothing we need to free for the packet.
804          *
805          * The only thing we need to do is a final update and call for an
806          * interrupt
807          */
808         update_usrhead(packet->rcd, packet->rcd->head, packet->updegr,
809                        packet->etail, rcv_intr_dynamic, packet->numpkt);
810 }
811
812 /*
813  * Handle receive interrupts when using the no dma rtail option.
814  */
815 int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread)
816 {
817         u32 seq;
818         int last = RCV_PKT_OK;
819         struct hfi1_packet packet;
820
821         init_packet(rcd, &packet);
822         seq = rhf_rcv_seq(packet.rhf);
823         if (seq != rcd->seq_cnt) {
824                 last = RCV_PKT_DONE;
825                 goto bail;
826         }
827
828         prescan_rxq(rcd, &packet);
829
830         while (last == RCV_PKT_OK) {
831                 last = process_rcv_packet(&packet, thread);
832                 seq = rhf_rcv_seq(packet.rhf);
833                 if (++rcd->seq_cnt > 13)
834                         rcd->seq_cnt = 1;
835                 if (seq != rcd->seq_cnt)
836                         last = RCV_PKT_DONE;
837                 process_rcv_update(last, &packet);
838         }
839         process_rcv_qp_work(rcd);
840         rcd->head = packet.rhqoff;
841 bail:
842         finish_packet(&packet);
843         return last;
844 }
845
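/*
 * Handle receive interrupts when using the DMA RTAIL option.
 */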
846 int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread)
847 {
848         u32 hdrqtail;
849         int last = RCV_PKT_OK;
850         struct hfi1_packet packet;
851
852         init_packet(rcd, &packet);
853         hdrqtail = get_rcvhdrtail(rcd);
854         if (packet.rhqoff == hdrqtail) {
855                 last = RCV_PKT_DONE;
856                 goto bail;
857         }
858         smp_rmb();  /* prevent speculative reads of dma'ed hdrq */
859
860         prescan_rxq(rcd, &packet);
861
862         while (last == RCV_PKT_OK) {
863                 last = process_rcv_packet(&packet, thread);
864                 if (packet.rhqoff == hdrqtail)
865                         last = RCV_PKT_DONE;
866                 process_rcv_update(last, &packet);
867         }
868         process_rcv_qp_work(rcd);
869         rcd->head = packet.rhqoff;
870 bail:
871         finish_packet(&packet);
872         return last;
873 }
874
875 static inline void set_all_nodma_rtail(struct hfi1_devdata *dd)
876 {
877         int i;
878
879         for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++)
880                 dd->rcd[i]->do_interrupt =
881                         &handle_receive_interrupt_nodma_rtail;
882 }
883
884 static inline void set_all_dma_rtail(struct hfi1_devdata *dd)
885 {
886         int i;
887
888         for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++)
889                 dd->rcd[i]->do_interrupt =
890                         &handle_receive_interrupt_dma_rtail;
891 }
892
893 void set_all_slowpath(struct hfi1_devdata *dd)
894 {
895         int i;
896
897         /* HFI1_CTRL_CTXT must always use the slow path interrupt handler */
898         for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++)
899                 dd->rcd[i]->do_interrupt = &handle_receive_interrupt;
900 }
901
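/*
 * Receiving a non-SC15 packet while the host link state is still ARMED
 * means the hardware link has already gone ACTIVE.  Verify the logical
 * link state and, if it is ACTIVE, queue linkstate_active_work to bring
 * the software link state up to date.  Returns 1 when the work was
 * queued (the caller bails out and defers packet processing).
 */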
902 static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
903                                       struct hfi1_packet *packet,
904                                       struct hfi1_devdata *dd)
905 {
906         struct work_struct *lsaw = &rcd->ppd->linkstate_active_work;
907         struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd,
908                                                    packet->rhf_addr);
909         u8 etype = rhf_rcv_type(packet->rhf);
910
911         if (etype == RHF_RCV_TYPE_IB && hdr2sc(hdr, packet->rhf) != 0xf) {
912                 int hwstate = read_logical_state(dd);
913
914                 if (hwstate != LSTATE_ACTIVE) {
915                         dd_dev_info(dd, "Unexpected link state %d\n", hwstate);
916                         return 0;
917                 }
918
919                 queue_work(rcd->ppd->hfi1_wq, lsaw);
920                 return 1;
921         }
922         return 0;
923 }
924
925 /*
926  * handle_receive_interrupt - receive a packet
927  * @rcd: the context
928  *
929  * Called from interrupt handler for errors or receive interrupt.
930  * This is the slow path interrupt handler.
931  */
932 int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
933 {
934         struct hfi1_devdata *dd = rcd->dd;
935         u32 hdrqtail;
936         int needset, last = RCV_PKT_OK;
937         struct hfi1_packet packet;
938         int skip_pkt = 0;
939
940         /* Control context will always use the slow path interrupt handler */
941         needset = (rcd->ctxt == HFI1_CTRL_CTXT) ? 0 : 1;
942
943         init_packet(rcd, &packet);
944
945         if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
946                 u32 seq = rhf_rcv_seq(packet.rhf);
947
948                 if (seq != rcd->seq_cnt) {
949                         last = RCV_PKT_DONE;
950                         goto bail;
951                 }
952                 hdrqtail = 0;
953         } else {
954                 hdrqtail = get_rcvhdrtail(rcd);
955                 if (packet.rhqoff == hdrqtail) {
956                         last = RCV_PKT_DONE;
957                         goto bail;
958                 }
959                 smp_rmb();  /* prevent speculative reads of dma'ed hdrq */
960
961                 /*
962                  * Control context can potentially receive an invalid
963                  * rhf. Drop such packets.
964                  */
965                 if (rcd->ctxt == HFI1_CTRL_CTXT) {
966                         u32 seq = rhf_rcv_seq(packet.rhf);
967
968                         if (seq != rcd->seq_cnt)
969                                 skip_pkt = 1;
970                 }
971         }
972
973         prescan_rxq(rcd, &packet);
974
975         while (last == RCV_PKT_OK) {
976                 if (unlikely(dd->do_drop &&
977                              atomic_xchg(&dd->drop_packet, DROP_PACKET_OFF) ==
978                              DROP_PACKET_ON)) {
979                         dd->do_drop = 0;
980
981                         /* On to the next packet */
982                         packet.rhqoff += packet.rsize;
983                         packet.rhf_addr = (__le32 *)rcd->rcvhdrq +
984                                           packet.rhqoff +
985                                           dd->rhf_offset;
986                         packet.rhf = rhf_to_cpu(packet.rhf_addr);
987
988                 } else if (skip_pkt) {
989                         last = skip_rcv_packet(&packet, thread);
990                         skip_pkt = 0;
991                 } else {
992                         /* Auto activate link on non-SC15 packet receive */
993                         if (unlikely(rcd->ppd->host_link_state ==
994                                      HLS_UP_ARMED) &&
995                             set_armed_to_active(rcd, &packet, dd))
996                                 goto bail;
997                         last = process_rcv_packet(&packet, thread);
998                 }
999
1000                 if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
1001                         u32 seq = rhf_rcv_seq(packet.rhf);
1002
1003                         if (++rcd->seq_cnt > 13)
1004                                 rcd->seq_cnt = 1;
1005                         if (seq != rcd->seq_cnt)
1006                                 last = RCV_PKT_DONE;
1007                         if (needset) {
1008                                 dd_dev_info(dd, "Switching to NO_DMA_RTAIL\n");
1009                                 set_all_nodma_rtail(dd);
1010                                 needset = 0;
1011                         }
1012                 } else {
1013                         if (packet.rhqoff == hdrqtail)
1014                                 last = RCV_PKT_DONE;
1015                         /*
1016                          * Control context can potentially receive an invalid
1017                          * rhf. Drop such packets.
1018                          */
1019                         if (rcd->ctxt == HFI1_CTRL_CTXT) {
1020                                 u32 seq = rhf_rcv_seq(packet.rhf);
1021
1022                                 if (++rcd->seq_cnt > 13)
1023                                         rcd->seq_cnt = 1;
1024                                 if (!last && (seq != rcd->seq_cnt))
1025                                         skip_pkt = 1;
1026                         }
1027
1028                         if (needset) {
1029                                 dd_dev_info(dd,
1030                                             "Switching to DMA_RTAIL\n");
1031                                 set_all_dma_rtail(dd);
1032                                 needset = 0;
1033                         }
1034                 }
1035
1036                 process_rcv_update(last, &packet);
1037         }
1038
1039         process_rcv_qp_work(rcd);
1040         rcd->head = packet.rhqoff;
1041
1042 bail:
1043         /*
1044          * Always write head at end, and setup rcv interrupt, even
1045          * if no packets were processed.
1046          */
1047         finish_packet(&packet);
1048         return last;
1049 }
1050
1051 /*
1052  * We may discover in the interrupt that the hardware link state has
1053  * changed from ARMED to ACTIVE (due to the arrival of a non-SC15 packet),
1054  * and we need to update the driver's notion of the link state.  We cannot
1055  * run set_link_state from interrupt context, so we queue this function on
1056  * a workqueue.
1057  *
1058  * We delay the regular interrupt processing until after the state changes
1059  * so that the link will be in the correct state by the time any application
1060  * we wake up attempts to send a reply to any message it received.
1061  * (Subsequent receive interrupts may possibly force the wakeup before we
1062  * update the link state.)
1063  *
1064  * The rcd is freed in hfi1_free_ctxtdata after hfi1_postinit_cleanup invokes
1065  * dd->f_cleanup(dd) to disable the interrupt handler and flush workqueues,
1066  * so we're safe from use-after-free of the rcd.
1067  */
1068 void receive_interrupt_work(struct work_struct *work)
1069 {
1070         struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
1071                                                   linkstate_active_work);
1072         struct hfi1_devdata *dd = ppd->dd;
1073         int i;
1074
1075         /* Received non-SC15 packet implies neighbor_normal */
1076         ppd->neighbor_normal = 1;
1077         set_link_state(ppd, HLS_UP_ACTIVE);
1078
1079         /*
1080          * Interrupt all kernel contexts that could have had an
1081          * interrupt during auto activation.
1082          */
1083         for (i = HFI1_CTRL_CTXT; i < dd->first_user_ctxt; i++)
1084                 force_recv_intr(dd->rcd[i]);
1085 }
1086
1087 /*
1088  * Convert a given MTU size to the on-wire MAD packet enumeration.
1089  * Return default_if_bad if the size is invalid.
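 *
 * For example, mtu_to_enum(4096, OPA_MTU_2048) returns OPA_MTU_4096,
 * while an unsupported size such as 3000 falls back to OPA_MTU_2048.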
1090  */
1091 int mtu_to_enum(u32 mtu, int default_if_bad)
1092 {
1093         switch (mtu) {
1094         case     0: return OPA_MTU_0;
1095         case   256: return OPA_MTU_256;
1096         case   512: return OPA_MTU_512;
1097         case  1024: return OPA_MTU_1024;
1098         case  2048: return OPA_MTU_2048;
1099         case  4096: return OPA_MTU_4096;
1100         case  8192: return OPA_MTU_8192;
1101         case 10240: return OPA_MTU_10240;
1102         }
1103         return default_if_bad;
1104 }
1105
1106 u16 enum_to_mtu(int mtu)
1107 {
1108         switch (mtu) {
1109         case OPA_MTU_0:     return 0;
1110         case OPA_MTU_256:   return 256;
1111         case OPA_MTU_512:   return 512;
1112         case OPA_MTU_1024:  return 1024;
1113         case OPA_MTU_2048:  return 2048;
1114         case OPA_MTU_4096:  return 4096;
1115         case OPA_MTU_8192:  return 8192;
1116         case OPA_MTU_10240: return 10240;
1117         default: return 0xffff;
1118         }
1119 }
1120
1121 /*
1122  * set_mtu - set the MTU
1123  * @ppd: the per port data
1124  *
1125  * We can handle "any" incoming size; the issue here is whether we
1126  * need to restrict our outgoing size.  We do not deal with what happens
1127  * to programs that are already running when the size changes.
1128  */
1129 int set_mtu(struct hfi1_pportdata *ppd)
1130 {
1131         struct hfi1_devdata *dd = ppd->dd;
1132         int i, drain, ret = 0, is_up = 0;
1133
1134         ppd->ibmtu = 0;
1135         for (i = 0; i < ppd->vls_supported; i++)
1136                 if (ppd->ibmtu < dd->vld[i].mtu)
1137                         ppd->ibmtu = dd->vld[i].mtu;
1138         ppd->ibmaxlen = ppd->ibmtu + lrh_max_header_bytes(ppd->dd);
1139
1140         mutex_lock(&ppd->hls_lock);
1141         if (ppd->host_link_state == HLS_UP_INIT ||
1142             ppd->host_link_state == HLS_UP_ARMED ||
1143             ppd->host_link_state == HLS_UP_ACTIVE)
1144                 is_up = 1;
1145
1146         drain = !is_ax(dd) && is_up;
1147
1148         if (drain)
1149                 /*
1150                  * MTU is specified per-VL. To ensure that no packet gets
1151                  * stuck (due, e.g., to the MTU for the packet's VL being
1152                  * reduced), empty the per-VL FIFOs before adjusting MTU.
1153                  */
1154                 ret = stop_drain_data_vls(dd);
1155
1156         if (ret) {
1157                 dd_dev_err(dd, "%s: cannot stop/drain VLs - refusing to change per-VL MTUs\n",
1158                            __func__);
1159                 goto err;
1160         }
1161
1162         hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_MTU, 0);
1163
1164         if (drain)
1165                 open_fill_data_vls(dd); /* reopen all VLs */
1166
1167 err:
1168         mutex_unlock(&ppd->hls_lock);
1169
1170         return ret;
1171 }
1172
1173 int hfi1_set_lid(struct hfi1_pportdata *ppd, u32 lid, u8 lmc)
1174 {
1175         struct hfi1_devdata *dd = ppd->dd;
1176
1177         ppd->lid = lid;
1178         ppd->lmc = lmc;
1179         hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LIDLMC, 0);
1180
1181         dd_dev_info(dd, "port %u: got a lid: 0x%x\n", ppd->port, lid);
1182
1183         return 0;
1184 }
1185
1186 void shutdown_led_override(struct hfi1_pportdata *ppd)
1187 {
1188         struct hfi1_devdata *dd = ppd->dd;
1189
1190         /*
1191          * This pairs with the memory barrier in hfi1_start_led_override to
1192          * ensure that we read the correct state of LED beaconing represented
1193          * by led_override_timer_active
1194          */
1195         smp_rmb();
1196         if (atomic_read(&ppd->led_override_timer_active)) {
1197                 del_timer_sync(&ppd->led_override_timer);
1198                 atomic_set(&ppd->led_override_timer_active, 0);
1199                 /* Ensure the atomic_set is visible to all CPUs */
1200                 smp_wmb();
1201         }
1202
1203         /* Hand control of the LED to the DC for normal operation */
1204         write_csr(dd, DCC_CFG_LED_CNTRL, 0);
1205 }
1206
1207 static void run_led_override(unsigned long opaque)
1208 {
1209         struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)opaque;
1210         struct hfi1_devdata *dd = ppd->dd;
1211         unsigned long timeout;
1212         int phase_idx;
1213
1214         if (!(dd->flags & HFI1_INITTED))
1215                 return;
1216
1217         phase_idx = ppd->led_override_phase & 1;
1218
1219         setextled(dd, phase_idx);
1220
1221         timeout = ppd->led_override_vals[phase_idx];
1222
1223         /* Set up for next phase */
1224         ppd->led_override_phase = !ppd->led_override_phase;
1225
1226         mod_timer(&ppd->led_override_timer, jiffies + timeout);
1227 }
1228
1229 /*
1230  * To have the LED blink in a particular pattern, provide timeon and timeoff
1231  * in milliseconds.
1232  * To turn off custom blinking and return to normal operation, use
1233  * shutdown_led_override()
1234  */
1235 void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon,
1236                              unsigned int timeoff)
1237 {
1238         if (!(ppd->dd->flags & HFI1_INITTED))
1239                 return;
1240
1241         /* Convert to jiffies for direct use in timer */
1242         ppd->led_override_vals[0] = msecs_to_jiffies(timeoff);
1243         ppd->led_override_vals[1] = msecs_to_jiffies(timeon);
1244
1245         /* Arbitrarily start from LED on phase */
1246         ppd->led_override_phase = 1;
1247
1248         /*
1249          * If the timer has not already been started, do so. Use a "quick"
1250          * timeout so the handler will be called soon to look at our request.
1251          */
1252         if (!timer_pending(&ppd->led_override_timer)) {
1253                 setup_timer(&ppd->led_override_timer, run_led_override,
1254                             (unsigned long)ppd);
1255                 ppd->led_override_timer.expires = jiffies + 1;
1256                 add_timer(&ppd->led_override_timer);
1257                 atomic_set(&ppd->led_override_timer_active, 1);
1258                 /* Ensure the atomic_set is visible to all CPUs */
1259                 smp_wmb();
1260         }
1261 }
1262
1263 /**
1264  * hfi1_reset_device - reset the chip if possible
1265  * @unit: the device to reset
1266  *
1267  * Whether or not reset is successful, we attempt to re-initialize the chip
1268  * (that is, much like a driver unload/reload).  We clear the INITTED flag
1269  * so that the various entry points will fail until we reinitialize.  For
1270  * now, we only allow this if no user contexts are open that use chip resources
1271  */
1272 int hfi1_reset_device(int unit)
1273 {
1274         int ret, i;
1275         struct hfi1_devdata *dd = hfi1_lookup(unit);
1276         struct hfi1_pportdata *ppd;
1277         unsigned long flags;
1278         int pidx;
1279
1280         if (!dd) {
1281                 ret = -ENODEV;
1282                 goto bail;
1283         }
1284
1285         dd_dev_info(dd, "Reset on unit %u requested\n", unit);
1286
1287         if (!dd->kregbase || !(dd->flags & HFI1_PRESENT)) {
1288                 dd_dev_info(dd,
1289                             "Invalid unit number %u or not initialized or not present\n",
1290                             unit);
1291                 ret = -ENXIO;
1292                 goto bail;
1293         }
1294
1295         spin_lock_irqsave(&dd->uctxt_lock, flags);
1296         if (dd->rcd)
1297                 for (i = dd->first_user_ctxt; i < dd->num_rcv_contexts; i++) {
1298                         if (!dd->rcd[i] || !dd->rcd[i]->cnt)
1299                                 continue;
1300                         spin_unlock_irqrestore(&dd->uctxt_lock, flags);
1301                         ret = -EBUSY;
1302                         goto bail;
1303                 }
1304         spin_unlock_irqrestore(&dd->uctxt_lock, flags);
1305
1306         for (pidx = 0; pidx < dd->num_pports; ++pidx) {
1307                 ppd = dd->pport + pidx;
1308
1309                 shutdown_led_override(ppd);
1310         }
1311         if (dd->flags & HFI1_HAS_SEND_DMA)
1312                 sdma_exit(dd);
1313
1314         hfi1_reset_cpu_counters(dd);
1315
1316         ret = hfi1_init(dd, 1);
1317
1318         if (ret)
1319                 dd_dev_err(dd,
1320                            "Reinitialize unit %u after reset failed with %d\n",
1321                            unit, ret);
1322         else
1323                 dd_dev_info(dd, "Reinitialized unit %u after resetting\n",
1324                             unit);
1325
1326 bail:
1327         return ret;
1328 }
1329
1330 void handle_eflags(struct hfi1_packet *packet)
1331 {
1332         struct hfi1_ctxtdata *rcd = packet->rcd;
1333         u32 rte = rhf_rcv_type_err(packet->rhf);
1334
1335         rcv_hdrerr(rcd, rcd->ppd, packet);
1336         if (rhf_err_flags(packet->rhf))
1337                 dd_dev_err(rcd->dd,
1338                            "receive context %d: rhf 0x%016llx, errs [ %s%s%s%s%s%s%s%s] rte 0x%x\n",
1339                            rcd->ctxt, packet->rhf,
1340                            packet->rhf & RHF_K_HDR_LEN_ERR ? "k_hdr_len " : "",
1341                            packet->rhf & RHF_DC_UNC_ERR ? "dc_unc " : "",
1342                            packet->rhf & RHF_DC_ERR ? "dc " : "",
1343                            packet->rhf & RHF_TID_ERR ? "tid " : "",
1344                            packet->rhf & RHF_LEN_ERR ? "len " : "",
1345                            packet->rhf & RHF_ECC_ERR ? "ecc " : "",
1346                            packet->rhf & RHF_VCRC_ERR ? "vcrc " : "",
1347                            packet->rhf & RHF_ICRC_ERR ? "icrc " : "",
1348                            rte);
1349 }
1350
1351 /*
1352  * The following functions are called by the interrupt handler. They are type
1353  * specific handlers for each packet type.
1354  */
1355 int process_receive_ib(struct hfi1_packet *packet)
1356 {
1357         trace_hfi1_rcvhdr(packet->rcd->ppd->dd,
1358                           packet->rcd->ctxt,
1359                           rhf_err_flags(packet->rhf),
1360                           RHF_RCV_TYPE_IB,
1361                           packet->hlen,
1362                           packet->tlen,
1363                           packet->updegr,
1364                           rhf_egr_index(packet->rhf));
1365
1366         if (unlikely(rhf_err_flags(packet->rhf))) {
1367                 handle_eflags(packet);
1368                 return RHF_RCV_CONTINUE;
1369         }
1370
1371         hfi1_ib_rcv(packet);
1372         return RHF_RCV_CONTINUE;
1373 }
1374
1375 int process_receive_bypass(struct hfi1_packet *packet)
1376 {
1377         struct hfi1_devdata *dd = packet->rcd->dd;
1378
1379         if (unlikely(rhf_err_flags(packet->rhf)))
1380                 handle_eflags(packet);
1381
1382         dd_dev_err(dd,
1383                    "Bypass packets are not supported in normal operation. Dropping\n");
1384         incr_cntr64(&dd->sw_rcv_bypass_packet_errors);
1385         if (!(dd->err_info_rcvport.status_and_code & OPA_EI_STATUS_SMASK)) {
1386                 u64 *flits = packet->ebuf;
1387
1388                 if (flits && !(packet->rhf & RHF_LEN_ERR)) {
1389                         dd->err_info_rcvport.packet_flit1 = flits[0];
1390                         dd->err_info_rcvport.packet_flit2 =
1391                                 packet->tlen > sizeof(flits[0]) ? flits[1] : 0;
1392                 }
1393                 dd->err_info_rcvport.status_and_code |=
1394                         (OPA_EI_STATUS_SMASK | BAD_L2_ERR);
1395         }
1396         return RHF_RCV_CONTINUE;
1397 }
1398
1399 int process_receive_error(struct hfi1_packet *packet)
1400 {
1401         handle_eflags(packet);
1402
1403         if (unlikely(rhf_err_flags(packet->rhf)))
1404                 dd_dev_err(packet->rcd->dd,
1405                            "Unhandled error packet received. Dropping.\n");
1406
1407         return RHF_RCV_CONTINUE;
1408 }
1409
1410 int kdeth_process_expected(struct hfi1_packet *packet)
1411 {
1412         if (unlikely(rhf_err_flags(packet->rhf)))
1413                 handle_eflags(packet);
1414
1415         dd_dev_err(packet->rcd->dd,
1416                    "Unhandled expected packet received. Dropping.\n");
1417         return RHF_RCV_CONTINUE;
1418 }
1419
1420 int kdeth_process_eager(struct hfi1_packet *packet)
1421 {
1422         if (unlikely(rhf_err_flags(packet->rhf)))
1423                 handle_eflags(packet);
1424
1425         dd_dev_err(packet->rcd->dd,
1426                    "Unhandled eager packet received. Dropping.\n");
1427         return RHF_RCV_CONTINUE;
1428 }
1429
1430 int process_receive_invalid(struct hfi1_packet *packet)
1431 {
1432         dd_dev_err(packet->rcd->dd, "Invalid packet type %d. Dropping\n",
1433                    rhf_rcv_type(packet->rhf));
1434         return RHF_RCV_CONTINUE;
1435 }