IB/hfi1: Add counter to track unsupported packets drop
[linux-2.6-block.git] / drivers / infiniband / hw / hfi1 / mad.c
1 /*
2  * Copyright(c) 2015, 2016 Intel Corporation.
3  *
4  * This file is provided under a dual BSD/GPLv2 license.  When using or
5  * redistributing this file, you may do so under either license.
6  *
7  * GPL LICENSE SUMMARY
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of version 2 of the GNU General Public License as
11  * published by the Free Software Foundation.
12  *
13  * This program is distributed in the hope that it will be useful, but
14  * WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * General Public License for more details.
17  *
18  * BSD LICENSE
19  *
20  * Redistribution and use in source and binary forms, with or without
21  * modification, are permitted provided that the following conditions
22  * are met:
23  *
24  *  - Redistributions of source code must retain the above copyright
25  *    notice, this list of conditions and the following disclaimer.
26  *  - Redistributions in binary form must reproduce the above copyright
27  *    notice, this list of conditions and the following disclaimer in
28  *    the documentation and/or other materials provided with the
29  *    distribution.
30  *  - Neither the name of Intel Corporation nor the names of its
31  *    contributors may be used to endorse or promote products derived
32  *    from this software without specific prior written permission.
33  *
34  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
36  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
38  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
39  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
40  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
42  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
43  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45  *
46  */
47
48 #include <linux/net.h>
/*
 * How many P_Key table blocks fit in OPA_SMP_DR_DATA_SIZE bytes, where one
 * block is OPA_PARTITION_TABLE_BLK_SIZE entries of sizeof(u16) bytes each.
 * Because of the sizeof, the expression has type size_t.
 */
#define OPA_NUM_PKEY_BLOCKS_PER_SMP (OPA_SMP_DR_DATA_SIZE \
                        / (OPA_PARTITION_TABLE_BLK_SIZE * sizeof(u16)))
51
52 #include "hfi.h"
53 #include "mad.h"
54 #include "trace.h"
55 #include "qp.h"
56
/*
 * The link-width reset value from the FM is supposed to be 0xffff; an older
 * value (0x0fff) is defined as well so that both can be handled.
 */
#define OPA_LINK_WIDTH_RESET_OLD 0x0fff
#define OPA_LINK_WIDTH_RESET 0xffff
60
61 static int reply(struct ib_mad_hdr *smp)
62 {
63         /*
64          * The verbs framework will handle the directed/LID route
65          * packet changes.
66          */
67         smp->method = IB_MGMT_METHOD_GET_RESP;
68         if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
69                 smp->status |= IB_SMP_DIRECTION;
70         return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
71 }
72
/*
 * Zero the data area of @smp, using the location and length reported by
 * opa_get_smp_data() and opa_get_smp_data_size().
 */
static inline void clear_opa_smp_data(struct opa_smp *smp)
{
        void *payload = opa_get_smp_data(smp);
        size_t payload_len = opa_get_smp_data_size(smp);

        memset(payload, 0, payload_len);
}
80
81 void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port)
82 {
83         struct ib_event event;
84
85         event.event = IB_EVENT_PKEY_CHANGE;
86         event.device = &dd->verbs_dev.rdi.ibdev;
87         event.element.port_num = port;
88         ib_dispatch_event(&event);
89 }
90
/*
 * Build an SMP Trap carrying the notice in @data (@len bytes) and post it
 * through the port's send agent.
 *
 * Nothing is sent when the port has no send agent, when the link is not
 * IB_PORT_ACTIVE, or while a previously armed trap timeout has not yet
 * expired.  After a successful post the trap timeout is re-armed from the
 * port's subnet timeout; on any failure after the send buffer has been
 * created, the buffer is freed and the timeout is cleared.
 *
 * @len is copied unchecked into the SMP's LID-routed data area; the callers
 * in this file pass sizeof(struct opa_mad_notice_attr).
 */
static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len)
{
        struct ib_mad_send_buf *send_buf;
        struct ib_mad_agent *agent;
        struct opa_smp *smp;
        int ret;
        unsigned long flags;
        unsigned long timeout;
        int pkey_idx;
        u32 qpn = ppd_from_ibp(ibp)->sm_trap_qp;

        agent = ibp->rvp.send_agent;
        if (!agent)
                return;

        /* o14-3.2.1 */
        if (ppd_from_ibp(ibp)->lstate != IB_PORT_ACTIVE)
                return;

        /* o14-2 */
        if (ibp->rvp.trap_timeout && time_before(jiffies,
                                                 ibp->rvp.trap_timeout))
                return;

        /*
         * Traps go out on the limited management P_Key; if it is not in
         * the table, fall back to P_Key index 1.
         */
        pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
        if (pkey_idx < 0) {
                pr_warn("%s: failed to find limited mgmt pkey, defaulting 0x%x\n",
                        __func__, hfi1_get_pkey(ibp, 1));
                pkey_idx = 1;
        }

        send_buf = ib_create_send_mad(agent, qpn, pkey_idx, 0,
                                      IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
                                      GFP_ATOMIC, IB_MGMT_BASE_VERSION);
        if (IS_ERR(send_buf))
                return;

        /* Fill in the trap header; each trap uses a fresh transaction ID. */
        smp = send_buf->mad;
        smp->base_version = OPA_MGMT_BASE_VERSION;
        smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
        smp->class_version = OPA_SMI_CLASS_VERSION;
        smp->method = IB_MGMT_METHOD_TRAP;
        ibp->rvp.tid++;
        smp->tid = cpu_to_be64(ibp->rvp.tid);
        smp->attr_id = IB_SMP_ATTR_NOTICE;
        /* o14-1: smp->mkey = 0; */
        memcpy(smp->route.lid.data, data, len);

        /*
         * Pick the address handle for the SM under the port lock: reuse the
         * cached one, or create and cache a new one from sm_lid.  If sm_lid
         * is the permissive LID there is no destination and the send is
         * abandoned with -EINVAL.
         */
        spin_lock_irqsave(&ibp->rvp.lock, flags);
        if (!ibp->rvp.sm_ah) {
                if (ibp->rvp.sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
                        struct ib_ah *ah;

                        ah = hfi1_create_qp0_ah(ibp, ibp->rvp.sm_lid);
                        if (IS_ERR(ah)) {
                                ret = PTR_ERR(ah);
                        } else {
                                send_buf->ah = ah;
                                ibp->rvp.sm_ah = ibah_to_rvtah(ah);
                                ret = 0;
                        }
                } else {
                        ret = -EINVAL;
                }
        } else {
                send_buf->ah = &ibp->rvp.sm_ah->ibah;
                ret = 0;
        }
        spin_unlock_irqrestore(&ibp->rvp.lock, flags);

        if (!ret)
                ret = ib_post_send_mad(send_buf, NULL);
        if (!ret) {
                /* 4.096 usec. */
                /* i.e. timeout = 4.096 usec * 2^subnet_timeout, in usecs */
                timeout = (4096 * (1UL << ibp->rvp.subnet_timeout)) / 1000;
                ibp->rvp.trap_timeout = jiffies + usecs_to_jiffies(timeout);
        } else {
                /* Not sent: release the buffer and allow an immediate retry. */
                ib_free_send_mad(send_buf);
                ibp->rvp.trap_timeout = 0;
        }
}
172
173 /*
174  * Send a bad [PQ]_Key trap (ch. 14.3.8).
175  */
176 void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
177                     u32 qp1, u32 qp2, u16 lid1, u16 lid2)
178 {
179         struct opa_mad_notice_attr data;
180         u32 lid = ppd_from_ibp(ibp)->lid;
181         u32 _lid1 = lid1;
182         u32 _lid2 = lid2;
183
184         memset(&data, 0, sizeof(data));
185
186         if (trap_num == OPA_TRAP_BAD_P_KEY)
187                 ibp->rvp.pkey_violations++;
188         else
189                 ibp->rvp.qkey_violations++;
190         ibp->rvp.n_pkt_drops++;
191
192         /* Send violation trap */
193         data.generic_type = IB_NOTICE_TYPE_SECURITY;
194         data.prod_type_lsb = IB_NOTICE_PROD_CA;
195         data.trap_num = trap_num;
196         data.issuer_lid = cpu_to_be32(lid);
197         data.ntc_257_258.lid1 = cpu_to_be32(_lid1);
198         data.ntc_257_258.lid2 = cpu_to_be32(_lid2);
199         data.ntc_257_258.key = cpu_to_be32(key);
200         data.ntc_257_258.sl = sl << 3;
201         data.ntc_257_258.qp1 = cpu_to_be32(qp1);
202         data.ntc_257_258.qp2 = cpu_to_be32(qp2);
203
204         send_trap(ibp, &data, sizeof(data));
205 }
206
207 /*
208  * Send a bad M_Key trap (ch. 14.3.9).
209  */
210 static void bad_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
211                      __be64 mkey, __be32 dr_slid, u8 return_path[], u8 hop_cnt)
212 {
213         struct opa_mad_notice_attr data;
214         u32 lid = ppd_from_ibp(ibp)->lid;
215
216         memset(&data, 0, sizeof(data));
217         /* Send violation trap */
218         data.generic_type = IB_NOTICE_TYPE_SECURITY;
219         data.prod_type_lsb = IB_NOTICE_PROD_CA;
220         data.trap_num = OPA_TRAP_BAD_M_KEY;
221         data.issuer_lid = cpu_to_be32(lid);
222         data.ntc_256.lid = data.issuer_lid;
223         data.ntc_256.method = mad->method;
224         data.ntc_256.attr_id = mad->attr_id;
225         data.ntc_256.attr_mod = mad->attr_mod;
226         data.ntc_256.mkey = mkey;
227         if (mad->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
228                 data.ntc_256.dr_slid = dr_slid;
229                 data.ntc_256.dr_trunc_hop = IB_NOTICE_TRAP_DR_NOTICE;
230                 if (hop_cnt > ARRAY_SIZE(data.ntc_256.dr_rtn_path)) {
231                         data.ntc_256.dr_trunc_hop |=
232                                 IB_NOTICE_TRAP_DR_TRUNC;
233                         hop_cnt = ARRAY_SIZE(data.ntc_256.dr_rtn_path);
234                 }
235                 data.ntc_256.dr_trunc_hop |= hop_cnt;
236                 memcpy(data.ntc_256.dr_rtn_path, return_path,
237                        hop_cnt);
238         }
239
240         send_trap(ibp, &data, sizeof(data));
241 }
242
243 /*
244  * Send a Port Capability Mask Changed trap (ch. 14.3.11).
245  */
246 void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num)
247 {
248         struct opa_mad_notice_attr data;
249         struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
250         struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
251         struct hfi1_ibport *ibp = &dd->pport[port_num - 1].ibport_data;
252         u32 lid = ppd_from_ibp(ibp)->lid;
253
254         memset(&data, 0, sizeof(data));
255
256         data.generic_type = IB_NOTICE_TYPE_INFO;
257         data.prod_type_lsb = IB_NOTICE_PROD_CA;
258         data.trap_num = OPA_TRAP_CHANGE_CAPABILITY;
259         data.issuer_lid = cpu_to_be32(lid);
260         data.ntc_144.lid = data.issuer_lid;
261         data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
262
263         send_trap(ibp, &data, sizeof(data));
264 }
265
266 /*
267  * Send a System Image GUID Changed trap (ch. 14.3.12).
268  */
269 void hfi1_sys_guid_chg(struct hfi1_ibport *ibp)
270 {
271         struct opa_mad_notice_attr data;
272         u32 lid = ppd_from_ibp(ibp)->lid;
273
274         memset(&data, 0, sizeof(data));
275
276         data.generic_type = IB_NOTICE_TYPE_INFO;
277         data.prod_type_lsb = IB_NOTICE_PROD_CA;
278         data.trap_num = OPA_TRAP_CHANGE_SYSGUID;
279         data.issuer_lid = cpu_to_be32(lid);
280         data.ntc_145.new_sys_guid = ib_hfi1_sys_image_guid;
281         data.ntc_145.lid = data.issuer_lid;
282
283         send_trap(ibp, &data, sizeof(data));
284 }
285
286 /*
287  * Send a Node Description Changed trap (ch. 14.3.13).
288  */
289 void hfi1_node_desc_chg(struct hfi1_ibport *ibp)
290 {
291         struct opa_mad_notice_attr data;
292         u32 lid = ppd_from_ibp(ibp)->lid;
293
294         memset(&data, 0, sizeof(data));
295
296         data.generic_type = IB_NOTICE_TYPE_INFO;
297         data.prod_type_lsb = IB_NOTICE_PROD_CA;
298         data.trap_num = OPA_TRAP_CHANGE_CAPABILITY;
299         data.issuer_lid = cpu_to_be32(lid);
300         data.ntc_144.lid = data.issuer_lid;
301         data.ntc_144.change_flags =
302                 cpu_to_be16(OPA_NOTICE_TRAP_NODE_DESC_CHG);
303
304         send_trap(ibp, &data, sizeof(data));
305 }
306
307 static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am,
308                                    u8 *data, struct ib_device *ibdev,
309                                    u8 port, u32 *resp_len)
310 {
311         struct opa_node_description *nd;
312
313         if (am) {
314                 smp->status |= IB_SMP_INVALID_FIELD;
315                 return reply((struct ib_mad_hdr *)smp);
316         }
317
318         nd = (struct opa_node_description *)data;
319
320         memcpy(nd->data, ibdev->node_desc, sizeof(nd->data));
321
322         if (resp_len)
323                 *resp_len += sizeof(*nd);
324
325         return reply((struct ib_mad_hdr *)smp);
326 }
327
328 static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data,
329                                    struct ib_device *ibdev, u8 port,
330                                    u32 *resp_len)
331 {
332         struct opa_node_info *ni;
333         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
334         unsigned pidx = port - 1; /* IB number port from 1, hw from 0 */
335
336         ni = (struct opa_node_info *)data;
337
338         /* GUID 0 is illegal */
339         if (am || pidx >= dd->num_pports || dd->pport[pidx].guid == 0) {
340                 smp->status |= IB_SMP_INVALID_FIELD;
341                 return reply((struct ib_mad_hdr *)smp);
342         }
343
344         ni->port_guid = cpu_to_be64(dd->pport[pidx].guid);
345         ni->base_version = OPA_MGMT_BASE_VERSION;
346         ni->class_version = OPA_SMI_CLASS_VERSION;
347         ni->node_type = 1;     /* channel adapter */
348         ni->num_ports = ibdev->phys_port_cnt;
349         /* This is already in network order */
350         ni->system_image_guid = ib_hfi1_sys_image_guid;
351         /* Use first-port GUID as node */
352         ni->node_guid = cpu_to_be64(dd->pport->guid);
353         ni->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
354         ni->device_id = cpu_to_be16(dd->pcidev->device);
355         ni->revision = cpu_to_be32(dd->minrev);
356         ni->local_port_num = port;
357         ni->vendor_id[0] = dd->oui1;
358         ni->vendor_id[1] = dd->oui2;
359         ni->vendor_id[2] = dd->oui3;
360
361         if (resp_len)
362                 *resp_len += sizeof(*ni);
363
364         return reply((struct ib_mad_hdr *)smp);
365 }
366
367 static int subn_get_nodeinfo(struct ib_smp *smp, struct ib_device *ibdev,
368                              u8 port)
369 {
370         struct ib_node_info *nip = (struct ib_node_info *)&smp->data;
371         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
372         unsigned pidx = port - 1; /* IB number port from 1, hw from 0 */
373
374         /* GUID 0 is illegal */
375         if (smp->attr_mod || pidx >= dd->num_pports ||
376             dd->pport[pidx].guid == 0)
377                 smp->status |= IB_SMP_INVALID_FIELD;
378         else
379                 nip->port_guid = cpu_to_be64(dd->pport[pidx].guid);
380
381         nip->base_version = OPA_MGMT_BASE_VERSION;
382         nip->class_version = OPA_SMI_CLASS_VERSION;
383         nip->node_type = 1;     /* channel adapter */
384         nip->num_ports = ibdev->phys_port_cnt;
385         /* This is already in network order */
386         nip->sys_guid = ib_hfi1_sys_image_guid;
387          /* Use first-port GUID as node */
388         nip->node_guid = cpu_to_be64(dd->pport->guid);
389         nip->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
390         nip->device_id = cpu_to_be16(dd->pcidev->device);
391         nip->revision = cpu_to_be32(dd->minrev);
392         nip->local_port_num = port;
393         nip->vendor_id[0] = dd->oui1;
394         nip->vendor_id[1] = dd->oui2;
395         nip->vendor_id[2] = dd->oui3;
396
397         return reply((struct ib_mad_hdr *)smp);
398 }
399
/*
 * Pass the enabled link width mask @w to hfi1_set_ib_cfg() for @ppd
 * (HFI1_IB_CFG_LWID_ENB).  The return value is deliberately discarded.
 */
static void set_link_width_enabled(struct hfi1_pportdata *ppd, u32 w)
{
        (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LWID_ENB, w);
}
404
/*
 * Pass the enabled link width downgrade mask @w to hfi1_set_ib_cfg() for
 * @ppd (HFI1_IB_CFG_LWID_DG_ENB).  The return value is deliberately
 * discarded.
 */
static void set_link_width_downgrade_enabled(struct hfi1_pportdata *ppd, u32 w)
{
        (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LWID_DG_ENB, w);
}
409
/*
 * Pass the enabled link speed mask @s to hfi1_set_ib_cfg() for @ppd
 * (HFI1_IB_CFG_SPD_ENB).  The return value is deliberately discarded.
 */
static void set_link_speed_enabled(struct hfi1_pportdata *ppd, u32 s)
{
        (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_SPD_ENB, s);
}
414
415 static int check_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
416                       int mad_flags, __be64 mkey, __be32 dr_slid,
417                       u8 return_path[], u8 hop_cnt)
418 {
419         int valid_mkey = 0;
420         int ret = 0;
421
422         /* Is the mkey in the process of expiring? */
423         if (ibp->rvp.mkey_lease_timeout &&
424             time_after_eq(jiffies, ibp->rvp.mkey_lease_timeout)) {
425                 /* Clear timeout and mkey protection field. */
426                 ibp->rvp.mkey_lease_timeout = 0;
427                 ibp->rvp.mkeyprot = 0;
428         }
429
430         if ((mad_flags & IB_MAD_IGNORE_MKEY) ||  ibp->rvp.mkey == 0 ||
431             ibp->rvp.mkey == mkey)
432                 valid_mkey = 1;
433
434         /* Unset lease timeout on any valid Get/Set/TrapRepress */
435         if (valid_mkey && ibp->rvp.mkey_lease_timeout &&
436             (mad->method == IB_MGMT_METHOD_GET ||
437              mad->method == IB_MGMT_METHOD_SET ||
438              mad->method == IB_MGMT_METHOD_TRAP_REPRESS))
439                 ibp->rvp.mkey_lease_timeout = 0;
440
441         if (!valid_mkey) {
442                 switch (mad->method) {
443                 case IB_MGMT_METHOD_GET:
444                         /* Bad mkey not a violation below level 2 */
445                         if (ibp->rvp.mkeyprot < 2)
446                                 break;
447                 case IB_MGMT_METHOD_SET:
448                 case IB_MGMT_METHOD_TRAP_REPRESS:
449                         if (ibp->rvp.mkey_violations != 0xFFFF)
450                                 ++ibp->rvp.mkey_violations;
451                         if (!ibp->rvp.mkey_lease_timeout &&
452                             ibp->rvp.mkey_lease_period)
453                                 ibp->rvp.mkey_lease_timeout = jiffies +
454                                         ibp->rvp.mkey_lease_period * HZ;
455                         /* Generate a trap notice. */
456                         bad_mkey(ibp, mad, mkey, dr_slid, return_path,
457                                  hop_cnt);
458                         ret = 1;
459                 }
460         }
461
462         return ret;
463 }
464
/*
 * The SMA caches reads from LCB registers in case the LCB is unavailable.
 * (The LCB is unavailable in certain link states, for example.)
 *
 * One cache entry: a register offset and the value last stored for it.
 */
struct lcb_datum {
        u32 off;        /* register offset; the lookup key */
        u64 val;        /* cached register value */
};
473
/*
 * The set of cached LCB registers.  read_lcb_cache() and write_lcb_cache()
 * accept only offsets listed here; each cached value starts out as 0.
 */
static struct lcb_datum lcb_cache[] = {
        { DC_LCB_STS_ROUND_TRIP_LTP_CNT, 0 },
};
477
478 static int write_lcb_cache(u32 off, u64 val)
479 {
480         int i;
481
482         for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
483                 if (lcb_cache[i].off == off) {
484                         lcb_cache[i].val = val;
485                         return 0;
486                 }
487         }
488
489         pr_warn("%s bad offset 0x%x\n", __func__, off);
490         return -1;
491 }
492
493 static int read_lcb_cache(u32 off, u64 *val)
494 {
495         int i;
496
497         for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
498                 if (lcb_cache[i].off == off) {
499                         *val = lcb_cache[i].val;
500                         return 0;
501                 }
502         }
503
504         pr_warn("%s bad offset 0x%x\n", __func__, off);
505         return -1;
506 }
507
508 void read_ltp_rtt(struct hfi1_devdata *dd)
509 {
510         u64 reg;
511
512         if (read_lcb_csr(dd, DC_LCB_STS_ROUND_TRIP_LTP_CNT, &reg))
513                 dd_dev_err(dd, "%s: unable to read LTP RTT\n", __func__);
514         else
515                 write_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, reg);
516 }
517
/*
 * Answer an OPA Get(PortInfo) for @port by filling the opa_port_info at
 * @data from driver state.
 *
 * The attribute modifier @am must ask for exactly one port, and the
 * port's vls_supported must fit both the per-VL MTU array in the reply and
 * the device's vld[] array; otherwise IB_SMP_INVALID_FIELD is returned.
 * If @am has the start-of-SM-config bit set and the port's logical state
 * is IB_PORT_INIT, is_sm_config_started is set on the port as a side
 * effect.
 *
 * Some fields are only OR-ed into (for example partenforce_filterraw).
 * NOTE(review): this presumably relies on the caller having zeroed @data
 * beforehand - confirm against the callers.
 *
 * If @resp_len is non-NULL it is advanced by sizeof(struct opa_port_info).
 * Returns the result of reply().
 */
static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
                                   struct ib_device *ibdev, u8 port,
                                   u32 *resp_len)
{
        int i;
        struct hfi1_devdata *dd;
        struct hfi1_pportdata *ppd;
        struct hfi1_ibport *ibp;
        struct opa_port_info *pi = (struct opa_port_info *)data;
        u8 mtu;
        u8 credit_rate;
        u8 is_beaconing_active;
        u32 state;
        u32 num_ports = OPA_AM_NPORT(am);
        u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
        u32 buffer_units;
        u64 tmp = 0;

        if (num_ports != 1) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }

        dd = dd_from_ibdev(ibdev);
        /* IB numbers ports from 1, hw from 0 */
        ppd = dd->pport + (port - 1);
        ibp = &ppd->ibport_data;

        /*
         * Two VLs share one pvlx_to_mtu byte, and every supported VL needs
         * an entry in dd->vld; reject configurations that would overrun
         * either array in the loop below.
         */
        if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
            ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }

        pi->lid = cpu_to_be32(ppd->lid);

        /* Only return the mkey if the protection field allows it. */
        if (!(smp->method == IB_MGMT_METHOD_GET &&
              ibp->rvp.mkey != smp->mkey &&
              ibp->rvp.mkeyprot == 1))
                pi->mkey = ibp->rvp.mkey;

        pi->subnet_prefix = ibp->rvp.gid_prefix;
        pi->sm_lid = cpu_to_be32(ibp->rvp.sm_lid);
        pi->ib_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
        pi->mkey_lease_period = cpu_to_be16(ibp->rvp.mkey_lease_period);
        pi->sm_trap_qp = cpu_to_be32(ppd->sm_trap_qp);
        pi->sa_qp = cpu_to_be32(ppd->sa_qp);

        pi->link_width.enabled = cpu_to_be16(ppd->link_width_enabled);
        pi->link_width.supported = cpu_to_be16(ppd->link_width_supported);
        pi->link_width.active = cpu_to_be16(ppd->link_width_active);

        pi->link_width_downgrade.supported =
                        cpu_to_be16(ppd->link_width_downgrade_supported);
        pi->link_width_downgrade.enabled =
                        cpu_to_be16(ppd->link_width_downgrade_enabled);
        pi->link_width_downgrade.tx_active =
                        cpu_to_be16(ppd->link_width_downgrade_tx_active);
        pi->link_width_downgrade.rx_active =
                        cpu_to_be16(ppd->link_width_downgrade_rx_active);

        pi->link_speed.supported = cpu_to_be16(ppd->link_speed_supported);
        pi->link_speed.active = cpu_to_be16(ppd->link_speed_active);
        pi->link_speed.enabled = cpu_to_be16(ppd->link_speed_enabled);

        state = driver_lstate(ppd);

        /* Side effect of a Get: latch the start of SM configuration */
        if (start_of_sm_config && (state == IB_PORT_INIT))
                ppd->is_sm_config_started = 1;

        pi->port_phys_conf = (ppd->port_type & 0xf);

        /*
         * ledenable_offlinereason is assembled from: neighbor_normal
         * (bit 4), is_sm_config_started (bit 5), LED beaconing active
         * (bit 6) and the offline/disabled reason in the low bits.
         */
        pi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
        pi->port_states.ledenable_offlinereason |=
                ppd->is_sm_config_started << 5;
        /*
         * This pairs with the memory barrier in hfi1_start_led_override to
         * ensure that we read the correct state of LED beaconing represented
         * by led_override_timer_active
         */
        smp_rmb();
        is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
        pi->port_states.ledenable_offlinereason |= is_beaconing_active << 6;
        pi->port_states.ledenable_offlinereason |=
                ppd->offline_disabled_reason;

        /* Physical state in the high nibble, logical state in the low one */
        pi->port_states.portphysstate_portstate =
                (hfi1_ibphys_portstate(ppd) << 4) | state;

        pi->mkeyprotect_lmc = (ibp->rvp.mkeyprot << 6) | ppd->lmc;

        /*
         * Pack the per-VL MTU enums two to a byte: even VLs go in the high
         * nibble, odd VLs in the low nibble.
         */
        memset(pi->neigh_mtu.pvlx_to_mtu, 0, sizeof(pi->neigh_mtu.pvlx_to_mtu));
        for (i = 0; i < ppd->vls_supported; i++) {
                mtu = mtu_to_enum(dd->vld[i].mtu, HFI1_DEFAULT_ACTIVE_MTU);
                if ((i % 2) == 0)
                        pi->neigh_mtu.pvlx_to_mtu[i / 2] |= (mtu << 4);
                else
                        pi->neigh_mtu.pvlx_to_mtu[i / 2] |= mtu;
        }
        /* don't forget VL 15 */
        mtu = mtu_to_enum(dd->vld[15].mtu, 2048);
        pi->neigh_mtu.pvlx_to_mtu[15 / 2] |= mtu;
        pi->smsl = ibp->rvp.sm_sl & OPA_PI_MASK_SMSL;
        pi->operational_vls = hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS);
        pi->partenforce_filterraw |=
                (ppd->linkinit_reason & OPA_PI_MASK_LINKINIT_REASON);
        if (ppd->part_enforce & HFI1_PART_ENFORCE_IN)
                pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_IN;
        if (ppd->part_enforce & HFI1_PART_ENFORCE_OUT)
                pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_OUT;
        pi->mkey_violations = cpu_to_be16(ibp->rvp.mkey_violations);
        /*
         * NOTE(review): this comment used to read "P_KeyViolations are
         * counted by hardware", but the value reported here is the
         * software counter ibp->rvp.pkey_violations, which
         * hfi1_bad_pqkey() increments - confirm which is intended.
         */
        pi->pkey_violations = cpu_to_be16(ibp->rvp.pkey_violations);
        pi->qkey_violations = cpu_to_be16(ibp->rvp.qkey_violations);

        pi->vl.cap = ppd->vls_supported;
        pi->vl.high_limit = cpu_to_be16(ibp->rvp.vl_high_limit);
        pi->vl.arb_high_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_CAP);
        pi->vl.arb_low_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_LOW_CAP);

        pi->clientrereg_subnettimeout = ibp->rvp.subnet_timeout;

        /* The same OPA link mode is reported in all three 5-bit fields */
        pi->port_link_mode  = cpu_to_be16(OPA_PORT_LINK_MODE_OPA << 10 |
                                          OPA_PORT_LINK_MODE_OPA << 5 |
                                          OPA_PORT_LINK_MODE_OPA);

        pi->port_ltp_crc_mode = cpu_to_be16(ppd->port_ltp_crc_mode);

        pi->port_mode = cpu_to_be16(
                                ppd->is_active_optimize_enabled ?
                                        OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE : 0);

        /* Only the 9B packet format is reported as supported and enabled */
        pi->port_packet_format.supported =
                cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B);
        pi->port_packet_format.enabled =
                cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B);

        /* flit_control.interleave is (OPA V1, version .76):
         * bits         use
         * ----         ---
         * 2            res
         * 2            DistanceSupported
         * 2            DistanceEnabled
         * 5            MaxNestLevelTxEnabled
         * 5            MaxNestLevelRxSupported
         *
         * HFI supports only "distance mode 1" (see OPA V1, version .76,
         * section 9.6.2), so set DistanceSupported, DistanceEnabled
         * to 0x1.
         */
        pi->flit_control.interleave = cpu_to_be16(0x1400);

        pi->link_down_reason = ppd->local_link_down_reason.sma;
        pi->neigh_link_down_reason = ppd->neigh_link_down_reason.sma;
        pi->port_error_action = cpu_to_be32(ppd->port_error_action);
        pi->mtucap = mtu_to_enum(hfi1_max_mtu, IB_MTU_4096);

        /* 32.768 usec. response time (guessing) */
        pi->resptimevalue = 3;

        pi->local_port_num = port;

        /* buffer info for FM */
        pi->overall_buffer_space = cpu_to_be16(dd->link_credits);

        pi->neigh_node_guid = cpu_to_be64(ppd->neighbor_guid);
        pi->neigh_port_num = ppd->neighbor_port_number;
        pi->port_neigh_mode =
                (ppd->neighbor_type & OPA_PI_MASK_NEIGH_NODE_TYPE) |
                (ppd->mgmt_allowed ? OPA_PI_MASK_NEIGH_MGMT_ALLOWED : 0) |
                (ppd->neighbor_fm_security ?
                        OPA_PI_MASK_NEIGH_FW_AUTH_BYPASS : 0);

        /* HFIs shall always return VL15 credits to their
         * neighbor in a timely manner, without any credit return pacing.
         */
        credit_rate = 0;
        /* Pack vAU, vCU, VL15 credit rate and VL15 init into one word */
        buffer_units  = (dd->vau) & OPA_PI_MASK_BUF_UNIT_BUF_ALLOC;
        buffer_units |= (dd->vcu << 3) & OPA_PI_MASK_BUF_UNIT_CREDIT_ACK;
        buffer_units |= (credit_rate << 6) &
                                OPA_PI_MASK_BUF_UNIT_VL15_CREDIT_RATE;
        buffer_units |= (dd->vl15_init << 11) & OPA_PI_MASK_BUF_UNIT_VL15_INIT;
        pi->buffer_units = cpu_to_be32(buffer_units);

        pi->opa_cap_mask = cpu_to_be16(OPA_CAP_MASK3_IsSharedSpaceSupported);

        /* HFI supports a replay buffer 128 LTPs in size */
        pi->replay_depth.buffer = 0x80;
        /* read the cached value of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
        /* (return value ignored: tmp keeps its initial 0 on a cache miss) */
        read_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, &tmp);

        /*
         * this counter is 16 bits wide, but the replay_depth.wire
         * variable is only 8 bits
         */
        if (tmp > 0xff)
                tmp = 0xff;
        pi->replay_depth.wire = tmp;

        if (resp_len)
                *resp_len += sizeof(struct opa_port_info);

        return reply((struct ib_mad_hdr *)smp);
}
723
724 /**
725  * get_pkeys - return the PKEY table
726  * @dd: the hfi1_ib device
727  * @port: the IB port number
728  * @pkeys: the pkey table is placed here
729  */
730 static int get_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
731 {
732         struct hfi1_pportdata *ppd = dd->pport + port - 1;
733
734         memcpy(pkeys, ppd->pkeys, sizeof(ppd->pkeys));
735
736         return 0;
737 }
738
739 static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
740                                     struct ib_device *ibdev, u8 port,
741                                     u32 *resp_len)
742 {
743         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
744         u32 n_blocks_req = OPA_AM_NBLK(am);
745         u32 start_block = am & 0x7ff;
746         __be16 *p;
747         u16 *q;
748         int i;
749         u16 n_blocks_avail;
750         unsigned npkeys = hfi1_get_npkeys(dd);
751         size_t size;
752
753         if (n_blocks_req == 0) {
754                 pr_warn("OPA Get PKey AM Invalid : P = %d; B = 0x%x; N = 0x%x\n",
755                         port, start_block, n_blocks_req);
756                 smp->status |= IB_SMP_INVALID_FIELD;
757                 return reply((struct ib_mad_hdr *)smp);
758         }
759
760         n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
761
762         size = (n_blocks_req * OPA_PARTITION_TABLE_BLK_SIZE) * sizeof(u16);
763
764         if (start_block + n_blocks_req > n_blocks_avail ||
765             n_blocks_req > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
766                 pr_warn("OPA Get PKey AM Invalid : s 0x%x; req 0x%x; "
767                         "avail 0x%x; blk/smp 0x%lx\n",
768                         start_block, n_blocks_req, n_blocks_avail,
769                         OPA_NUM_PKEY_BLOCKS_PER_SMP);
770                 smp->status |= IB_SMP_INVALID_FIELD;
771                 return reply((struct ib_mad_hdr *)smp);
772         }
773
774         p = (__be16 *)data;
775         q = (u16 *)data;
776         /* get the real pkeys if we are requesting the first block */
777         if (start_block == 0) {
778                 get_pkeys(dd, port, q);
779                 for (i = 0; i < npkeys; i++)
780                         p[i] = cpu_to_be16(q[i]);
781                 if (resp_len)
782                         *resp_len += size;
783         } else {
784                 smp->status |= IB_SMP_INVALID_FIELD;
785         }
786         return reply((struct ib_mad_hdr *)smp);
787 }
788
/*
 * Verdicts produced by the port state transition checks.  The values
 * are also stored (via the __D/__I/__A/__U aliases) in the u8 lookup
 * tables physical_state_transitions and logical_state_transitions.
 */
enum {
	HFI_TRANSITION_DISALLOWED = 0,	/* request is rejected */
	HFI_TRANSITION_IGNORED = 1,	/* request is accepted, nothing is done */
	HFI_TRANSITION_ALLOWED = 2,	/* request may be carried out */
	HFI_TRANSITION_UNDEFINED = 3,	/* no rule exists for this pair */
};
795
796 /*
797  * Use shortened names to improve readability of
798  * {logical,physical}_state_transitions
799  */
800 enum {
801         __D = HFI_TRANSITION_DISALLOWED,
802         __I = HFI_TRANSITION_IGNORED,
803         __A = HFI_TRANSITION_ALLOWED,
804         __U = HFI_TRANSITION_UNDEFINED,
805 };
806
807 /*
808  * IB_PORTPHYSSTATE_POLLING (2) through OPA_PORTPHYSSTATE_MAX (11) are
809  * represented in physical_state_transitions.
810  */
811 #define __N_PHYSTATES (OPA_PORTPHYSSTATE_MAX - IB_PORTPHYSSTATE_POLLING + 1)
812
813 /*
814  * Within physical_state_transitions, rows represent "old" states,
815  * columns "new" states, and physical_state_transitions.allowed[old][new]
816  * indicates if the transition from old state to new state is legal (see
817  * OPAg1v1, Table 6-4).
818  */
819 static const struct {
820         u8 allowed[__N_PHYSTATES][__N_PHYSTATES];
821 } physical_state_transitions = {
822         {
823                 /* 2    3    4    5    6    7    8    9   10   11 */
824         /* 2 */ { __A, __A, __D, __D, __D, __D, __D, __D, __D, __D },
825         /* 3 */ { __A, __I, __D, __D, __D, __D, __D, __D, __D, __A },
826         /* 4 */ { __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
827         /* 5 */ { __A, __A, __D, __I, __D, __D, __D, __D, __D, __D },
828         /* 6 */ { __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
829         /* 7 */ { __D, __A, __D, __D, __D, __I, __D, __D, __D, __D },
830         /* 8 */ { __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
831         /* 9 */ { __I, __A, __D, __D, __D, __D, __D, __I, __D, __D },
832         /*10 */ { __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
833         /*11 */ { __D, __A, __D, __D, __D, __D, __D, __D, __D, __I },
834         }
835 };
836
837 /*
838  * IB_PORT_DOWN (1) through IB_PORT_ACTIVE_DEFER (5) are represented
839  * logical_state_transitions
840  */
841
842 #define __N_LOGICAL_STATES (IB_PORT_ACTIVE_DEFER - IB_PORT_DOWN + 1)
843
844 /*
845  * Within logical_state_transitions rows represent "old" states,
846  * columns "new" states, and logical_state_transitions.allowed[old][new]
847  * indicates if the transition from old state to new state is legal (see
848  * OPAg1v1, Table 9-12).
849  */
850 static const struct {
851         u8 allowed[__N_LOGICAL_STATES][__N_LOGICAL_STATES];
852 } logical_state_transitions = {
853         {
854                 /* 1    2    3    4    5 */
855         /* 1 */ { __I, __D, __D, __D, __U},
856         /* 2 */ { __D, __I, __A, __D, __U},
857         /* 3 */ { __D, __D, __I, __A, __U},
858         /* 4 */ { __D, __D, __I, __I, __U},
859         /* 5 */ { __U, __U, __U, __U, __U},
860         }
861 };
862
863 static int logical_transition_allowed(int old, int new)
864 {
865         if (old < IB_PORT_NOP || old > IB_PORT_ACTIVE_DEFER ||
866             new < IB_PORT_NOP || new > IB_PORT_ACTIVE_DEFER) {
867                 pr_warn("invalid logical state(s) (old %d new %d)\n",
868                         old, new);
869                 return HFI_TRANSITION_UNDEFINED;
870         }
871
872         if (new == IB_PORT_NOP)
873                 return HFI_TRANSITION_ALLOWED; /* always allowed */
874
875         /* adjust states for indexing into logical_state_transitions */
876         old -= IB_PORT_DOWN;
877         new -= IB_PORT_DOWN;
878
879         if (old < 0 || new < 0)
880                 return HFI_TRANSITION_UNDEFINED;
881         return logical_state_transitions.allowed[old][new];
882 }
883
884 static int physical_transition_allowed(int old, int new)
885 {
886         if (old < IB_PORTPHYSSTATE_NOP || old > OPA_PORTPHYSSTATE_MAX ||
887             new < IB_PORTPHYSSTATE_NOP || new > OPA_PORTPHYSSTATE_MAX) {
888                 pr_warn("invalid physical state(s) (old %d new %d)\n",
889                         old, new);
890                 return HFI_TRANSITION_UNDEFINED;
891         }
892
893         if (new == IB_PORTPHYSSTATE_NOP)
894                 return HFI_TRANSITION_ALLOWED; /* always allowed */
895
896         /* adjust states for indexing into physical_state_transitions */
897         old -= IB_PORTPHYSSTATE_POLLING;
898         new -= IB_PORTPHYSSTATE_POLLING;
899
900         if (old < 0 || new < 0)
901                 return HFI_TRANSITION_UNDEFINED;
902         return physical_state_transitions.allowed[old][new];
903 }
904
905 static int port_states_transition_allowed(struct hfi1_pportdata *ppd,
906                                           u32 logical_new, u32 physical_new)
907 {
908         u32 physical_old = driver_physical_state(ppd);
909         u32 logical_old = driver_logical_state(ppd);
910         int ret, logical_allowed, physical_allowed;
911
912         ret = logical_transition_allowed(logical_old, logical_new);
913         logical_allowed = ret;
914
915         if (ret == HFI_TRANSITION_DISALLOWED ||
916             ret == HFI_TRANSITION_UNDEFINED) {
917                 pr_warn("invalid logical state transition %s -> %s\n",
918                         opa_lstate_name(logical_old),
919                         opa_lstate_name(logical_new));
920                 return ret;
921         }
922
923         ret = physical_transition_allowed(physical_old, physical_new);
924         physical_allowed = ret;
925
926         if (ret == HFI_TRANSITION_DISALLOWED ||
927             ret == HFI_TRANSITION_UNDEFINED) {
928                 pr_warn("invalid physical state transition %s -> %s\n",
929                         opa_pstate_name(physical_old),
930                         opa_pstate_name(physical_new));
931                 return ret;
932         }
933
934         if (logical_allowed == HFI_TRANSITION_IGNORED &&
935             physical_allowed == HFI_TRANSITION_IGNORED)
936                 return HFI_TRANSITION_IGNORED;
937
938         /*
939          * A change request of Physical Port State from
940          * 'Offline' to 'Polling' should be ignored.
941          */
942         if ((physical_old == OPA_PORTPHYSSTATE_OFFLINE) &&
943             (physical_new == IB_PORTPHYSSTATE_POLLING))
944                 return HFI_TRANSITION_IGNORED;
945
946         /*
947          * Either physical_allowed or logical_allowed is
948          * HFI_TRANSITION_ALLOWED.
949          */
950         return HFI_TRANSITION_ALLOWED;
951 }
952
953 static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
954                            u32 logical_state, u32 phys_state,
955                            int suppress_idle_sma)
956 {
957         struct hfi1_devdata *dd = ppd->dd;
958         u32 link_state;
959         int ret;
960
961         ret = port_states_transition_allowed(ppd, logical_state, phys_state);
962         if (ret == HFI_TRANSITION_DISALLOWED ||
963             ret == HFI_TRANSITION_UNDEFINED) {
964                 /* error message emitted above */
965                 smp->status |= IB_SMP_INVALID_FIELD;
966                 return 0;
967         }
968
969         if (ret == HFI_TRANSITION_IGNORED)
970                 return 0;
971
972         if ((phys_state != IB_PORTPHYSSTATE_NOP) &&
973             !(logical_state == IB_PORT_DOWN ||
974               logical_state == IB_PORT_NOP)){
975                 pr_warn("SubnSet(OPA_PortInfo) port state invalid: logical_state 0x%x physical_state 0x%x\n",
976                         logical_state, phys_state);
977                 smp->status |= IB_SMP_INVALID_FIELD;
978         }
979
980         /*
981          * Logical state changes are summarized in OPAv1g1 spec.,
982          * Table 9-12; physical state changes are summarized in
983          * OPAv1g1 spec., Table 6.4.
984          */
985         switch (logical_state) {
986         case IB_PORT_NOP:
987                 if (phys_state == IB_PORTPHYSSTATE_NOP)
988                         break;
989                 /* FALLTHROUGH */
990         case IB_PORT_DOWN:
991                 if (phys_state == IB_PORTPHYSSTATE_NOP) {
992                         link_state = HLS_DN_DOWNDEF;
993                 } else if (phys_state == IB_PORTPHYSSTATE_POLLING) {
994                         link_state = HLS_DN_POLL;
995                         set_link_down_reason(ppd, OPA_LINKDOWN_REASON_FM_BOUNCE,
996                                              0, OPA_LINKDOWN_REASON_FM_BOUNCE);
997                 } else if (phys_state == IB_PORTPHYSSTATE_DISABLED) {
998                         link_state = HLS_DN_DISABLE;
999                 } else {
1000                         pr_warn("SubnSet(OPA_PortInfo) invalid physical state 0x%x\n",
1001                                 phys_state);
1002                         smp->status |= IB_SMP_INVALID_FIELD;
1003                         break;
1004                 }
1005
1006                 if ((link_state == HLS_DN_POLL ||
1007                      link_state == HLS_DN_DOWNDEF)) {
1008                         /*
1009                          * Going to poll.  No matter what the current state,
1010                          * always move offline first, then tune and start the
1011                          * link.  This correctly handles a FM link bounce and
1012                          * a link enable.  Going offline is a no-op if already
1013                          * offline.
1014                          */
1015                         set_link_state(ppd, HLS_DN_OFFLINE);
1016                         tune_serdes(ppd);
1017                         start_link(ppd);
1018                 } else {
1019                         set_link_state(ppd, link_state);
1020                 }
1021                 if (link_state == HLS_DN_DISABLE &&
1022                     (ppd->offline_disabled_reason >
1023                      HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED) ||
1024                      ppd->offline_disabled_reason ==
1025                      HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE)))
1026                         ppd->offline_disabled_reason =
1027                         HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED);
1028                 /*
1029                  * Don't send a reply if the response would be sent
1030                  * through the disabled port.
1031                  */
1032                 if (link_state == HLS_DN_DISABLE && smp->hop_cnt)
1033                         return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
1034                 break;
1035         case IB_PORT_ARMED:
1036                 ret = set_link_state(ppd, HLS_UP_ARMED);
1037                 if ((ret == 0) && (suppress_idle_sma == 0))
1038                         send_idle_sma(dd, SMA_IDLE_ARM);
1039                 break;
1040         case IB_PORT_ACTIVE:
1041                 if (ppd->neighbor_normal) {
1042                         ret = set_link_state(ppd, HLS_UP_ACTIVE);
1043                         if (ret == 0)
1044                                 send_idle_sma(dd, SMA_IDLE_ACTIVE);
1045                 } else {
1046                         pr_warn("SubnSet(OPA_PortInfo) Cannot move to Active with NeighborNormal 0\n");
1047                         smp->status |= IB_SMP_INVALID_FIELD;
1048                 }
1049                 break;
1050         default:
1051                 pr_warn("SubnSet(OPA_PortInfo) invalid logical state 0x%x\n",
1052                         logical_state);
1053                 smp->status |= IB_SMP_INVALID_FIELD;
1054         }
1055
1056         return 0;
1057 }
1058
1059 /**
1060  * subn_set_opa_portinfo - set port information
1061  * @smp: the incoming SM packet
1062  * @ibdev: the infiniband device
1063  * @port: the port on the device
1064  *
1065  */
1066 static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
1067                                    struct ib_device *ibdev, u8 port,
1068                                    u32 *resp_len)
1069 {
1070         struct opa_port_info *pi = (struct opa_port_info *)data;
1071         struct ib_event event;
1072         struct hfi1_devdata *dd;
1073         struct hfi1_pportdata *ppd;
1074         struct hfi1_ibport *ibp;
1075         u8 clientrereg;
1076         unsigned long flags;
1077         u32 smlid, opa_lid; /* tmp vars to hold LID values */
1078         u16 lid;
1079         u8 ls_old, ls_new, ps_new;
1080         u8 vls;
1081         u8 msl;
1082         u8 crc_enabled;
1083         u16 lse, lwe, mtu;
1084         u32 num_ports = OPA_AM_NPORT(am);
1085         u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
1086         int ret, i, invalid = 0, call_set_mtu = 0;
1087         int call_link_downgrade_policy = 0;
1088
1089         if (num_ports != 1) {
1090                 smp->status |= IB_SMP_INVALID_FIELD;
1091                 return reply((struct ib_mad_hdr *)smp);
1092         }
1093
1094         opa_lid = be32_to_cpu(pi->lid);
1095         if (opa_lid & 0xFFFF0000) {
1096                 pr_warn("OPA_PortInfo lid out of range: %X\n", opa_lid);
1097                 smp->status |= IB_SMP_INVALID_FIELD;
1098                 goto get_only;
1099         }
1100
1101         lid = (u16)(opa_lid & 0x0000FFFF);
1102
1103         smlid = be32_to_cpu(pi->sm_lid);
1104         if (smlid & 0xFFFF0000) {
1105                 pr_warn("OPA_PortInfo SM lid out of range: %X\n", smlid);
1106                 smp->status |= IB_SMP_INVALID_FIELD;
1107                 goto get_only;
1108         }
1109         smlid &= 0x0000FFFF;
1110
1111         clientrereg = (pi->clientrereg_subnettimeout &
1112                         OPA_PI_MASK_CLIENT_REREGISTER);
1113
1114         dd = dd_from_ibdev(ibdev);
1115         /* IB numbers ports from 1, hw from 0 */
1116         ppd = dd->pport + (port - 1);
1117         ibp = &ppd->ibport_data;
1118         event.device = ibdev;
1119         event.element.port_num = port;
1120
1121         ls_old = driver_lstate(ppd);
1122
1123         ibp->rvp.mkey = pi->mkey;
1124         ibp->rvp.gid_prefix = pi->subnet_prefix;
1125         ibp->rvp.mkey_lease_period = be16_to_cpu(pi->mkey_lease_period);
1126
1127         /* Must be a valid unicast LID address. */
1128         if ((lid == 0 && ls_old > IB_PORT_INIT) ||
1129             lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
1130                 smp->status |= IB_SMP_INVALID_FIELD;
1131                 pr_warn("SubnSet(OPA_PortInfo) lid invalid 0x%x\n",
1132                         lid);
1133         } else if (ppd->lid != lid ||
1134                  ppd->lmc != (pi->mkeyprotect_lmc & OPA_PI_MASK_LMC)) {
1135                 if (ppd->lid != lid)
1136                         hfi1_set_uevent_bits(ppd, _HFI1_EVENT_LID_CHANGE_BIT);
1137                 if (ppd->lmc != (pi->mkeyprotect_lmc & OPA_PI_MASK_LMC))
1138                         hfi1_set_uevent_bits(ppd, _HFI1_EVENT_LMC_CHANGE_BIT);
1139                 hfi1_set_lid(ppd, lid, pi->mkeyprotect_lmc & OPA_PI_MASK_LMC);
1140                 event.event = IB_EVENT_LID_CHANGE;
1141                 ib_dispatch_event(&event);
1142         }
1143
1144         msl = pi->smsl & OPA_PI_MASK_SMSL;
1145         if (pi->partenforce_filterraw & OPA_PI_MASK_LINKINIT_REASON)
1146                 ppd->linkinit_reason =
1147                         (pi->partenforce_filterraw &
1148                          OPA_PI_MASK_LINKINIT_REASON);
1149         /* enable/disable SW pkey checking as per FM control */
1150         if (pi->partenforce_filterraw & OPA_PI_MASK_PARTITION_ENFORCE_IN)
1151                 ppd->part_enforce |= HFI1_PART_ENFORCE_IN;
1152         else
1153                 ppd->part_enforce &= ~HFI1_PART_ENFORCE_IN;
1154
1155         if (pi->partenforce_filterraw & OPA_PI_MASK_PARTITION_ENFORCE_OUT)
1156                 ppd->part_enforce |= HFI1_PART_ENFORCE_OUT;
1157         else
1158                 ppd->part_enforce &= ~HFI1_PART_ENFORCE_OUT;
1159
1160         /* Must be a valid unicast LID address. */
1161         if ((smlid == 0 && ls_old > IB_PORT_INIT) ||
1162             smlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
1163                 smp->status |= IB_SMP_INVALID_FIELD;
1164                 pr_warn("SubnSet(OPA_PortInfo) smlid invalid 0x%x\n", smlid);
1165         } else if (smlid != ibp->rvp.sm_lid || msl != ibp->rvp.sm_sl) {
1166                 pr_warn("SubnSet(OPA_PortInfo) smlid 0x%x\n", smlid);
1167                 spin_lock_irqsave(&ibp->rvp.lock, flags);
1168                 if (ibp->rvp.sm_ah) {
1169                         if (smlid != ibp->rvp.sm_lid)
1170                                 ibp->rvp.sm_ah->attr.dlid = smlid;
1171                         if (msl != ibp->rvp.sm_sl)
1172                                 ibp->rvp.sm_ah->attr.sl = msl;
1173                 }
1174                 spin_unlock_irqrestore(&ibp->rvp.lock, flags);
1175                 if (smlid != ibp->rvp.sm_lid)
1176                         ibp->rvp.sm_lid = smlid;
1177                 if (msl != ibp->rvp.sm_sl)
1178                         ibp->rvp.sm_sl = msl;
1179                 event.event = IB_EVENT_SM_CHANGE;
1180                 ib_dispatch_event(&event);
1181         }
1182
1183         if (pi->link_down_reason == 0) {
1184                 ppd->local_link_down_reason.sma = 0;
1185                 ppd->local_link_down_reason.latest = 0;
1186         }
1187
1188         if (pi->neigh_link_down_reason == 0) {
1189                 ppd->neigh_link_down_reason.sma = 0;
1190                 ppd->neigh_link_down_reason.latest = 0;
1191         }
1192
1193         ppd->sm_trap_qp = be32_to_cpu(pi->sm_trap_qp);
1194         ppd->sa_qp = be32_to_cpu(pi->sa_qp);
1195
1196         ppd->port_error_action = be32_to_cpu(pi->port_error_action);
1197         lwe = be16_to_cpu(pi->link_width.enabled);
1198         if (lwe) {
1199                 if (lwe == OPA_LINK_WIDTH_RESET ||
1200                     lwe == OPA_LINK_WIDTH_RESET_OLD)
1201                         set_link_width_enabled(ppd, ppd->link_width_supported);
1202                 else if ((lwe & ~ppd->link_width_supported) == 0)
1203                         set_link_width_enabled(ppd, lwe);
1204                 else
1205                         smp->status |= IB_SMP_INVALID_FIELD;
1206         }
1207         lwe = be16_to_cpu(pi->link_width_downgrade.enabled);
1208         /* LWD.E is always applied - 0 means "disabled" */
1209         if (lwe == OPA_LINK_WIDTH_RESET ||
1210             lwe == OPA_LINK_WIDTH_RESET_OLD) {
1211                 set_link_width_downgrade_enabled(ppd,
1212                                                  ppd->
1213                                                  link_width_downgrade_supported
1214                                                  );
1215         } else if ((lwe & ~ppd->link_width_downgrade_supported) == 0) {
1216                 /* only set and apply if something changed */
1217                 if (lwe != ppd->link_width_downgrade_enabled) {
1218                         set_link_width_downgrade_enabled(ppd, lwe);
1219                         call_link_downgrade_policy = 1;
1220                 }
1221         } else {
1222                 smp->status |= IB_SMP_INVALID_FIELD;
1223         }
1224         lse = be16_to_cpu(pi->link_speed.enabled);
1225         if (lse) {
1226                 if (lse & be16_to_cpu(pi->link_speed.supported))
1227                         set_link_speed_enabled(ppd, lse);
1228                 else
1229                         smp->status |= IB_SMP_INVALID_FIELD;
1230         }
1231
1232         ibp->rvp.mkeyprot =
1233                 (pi->mkeyprotect_lmc & OPA_PI_MASK_MKEY_PROT_BIT) >> 6;
1234         ibp->rvp.vl_high_limit = be16_to_cpu(pi->vl.high_limit) & 0xFF;
1235         (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_LIMIT,
1236                                     ibp->rvp.vl_high_limit);
1237
1238         if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
1239             ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
1240                 smp->status |= IB_SMP_INVALID_FIELD;
1241                 return reply((struct ib_mad_hdr *)smp);
1242         }
1243         for (i = 0; i < ppd->vls_supported; i++) {
1244                 if ((i % 2) == 0)
1245                         mtu = enum_to_mtu((pi->neigh_mtu.pvlx_to_mtu[i / 2] >>
1246                                            4) & 0xF);
1247                 else
1248                         mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[i / 2] &
1249                                           0xF);
1250                 if (mtu == 0xffff) {
1251                         pr_warn("SubnSet(OPA_PortInfo) mtu invalid %d (0x%x)\n",
1252                                 mtu,
1253                                 (pi->neigh_mtu.pvlx_to_mtu[0] >> 4) & 0xF);
1254                         smp->status |= IB_SMP_INVALID_FIELD;
1255                         mtu = hfi1_max_mtu; /* use a valid MTU */
1256                 }
1257                 if (dd->vld[i].mtu != mtu) {
1258                         dd_dev_info(dd,
1259                                     "MTU change on vl %d from %d to %d\n",
1260                                     i, dd->vld[i].mtu, mtu);
1261                         dd->vld[i].mtu = mtu;
1262                         call_set_mtu++;
1263                 }
1264         }
1265         /* As per OPAV1 spec: VL15 must support and be configured
1266          * for operation with a 2048 or larger MTU.
1267          */
1268         mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[15 / 2] & 0xF);
1269         if (mtu < 2048 || mtu == 0xffff)
1270                 mtu = 2048;
1271         if (dd->vld[15].mtu != mtu) {
1272                 dd_dev_info(dd,
1273                             "MTU change on vl 15 from %d to %d\n",
1274                             dd->vld[15].mtu, mtu);
1275                 dd->vld[15].mtu = mtu;
1276                 call_set_mtu++;
1277         }
1278         if (call_set_mtu)
1279                 set_mtu(ppd);
1280
1281         /* Set operational VLs */
1282         vls = pi->operational_vls & OPA_PI_MASK_OPERATIONAL_VL;
1283         if (vls) {
1284                 if (vls > ppd->vls_supported) {
1285                         pr_warn("SubnSet(OPA_PortInfo) VL's supported invalid %d\n",
1286                                 pi->operational_vls);
1287                         smp->status |= IB_SMP_INVALID_FIELD;
1288                 } else {
1289                         if (hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS,
1290                                             vls) == -EINVAL)
1291                                 smp->status |= IB_SMP_INVALID_FIELD;
1292                 }
1293         }
1294
1295         if (pi->mkey_violations == 0)
1296                 ibp->rvp.mkey_violations = 0;
1297
1298         if (pi->pkey_violations == 0)
1299                 ibp->rvp.pkey_violations = 0;
1300
1301         if (pi->qkey_violations == 0)
1302                 ibp->rvp.qkey_violations = 0;
1303
1304         ibp->rvp.subnet_timeout =
1305                 pi->clientrereg_subnettimeout & OPA_PI_MASK_SUBNET_TIMEOUT;
1306
1307         crc_enabled = be16_to_cpu(pi->port_ltp_crc_mode);
1308         crc_enabled >>= 4;
1309         crc_enabled &= 0xf;
1310
1311         if (crc_enabled != 0)
1312                 ppd->port_crc_mode_enabled = port_ltp_to_cap(crc_enabled);
1313
1314         ppd->is_active_optimize_enabled =
1315                         !!(be16_to_cpu(pi->port_mode)
1316                                         & OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE);
1317
1318         ls_new = pi->port_states.portphysstate_portstate &
1319                         OPA_PI_MASK_PORT_STATE;
1320         ps_new = (pi->port_states.portphysstate_portstate &
1321                         OPA_PI_MASK_PORT_PHYSICAL_STATE) >> 4;
1322
1323         if (ls_old == IB_PORT_INIT) {
1324                 if (start_of_sm_config) {
1325                         if (ls_new == ls_old || (ls_new == IB_PORT_ARMED))
1326                                 ppd->is_sm_config_started = 1;
1327                 } else if (ls_new == IB_PORT_ARMED) {
1328                         if (ppd->is_sm_config_started == 0)
1329                                 invalid = 1;
1330                 }
1331         }
1332
1333         /* Handle CLIENT_REREGISTER event b/c SM asked us for it */
1334         if (clientrereg) {
1335                 event.event = IB_EVENT_CLIENT_REREGISTER;
1336                 ib_dispatch_event(&event);
1337         }
1338
1339         /*
1340          * Do the port state change now that the other link parameters
1341          * have been set.
1342          * Changing the port physical state only makes sense if the link
1343          * is down or is being set to down.
1344          */
1345
1346         ret = set_port_states(ppd, smp, ls_new, ps_new, invalid);
1347         if (ret)
1348                 return ret;
1349
1350         ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len);
1351
1352         /* restore re-reg bit per o14-12.2.1 */
1353         pi->clientrereg_subnettimeout |= clientrereg;
1354
1355         /*
1356          * Apply the new link downgrade policy.  This may result in a link
1357          * bounce.  Do this after everything else so things are settled.
1358          * Possible problem: if setting the port state above fails, then
1359          * the policy change is not applied.
1360          */
1361         if (call_link_downgrade_policy)
1362                 apply_link_downgrade_policy(ppd, 0);
1363
1364         return ret;
1365
1366 get_only:
1367         return __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len);
1368 }
1369
1370 /**
1371  * set_pkeys - set the PKEY table for ctxt 0
1372  * @dd: the hfi1_ib device
1373  * @port: the IB port number
1374  * @pkeys: the PKEY table
1375  */
1376 static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
1377 {
1378         struct hfi1_pportdata *ppd;
1379         int i;
1380         int changed = 0;
1381         int update_includes_mgmt_partition = 0;
1382
1383         /*
1384          * IB port one/two always maps to context zero/one,
1385          * always a kernel context, no locking needed
1386          * If we get here with ppd setup, no need to check
1387          * that rcd is valid.
1388          */
1389         ppd = dd->pport + (port - 1);
1390         /*
1391          * If the update does not include the management pkey, don't do it.
1392          */
1393         for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
1394                 if (pkeys[i] == LIM_MGMT_P_KEY) {
1395                         update_includes_mgmt_partition = 1;
1396                         break;
1397                 }
1398         }
1399
1400         if (!update_includes_mgmt_partition)
1401                 return 1;
1402
1403         for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
1404                 u16 key = pkeys[i];
1405                 u16 okey = ppd->pkeys[i];
1406
1407                 if (key == okey)
1408                         continue;
1409                 /*
1410                  * Don't update pkeys[2], if an HFI port without MgmtAllowed
1411                  * by neighbor is a switch.
1412                  */
1413                 if (i == 2 && !ppd->mgmt_allowed && ppd->neighbor_type == 1)
1414                         continue;
1415                 /*
1416                  * The SM gives us the complete PKey table. We have
1417                  * to ensure that we put the PKeys in the matching
1418                  * slots.
1419                  */
1420                 ppd->pkeys[i] = key;
1421                 changed = 1;
1422         }
1423
1424         if (changed) {
1425                 (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
1426                 hfi1_event_pkey_change(dd, port);
1427         }
1428
1429         return 0;
1430 }
1431
1432 static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
1433                                     struct ib_device *ibdev, u8 port,
1434                                     u32 *resp_len)
1435 {
1436         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1437         u32 n_blocks_sent = OPA_AM_NBLK(am);
1438         u32 start_block = am & 0x7ff;
1439         u16 *p = (u16 *)data;
1440         __be16 *q = (__be16 *)data;
1441         int i;
1442         u16 n_blocks_avail;
1443         unsigned npkeys = hfi1_get_npkeys(dd);
1444
1445         if (n_blocks_sent == 0) {
1446                 pr_warn("OPA Get PKey AM Invalid : P = %d; B = 0x%x; N = 0x%x\n",
1447                         port, start_block, n_blocks_sent);
1448                 smp->status |= IB_SMP_INVALID_FIELD;
1449                 return reply((struct ib_mad_hdr *)smp);
1450         }
1451
1452         n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
1453
1454         if (start_block + n_blocks_sent > n_blocks_avail ||
1455             n_blocks_sent > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
1456                 pr_warn("OPA Set PKey AM Invalid : s 0x%x; req 0x%x; avail 0x%x; blk/smp 0x%lx\n",
1457                         start_block, n_blocks_sent, n_blocks_avail,
1458                         OPA_NUM_PKEY_BLOCKS_PER_SMP);
1459                 smp->status |= IB_SMP_INVALID_FIELD;
1460                 return reply((struct ib_mad_hdr *)smp);
1461         }
1462
1463         for (i = 0; i < n_blocks_sent * OPA_PARTITION_TABLE_BLK_SIZE; i++)
1464                 p[i] = be16_to_cpu(q[i]);
1465
1466         if (start_block == 0 && set_pkeys(dd, port, p) != 0) {
1467                 smp->status |= IB_SMP_INVALID_FIELD;
1468                 return reply((struct ib_mad_hdr *)smp);
1469         }
1470
1471         return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len);
1472 }
1473
1474 static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
1475 {
1476         u64 *val = data;
1477
1478         *val++ = read_csr(dd, SEND_SC2VLT0);
1479         *val++ = read_csr(dd, SEND_SC2VLT1);
1480         *val++ = read_csr(dd, SEND_SC2VLT2);
1481         *val++ = read_csr(dd, SEND_SC2VLT3);
1482         return 0;
1483 }
1484
1485 #define ILLEGAL_VL 12
1486 /*
1487  * filter_sc2vlt changes mappings to VL15 to ILLEGAL_VL (except
1488  * for SC15, which must map to VL15). If we don't remap things this
1489  * way it is possible for VL15 counters to increment when we try to
1490  * send on a SC which is mapped to an invalid VL.
1491  */
1492 static void filter_sc2vlt(void *data)
1493 {
1494         int i;
1495         u8 *pd = data;
1496
1497         for (i = 0; i < OPA_MAX_SCS; i++) {
1498                 if (i == 15)
1499                         continue;
1500                 if ((pd[i] & 0x1f) == 0xf)
1501                         pd[i] = ILLEGAL_VL;
1502         }
1503 }
1504
1505 static int set_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
1506 {
1507         u64 *val = data;
1508
1509         filter_sc2vlt(data);
1510
1511         write_csr(dd, SEND_SC2VLT0, *val++);
1512         write_csr(dd, SEND_SC2VLT1, *val++);
1513         write_csr(dd, SEND_SC2VLT2, *val++);
1514         write_csr(dd, SEND_SC2VLT3, *val++);
1515         write_seqlock_irq(&dd->sc2vl_lock);
1516         memcpy(dd->sc2vl, data, sizeof(dd->sc2vl));
1517         write_sequnlock_irq(&dd->sc2vl_lock);
1518         return 0;
1519 }
1520
1521 static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
1522                                    struct ib_device *ibdev, u8 port,
1523                                    u32 *resp_len)
1524 {
1525         struct hfi1_ibport *ibp = to_iport(ibdev, port);
1526         u8 *p = data;
1527         size_t size = ARRAY_SIZE(ibp->sl_to_sc); /* == 32 */
1528         unsigned i;
1529
1530         if (am) {
1531                 smp->status |= IB_SMP_INVALID_FIELD;
1532                 return reply((struct ib_mad_hdr *)smp);
1533         }
1534
1535         for (i = 0; i < ARRAY_SIZE(ibp->sl_to_sc); i++)
1536                 *p++ = ibp->sl_to_sc[i];
1537
1538         if (resp_len)
1539                 *resp_len += size;
1540
1541         return reply((struct ib_mad_hdr *)smp);
1542 }
1543
1544 static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
1545                                    struct ib_device *ibdev, u8 port,
1546                                    u32 *resp_len)
1547 {
1548         struct hfi1_ibport *ibp = to_iport(ibdev, port);
1549         u8 *p = data;
1550         int i;
1551         u8 sc;
1552
1553         if (am) {
1554                 smp->status |= IB_SMP_INVALID_FIELD;
1555                 return reply((struct ib_mad_hdr *)smp);
1556         }
1557
1558         for (i = 0; i <  ARRAY_SIZE(ibp->sl_to_sc); i++) {
1559                 sc = *p++;
1560                 if (ibp->sl_to_sc[i] != sc) {
1561                         ibp->sl_to_sc[i] = sc;
1562
1563                         /* Put all stale qps into error state */
1564                         hfi1_error_port_qps(ibp, i);
1565                 }
1566         }
1567
1568         return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len);
1569 }
1570
1571 static int __subn_get_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
1572                                    struct ib_device *ibdev, u8 port,
1573                                    u32 *resp_len)
1574 {
1575         struct hfi1_ibport *ibp = to_iport(ibdev, port);
1576         u8 *p = data;
1577         size_t size = ARRAY_SIZE(ibp->sc_to_sl); /* == 32 */
1578         unsigned i;
1579
1580         if (am) {
1581                 smp->status |= IB_SMP_INVALID_FIELD;
1582                 return reply((struct ib_mad_hdr *)smp);
1583         }
1584
1585         for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++)
1586                 *p++ = ibp->sc_to_sl[i];
1587
1588         if (resp_len)
1589                 *resp_len += size;
1590
1591         return reply((struct ib_mad_hdr *)smp);
1592 }
1593
1594 static int __subn_set_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
1595                                    struct ib_device *ibdev, u8 port,
1596                                    u32 *resp_len)
1597 {
1598         struct hfi1_ibport *ibp = to_iport(ibdev, port);
1599         u8 *p = data;
1600         int i;
1601
1602         if (am) {
1603                 smp->status |= IB_SMP_INVALID_FIELD;
1604                 return reply((struct ib_mad_hdr *)smp);
1605         }
1606
1607         for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++)
1608                 ibp->sc_to_sl[i] = *p++;
1609
1610         return __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port, resp_len);
1611 }
1612
1613 static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
1614                                     struct ib_device *ibdev, u8 port,
1615                                     u32 *resp_len)
1616 {
1617         u32 n_blocks = OPA_AM_NBLK(am);
1618         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1619         void *vp = (void *)data;
1620         size_t size = 4 * sizeof(u64);
1621
1622         if (n_blocks != 1) {
1623                 smp->status |= IB_SMP_INVALID_FIELD;
1624                 return reply((struct ib_mad_hdr *)smp);
1625         }
1626
1627         get_sc2vlt_tables(dd, vp);
1628
1629         if (resp_len)
1630                 *resp_len += size;
1631
1632         return reply((struct ib_mad_hdr *)smp);
1633 }
1634
1635 static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
1636                                     struct ib_device *ibdev, u8 port,
1637                                     u32 *resp_len)
1638 {
1639         u32 n_blocks = OPA_AM_NBLK(am);
1640         int async_update = OPA_AM_ASYNC(am);
1641         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1642         void *vp = (void *)data;
1643         struct hfi1_pportdata *ppd;
1644         int lstate;
1645
1646         if (n_blocks != 1 || async_update) {
1647                 smp->status |= IB_SMP_INVALID_FIELD;
1648                 return reply((struct ib_mad_hdr *)smp);
1649         }
1650
1651         /* IB numbers ports from 1, hw from 0 */
1652         ppd = dd->pport + (port - 1);
1653         lstate = driver_lstate(ppd);
1654         /*
1655          * it's known that async_update is 0 by this point, but include
1656          * the explicit check for clarity
1657          */
1658         if (!async_update &&
1659             (lstate == IB_PORT_ARMED || lstate == IB_PORT_ACTIVE)) {
1660                 smp->status |= IB_SMP_INVALID_FIELD;
1661                 return reply((struct ib_mad_hdr *)smp);
1662         }
1663
1664         set_sc2vlt_tables(dd, vp);
1665
1666         return __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port, resp_len);
1667 }
1668
1669 static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
1670                                      struct ib_device *ibdev, u8 port,
1671                                      u32 *resp_len)
1672 {
1673         u32 n_blocks = OPA_AM_NPORT(am);
1674         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1675         struct hfi1_pportdata *ppd;
1676         void *vp = (void *)data;
1677         int size;
1678
1679         if (n_blocks != 1) {
1680                 smp->status |= IB_SMP_INVALID_FIELD;
1681                 return reply((struct ib_mad_hdr *)smp);
1682         }
1683
1684         ppd = dd->pport + (port - 1);
1685
1686         size = fm_get_table(ppd, FM_TBL_SC2VLNT, vp);
1687
1688         if (resp_len)
1689                 *resp_len += size;
1690
1691         return reply((struct ib_mad_hdr *)smp);
1692 }
1693
1694 static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
1695                                      struct ib_device *ibdev, u8 port,
1696                                      u32 *resp_len)
1697 {
1698         u32 n_blocks = OPA_AM_NPORT(am);
1699         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1700         struct hfi1_pportdata *ppd;
1701         void *vp = (void *)data;
1702         int lstate;
1703
1704         if (n_blocks != 1) {
1705                 smp->status |= IB_SMP_INVALID_FIELD;
1706                 return reply((struct ib_mad_hdr *)smp);
1707         }
1708
1709         /* IB numbers ports from 1, hw from 0 */
1710         ppd = dd->pport + (port - 1);
1711         lstate = driver_lstate(ppd);
1712         if (lstate == IB_PORT_ARMED || lstate == IB_PORT_ACTIVE) {
1713                 smp->status |= IB_SMP_INVALID_FIELD;
1714                 return reply((struct ib_mad_hdr *)smp);
1715         }
1716
1717         ppd = dd->pport + (port - 1);
1718
1719         fm_set_table(ppd, FM_TBL_SC2VLNT, vp);
1720
1721         return __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
1722                                          resp_len);
1723 }
1724
1725 static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
1726                               struct ib_device *ibdev, u8 port,
1727                               u32 *resp_len)
1728 {
1729         u32 nports = OPA_AM_NPORT(am);
1730         u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
1731         u32 lstate;
1732         struct hfi1_ibport *ibp;
1733         struct hfi1_pportdata *ppd;
1734         struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
1735
1736         if (nports != 1) {
1737                 smp->status |= IB_SMP_INVALID_FIELD;
1738                 return reply((struct ib_mad_hdr *)smp);
1739         }
1740
1741         ibp = to_iport(ibdev, port);
1742         ppd = ppd_from_ibp(ibp);
1743
1744         lstate = driver_lstate(ppd);
1745
1746         if (start_of_sm_config && (lstate == IB_PORT_INIT))
1747                 ppd->is_sm_config_started = 1;
1748
1749         psi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
1750         psi->port_states.ledenable_offlinereason |=
1751                 ppd->is_sm_config_started << 5;
1752         psi->port_states.ledenable_offlinereason |=
1753                 ppd->offline_disabled_reason;
1754
1755         psi->port_states.portphysstate_portstate =
1756                 (hfi1_ibphys_portstate(ppd) << 4) | (lstate & 0xf);
1757         psi->link_width_downgrade_tx_active =
1758                 cpu_to_be16(ppd->link_width_downgrade_tx_active);
1759         psi->link_width_downgrade_rx_active =
1760                 cpu_to_be16(ppd->link_width_downgrade_rx_active);
1761         if (resp_len)
1762                 *resp_len += sizeof(struct opa_port_state_info);
1763
1764         return reply((struct ib_mad_hdr *)smp);
1765 }
1766
1767 static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
1768                               struct ib_device *ibdev, u8 port,
1769                               u32 *resp_len)
1770 {
1771         u32 nports = OPA_AM_NPORT(am);
1772         u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
1773         u32 ls_old;
1774         u8 ls_new, ps_new;
1775         struct hfi1_ibport *ibp;
1776         struct hfi1_pportdata *ppd;
1777         struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
1778         int ret, invalid = 0;
1779
1780         if (nports != 1) {
1781                 smp->status |= IB_SMP_INVALID_FIELD;
1782                 return reply((struct ib_mad_hdr *)smp);
1783         }
1784
1785         ibp = to_iport(ibdev, port);
1786         ppd = ppd_from_ibp(ibp);
1787
1788         ls_old = driver_lstate(ppd);
1789
1790         ls_new = port_states_to_logical_state(&psi->port_states);
1791         ps_new = port_states_to_phys_state(&psi->port_states);
1792
1793         if (ls_old == IB_PORT_INIT) {
1794                 if (start_of_sm_config) {
1795                         if (ls_new == ls_old || (ls_new == IB_PORT_ARMED))
1796                                 ppd->is_sm_config_started = 1;
1797                 } else if (ls_new == IB_PORT_ARMED) {
1798                         if (ppd->is_sm_config_started == 0)
1799                                 invalid = 1;
1800                 }
1801         }
1802
1803         ret = set_port_states(ppd, smp, ls_new, ps_new, invalid);
1804         if (ret)
1805                 return ret;
1806
1807         if (invalid)
1808                 smp->status |= IB_SMP_INVALID_FIELD;
1809
1810         return __subn_get_opa_psi(smp, am, data, ibdev, port, resp_len);
1811 }
1812
1813 static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data,
1814                                      struct ib_device *ibdev, u8 port,
1815                                      u32 *resp_len)
1816 {
1817         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1818         u32 addr = OPA_AM_CI_ADDR(am);
1819         u32 len = OPA_AM_CI_LEN(am) + 1;
1820         int ret;
1821
1822 #define __CI_PAGE_SIZE BIT(7) /* 128 bytes */
1823 #define __CI_PAGE_MASK ~(__CI_PAGE_SIZE - 1)
1824 #define __CI_PAGE_NUM(a) ((a) & __CI_PAGE_MASK)
1825
1826         /*
1827          * check that addr is within spec, and
1828          * addr and (addr + len - 1) are on the same "page"
1829          */
1830         if (addr >= 4096 ||
1831             (__CI_PAGE_NUM(addr) != __CI_PAGE_NUM(addr + len - 1))) {
1832                 smp->status |= IB_SMP_INVALID_FIELD;
1833                 return reply((struct ib_mad_hdr *)smp);
1834         }
1835
1836         ret = get_cable_info(dd, port, addr, len, data);
1837
1838         if (ret == -ENODEV) {
1839                 smp->status |= IB_SMP_UNSUP_METH_ATTR;
1840                 return reply((struct ib_mad_hdr *)smp);
1841         }
1842
1843         /* The address range for the CableInfo SMA query is wider than the
1844          * memory available on the QSFP cable. We want to return a valid
1845          * response, albeit zeroed out, for address ranges beyond available
1846          * memory but that are within the CableInfo query spec
1847          */
1848         if (ret < 0 && ret != -ERANGE) {
1849                 smp->status |= IB_SMP_INVALID_FIELD;
1850                 return reply((struct ib_mad_hdr *)smp);
1851         }
1852
1853         if (resp_len)
1854                 *resp_len += len;
1855
1856         return reply((struct ib_mad_hdr *)smp);
1857 }
1858
1859 static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
1860                               struct ib_device *ibdev, u8 port, u32 *resp_len)
1861 {
1862         u32 num_ports = OPA_AM_NPORT(am);
1863         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1864         struct hfi1_pportdata *ppd;
1865         struct buffer_control *p = (struct buffer_control *)data;
1866         int size;
1867
1868         if (num_ports != 1) {
1869                 smp->status |= IB_SMP_INVALID_FIELD;
1870                 return reply((struct ib_mad_hdr *)smp);
1871         }
1872
1873         ppd = dd->pport + (port - 1);
1874         size = fm_get_table(ppd, FM_TBL_BUFFER_CONTROL, p);
1875         trace_bct_get(dd, p);
1876         if (resp_len)
1877                 *resp_len += size;
1878
1879         return reply((struct ib_mad_hdr *)smp);
1880 }
1881
1882 static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
1883                               struct ib_device *ibdev, u8 port, u32 *resp_len)
1884 {
1885         u32 num_ports = OPA_AM_NPORT(am);
1886         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1887         struct hfi1_pportdata *ppd;
1888         struct buffer_control *p = (struct buffer_control *)data;
1889
1890         if (num_ports != 1) {
1891                 smp->status |= IB_SMP_INVALID_FIELD;
1892                 return reply((struct ib_mad_hdr *)smp);
1893         }
1894         ppd = dd->pport + (port - 1);
1895         trace_bct_set(dd, p);
1896         if (fm_set_table(ppd, FM_TBL_BUFFER_CONTROL, p) < 0) {
1897                 smp->status |= IB_SMP_INVALID_FIELD;
1898                 return reply((struct ib_mad_hdr *)smp);
1899         }
1900
1901         return __subn_get_opa_bct(smp, am, data, ibdev, port, resp_len);
1902 }
1903
1904 static int __subn_get_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
1905                                  struct ib_device *ibdev, u8 port,
1906                                  u32 *resp_len)
1907 {
1908         struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
1909         u32 num_ports = OPA_AM_NPORT(am);
1910         u8 section = (am & 0x00ff0000) >> 16;
1911         u8 *p = data;
1912         int size = 0;
1913
1914         if (num_ports != 1) {
1915                 smp->status |= IB_SMP_INVALID_FIELD;
1916                 return reply((struct ib_mad_hdr *)smp);
1917         }
1918
1919         switch (section) {
1920         case OPA_VLARB_LOW_ELEMENTS:
1921                 size = fm_get_table(ppd, FM_TBL_VL_LOW_ARB, p);
1922                 break;
1923         case OPA_VLARB_HIGH_ELEMENTS:
1924                 size = fm_get_table(ppd, FM_TBL_VL_HIGH_ARB, p);
1925                 break;
1926         case OPA_VLARB_PREEMPT_ELEMENTS:
1927                 size = fm_get_table(ppd, FM_TBL_VL_PREEMPT_ELEMS, p);
1928                 break;
1929         case OPA_VLARB_PREEMPT_MATRIX:
1930                 size = fm_get_table(ppd, FM_TBL_VL_PREEMPT_MATRIX, p);
1931                 break;
1932         default:
1933                 pr_warn("OPA SubnGet(VL Arb) AM Invalid : 0x%x\n",
1934                         be32_to_cpu(smp->attr_mod));
1935                 smp->status |= IB_SMP_INVALID_FIELD;
1936                 break;
1937         }
1938
1939         if (size > 0 && resp_len)
1940                 *resp_len += size;
1941
1942         return reply((struct ib_mad_hdr *)smp);
1943 }
1944
1945 static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
1946                                  struct ib_device *ibdev, u8 port,
1947                                  u32 *resp_len)
1948 {
1949         struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
1950         u32 num_ports = OPA_AM_NPORT(am);
1951         u8 section = (am & 0x00ff0000) >> 16;
1952         u8 *p = data;
1953
1954         if (num_ports != 1) {
1955                 smp->status |= IB_SMP_INVALID_FIELD;
1956                 return reply((struct ib_mad_hdr *)smp);
1957         }
1958
1959         switch (section) {
1960         case OPA_VLARB_LOW_ELEMENTS:
1961                 (void)fm_set_table(ppd, FM_TBL_VL_LOW_ARB, p);
1962                 break;
1963         case OPA_VLARB_HIGH_ELEMENTS:
1964                 (void)fm_set_table(ppd, FM_TBL_VL_HIGH_ARB, p);
1965                 break;
1966         /*
1967          * neither OPA_VLARB_PREEMPT_ELEMENTS, or OPA_VLARB_PREEMPT_MATRIX
1968          * can be changed from the default values
1969          */
1970         case OPA_VLARB_PREEMPT_ELEMENTS:
1971                 /* FALLTHROUGH */
1972         case OPA_VLARB_PREEMPT_MATRIX:
1973                 smp->status |= IB_SMP_UNSUP_METH_ATTR;
1974                 break;
1975         default:
1976                 pr_warn("OPA SubnSet(VL Arb) AM Invalid : 0x%x\n",
1977                         be32_to_cpu(smp->attr_mod));
1978                 smp->status |= IB_SMP_INVALID_FIELD;
1979                 break;
1980         }
1981
1982         return __subn_get_opa_vl_arb(smp, am, data, ibdev, port, resp_len);
1983 }
1984
/*
 * MAD as seen by the pma_* handlers in this file: a common MAD header
 * followed by a 2024-byte attribute payload.  Packed, so data starts
 * immediately after the header.
 */
1985 struct opa_pma_mad {
1986         struct ib_mad_hdr mad_hdr;
1987         u8 data[2024];
1988 } __packed;
1989
/*
 * ClassPortInfo payload.  Multi-byte fields are big-endian and the
 * structure is packed.  pma_get_opa_classportinfo() zero-fills the
 * payload and then sets only base_version, class_version and
 * cap_mask2_resp_time; every other field is reported as zero.
 */
1990 struct opa_class_port_info {
1991         u8 base_version;
1992         u8 class_version;
1993         __be16 cap_mask;
        /* NOTE(review): name suggests cap mask 2 and response time share this word - confirm bit split */
1994         __be32 cap_mask2_resp_time;
1995
        /* redirection fields */
1996         u8 redirect_gid[16];
1997         __be32 redirect_tc_fl;
1998         __be32 redirect_lid;
1999         __be32 redirect_sl_qp;
2000         __be32 redirect_qkey;
2001
        /* trap fields */
2002         u8 trap_gid[16];
2003         __be32 trap_tc_fl;
2004         __be32 trap_lid;
2005         __be32 trap_hl_qp;
2006         __be32 trap_qkey;
2007
2008         __be16 trap_pkey;
2009         __be16 redirect_pkey;
2010
2011         u8 trap_sl_rsvd;
2012         u8 reserved[3];
2013 } __packed;
2014
/*
 * Request payload read by pma_get_opa_portstatus().
 *
 * port_num may be zero, in which case the response reports the port the
 * handler was called for; a non-zero value that differs from that port
 * is rejected.  vl_select_mask is big-endian and may only contain bits
 * that are also set in VL_MASK_ALL.
 */
2015 struct opa_port_status_req {
2016         __u8 port_num;
2017         __u8 reserved[3];
2018         __be32 vl_select_mask;
2019 };
2020
/* VL select mask with bits 0-7 and bit 15 set, i.e. VL0..VL7 plus VL15 */
2021 #define VL_MASK_ALL             0x000080ff
2022
/*
 * Response payload built by pma_get_opa_portstatus().
 *
 * A fixed part with port-wide data and error counters (all big-endian)
 * is followed by one struct _vls_pctrs per bit set in vl_select_mask.
 * The handler sizes the response as
 * sizeof(struct opa_port_status_rsp) + num_vls * sizeof(struct _vls_pctrs).
 */
2023 struct opa_port_status_rsp {
2024         __u8 port_num;
2025         __u8 reserved[3];
2026         __be32  vl_select_mask;
2027
2028         /* Data counters */
2029         __be64 port_xmit_data;
2030         __be64 port_rcv_data;
2031         __be64 port_xmit_pkts;
2032         __be64 port_rcv_pkts;
2033         __be64 port_multicast_xmit_pkts;
2034         __be64 port_multicast_rcv_pkts;
2035         __be64 port_xmit_wait;
2036         __be64 sw_port_congestion;
2037         __be64 port_rcv_fecn;
2038         __be64 port_rcv_becn;
2039         __be64 port_xmit_time_cong;
2040         __be64 port_xmit_wasted_bw;
2041         __be64 port_xmit_wait_data;
2042         __be64 port_rcv_bubble;
2043         __be64 port_mark_fecn;
2044         /* Error counters */
2045         __be64 port_rcv_constraint_errors;
2046         __be64 port_rcv_switch_relay_errors;
2047         __be64 port_xmit_discards;
2048         __be64 port_xmit_constraint_errors;
2049         __be64 port_rcv_remote_physical_errors;
2050         __be64 local_link_integrity_errors;
2051         __be64 port_rcv_errors;
2052         __be64 excessive_buffer_overruns;
2053         __be64 fm_config_errors;
2054         __be32 link_error_recovery;
2055         __be32 link_downed;
2056         u8 uncorrectable_errors;
2057
2058         u8 link_quality_indicator; /* 5res, 3bit */
2059         u8 res2[6];
        /* per-VL counters; one element per selected VL follows the fixed part */
2060         struct _vls_pctrs {
2061                 /* per-VL Data counters */
2062                 __be64 port_vl_xmit_data;
2063                 __be64 port_vl_rcv_data;
2064                 __be64 port_vl_xmit_pkts;
2065                 __be64 port_vl_rcv_pkts;
2066                 __be64 port_vl_xmit_wait;
2067                 __be64 sw_port_vl_congestion;
2068                 __be64 port_vl_rcv_fecn;
2069                 __be64 port_vl_rcv_becn;
2070                 __be64 port_xmit_time_cong;
2071                 __be64 port_vl_xmit_wasted_bw;
2072                 __be64 port_vl_xmit_wait_data;
2073                 __be64 port_vl_rcv_bubble;
2074                 __be64 port_vl_mark_fecn;
2075                 __be64 port_vl_xmit_discards;
2076         } vls[0]; /* real array size defined by # bits set in vl_select_mask */
2077 };
2078
/*
 * One bit per port-wide counter, assigned from bit 31 downwards in the
 * same order as the counters appear in struct opa_port_status_rsp
 * (port_xmit_data first, uncorrectable_errors last).  Bits 4..0 are not
 * assigned.
 *
 * Presumably matched against opa_clear_port_status.counter_select_mask;
 * the user is not visible in this part of the file - confirm.
 *
 * NOTE(review): (1 << 31) shifts into the sign bit of an int; the code
 * relies on the compiler producing the 0x80000000 bit pattern.
 */
2079 enum counter_selects {
2080         CS_PORT_XMIT_DATA                       = (1 << 31),
2081         CS_PORT_RCV_DATA                        = (1 << 30),
2082         CS_PORT_XMIT_PKTS                       = (1 << 29),
2083         CS_PORT_RCV_PKTS                        = (1 << 28),
2084         CS_PORT_MCAST_XMIT_PKTS                 = (1 << 27),
2085         CS_PORT_MCAST_RCV_PKTS                  = (1 << 26),
2086         CS_PORT_XMIT_WAIT                       = (1 << 25),
2087         CS_SW_PORT_CONGESTION                   = (1 << 24),
2088         CS_PORT_RCV_FECN                        = (1 << 23),
2089         CS_PORT_RCV_BECN                        = (1 << 22),
2090         CS_PORT_XMIT_TIME_CONG                  = (1 << 21),
2091         CS_PORT_XMIT_WASTED_BW                  = (1 << 20),
2092         CS_PORT_XMIT_WAIT_DATA                  = (1 << 19),
2093         CS_PORT_RCV_BUBBLE                      = (1 << 18),
2094         CS_PORT_MARK_FECN                       = (1 << 17),
2095         CS_PORT_RCV_CONSTRAINT_ERRORS           = (1 << 16),
2096         CS_PORT_RCV_SWITCH_RELAY_ERRORS         = (1 << 15),
2097         CS_PORT_XMIT_DISCARDS                   = (1 << 14),
2098         CS_PORT_XMIT_CONSTRAINT_ERRORS          = (1 << 13),
2099         CS_PORT_RCV_REMOTE_PHYSICAL_ERRORS      = (1 << 12),
2100         CS_LOCAL_LINK_INTEGRITY_ERRORS          = (1 << 11),
2101         CS_PORT_RCV_ERRORS                      = (1 << 10),
2102         CS_EXCESSIVE_BUFFER_OVERRUNS            = (1 << 9),
2103         CS_FM_CONFIG_ERRORS                     = (1 << 8),
2104         CS_LINK_ERROR_RECOVERY                  = (1 << 7),
2105         CS_LINK_DOWNED                          = (1 << 6),
2106         CS_UNCORRECTABLE_ERRORS                 = (1 << 5),
2107 };
2108
/*
 * Payload carrying a 256-bit port select mask (four big-endian 64-bit
 * words) and a big-endian counter select mask.
 *
 * Presumably the counter bits are the CS_* values of enum
 * counter_selects; the handler is not visible here - confirm.
 */
2109 struct opa_clear_port_status {
2110         __be64 port_select_mask[4];
2111         __be32 counter_select_mask;
2112 };
2113
/*
 * Header of one aggregated attribute: attribute id, a combined
 * error/request-length word, the attribute modifier, then the
 * attribute's own data as a trailing zero-length array.
 */
2114 struct opa_aggregate {
2115         __be16 attr_id;
2116         __be16 err_reqlength;   /* 1 bit, 8 res, 7 bit */
2117         __be32 attr_mod;
2118         u8 data[0];
2119 };
2120
/*
 * Mask/shift pairs for two 4-bit fields sharing one word (LLI in bits
 * 7:4, LER in bits 3:0) plus an additive constant for each.
 *
 * NOTE(review): presumably LLI = local link integrity and LER = link
 * error recovery, extracted from a "resolution" field and offset by
 * ADD_LLI / ADD_LER; no user is visible in this part of the file -
 * confirm against the callers.
 */
2121 #define MSK_LLI 0x000000f0
2122 #define MSK_LLI_SFT 4
2123 #define MSK_LER 0x0000000f
2124 #define MSK_LER_SFT 0
2125 #define ADD_LLI 8
2126 #define ADD_LER 2
2127
/*
 * Data counters message.  All multi-byte fields are big-endian.
 *
 * Layout: a 256-bit port select mask, a VL select mask and a resolution
 * word, followed by one struct _port_dctrs per reported port.  Each
 * _port_dctrs ends in a variable number of struct _vls_dctrs, one per
 * bit set in vl_select_mask, so port[] elements are not a fixed size.
 */
2128 /* Request contains first three fields, response contains those plus the rest */
2129 struct opa_port_data_counters_msg {
2130         __be64 port_select_mask[4];
2131         __be32 vl_select_mask;
2132         __be32 resolution;
2133
2134         /* Response fields follow */
2135         struct _port_dctrs {
2136                 u8 port_number;
2137                 u8 reserved2[3];
2138                 __be32 link_quality_indicator; /* 29res, 3bit */
2139
2140                 /* Data counters */
2141                 __be64 port_xmit_data;
2142                 __be64 port_rcv_data;
2143                 __be64 port_xmit_pkts;
2144                 __be64 port_rcv_pkts;
2145                 __be64 port_multicast_xmit_pkts;
2146                 __be64 port_multicast_rcv_pkts;
2147                 __be64 port_xmit_wait;
2148                 __be64 sw_port_congestion;
2149                 __be64 port_rcv_fecn;
2150                 __be64 port_rcv_becn;
2151                 __be64 port_xmit_time_cong;
2152                 __be64 port_xmit_wasted_bw;
2153                 __be64 port_xmit_wait_data;
2154                 __be64 port_rcv_bubble;
2155                 __be64 port_mark_fecn;
2156
2157                 __be64 port_error_counter_summary;
2158                 /* Sum of error counts/port */
2159
2160                 struct _vls_dctrs {
2161                         /* per-VL Data counters */
2162                         __be64 port_vl_xmit_data;
2163                         __be64 port_vl_rcv_data;
2164                         __be64 port_vl_xmit_pkts;
2165                         __be64 port_vl_rcv_pkts;
2166                         __be64 port_vl_xmit_wait;
2167                         __be64 sw_port_vl_congestion;
2168                         __be64 port_vl_rcv_fecn;
2169                         __be64 port_vl_rcv_becn;
2170                         __be64 port_xmit_time_cong;
2171                         __be64 port_vl_xmit_wasted_bw;
2172                         __be64 port_vl_xmit_wait_data;
2173                         __be64 port_vl_rcv_bubble;
2174                         __be64 port_vl_mark_fecn;
2175                 } vls[0];
2176                 /* array size defined by #bits set in vl_select_mask*/
2177         } port[1]; /* array size defined by  #ports in attribute modifier */
2178 };
2179
/*
 * Error counters message.  All multi-byte fields are big-endian.
 *
 * Layout: a 256-bit port select mask and a VL select mask, a reserved
 * word, then one struct _port_ectrs per reported port.  Each
 * _port_ectrs ends in one struct _vls_ectrs per bit set in
 * vl_select_mask, so port[] elements are not a fixed size.
 */
2180 struct opa_port_error_counters64_msg {
2181         /*
2182          * Request contains first two fields, response contains the
2183          * whole magilla
2184          */
2185         __be64 port_select_mask[4];
2186         __be32 vl_select_mask;
2187
2188         /* Response-only fields follow */
2189         __be32 reserved1;
2190         struct _port_ectrs {
2191                 u8 port_number;
2192                 u8 reserved2[7];
2193                 __be64 port_rcv_constraint_errors;
2194                 __be64 port_rcv_switch_relay_errors;
2195                 __be64 port_xmit_discards;
2196                 __be64 port_xmit_constraint_errors;
2197                 __be64 port_rcv_remote_physical_errors;
2198                 __be64 local_link_integrity_errors;
2199                 __be64 port_rcv_errors;
2200                 __be64 excessive_buffer_overruns;
2201                 __be64 fm_config_errors;
2202                 __be32 link_error_recovery;
2203                 __be32 link_downed;
2204                 u8 uncorrectable_errors;
2205                 u8 reserved3[7];
2206                 struct _vls_ectrs {
2207                         __be64 port_vl_xmit_discards;
2208                 } vls[0];
2209                 /* array size defined by #bits set in vl_select_mask */
2210         } port[1]; /* array size defined by #ports in attribute modifier */
2211 };
2212
/*
 * Error info message.
 *
 * Layout: a 256-bit port select mask, an error-info select mask (bits
 * from enum error_info_selects) and a reserved word, followed by one
 * struct _port_ei per reported port.  Each _port_ei holds one packed
 * sub-record per error-info type, in the same order as the ES_* bits.
 */
2213 struct opa_port_error_info_msg {
2214         __be64 port_select_mask[4];
2215         __be32 error_info_select_mask;
2216         __be32 reserved1;
2217         struct _port_ei {
2218                 u8 port_number;
2219                 u8 reserved2[7];
2220
2221                 /* PortRcvErrorInfo */
2222                 struct {
2223                         u8 status_and_code;
                        /* 17 bytes of error data; raw overlays both decoded formats */
2224                         union {
2225                                 u8 raw[17];
2226                                 struct {
2227                                         /* EI1to12 format */
2228                                         u8 packet_flit1[8];
2229                                         u8 packet_flit2[8];
2230                                         u8 remaining_flit_bits12;
2231                                 } ei1to12;
2232                                 struct {
2233                                         u8 packet_bytes[8];
2234                                         u8 remaining_flit_bits;
2235                                 } ei13;
2236                         } ei;
2237                         u8 reserved3[6];
2238                 } __packed port_rcv_ei;
2239
2240                 /* ExcessiveBufferOverrunInfo */
2241                 struct {
2242                         u8 status_and_sc;
2243                         u8 reserved4[7];
2244                 } __packed excessive_buffer_overrun_ei;
2245
2246                 /* PortXmitConstraintErrorInfo */
2247                 struct {
2248                         u8 status;
2249                         u8 reserved5;
2250                         __be16 pkey;
2251                         __be32 slid;
2252                 } __packed port_xmit_constraint_ei;
2253
2254                 /* PortRcvConstraintErrorInfo */
2255                 struct {
2256                         u8 status;
2257                         u8 reserved6;
2258                         __be16 pkey;
2259                         __be32 slid;
2260                 } __packed port_rcv_constraint_ei;
2261
2262                 /* PortRcvSwitchRelayErrorInfo */
2263                 struct {
2264                         u8 status_and_code;
2265                         u8 reserved7[3];
                        /* NOTE(review): declared __u32, not __be32 like the other multi-byte fields - confirm byte order */
2266                         __u32 error_info;
2267                 } __packed port_rcv_switch_relay_ei;
2268
2269                 /* UncorrectableErrorInfo */
2270                 struct {
2271                         u8 status_and_code;
2272                         u8 reserved8;
2273                 } __packed uncorrectable_ei;
2274
2275                 /* FMConfigErrorInfo */
2276                 struct {
2277                         u8 status_and_code;
2278                         u8 error_info;
2279                 } __packed fm_config_ei;
2280                 __u32 reserved9;
2281         } port[1]; /* actual array size defined by #ports in attr modifier */
2282 };
2283
/*
 * One bit per error-info record, assigned from bit 31 downwards in the
 * same order as the records appear in struct _port_ei (port_rcv_ei
 * first, fm_config_ei last).
 *
 * NOTE(review): (1 << 31) shifts into the sign bit of an int; the code
 * relies on the compiler producing the 0x80000000 bit pattern.
 */
2284 /* opa_port_error_info_msg error_info_select_mask bit definitions */
2285 enum error_info_selects {
2286         ES_PORT_RCV_ERROR_INFO                  = (1 << 31),
2287         ES_EXCESSIVE_BUFFER_OVERRUN_INFO        = (1 << 30),
2288         ES_PORT_XMIT_CONSTRAINT_ERROR_INFO      = (1 << 29),
2289         ES_PORT_RCV_CONSTRAINT_ERROR_INFO       = (1 << 28),
2290         ES_PORT_RCV_SWITCH_RELAY_ERROR_INFO     = (1 << 27),
2291         ES_UNCORRECTABLE_ERROR_INFO             = (1 << 26),
2292         ES_FM_CONFIG_ERROR_INFO                 = (1 << 25)
2293 };
2294
2295 static int pma_get_opa_classportinfo(struct opa_pma_mad *pmp,
2296                                      struct ib_device *ibdev, u32 *resp_len)
2297 {
2298         struct opa_class_port_info *p =
2299                 (struct opa_class_port_info *)pmp->data;
2300
2301         memset(pmp->data, 0, sizeof(pmp->data));
2302
2303         if (pmp->mad_hdr.attr_mod != 0)
2304                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2305
2306         p->base_version = OPA_MGMT_BASE_VERSION;
2307         p->class_version = OPA_SMI_CLASS_VERSION;
2308         /*
2309          * Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec.
2310          */
2311         p->cap_mask2_resp_time = cpu_to_be32(18);
2312
2313         if (resp_len)
2314                 *resp_len += sizeof(*p);
2315
2316         return reply((struct ib_mad_hdr *)pmp);
2317 }
2318
2319 static void a0_portstatus(struct hfi1_pportdata *ppd,
2320                           struct opa_port_status_rsp *rsp, u32 vl_select_mask)
2321 {
2322         if (!is_bx(ppd->dd)) {
2323                 unsigned long vl;
2324                 u64 sum_vl_xmit_wait = 0;
2325                 u32 vl_all_mask = VL_MASK_ALL;
2326
2327                 for_each_set_bit(vl, (unsigned long *)&(vl_all_mask),
2328                                  8 * sizeof(vl_all_mask)) {
2329                         u64 tmp = sum_vl_xmit_wait +
2330                                   read_port_cntr(ppd, C_TX_WAIT_VL,
2331                                                  idx_from_vl(vl));
2332                         if (tmp < sum_vl_xmit_wait) {
2333                                 /* we wrapped */
2334                                 sum_vl_xmit_wait = (u64)~0;
2335                                 break;
2336                         }
2337                         sum_vl_xmit_wait = tmp;
2338                 }
2339                 if (be64_to_cpu(rsp->port_xmit_wait) > sum_vl_xmit_wait)
2340                         rsp->port_xmit_wait = cpu_to_be64(sum_vl_xmit_wait);
2341         }
2342 }
2343
2344 static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
2345                                   struct ib_device *ibdev,
2346                                   u8 port, u32 *resp_len)
2347 {
2348         struct opa_port_status_req *req =
2349                 (struct opa_port_status_req *)pmp->data;
2350         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2351         struct opa_port_status_rsp *rsp;
2352         u32 vl_select_mask = be32_to_cpu(req->vl_select_mask);
2353         unsigned long vl;
2354         size_t response_data_size;
2355         u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
2356         u8 port_num = req->port_num;
2357         u8 num_vls = hweight32(vl_select_mask);
2358         struct _vls_pctrs *vlinfo;
2359         struct hfi1_ibport *ibp = to_iport(ibdev, port);
2360         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2361         int vfi;
2362         u64 tmp, tmp2;
2363
2364         response_data_size = sizeof(struct opa_port_status_rsp) +
2365                                 num_vls * sizeof(struct _vls_pctrs);
2366         if (response_data_size > sizeof(pmp->data)) {
2367                 pmp->mad_hdr.status |= OPA_PM_STATUS_REQUEST_TOO_LARGE;
2368                 return reply((struct ib_mad_hdr *)pmp);
2369         }
2370
2371         if (nports != 1 || (port_num && port_num != port) ||
2372             num_vls > OPA_MAX_VLS || (vl_select_mask & ~VL_MASK_ALL)) {
2373                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2374                 return reply((struct ib_mad_hdr *)pmp);
2375         }
2376
2377         memset(pmp->data, 0, sizeof(pmp->data));
2378
2379         rsp = (struct opa_port_status_rsp *)pmp->data;
2380         if (port_num)
2381                 rsp->port_num = port_num;
2382         else
2383                 rsp->port_num = port;
2384
2385         rsp->port_rcv_constraint_errors =
2386                 cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
2387                                            CNTR_INVALID_VL));
2388
2389         hfi1_read_link_quality(dd, &rsp->link_quality_indicator);
2390
2391         rsp->vl_select_mask = cpu_to_be32(vl_select_mask);
2392         rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
2393                                           CNTR_INVALID_VL));
2394         rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
2395                                          CNTR_INVALID_VL));
2396         rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
2397                                           CNTR_INVALID_VL));
2398         rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
2399                                          CNTR_INVALID_VL));
2400         rsp->port_multicast_xmit_pkts =
2401                 cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
2402                                           CNTR_INVALID_VL));
2403         rsp->port_multicast_rcv_pkts =
2404                 cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
2405                                           CNTR_INVALID_VL));
2406         rsp->port_xmit_wait =
2407                 cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL));
2408         rsp->port_rcv_fecn =
2409                 cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL));
2410         rsp->port_rcv_becn =
2411                 cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL));
2412         rsp->port_xmit_discards =
2413                 cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
2414                                            CNTR_INVALID_VL));
2415         rsp->port_xmit_constraint_errors =
2416                 cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
2417                                            CNTR_INVALID_VL));
2418         rsp->port_rcv_remote_physical_errors =
2419                 cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2420                                           CNTR_INVALID_VL));
2421         tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL);
2422         tmp2 = tmp + read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL);
2423         if (tmp2 < tmp) {
2424                 /* overflow/wrapped */
2425                 rsp->local_link_integrity_errors = cpu_to_be64(~0);
2426         } else {
2427                 rsp->local_link_integrity_errors = cpu_to_be64(tmp2);
2428         }
2429         tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
2430         tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
2431                                    CNTR_INVALID_VL);
2432         if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
2433                 /* overflow/wrapped */
2434                 rsp->link_error_recovery = cpu_to_be32(~0);
2435         } else {
2436                 rsp->link_error_recovery = cpu_to_be32(tmp2);
2437         }
2438         rsp->port_rcv_errors =
2439                 cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
2440         rsp->excessive_buffer_overruns =
2441                 cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
2442         rsp->fm_config_errors =
2443                 cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
2444                                           CNTR_INVALID_VL));
2445         rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
2446                                                       CNTR_INVALID_VL));
2447
2448         /* rsp->uncorrectable_errors is 8 bits wide, and it pegs at 0xff */
2449         tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
2450         rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
2451
2452         vlinfo = &rsp->vls[0];
2453         vfi = 0;
2454         /* The vl_select_mask has been checked above, and we know
2455          * that it contains only entries which represent valid VLs.
2456          * So in the for_each_set_bit() loop below, we don't need
2457          * any additional checks for vl.
2458          */
2459         for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
2460                          8 * sizeof(vl_select_mask)) {
2461                 memset(vlinfo, 0, sizeof(*vlinfo));
2462
2463                 tmp = read_dev_cntr(dd, C_DC_RX_FLIT_VL, idx_from_vl(vl));
2464                 rsp->vls[vfi].port_vl_rcv_data = cpu_to_be64(tmp);
2465
2466                 rsp->vls[vfi].port_vl_rcv_pkts =
2467                         cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
2468                                                   idx_from_vl(vl)));
2469
2470                 rsp->vls[vfi].port_vl_xmit_data =
2471                         cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
2472                                                    idx_from_vl(vl)));
2473
2474                 rsp->vls[vfi].port_vl_xmit_pkts =
2475                         cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
2476                                                    idx_from_vl(vl)));
2477
2478                 rsp->vls[vfi].port_vl_xmit_wait =
2479                         cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL,
2480                                                    idx_from_vl(vl)));
2481
2482                 rsp->vls[vfi].port_vl_rcv_fecn =
2483                         cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
2484                                                   idx_from_vl(vl)));
2485
2486                 rsp->vls[vfi].port_vl_rcv_becn =
2487                         cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
2488                                                   idx_from_vl(vl)));
2489
2490                 rsp->vls[vfi].port_vl_xmit_discards =
2491                         cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
2492                                                    idx_from_vl(vl)));
2493                 vlinfo++;
2494                 vfi++;
2495         }
2496
2497         a0_portstatus(ppd, rsp, vl_select_mask);
2498
2499         if (resp_len)
2500                 *resp_len += response_data_size;
2501
2502         return reply((struct ib_mad_hdr *)pmp);
2503 }
2504
2505 static u64 get_error_counter_summary(struct ib_device *ibdev, u8 port,
2506                                      u8 res_lli, u8 res_ler)
2507 {
2508         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2509         struct hfi1_ibport *ibp = to_iport(ibdev, port);
2510         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2511         u64 error_counter_summary = 0, tmp;
2512
2513         error_counter_summary += read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
2514                                                 CNTR_INVALID_VL);
2515         /* port_rcv_switch_relay_errors is 0 for HFIs */
2516         error_counter_summary += read_port_cntr(ppd, C_SW_XMIT_DSCD,
2517                                                 CNTR_INVALID_VL);
2518         error_counter_summary += read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
2519                                                 CNTR_INVALID_VL);
2520         error_counter_summary += read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2521                                                CNTR_INVALID_VL);
2522         /* local link integrity must be right-shifted by the lli resolution */
2523         tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL);
2524         tmp += read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL);
2525         error_counter_summary += (tmp >> res_lli);
2526         /* link error recovery must b right-shifted by the ler resolution */
2527         tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
2528         tmp += read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL);
2529         error_counter_summary += (tmp >> res_ler);
2530         error_counter_summary += read_dev_cntr(dd, C_DC_RCV_ERR,
2531                                                CNTR_INVALID_VL);
2532         error_counter_summary += read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
2533         error_counter_summary += read_dev_cntr(dd, C_DC_FM_CFG_ERR,
2534                                                CNTR_INVALID_VL);
2535         /* ppd->link_downed is a 32-bit value */
2536         error_counter_summary += read_port_cntr(ppd, C_SW_LINK_DOWN,
2537                                                 CNTR_INVALID_VL);
2538         tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
2539         /* this is an 8-bit quantity */
2540         error_counter_summary += tmp < 0x100 ? (tmp & 0xff) : 0xff;
2541
2542         return error_counter_summary;
2543 }
2544
2545 static void a0_datacounters(struct hfi1_pportdata *ppd, struct _port_dctrs *rsp,
2546                             u32 vl_select_mask)
2547 {
2548         if (!is_bx(ppd->dd)) {
2549                 unsigned long vl;
2550                 u64 sum_vl_xmit_wait = 0;
2551                 u32 vl_all_mask = VL_MASK_ALL;
2552
2553                 for_each_set_bit(vl, (unsigned long *)&(vl_all_mask),
2554                                  8 * sizeof(vl_all_mask)) {
2555                         u64 tmp = sum_vl_xmit_wait +
2556                                   read_port_cntr(ppd, C_TX_WAIT_VL,
2557                                                  idx_from_vl(vl));
2558                         if (tmp < sum_vl_xmit_wait) {
2559                                 /* we wrapped */
2560                                 sum_vl_xmit_wait = (u64)~0;
2561                                 break;
2562                         }
2563                         sum_vl_xmit_wait = tmp;
2564                 }
2565                 if (be64_to_cpu(rsp->port_xmit_wait) > sum_vl_xmit_wait)
2566                         rsp->port_xmit_wait = cpu_to_be64(sum_vl_xmit_wait);
2567         }
2568 }
2569
2570 static void pma_get_opa_port_dctrs(struct ib_device *ibdev,
2571                                    struct _port_dctrs *rsp)
2572 {
2573         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2574
2575         rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
2576                                                 CNTR_INVALID_VL));
2577         rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
2578                                                 CNTR_INVALID_VL));
2579         rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
2580                                                 CNTR_INVALID_VL));
2581         rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
2582                                                 CNTR_INVALID_VL));
2583         rsp->port_multicast_xmit_pkts =
2584                 cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
2585                                           CNTR_INVALID_VL));
2586         rsp->port_multicast_rcv_pkts =
2587                 cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
2588                                           CNTR_INVALID_VL));
2589 }
2590
2591 static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
2592                                     struct ib_device *ibdev,
2593                                     u8 port, u32 *resp_len)
2594 {
2595         struct opa_port_data_counters_msg *req =
2596                 (struct opa_port_data_counters_msg *)pmp->data;
2597         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2598         struct hfi1_ibport *ibp = to_iport(ibdev, port);
2599         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2600         struct _port_dctrs *rsp;
2601         struct _vls_dctrs *vlinfo;
2602         size_t response_data_size;
2603         u32 num_ports;
2604         u8 num_pslm;
2605         u8 lq, num_vls;
2606         u8 res_lli, res_ler;
2607         u64 port_mask;
2608         unsigned long port_num;
2609         unsigned long vl;
2610         u32 vl_select_mask;
2611         int vfi;
2612
2613         num_ports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
2614         num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
2615         num_vls = hweight32(be32_to_cpu(req->vl_select_mask));
2616         vl_select_mask = be32_to_cpu(req->vl_select_mask);
2617         res_lli = (u8)(be32_to_cpu(req->resolution) & MSK_LLI) >> MSK_LLI_SFT;
2618         res_lli = res_lli ? res_lli + ADD_LLI : 0;
2619         res_ler = (u8)(be32_to_cpu(req->resolution) & MSK_LER) >> MSK_LER_SFT;
2620         res_ler = res_ler ? res_ler + ADD_LER : 0;
2621
2622         if (num_ports != 1 || (vl_select_mask & ~VL_MASK_ALL)) {
2623                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2624                 return reply((struct ib_mad_hdr *)pmp);
2625         }
2626
2627         /* Sanity check */
2628         response_data_size = sizeof(struct opa_port_data_counters_msg) +
2629                                 num_vls * sizeof(struct _vls_dctrs);
2630
2631         if (response_data_size > sizeof(pmp->data)) {
2632                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2633                 return reply((struct ib_mad_hdr *)pmp);
2634         }
2635
2636         /*
2637          * The bit set in the mask needs to be consistent with the
2638          * port the request came in on.
2639          */
2640         port_mask = be64_to_cpu(req->port_select_mask[3]);
2641         port_num = find_first_bit((unsigned long *)&port_mask,
2642                                   sizeof(port_mask));
2643
2644         if ((u8)port_num != port) {
2645                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2646                 return reply((struct ib_mad_hdr *)pmp);
2647         }
2648
2649         rsp = &req->port[0];
2650         memset(rsp, 0, sizeof(*rsp));
2651
2652         rsp->port_number = port;
2653         /*
2654          * Note that link_quality_indicator is a 32 bit quantity in
2655          * 'datacounters' queries (as opposed to 'portinfo' queries,
2656          * where it's a byte).
2657          */
2658         hfi1_read_link_quality(dd, &lq);
2659         rsp->link_quality_indicator = cpu_to_be32((u32)lq);
2660         pma_get_opa_port_dctrs(ibdev, rsp);
2661
2662         rsp->port_xmit_wait =
2663                 cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL));
2664         rsp->port_rcv_fecn =
2665                 cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL));
2666         rsp->port_rcv_becn =
2667                 cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL));
2668         rsp->port_error_counter_summary =
2669                 cpu_to_be64(get_error_counter_summary(ibdev, port,
2670                                                       res_lli, res_ler));
2671
2672         vlinfo = &rsp->vls[0];
2673         vfi = 0;
2674         /* The vl_select_mask has been checked above, and we know
2675          * that it contains only entries which represent valid VLs.
2676          * So in the for_each_set_bit() loop below, we don't need
2677          * any additional checks for vl.
2678          */
2679         for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
2680                          8 * sizeof(req->vl_select_mask)) {
2681                 memset(vlinfo, 0, sizeof(*vlinfo));
2682
2683                 rsp->vls[vfi].port_vl_xmit_data =
2684                         cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
2685                                                    idx_from_vl(vl)));
2686
2687                 rsp->vls[vfi].port_vl_rcv_data =
2688                         cpu_to_be64(read_dev_cntr(dd, C_DC_RX_FLIT_VL,
2689                                                   idx_from_vl(vl)));
2690
2691                 rsp->vls[vfi].port_vl_xmit_pkts =
2692                         cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
2693                                                    idx_from_vl(vl)));
2694
2695                 rsp->vls[vfi].port_vl_rcv_pkts =
2696                         cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
2697                                                   idx_from_vl(vl)));
2698
2699                 rsp->vls[vfi].port_vl_xmit_wait =
2700                         cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL,
2701                                                    idx_from_vl(vl)));
2702
2703                 rsp->vls[vfi].port_vl_rcv_fecn =
2704                         cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
2705                                                   idx_from_vl(vl)));
2706                 rsp->vls[vfi].port_vl_rcv_becn =
2707                         cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
2708                                                   idx_from_vl(vl)));
2709
2710                 /* rsp->port_vl_xmit_time_cong is 0 for HFIs */
2711                 /* rsp->port_vl_xmit_wasted_bw ??? */
2712                 /* port_vl_xmit_wait_data - TXE (table 13-9 HFI spec) ???
2713                  * does this differ from rsp->vls[vfi].port_vl_xmit_wait
2714                  */
2715                 /*rsp->vls[vfi].port_vl_mark_fecn =
2716                  *      cpu_to_be64(read_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT
2717                  *              + offset));
2718                  */
2719                 vlinfo++;
2720                 vfi++;
2721         }
2722
2723         a0_datacounters(ppd, rsp, vl_select_mask);
2724
2725         if (resp_len)
2726                 *resp_len += response_data_size;
2727
2728         return reply((struct ib_mad_hdr *)pmp);
2729 }
2730
2731 static int pma_get_ib_portcounters_ext(struct ib_pma_mad *pmp,
2732                                        struct ib_device *ibdev, u8 port)
2733 {
2734         struct ib_pma_portcounters_ext *p = (struct ib_pma_portcounters_ext *)
2735                                                 pmp->data;
2736         struct _port_dctrs rsp;
2737
2738         if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
2739                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2740                 goto bail;
2741         }
2742
2743         memset(&rsp, 0, sizeof(rsp));
2744         pma_get_opa_port_dctrs(ibdev, &rsp);
2745
2746         p->port_xmit_data = rsp.port_xmit_data;
2747         p->port_rcv_data = rsp.port_rcv_data;
2748         p->port_xmit_packets = rsp.port_xmit_pkts;
2749         p->port_rcv_packets = rsp.port_rcv_pkts;
2750         p->port_unicast_xmit_packets = 0;
2751         p->port_unicast_rcv_packets =  0;
2752         p->port_multicast_xmit_packets = rsp.port_multicast_xmit_pkts;
2753         p->port_multicast_rcv_packets = rsp.port_multicast_rcv_pkts;
2754
2755 bail:
2756         return reply((struct ib_mad_hdr *)pmp);
2757 }
2758
2759 static void pma_get_opa_port_ectrs(struct ib_device *ibdev,
2760                                    struct _port_ectrs *rsp, u8 port)
2761 {
2762         u64 tmp, tmp2;
2763         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2764         struct hfi1_ibport *ibp = to_iport(ibdev, port);
2765         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2766
2767         tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
2768         tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
2769                                         CNTR_INVALID_VL);
2770         if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
2771                 /* overflow/wrapped */
2772                 rsp->link_error_recovery = cpu_to_be32(~0);
2773         } else {
2774                 rsp->link_error_recovery = cpu_to_be32(tmp2);
2775         }
2776
2777         rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
2778                                                 CNTR_INVALID_VL));
2779         rsp->port_rcv_errors =
2780                 cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
2781         rsp->port_rcv_remote_physical_errors =
2782                 cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2783                                           CNTR_INVALID_VL));
2784         rsp->port_rcv_switch_relay_errors = 0;
2785         rsp->port_xmit_discards =
2786                 cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
2787                                            CNTR_INVALID_VL));
2788         rsp->port_xmit_constraint_errors =
2789                 cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
2790                                            CNTR_INVALID_VL));
2791         rsp->port_rcv_constraint_errors =
2792                 cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
2793                                            CNTR_INVALID_VL));
2794         tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL);
2795         tmp2 = tmp + read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL);
2796         if (tmp2 < tmp) {
2797                 /* overflow/wrapped */
2798                 rsp->local_link_integrity_errors = cpu_to_be64(~0);
2799         } else {
2800                 rsp->local_link_integrity_errors = cpu_to_be64(tmp2);
2801         }
2802         rsp->excessive_buffer_overruns =
2803                 cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
2804 }
2805
2806 static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
2807                                   struct ib_device *ibdev,
2808                                   u8 port, u32 *resp_len)
2809 {
2810         size_t response_data_size;
2811         struct _port_ectrs *rsp;
2812         u8 port_num;
2813         struct opa_port_error_counters64_msg *req;
2814         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2815         u32 num_ports;
2816         u8 num_pslm;
2817         u8 num_vls;
2818         struct hfi1_ibport *ibp;
2819         struct hfi1_pportdata *ppd;
2820         struct _vls_ectrs *vlinfo;
2821         unsigned long vl;
2822         u64 port_mask, tmp;
2823         u32 vl_select_mask;
2824         int vfi;
2825
2826         req = (struct opa_port_error_counters64_msg *)pmp->data;
2827
2828         num_ports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
2829
2830         num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
2831         num_vls = hweight32(be32_to_cpu(req->vl_select_mask));
2832
2833         if (num_ports != 1 || num_ports != num_pslm) {
2834                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2835                 return reply((struct ib_mad_hdr *)pmp);
2836         }
2837
2838         response_data_size = sizeof(struct opa_port_error_counters64_msg) +
2839                                 num_vls * sizeof(struct _vls_ectrs);
2840
2841         if (response_data_size > sizeof(pmp->data)) {
2842                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2843                 return reply((struct ib_mad_hdr *)pmp);
2844         }
2845         /*
2846          * The bit set in the mask needs to be consistent with the
2847          * port the request came in on.
2848          */
2849         port_mask = be64_to_cpu(req->port_select_mask[3]);
2850         port_num = find_first_bit((unsigned long *)&port_mask,
2851                                   sizeof(port_mask));
2852
2853         if (port_num != port) {
2854                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2855                 return reply((struct ib_mad_hdr *)pmp);
2856         }
2857
2858         rsp = &req->port[0];
2859
2860         ibp = to_iport(ibdev, port_num);
2861         ppd = ppd_from_ibp(ibp);
2862
2863         memset(rsp, 0, sizeof(*rsp));
2864         rsp->port_number = port_num;
2865
2866         pma_get_opa_port_ectrs(ibdev, rsp, port_num);
2867
2868         rsp->port_rcv_remote_physical_errors =
2869                 cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2870                                           CNTR_INVALID_VL));
2871         rsp->fm_config_errors =
2872                 cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
2873                                           CNTR_INVALID_VL));
2874         tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
2875
2876         rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
2877         rsp->port_rcv_errors =
2878                 cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
2879         vlinfo = &rsp->vls[0];
2880         vfi = 0;
2881         vl_select_mask = be32_to_cpu(req->vl_select_mask);
2882         for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
2883                          8 * sizeof(req->vl_select_mask)) {
2884                 memset(vlinfo, 0, sizeof(*vlinfo));
2885                 rsp->vls[vfi].port_vl_xmit_discards =
2886                         cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
2887                                                    idx_from_vl(vl)));
2888                 vlinfo += 1;
2889                 vfi++;
2890         }
2891
2892         if (resp_len)
2893                 *resp_len += response_data_size;
2894
2895         return reply((struct ib_mad_hdr *)pmp);
2896 }
2897
2898 static int pma_get_ib_portcounters(struct ib_pma_mad *pmp,
2899                                    struct ib_device *ibdev, u8 port)
2900 {
2901         struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
2902                 pmp->data;
2903         struct _port_ectrs rsp;
2904         u64 temp_link_overrun_errors;
2905         u64 temp_64;
2906         u32 temp_32;
2907
2908         memset(&rsp, 0, sizeof(rsp));
2909         pma_get_opa_port_ectrs(ibdev, &rsp, port);
2910
2911         if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
2912                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2913                 goto bail;
2914         }
2915
2916         p->symbol_error_counter = 0; /* N/A for OPA */
2917
2918         temp_32 = be32_to_cpu(rsp.link_error_recovery);
2919         if (temp_32 > 0xFFUL)
2920                 p->link_error_recovery_counter = 0xFF;
2921         else
2922                 p->link_error_recovery_counter = (u8)temp_32;
2923
2924         temp_32 = be32_to_cpu(rsp.link_downed);
2925         if (temp_32 > 0xFFUL)
2926                 p->link_downed_counter = 0xFF;
2927         else
2928                 p->link_downed_counter = (u8)temp_32;
2929
2930         temp_64 = be64_to_cpu(rsp.port_rcv_errors);
2931         if (temp_64 > 0xFFFFUL)
2932                 p->port_rcv_errors = cpu_to_be16(0xFFFF);
2933         else
2934                 p->port_rcv_errors = cpu_to_be16((u16)temp_64);
2935
2936         temp_64 = be64_to_cpu(rsp.port_rcv_remote_physical_errors);
2937         if (temp_64 > 0xFFFFUL)
2938                 p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF);
2939         else
2940                 p->port_rcv_remphys_errors = cpu_to_be16((u16)temp_64);
2941
2942         temp_64 = be64_to_cpu(rsp.port_rcv_switch_relay_errors);
2943         p->port_rcv_switch_relay_errors = cpu_to_be16((u16)temp_64);
2944
2945         temp_64 = be64_to_cpu(rsp.port_xmit_discards);
2946         if (temp_64 > 0xFFFFUL)
2947                 p->port_xmit_discards = cpu_to_be16(0xFFFF);
2948         else
2949                 p->port_xmit_discards = cpu_to_be16((u16)temp_64);
2950
2951         temp_64 = be64_to_cpu(rsp.port_xmit_constraint_errors);
2952         if (temp_64 > 0xFFUL)
2953                 p->port_xmit_constraint_errors = 0xFF;
2954         else
2955                 p->port_xmit_constraint_errors = (u8)temp_64;
2956
2957         temp_64 = be64_to_cpu(rsp.port_rcv_constraint_errors);
2958         if (temp_64 > 0xFFUL)
2959                 p->port_rcv_constraint_errors = 0xFFUL;
2960         else
2961                 p->port_rcv_constraint_errors = (u8)temp_64;
2962
2963         /* LocalLink: 7:4, BufferOverrun: 3:0 */
2964         temp_64 = be64_to_cpu(rsp.local_link_integrity_errors);
2965         if (temp_64 > 0xFUL)
2966                 temp_64 = 0xFUL;
2967
2968         temp_link_overrun_errors = temp_64 << 4;
2969
2970         temp_64 = be64_to_cpu(rsp.excessive_buffer_overruns);
2971         if (temp_64 > 0xFUL)
2972                 temp_64 = 0xFUL;
2973         temp_link_overrun_errors |= temp_64;
2974
2975         p->link_overrun_errors = (u8)temp_link_overrun_errors;
2976
2977         p->vl15_dropped = 0; /* N/A for OPA */
2978
2979 bail:
2980         return reply((struct ib_mad_hdr *)pmp);
2981 }
2982
2983 static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
2984                                  struct ib_device *ibdev,
2985                                  u8 port, u32 *resp_len)
2986 {
2987         size_t response_data_size;
2988         struct _port_ei *rsp;
2989         struct opa_port_error_info_msg *req;
2990         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2991         u64 port_mask;
2992         u32 num_ports;
2993         u8 port_num;
2994         u8 num_pslm;
2995         u64 reg;
2996
2997         req = (struct opa_port_error_info_msg *)pmp->data;
2998         rsp = &req->port[0];
2999
3000         num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
3001         num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
3002
3003         memset(rsp, 0, sizeof(*rsp));
3004
3005         if (num_ports != 1 || num_ports != num_pslm) {
3006                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3007                 return reply((struct ib_mad_hdr *)pmp);
3008         }
3009
3010         /* Sanity check */
3011         response_data_size = sizeof(struct opa_port_error_info_msg);
3012
3013         if (response_data_size > sizeof(pmp->data)) {
3014                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3015                 return reply((struct ib_mad_hdr *)pmp);
3016         }
3017
3018         /*
3019          * The bit set in the mask needs to be consistent with the port
3020          * the request came in on.
3021          */
3022         port_mask = be64_to_cpu(req->port_select_mask[3]);
3023         port_num = find_first_bit((unsigned long *)&port_mask,
3024                                   sizeof(port_mask));
3025
3026         if (port_num != port) {
3027                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3028                 return reply((struct ib_mad_hdr *)pmp);
3029         }
3030
3031         /* PortRcvErrorInfo */
3032         rsp->port_rcv_ei.status_and_code =
3033                 dd->err_info_rcvport.status_and_code;
3034         memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit1,
3035                &dd->err_info_rcvport.packet_flit1, sizeof(u64));
3036         memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit2,
3037                &dd->err_info_rcvport.packet_flit2, sizeof(u64));
3038
3039         /* ExcessiverBufferOverrunInfo */
3040         reg = read_csr(dd, RCV_ERR_INFO);
3041         if (reg & RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK) {
3042                 /*
3043                  * if the RcvExcessBufferOverrun bit is set, save SC of
3044                  * first pkt that encountered an excess buffer overrun
3045                  */
3046                 u8 tmp = (u8)reg;
3047
3048                 tmp &=  RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SC_SMASK;
3049                 tmp <<= 2;
3050                 rsp->excessive_buffer_overrun_ei.status_and_sc = tmp;
3051                 /* set the status bit */
3052                 rsp->excessive_buffer_overrun_ei.status_and_sc |= 0x80;
3053         }
3054
3055         rsp->port_xmit_constraint_ei.status =
3056                 dd->err_info_xmit_constraint.status;
3057         rsp->port_xmit_constraint_ei.pkey =
3058                 cpu_to_be16(dd->err_info_xmit_constraint.pkey);
3059         rsp->port_xmit_constraint_ei.slid =
3060                 cpu_to_be32(dd->err_info_xmit_constraint.slid);
3061
3062         rsp->port_rcv_constraint_ei.status =
3063                 dd->err_info_rcv_constraint.status;
3064         rsp->port_rcv_constraint_ei.pkey =
3065                 cpu_to_be16(dd->err_info_rcv_constraint.pkey);
3066         rsp->port_rcv_constraint_ei.slid =
3067                 cpu_to_be32(dd->err_info_rcv_constraint.slid);
3068
3069         /* UncorrectableErrorInfo */
3070         rsp->uncorrectable_ei.status_and_code = dd->err_info_uncorrectable;
3071
3072         /* FMConfigErrorInfo */
3073         rsp->fm_config_ei.status_and_code = dd->err_info_fmconfig;
3074
3075         if (resp_len)
3076                 *resp_len += response_data_size;
3077
3078         return reply((struct ib_mad_hdr *)pmp);
3079 }
3080
3081 static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
3082                                   struct ib_device *ibdev,
3083                                   u8 port, u32 *resp_len)
3084 {
3085         struct opa_clear_port_status *req =
3086                 (struct opa_clear_port_status *)pmp->data;
3087         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3088         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3089         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3090         u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
3091         u64 portn = be64_to_cpu(req->port_select_mask[3]);
3092         u32 counter_select = be32_to_cpu(req->counter_select_mask);
3093         u32 vl_select_mask = VL_MASK_ALL; /* clear all per-vl cnts */
3094         unsigned long vl;
3095
3096         if ((nports != 1) || (portn != 1 << port)) {
3097                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3098                 return reply((struct ib_mad_hdr *)pmp);
3099         }
3100         /*
3101          * only counters returned by pma_get_opa_portstatus() are
3102          * handled, so when pma_get_opa_portstatus() gets a fix,
3103          * the corresponding change should be made here as well.
3104          */
3105
3106         if (counter_select & CS_PORT_XMIT_DATA)
3107                 write_dev_cntr(dd, C_DC_XMIT_FLITS, CNTR_INVALID_VL, 0);
3108
3109         if (counter_select & CS_PORT_RCV_DATA)
3110                 write_dev_cntr(dd, C_DC_RCV_FLITS, CNTR_INVALID_VL, 0);
3111
3112         if (counter_select & CS_PORT_XMIT_PKTS)
3113                 write_dev_cntr(dd, C_DC_XMIT_PKTS, CNTR_INVALID_VL, 0);
3114
3115         if (counter_select & CS_PORT_RCV_PKTS)
3116                 write_dev_cntr(dd, C_DC_RCV_PKTS, CNTR_INVALID_VL, 0);
3117
3118         if (counter_select & CS_PORT_MCAST_XMIT_PKTS)
3119                 write_dev_cntr(dd, C_DC_MC_XMIT_PKTS, CNTR_INVALID_VL, 0);
3120
3121         if (counter_select & CS_PORT_MCAST_RCV_PKTS)
3122                 write_dev_cntr(dd, C_DC_MC_RCV_PKTS, CNTR_INVALID_VL, 0);
3123
3124         if (counter_select & CS_PORT_XMIT_WAIT)
3125                 write_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL, 0);
3126
3127         /* ignore cs_sw_portCongestion for HFIs */
3128
3129         if (counter_select & CS_PORT_RCV_FECN)
3130                 write_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL, 0);
3131
3132         if (counter_select & CS_PORT_RCV_BECN)
3133                 write_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL, 0);
3134
3135         /* ignore cs_port_xmit_time_cong for HFIs */
3136         /* ignore cs_port_xmit_wasted_bw for now */
3137         /* ignore cs_port_xmit_wait_data for now */
3138         if (counter_select & CS_PORT_RCV_BUBBLE)
3139                 write_dev_cntr(dd, C_DC_RCV_BBL, CNTR_INVALID_VL, 0);
3140
3141         /* Only applicable for switch */
3142         /* if (counter_select & CS_PORT_MARK_FECN)
3143          *      write_csr(dd, DCC_PRF_PORT_MARK_FECN_CNT, 0);
3144          */
3145
3146         if (counter_select & CS_PORT_RCV_CONSTRAINT_ERRORS)
3147                 write_port_cntr(ppd, C_SW_RCV_CSTR_ERR, CNTR_INVALID_VL, 0);
3148
3149         /* ignore cs_port_rcv_switch_relay_errors for HFIs */
3150         if (counter_select & CS_PORT_XMIT_DISCARDS)
3151                 write_port_cntr(ppd, C_SW_XMIT_DSCD, CNTR_INVALID_VL, 0);
3152
3153         if (counter_select & CS_PORT_XMIT_CONSTRAINT_ERRORS)
3154                 write_port_cntr(ppd, C_SW_XMIT_CSTR_ERR, CNTR_INVALID_VL, 0);
3155
3156         if (counter_select & CS_PORT_RCV_REMOTE_PHYSICAL_ERRORS)
3157                 write_dev_cntr(dd, C_DC_RMT_PHY_ERR, CNTR_INVALID_VL, 0);
3158
3159         if (counter_select & CS_LOCAL_LINK_INTEGRITY_ERRORS) {
3160                 write_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL, 0);
3161                 write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0);
3162         }
3163
3164         if (counter_select & CS_LINK_ERROR_RECOVERY) {
3165                 write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
3166                 write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
3167                                CNTR_INVALID_VL, 0);
3168         }
3169
3170         if (counter_select & CS_PORT_RCV_ERRORS)
3171                 write_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL, 0);
3172
3173         if (counter_select & CS_EXCESSIVE_BUFFER_OVERRUNS) {
3174                 write_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL, 0);
3175                 dd->rcv_ovfl_cnt = 0;
3176         }
3177
3178         if (counter_select & CS_FM_CONFIG_ERRORS)
3179                 write_dev_cntr(dd, C_DC_FM_CFG_ERR, CNTR_INVALID_VL, 0);
3180
3181         if (counter_select & CS_LINK_DOWNED)
3182                 write_port_cntr(ppd, C_SW_LINK_DOWN, CNTR_INVALID_VL, 0);
3183
3184         if (counter_select & CS_UNCORRECTABLE_ERRORS)
3185                 write_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL, 0);
3186
3187         for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
3188                          8 * sizeof(vl_select_mask)) {
3189                 if (counter_select & CS_PORT_XMIT_DATA)
3190                         write_port_cntr(ppd, C_TX_FLIT_VL, idx_from_vl(vl), 0);
3191
3192                 if (counter_select & CS_PORT_RCV_DATA)
3193                         write_dev_cntr(dd, C_DC_RX_FLIT_VL, idx_from_vl(vl), 0);
3194
3195                 if (counter_select & CS_PORT_XMIT_PKTS)
3196                         write_port_cntr(ppd, C_TX_PKT_VL, idx_from_vl(vl), 0);
3197
3198                 if (counter_select & CS_PORT_RCV_PKTS)
3199                         write_dev_cntr(dd, C_DC_RX_PKT_VL, idx_from_vl(vl), 0);
3200
3201                 if (counter_select & CS_PORT_XMIT_WAIT)
3202                         write_port_cntr(ppd, C_TX_WAIT_VL, idx_from_vl(vl), 0);
3203
3204                 /* sw_port_vl_congestion is 0 for HFIs */
3205                 if (counter_select & CS_PORT_RCV_FECN)
3206                         write_dev_cntr(dd, C_DC_RCV_FCN_VL, idx_from_vl(vl), 0);
3207
3208                 if (counter_select & CS_PORT_RCV_BECN)
3209                         write_dev_cntr(dd, C_DC_RCV_BCN_VL, idx_from_vl(vl), 0);
3210
3211                 /* port_vl_xmit_time_cong is 0 for HFIs */
3212                 /* port_vl_xmit_wasted_bw ??? */
3213                 /* port_vl_xmit_wait_data - TXE (table 13-9 HFI spec) ??? */
3214                 if (counter_select & CS_PORT_RCV_BUBBLE)
3215                         write_dev_cntr(dd, C_DC_RCV_BBL_VL, idx_from_vl(vl), 0);
3216
3217                 /* if (counter_select & CS_PORT_MARK_FECN)
3218                  *     write_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT + offset, 0);
3219                  */
3220                 if (counter_select & C_SW_XMIT_DSCD_VL)
3221                         write_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
3222                                         idx_from_vl(vl), 0);
3223         }
3224
3225         if (resp_len)
3226                 *resp_len += sizeof(*req);
3227
3228         return reply((struct ib_mad_hdr *)pmp);
3229 }
3230
3231 static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
3232                                  struct ib_device *ibdev,
3233                                  u8 port, u32 *resp_len)
3234 {
3235         struct _port_ei *rsp;
3236         struct opa_port_error_info_msg *req;
3237         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3238         u64 port_mask;
3239         u32 num_ports;
3240         u8 port_num;
3241         u8 num_pslm;
3242         u32 error_info_select;
3243
3244         req = (struct opa_port_error_info_msg *)pmp->data;
3245         rsp = &req->port[0];
3246
3247         num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
3248         num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
3249
3250         memset(rsp, 0, sizeof(*rsp));
3251
3252         if (num_ports != 1 || num_ports != num_pslm) {
3253                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3254                 return reply((struct ib_mad_hdr *)pmp);
3255         }
3256
3257         /*
3258          * The bit set in the mask needs to be consistent with the port
3259          * the request came in on.
3260          */
3261         port_mask = be64_to_cpu(req->port_select_mask[3]);
3262         port_num = find_first_bit((unsigned long *)&port_mask,
3263                                   sizeof(port_mask));
3264
3265         if (port_num != port) {
3266                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3267                 return reply((struct ib_mad_hdr *)pmp);
3268         }
3269
3270         error_info_select = be32_to_cpu(req->error_info_select_mask);
3271
3272         /* PortRcvErrorInfo */
3273         if (error_info_select & ES_PORT_RCV_ERROR_INFO)
3274                 /* turn off status bit */
3275                 dd->err_info_rcvport.status_and_code &= ~OPA_EI_STATUS_SMASK;
3276
3277         /* ExcessiverBufferOverrunInfo */
3278         if (error_info_select & ES_EXCESSIVE_BUFFER_OVERRUN_INFO)
3279                 /*
3280                  * status bit is essentially kept in the h/w - bit 5 of
3281                  * RCV_ERR_INFO
3282                  */
3283                 write_csr(dd, RCV_ERR_INFO,
3284                           RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
3285
3286         if (error_info_select & ES_PORT_XMIT_CONSTRAINT_ERROR_INFO)
3287                 dd->err_info_xmit_constraint.status &= ~OPA_EI_STATUS_SMASK;
3288
3289         if (error_info_select & ES_PORT_RCV_CONSTRAINT_ERROR_INFO)
3290                 dd->err_info_rcv_constraint.status &= ~OPA_EI_STATUS_SMASK;
3291
3292         /* UncorrectableErrorInfo */
3293         if (error_info_select & ES_UNCORRECTABLE_ERROR_INFO)
3294                 /* turn off status bit */
3295                 dd->err_info_uncorrectable &= ~OPA_EI_STATUS_SMASK;
3296
3297         /* FMConfigErrorInfo */
3298         if (error_info_select & ES_FM_CONFIG_ERROR_INFO)
3299                 /* turn off status bit */
3300                 dd->err_info_fmconfig &= ~OPA_EI_STATUS_SMASK;
3301
3302         if (resp_len)
3303                 *resp_len += sizeof(*req);
3304
3305         return reply((struct ib_mad_hdr *)pmp);
3306 }
3307
/*
 * Payload of the CongestionInfo attribute as filled in by
 * __subn_get_opa_cong_info().  The structure is packed because it is
 * overlaid directly on the MAD data buffer.
 */
struct opa_congestion_info_attr {
        __be16 congestion_info;         /* always reported as 0 here */
        u8 control_table_cap;   /* Multiple of 64 entry unit CCTs */
        u8 congestion_log_length;       /* reported as OPA_CONG_LOG_ELEMS */
} __packed;
3313
3314 static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data,
3315                                     struct ib_device *ibdev, u8 port,
3316                                     u32 *resp_len)
3317 {
3318         struct opa_congestion_info_attr *p =
3319                 (struct opa_congestion_info_attr *)data;
3320         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3321         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3322
3323         p->congestion_info = 0;
3324         p->control_table_cap = ppd->cc_max_table_entries;
3325         p->congestion_log_length = OPA_CONG_LOG_ELEMS;
3326
3327         if (resp_len)
3328                 *resp_len += sizeof(*p);
3329
3330         return reply((struct ib_mad_hdr *)smp);
3331 }
3332
3333 static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am,
3334                                        u8 *data, struct ib_device *ibdev,
3335                                        u8 port, u32 *resp_len)
3336 {
3337         int i;
3338         struct opa_congestion_setting_attr *p =
3339                 (struct opa_congestion_setting_attr *)data;
3340         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3341         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3342         struct opa_congestion_setting_entry_shadow *entries;
3343         struct cc_state *cc_state;
3344
3345         rcu_read_lock();
3346
3347         cc_state = get_cc_state(ppd);
3348
3349         if (!cc_state) {
3350                 rcu_read_unlock();
3351                 return reply((struct ib_mad_hdr *)smp);
3352         }
3353
3354         entries = cc_state->cong_setting.entries;
3355         p->port_control = cpu_to_be16(cc_state->cong_setting.port_control);
3356         p->control_map = cpu_to_be32(cc_state->cong_setting.control_map);
3357         for (i = 0; i < OPA_MAX_SLS; i++) {
3358                 p->entries[i].ccti_increase = entries[i].ccti_increase;
3359                 p->entries[i].ccti_timer = cpu_to_be16(entries[i].ccti_timer);
3360                 p->entries[i].trigger_threshold =
3361                         entries[i].trigger_threshold;
3362                 p->entries[i].ccti_min = entries[i].ccti_min;
3363         }
3364
3365         rcu_read_unlock();
3366
3367         if (resp_len)
3368                 *resp_len += sizeof(*p);
3369
3370         return reply((struct ib_mad_hdr *)smp);
3371 }
3372
3373 /*
3374  * Apply congestion control information stored in the ppd to the
3375  * active structure.
3376  */
3377 static void apply_cc_state(struct hfi1_pportdata *ppd)
3378 {
3379         struct cc_state *old_cc_state, *new_cc_state;
3380
3381         new_cc_state = kzalloc(sizeof(*new_cc_state), GFP_KERNEL);
3382         if (!new_cc_state)
3383                 return;
3384
3385         /*
3386          * Hold the lock for updating *and* to prevent ppd information
3387          * from changing during the update.
3388          */
3389         spin_lock(&ppd->cc_state_lock);
3390
3391         old_cc_state = get_cc_state(ppd);
3392         if (!old_cc_state) {
3393                 /* never active, or shutting down */
3394                 spin_unlock(&ppd->cc_state_lock);
3395                 kfree(new_cc_state);
3396                 return;
3397         }
3398
3399         *new_cc_state = *old_cc_state;
3400
3401         new_cc_state->cct.ccti_limit = ppd->total_cct_entry - 1;
3402         memcpy(new_cc_state->cct.entries, ppd->ccti_entries,
3403                ppd->total_cct_entry * sizeof(struct ib_cc_table_entry));
3404
3405         new_cc_state->cong_setting.port_control = IB_CC_CCS_PC_SL_BASED;
3406         new_cc_state->cong_setting.control_map = ppd->cc_sl_control_map;
3407         memcpy(new_cc_state->cong_setting.entries, ppd->congestion_entries,
3408                OPA_MAX_SLS * sizeof(struct opa_congestion_setting_entry));
3409
3410         rcu_assign_pointer(ppd->cc_state, new_cc_state);
3411
3412         spin_unlock(&ppd->cc_state_lock);
3413
3414         call_rcu(&old_cc_state->rcu, cc_state_reclaim);
3415 }
3416
3417 static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
3418                                        struct ib_device *ibdev, u8 port,
3419                                        u32 *resp_len)
3420 {
3421         struct opa_congestion_setting_attr *p =
3422                 (struct opa_congestion_setting_attr *)data;
3423         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3424         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3425         struct opa_congestion_setting_entry_shadow *entries;
3426         int i;
3427
3428         /*
3429          * Save details from packet into the ppd.  Hold the cc_state_lock so
3430          * our information is consistent with anyone trying to apply the state.
3431          */
3432         spin_lock(&ppd->cc_state_lock);
3433         ppd->cc_sl_control_map = be32_to_cpu(p->control_map);
3434
3435         entries = ppd->congestion_entries;
3436         for (i = 0; i < OPA_MAX_SLS; i++) {
3437                 entries[i].ccti_increase = p->entries[i].ccti_increase;
3438                 entries[i].ccti_timer = be16_to_cpu(p->entries[i].ccti_timer);
3439                 entries[i].trigger_threshold =
3440                         p->entries[i].trigger_threshold;
3441                 entries[i].ccti_min = p->entries[i].ccti_min;
3442         }
3443         spin_unlock(&ppd->cc_state_lock);
3444
3445         /* now apply the information */
3446         apply_cc_state(ppd);
3447
3448         return __subn_get_opa_cong_setting(smp, am, data, ibdev, port,
3449                                            resp_len);
3450 }
3451
3452 static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am,
3453                                         u8 *data, struct ib_device *ibdev,
3454                                         u8 port, u32 *resp_len)
3455 {
3456         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3457         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3458         struct opa_hfi1_cong_log *cong_log = (struct opa_hfi1_cong_log *)data;
3459         s64 ts;
3460         int i;
3461
3462         if (am != 0) {
3463                 smp->status |= IB_SMP_INVALID_FIELD;
3464                 return reply((struct ib_mad_hdr *)smp);
3465         }
3466
3467         spin_lock_irq(&ppd->cc_log_lock);
3468
3469         cong_log->log_type = OPA_CC_LOG_TYPE_HFI;
3470         cong_log->congestion_flags = 0;
3471         cong_log->threshold_event_counter =
3472                 cpu_to_be16(ppd->threshold_event_counter);
3473         memcpy(cong_log->threshold_cong_event_map,
3474                ppd->threshold_cong_event_map,
3475                sizeof(cong_log->threshold_cong_event_map));
3476         /* keep timestamp in units of 1.024 usec */
3477         ts = ktime_to_ns(ktime_get()) / 1024;
3478         cong_log->current_time_stamp = cpu_to_be32(ts);
3479         for (i = 0; i < OPA_CONG_LOG_ELEMS; i++) {
3480                 struct opa_hfi1_cong_log_event_internal *cce =
3481                         &ppd->cc_events[ppd->cc_mad_idx++];
3482                 if (ppd->cc_mad_idx == OPA_CONG_LOG_ELEMS)
3483                         ppd->cc_mad_idx = 0;
3484                 /*
3485                  * Entries which are older than twice the time
3486                  * required to wrap the counter are supposed to
3487                  * be zeroed (CA10-49 IBTA, release 1.2.1, V1).
3488                  */
3489                 if ((u64)(ts - cce->timestamp) > (2 * UINT_MAX))
3490                         continue;
3491                 memcpy(cong_log->events[i].local_qp_cn_entry, &cce->lqpn, 3);
3492                 memcpy(cong_log->events[i].remote_qp_number_cn_entry,
3493                        &cce->rqpn, 3);
3494                 cong_log->events[i].sl_svc_type_cn_entry =
3495                         ((cce->sl & 0x1f) << 3) | (cce->svc_type & 0x7);
3496                 cong_log->events[i].remote_lid_cn_entry =
3497                         cpu_to_be32(cce->rlid);
3498                 cong_log->events[i].timestamp_cn_entry =
3499                         cpu_to_be32(cce->timestamp);
3500         }
3501
3502         /*
3503          * Reset threshold_cong_event_map, and threshold_event_counter
3504          * to 0 when log is read.
3505          */
3506         memset(ppd->threshold_cong_event_map, 0x0,
3507                sizeof(ppd->threshold_cong_event_map));
3508         ppd->threshold_event_counter = 0;
3509
3510         spin_unlock_irq(&ppd->cc_log_lock);
3511
3512         if (resp_len)
3513                 *resp_len += sizeof(struct opa_hfi1_cong_log);
3514
3515         return reply((struct ib_mad_hdr *)smp);
3516 }
3517
3518 static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
3519                                    struct ib_device *ibdev, u8 port,
3520                                    u32 *resp_len)
3521 {
3522         struct ib_cc_table_attr *cc_table_attr =
3523                 (struct ib_cc_table_attr *)data;
3524         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3525         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3526         u32 start_block = OPA_AM_START_BLK(am);
3527         u32 n_blocks = OPA_AM_NBLK(am);
3528         struct ib_cc_table_entry_shadow *entries;
3529         int i, j;
3530         u32 sentry, eentry;
3531         struct cc_state *cc_state;
3532
3533         /* sanity check n_blocks, start_block */
3534         if (n_blocks == 0 ||
3535             start_block + n_blocks > ppd->cc_max_table_entries) {
3536                 smp->status |= IB_SMP_INVALID_FIELD;
3537                 return reply((struct ib_mad_hdr *)smp);
3538         }
3539
3540         rcu_read_lock();
3541
3542         cc_state = get_cc_state(ppd);
3543
3544         if (!cc_state) {
3545                 rcu_read_unlock();
3546                 return reply((struct ib_mad_hdr *)smp);
3547         }
3548
3549         sentry = start_block * IB_CCT_ENTRIES;
3550         eentry = sentry + (IB_CCT_ENTRIES * n_blocks);
3551
3552         cc_table_attr->ccti_limit = cpu_to_be16(cc_state->cct.ccti_limit);
3553
3554         entries = cc_state->cct.entries;
3555
3556         /* return n_blocks, though the last block may not be full */
3557         for (j = 0, i = sentry; i < eentry; j++, i++)
3558                 cc_table_attr->ccti_entries[j].entry =
3559                         cpu_to_be16(entries[i].entry);
3560
3561         rcu_read_unlock();
3562
3563         if (resp_len)
3564                 *resp_len += sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
3565
3566         return reply((struct ib_mad_hdr *)smp);
3567 }
3568
3569 void cc_state_reclaim(struct rcu_head *rcu)
3570 {
3571         struct cc_state *cc_state = container_of(rcu, struct cc_state, rcu);
3572
3573         kfree(cc_state);
3574 }
3575
3576 static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
3577                                    struct ib_device *ibdev, u8 port,
3578                                    u32 *resp_len)
3579 {
3580         struct ib_cc_table_attr *p = (struct ib_cc_table_attr *)data;
3581         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3582         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3583         u32 start_block = OPA_AM_START_BLK(am);
3584         u32 n_blocks = OPA_AM_NBLK(am);
3585         struct ib_cc_table_entry_shadow *entries;
3586         int i, j;
3587         u32 sentry, eentry;
3588         u16 ccti_limit;
3589
3590         /* sanity check n_blocks, start_block */
3591         if (n_blocks == 0 ||
3592             start_block + n_blocks > ppd->cc_max_table_entries) {
3593                 smp->status |= IB_SMP_INVALID_FIELD;
3594                 return reply((struct ib_mad_hdr *)smp);
3595         }
3596
3597         sentry = start_block * IB_CCT_ENTRIES;
3598         eentry = sentry + ((n_blocks - 1) * IB_CCT_ENTRIES) +
3599                  (be16_to_cpu(p->ccti_limit)) % IB_CCT_ENTRIES + 1;
3600
3601         /* sanity check ccti_limit */
3602         ccti_limit = be16_to_cpu(p->ccti_limit);
3603         if (ccti_limit + 1 > eentry) {
3604                 smp->status |= IB_SMP_INVALID_FIELD;
3605                 return reply((struct ib_mad_hdr *)smp);
3606         }
3607
3608         /*
3609          * Save details from packet into the ppd.  Hold the cc_state_lock so
3610          * our information is consistent with anyone trying to apply the state.
3611          */
3612         spin_lock(&ppd->cc_state_lock);
3613         ppd->total_cct_entry = ccti_limit + 1;
3614         entries = ppd->ccti_entries;
3615         for (j = 0, i = sentry; i < eentry; j++, i++)
3616                 entries[i].entry = be16_to_cpu(p->ccti_entries[j].entry);
3617         spin_unlock(&ppd->cc_state_lock);
3618
3619         /* now apply the information */
3620         apply_cc_state(ppd);
3621
3622         return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len);
3623 }
3624
/*
 * Payload of the LedInfo attribute.  Only bit OPA_LED_SHIFT of
 * rsvd_led_mask is interpreted by the Get/Set handlers below it in this
 * file: it carries the LED beaconing on/off state.
 */
struct opa_led_info {
        __be32 rsvd_led_mask;   /* LED state in the top bit, rest reserved */
        __be32 rsvd;
};

/* position and mask of the LED bit within rsvd_led_mask (CPU byte order) */
#define OPA_LED_SHIFT   31
#define OPA_LED_MASK    BIT(OPA_LED_SHIFT)
3632
3633 static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
3634                                    struct ib_device *ibdev, u8 port,
3635                                    u32 *resp_len)
3636 {
3637         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3638         struct hfi1_pportdata *ppd = dd->pport;
3639         struct opa_led_info *p = (struct opa_led_info *)data;
3640         u32 nport = OPA_AM_NPORT(am);
3641         u32 is_beaconing_active;
3642
3643         if (nport != 1) {
3644                 smp->status |= IB_SMP_INVALID_FIELD;
3645                 return reply((struct ib_mad_hdr *)smp);
3646         }
3647
3648         /*
3649          * This pairs with the memory barrier in hfi1_start_led_override to
3650          * ensure that we read the correct state of LED beaconing represented
3651          * by led_override_timer_active
3652          */
3653         smp_rmb();
3654         is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
3655         p->rsvd_led_mask = cpu_to_be32(is_beaconing_active << OPA_LED_SHIFT);
3656
3657         if (resp_len)
3658                 *resp_len += sizeof(struct opa_led_info);
3659
3660         return reply((struct ib_mad_hdr *)smp);
3661 }
3662
3663 static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
3664                                    struct ib_device *ibdev, u8 port,
3665                                    u32 *resp_len)
3666 {
3667         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3668         struct opa_led_info *p = (struct opa_led_info *)data;
3669         u32 nport = OPA_AM_NPORT(am);
3670         int on = !!(be32_to_cpu(p->rsvd_led_mask) & OPA_LED_MASK);
3671
3672         if (nport != 1) {
3673                 smp->status |= IB_SMP_INVALID_FIELD;
3674                 return reply((struct ib_mad_hdr *)smp);
3675         }
3676
3677         if (on)
3678                 hfi1_start_led_override(dd->pport, 2000, 1500);
3679         else
3680                 shutdown_led_override(dd->pport);
3681
3682         return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len);
3683 }
3684
3685 static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
3686                             u8 *data, struct ib_device *ibdev, u8 port,
3687                             u32 *resp_len)
3688 {
3689         int ret;
3690         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3691
3692         switch (attr_id) {
3693         case IB_SMP_ATTR_NODE_DESC:
3694                 ret = __subn_get_opa_nodedesc(smp, am, data, ibdev, port,
3695                                               resp_len);
3696                 break;
3697         case IB_SMP_ATTR_NODE_INFO:
3698                 ret = __subn_get_opa_nodeinfo(smp, am, data, ibdev, port,
3699                                               resp_len);
3700                 break;
3701         case IB_SMP_ATTR_PORT_INFO:
3702                 ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port,
3703                                               resp_len);
3704                 break;
3705         case IB_SMP_ATTR_PKEY_TABLE:
3706                 ret = __subn_get_opa_pkeytable(smp, am, data, ibdev, port,
3707                                                resp_len);
3708                 break;
3709         case OPA_ATTRIB_ID_SL_TO_SC_MAP:
3710                 ret = __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port,
3711                                               resp_len);
3712                 break;
3713         case OPA_ATTRIB_ID_SC_TO_SL_MAP:
3714                 ret = __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port,
3715                                               resp_len);
3716                 break;
3717         case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
3718                 ret = __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port,
3719                                                resp_len);
3720                 break;
3721         case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
3722                 ret = __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
3723                                                 resp_len);
3724                 break;
3725         case OPA_ATTRIB_ID_PORT_STATE_INFO:
3726                 ret = __subn_get_opa_psi(smp, am, data, ibdev, port,
3727                                          resp_len);
3728                 break;
3729         case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
3730                 ret = __subn_get_opa_bct(smp, am, data, ibdev, port,
3731                                          resp_len);
3732                 break;
3733         case OPA_ATTRIB_ID_CABLE_INFO:
3734                 ret = __subn_get_opa_cable_info(smp, am, data, ibdev, port,
3735                                                 resp_len);
3736                 break;
3737         case IB_SMP_ATTR_VL_ARB_TABLE:
3738                 ret = __subn_get_opa_vl_arb(smp, am, data, ibdev, port,
3739                                             resp_len);
3740                 break;
3741         case OPA_ATTRIB_ID_CONGESTION_INFO:
3742                 ret = __subn_get_opa_cong_info(smp, am, data, ibdev, port,
3743                                                resp_len);
3744                 break;
3745         case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
3746                 ret = __subn_get_opa_cong_setting(smp, am, data, ibdev,
3747                                                   port, resp_len);
3748                 break;
3749         case OPA_ATTRIB_ID_HFI_CONGESTION_LOG:
3750                 ret = __subn_get_opa_hfi1_cong_log(smp, am, data, ibdev,
3751                                                    port, resp_len);
3752                 break;
3753         case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
3754                 ret = __subn_get_opa_cc_table(smp, am, data, ibdev, port,
3755                                               resp_len);
3756                 break;
3757         case IB_SMP_ATTR_LED_INFO:
3758                 ret = __subn_get_opa_led_info(smp, am, data, ibdev, port,
3759                                               resp_len);
3760                 break;
3761         case IB_SMP_ATTR_SM_INFO:
3762                 if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
3763                         return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
3764                 if (ibp->rvp.port_cap_flags & IB_PORT_SM)
3765                         return IB_MAD_RESULT_SUCCESS;
3766                 /* FALLTHROUGH */
3767         default:
3768                 smp->status |= IB_SMP_UNSUP_METH_ATTR;
3769                 ret = reply((struct ib_mad_hdr *)smp);
3770                 break;
3771         }
3772         return ret;
3773 }
3774
3775 static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
3776                             u8 *data, struct ib_device *ibdev, u8 port,
3777                             u32 *resp_len)
3778 {
3779         int ret;
3780         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3781
3782         switch (attr_id) {
3783         case IB_SMP_ATTR_PORT_INFO:
3784                 ret = __subn_set_opa_portinfo(smp, am, data, ibdev, port,
3785                                               resp_len);
3786                 break;
3787         case IB_SMP_ATTR_PKEY_TABLE:
3788                 ret = __subn_set_opa_pkeytable(smp, am, data, ibdev, port,
3789                                                resp_len);
3790                 break;
3791         case OPA_ATTRIB_ID_SL_TO_SC_MAP:
3792                 ret = __subn_set_opa_sl_to_sc(smp, am, data, ibdev, port,
3793                                               resp_len);
3794                 break;
3795         case OPA_ATTRIB_ID_SC_TO_SL_MAP:
3796                 ret = __subn_set_opa_sc_to_sl(smp, am, data, ibdev, port,
3797                                               resp_len);
3798                 break;
3799         case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
3800                 ret = __subn_set_opa_sc_to_vlt(smp, am, data, ibdev, port,
3801                                                resp_len);
3802                 break;
3803         case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
3804                 ret = __subn_set_opa_sc_to_vlnt(smp, am, data, ibdev, port,
3805                                                 resp_len);
3806                 break;
3807         case OPA_ATTRIB_ID_PORT_STATE_INFO:
3808                 ret = __subn_set_opa_psi(smp, am, data, ibdev, port,
3809                                          resp_len);
3810                 break;
3811         case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
3812                 ret = __subn_set_opa_bct(smp, am, data, ibdev, port,
3813                                          resp_len);
3814                 break;
3815         case IB_SMP_ATTR_VL_ARB_TABLE:
3816                 ret = __subn_set_opa_vl_arb(smp, am, data, ibdev, port,
3817                                             resp_len);
3818                 break;
3819         case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
3820                 ret = __subn_set_opa_cong_setting(smp, am, data, ibdev,
3821                                                   port, resp_len);
3822                 break;
3823         case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
3824                 ret = __subn_set_opa_cc_table(smp, am, data, ibdev, port,
3825                                               resp_len);
3826                 break;
3827         case IB_SMP_ATTR_LED_INFO:
3828                 ret = __subn_set_opa_led_info(smp, am, data, ibdev, port,
3829                                               resp_len);
3830                 break;
3831         case IB_SMP_ATTR_SM_INFO:
3832                 if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
3833                         return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
3834                 if (ibp->rvp.port_cap_flags & IB_PORT_SM)
3835                         return IB_MAD_RESULT_SUCCESS;
3836                 /* FALLTHROUGH */
3837         default:
3838                 smp->status |= IB_SMP_UNSUP_METH_ATTR;
3839                 ret = reply((struct ib_mad_hdr *)smp);
3840                 break;
3841         }
3842         return ret;
3843 }
3844
3845 static inline void set_aggr_error(struct opa_aggregate *ag)
3846 {
3847         ag->err_reqlength |= cpu_to_be16(0x8000);
3848 }
3849
3850 static int subn_get_opa_aggregate(struct opa_smp *smp,
3851                                   struct ib_device *ibdev, u8 port,
3852                                   u32 *resp_len)
3853 {
3854         int i;
3855         u32 num_attr = be32_to_cpu(smp->attr_mod) & 0x000000ff;
3856         u8 *next_smp = opa_get_smp_data(smp);
3857
3858         if (num_attr < 1 || num_attr > 117) {
3859                 smp->status |= IB_SMP_INVALID_FIELD;
3860                 return reply((struct ib_mad_hdr *)smp);
3861         }
3862
3863         for (i = 0; i < num_attr; i++) {
3864                 struct opa_aggregate *agg;
3865                 size_t agg_data_len;
3866                 size_t agg_size;
3867                 u32 am;
3868
3869                 agg = (struct opa_aggregate *)next_smp;
3870                 agg_data_len = (be16_to_cpu(agg->err_reqlength) & 0x007f) * 8;
3871                 agg_size = sizeof(*agg) + agg_data_len;
3872                 am = be32_to_cpu(agg->attr_mod);
3873
3874                 *resp_len += agg_size;
3875
3876                 if (next_smp + agg_size > ((u8 *)smp) + sizeof(*smp)) {
3877                         smp->status |= IB_SMP_INVALID_FIELD;
3878                         return reply((struct ib_mad_hdr *)smp);
3879                 }
3880
3881                 /* zero the payload for this segment */
3882                 memset(next_smp + sizeof(*agg), 0, agg_data_len);
3883
3884                 (void)subn_get_opa_sma(agg->attr_id, smp, am, agg->data,
3885                                         ibdev, port, NULL);
3886                 if (smp->status & ~IB_SMP_DIRECTION) {
3887                         set_aggr_error(agg);
3888                         return reply((struct ib_mad_hdr *)smp);
3889                 }
3890                 next_smp += agg_size;
3891         }
3892
3893         return reply((struct ib_mad_hdr *)smp);
3894 }
3895
3896 static int subn_set_opa_aggregate(struct opa_smp *smp,
3897                                   struct ib_device *ibdev, u8 port,
3898                                   u32 *resp_len)
3899 {
3900         int i;
3901         u32 num_attr = be32_to_cpu(smp->attr_mod) & 0x000000ff;
3902         u8 *next_smp = opa_get_smp_data(smp);
3903
3904         if (num_attr < 1 || num_attr > 117) {
3905                 smp->status |= IB_SMP_INVALID_FIELD;
3906                 return reply((struct ib_mad_hdr *)smp);
3907         }
3908
3909         for (i = 0; i < num_attr; i++) {
3910                 struct opa_aggregate *agg;
3911                 size_t agg_data_len;
3912                 size_t agg_size;
3913                 u32 am;
3914
3915                 agg = (struct opa_aggregate *)next_smp;
3916                 agg_data_len = (be16_to_cpu(agg->err_reqlength) & 0x007f) * 8;
3917                 agg_size = sizeof(*agg) + agg_data_len;
3918                 am = be32_to_cpu(agg->attr_mod);
3919
3920                 *resp_len += agg_size;
3921
3922                 if (next_smp + agg_size > ((u8 *)smp) + sizeof(*smp)) {
3923                         smp->status |= IB_SMP_INVALID_FIELD;
3924                         return reply((struct ib_mad_hdr *)smp);
3925                 }
3926
3927                 (void)subn_set_opa_sma(agg->attr_id, smp, am, agg->data,
3928                                         ibdev, port, NULL);
3929                 if (smp->status & ~IB_SMP_DIRECTION) {
3930                         set_aggr_error(agg);
3931                         return reply((struct ib_mad_hdr *)smp);
3932                 }
3933                 next_smp += agg_size;
3934         }
3935
3936         return reply((struct ib_mad_hdr *)smp);
3937 }
3938
/*
 * clear_linkup_counters() - zero the counters that must restart on link up
 * @dd: device whose counters and error-info status bits are reset
 *
 * OPAv1 specifies that, on the transition to link up, these counters
 * are cleared:
 *   PortRcvErrors [*]
 *   LinkErrorRecovery
 *   LocalLinkIntegrityErrors
 *   ExcessiveBufferOverruns [*]
 *
 * [*] Error info associated with these counters is retained, but the
 * error info status is reset to 0.
 *
 * Each counter is cleared by writing 0 to it through write_dev_cntr()
 * with CNTR_INVALID_VL as the VL argument.
 */
void clear_linkup_counters(struct hfi1_devdata *dd)
{
        /* PortRcvErrors */
        write_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL, 0);
        /* keep the recorded error info, drop only its status bit(s) */
        dd->err_info_rcvport.status_and_code &= ~OPA_EI_STATUS_SMASK;
        /* LinkErrorRecovery */
        write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
        write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL, 0);
        /* LocalLinkIntegrityErrors */
        write_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL, 0);
        write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0);
        /* ExcessiveBufferOverruns */
        write_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL, 0);
        /* the software overflow count kept in the devdata is reset too */
        dd->rcv_ovfl_cnt = 0;
        /*
         * NOTE(review): the status cleared for this group belongs to
         * err_info_xmit_constraint - confirm that this is the error-info
         * record meant to pair with ExcessiveBufferOverruns.
         */
        dd->err_info_xmit_constraint.status &= ~OPA_EI_STATUS_SMASK;
}
3966
3967 /*
3968  * is_local_mad() returns 1 if 'mad' is sent from, and destined to the
3969  * local node, 0 otherwise.
3970  */
3971 static int is_local_mad(struct hfi1_ibport *ibp, const struct opa_mad *mad,
3972                         const struct ib_wc *in_wc)
3973 {
3974         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3975         const struct opa_smp *smp = (const struct opa_smp *)mad;
3976
3977         if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
3978                 return (smp->hop_cnt == 0 &&
3979                         smp->route.dr.dr_slid == OPA_LID_PERMISSIVE &&
3980                         smp->route.dr.dr_dlid == OPA_LID_PERMISSIVE);
3981         }
3982
3983         return (in_wc->slid == ppd->lid);
3984 }
3985
3986 /*
3987  * opa_local_smp_check() should only be called on MADs for which
3988  * is_local_mad() returns true. It applies the SMP checks that are
3989  * specific to SMPs which are sent from, and destined to this node.
3990  * opa_local_smp_check() returns 0 if the SMP passes its checks, 1
3991  * otherwise.
3992  *
3993  * SMPs which arrive from other nodes are instead checked by
3994  * opa_smp_check().
3995  */
3996 static int opa_local_smp_check(struct hfi1_ibport *ibp,
3997                                const struct ib_wc *in_wc)
3998 {
3999         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
4000         u16 slid = in_wc->slid;
4001         u16 pkey;
4002
4003         if (in_wc->pkey_index >= ARRAY_SIZE(ppd->pkeys))
4004                 return 1;
4005
4006         pkey = ppd->pkeys[in_wc->pkey_index];
4007         /*
4008          * We need to do the "node-local" checks specified in OPAv1,
4009          * rev 0.90, section 9.10.26, which are:
4010          *   - pkey is 0x7fff, or 0xffff
4011          *   - Source QPN == 0 || Destination QPN == 0
4012          *   - the MAD header's management class is either
4013          *     IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE or
4014          *     IB_MGMT_CLASS_SUBN_LID_ROUTED
4015          *   - SLID != 0
4016          *
4017          * However, we know (and so don't need to check again) that,
4018          * for local SMPs, the MAD stack passes MADs with:
4019          *   - Source QPN of 0
4020          *   - MAD mgmt_class is IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
4021          *   - SLID is either: OPA_LID_PERMISSIVE (0xFFFFFFFF), or
4022          *     our own port's lid
4023          *
4024          */
4025         if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY)
4026                 return 0;
4027         ingress_pkey_table_fail(ppd, pkey, slid);
4028         return 1;
4029 }
4030
4031 static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
4032                             u8 port, const struct opa_mad *in_mad,
4033                             struct opa_mad *out_mad,
4034                             u32 *resp_len)
4035 {
4036         struct opa_smp *smp = (struct opa_smp *)out_mad;
4037         struct hfi1_ibport *ibp = to_iport(ibdev, port);
4038         u8 *data;
4039         u32 am;
4040         __be16 attr_id;
4041         int ret;
4042
4043         *out_mad = *in_mad;
4044         data = opa_get_smp_data(smp);
4045
4046         am = be32_to_cpu(smp->attr_mod);
4047         attr_id = smp->attr_id;
4048         if (smp->class_version != OPA_SMI_CLASS_VERSION) {
4049                 smp->status |= IB_SMP_UNSUP_VERSION;
4050                 ret = reply((struct ib_mad_hdr *)smp);
4051                 return ret;
4052         }
4053         ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags, smp->mkey,
4054                          smp->route.dr.dr_slid, smp->route.dr.return_path,
4055                          smp->hop_cnt);
4056         if (ret) {
4057                 u32 port_num = be32_to_cpu(smp->attr_mod);
4058
4059                 /*
4060                  * If this is a get/set portinfo, we already check the
4061                  * M_Key if the MAD is for another port and the M_Key
4062                  * is OK on the receiving port. This check is needed
4063                  * to increment the error counters when the M_Key
4064                  * fails to match on *both* ports.
4065                  */
4066                 if (attr_id == IB_SMP_ATTR_PORT_INFO &&
4067                     (smp->method == IB_MGMT_METHOD_GET ||
4068                      smp->method == IB_MGMT_METHOD_SET) &&
4069                     port_num && port_num <= ibdev->phys_port_cnt &&
4070                     port != port_num)
4071                         (void)check_mkey(to_iport(ibdev, port_num),
4072                                           (struct ib_mad_hdr *)smp, 0,
4073                                           smp->mkey, smp->route.dr.dr_slid,
4074                                           smp->route.dr.return_path,
4075                                           smp->hop_cnt);
4076                 ret = IB_MAD_RESULT_FAILURE;
4077                 return ret;
4078         }
4079
4080         *resp_len = opa_get_smp_header_size(smp);
4081
4082         switch (smp->method) {
4083         case IB_MGMT_METHOD_GET:
4084                 switch (attr_id) {
4085                 default:
4086                         clear_opa_smp_data(smp);
4087                         ret = subn_get_opa_sma(attr_id, smp, am, data,
4088                                                ibdev, port, resp_len);
4089                         break;
4090                 case OPA_ATTRIB_ID_AGGREGATE:
4091                         ret = subn_get_opa_aggregate(smp, ibdev, port,
4092                                                      resp_len);
4093                         break;
4094                 }
4095                 break;
4096         case IB_MGMT_METHOD_SET:
4097                 switch (attr_id) {
4098                 default:
4099                         ret = subn_set_opa_sma(attr_id, smp, am, data,
4100                                                ibdev, port, resp_len);
4101                         break;
4102                 case OPA_ATTRIB_ID_AGGREGATE:
4103                         ret = subn_set_opa_aggregate(smp, ibdev, port,
4104                                                      resp_len);
4105                         break;
4106                 }
4107                 break;
4108         case IB_MGMT_METHOD_TRAP:
4109         case IB_MGMT_METHOD_REPORT:
4110         case IB_MGMT_METHOD_REPORT_RESP:
4111         case IB_MGMT_METHOD_GET_RESP:
4112                 /*
4113                  * The ib_mad module will call us to process responses
4114                  * before checking for other consumers.
4115                  * Just tell the caller to process it normally.
4116                  */
4117                 ret = IB_MAD_RESULT_SUCCESS;
4118                 break;
4119         default:
4120                 smp->status |= IB_SMP_UNSUP_METHOD;
4121                 ret = reply((struct ib_mad_hdr *)smp);
4122                 break;
4123         }
4124
4125         return ret;
4126 }
4127
4128 static int process_subn(struct ib_device *ibdev, int mad_flags,
4129                         u8 port, const struct ib_mad *in_mad,
4130                         struct ib_mad *out_mad)
4131 {
4132         struct ib_smp *smp = (struct ib_smp *)out_mad;
4133         struct hfi1_ibport *ibp = to_iport(ibdev, port);
4134         int ret;
4135
4136         *out_mad = *in_mad;
4137         if (smp->class_version != 1) {
4138                 smp->status |= IB_SMP_UNSUP_VERSION;
4139                 ret = reply((struct ib_mad_hdr *)smp);
4140                 return ret;
4141         }
4142
4143         ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags,
4144                          smp->mkey, (__force __be32)smp->dr_slid,
4145                          smp->return_path, smp->hop_cnt);
4146         if (ret) {
4147                 u32 port_num = be32_to_cpu(smp->attr_mod);
4148
4149                 /*
4150                  * If this is a get/set portinfo, we already check the
4151                  * M_Key if the MAD is for another port and the M_Key
4152                  * is OK on the receiving port. This check is needed
4153                  * to increment the error counters when the M_Key
4154                  * fails to match on *both* ports.
4155                  */
4156                 if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO &&
4157                     (smp->method == IB_MGMT_METHOD_GET ||
4158                      smp->method == IB_MGMT_METHOD_SET) &&
4159                     port_num && port_num <= ibdev->phys_port_cnt &&
4160                     port != port_num)
4161                         (void)check_mkey(to_iport(ibdev, port_num),
4162                                          (struct ib_mad_hdr *)smp, 0,
4163                                          smp->mkey,
4164                                          (__force __be32)smp->dr_slid,
4165                                          smp->return_path, smp->hop_cnt);
4166                 ret = IB_MAD_RESULT_FAILURE;
4167                 return ret;
4168         }
4169
4170         switch (smp->method) {
4171         case IB_MGMT_METHOD_GET:
4172                 switch (smp->attr_id) {
4173                 case IB_SMP_ATTR_NODE_INFO:
4174                         ret = subn_get_nodeinfo(smp, ibdev, port);
4175                         break;
4176                 default:
4177                         smp->status |= IB_SMP_UNSUP_METH_ATTR;
4178                         ret = reply((struct ib_mad_hdr *)smp);
4179                         break;
4180                 }
4181                 break;
4182         }
4183
4184         return ret;
4185 }
4186
4187 static int process_perf(struct ib_device *ibdev, u8 port,
4188                         const struct ib_mad *in_mad,
4189                         struct ib_mad *out_mad)
4190 {
4191         struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad;
4192         struct ib_class_port_info *cpi = (struct ib_class_port_info *)
4193                                                 &pmp->data;
4194         int ret = IB_MAD_RESULT_FAILURE;
4195
4196         *out_mad = *in_mad;
4197         if (pmp->mad_hdr.class_version != 1) {
4198                 pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
4199                 ret = reply((struct ib_mad_hdr *)pmp);
4200                 return ret;
4201         }
4202
4203         switch (pmp->mad_hdr.method) {
4204         case IB_MGMT_METHOD_GET:
4205                 switch (pmp->mad_hdr.attr_id) {
4206                 case IB_PMA_PORT_COUNTERS:
4207                         ret = pma_get_ib_portcounters(pmp, ibdev, port);
4208                         break;
4209                 case IB_PMA_PORT_COUNTERS_EXT:
4210                         ret = pma_get_ib_portcounters_ext(pmp, ibdev, port);
4211                         break;
4212                 case IB_PMA_CLASS_PORT_INFO:
4213                         cpi->capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
4214                         ret = reply((struct ib_mad_hdr *)pmp);
4215                         break;
4216                 default:
4217                         pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4218                         ret = reply((struct ib_mad_hdr *)pmp);
4219                         break;
4220                 }
4221                 break;
4222
4223         case IB_MGMT_METHOD_SET:
4224                 if (pmp->mad_hdr.attr_id) {
4225                         pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4226                         ret = reply((struct ib_mad_hdr *)pmp);
4227                 }
4228                 break;
4229
4230         case IB_MGMT_METHOD_TRAP:
4231         case IB_MGMT_METHOD_GET_RESP:
4232                 /*
4233                  * The ib_mad module will call us to process responses
4234                  * before checking for other consumers.
4235                  * Just tell the caller to process it normally.
4236                  */
4237                 ret = IB_MAD_RESULT_SUCCESS;
4238                 break;
4239
4240         default:
4241                 pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
4242                 ret = reply((struct ib_mad_hdr *)pmp);
4243                 break;
4244         }
4245
4246         return ret;
4247 }
4248
4249 static int process_perf_opa(struct ib_device *ibdev, u8 port,
4250                             const struct opa_mad *in_mad,
4251                             struct opa_mad *out_mad, u32 *resp_len)
4252 {
4253         struct opa_pma_mad *pmp = (struct opa_pma_mad *)out_mad;
4254         int ret;
4255
4256         *out_mad = *in_mad;
4257
4258         if (pmp->mad_hdr.class_version != OPA_SMI_CLASS_VERSION) {
4259                 pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
4260                 return reply((struct ib_mad_hdr *)pmp);
4261         }
4262
4263         *resp_len = sizeof(pmp->mad_hdr);
4264
4265         switch (pmp->mad_hdr.method) {
4266         case IB_MGMT_METHOD_GET:
4267                 switch (pmp->mad_hdr.attr_id) {
4268                 case IB_PMA_CLASS_PORT_INFO:
4269                         ret = pma_get_opa_classportinfo(pmp, ibdev, resp_len);
4270                         break;
4271                 case OPA_PM_ATTRIB_ID_PORT_STATUS:
4272                         ret = pma_get_opa_portstatus(pmp, ibdev, port,
4273                                                      resp_len);
4274                         break;
4275                 case OPA_PM_ATTRIB_ID_DATA_PORT_COUNTERS:
4276                         ret = pma_get_opa_datacounters(pmp, ibdev, port,
4277                                                        resp_len);
4278                         break;
4279                 case OPA_PM_ATTRIB_ID_ERROR_PORT_COUNTERS:
4280                         ret = pma_get_opa_porterrors(pmp, ibdev, port,
4281                                                      resp_len);
4282                         break;
4283                 case OPA_PM_ATTRIB_ID_ERROR_INFO:
4284                         ret = pma_get_opa_errorinfo(pmp, ibdev, port,
4285                                                     resp_len);
4286                         break;
4287                 default:
4288                         pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4289                         ret = reply((struct ib_mad_hdr *)pmp);
4290                         break;
4291                 }
4292                 break;
4293
4294         case IB_MGMT_METHOD_SET:
4295                 switch (pmp->mad_hdr.attr_id) {
4296                 case OPA_PM_ATTRIB_ID_CLEAR_PORT_STATUS:
4297                         ret = pma_set_opa_portstatus(pmp, ibdev, port,
4298                                                      resp_len);
4299                         break;
4300                 case OPA_PM_ATTRIB_ID_ERROR_INFO:
4301                         ret = pma_set_opa_errorinfo(pmp, ibdev, port,
4302                                                     resp_len);
4303                         break;
4304                 default:
4305                         pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4306                         ret = reply((struct ib_mad_hdr *)pmp);
4307                         break;
4308                 }
4309                 break;
4310
4311         case IB_MGMT_METHOD_TRAP:
4312         case IB_MGMT_METHOD_GET_RESP:
4313                 /*
4314                  * The ib_mad module will call us to process responses
4315                  * before checking for other consumers.
4316                  * Just tell the caller to process it normally.
4317                  */
4318                 ret = IB_MAD_RESULT_SUCCESS;
4319                 break;
4320
4321         default:
4322                 pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
4323                 ret = reply((struct ib_mad_hdr *)pmp);
4324                 break;
4325         }
4326
4327         return ret;
4328 }
4329
4330 static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags,
4331                                 u8 port, const struct ib_wc *in_wc,
4332                                 const struct ib_grh *in_grh,
4333                                 const struct opa_mad *in_mad,
4334                                 struct opa_mad *out_mad, size_t *out_mad_size,
4335                                 u16 *out_mad_pkey_index)
4336 {
4337         int ret;
4338         int pkey_idx;
4339         u32 resp_len = 0;
4340         struct hfi1_ibport *ibp = to_iport(ibdev, port);
4341
4342         pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
4343         if (pkey_idx < 0) {
4344                 pr_warn("failed to find limited mgmt pkey, defaulting 0x%x\n",
4345                         hfi1_get_pkey(ibp, 1));
4346                 pkey_idx = 1;
4347         }
4348         *out_mad_pkey_index = (u16)pkey_idx;
4349
4350         switch (in_mad->mad_hdr.mgmt_class) {
4351         case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
4352         case IB_MGMT_CLASS_SUBN_LID_ROUTED:
4353                 if (is_local_mad(ibp, in_mad, in_wc)) {
4354                         ret = opa_local_smp_check(ibp, in_wc);
4355                         if (ret)
4356                                 return IB_MAD_RESULT_FAILURE;
4357                 }
4358                 ret = process_subn_opa(ibdev, mad_flags, port, in_mad,
4359                                        out_mad, &resp_len);
4360                 goto bail;
4361         case IB_MGMT_CLASS_PERF_MGMT:
4362                 ret = process_perf_opa(ibdev, port, in_mad, out_mad,
4363                                        &resp_len);
4364                 goto bail;
4365
4366         default:
4367                 ret = IB_MAD_RESULT_SUCCESS;
4368         }
4369
4370 bail:
4371         if (ret & IB_MAD_RESULT_REPLY)
4372                 *out_mad_size = round_up(resp_len, 8);
4373         else if (ret & IB_MAD_RESULT_SUCCESS)
4374                 *out_mad_size = in_wc->byte_len - sizeof(struct ib_grh);
4375
4376         return ret;
4377 }
4378
4379 static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u8 port,
4380                                const struct ib_wc *in_wc,
4381                                const struct ib_grh *in_grh,
4382                                const struct ib_mad *in_mad,
4383                                struct ib_mad *out_mad)
4384 {
4385         int ret;
4386
4387         switch (in_mad->mad_hdr.mgmt_class) {
4388         case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
4389         case IB_MGMT_CLASS_SUBN_LID_ROUTED:
4390                 ret = process_subn(ibdev, mad_flags, port, in_mad, out_mad);
4391                 break;
4392         case IB_MGMT_CLASS_PERF_MGMT:
4393                 ret = process_perf(ibdev, port, in_mad, out_mad);
4394                 break;
4395         default:
4396                 ret = IB_MAD_RESULT_SUCCESS;
4397                 break;
4398         }
4399
4400         return ret;
4401 }
4402
4403 /**
4404  * hfi1_process_mad - process an incoming MAD packet
4405  * @ibdev: the infiniband device this packet came in on
4406  * @mad_flags: MAD flags
4407  * @port: the port number this packet came in on
4408  * @in_wc: the work completion entry for this packet
4409  * @in_grh: the global route header for this packet
4410  * @in_mad: the incoming MAD
4411  * @out_mad: any outgoing MAD reply
4412  *
4413  * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not
4414  * interested in processing.
4415  *
4416  * Note that the verbs framework has already done the MAD sanity checks,
4417  * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
4418  * MADs.
4419  *
4420  * This is called by the ib_mad module.
4421  */
4422 int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
4423                      const struct ib_wc *in_wc, const struct ib_grh *in_grh,
4424                      const struct ib_mad_hdr *in_mad, size_t in_mad_size,
4425                      struct ib_mad_hdr *out_mad, size_t *out_mad_size,
4426                      u16 *out_mad_pkey_index)
4427 {
4428         switch (in_mad->base_version) {
4429         case OPA_MGMT_BASE_VERSION:
4430                 if (unlikely(in_mad_size != sizeof(struct opa_mad))) {
4431                         dev_err(ibdev->dma_device, "invalid in_mad_size\n");
4432                         return IB_MAD_RESULT_FAILURE;
4433                 }
4434                 return hfi1_process_opa_mad(ibdev, mad_flags, port,
4435                                             in_wc, in_grh,
4436                                             (struct opa_mad *)in_mad,
4437                                             (struct opa_mad *)out_mad,
4438                                             out_mad_size,
4439                                             out_mad_pkey_index);
4440         case IB_MGMT_BASE_VERSION:
4441                 return hfi1_process_ib_mad(ibdev, mad_flags, port,
4442                                           in_wc, in_grh,
4443                                           (const struct ib_mad *)in_mad,
4444                                           (struct ib_mad *)out_mad);
4445         default:
4446                 break;
4447         }
4448
4449         return IB_MAD_RESULT_FAILURE;
4450 }