// SPDX-License-Identifier: GPL-2.0
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * IB infrastructure:
 * Establish SMC-R as an Infiniband Client to be notified about added and
 * removed IB devices of type RDMA.
 * Determine device and port characteristics for these IB devices.
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/etherdevice.h>
#include <linux/if_vlan.h>
#include <linux/random.h>
#include <linux/workqueue.h>
#include <linux/scatterlist.h>
#include <linux/wait.h>
#include <linux/mutex.h>
#include <linux/inetdevice.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>

#include "smc_pnet.h"
#include "smc_ib.h"
#include "smc_core.h"
#include "smc_wr.h"
#include "smc.h"
#include "smc_netlink.h"

#define SMC_MAX_CQE 32766	/* max. # of completion queue elements */

#define SMC_QP_MIN_RNR_TIMER		5
#define SMC_QP_TIMEOUT			15 /* 4096 * 2 ** timeout usec */
#define SMC_QP_RETRY_CNT		7 /* 7: infinite */
#define SMC_QP_RNR_RETRY		7 /* 7: infinite */

struct smc_ib_devices smc_ib_devices = {	/* smc-registered ib devices */
        .mutex = __MUTEX_INITIALIZER(smc_ib_devices.mutex),
        .list = LIST_HEAD_INIT(smc_ib_devices.list),
};

u8 local_systemid[SMC_SYSTEMID_LEN];		/* unique system identifier */
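/* The helpers below walk a link's reliable-connected QP through the IB state
 * machine: RESET -> INIT -> RTR -> RTS; smc_ib_modify_qp_error() moves the
 * QP to the error state so that outstanding work requests are flushed.
 */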
static int smc_ib_modify_qp_init(struct smc_link *lnk)
{
        struct ib_qp_attr qp_attr;

        memset(&qp_attr, 0, sizeof(qp_attr));
        qp_attr.qp_state = IB_QPS_INIT;
        qp_attr.pkey_index = 0;
        qp_attr.port_num = lnk->ibport;
        qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE
                                | IB_ACCESS_REMOTE_WRITE;
        return ib_modify_qp(lnk->roce_qp, &qp_attr,
                            IB_QP_STATE | IB_QP_PKEY_INDEX |
                            IB_QP_ACCESS_FLAGS | IB_QP_PORT);
}

static int smc_ib_modify_qp_rtr(struct smc_link *lnk)
{
        enum ib_qp_attr_mask qp_attr_mask =
                IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN |
                IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER;
        struct ib_qp_attr qp_attr;
        u8 hop_lim = 1;

        memset(&qp_attr, 0, sizeof(qp_attr));
        qp_attr.qp_state = IB_QPS_RTR;
        qp_attr.path_mtu = min(lnk->path_mtu, lnk->peer_mtu);
        qp_attr.ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
        rdma_ah_set_port_num(&qp_attr.ah_attr, lnk->ibport);
        if (lnk->lgr->smc_version == SMC_V2 && lnk->lgr->uses_gateway)
                hop_lim = IPV6_DEFAULT_HOPLIMIT;
        rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, lnk->sgid_index, hop_lim, 0);
        rdma_ah_set_dgid_raw(&qp_attr.ah_attr, lnk->peer_gid);
        if (lnk->lgr->smc_version == SMC_V2 && lnk->lgr->uses_gateway)
                memcpy(&qp_attr.ah_attr.roce.dmac, lnk->lgr->nexthop_mac,
                       sizeof(lnk->lgr->nexthop_mac));
        else
                memcpy(&qp_attr.ah_attr.roce.dmac, lnk->peer_mac,
                       sizeof(lnk->peer_mac));
        qp_attr.dest_qp_num = lnk->peer_qpn;
        qp_attr.rq_psn = lnk->peer_psn; /* starting receive packet seq # */
        qp_attr.max_dest_rd_atomic = 1; /* max # of resources for incoming
                                         * requests
                                         */
        qp_attr.min_rnr_timer = SMC_QP_MIN_RNR_TIMER;

        return ib_modify_qp(lnk->roce_qp, &qp_attr, qp_attr_mask);
}

int smc_ib_modify_qp_rts(struct smc_link *lnk)
{
        struct ib_qp_attr qp_attr;

        memset(&qp_attr, 0, sizeof(qp_attr));
        qp_attr.qp_state = IB_QPS_RTS;
        qp_attr.timeout = SMC_QP_TIMEOUT;	/* local ack timeout */
        qp_attr.retry_cnt = SMC_QP_RETRY_CNT;	/* retry count */
        qp_attr.rnr_retry = SMC_QP_RNR_RETRY;	/* RNR retries, 7=infinite */
        qp_attr.sq_psn = lnk->psn_initial;	/* starting send packet seq # */
        qp_attr.max_rd_atomic = 1;	/* # of outstanding RDMA reads and
                                         * atomic ops allowed
                                         */
        return ib_modify_qp(lnk->roce_qp, &qp_attr,
                            IB_QP_STATE | IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
                            IB_QP_SQ_PSN | IB_QP_RNR_RETRY |
                            IB_QP_MAX_QP_RD_ATOMIC);
}

int smc_ib_modify_qp_error(struct smc_link *lnk)
{
        struct ib_qp_attr qp_attr;

        memset(&qp_attr, 0, sizeof(qp_attr));
        qp_attr.qp_state = IB_QPS_ERR;
        return ib_modify_qp(lnk->roce_qp, &qp_attr, IB_QP_STATE);
}
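/* Bring a new link's QP into operation: move it to INIT and RTR, post the
 * initial receive work requests, and on the server side also move it to RTS.
 */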
int smc_ib_ready_link(struct smc_link *lnk)
{
        struct smc_link_group *lgr = smc_get_lgr(lnk);
        int rc = 0;

        rc = smc_ib_modify_qp_init(lnk);
        if (rc)
                goto out;

        rc = smc_ib_modify_qp_rtr(lnk);
        if (rc)
                goto out;
        smc_wr_remember_qp_attr(lnk);
        rc = ib_req_notify_cq(lnk->smcibdev->roce_cq_recv,
                              IB_CQ_SOLICITED_MASK);
        if (rc)
                goto out;
        rc = smc_wr_rx_post_init(lnk);
        if (rc)
                goto out;
        smc_wr_remember_qp_attr(lnk);

        if (lgr->role == SMC_SERV) {
                rc = smc_ib_modify_qp_rts(lnk);
                if (rc)
                        goto out;
                smc_wr_remember_qp_attr(lnk);
        }
out:
        return rc;
}
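/* Cache the MAC address belonging to GID index 0 of the given port in
 * smcibdev->mac[].
 */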
static int smc_ib_fill_mac(struct smc_ib_device *smcibdev, u8 ibport)
{
        const struct ib_gid_attr *attr;
        int rc;

        attr = rdma_get_gid_attr(smcibdev->ibdev, ibport, 0);
        if (IS_ERR(attr))
                return -ENODEV;

        rc = rdma_read_gid_l2_fields(attr, NULL, smcibdev->mac[ibport - 1]);
        rdma_put_gid_attr(attr);
        return rc;
}

/* Create an identifier unique for this instance of SMC-R.
 * The MAC-address of the first active registered IB device
 * plus a random 2-byte number is used to create this identifier.
 * This name is delivered to the peer during connection initialization.
 */
static inline void smc_ib_define_local_systemid(struct smc_ib_device *smcibdev,
                                                u8 ibport)
{
        memcpy(&local_systemid[2], &smcibdev->mac[ibport - 1],
               sizeof(smcibdev->mac[ibport - 1]));
}

bool smc_ib_is_valid_local_systemid(void)
{
        return !is_zero_ether_addr(&local_systemid[2]);
}

static void smc_ib_init_local_systemid(void)
{
        get_random_bytes(&local_systemid[0], 2);
}

bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport)
{
        return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE;
}
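/* Resolve the IPv4 route to daddr and return the next-hop MAC address;
 * *uses_gateway is set when the destination is reached via a gateway.
 */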
int smc_ib_find_route(__be32 saddr, __be32 daddr,
                      u8 nexthop_mac[], u8 *uses_gateway)
{
        struct neighbour *neigh = NULL;
        struct rtable *rt = NULL;
        struct flowi4 fl4 = {
                .saddr = saddr,
                .daddr = daddr
        };

        if (daddr == cpu_to_be32(INADDR_NONE))
                goto out;
        rt = ip_route_output_flow(&init_net, &fl4, NULL);
        if (IS_ERR(rt))
                goto out;
        if (rt->rt_uses_gateway && rt->rt_gw_family != AF_INET)
                goto out;
        neigh = rt->dst.ops->neigh_lookup(&rt->dst, NULL, &fl4.daddr);
        if (neigh) {
                memcpy(nexthop_mac, neigh->ha, ETH_ALEN);
                *uses_gateway = rt->rt_uses_gateway;
                return 0;
        }
out:
        return -ENOENT;
}
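/* Check whether one GID table entry is usable for the requested mode:
 * SMC-Rv1 needs a RoCEv1 GID; SMC-Rv2 needs a RoCEv2 (UDP encap) GID whose
 * netdev owns an address in the same subnet as smcrv2->saddr and for which
 * a route to smcrv2->daddr can be resolved.
 */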
static int smc_ib_determine_gid_rcu(const struct net_device *ndev,
                                    const struct ib_gid_attr *attr,
                                    u8 gid[], u8 *sgid_index,
                                    struct smc_init_info_smcrv2 *smcrv2)
{
        if (!smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE) {
                if (gid)
                        memcpy(gid, &attr->gid, SMC_GID_SIZE);
                if (sgid_index)
                        *sgid_index = attr->index;
                return 0;
        }
        if (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP &&
            smc_ib_gid_to_ipv4((u8 *)&attr->gid) != cpu_to_be32(INADDR_NONE)) {
                struct in_device *in_dev = __in_dev_get_rcu(ndev);
                const struct in_ifaddr *ifa;
                bool subnet_match = false;

                if (!in_dev)
                        goto out;
                in_dev_for_each_ifa_rcu(ifa, in_dev) {
                        if (!inet_ifa_match(smcrv2->saddr, ifa))
                                continue;
                        subnet_match = true;
                        break;
                }
                if (!subnet_match)
                        goto out;
                if (smcrv2->daddr && smc_ib_find_route(smcrv2->saddr,
                                                       smcrv2->daddr,
                                                       smcrv2->nexthop_mac,
                                                       &smcrv2->uses_gateway))
                        goto out;

                if (gid)
                        memcpy(gid, &attr->gid, SMC_GID_SIZE);
                if (sgid_index)
                        *sgid_index = attr->index;
                return 0;
        }
out:
        return -ENODEV;
}

/* determine the gid for an ib-device port and vlan id */
int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
                         unsigned short vlan_id, u8 gid[], u8 *sgid_index,
                         struct smc_init_info_smcrv2 *smcrv2)
{
        const struct ib_gid_attr *attr;
        const struct net_device *ndev;
        int i;

        for (i = 0; i < smcibdev->pattr[ibport - 1].gid_tbl_len; i++) {
                attr = rdma_get_gid_attr(smcibdev->ibdev, ibport, i);
                if (IS_ERR(attr))
                        continue;

                rcu_read_lock();
                ndev = rdma_read_gid_attr_ndev_rcu(attr);
                if (!IS_ERR(ndev) &&
                    ((!vlan_id && !is_vlan_dev(ndev)) ||
                     (vlan_id && is_vlan_dev(ndev) &&
                      vlan_dev_vlan_id(ndev) == vlan_id))) {
                        if (!smc_ib_determine_gid_rcu(ndev, attr, gid,
                                                      sgid_index, smcrv2)) {
                                rcu_read_unlock();
                                rdma_put_gid_attr(attr);
                                return 0;
                        }
                }
                rcu_read_unlock();
                rdma_put_gid_attr(attr);
        }
        return -ENODEV;
}

/* check if gid is still defined on smcibdev */
static bool smc_ib_check_link_gid(u8 gid[SMC_GID_SIZE], bool smcrv2,
                                  struct smc_ib_device *smcibdev, u8 ibport)
{
        const struct ib_gid_attr *attr;
        bool rc = false;
        int i;

        for (i = 0; !rc && i < smcibdev->pattr[ibport - 1].gid_tbl_len; i++) {
                attr = rdma_get_gid_attr(smcibdev->ibdev, ibport, i);
                if (IS_ERR(attr))
                        continue;

                rcu_read_lock();
                if ((!smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE) ||
                    (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP &&
                     !(ipv6_addr_type((const struct in6_addr *)&attr->gid)
                       & IPV6_ADDR_LINKLOCAL)))
                        if (!memcmp(gid, &attr->gid, SMC_GID_SIZE))
                                rc = true;
                rcu_read_unlock();
                rdma_put_gid_attr(attr);
        }
        return rc;
}

/* check all links if the gid is still defined on smcibdev */
static void smc_ib_gid_check(struct smc_ib_device *smcibdev, u8 ibport)
{
        struct smc_link_group *lgr;
        int i;

        spin_lock_bh(&smc_lgr_list.lock);
        list_for_each_entry(lgr, &smc_lgr_list.list, list) {
                if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
                            SMC_MAX_PNETID_LEN))
                        continue; /* lgr is not affected */
                if (list_empty(&lgr->list))
                        continue;
                for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                        if (lgr->lnk[i].state == SMC_LNK_UNUSED ||
                            lgr->lnk[i].smcibdev != smcibdev)
                                continue;
                        if (!smc_ib_check_link_gid(lgr->lnk[i].gid,
                                                   lgr->smc_version == SMC_V2,
                                                   smcibdev, ibport))
                                smcr_port_err(smcibdev, ibport);
                }
        }
        spin_unlock_bh(&smc_lgr_list.lock);
}
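/* Refresh the cached attributes and MAC address of a port; the first port
 * found active also seeds the local system identifier.
 */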
static int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport)
{
        int rc;

        memset(&smcibdev->pattr[ibport - 1], 0,
               sizeof(smcibdev->pattr[ibport - 1]));
        rc = ib_query_port(smcibdev->ibdev, ibport,
                           &smcibdev->pattr[ibport - 1]);
        if (rc)
                goto out;
        /* the SMC protocol requires specification of the RoCE MAC address */
        rc = smc_ib_fill_mac(smcibdev, ibport);
        if (rc)
                goto out;
        if (!smc_ib_is_valid_local_systemid() &&
            smc_ib_port_active(smcibdev, ibport))
                /* create unique system identifier */
                smc_ib_define_local_systemid(smcibdev, ibport);
out:
        return rc;
}

/* process context wrapper for might_sleep smc_ib_remember_port_attr */
static void smc_ib_port_event_work(struct work_struct *work)
{
        struct smc_ib_device *smcibdev = container_of(
                work, struct smc_ib_device, port_event_work);
        u8 port_idx;

        for_each_set_bit(port_idx, &smcibdev->port_event_mask, SMC_MAX_PORTS) {
                smc_ib_remember_port_attr(smcibdev, port_idx + 1);
                clear_bit(port_idx, &smcibdev->port_event_mask);
                if (!smc_ib_port_active(smcibdev, port_idx + 1)) {
                        set_bit(port_idx, smcibdev->ports_going_away);
                        smcr_port_err(smcibdev, port_idx + 1);
                } else {
                        clear_bit(port_idx, smcibdev->ports_going_away);
                        smcr_port_add(smcibdev, port_idx + 1);
                        smc_ib_gid_check(smcibdev, port_idx + 1);
                }
        }
}

/* can be called in IRQ context */
static void smc_ib_global_event_handler(struct ib_event_handler *handler,
                                        struct ib_event *ibevent)
{
        struct smc_ib_device *smcibdev;
        bool schedule = false;
        u8 port_idx;

        smcibdev = container_of(handler, struct smc_ib_device, event_handler);

        switch (ibevent->event) {
        case IB_EVENT_DEVICE_FATAL:
                /* terminate all ports on device */
                for (port_idx = 0; port_idx < SMC_MAX_PORTS; port_idx++) {
                        set_bit(port_idx, &smcibdev->port_event_mask);
                        if (!test_and_set_bit(port_idx,
                                              smcibdev->ports_going_away))
                                schedule = true;
                }
                if (schedule)
                        schedule_work(&smcibdev->port_event_work);
                break;
        case IB_EVENT_PORT_ACTIVE:
                port_idx = ibevent->element.port_num - 1;
                if (port_idx >= SMC_MAX_PORTS)
                        break;
                set_bit(port_idx, &smcibdev->port_event_mask);
                if (test_and_clear_bit(port_idx, smcibdev->ports_going_away))
                        schedule_work(&smcibdev->port_event_work);
                break;
        case IB_EVENT_PORT_ERR:
                port_idx = ibevent->element.port_num - 1;
                if (port_idx >= SMC_MAX_PORTS)
                        break;
                set_bit(port_idx, &smcibdev->port_event_mask);
                if (!test_and_set_bit(port_idx, smcibdev->ports_going_away))
                        schedule_work(&smcibdev->port_event_work);
                break;
        case IB_EVENT_GID_CHANGE:
                port_idx = ibevent->element.port_num - 1;
                if (port_idx >= SMC_MAX_PORTS)
                        break;
                set_bit(port_idx, &smcibdev->port_event_mask);
                schedule_work(&smcibdev->port_event_work);
                break;
        default:
                break;
        }
}
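/* Each link gets its own protection domain on its IB device; the PD is
 * referenced by the link's QP and memory regions.
 */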
void smc_ib_dealloc_protection_domain(struct smc_link *lnk)
{
        if (lnk->roce_pd)
                ib_dealloc_pd(lnk->roce_pd);
        lnk->roce_pd = NULL;
}

int smc_ib_create_protection_domain(struct smc_link *lnk)
{
        int rc;

        lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev, 0);
        rc = PTR_ERR_OR_ZERO(lnk->roce_pd);
        if (IS_ERR(lnk->roce_pd))
                lnk->roce_pd = NULL;
        return rc;
}
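/* Netlink device dump support: a device is reported as critical when some
 * link group depends on it as its only (or only locally available) link.
 */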
static bool smcr_diag_is_dev_critical(struct smc_lgr_list *smc_lgr,
                                      struct smc_ib_device *smcibdev)
{
        struct smc_link_group *lgr;
        bool rc = false;
        int i;

        spin_lock_bh(&smc_lgr->lock);
        list_for_each_entry(lgr, &smc_lgr->list, list) {
                if (lgr->is_smcd)
                        continue;
                for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                        if (lgr->lnk[i].state == SMC_LNK_UNUSED ||
                            lgr->lnk[i].smcibdev != smcibdev)
                                continue;
                        if (lgr->type == SMC_LGR_SINGLE ||
                            lgr->type == SMC_LGR_ASYMMETRIC_LOCAL) {
                                rc = true;
                                goto out;
                        }
                }
        }
out:
        spin_unlock_bh(&smc_lgr->lock);
        return rc;
}

static int smc_nl_handle_dev_port(struct sk_buff *skb,
                                  struct ib_device *ibdev,
                                  struct smc_ib_device *smcibdev,
                                  int port)
{
        char smc_pnet[SMC_MAX_PNETID_LEN + 1];
        struct nlattr *port_attrs;
        unsigned char port_state;
        int lnk_count = 0;

        port_attrs = nla_nest_start(skb, SMC_NLA_DEV_PORT + port);
        if (!port_attrs)
                goto errout;

        if (nla_put_u8(skb, SMC_NLA_DEV_PORT_PNET_USR,
                       smcibdev->pnetid_by_user[port]))
                goto errattr;
        memcpy(smc_pnet, &smcibdev->pnetid[port], SMC_MAX_PNETID_LEN);
        smc_pnet[SMC_MAX_PNETID_LEN] = 0;
        if (nla_put_string(skb, SMC_NLA_DEV_PORT_PNETID, smc_pnet))
                goto errattr;
        if (nla_put_u32(skb, SMC_NLA_DEV_PORT_NETDEV,
                        smcibdev->ndev_ifidx[port]))
                goto errattr;
        if (nla_put_u8(skb, SMC_NLA_DEV_PORT_VALID, 1))
                goto errattr;
        port_state = smc_ib_port_active(smcibdev, port + 1);
        if (nla_put_u8(skb, SMC_NLA_DEV_PORT_STATE, port_state))
                goto errattr;
        lnk_count = atomic_read(&smcibdev->lnk_cnt_by_port[port]);
        if (nla_put_u32(skb, SMC_NLA_DEV_PORT_LNK_CNT, lnk_count))
                goto errattr;
        nla_nest_end(skb, port_attrs);
        return 0;
errattr:
        nla_nest_cancel(skb, port_attrs);
errout:
        return -EMSGSIZE;
}

static bool smc_nl_handle_pci_values(const struct smc_pci_dev *smc_pci_dev,
                                     struct sk_buff *skb)
{
        if (nla_put_u32(skb, SMC_NLA_DEV_PCI_FID, smc_pci_dev->pci_fid))
                return false;
        if (nla_put_u16(skb, SMC_NLA_DEV_PCI_CHID, smc_pci_dev->pci_pchid))
                return false;
        if (nla_put_u16(skb, SMC_NLA_DEV_PCI_VENDOR, smc_pci_dev->pci_vendor))
                return false;
        if (nla_put_u16(skb, SMC_NLA_DEV_PCI_DEVICE, smc_pci_dev->pci_device))
                return false;
        if (nla_put_string(skb, SMC_NLA_DEV_PCI_ID, smc_pci_dev->pci_id))
                return false;
        return true;
}

static int smc_nl_handle_smcr_dev(struct smc_ib_device *smcibdev,
                                  struct sk_buff *skb,
                                  struct netlink_callback *cb)
{
        char smc_ibname[IB_DEVICE_NAME_MAX];
        struct smc_pci_dev smc_pci_dev;
        struct pci_dev *pci_dev;
        unsigned char is_crit;
        struct nlattr *attrs;
        void *nlh;
        int i;

        nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
                          &smc_gen_nl_family, NLM_F_MULTI,
                          SMC_NETLINK_GET_DEV_SMCR);
        if (!nlh)
                goto errmsg;
        attrs = nla_nest_start(skb, SMC_GEN_DEV_SMCR);
        if (!attrs)
                goto errout;
        is_crit = smcr_diag_is_dev_critical(&smc_lgr_list, smcibdev);
        if (nla_put_u8(skb, SMC_NLA_DEV_IS_CRIT, is_crit))
                goto errattr;
        if (smcibdev->ibdev->dev.parent) {
                memset(&smc_pci_dev, 0, sizeof(smc_pci_dev));
                pci_dev = to_pci_dev(smcibdev->ibdev->dev.parent);
                smc_set_pci_values(pci_dev, &smc_pci_dev);
                if (!smc_nl_handle_pci_values(&smc_pci_dev, skb))
                        goto errattr;
        }
        snprintf(smc_ibname, sizeof(smc_ibname), "%s", smcibdev->ibdev->name);
        if (nla_put_string(skb, SMC_NLA_DEV_IB_NAME, smc_ibname))
                goto errattr;
        for (i = 1; i <= SMC_MAX_PORTS; i++) {
                if (!rdma_is_port_valid(smcibdev->ibdev, i))
                        continue;
                if (smc_nl_handle_dev_port(skb, smcibdev->ibdev,
                                           smcibdev, i - 1))
                        goto errattr;
        }

        nla_nest_end(skb, attrs);
        genlmsg_end(skb, nlh);
        return 0;

errattr:
        nla_nest_cancel(skb, attrs);
errout:
        genlmsg_cancel(skb, nlh);
errmsg:
        return -EMSGSIZE;
}

static void smc_nl_prep_smcr_dev(struct smc_ib_devices *dev_list,
                                 struct sk_buff *skb,
                                 struct netlink_callback *cb)
{
        struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
        struct smc_ib_device *smcibdev;
        int snum = cb_ctx->pos[0];
        int num = 0;

        mutex_lock(&dev_list->mutex);
        list_for_each_entry(smcibdev, &dev_list->list, list) {
                if (num < snum)
                        goto next;
                if (smc_nl_handle_smcr_dev(smcibdev, skb, cb))
                        goto errout;
next:
                num++;
        }
errout:
        mutex_unlock(&dev_list->mutex);
        cb_ctx->pos[0] = num;
}

int smcr_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb)
{
        smc_nl_prep_smcr_dev(&smc_ib_devices, skb, cb);
        return skb->len;
}
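/* QP async event handler: a fatal QP error marks the port as going away
 * and kicks the port event worker.
 */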
static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv)
{
        struct smc_link *lnk = (struct smc_link *)priv;
        struct smc_ib_device *smcibdev = lnk->smcibdev;
        u8 port_idx;

        switch (ibevent->event) {
        case IB_EVENT_QP_FATAL:
        case IB_EVENT_QP_ACCESS_ERR:
                port_idx = ibevent->element.qp->port - 1;
                if (port_idx >= SMC_MAX_PORTS)
                        break;
                set_bit(port_idx, &smcibdev->port_event_mask);
                if (!test_and_set_bit(port_idx, smcibdev->ports_going_away))
                        schedule_work(&smcibdev->port_event_work);
                break;
        default:
                break;
        }
}

void smc_ib_destroy_queue_pair(struct smc_link *lnk)
{
        if (lnk->roce_qp)
                ib_destroy_qp(lnk->roce_qp);
        lnk->roce_qp = NULL;
}

/* create a queue pair within the protection domain for a link */
int smc_ib_create_queue_pair(struct smc_link *lnk)
{
        int sges_per_buf = (lnk->lgr->smc_version == SMC_V2) ? 2 : 1;
        struct ib_qp_init_attr qp_attr = {
                .event_handler = smc_ib_qp_event_handler,
                .qp_context = lnk,
                .send_cq = lnk->smcibdev->roce_cq_send,
                .recv_cq = lnk->smcibdev->roce_cq_recv,
                .srq = NULL,
                .cap = {
                                /* include unsolicited rdma_writes as well,
                                 * there are max. 2 RDMA_WRITE per 1 WR_SEND
                                 */
                        .max_send_wr = SMC_WR_BUF_CNT * 3,
                        .max_recv_wr = SMC_WR_BUF_CNT * 3,
                        .max_send_sge = SMC_IB_MAX_SEND_SGE,
                        .max_recv_sge = sges_per_buf,
                        .max_inline_data = 0,
                },
                .sq_sig_type = IB_SIGNAL_REQ_WR,
                .qp_type = IB_QPT_RC,
        };
        int rc;

        lnk->roce_qp = ib_create_qp(lnk->roce_pd, &qp_attr);
        rc = PTR_ERR_OR_ZERO(lnk->roce_qp);
        if (IS_ERR(lnk->roce_qp))
                lnk->roce_qp = NULL;
        else
                smc_wr_remember_qp_attr(lnk);
        return rc;
}

void smc_ib_put_memory_region(struct ib_mr *mr)
{
        ib_dereg_mr(mr);
}

static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot, u8 link_idx)
{
        unsigned int offset = 0;
        int sg_num;

        /* map the largest prefix of a dma mapped SG list */
        sg_num = ib_map_mr_sg(buf_slot->mr[link_idx],
                              buf_slot->sgt[link_idx].sgl,
                              buf_slot->sgt[link_idx].orig_nents,
                              &offset, PAGE_SIZE);

        return sg_num;
}

/* Allocate a memory region and map the dma mapped SG list of buf_slot */
int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
                             struct smc_buf_desc *buf_slot, u8 link_idx)
{
        if (buf_slot->mr[link_idx])
                return 0; /* already done */

        buf_slot->mr[link_idx] =
                ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 1 << buf_slot->order);
        if (IS_ERR(buf_slot->mr[link_idx])) {
                int rc;

                rc = PTR_ERR(buf_slot->mr[link_idx]);
                buf_slot->mr[link_idx] = NULL;
                return rc;
        }

        if (smc_ib_map_mr_sg(buf_slot, link_idx) !=
            buf_slot->sgt[link_idx].orig_nents)
                return -EINVAL;

        return 0;
}
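/* Report whether any DMA mapping of the buffer needs explicit cpu/device
 * synchronization on this link's IB device.
 */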
bool smc_ib_is_sg_need_sync(struct smc_link *lnk,
                            struct smc_buf_desc *buf_slot)
{
        struct scatterlist *sg;
        unsigned int i;
        bool ret = false;

        /* for now there is just one DMA address */
        for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg,
                    buf_slot->sgt[lnk->link_idx].nents, i) {
                if (!sg_dma_len(sg))
                        break;
                if (dma_need_sync(lnk->smcibdev->ibdev->dma_device,
                                  sg_dma_address(sg))) {
                        ret = true;
                        goto out;
                }
        }

out:
        return ret;
}

/* synchronize buffer usage for cpu access */
void smc_ib_sync_sg_for_cpu(struct smc_link *lnk,
                            struct smc_buf_desc *buf_slot,
                            enum dma_data_direction data_direction)
{
        struct scatterlist *sg;
        unsigned int i;

        if (!(buf_slot->is_dma_need_sync & (1U << lnk->link_idx)))
                return;

        /* for now there is just one DMA address */
        for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg,
                    buf_slot->sgt[lnk->link_idx].nents, i) {
                if (!sg_dma_len(sg))
                        break;
                ib_dma_sync_single_for_cpu(lnk->smcibdev->ibdev,
                                           sg_dma_address(sg),
                                           sg_dma_len(sg),
                                           data_direction);
        }
}

/* synchronize buffer usage for device access */
void smc_ib_sync_sg_for_device(struct smc_link *lnk,
                               struct smc_buf_desc *buf_slot,
                               enum dma_data_direction data_direction)
{
        struct scatterlist *sg;
        unsigned int i;

        if (!(buf_slot->is_dma_need_sync & (1U << lnk->link_idx)))
                return;

        /* for now there is just one DMA address */
        for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg,
                    buf_slot->sgt[lnk->link_idx].nents, i) {
                if (!sg_dma_len(sg))
                        break;
                ib_dma_sync_single_for_device(lnk->smcibdev->ibdev,
                                              sg_dma_address(sg),
                                              sg_dma_len(sg),
                                              data_direction);
        }
}

/* Map a new TX or RX buffer SG-table to DMA */
int smc_ib_buf_map_sg(struct smc_link *lnk,
                      struct smc_buf_desc *buf_slot,
                      enum dma_data_direction data_direction)
{
        int mapped_nents;

        mapped_nents = ib_dma_map_sg(lnk->smcibdev->ibdev,
                                     buf_slot->sgt[lnk->link_idx].sgl,
                                     buf_slot->sgt[lnk->link_idx].orig_nents,
                                     data_direction);
        if (!mapped_nents)
                return -ENOMEM;

        return mapped_nents;
}

void smc_ib_buf_unmap_sg(struct smc_link *lnk,
                         struct smc_buf_desc *buf_slot,
                         enum dma_data_direction data_direction)
{
        if (!buf_slot->sgt[lnk->link_idx].sgl->dma_address)
                return; /* already unmapped */

        ib_dma_unmap_sg(lnk->smcibdev->ibdev,
                        buf_slot->sgt[lnk->link_idx].sgl,
                        buf_slot->sgt[lnk->link_idx].orig_nents,
                        data_direction);
        buf_slot->sgt[lnk->link_idx].sgl->dma_address = 0;
}
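/* Create the send and receive completion queues of an IB device and register
 * it with the work request layer; done only once per device, protected by
 * smcibdev->mutex.
 */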
long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev)
{
        struct ib_cq_init_attr cqattr = {
                .cqe = SMC_MAX_CQE, .comp_vector = 0 };
        int cqe_size_order, smc_order;
        long rc;

        mutex_lock(&smcibdev->mutex);
        rc = 0;
        if (smcibdev->initialized)
                goto out;
        /* the calculated number of cq entries fits to mlx5 cq allocation */
        cqe_size_order = cache_line_size() == 128 ? 7 : 6;
        smc_order = MAX_ORDER - cqe_size_order - 1;
        if (SMC_MAX_CQE + 2 > (0x00000001 << smc_order) * PAGE_SIZE)
                cqattr.cqe = (0x00000001 << smc_order) * PAGE_SIZE - 2;
        smcibdev->roce_cq_send = ib_create_cq(smcibdev->ibdev,
                                              smc_wr_tx_cq_handler, NULL,
                                              smcibdev, &cqattr);
        rc = PTR_ERR_OR_ZERO(smcibdev->roce_cq_send);
        if (IS_ERR(smcibdev->roce_cq_send)) {
                smcibdev->roce_cq_send = NULL;
                goto out;
        }
        smcibdev->roce_cq_recv = ib_create_cq(smcibdev->ibdev,
                                              smc_wr_rx_cq_handler, NULL,
                                              smcibdev, &cqattr);
        rc = PTR_ERR_OR_ZERO(smcibdev->roce_cq_recv);
        if (IS_ERR(smcibdev->roce_cq_recv)) {
                smcibdev->roce_cq_recv = NULL;
                goto err;
        }
        smc_wr_add_dev(smcibdev);
        smcibdev->initialized = 1;
        goto out;

err:
        ib_destroy_cq(smcibdev->roce_cq_send);
out:
        mutex_unlock(&smcibdev->mutex);
        return rc;
}

static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev)
{
        mutex_lock(&smcibdev->mutex);
        if (!smcibdev->initialized)
                goto out;
        smcibdev->initialized = 0;
        ib_destroy_cq(smcibdev->roce_cq_recv);
        ib_destroy_cq(smcibdev->roce_cq_send);
        smc_wr_remove_dev(smcibdev);
out:
        mutex_unlock(&smcibdev->mutex);
}
889 | static struct ib_client smc_ib_client; |
890 | ||
3d453f53 GG |
891 | static void smc_copy_netdev_ifindex(struct smc_ib_device *smcibdev, int port) |
892 | { | |
893 | struct ib_device *ibdev = smcibdev->ibdev; | |
894 | struct net_device *ndev; | |
895 | ||
896 | if (!ibdev->ops.get_netdev) | |
897 | return; | |
898 | ndev = ibdev->ops.get_netdev(ibdev, port + 1); | |
899 | if (ndev) { | |
900 | smcibdev->ndev_ifidx[port] = ndev->ifindex; | |
901 | dev_put(ndev); | |
902 | } | |
903 | } | |
904 | ||
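/* Keep the cached netdev ifindex of each port in sync when the underlying
 * net_device is registered or unregistered.
 */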
void smc_ib_ndev_change(struct net_device *ndev, unsigned long event)
{
        struct smc_ib_device *smcibdev;
        struct ib_device *libdev;
        struct net_device *lndev;
        u8 port_cnt;
        int i;

        mutex_lock(&smc_ib_devices.mutex);
        list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
                port_cnt = smcibdev->ibdev->phys_port_cnt;
                for (i = 0; i < min_t(size_t, port_cnt, SMC_MAX_PORTS); i++) {
                        libdev = smcibdev->ibdev;
                        if (!libdev->ops.get_netdev)
                                continue;
                        lndev = libdev->ops.get_netdev(libdev, i + 1);
                        dev_put(lndev);
                        if (lndev != ndev)
                                continue;
                        if (event == NETDEV_REGISTER)
                                smcibdev->ndev_ifidx[i] = ndev->ifindex;
                        if (event == NETDEV_UNREGISTER)
                                smcibdev->ndev_ifidx[i] = 0;
                }
        }
        mutex_unlock(&smc_ib_devices.mutex);
}

/* callback function for ib_register_client() */
static int smc_ib_add_dev(struct ib_device *ibdev)
{
        struct smc_ib_device *smcibdev;
        u8 port_cnt;
        int i;

        if (ibdev->node_type != RDMA_NODE_IB_CA)
                return -EOPNOTSUPP;

        smcibdev = kzalloc(sizeof(*smcibdev), GFP_KERNEL);
        if (!smcibdev)
                return -ENOMEM;

        smcibdev->ibdev = ibdev;
        INIT_WORK(&smcibdev->port_event_work, smc_ib_port_event_work);
        atomic_set(&smcibdev->lnk_cnt, 0);
        init_waitqueue_head(&smcibdev->lnks_deleted);
        mutex_init(&smcibdev->mutex);
        mutex_lock(&smc_ib_devices.mutex);
        list_add_tail(&smcibdev->list, &smc_ib_devices.list);
        mutex_unlock(&smc_ib_devices.mutex);
        ib_set_client_data(ibdev, &smc_ib_client, smcibdev);
        INIT_IB_EVENT_HANDLER(&smcibdev->event_handler, smcibdev->ibdev,
                              smc_ib_global_event_handler);
        ib_register_event_handler(&smcibdev->event_handler);

        /* trigger reading of the port attributes */
        port_cnt = smcibdev->ibdev->phys_port_cnt;
        pr_warn_ratelimited("smc: adding ib device %s with port count %d\n",
                            smcibdev->ibdev->name, port_cnt);
        for (i = 0;
             i < min_t(size_t, port_cnt, SMC_MAX_PORTS);
             i++) {
                set_bit(i, &smcibdev->port_event_mask);
                /* determine pnetids of the port */
                if (smc_pnetid_by_dev_port(ibdev->dev.parent, i,
                                           smcibdev->pnetid[i]))
                        smc_pnetid_by_table_ib(smcibdev, i + 1);
                smc_copy_netdev_ifindex(smcibdev, i);
                pr_warn_ratelimited("smc: ib device %s port %d has pnetid "
                                    "%.16s%s\n",
                                    smcibdev->ibdev->name, i + 1,
                                    smcibdev->pnetid[i],
                                    smcibdev->pnetid_by_user[i] ?
                                        " (user defined)" :
                                        "");
        }
        schedule_work(&smcibdev->port_event_work);
        return 0;
}

/* callback function for ib_unregister_client() */
static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data)
{
        struct smc_ib_device *smcibdev = client_data;

        mutex_lock(&smc_ib_devices.mutex);
        list_del_init(&smcibdev->list); /* remove from smc_ib_devices */
        mutex_unlock(&smc_ib_devices.mutex);
        pr_warn_ratelimited("smc: removing ib device %s\n",
                            smcibdev->ibdev->name);
        smc_smcr_terminate_all(smcibdev);
        smc_ib_cleanup_per_ibdev(smcibdev);
        ib_unregister_event_handler(&smcibdev->event_handler);
        cancel_work_sync(&smcibdev->port_event_work);
        kfree(smcibdev);
}

static struct ib_client smc_ib_client = {
        .name	= "smc_ib",
        .add	= smc_ib_add_dev,
        .remove = smc_ib_remove_dev,
};

int __init smc_ib_register_client(void)
{
        smc_ib_init_local_systemid();
        return ib_register_client(&smc_ib_client);
}

void smc_ib_unregister_client(void)
{
        ib_unregister_client(&smc_ib_client);
}