Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
6812baab TR |
2 | /* |
3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE | |
4 | * | |
5 | * Generic netlink support functions to configure an SMC-R PNET table | |
6 | * | |
7 | * Copyright IBM Corp. 2016 | |
8 | * | |
9 | * Author(s): Thomas Richter <tmricht@linux.vnet.ibm.com> | |
10 | */ | |
11 | ||
12 | #include <linux/module.h> | |
13 | #include <linux/list.h> | |
14 | #include <linux/ctype.h> | |
15 | #include <net/netlink.h> | |
16 | #include <net/genetlink.h> | |
17 | ||
18 | #include <uapi/linux/if.h> | |
19 | #include <uapi/linux/smc.h> | |
20 | ||
21 | #include <rdma/ib_verbs.h> | |
22 | ||
64e28b52 HW |
23 | #include <net/netns/generic.h> |
24 | #include "smc_netns.h" | |
25 | ||
6812baab TR |
26 | #include "smc_pnet.h" |
27 | #include "smc_ib.h" | |
1619f770 | 28 | #include "smc_ism.h" |
bc36d2fc | 29 | #include "smc_core.h" |
6812baab | 30 | |
890a2cb4 HW |
/* ASCII code of the blank character; smc_pnet_match() treats it like a
 * terminating NUL byte when comparing pnetids
 */
#define SMC_ASCII_BLANK 0x20

/* forward declaration, the definition follows further down in this file */
static struct net_device *pnet_find_base_ndev(struct net_device *ndev);
34 | ||
6812baab TR |
35 | static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = { |
36 | [SMC_PNETID_NAME] = { | |
37 | .type = NLA_NUL_STRING, | |
ca8dc133 | 38 | .len = SMC_MAX_PNETID_LEN |
6812baab TR |
39 | }, |
40 | [SMC_PNETID_ETHNAME] = { | |
41 | .type = NLA_NUL_STRING, | |
42 | .len = IFNAMSIZ - 1 | |
43 | }, | |
44 | [SMC_PNETID_IBNAME] = { | |
45 | .type = NLA_NUL_STRING, | |
46 | .len = IB_DEVICE_NAME_MAX - 1 | |
47 | }, | |
48 | [SMC_PNETID_IBPORT] = { .type = NLA_U8 } | |
49 | }; | |
50 | ||
51 | static struct genl_family smc_pnet_nl_family; | |
52 | ||
6812baab | 53 | /** |
890a2cb4 | 54 | * struct smc_user_pnetentry - pnet identifier name entry for/from user |
6812baab TR |
55 | * @list: List node. |
56 | * @pnet_name: Pnet identifier name | |
57 | * @ndev: pointer to network device. | |
58 | * @smcibdev: Pointer to IB device. | |
890a2cb4 | 59 | * @ib_port: Port of IB device. |
f3d74b22 | 60 | * @smcd_dev: Pointer to smcd device. |
6812baab | 61 | */ |
890a2cb4 | 62 | struct smc_user_pnetentry { |
6812baab | 63 | struct list_head list; |
0afff91c | 64 | char pnet_name[SMC_MAX_PNETID_LEN + 1]; |
6812baab TR |
65 | struct net_device *ndev; |
66 | struct smc_ib_device *smcibdev; | |
67 | u8 ib_port; | |
f3d74b22 | 68 | struct smcd_dev *smcd_dev; |
6812baab TR |
69 | }; |
70 | ||
890a2cb4 HW |
71 | /* pnet entry stored in pnet table */ |
72 | struct smc_pnetentry { | |
73 | struct list_head list; | |
74 | char pnet_name[SMC_MAX_PNETID_LEN + 1]; | |
75 | struct net_device *ndev; | |
76 | }; | |
6812baab | 77 | |
890a2cb4 HW |
78 | /* Check if two given pnetids match */ |
79 | static bool smc_pnet_match(u8 *pnetid1, u8 *pnetid2) | |
6812baab | 80 | { |
890a2cb4 | 81 | int i; |
6812baab | 82 | |
890a2cb4 HW |
83 | for (i = 0; i < SMC_MAX_PNETID_LEN; i++) { |
84 | if ((pnetid1[i] == 0 || pnetid1[i] == SMC_ASCII_BLANK) && | |
85 | (pnetid2[i] == 0 || pnetid2[i] == SMC_ASCII_BLANK)) | |
6812baab | 86 | break; |
890a2cb4 HW |
87 | if (pnetid1[i] != pnetid2[i]) |
88 | return false; | |
6812baab | 89 | } |
890a2cb4 | 90 | return true; |
6812baab TR |
91 | } |
92 | ||
93 | /* Remove a pnetid from the pnet table. | |
94 | */ | |
64e28b52 | 95 | static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) |
6812baab TR |
96 | { |
97 | struct smc_pnetentry *pnetelem, *tmp_pe; | |
64e28b52 | 98 | struct smc_pnettable *pnettable; |
890a2cb4 | 99 | struct smc_ib_device *ibdev; |
f3d74b22 | 100 | struct smcd_dev *smcd_dev; |
64e28b52 | 101 | struct smc_net *sn; |
6812baab | 102 | int rc = -ENOENT; |
890a2cb4 | 103 | int ibport; |
6812baab | 104 | |
64e28b52 HW |
105 | /* get pnettable for namespace */ |
106 | sn = net_generic(net, smc_net_id); | |
107 | pnettable = &sn->pnettable; | |
108 | ||
890a2cb4 | 109 | /* remove netdevices */ |
64e28b52 HW |
110 | write_lock(&pnettable->lock); |
111 | list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, | |
6812baab | 112 | list) { |
890a2cb4 HW |
113 | if (!pnet_name || |
114 | smc_pnet_match(pnetelem->pnet_name, pnet_name)) { | |
6812baab TR |
115 | list_del(&pnetelem->list); |
116 | dev_put(pnetelem->ndev); | |
117 | kfree(pnetelem); | |
118 | rc = 0; | |
6812baab TR |
119 | } |
120 | } | |
64e28b52 HW |
121 | write_unlock(&pnettable->lock); |
122 | ||
123 | /* if this is not the initial namespace, stop here */ | |
124 | if (net != &init_net) | |
125 | return rc; | |
126 | ||
890a2cb4 HW |
127 | /* remove ib devices */ |
128 | spin_lock(&smc_ib_devices.lock); | |
129 | list_for_each_entry(ibdev, &smc_ib_devices.list, list) { | |
130 | for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) { | |
131 | if (ibdev->pnetid_by_user[ibport] && | |
132 | (!pnet_name || | |
133 | smc_pnet_match(pnet_name, | |
134 | ibdev->pnetid[ibport]))) { | |
135 | memset(ibdev->pnetid[ibport], 0, | |
136 | SMC_MAX_PNETID_LEN); | |
137 | ibdev->pnetid_by_user[ibport] = false; | |
138 | rc = 0; | |
139 | } | |
140 | } | |
141 | } | |
142 | spin_unlock(&smc_ib_devices.lock); | |
f3d74b22 HW |
143 | /* remove smcd devices */ |
144 | spin_lock(&smcd_dev_list.lock); | |
145 | list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { | |
146 | if (smcd_dev->pnetid_by_user && | |
147 | (!pnet_name || | |
148 | smc_pnet_match(pnet_name, smcd_dev->pnetid))) { | |
149 | memset(smcd_dev->pnetid, 0, SMC_MAX_PNETID_LEN); | |
150 | smcd_dev->pnetid_by_user = false; | |
151 | rc = 0; | |
152 | } | |
153 | } | |
154 | spin_unlock(&smcd_dev_list.lock); | |
6812baab TR |
155 | return rc; |
156 | } | |
157 | ||
158 | /* Remove a pnet entry mentioning a given network device from the pnet table. | |
159 | */ | |
160 | static int smc_pnet_remove_by_ndev(struct net_device *ndev) | |
161 | { | |
162 | struct smc_pnetentry *pnetelem, *tmp_pe; | |
64e28b52 HW |
163 | struct smc_pnettable *pnettable; |
164 | struct net *net = dev_net(ndev); | |
165 | struct smc_net *sn; | |
6812baab TR |
166 | int rc = -ENOENT; |
167 | ||
64e28b52 HW |
168 | /* get pnettable for namespace */ |
169 | sn = net_generic(net, smc_net_id); | |
170 | pnettable = &sn->pnettable; | |
171 | ||
172 | write_lock(&pnettable->lock); | |
173 | list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { | |
6812baab TR |
174 | if (pnetelem->ndev == ndev) { |
175 | list_del(&pnetelem->list); | |
176 | dev_put(pnetelem->ndev); | |
177 | kfree(pnetelem); | |
178 | rc = 0; | |
179 | break; | |
180 | } | |
181 | } | |
64e28b52 | 182 | write_unlock(&pnettable->lock); |
6812baab TR |
183 | return rc; |
184 | } | |
185 | ||
890a2cb4 | 186 | /* Append a pnetid to the end of the pnet table if not already on this list. |
6812baab | 187 | */ |
64e28b52 HW |
188 | static int smc_pnet_enter(struct smc_pnettable *pnettable, |
189 | struct smc_user_pnetentry *new_pnetelem) | |
6812baab | 190 | { |
890a2cb4 HW |
191 | u8 pnet_null[SMC_MAX_PNETID_LEN] = {0}; |
192 | u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; | |
193 | struct smc_pnetentry *tmp_pnetelem; | |
194 | struct smc_pnetentry *pnetelem; | |
f3d74b22 | 195 | bool new_smcddev = false; |
890a2cb4 HW |
196 | struct net_device *ndev; |
197 | bool new_netdev = true; | |
198 | bool new_ibdev = false; | |
199 | ||
200 | if (new_pnetelem->smcibdev) { | |
201 | struct smc_ib_device *ib_dev = new_pnetelem->smcibdev; | |
202 | int ib_port = new_pnetelem->ib_port; | |
203 | ||
204 | spin_lock(&smc_ib_devices.lock); | |
205 | if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) { | |
206 | memcpy(ib_dev->pnetid[ib_port - 1], | |
207 | new_pnetelem->pnet_name, SMC_MAX_PNETID_LEN); | |
208 | ib_dev->pnetid_by_user[ib_port - 1] = true; | |
209 | new_ibdev = true; | |
6812baab | 210 | } |
890a2cb4 | 211 | spin_unlock(&smc_ib_devices.lock); |
6812baab | 212 | } |
f3d74b22 HW |
213 | if (new_pnetelem->smcd_dev) { |
214 | struct smcd_dev *smcd_dev = new_pnetelem->smcd_dev; | |
215 | ||
216 | spin_lock(&smcd_dev_list.lock); | |
217 | if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) { | |
218 | memcpy(smcd_dev->pnetid, new_pnetelem->pnet_name, | |
219 | SMC_MAX_PNETID_LEN); | |
220 | smcd_dev->pnetid_by_user = true; | |
221 | new_smcddev = true; | |
222 | } | |
223 | spin_unlock(&smcd_dev_list.lock); | |
224 | } | |
6812baab | 225 | |
890a2cb4 | 226 | if (!new_pnetelem->ndev) |
f3d74b22 | 227 | return (new_ibdev || new_smcddev) ? 0 : -EEXIST; |
890a2cb4 HW |
228 | |
229 | /* check if (base) netdev already has a pnetid. If there is one, we do | |
230 | * not want to add a pnet table entry | |
231 | */ | |
232 | ndev = pnet_find_base_ndev(new_pnetelem->ndev); | |
233 | if (!smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, | |
234 | ndev_pnetid)) | |
f3d74b22 | 235 | return (new_ibdev || new_smcddev) ? 0 : -EEXIST; |
890a2cb4 HW |
236 | |
237 | /* add a new netdev entry to the pnet table if there isn't one */ | |
238 | tmp_pnetelem = kzalloc(sizeof(*pnetelem), GFP_KERNEL); | |
239 | if (!tmp_pnetelem) | |
240 | return -ENOMEM; | |
241 | memcpy(tmp_pnetelem->pnet_name, new_pnetelem->pnet_name, | |
242 | SMC_MAX_PNETID_LEN); | |
243 | tmp_pnetelem->ndev = new_pnetelem->ndev; | |
6812baab | 244 | |
64e28b52 HW |
245 | write_lock(&pnettable->lock); |
246 | list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { | |
890a2cb4 HW |
247 | if (pnetelem->ndev == new_pnetelem->ndev) |
248 | new_netdev = false; | |
6812baab | 249 | } |
890a2cb4 HW |
250 | if (new_netdev) { |
251 | dev_hold(tmp_pnetelem->ndev); | |
64e28b52 HW |
252 | list_add_tail(&tmp_pnetelem->list, &pnettable->pnetlist); |
253 | write_unlock(&pnettable->lock); | |
890a2cb4 | 254 | } else { |
64e28b52 | 255 | write_unlock(&pnettable->lock); |
890a2cb4 HW |
256 | kfree(tmp_pnetelem); |
257 | } | |
258 | ||
f3d74b22 | 259 | return (new_netdev || new_ibdev || new_smcddev) ? 0 : -EEXIST; |
6812baab TR |
260 | } |
261 | ||
262 | /* The limit for pnetid is 16 characters. | |
263 | * Valid characters should be (single-byte character set) a-z, A-Z, 0-9. | |
264 | * Lower case letters are converted to upper case. | |
265 | * Interior blanks should not be used. | |
266 | */ | |
267 | static bool smc_pnetid_valid(const char *pnet_name, char *pnetid) | |
268 | { | |
269 | char *bf = skip_spaces(pnet_name); | |
270 | size_t len = strlen(bf); | |
271 | char *end = bf + len; | |
272 | ||
273 | if (!len) | |
274 | return false; | |
275 | while (--end >= bf && isspace(*end)) | |
276 | ; | |
0afff91c | 277 | if (end - bf >= SMC_MAX_PNETID_LEN) |
6812baab TR |
278 | return false; |
279 | while (bf <= end) { | |
280 | if (!isalnum(*bf)) | |
281 | return false; | |
282 | *pnetid++ = islower(*bf) ? toupper(*bf) : *bf; | |
283 | bf++; | |
284 | } | |
285 | *pnetid = '\0'; | |
286 | return true; | |
287 | } | |
288 | ||
289 | /* Find an infiniband device by a given name. The device might not exist. */ | |
249633a4 | 290 | static struct smc_ib_device *smc_pnet_find_ib(char *ib_name) |
6812baab TR |
291 | { |
292 | struct smc_ib_device *ibdev; | |
293 | ||
294 | spin_lock(&smc_ib_devices.lock); | |
295 | list_for_each_entry(ibdev, &smc_ib_devices.list, list) { | |
296 | if (!strncmp(ibdev->ibdev->name, ib_name, | |
af5f60c7 HW |
297 | sizeof(ibdev->ibdev->name)) || |
298 | !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name, | |
299 | IB_DEVICE_NAME_MAX - 1)) { | |
6812baab TR |
300 | goto out; |
301 | } | |
302 | } | |
303 | ibdev = NULL; | |
304 | out: | |
305 | spin_unlock(&smc_ib_devices.lock); | |
306 | return ibdev; | |
307 | } | |
308 | ||
f3d74b22 HW |
309 | /* Find an smcd device by a given name. The device might not exist. */ |
310 | static struct smcd_dev *smc_pnet_find_smcd(char *smcd_name) | |
311 | { | |
312 | struct smcd_dev *smcd_dev; | |
313 | ||
314 | spin_lock(&smcd_dev_list.lock); | |
315 | list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { | |
316 | if (!strncmp(dev_name(&smcd_dev->dev), smcd_name, | |
317 | IB_DEVICE_NAME_MAX - 1)) | |
318 | goto out; | |
319 | } | |
320 | smcd_dev = NULL; | |
321 | out: | |
322 | spin_unlock(&smcd_dev_list.lock); | |
323 | return smcd_dev; | |
324 | } | |
325 | ||
6812baab TR |
326 | /* Parse the supplied netlink attributes and fill a pnetentry structure. |
327 | * For ethernet and infiniband device names verify that the devices exist. | |
328 | */ | |
890a2cb4 HW |
329 | static int smc_pnet_fill_entry(struct net *net, |
330 | struct smc_user_pnetentry *pnetelem, | |
6812baab TR |
331 | struct nlattr *tb[]) |
332 | { | |
d49baa7e EB |
333 | char *string, *ibname; |
334 | int rc; | |
6812baab TR |
335 | |
336 | memset(pnetelem, 0, sizeof(*pnetelem)); | |
337 | INIT_LIST_HEAD(&pnetelem->list); | |
d49baa7e EB |
338 | |
339 | rc = -EINVAL; | |
340 | if (!tb[SMC_PNETID_NAME]) | |
341 | goto error; | |
342 | string = (char *)nla_data(tb[SMC_PNETID_NAME]); | |
343 | if (!smc_pnetid_valid(string, pnetelem->pnet_name)) | |
344 | goto error; | |
345 | ||
346 | rc = -EINVAL; | |
890a2cb4 HW |
347 | if (tb[SMC_PNETID_ETHNAME]) { |
348 | string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]); | |
349 | pnetelem->ndev = dev_get_by_name(net, string); | |
350 | if (!pnetelem->ndev) | |
351 | goto error; | |
352 | } | |
d49baa7e | 353 | |
64e28b52 HW |
354 | /* if this is not the initial namespace, stop here */ |
355 | if (net != &init_net) | |
356 | return 0; | |
357 | ||
d49baa7e | 358 | rc = -EINVAL; |
890a2cb4 HW |
359 | if (tb[SMC_PNETID_IBNAME]) { |
360 | ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]); | |
361 | ibname = strim(ibname); | |
362 | pnetelem->smcibdev = smc_pnet_find_ib(ibname); | |
f3d74b22 HW |
363 | pnetelem->smcd_dev = smc_pnet_find_smcd(ibname); |
364 | if (!pnetelem->smcibdev && !pnetelem->smcd_dev) | |
890a2cb4 HW |
365 | goto error; |
366 | if (pnetelem->smcibdev) { | |
367 | if (!tb[SMC_PNETID_IBPORT]) | |
368 | goto error; | |
369 | pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]); | |
370 | if (pnetelem->ib_port < 1 || | |
371 | pnetelem->ib_port > SMC_MAX_PORTS) | |
372 | goto error; | |
373 | } | |
374 | } | |
d49baa7e | 375 | |
6812baab TR |
376 | return 0; |
377 | ||
378 | error: | |
379 | if (pnetelem->ndev) | |
380 | dev_put(pnetelem->ndev); | |
381 | return rc; | |
382 | } | |
383 | ||
384 | /* Convert an smc_pnetentry to a netlink attribute sequence */ | |
890a2cb4 HW |
385 | static int smc_pnet_set_nla(struct sk_buff *msg, |
386 | struct smc_user_pnetentry *pnetelem) | |
6812baab | 387 | { |
890a2cb4 | 388 | if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name)) |
6812baab | 389 | return -1; |
890a2cb4 HW |
390 | if (pnetelem->ndev) { |
391 | if (nla_put_string(msg, SMC_PNETID_ETHNAME, | |
392 | pnetelem->ndev->name)) | |
393 | return -1; | |
394 | } else { | |
395 | if (nla_put_string(msg, SMC_PNETID_ETHNAME, "n/a")) | |
396 | return -1; | |
6812baab | 397 | } |
890a2cb4 HW |
398 | if (pnetelem->smcibdev) { |
399 | if (nla_put_string(msg, SMC_PNETID_IBNAME, | |
af5f60c7 | 400 | dev_name(pnetelem->smcibdev->ibdev->dev.parent)) || |
890a2cb4 HW |
401 | nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port)) |
402 | return -1; | |
f3d74b22 HW |
403 | } else if (pnetelem->smcd_dev) { |
404 | if (nla_put_string(msg, SMC_PNETID_IBNAME, | |
405 | dev_name(&pnetelem->smcd_dev->dev)) || | |
406 | nla_put_u8(msg, SMC_PNETID_IBPORT, 1)) | |
407 | return -1; | |
890a2cb4 HW |
408 | } else { |
409 | if (nla_put_string(msg, SMC_PNETID_IBNAME, "n/a") || | |
410 | nla_put_u8(msg, SMC_PNETID_IBPORT, 0xff)) | |
411 | return -1; | |
6812baab TR |
412 | } |
413 | ||
890a2cb4 | 414 | return 0; |
6812baab TR |
415 | } |
416 | ||
417 | static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info) | |
418 | { | |
419 | struct net *net = genl_info_net(info); | |
890a2cb4 | 420 | struct smc_user_pnetentry pnetelem; |
64e28b52 HW |
421 | struct smc_pnettable *pnettable; |
422 | struct smc_net *sn; | |
6812baab TR |
423 | int rc; |
424 | ||
64e28b52 HW |
425 | /* get pnettable for namespace */ |
426 | sn = net_generic(net, smc_net_id); | |
427 | pnettable = &sn->pnettable; | |
428 | ||
890a2cb4 | 429 | rc = smc_pnet_fill_entry(net, &pnetelem, info->attrs); |
6812baab | 430 | if (!rc) |
64e28b52 | 431 | rc = smc_pnet_enter(pnettable, &pnetelem); |
890a2cb4 HW |
432 | if (pnetelem.ndev) |
433 | dev_put(pnetelem.ndev); | |
6812baab TR |
434 | return rc; |
435 | } | |
436 | ||
437 | static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info) | |
438 | { | |
64e28b52 HW |
439 | struct net *net = genl_info_net(info); |
440 | ||
d49baa7e EB |
441 | if (!info->attrs[SMC_PNETID_NAME]) |
442 | return -EINVAL; | |
64e28b52 | 443 | return smc_pnet_remove_by_pnetid(net, |
6812baab TR |
444 | (char *)nla_data(info->attrs[SMC_PNETID_NAME])); |
445 | } | |
446 | ||
447 | static int smc_pnet_dump_start(struct netlink_callback *cb) | |
448 | { | |
449 | cb->args[0] = 0; | |
450 | return 0; | |
451 | } | |
452 | ||
453 | static int smc_pnet_dumpinfo(struct sk_buff *skb, | |
454 | u32 portid, u32 seq, u32 flags, | |
890a2cb4 | 455 | struct smc_user_pnetentry *pnetelem) |
6812baab TR |
456 | { |
457 | void *hdr; | |
458 | ||
459 | hdr = genlmsg_put(skb, portid, seq, &smc_pnet_nl_family, | |
460 | flags, SMC_PNETID_GET); | |
461 | if (!hdr) | |
462 | return -ENOMEM; | |
463 | if (smc_pnet_set_nla(skb, pnetelem) < 0) { | |
464 | genlmsg_cancel(skb, hdr); | |
465 | return -EMSGSIZE; | |
466 | } | |
467 | genlmsg_end(skb, hdr); | |
468 | return 0; | |
469 | } | |
470 | ||
64e28b52 HW |
471 | static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid, |
472 | u32 seq, u8 *pnetid, int start_idx) | |
6812baab | 473 | { |
890a2cb4 | 474 | struct smc_user_pnetentry tmp_entry; |
64e28b52 | 475 | struct smc_pnettable *pnettable; |
6812baab | 476 | struct smc_pnetentry *pnetelem; |
890a2cb4 | 477 | struct smc_ib_device *ibdev; |
f3d74b22 | 478 | struct smcd_dev *smcd_dev; |
64e28b52 | 479 | struct smc_net *sn; |
6812baab | 480 | int idx = 0; |
890a2cb4 | 481 | int ibport; |
6812baab | 482 | |
64e28b52 HW |
483 | /* get pnettable for namespace */ |
484 | sn = net_generic(net, smc_net_id); | |
485 | pnettable = &sn->pnettable; | |
486 | ||
890a2cb4 | 487 | /* dump netdevices */ |
64e28b52 HW |
488 | read_lock(&pnettable->lock); |
489 | list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { | |
890a2cb4 HW |
490 | if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid)) |
491 | continue; | |
492 | if (idx++ < start_idx) | |
6812baab | 493 | continue; |
890a2cb4 HW |
494 | memset(&tmp_entry, 0, sizeof(tmp_entry)); |
495 | memcpy(&tmp_entry.pnet_name, pnetelem->pnet_name, | |
496 | SMC_MAX_PNETID_LEN); | |
497 | tmp_entry.ndev = pnetelem->ndev; | |
498 | if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI, | |
499 | &tmp_entry)) { | |
6812baab TR |
500 | --idx; |
501 | break; | |
502 | } | |
503 | } | |
64e28b52 HW |
504 | read_unlock(&pnettable->lock); |
505 | ||
506 | /* if this is not the initial namespace, stop here */ | |
507 | if (net != &init_net) | |
508 | return idx; | |
890a2cb4 HW |
509 | |
510 | /* dump ib devices */ | |
511 | spin_lock(&smc_ib_devices.lock); | |
512 | list_for_each_entry(ibdev, &smc_ib_devices.list, list) { | |
513 | for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) { | |
514 | if (ibdev->pnetid_by_user[ibport]) { | |
515 | if (pnetid && | |
516 | !smc_pnet_match(ibdev->pnetid[ibport], | |
517 | pnetid)) | |
518 | continue; | |
519 | if (idx++ < start_idx) | |
520 | continue; | |
521 | memset(&tmp_entry, 0, sizeof(tmp_entry)); | |
522 | memcpy(&tmp_entry.pnet_name, | |
523 | ibdev->pnetid[ibport], | |
524 | SMC_MAX_PNETID_LEN); | |
525 | tmp_entry.smcibdev = ibdev; | |
526 | tmp_entry.ib_port = ibport + 1; | |
527 | if (smc_pnet_dumpinfo(skb, portid, seq, | |
528 | NLM_F_MULTI, | |
529 | &tmp_entry)) { | |
530 | --idx; | |
531 | break; | |
532 | } | |
533 | } | |
534 | } | |
535 | } | |
536 | spin_unlock(&smc_ib_devices.lock); | |
537 | ||
f3d74b22 HW |
538 | /* dump smcd devices */ |
539 | spin_lock(&smcd_dev_list.lock); | |
540 | list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { | |
541 | if (smcd_dev->pnetid_by_user) { | |
542 | if (pnetid && !smc_pnet_match(smcd_dev->pnetid, pnetid)) | |
543 | continue; | |
544 | if (idx++ < start_idx) | |
545 | continue; | |
546 | memset(&tmp_entry, 0, sizeof(tmp_entry)); | |
547 | memcpy(&tmp_entry.pnet_name, smcd_dev->pnetid, | |
548 | SMC_MAX_PNETID_LEN); | |
549 | tmp_entry.smcd_dev = smcd_dev; | |
550 | if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI, | |
551 | &tmp_entry)) { | |
552 | --idx; | |
553 | break; | |
554 | } | |
555 | } | |
556 | } | |
557 | spin_unlock(&smcd_dev_list.lock); | |
558 | ||
890a2cb4 HW |
559 | return idx; |
560 | } | |
561 | ||
562 | static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb) | |
563 | { | |
64e28b52 | 564 | struct net *net = sock_net(skb->sk); |
890a2cb4 HW |
565 | int idx; |
566 | ||
64e28b52 | 567 | idx = _smc_pnet_dump(net, skb, NETLINK_CB(cb->skb).portid, |
890a2cb4 HW |
568 | cb->nlh->nlmsg_seq, NULL, cb->args[0]); |
569 | ||
570 | cb->args[0] = idx; | |
6812baab TR |
571 | return skb->len; |
572 | } | |
573 | ||
890a2cb4 HW |
574 | /* Retrieve one PNETID entry */ |
575 | static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info) | |
576 | { | |
64e28b52 | 577 | struct net *net = genl_info_net(info); |
890a2cb4 HW |
578 | struct sk_buff *msg; |
579 | void *hdr; | |
580 | ||
581 | if (!info->attrs[SMC_PNETID_NAME]) | |
582 | return -EINVAL; | |
583 | ||
584 | msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); | |
585 | if (!msg) | |
586 | return -ENOMEM; | |
587 | ||
64e28b52 | 588 | _smc_pnet_dump(net, msg, info->snd_portid, info->snd_seq, |
890a2cb4 HW |
589 | nla_data(info->attrs[SMC_PNETID_NAME]), 0); |
590 | ||
591 | /* finish multi part message and send it */ | |
592 | hdr = nlmsg_put(msg, info->snd_portid, info->snd_seq, NLMSG_DONE, 0, | |
593 | NLM_F_MULTI); | |
594 | if (!hdr) { | |
595 | nlmsg_free(msg); | |
596 | return -EMSGSIZE; | |
597 | } | |
598 | return genlmsg_reply(msg, info); | |
599 | } | |
600 | ||
6812baab TR |
601 | /* Remove and delete all pnetids from pnet table. |
602 | */ | |
603 | static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info) | |
604 | { | |
64e28b52 HW |
605 | struct net *net = genl_info_net(info); |
606 | ||
8ef659f1 KG |
607 | smc_pnet_remove_by_pnetid(net, NULL); |
608 | return 0; | |
6812baab TR |
609 | } |
610 | ||
611 | /* SMC_PNETID generic netlink operation definition */ | |
612 | static const struct genl_ops smc_pnet_ops[] = { | |
613 | { | |
614 | .cmd = SMC_PNETID_GET, | |
ef6243ac | 615 | .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, |
6812baab | 616 | .flags = GENL_ADMIN_PERM, |
6812baab TR |
617 | .doit = smc_pnet_get, |
618 | .dumpit = smc_pnet_dump, | |
619 | .start = smc_pnet_dump_start | |
620 | }, | |
621 | { | |
622 | .cmd = SMC_PNETID_ADD, | |
ef6243ac | 623 | .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, |
6812baab | 624 | .flags = GENL_ADMIN_PERM, |
6812baab TR |
625 | .doit = smc_pnet_add |
626 | }, | |
627 | { | |
628 | .cmd = SMC_PNETID_DEL, | |
ef6243ac | 629 | .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, |
6812baab | 630 | .flags = GENL_ADMIN_PERM, |
6812baab TR |
631 | .doit = smc_pnet_del |
632 | }, | |
633 | { | |
634 | .cmd = SMC_PNETID_FLUSH, | |
ef6243ac | 635 | .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, |
6812baab | 636 | .flags = GENL_ADMIN_PERM, |
6812baab TR |
637 | .doit = smc_pnet_flush |
638 | } | |
639 | }; | |
640 | ||
641 | /* SMC_PNETID family definition */ | |
56ce3c5a | 642 | static struct genl_family smc_pnet_nl_family __ro_after_init = { |
6812baab TR |
643 | .hdrsize = 0, |
644 | .name = SMCR_GENL_FAMILY_NAME, | |
645 | .version = SMCR_GENL_FAMILY_VERSION, | |
646 | .maxattr = SMC_PNETID_MAX, | |
3b0f31f2 | 647 | .policy = smc_pnet_policy, |
6812baab TR |
648 | .netnsok = true, |
649 | .module = THIS_MODULE, | |
650 | .ops = smc_pnet_ops, | |
651 | .n_ops = ARRAY_SIZE(smc_pnet_ops) | |
652 | }; | |
653 | ||
654 | static int smc_pnet_netdev_event(struct notifier_block *this, | |
655 | unsigned long event, void *ptr) | |
656 | { | |
657 | struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); | |
658 | ||
659 | switch (event) { | |
660 | case NETDEV_REBOOT: | |
661 | case NETDEV_UNREGISTER: | |
662 | smc_pnet_remove_by_ndev(event_dev); | |
be6a3f38 | 663 | return NOTIFY_OK; |
6812baab | 664 | default: |
be6a3f38 | 665 | return NOTIFY_DONE; |
6812baab | 666 | } |
6812baab TR |
667 | } |
668 | ||
669 | static struct notifier_block smc_netdev_notifier = { | |
670 | .notifier_call = smc_pnet_netdev_event | |
671 | }; | |
672 | ||
64e28b52 HW |
673 | /* init network namespace */ |
674 | int smc_pnet_net_init(struct net *net) | |
675 | { | |
676 | struct smc_net *sn = net_generic(net, smc_net_id); | |
677 | struct smc_pnettable *pnettable = &sn->pnettable; | |
678 | ||
679 | INIT_LIST_HEAD(&pnettable->pnetlist); | |
680 | rwlock_init(&pnettable->lock); | |
681 | ||
682 | return 0; | |
683 | } | |
684 | ||
6812baab TR |
685 | int __init smc_pnet_init(void) |
686 | { | |
687 | int rc; | |
688 | ||
689 | rc = genl_register_family(&smc_pnet_nl_family); | |
690 | if (rc) | |
691 | return rc; | |
692 | rc = register_netdevice_notifier(&smc_netdev_notifier); | |
693 | if (rc) | |
694 | genl_unregister_family(&smc_pnet_nl_family); | |
695 | return rc; | |
696 | } | |
697 | ||
64e28b52 HW |
698 | /* exit network namespace */ |
699 | void smc_pnet_net_exit(struct net *net) | |
700 | { | |
701 | /* flush pnet table */ | |
702 | smc_pnet_remove_by_pnetid(net, NULL); | |
703 | } | |
704 | ||
6812baab TR |
705 | void smc_pnet_exit(void) |
706 | { | |
6812baab TR |
707 | unregister_netdevice_notifier(&smc_netdev_notifier); |
708 | genl_unregister_family(&smc_pnet_nl_family); | |
709 | } | |
710 | ||
0afff91c UB |
711 | /* Determine one base device for stacked net devices. |
712 | * If the lower device level contains more than one devices | |
713 | * (for instance with bonding slaves), just the first device | |
714 | * is used to reach a base device. | |
6812baab | 715 | */ |
0afff91c | 716 | static struct net_device *pnet_find_base_ndev(struct net_device *ndev) |
6812baab | 717 | { |
0afff91c | 718 | int i, nest_lvl; |
6812baab | 719 | |
0afff91c UB |
720 | rtnl_lock(); |
721 | nest_lvl = dev_get_nest_level(ndev); | |
722 | for (i = 0; i < nest_lvl; i++) { | |
723 | struct list_head *lower = &ndev->adj_list.lower; | |
724 | ||
725 | if (list_empty(lower)) | |
726 | break; | |
727 | lower = lower->next; | |
728 | ndev = netdev_lower_get_next(ndev, &lower); | |
729 | } | |
730 | rtnl_unlock(); | |
731 | return ndev; | |
732 | } | |
733 | ||
64e28b52 | 734 | static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev, |
890a2cb4 HW |
735 | u8 *pnetid) |
736 | { | |
64e28b52 HW |
737 | struct smc_pnettable *pnettable; |
738 | struct net *net = dev_net(ndev); | |
890a2cb4 | 739 | struct smc_pnetentry *pnetelem; |
64e28b52 | 740 | struct smc_net *sn; |
890a2cb4 HW |
741 | int rc = -ENOENT; |
742 | ||
64e28b52 HW |
743 | /* get pnettable for namespace */ |
744 | sn = net_generic(net, smc_net_id); | |
745 | pnettable = &sn->pnettable; | |
746 | ||
747 | read_lock(&pnettable->lock); | |
748 | list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { | |
749 | if (ndev == pnetelem->ndev) { | |
890a2cb4 HW |
750 | /* get pnetid of netdev device */ |
751 | memcpy(pnetid, pnetelem->pnet_name, SMC_MAX_PNETID_LEN); | |
752 | rc = 0; | |
753 | break; | |
754 | } | |
755 | } | |
64e28b52 | 756 | read_unlock(&pnettable->lock); |
890a2cb4 HW |
757 | return rc; |
758 | } | |
759 | ||
54903572 UB |
760 | /* if handshake network device belongs to a roce device, return its |
761 | * IB device and port | |
762 | */ | |
763 | static void smc_pnet_find_rdma_dev(struct net_device *netdev, | |
bc36d2fc | 764 | struct smc_init_info *ini) |
54903572 UB |
765 | { |
766 | struct smc_ib_device *ibdev; | |
767 | ||
768 | spin_lock(&smc_ib_devices.lock); | |
769 | list_for_each_entry(ibdev, &smc_ib_devices.list, list) { | |
770 | struct net_device *ndev; | |
771 | int i; | |
772 | ||
773 | for (i = 1; i <= SMC_MAX_PORTS; i++) { | |
774 | if (!rdma_is_port_valid(ibdev->ibdev, i)) | |
775 | continue; | |
776 | if (!ibdev->ibdev->ops.get_netdev) | |
777 | continue; | |
778 | ndev = ibdev->ibdev->ops.get_netdev(ibdev->ibdev, i); | |
779 | if (!ndev) | |
780 | continue; | |
781 | dev_put(ndev); | |
782 | if (netdev == ndev && | |
783 | smc_ib_port_active(ibdev, i) && | |
bc36d2fc KG |
784 | !smc_ib_determine_gid(ibdev, i, ini->vlan_id, |
785 | ini->ib_gid, NULL)) { | |
786 | ini->ib_dev = ibdev; | |
787 | ini->ib_port = i; | |
54903572 UB |
788 | break; |
789 | } | |
790 | } | |
791 | } | |
792 | spin_unlock(&smc_ib_devices.lock); | |
793 | } | |
794 | ||
0afff91c | 795 | /* Determine the corresponding IB device port based on the hardware PNETID. |
7005ada6 UB |
796 | * Searching stops at the first matching active IB device port with vlan_id |
797 | * configured. | |
54903572 UB |
798 | * If nothing found, check pnetid table. |
799 | * If nothing found, try to use handshake device | |
0afff91c UB |
800 | */ |
801 | static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, | |
bc36d2fc | 802 | struct smc_init_info *ini) |
0afff91c UB |
803 | { |
804 | u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; | |
805 | struct smc_ib_device *ibdev; | |
806 | int i; | |
807 | ||
808 | ndev = pnet_find_base_ndev(ndev); | |
809 | if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, | |
890a2cb4 | 810 | ndev_pnetid) && |
54903572 | 811 | smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) { |
bc36d2fc | 812 | smc_pnet_find_rdma_dev(ndev, ini); |
0afff91c | 813 | return; /* pnetid could not be determined */ |
54903572 | 814 | } |
0afff91c UB |
815 | |
816 | spin_lock(&smc_ib_devices.lock); | |
817 | list_for_each_entry(ibdev, &smc_ib_devices.list, list) { | |
818 | for (i = 1; i <= SMC_MAX_PORTS; i++) { | |
7005ada6 UB |
819 | if (!rdma_is_port_valid(ibdev->ibdev, i)) |
820 | continue; | |
890a2cb4 | 821 | if (smc_pnet_match(ibdev->pnetid[i - 1], ndev_pnetid) && |
7005ada6 | 822 | smc_ib_port_active(ibdev, i) && |
bc36d2fc KG |
823 | !smc_ib_determine_gid(ibdev, i, ini->vlan_id, |
824 | ini->ib_gid, NULL)) { | |
825 | ini->ib_dev = ibdev; | |
826 | ini->ib_port = i; | |
7005ada6 | 827 | goto out; |
0afff91c UB |
828 | } |
829 | } | |
830 | } | |
7005ada6 | 831 | out: |
0afff91c UB |
832 | spin_unlock(&smc_ib_devices.lock); |
833 | } | |
834 | ||
1619f770 | 835 | static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, |
bc36d2fc | 836 | struct smc_init_info *ini) |
1619f770 HW |
837 | { |
838 | u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; | |
839 | struct smcd_dev *ismdev; | |
840 | ||
841 | ndev = pnet_find_base_ndev(ndev); | |
842 | if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, | |
f3d74b22 HW |
843 | ndev_pnetid) && |
844 | smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) | |
1619f770 HW |
845 | return; /* pnetid could not be determined */ |
846 | ||
847 | spin_lock(&smcd_dev_list.lock); | |
848 | list_for_each_entry(ismdev, &smcd_dev_list.list, list) { | |
f3d74b22 | 849 | if (smc_pnet_match(ismdev->pnetid, ndev_pnetid)) { |
bc36d2fc | 850 | ini->ism_dev = ismdev; |
1619f770 HW |
851 | break; |
852 | } | |
853 | } | |
854 | spin_unlock(&smcd_dev_list.lock); | |
855 | } | |
856 | ||
0afff91c UB |
857 | /* PNET table analysis for a given sock: |
858 | * determine ib_device and port belonging to used internal TCP socket | |
859 | * ethernet interface. | |
860 | */ | |
bc36d2fc | 861 | void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini) |
0afff91c UB |
862 | { |
863 | struct dst_entry *dst = sk_dst_get(sk); | |
864 | ||
bc36d2fc KG |
865 | ini->ib_dev = NULL; |
866 | ini->ib_port = 0; | |
0afff91c UB |
867 | if (!dst) |
868 | goto out; | |
869 | if (!dst->dev) | |
870 | goto out_rel; | |
871 | ||
bc36d2fc | 872 | smc_pnet_find_roce_by_pnetid(dst->dev, ini); |
0afff91c | 873 | |
6812baab TR |
874 | out_rel: |
875 | dst_release(dst); | |
0afff91c UB |
876 | out: |
877 | return; | |
6812baab | 878 | } |
1619f770 | 879 | |
bc36d2fc | 880 | void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini) |
1619f770 HW |
881 | { |
882 | struct dst_entry *dst = sk_dst_get(sk); | |
883 | ||
bc36d2fc | 884 | ini->ism_dev = NULL; |
1619f770 HW |
885 | if (!dst) |
886 | goto out; | |
887 | if (!dst->dev) | |
888 | goto out_rel; | |
889 | ||
bc36d2fc | 890 | smc_pnet_find_ism_by_pnetid(dst->dev, ini); |
1619f770 HW |
891 | |
892 | out_rel: | |
893 | dst_release(dst); | |
894 | out: | |
895 | return; | |
896 | } |