Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
6812baab TR |
2 | /* |
3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE | |
4 | * | |
5 | * Generic netlink support functions to configure an SMC-R PNET table | |
6 | * | |
7 | * Copyright IBM Corp. 2016 | |
8 | * | |
9 | * Author(s): Thomas Richter <tmricht@linux.vnet.ibm.com> | |
10 | */ | |
11 | ||
12 | #include <linux/module.h> | |
13 | #include <linux/list.h> | |
14 | #include <linux/ctype.h> | |
15 | #include <net/netlink.h> | |
16 | #include <net/genetlink.h> | |
17 | ||
18 | #include <uapi/linux/if.h> | |
19 | #include <uapi/linux/smc.h> | |
20 | ||
21 | #include <rdma/ib_verbs.h> | |
22 | ||
23 | #include "smc_pnet.h" | |
24 | #include "smc_ib.h" | |
25 | ||
26 | #define SMC_MAX_PNET_ID_LEN 16 /* Max. length of PNET id */ | |
27 | ||
28 | static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = { | |
29 | [SMC_PNETID_NAME] = { | |
30 | .type = NLA_NUL_STRING, | |
31 | .len = SMC_MAX_PNET_ID_LEN - 1 | |
32 | }, | |
33 | [SMC_PNETID_ETHNAME] = { | |
34 | .type = NLA_NUL_STRING, | |
35 | .len = IFNAMSIZ - 1 | |
36 | }, | |
37 | [SMC_PNETID_IBNAME] = { | |
38 | .type = NLA_NUL_STRING, | |
39 | .len = IB_DEVICE_NAME_MAX - 1 | |
40 | }, | |
41 | [SMC_PNETID_IBPORT] = { .type = NLA_U8 } | |
42 | }; | |
43 | ||
44 | static struct genl_family smc_pnet_nl_family; | |
45 | ||
46 | /** | |
47 | * struct smc_pnettable - SMC PNET table anchor | |
48 | * @lock: Lock for list action | |
49 | * @pnetlist: List of PNETIDs | |
50 | */ | |
51 | static struct smc_pnettable { | |
52 | rwlock_t lock; | |
53 | struct list_head pnetlist; | |
54 | } smc_pnettable = { | |
55 | .pnetlist = LIST_HEAD_INIT(smc_pnettable.pnetlist), | |
56 | .lock = __RW_LOCK_UNLOCKED(smc_pnettable.lock) | |
57 | }; | |
58 | ||
59 | /** | |
60 | * struct smc_pnetentry - pnet identifier name entry | |
61 | * @list: List node. | |
62 | * @pnet_name: Pnet identifier name | |
63 | * @ndev: pointer to network device. | |
64 | * @smcibdev: Pointer to IB device. | |
65 | */ | |
66 | struct smc_pnetentry { | |
67 | struct list_head list; | |
68 | char pnet_name[SMC_MAX_PNET_ID_LEN + 1]; | |
69 | struct net_device *ndev; | |
70 | struct smc_ib_device *smcibdev; | |
71 | u8 ib_port; | |
72 | }; | |
73 | ||
74 | /* Check if two RDMA device entries are identical. Use device name and port | |
75 | * number for comparison. | |
76 | */ | |
77 | static bool smc_pnet_same_ibname(struct smc_pnetentry *pnetelem, char *ibname, | |
78 | u8 ibport) | |
79 | { | |
80 | return pnetelem->ib_port == ibport && | |
81 | !strncmp(pnetelem->smcibdev->ibdev->name, ibname, | |
82 | sizeof(pnetelem->smcibdev->ibdev->name)); | |
83 | } | |
84 | ||
85 | /* Find a pnetid in the pnet table. | |
86 | */ | |
87 | static struct smc_pnetentry *smc_pnet_find_pnetid(char *pnet_name) | |
88 | { | |
89 | struct smc_pnetentry *pnetelem, *found_pnetelem = NULL; | |
90 | ||
91 | read_lock(&smc_pnettable.lock); | |
92 | list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { | |
93 | if (!strncmp(pnetelem->pnet_name, pnet_name, | |
94 | sizeof(pnetelem->pnet_name))) { | |
95 | found_pnetelem = pnetelem; | |
96 | break; | |
97 | } | |
98 | } | |
99 | read_unlock(&smc_pnettable.lock); | |
100 | return found_pnetelem; | |
101 | } | |
102 | ||
103 | /* Remove a pnetid from the pnet table. | |
104 | */ | |
105 | static int smc_pnet_remove_by_pnetid(char *pnet_name) | |
106 | { | |
107 | struct smc_pnetentry *pnetelem, *tmp_pe; | |
108 | int rc = -ENOENT; | |
109 | ||
110 | write_lock(&smc_pnettable.lock); | |
111 | list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist, | |
112 | list) { | |
113 | if (!strncmp(pnetelem->pnet_name, pnet_name, | |
114 | sizeof(pnetelem->pnet_name))) { | |
115 | list_del(&pnetelem->list); | |
116 | dev_put(pnetelem->ndev); | |
117 | kfree(pnetelem); | |
118 | rc = 0; | |
119 | break; | |
120 | } | |
121 | } | |
122 | write_unlock(&smc_pnettable.lock); | |
123 | return rc; | |
124 | } | |
125 | ||
126 | /* Remove a pnet entry mentioning a given network device from the pnet table. | |
127 | */ | |
128 | static int smc_pnet_remove_by_ndev(struct net_device *ndev) | |
129 | { | |
130 | struct smc_pnetentry *pnetelem, *tmp_pe; | |
131 | int rc = -ENOENT; | |
132 | ||
133 | write_lock(&smc_pnettable.lock); | |
134 | list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist, | |
135 | list) { | |
136 | if (pnetelem->ndev == ndev) { | |
137 | list_del(&pnetelem->list); | |
138 | dev_put(pnetelem->ndev); | |
139 | kfree(pnetelem); | |
140 | rc = 0; | |
141 | break; | |
142 | } | |
143 | } | |
144 | write_unlock(&smc_pnettable.lock); | |
145 | return rc; | |
146 | } | |
147 | ||
148 | /* Remove a pnet entry mentioning a given ib device from the pnet table. | |
149 | */ | |
150 | int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev) | |
151 | { | |
152 | struct smc_pnetentry *pnetelem, *tmp_pe; | |
153 | int rc = -ENOENT; | |
154 | ||
155 | write_lock(&smc_pnettable.lock); | |
156 | list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist, | |
157 | list) { | |
158 | if (pnetelem->smcibdev == ibdev) { | |
159 | list_del(&pnetelem->list); | |
160 | dev_put(pnetelem->ndev); | |
161 | kfree(pnetelem); | |
162 | rc = 0; | |
163 | break; | |
164 | } | |
165 | } | |
166 | write_unlock(&smc_pnettable.lock); | |
167 | return rc; | |
168 | } | |
169 | ||
170 | /* Append a pnetid to the end of the pnet table if not already on this list. | |
171 | */ | |
172 | static int smc_pnet_enter(struct smc_pnetentry *new_pnetelem) | |
173 | { | |
174 | struct smc_pnetentry *pnetelem; | |
175 | int rc = -EEXIST; | |
176 | ||
177 | write_lock(&smc_pnettable.lock); | |
178 | list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { | |
179 | if (!strncmp(pnetelem->pnet_name, new_pnetelem->pnet_name, | |
180 | sizeof(new_pnetelem->pnet_name)) || | |
181 | !strncmp(pnetelem->ndev->name, new_pnetelem->ndev->name, | |
182 | sizeof(new_pnetelem->ndev->name)) || | |
183 | smc_pnet_same_ibname(pnetelem, | |
184 | new_pnetelem->smcibdev->ibdev->name, | |
a6832c3a UB |
185 | new_pnetelem->ib_port)) { |
186 | dev_put(pnetelem->ndev); | |
6812baab | 187 | goto found; |
a6832c3a | 188 | } |
6812baab TR |
189 | } |
190 | list_add_tail(&new_pnetelem->list, &smc_pnettable.pnetlist); | |
191 | rc = 0; | |
192 | found: | |
193 | write_unlock(&smc_pnettable.lock); | |
194 | return rc; | |
195 | } | |
196 | ||
197 | /* The limit for pnetid is 16 characters. | |
198 | * Valid characters should be (single-byte character set) a-z, A-Z, 0-9. | |
199 | * Lower case letters are converted to upper case. | |
200 | * Interior blanks should not be used. | |
201 | */ | |
202 | static bool smc_pnetid_valid(const char *pnet_name, char *pnetid) | |
203 | { | |
204 | char *bf = skip_spaces(pnet_name); | |
205 | size_t len = strlen(bf); | |
206 | char *end = bf + len; | |
207 | ||
208 | if (!len) | |
209 | return false; | |
210 | while (--end >= bf && isspace(*end)) | |
211 | ; | |
212 | if (end - bf >= SMC_MAX_PNET_ID_LEN) | |
213 | return false; | |
214 | while (bf <= end) { | |
215 | if (!isalnum(*bf)) | |
216 | return false; | |
217 | *pnetid++ = islower(*bf) ? toupper(*bf) : *bf; | |
218 | bf++; | |
219 | } | |
220 | *pnetid = '\0'; | |
221 | return true; | |
222 | } | |
223 | ||
224 | /* Find an infiniband device by a given name. The device might not exist. */ | |
249633a4 | 225 | static struct smc_ib_device *smc_pnet_find_ib(char *ib_name) |
6812baab TR |
226 | { |
227 | struct smc_ib_device *ibdev; | |
228 | ||
229 | spin_lock(&smc_ib_devices.lock); | |
230 | list_for_each_entry(ibdev, &smc_ib_devices.list, list) { | |
231 | if (!strncmp(ibdev->ibdev->name, ib_name, | |
232 | sizeof(ibdev->ibdev->name))) { | |
233 | goto out; | |
234 | } | |
235 | } | |
236 | ibdev = NULL; | |
237 | out: | |
238 | spin_unlock(&smc_ib_devices.lock); | |
239 | return ibdev; | |
240 | } | |
241 | ||
242 | /* Parse the supplied netlink attributes and fill a pnetentry structure. | |
243 | * For ethernet and infiniband device names verify that the devices exist. | |
244 | */ | |
245 | static int smc_pnet_fill_entry(struct net *net, struct smc_pnetentry *pnetelem, | |
246 | struct nlattr *tb[]) | |
247 | { | |
d49baa7e EB |
248 | char *string, *ibname; |
249 | int rc; | |
6812baab TR |
250 | |
251 | memset(pnetelem, 0, sizeof(*pnetelem)); | |
252 | INIT_LIST_HEAD(&pnetelem->list); | |
d49baa7e EB |
253 | |
254 | rc = -EINVAL; | |
255 | if (!tb[SMC_PNETID_NAME]) | |
256 | goto error; | |
257 | string = (char *)nla_data(tb[SMC_PNETID_NAME]); | |
258 | if (!smc_pnetid_valid(string, pnetelem->pnet_name)) | |
259 | goto error; | |
260 | ||
261 | rc = -EINVAL; | |
262 | if (!tb[SMC_PNETID_ETHNAME]) | |
263 | goto error; | |
264 | rc = -ENOENT; | |
265 | string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]); | |
266 | pnetelem->ndev = dev_get_by_name(net, string); | |
267 | if (!pnetelem->ndev) | |
268 | goto error; | |
269 | ||
270 | rc = -EINVAL; | |
271 | if (!tb[SMC_PNETID_IBNAME]) | |
272 | goto error; | |
273 | rc = -ENOENT; | |
274 | ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]); | |
275 | ibname = strim(ibname); | |
276 | pnetelem->smcibdev = smc_pnet_find_ib(ibname); | |
277 | if (!pnetelem->smcibdev) | |
278 | goto error; | |
279 | ||
280 | rc = -EINVAL; | |
281 | if (!tb[SMC_PNETID_IBPORT]) | |
282 | goto error; | |
283 | pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]); | |
284 | if (pnetelem->ib_port < 1 || pnetelem->ib_port > SMC_MAX_PORTS) | |
285 | goto error; | |
286 | ||
6812baab TR |
287 | return 0; |
288 | ||
289 | error: | |
290 | if (pnetelem->ndev) | |
291 | dev_put(pnetelem->ndev); | |
292 | return rc; | |
293 | } | |
294 | ||
295 | /* Convert an smc_pnetentry to a netlink attribute sequence */ | |
296 | static int smc_pnet_set_nla(struct sk_buff *msg, struct smc_pnetentry *pnetelem) | |
297 | { | |
298 | if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name) || | |
299 | nla_put_string(msg, SMC_PNETID_ETHNAME, pnetelem->ndev->name) || | |
300 | nla_put_string(msg, SMC_PNETID_IBNAME, | |
301 | pnetelem->smcibdev->ibdev->name) || | |
302 | nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port)) | |
303 | return -1; | |
304 | return 0; | |
305 | } | |
306 | ||
307 | /* Retrieve one PNETID entry */ | |
308 | static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info) | |
309 | { | |
310 | struct smc_pnetentry *pnetelem; | |
311 | struct sk_buff *msg; | |
312 | void *hdr; | |
313 | int rc; | |
314 | ||
d49baa7e EB |
315 | if (!info->attrs[SMC_PNETID_NAME]) |
316 | return -EINVAL; | |
6812baab TR |
317 | pnetelem = smc_pnet_find_pnetid( |
318 | (char *)nla_data(info->attrs[SMC_PNETID_NAME])); | |
319 | if (!pnetelem) | |
320 | return -ENOENT; | |
321 | msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); | |
322 | if (!msg) | |
323 | return -ENOMEM; | |
324 | ||
325 | hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, | |
326 | &smc_pnet_nl_family, 0, SMC_PNETID_GET); | |
327 | if (!hdr) { | |
328 | rc = -EMSGSIZE; | |
329 | goto err_out; | |
330 | } | |
331 | ||
332 | if (smc_pnet_set_nla(msg, pnetelem)) { | |
333 | rc = -ENOBUFS; | |
334 | goto err_out; | |
335 | } | |
336 | ||
337 | genlmsg_end(msg, hdr); | |
338 | return genlmsg_reply(msg, info); | |
339 | ||
340 | err_out: | |
341 | nlmsg_free(msg); | |
342 | return rc; | |
343 | } | |
344 | ||
345 | static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info) | |
346 | { | |
347 | struct net *net = genl_info_net(info); | |
348 | struct smc_pnetentry *pnetelem; | |
349 | int rc; | |
350 | ||
351 | pnetelem = kzalloc(sizeof(*pnetelem), GFP_KERNEL); | |
352 | if (!pnetelem) | |
353 | return -ENOMEM; | |
354 | rc = smc_pnet_fill_entry(net, pnetelem, info->attrs); | |
355 | if (!rc) | |
356 | rc = smc_pnet_enter(pnetelem); | |
357 | if (rc) { | |
358 | kfree(pnetelem); | |
359 | return rc; | |
360 | } | |
361 | rc = smc_ib_remember_port_attr(pnetelem->smcibdev, pnetelem->ib_port); | |
362 | if (rc) | |
363 | smc_pnet_remove_by_pnetid(pnetelem->pnet_name); | |
364 | return rc; | |
365 | } | |
366 | ||
367 | static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info) | |
368 | { | |
d49baa7e EB |
369 | if (!info->attrs[SMC_PNETID_NAME]) |
370 | return -EINVAL; | |
6812baab TR |
371 | return smc_pnet_remove_by_pnetid( |
372 | (char *)nla_data(info->attrs[SMC_PNETID_NAME])); | |
373 | } | |
374 | ||
375 | static int smc_pnet_dump_start(struct netlink_callback *cb) | |
376 | { | |
377 | cb->args[0] = 0; | |
378 | return 0; | |
379 | } | |
380 | ||
381 | static int smc_pnet_dumpinfo(struct sk_buff *skb, | |
382 | u32 portid, u32 seq, u32 flags, | |
383 | struct smc_pnetentry *pnetelem) | |
384 | { | |
385 | void *hdr; | |
386 | ||
387 | hdr = genlmsg_put(skb, portid, seq, &smc_pnet_nl_family, | |
388 | flags, SMC_PNETID_GET); | |
389 | if (!hdr) | |
390 | return -ENOMEM; | |
391 | if (smc_pnet_set_nla(skb, pnetelem) < 0) { | |
392 | genlmsg_cancel(skb, hdr); | |
393 | return -EMSGSIZE; | |
394 | } | |
395 | genlmsg_end(skb, hdr); | |
396 | return 0; | |
397 | } | |
398 | ||
399 | static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb) | |
400 | { | |
401 | struct smc_pnetentry *pnetelem; | |
402 | int idx = 0; | |
403 | ||
404 | read_lock(&smc_pnettable.lock); | |
405 | list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { | |
406 | if (idx++ < cb->args[0]) | |
407 | continue; | |
408 | if (smc_pnet_dumpinfo(skb, NETLINK_CB(cb->skb).portid, | |
409 | cb->nlh->nlmsg_seq, NLM_F_MULTI, | |
410 | pnetelem)) { | |
411 | --idx; | |
412 | break; | |
413 | } | |
414 | } | |
415 | cb->args[0] = idx; | |
416 | read_unlock(&smc_pnettable.lock); | |
417 | return skb->len; | |
418 | } | |
419 | ||
420 | /* Remove and delete all pnetids from pnet table. | |
421 | */ | |
422 | static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info) | |
423 | { | |
424 | struct smc_pnetentry *pnetelem, *tmp_pe; | |
425 | ||
426 | write_lock(&smc_pnettable.lock); | |
427 | list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist, | |
428 | list) { | |
429 | list_del(&pnetelem->list); | |
430 | dev_put(pnetelem->ndev); | |
431 | kfree(pnetelem); | |
432 | } | |
433 | write_unlock(&smc_pnettable.lock); | |
434 | return 0; | |
435 | } | |
436 | ||
437 | /* SMC_PNETID generic netlink operation definition */ | |
438 | static const struct genl_ops smc_pnet_ops[] = { | |
439 | { | |
440 | .cmd = SMC_PNETID_GET, | |
441 | .flags = GENL_ADMIN_PERM, | |
442 | .policy = smc_pnet_policy, | |
443 | .doit = smc_pnet_get, | |
444 | .dumpit = smc_pnet_dump, | |
445 | .start = smc_pnet_dump_start | |
446 | }, | |
447 | { | |
448 | .cmd = SMC_PNETID_ADD, | |
449 | .flags = GENL_ADMIN_PERM, | |
450 | .policy = smc_pnet_policy, | |
451 | .doit = smc_pnet_add | |
452 | }, | |
453 | { | |
454 | .cmd = SMC_PNETID_DEL, | |
455 | .flags = GENL_ADMIN_PERM, | |
456 | .policy = smc_pnet_policy, | |
457 | .doit = smc_pnet_del | |
458 | }, | |
459 | { | |
460 | .cmd = SMC_PNETID_FLUSH, | |
461 | .flags = GENL_ADMIN_PERM, | |
462 | .policy = smc_pnet_policy, | |
463 | .doit = smc_pnet_flush | |
464 | } | |
465 | }; | |
466 | ||
467 | /* SMC_PNETID family definition */ | |
468 | static struct genl_family smc_pnet_nl_family = { | |
469 | .hdrsize = 0, | |
470 | .name = SMCR_GENL_FAMILY_NAME, | |
471 | .version = SMCR_GENL_FAMILY_VERSION, | |
472 | .maxattr = SMC_PNETID_MAX, | |
473 | .netnsok = true, | |
474 | .module = THIS_MODULE, | |
475 | .ops = smc_pnet_ops, | |
476 | .n_ops = ARRAY_SIZE(smc_pnet_ops) | |
477 | }; | |
478 | ||
479 | static int smc_pnet_netdev_event(struct notifier_block *this, | |
480 | unsigned long event, void *ptr) | |
481 | { | |
482 | struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); | |
483 | ||
484 | switch (event) { | |
485 | case NETDEV_REBOOT: | |
486 | case NETDEV_UNREGISTER: | |
487 | smc_pnet_remove_by_ndev(event_dev); | |
488 | default: | |
489 | break; | |
490 | } | |
491 | return NOTIFY_DONE; | |
492 | } | |
493 | ||
494 | static struct notifier_block smc_netdev_notifier = { | |
495 | .notifier_call = smc_pnet_netdev_event | |
496 | }; | |
497 | ||
498 | int __init smc_pnet_init(void) | |
499 | { | |
500 | int rc; | |
501 | ||
502 | rc = genl_register_family(&smc_pnet_nl_family); | |
503 | if (rc) | |
504 | return rc; | |
505 | rc = register_netdevice_notifier(&smc_netdev_notifier); | |
506 | if (rc) | |
507 | genl_unregister_family(&smc_pnet_nl_family); | |
508 | return rc; | |
509 | } | |
510 | ||
511 | void smc_pnet_exit(void) | |
512 | { | |
513 | smc_pnet_flush(NULL, NULL); | |
514 | unregister_netdevice_notifier(&smc_netdev_notifier); | |
515 | genl_unregister_family(&smc_pnet_nl_family); | |
516 | } | |
517 | ||
518 | /* PNET table analysis for a given sock: | |
519 | * determine ib_device and port belonging to used internal TCP socket | |
520 | * ethernet interface. | |
521 | */ | |
522 | void smc_pnet_find_roce_resource(struct sock *sk, | |
523 | struct smc_ib_device **smcibdev, u8 *ibport) | |
524 | { | |
525 | struct dst_entry *dst = sk_dst_get(sk); | |
526 | struct smc_pnetentry *pnetelem; | |
527 | ||
528 | *smcibdev = NULL; | |
529 | *ibport = 0; | |
530 | ||
531 | if (!dst) | |
532 | return; | |
533 | if (!dst->dev) | |
534 | goto out_rel; | |
535 | read_lock(&smc_pnettable.lock); | |
536 | list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { | |
537 | if (dst->dev == pnetelem->ndev) { | |
5da7e4d3 UB |
538 | if (smc_ib_port_active(pnetelem->smcibdev, |
539 | pnetelem->ib_port)) { | |
540 | *smcibdev = pnetelem->smcibdev; | |
541 | *ibport = pnetelem->ib_port; | |
542 | } | |
6812baab TR |
543 | break; |
544 | } | |
545 | } | |
546 | read_unlock(&smc_pnettable.lock); | |
547 | out_rel: | |
548 | dst_release(dst); | |
549 | } |