Linux 6.12-rc1
[linux-block.git] / drivers / infiniband / core / addr.c
CommitLineData
7025fcd3
SH
/*
 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
 * Copyright (c) 2005 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
35
36#include <linux/mutex.h>
37#include <linux/inetdevice.h>
5a0e3ad6 38#include <linux/slab.h>
7025fcd3 39#include <linux/workqueue.h>
7025fcd3
SH
40#include <net/arp.h>
41#include <net/neighbour.h>
42#include <net/route.h>
e795d092 43#include <net/netevent.h>
3616d08b 44#include <net/ipv6_stubs.h>
38617c64 45#include <net/ip6_route.h>
7025fcd3 46#include <rdma/ib_addr.h>
adb4a57a 47#include <rdma/ib_cache.h>
6aaecd38 48#include <rdma/ib_sa.h>
ef560861 49#include <rdma/ib.h>
ae43f828
MB
50#include <rdma/rdma_netlink.h>
51#include <net/netlink.h>
52
53#include "core_priv.h"
7025fcd3 54
7025fcd3
SH
55struct addr_req {
56 struct list_head list;
38617c64
AS
57 struct sockaddr_storage src_addr;
58 struct sockaddr_storage dst_addr;
7025fcd3
SH
59 struct rdma_dev_addr *addr;
60 void *context;
61 void (*callback)(int status, struct sockaddr *src_addr,
62 struct rdma_dev_addr *addr, void *context);
63 unsigned long timeout;
5fff41e1 64 struct delayed_work work;
0e9d2c19 65 bool resolve_by_gid_attr; /* Consider gid attr in resolve phase */
7025fcd3 66 int status;
ae43f828 67 u32 seq;
7025fcd3
SH
68};
69
ae43f828
MB
70static atomic_t ib_nl_addr_request_seq = ATOMIC_INIT(0);
71
e19c0d23 72static DEFINE_SPINLOCK(lock);
7025fcd3 73static LIST_HEAD(req_list);
7025fcd3
SH
74static struct workqueue_struct *addr_wq;
75
ae43f828
MB
76static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = {
77 [LS_NLA_TYPE_DGID] = {.type = NLA_BINARY,
d1c803a9
LR
78 .len = sizeof(struct rdma_nla_ls_gid),
79 .validation_type = NLA_VALIDATE_MIN,
80 .min = sizeof(struct rdma_nla_ls_gid)},
ae43f828
MB
81};
82
83static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh)
84{
85 struct nlattr *tb[LS_NLA_TYPE_MAX] = {};
86 int ret;
87
88 if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR)
89 return false;
90
8cb08174
JB
91 ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
92 nlmsg_len(nlh), ib_nl_addr_policy, NULL);
ae43f828
MB
93 if (ret)
94 return false;
95
96 return true;
97}
98
99static void ib_nl_process_good_ip_rsep(const struct nlmsghdr *nlh)
100{
101 const struct nlattr *head, *curr;
102 union ib_gid gid;
103 struct addr_req *req;
104 int len, rem;
105 int found = 0;
106
107 head = (const struct nlattr *)nlmsg_data(nlh);
108 len = nlmsg_len(nlh);
109
110 nla_for_each_attr(curr, head, len, rem) {
111 if (curr->nla_type == LS_NLA_TYPE_DGID)
112 memcpy(&gid, nla_data(curr), nla_len(curr));
113 }
114
e19c0d23 115 spin_lock_bh(&lock);
ae43f828
MB
116 list_for_each_entry(req, &req_list, list) {
117 if (nlh->nlmsg_seq != req->seq)
118 continue;
119 /* We set the DGID part, the rest was set earlier */
120 rdma_addr_set_dgid(req->addr, &gid);
121 req->status = 0;
122 found = 1;
123 break;
124 }
e19c0d23 125 spin_unlock_bh(&lock);
ae43f828
MB
126
127 if (!found)
128 pr_info("Couldn't find request waiting for DGID: %pI6\n",
129 &gid);
130}
131
132int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
647c75ac
LR
133 struct nlmsghdr *nlh,
134 struct netlink_ext_ack *extack)
ae43f828 135{
ae43f828 136 if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
e3a2b93d 137 !(NETLINK_CB(skb).sk))
ae43f828
MB
138 return -EPERM;
139
140 if (ib_nl_is_good_ip_resp(nlh))
141 ib_nl_process_good_ip_rsep(nlh);
142
a242c369 143 return 0;
ae43f828
MB
144}
145
146static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr,
147 const void *daddr,
148 u32 seq, u16 family)
149{
150 struct sk_buff *skb = NULL;
151 struct nlmsghdr *nlh;
152 struct rdma_ls_ip_resolve_header *header;
153 void *data;
154 size_t size;
155 int attrtype;
156 int len;
157
158 if (family == AF_INET) {
159 size = sizeof(struct in_addr);
160 attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV4;
161 } else {
162 size = sizeof(struct in6_addr);
163 attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV6;
164 }
165
166 len = nla_total_size(sizeof(size));
167 len += NLMSG_ALIGN(sizeof(*header));
168
169 skb = nlmsg_new(len, GFP_KERNEL);
170 if (!skb)
171 return -ENOMEM;
172
173 data = ibnl_put_msg(skb, &nlh, seq, 0, RDMA_NL_LS,
174 RDMA_NL_LS_OP_IP_RESOLVE, NLM_F_REQUEST);
175 if (!data) {
176 nlmsg_free(skb);
177 return -ENODATA;
178 }
179
180 /* Construct the family header first */
4df864c1 181 header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
ae43f828
MB
182 header->ifindex = dev_addr->bound_dev_if;
183 nla_put(skb, attrtype, size, daddr);
184
185 /* Repair the nlmsg header length */
186 nlmsg_end(skb, nlh);
1d2fedd8 187 rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, GFP_KERNEL);
ae43f828
MB
188
189 /* Make the request retry, so when we get the response from userspace
190 * we will have something.
191 */
192 return -ENODATA;
193}
194
2df7dba8 195int rdma_addr_size(const struct sockaddr *addr)
ef560861
SH
196{
197 switch (addr->sa_family) {
198 case AF_INET:
199 return sizeof(struct sockaddr_in);
200 case AF_INET6:
201 return sizeof(struct sockaddr_in6);
202 case AF_IB:
203 return sizeof(struct sockaddr_ib);
204 default:
205 return 0;
206 }
207}
208EXPORT_SYMBOL(rdma_addr_size);
209
84652aef
RD
210int rdma_addr_size_in6(struct sockaddr_in6 *addr)
211{
212 int ret = rdma_addr_size((struct sockaddr *) addr);
213
214 return ret <= sizeof(*addr) ? ret : 0;
215}
216EXPORT_SYMBOL(rdma_addr_size_in6);
217
218int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr)
219{
220 int ret = rdma_addr_size((struct sockaddr *) addr);
221
222 return ret <= sizeof(*addr) ? ret : 0;
223}
224EXPORT_SYMBOL(rdma_addr_size_kss);
225
77addc52
PP
226/**
227 * rdma_copy_src_l2_addr - Copy netdevice source addresses
228 * @dev_addr: Destination address pointer where to copy the addresses
229 * @dev: Netdevice whose source addresses to copy
230 *
231 * rdma_copy_src_l2_addr() copies source addresses from the specified netdevice.
232 * This includes unicast address, broadcast address, device type and
233 * interface index.
234 */
235void rdma_copy_src_l2_addr(struct rdma_dev_addr *dev_addr,
236 const struct net_device *dev)
7025fcd3 237{
c4315d85 238 dev_addr->dev_type = dev->type;
7025fcd3
SH
239 memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
240 memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
6266ed6e 241 dev_addr->bound_dev_if = dev->ifindex;
7025fcd3 242}
77addc52 243EXPORT_SYMBOL(rdma_copy_src_l2_addr);
7025fcd3 244
caf1e3ae
PP
245static struct net_device *
246rdma_find_ndev_for_src_ip_rcu(struct net *net, const struct sockaddr *src_in)
247{
248 struct net_device *dev = NULL;
249 int ret = -EADDRNOTAVAIL;
250
251 switch (src_in->sa_family) {
252 case AF_INET:
253 dev = __ip_dev_find(net,
254 ((const struct sockaddr_in *)src_in)->sin_addr.s_addr,
255 false);
256 if (dev)
257 ret = 0;
258 break;
259#if IS_ENABLED(CONFIG_IPV6)
260 case AF_INET6:
261 for_each_netdev_rcu(net, dev) {
262 if (ipv6_chk_addr(net,
263 &((const struct sockaddr_in6 *)src_in)->sin6_addr,
264 dev, 1)) {
265 ret = 0;
266 break;
267 }
268 }
269 break;
270#endif
271 }
272 return ret ? ERR_PTR(ret) : dev;
273}
274
20029832 275int rdma_translate_ip(const struct sockaddr *addr,
575c7e58 276 struct rdma_dev_addr *dev_addr)
7025fcd3
SH
277{
278 struct net_device *dev;
7025fcd3 279
6266ed6e 280 if (dev_addr->bound_dev_if) {
565edd1d 281 dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
6266ed6e
SH
282 if (!dev)
283 return -ENODEV;
77addc52 284 rdma_copy_src_l2_addr(dev_addr, dev);
6266ed6e 285 dev_put(dev);
e08ce2e8 286 return 0;
6266ed6e
SH
287 }
288
caf1e3ae
PP
289 rcu_read_lock();
290 dev = rdma_find_ndev_for_src_ip_rcu(dev_addr->net, addr);
291 if (!IS_ERR(dev))
77addc52 292 rdma_copy_src_l2_addr(dev_addr, dev);
caf1e3ae
PP
293 rcu_read_unlock();
294 return PTR_ERR_OR_ZERO(dev);
7025fcd3
SH
295}
296EXPORT_SYMBOL(rdma_translate_ip);
297
e19c0d23 298static void set_timeout(struct addr_req *req, unsigned long time)
7025fcd3
SH
299{
300 unsigned long delay;
301
7025fcd3 302 delay = time - jiffies;
346f98b4
OK
303 if ((long)delay < 0)
304 delay = 0;
7025fcd3 305
e19c0d23 306 mod_delayed_work(addr_wq, &req->work, delay);
7025fcd3
SH
307}
308
309static void queue_req(struct addr_req *req)
310{
e19c0d23
JG
311 spin_lock_bh(&lock);
312 list_add_tail(&req->list, &req_list);
313 set_timeout(req, req->timeout);
314 spin_unlock_bh(&lock);
7025fcd3
SH
315}
316
a362ea1d 317static int ib_nl_fetch_ha(struct rdma_dev_addr *dev_addr,
ae43f828
MB
318 const void *daddr, u32 seq, u16 family)
319{
38716732 320 if (!rdma_nl_chk_listeners(RDMA_NL_GROUP_LS))
ae43f828
MB
321 return -EADDRNOTAVAIL;
322
ae43f828
MB
323 return ib_nl_ip_send_msg(dev_addr, daddr, seq, family);
324}
325
fd59015d
PP
326static int dst_fetch_ha(const struct dst_entry *dst,
327 struct rdma_dev_addr *dev_addr,
20029832 328 const void *daddr)
51d45974
DM
329{
330 struct neighbour *n;
e08ce2e8 331 int ret = 0;
51d45974 332
02b61955 333 n = dst_neigh_lookup(dst, daddr);
92ebb6a0
JG
334 if (!n)
335 return -ENODATA;
02b61955 336
92ebb6a0
JG
337 if (!(n->nud_state & NUD_VALID)) {
338 neigh_event_send(n, NULL);
51d45974
DM
339 ret = -ENODATA;
340 } else {
d8d9ec7d 341 neigh_ha_snapshot(dev_addr->dst_dev_addr, n, dst->dev);
51d45974 342 }
51d45974 343
92ebb6a0 344 neigh_release(n);
02b61955 345
51d45974
DM
346 return ret;
347}
348
fd59015d 349static bool has_gateway(const struct dst_entry *dst, sa_family_t family)
ae43f828 350{
05d6d492
ED
351 if (family == AF_INET)
352 return dst_rtable(dst)->rt_uses_gateway;
ae43f828 353
05d6d492 354 return dst_rt6_info(dst)->rt6i_flags & RTF_GATEWAY;
ae43f828
MB
355}
356
fd59015d 357static int fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
ae43f828
MB
358 const struct sockaddr *dst_in, u32 seq)
359{
360 const struct sockaddr_in *dst_in4 =
361 (const struct sockaddr_in *)dst_in;
362 const struct sockaddr_in6 *dst_in6 =
363 (const struct sockaddr_in6 *)dst_in;
364 const void *daddr = (dst_in->sa_family == AF_INET) ?
365 (const void *)&dst_in4->sin_addr.s_addr :
366 (const void *)&dst_in6->sin6_addr;
367 sa_family_t family = dst_in->sa_family;
368
f8f2a576
JG
369 might_sleep();
370
307edde8
PP
371 /* If we have a gateway in IB mode then it must be an IB network */
372 if (has_gateway(dst, family) && dev_addr->network == RDMA_NETWORK_IB)
a362ea1d 373 return ib_nl_fetch_ha(dev_addr, daddr, seq, family);
ae43f828
MB
374 else
375 return dst_fetch_ha(dst, dev_addr, daddr);
376}
377
89c5691c
PP
378static int addr4_resolve(struct sockaddr *src_sock,
379 const struct sockaddr *dst_sock,
20029832
MB
380 struct rdma_dev_addr *addr,
381 struct rtable **prt)
7025fcd3 382{
89c5691c
PP
383 struct sockaddr_in *src_in = (struct sockaddr_in *)src_sock;
384 const struct sockaddr_in *dst_in =
385 (const struct sockaddr_in *)dst_sock;
386
1b90c137
AV
387 __be32 src_ip = src_in->sin_addr.s_addr;
388 __be32 dst_ip = dst_in->sin_addr.s_addr;
7025fcd3 389 struct rtable *rt;
5fc3590c 390 struct flowi4 fl4;
7025fcd3
SH
391 int ret;
392
5fc3590c
DM
393 memset(&fl4, 0, sizeof(fl4));
394 fl4.daddr = dst_ip;
395 fl4.saddr = src_ip;
396 fl4.flowi4_oif = addr->bound_dev_if;
565edd1d 397 rt = ip_route_output_key(addr->net, &fl4);
cbd09aeb
MS
398 ret = PTR_ERR_OR_ZERO(rt);
399 if (ret)
400 return ret;
401
5fc3590c 402 src_in->sin_addr.s_addr = fl4.saddr;
923c100e 403
c3efe750
MB
404 addr->hoplimit = ip4_dst_hoplimit(&rt->dst);
405
20029832
MB
406 *prt = rt;
407 return 0;
7025fcd3
SH
408}
409
d90f9b35 410#if IS_ENABLED(CONFIG_IPV6)
89c5691c
PP
411static int addr6_resolve(struct sockaddr *src_sock,
412 const struct sockaddr *dst_sock,
20029832
MB
413 struct rdma_dev_addr *addr,
414 struct dst_entry **pdst)
38617c64 415{
89c5691c
PP
416 struct sockaddr_in6 *src_in = (struct sockaddr_in6 *)src_sock;
417 const struct sockaddr_in6 *dst_in =
418 (const struct sockaddr_in6 *)dst_sock;
4c9483b2 419 struct flowi6 fl6;
38617c64 420 struct dst_entry *dst;
38617c64 421
4c9483b2 422 memset(&fl6, 0, sizeof fl6);
4e3fd7a0
AD
423 fl6.daddr = dst_in->sin6_addr;
424 fl6.saddr = src_in->sin6_addr;
4c9483b2 425 fl6.flowi6_oif = addr->bound_dev_if;
38617c64 426
6c8991f4
SD
427 dst = ipv6_stub->ipv6_dst_lookup_flow(addr->net, NULL, &fl6, NULL);
428 if (IS_ERR(dst))
429 return PTR_ERR(dst);
d14714df 430
f89b7dfa 431 if (ipv6_addr_any(&src_in->sin6_addr))
4e3fd7a0 432 src_in->sin6_addr = fl6.saddr;
d14714df 433
c3efe750
MB
434 addr->hoplimit = ip6_dst_hoplimit(dst);
435
20029832
MB
436 *pdst = dst;
437 return 0;
38617c64 438}
2c4ab624 439#else
89c5691c
PP
440static int addr6_resolve(struct sockaddr *src_sock,
441 const struct sockaddr *dst_sock,
20029832
MB
442 struct rdma_dev_addr *addr,
443 struct dst_entry **pdst)
2c4ab624
RD
444{
445 return -EADDRNOTAVAIL;
446}
447#endif
38617c64 448
fd59015d 449static int addr_resolve_neigh(const struct dst_entry *dst,
20029832 450 const struct sockaddr *dst_in,
ae43f828 451 struct rdma_dev_addr *addr,
c31d4b2d 452 unsigned int ndev_flags,
ae43f828 453 u32 seq)
20029832 454{
c31d4b2d
PP
455 int ret = 0;
456
457 if (ndev_flags & IFF_LOOPBACK) {
a362ea1d 458 memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
c31d4b2d
PP
459 } else {
460 if (!(ndev_flags & IFF_NOARP)) {
461 /* If the device doesn't do ARP internally */
462 ret = fetch_ha(dst, addr, dst_in, seq);
463 }
20029832 464 }
c31d4b2d 465 return ret;
20029832
MB
466}
467
0965cc95
Y
468static int copy_src_l2_addr(struct rdma_dev_addr *dev_addr,
469 const struct sockaddr *dst_in,
470 const struct dst_entry *dst,
471 const struct net_device *ndev)
a362ea1d
PP
472{
473 int ret = 0;
474
475 if (dst->dev->flags & IFF_LOOPBACK)
476 ret = rdma_translate_ip(dst_in, dev_addr);
477 else
77addc52 478 rdma_copy_src_l2_addr(dev_addr, dst->dev);
307edde8
PP
479
480 /*
481 * If there's a gateway and type of device not ARPHRD_INFINIBAND,
482 * we're definitely in RoCE v2 (as RoCE v1 isn't routable) set the
483 * network type accordingly.
484 */
485 if (has_gateway(dst, dst_in->sa_family) &&
c31d4b2d 486 ndev->type != ARPHRD_INFINIBAND)
307edde8
PP
487 dev_addr->network = dst_in->sa_family == AF_INET ?
488 RDMA_NETWORK_IPV4 :
489 RDMA_NETWORK_IPV6;
490 else
491 dev_addr->network = RDMA_NETWORK_IB;
0965cc95
Y
492
493 return ret;
c31d4b2d 494}
307edde8 495
c31d4b2d
PP
496static int rdma_set_src_addr_rcu(struct rdma_dev_addr *dev_addr,
497 unsigned int *ndev_flags,
498 const struct sockaddr *dst_in,
499 const struct dst_entry *dst)
500{
501 struct net_device *ndev = READ_ONCE(dst->dev);
502
503 *ndev_flags = ndev->flags;
504 /* A physical device must be the RDMA device to use */
505 if (ndev->flags & IFF_LOOPBACK) {
506 /*
507 * RDMA (IB/RoCE, iWarp) doesn't run on lo interface or
508 * loopback IP address. So if route is resolved to loopback
509 * interface, translate that to a real ndev based on non
510 * loopback IP address.
511 */
512 ndev = rdma_find_ndev_for_src_ip_rcu(dev_net(ndev), dst_in);
fe33507e 513 if (IS_ERR(ndev))
c31d4b2d
PP
514 return -ENODEV;
515 }
516
0965cc95 517 return copy_src_l2_addr(dev_addr, dst_in, dst, ndev);
a362ea1d
PP
518}
519
0e9d2c19
PP
520static int set_addr_netns_by_gid_rcu(struct rdma_dev_addr *addr)
521{
522 struct net_device *ndev;
523
524 ndev = rdma_read_gid_attr_ndev_rcu(addr->sgid_attr);
525 if (IS_ERR(ndev))
526 return PTR_ERR(ndev);
527
528 /*
529 * Since we are holding the rcu, reading net and ifindex
530 * are safe without any additional reference; because
531 * change_net_namespace() in net/core/dev.c does rcu sync
532 * after it changes the state to IFF_DOWN and before
533 * updating netdev fields {net, ifindex}.
534 */
535 addr->net = dev_net(ndev);
536 addr->bound_dev_if = ndev->ifindex;
537 return 0;
538}
539
540static void rdma_addr_set_net_defaults(struct rdma_dev_addr *addr)
541{
542 addr->net = &init_net;
543 addr->bound_dev_if = 0;
544}
545
923c100e 546static int addr_resolve(struct sockaddr *src_in,
20029832
MB
547 const struct sockaddr *dst_in,
548 struct rdma_dev_addr *addr,
ae43f828 549 bool resolve_neigh,
0e9d2c19 550 bool resolve_by_gid_attr,
ae43f828 551 u32 seq)
38617c64 552{
783793b5 553 struct dst_entry *dst = NULL;
c31d4b2d 554 unsigned int ndev_flags = 0;
a362ea1d 555 struct rtable *rt = NULL;
20029832
MB
556 int ret;
557
bebb2a47
MS
558 if (!addr->net) {
559 pr_warn_ratelimited("%s: missing namespace\n", __func__);
560 return -EINVAL;
561 }
562
c31d4b2d 563 rcu_read_lock();
0e9d2c19
PP
564 if (resolve_by_gid_attr) {
565 if (!addr->sgid_attr) {
566 rcu_read_unlock();
567 pr_warn_ratelimited("%s: missing gid_attr\n", __func__);
568 return -EINVAL;
569 }
570 /*
571 * If the request is for a specific gid attribute of the
572 * rdma_dev_addr, derive net from the netdevice of the
573 * GID attribute.
574 */
575 ret = set_addr_netns_by_gid_rcu(addr);
576 if (ret) {
577 rcu_read_unlock();
578 return ret;
579 }
580 }
38617c64 581 if (src_in->sa_family == AF_INET) {
89c5691c 582 ret = addr4_resolve(src_in, dst_in, addr, &rt);
783793b5 583 dst = &rt->dst;
20029832 584 } else {
89c5691c 585 ret = addr6_resolve(src_in, dst_in, addr, &dst);
783793b5 586 }
c31d4b2d
PP
587 if (ret) {
588 rcu_read_unlock();
0e9d2c19 589 goto done;
c31d4b2d
PP
590 }
591 ret = rdma_set_src_addr_rcu(addr, &ndev_flags, dst_in, dst);
592 rcu_read_unlock();
20029832 593
783793b5
PP
594 /*
595 * Resolve neighbor destination address if requested and
596 * only if src addr translation didn't fail.
597 */
598 if (!ret && resolve_neigh)
c31d4b2d 599 ret = addr_resolve_neigh(dst, dst_in, addr, ndev_flags, seq);
20029832 600
783793b5
PP
601 if (src_in->sa_family == AF_INET)
602 ip_rt_put(rt);
603 else
20029832 604 dst_release(dst);
0e9d2c19
PP
605done:
606 /*
607 * Clear the addr net to go back to its original state, only if it was
608 * derived from GID attribute in this context.
609 */
610 if (resolve_by_gid_attr)
611 rdma_addr_set_net_defaults(addr);
20029832 612 return ret;
38617c64
AS
613}
614
5fff41e1
PP
615static void process_one_req(struct work_struct *_work)
616{
617 struct addr_req *req;
618 struct sockaddr *src_in, *dst_in;
619
5fff41e1
PP
620 req = container_of(_work, struct addr_req, work.work);
621
622 if (req->status == -ENODATA) {
623 src_in = (struct sockaddr *)&req->src_addr;
624 dst_in = (struct sockaddr *)&req->dst_addr;
625 req->status = addr_resolve(src_in, dst_in, req->addr,
0e9d2c19
PP
626 true, req->resolve_by_gid_attr,
627 req->seq);
5fff41e1
PP
628 if (req->status && time_after_eq(jiffies, req->timeout)) {
629 req->status = -ETIMEDOUT;
630 } else if (req->status == -ENODATA) {
631 /* requeue the work for retrying again */
e19c0d23 632 spin_lock_bh(&lock);
44e75052
JG
633 if (!list_empty(&req->list))
634 set_timeout(req, req->timeout);
e19c0d23 635 spin_unlock_bh(&lock);
5fff41e1
PP
636 return;
637 }
638 }
9137108c 639
5fff41e1
PP
640 req->callback(req->status, (struct sockaddr *)&req->src_addr,
641 req->addr, req->context);
44e75052
JG
642 req->callback = NULL;
643
644 spin_lock_bh(&lock);
2ee9bf34
JG
645 /*
646 * Although the work will normally have been canceled by the workqueue,
647 * it can still be requeued as long as it is on the req_list.
648 */
649 cancel_delayed_work(&req->work);
44e75052 650 if (!list_empty(&req->list)) {
44e75052 651 list_del_init(&req->list);
44e75052
JG
652 kfree(req);
653 }
654 spin_unlock_bh(&lock);
5fff41e1
PP
655}
656
2df7dba8 657int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr,
dbace111 658 struct rdma_dev_addr *addr, unsigned long timeout_ms,
7025fcd3
SH
659 void (*callback)(int status, struct sockaddr *src_addr,
660 struct rdma_dev_addr *addr, void *context),
9549c2bd 661 bool resolve_by_gid_attr, void *context)
7025fcd3 662{
38617c64 663 struct sockaddr *src_in, *dst_in;
7025fcd3
SH
664 struct addr_req *req;
665 int ret = 0;
666
dd00cc48 667 req = kzalloc(sizeof *req, GFP_KERNEL);
7025fcd3
SH
668 if (!req)
669 return -ENOMEM;
7025fcd3 670
d2e08862
SH
671 src_in = (struct sockaddr *) &req->src_addr;
672 dst_in = (struct sockaddr *) &req->dst_addr;
673
674 if (src_addr) {
675 if (src_addr->sa_family != dst_addr->sa_family) {
676 ret = -EINVAL;
677 goto err;
678 }
679
ef560861 680 memcpy(src_in, src_addr, rdma_addr_size(src_addr));
d2e08862
SH
681 } else {
682 src_in->sa_family = dst_addr->sa_family;
683 }
684
ef560861 685 memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr));
7025fcd3
SH
686 req->addr = addr;
687 req->callback = callback;
688 req->context = context;
0e9d2c19 689 req->resolve_by_gid_attr = resolve_by_gid_attr;
5fff41e1 690 INIT_DELAYED_WORK(&req->work, process_one_req);
ae43f828 691 req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq);
7025fcd3 692
0e9d2c19
PP
693 req->status = addr_resolve(src_in, dst_in, addr, true,
694 req->resolve_by_gid_attr, req->seq);
7025fcd3
SH
695 switch (req->status) {
696 case 0:
697 req->timeout = jiffies;
698 queue_req(req);
699 break;
700 case -ENODATA:
701 req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
702 queue_req(req);
7025fcd3
SH
703 break;
704 default:
705 ret = req->status;
d2e08862 706 goto err;
7025fcd3
SH
707 }
708 return ret;
d2e08862
SH
709err:
710 kfree(req);
711 return ret;
7025fcd3
SH
712}
713EXPORT_SYMBOL(rdma_resolve_ip);
714
6aaecd38
PP
715int roce_resolve_route_from_path(struct sa_path_rec *rec,
716 const struct ib_gid_attr *attr)
20029832 717{
6aaecd38
PP
718 union {
719 struct sockaddr _sockaddr;
720 struct sockaddr_in _sockaddr_in;
721 struct sockaddr_in6 _sockaddr_in6;
722 } sgid, dgid;
723 struct rdma_dev_addr dev_addr = {};
724 int ret;
20029832 725
f8f2a576
JG
726 might_sleep();
727
6aaecd38
PP
728 if (rec->roce.route_resolved)
729 return 0;
20029832 730
641114d2
JG
731 rdma_gid2ip((struct sockaddr *)&sgid, &rec->sgid);
732 rdma_gid2ip((struct sockaddr *)&dgid, &rec->dgid);
20029832 733
6aaecd38
PP
734 if (sgid._sockaddr.sa_family != dgid._sockaddr.sa_family)
735 return -EINVAL;
736
737 if (!attr || !attr->ndev)
738 return -EINVAL;
739
6aaecd38 740 dev_addr.net = &init_net;
0e9d2c19 741 dev_addr.sgid_attr = attr;
6aaecd38 742
641114d2 743 ret = addr_resolve((struct sockaddr *)&sgid, (struct sockaddr *)&dgid,
0e9d2c19 744 &dev_addr, false, true, 0);
6aaecd38
PP
745 if (ret)
746 return ret;
747
748 if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
749 dev_addr.network == RDMA_NETWORK_IPV6) &&
750 rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
751 return -EINVAL;
752
753 rec->roce.route_resolved = true;
754 return 0;
20029832 755}
20029832 756
722c7b2b
PP
757/**
758 * rdma_addr_cancel - Cancel resolve ip request
759 * @addr: Pointer to address structure given previously
760 * during rdma_resolve_ip().
761 * rdma_addr_cancel() is synchronous function which cancels any pending
762 * request if there is any.
763 */
7025fcd3
SH
764void rdma_addr_cancel(struct rdma_dev_addr *addr)
765{
766 struct addr_req *req, *temp_req;
44e75052 767 struct addr_req *found = NULL;
7025fcd3 768
e19c0d23 769 spin_lock_bh(&lock);
7025fcd3
SH
770 list_for_each_entry_safe(req, temp_req, &req_list, list) {
771 if (req->addr == addr) {
44e75052
JG
772 /*
773 * Removing from the list means we take ownership of
774 * the req
775 */
776 list_del_init(&req->list);
777 found = req;
7025fcd3
SH
778 break;
779 }
780 }
e19c0d23 781 spin_unlock_bh(&lock);
44e75052
JG
782
783 if (!found)
784 return;
785
786 /*
787 * sync canceling the work after removing it from the req_list
788 * guarentees no work is running and none will be started.
789 */
790 cancel_delayed_work_sync(&found->work);
44e75052 791 kfree(found);
7025fcd3
SH
792}
793EXPORT_SYMBOL(rdma_addr_cancel);
794
dd5f03be 795struct resolve_cb_context {
dd5f03be 796 struct completion comp;
61c37028 797 int status;
dd5f03be
MB
798};
799
800static void resolve_cb(int status, struct sockaddr *src_addr,
801 struct rdma_dev_addr *addr, void *context)
802{
61c37028 803 ((struct resolve_cb_context *)context)->status = status;
dd5f03be
MB
804 complete(&((struct resolve_cb_context *)context)->comp);
805}
806
f7f4b23e
MB
807int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
808 const union ib_gid *dgid,
0e9d2c19 809 u8 *dmac, const struct ib_gid_attr *sgid_attr,
c3efe750 810 int *hoplimit)
dd5f03be 811{
dd5f03be
MB
812 struct rdma_dev_addr dev_addr;
813 struct resolve_cb_context ctx;
dd5f03be 814 union {
dd5f03be
MB
815 struct sockaddr_in _sockaddr_in;
816 struct sockaddr_in6 _sockaddr_in6;
817 } sgid_addr, dgid_addr;
1060f865 818 int ret;
dd5f03be 819
641114d2
JG
820 rdma_gid2ip((struct sockaddr *)&sgid_addr, sgid);
821 rdma_gid2ip((struct sockaddr *)&dgid_addr, dgid);
dd5f03be
MB
822
823 memset(&dev_addr, 0, sizeof(dev_addr));
565edd1d 824 dev_addr.net = &init_net;
0e9d2c19 825 dev_addr.sgid_attr = sgid_attr;
dd5f03be 826
dd5f03be 827 init_completion(&ctx.comp);
641114d2
JG
828 ret = rdma_resolve_ip((struct sockaddr *)&sgid_addr,
829 (struct sockaddr *)&dgid_addr, &dev_addr, 1000,
830 resolve_cb, true, &ctx);
dd5f03be
MB
831 if (ret)
832 return ret;
833
834 wait_for_completion(&ctx.comp);
835
61c37028
MB
836 ret = ctx.status;
837 if (ret)
838 return ret;
839
dd5f03be 840 memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
1060f865
PP
841 *hoplimit = dev_addr.hoplimit;
842 return 0;
dd5f03be 843}
dd5f03be 844
3cd96564 845static int netevent_callback(struct notifier_block *self, unsigned long event,
e795d092 846 void *ctx)
7025fcd3 847{
e19c0d23
JG
848 struct addr_req *req;
849
3cd96564 850 if (event == NETEVENT_NEIGH_UPDATE) {
e795d092 851 struct neighbour *neigh = ctx;
7025fcd3 852
e19c0d23
JG
853 if (neigh->nud_state & NUD_VALID) {
854 spin_lock_bh(&lock);
855 list_for_each_entry(req, &req_list, list)
856 set_timeout(req, jiffies);
857 spin_unlock_bh(&lock);
858 }
e795d092 859 }
7025fcd3
SH
860 return 0;
861}
862
e795d092
TT
863static struct notifier_block nb = {
864 .notifier_call = netevent_callback
7025fcd3
SH
865};
866
e3f20f02 867int addr_init(void)
7025fcd3 868{
39baf103 869 addr_wq = alloc_ordered_workqueue("ib_addr", 0);
7025fcd3
SH
870 if (!addr_wq)
871 return -ENOMEM;
872
e795d092 873 register_netevent_notifier(&nb);
ae43f828 874
7025fcd3
SH
875 return 0;
876}
877
e3f20f02 878void addr_cleanup(void)
7025fcd3 879{
e795d092 880 unregister_netevent_notifier(&nb);
7025fcd3 881 destroy_workqueue(addr_wq);
ee6548d1 882 WARN_ON(!list_empty(&req_list));
7025fcd3 883}