Commit | Line | Data |
---|---|---|
7025fcd3 SH |
1 | /* |
2 | * Copyright (c) 2005 Voltaire Inc. All rights reserved. | |
3 | * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. | |
4 | * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. | |
5 | * Copyright (c) 2005 Intel Corporation. All rights reserved. | |
6 | * | |
a9474917 SH |
7 | * This software is available to you under a choice of one of two |
8 | * licenses. You may choose to be licensed under the terms of the GNU | |
9 | * General Public License (GPL) Version 2, available from the file | |
10 | * COPYING in the main directory of this source tree, or the | |
11 | * OpenIB.org BSD license below: | |
7025fcd3 | 12 | * |
a9474917 SH |
13 | * Redistribution and use in source and binary forms, with or |
14 | * without modification, are permitted provided that the following | |
15 | * conditions are met: | |
7025fcd3 | 16 | * |
a9474917 SH |
17 | * - Redistributions of source code must retain the above |
18 | * copyright notice, this list of conditions and the following | |
19 | * disclaimer. | |
7025fcd3 | 20 | * |
a9474917 SH |
21 | * - Redistributions in binary form must reproduce the above |
22 | * copyright notice, this list of conditions and the following | |
23 | * disclaimer in the documentation and/or other materials | |
24 | * provided with the distribution. | |
7025fcd3 | 25 | * |
a9474917 SH |
26 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
27 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
28 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
29 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | |
30 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | |
31 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | |
32 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
33 | * SOFTWARE. | |
7025fcd3 SH |
34 | */ |
35 | ||
36 | #include <linux/mutex.h> | |
37 | #include <linux/inetdevice.h> | |
5a0e3ad6 | 38 | #include <linux/slab.h> |
7025fcd3 | 39 | #include <linux/workqueue.h> |
e4dd23d7 | 40 | #include <linux/module.h> |
7025fcd3 SH |
41 | #include <net/arp.h> |
42 | #include <net/neighbour.h> | |
43 | #include <net/route.h> | |
e795d092 | 44 | #include <net/netevent.h> |
38617c64 AS |
45 | #include <net/addrconf.h> |
46 | #include <net/ip6_route.h> | |
7025fcd3 | 47 | #include <rdma/ib_addr.h> |
ef560861 | 48 | #include <rdma/ib.h> |
ae43f828 MB |
49 | #include <rdma/rdma_netlink.h> |
50 | #include <net/netlink.h> | |
51 | ||
52 | #include "core_priv.h" | |
7025fcd3 | 53 | |
7025fcd3 SH |
54 | struct addr_req { |
55 | struct list_head list; | |
38617c64 AS |
56 | struct sockaddr_storage src_addr; |
57 | struct sockaddr_storage dst_addr; | |
7025fcd3 | 58 | struct rdma_dev_addr *addr; |
7a118df3 | 59 | struct rdma_addr_client *client; |
7025fcd3 SH |
60 | void *context; |
61 | void (*callback)(int status, struct sockaddr *src_addr, | |
62 | struct rdma_dev_addr *addr, void *context); | |
63 | unsigned long timeout; | |
5fff41e1 | 64 | struct delayed_work work; |
7025fcd3 | 65 | int status; |
ae43f828 | 66 | u32 seq; |
7025fcd3 SH |
67 | }; |
68 | ||
ae43f828 MB |
69 | static atomic_t ib_nl_addr_request_seq = ATOMIC_INIT(0); |
70 | ||
c4028958 | 71 | static void process_req(struct work_struct *work); |
7025fcd3 SH |
72 | |
73 | static DEFINE_MUTEX(lock); | |
74 | static LIST_HEAD(req_list); | |
c4028958 | 75 | static DECLARE_DELAYED_WORK(work, process_req); |
7025fcd3 SH |
76 | static struct workqueue_struct *addr_wq; |
77 | ||
ae43f828 MB |
78 | static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = { |
79 | [LS_NLA_TYPE_DGID] = {.type = NLA_BINARY, | |
80 | .len = sizeof(struct rdma_nla_ls_gid)}, | |
81 | }; | |
82 | ||
83 | static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh) | |
84 | { | |
85 | struct nlattr *tb[LS_NLA_TYPE_MAX] = {}; | |
86 | int ret; | |
87 | ||
88 | if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR) | |
89 | return false; | |
90 | ||
91 | ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh), | |
fceb6435 | 92 | nlmsg_len(nlh), ib_nl_addr_policy, NULL); |
ae43f828 MB |
93 | if (ret) |
94 | return false; | |
95 | ||
96 | return true; | |
97 | } | |
98 | ||
99 | static void ib_nl_process_good_ip_rsep(const struct nlmsghdr *nlh) | |
100 | { | |
101 | const struct nlattr *head, *curr; | |
102 | union ib_gid gid; | |
103 | struct addr_req *req; | |
104 | int len, rem; | |
105 | int found = 0; | |
106 | ||
107 | head = (const struct nlattr *)nlmsg_data(nlh); | |
108 | len = nlmsg_len(nlh); | |
109 | ||
110 | nla_for_each_attr(curr, head, len, rem) { | |
111 | if (curr->nla_type == LS_NLA_TYPE_DGID) | |
112 | memcpy(&gid, nla_data(curr), nla_len(curr)); | |
113 | } | |
114 | ||
115 | mutex_lock(&lock); | |
116 | list_for_each_entry(req, &req_list, list) { | |
117 | if (nlh->nlmsg_seq != req->seq) | |
118 | continue; | |
119 | /* We set the DGID part, the rest was set earlier */ | |
120 | rdma_addr_set_dgid(req->addr, &gid); | |
121 | req->status = 0; | |
122 | found = 1; | |
123 | break; | |
124 | } | |
125 | mutex_unlock(&lock); | |
126 | ||
127 | if (!found) | |
128 | pr_info("Couldn't find request waiting for DGID: %pI6\n", | |
129 | &gid); | |
130 | } | |
131 | ||
132 | int ib_nl_handle_ip_res_resp(struct sk_buff *skb, | |
647c75ac LR |
133 | struct nlmsghdr *nlh, |
134 | struct netlink_ext_ack *extack) | |
ae43f828 | 135 | { |
ae43f828 | 136 | if ((nlh->nlmsg_flags & NLM_F_REQUEST) || |
e3a2b93d | 137 | !(NETLINK_CB(skb).sk)) |
ae43f828 MB |
138 | return -EPERM; |
139 | ||
140 | if (ib_nl_is_good_ip_resp(nlh)) | |
141 | ib_nl_process_good_ip_rsep(nlh); | |
142 | ||
143 | return skb->len; | |
144 | } | |
145 | ||
146 | static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr, | |
147 | const void *daddr, | |
148 | u32 seq, u16 family) | |
149 | { | |
150 | struct sk_buff *skb = NULL; | |
151 | struct nlmsghdr *nlh; | |
152 | struct rdma_ls_ip_resolve_header *header; | |
153 | void *data; | |
154 | size_t size; | |
155 | int attrtype; | |
156 | int len; | |
157 | ||
158 | if (family == AF_INET) { | |
159 | size = sizeof(struct in_addr); | |
160 | attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV4; | |
161 | } else { | |
162 | size = sizeof(struct in6_addr); | |
163 | attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV6; | |
164 | } | |
165 | ||
166 | len = nla_total_size(sizeof(size)); | |
167 | len += NLMSG_ALIGN(sizeof(*header)); | |
168 | ||
169 | skb = nlmsg_new(len, GFP_KERNEL); | |
170 | if (!skb) | |
171 | return -ENOMEM; | |
172 | ||
173 | data = ibnl_put_msg(skb, &nlh, seq, 0, RDMA_NL_LS, | |
174 | RDMA_NL_LS_OP_IP_RESOLVE, NLM_F_REQUEST); | |
175 | if (!data) { | |
176 | nlmsg_free(skb); | |
177 | return -ENODATA; | |
178 | } | |
179 | ||
180 | /* Construct the family header first */ | |
4df864c1 | 181 | header = skb_put(skb, NLMSG_ALIGN(sizeof(*header))); |
ae43f828 MB |
182 | header->ifindex = dev_addr->bound_dev_if; |
183 | nla_put(skb, attrtype, size, daddr); | |
184 | ||
185 | /* Repair the nlmsg header length */ | |
186 | nlmsg_end(skb, nlh); | |
4d7f693a | 187 | rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, GFP_KERNEL); |
ae43f828 MB |
188 | |
189 | /* Make the request retry, so when we get the response from userspace | |
190 | * we will have something. | |
191 | */ | |
192 | return -ENODATA; | |
193 | } | |
194 | ||
ef560861 SH |
195 | int rdma_addr_size(struct sockaddr *addr) |
196 | { | |
197 | switch (addr->sa_family) { | |
198 | case AF_INET: | |
199 | return sizeof(struct sockaddr_in); | |
200 | case AF_INET6: | |
201 | return sizeof(struct sockaddr_in6); | |
202 | case AF_IB: | |
203 | return sizeof(struct sockaddr_ib); | |
204 | default: | |
205 | return 0; | |
206 | } | |
207 | } | |
208 | EXPORT_SYMBOL(rdma_addr_size); | |
209 | ||
dd5f03be MB |
210 | static struct rdma_addr_client self; |
211 | ||
7a118df3 SH |
212 | void rdma_addr_register_client(struct rdma_addr_client *client) |
213 | { | |
214 | atomic_set(&client->refcount, 1); | |
215 | init_completion(&client->comp); | |
216 | } | |
217 | EXPORT_SYMBOL(rdma_addr_register_client); | |
218 | ||
219 | static inline void put_client(struct rdma_addr_client *client) | |
220 | { | |
221 | if (atomic_dec_and_test(&client->refcount)) | |
222 | complete(&client->comp); | |
223 | } | |
224 | ||
225 | void rdma_addr_unregister_client(struct rdma_addr_client *client) | |
226 | { | |
227 | put_client(client); | |
228 | wait_for_completion(&client->comp); | |
229 | } | |
230 | EXPORT_SYMBOL(rdma_addr_unregister_client); | |
231 | ||
07ebafba TT |
232 | int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, |
233 | const unsigned char *dst_dev_addr) | |
7025fcd3 | 234 | { |
c4315d85 | 235 | dev_addr->dev_type = dev->type; |
7025fcd3 SH |
236 | memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN); |
237 | memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); | |
238 | if (dst_dev_addr) | |
239 | memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); | |
6266ed6e | 240 | dev_addr->bound_dev_if = dev->ifindex; |
7025fcd3 SH |
241 | return 0; |
242 | } | |
07ebafba | 243 | EXPORT_SYMBOL(rdma_copy_addr); |
7025fcd3 | 244 | |
20029832 MB |
245 | int rdma_translate_ip(const struct sockaddr *addr, |
246 | struct rdma_dev_addr *dev_addr, | |
dd5f03be | 247 | u16 *vlan_id) |
7025fcd3 SH |
248 | { |
249 | struct net_device *dev; | |
38617c64 | 250 | int ret = -EADDRNOTAVAIL; |
7025fcd3 | 251 | |
6266ed6e | 252 | if (dev_addr->bound_dev_if) { |
565edd1d | 253 | dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); |
6266ed6e SH |
254 | if (!dev) |
255 | return -ENODEV; | |
256 | ret = rdma_copy_addr(dev_addr, dev, NULL); | |
257 | dev_put(dev); | |
258 | return ret; | |
259 | } | |
260 | ||
38617c64 AS |
261 | switch (addr->sa_family) { |
262 | case AF_INET: | |
565edd1d | 263 | dev = ip_dev_find(dev_addr->net, |
20029832 | 264 | ((const struct sockaddr_in *)addr)->sin_addr.s_addr); |
38617c64 AS |
265 | |
266 | if (!dev) | |
267 | return ret; | |
7025fcd3 | 268 | |
38617c64 | 269 | ret = rdma_copy_addr(dev_addr, dev, NULL); |
cbd09aeb | 270 | dev_addr->bound_dev_if = dev->ifindex; |
dd5f03be MB |
271 | if (vlan_id) |
272 | *vlan_id = rdma_vlan_dev_vlan_id(dev); | |
38617c64 AS |
273 | dev_put(dev); |
274 | break; | |
d90f9b35 | 275 | #if IS_ENABLED(CONFIG_IPV6) |
38617c64 | 276 | case AF_INET6: |
22f4fbd9 | 277 | rcu_read_lock(); |
565edd1d GS |
278 | for_each_netdev_rcu(dev_addr->net, dev) { |
279 | if (ipv6_chk_addr(dev_addr->net, | |
20029832 | 280 | &((const struct sockaddr_in6 *)addr)->sin6_addr, |
38617c64 AS |
281 | dev, 1)) { |
282 | ret = rdma_copy_addr(dev_addr, dev, NULL); | |
cbd09aeb | 283 | dev_addr->bound_dev_if = dev->ifindex; |
dd5f03be MB |
284 | if (vlan_id) |
285 | *vlan_id = rdma_vlan_dev_vlan_id(dev); | |
38617c64 AS |
286 | break; |
287 | } | |
288 | } | |
22f4fbd9 | 289 | rcu_read_unlock(); |
38617c64 | 290 | break; |
2c4ab624 | 291 | #endif |
38617c64 | 292 | } |
7025fcd3 SH |
293 | return ret; |
294 | } | |
295 | EXPORT_SYMBOL(rdma_translate_ip); | |
296 | ||
5fff41e1 | 297 | static void set_timeout(struct delayed_work *delayed_work, unsigned long time) |
7025fcd3 SH |
298 | { |
299 | unsigned long delay; | |
300 | ||
7025fcd3 | 301 | delay = time - jiffies; |
346f98b4 OK |
302 | if ((long)delay < 0) |
303 | delay = 0; | |
7025fcd3 | 304 | |
5fff41e1 | 305 | mod_delayed_work(addr_wq, delayed_work, delay); |
7025fcd3 SH |
306 | } |
307 | ||
308 | static void queue_req(struct addr_req *req) | |
309 | { | |
310 | struct addr_req *temp_req; | |
311 | ||
312 | mutex_lock(&lock); | |
313 | list_for_each_entry_reverse(temp_req, &req_list, list) { | |
f115db48 | 314 | if (time_after_eq(req->timeout, temp_req->timeout)) |
7025fcd3 SH |
315 | break; |
316 | } | |
317 | ||
318 | list_add(&req->list, &temp_req->list); | |
319 | ||
5fff41e1 | 320 | set_timeout(&req->work, req->timeout); |
7025fcd3 SH |
321 | mutex_unlock(&lock); |
322 | } | |
323 | ||
ae43f828 MB |
324 | static int ib_nl_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, |
325 | const void *daddr, u32 seq, u16 family) | |
326 | { | |
ff61c425 | 327 | if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) |
ae43f828 MB |
328 | return -EADDRNOTAVAIL; |
329 | ||
330 | /* We fill in what we can, the response will fill the rest */ | |
331 | rdma_copy_addr(dev_addr, dst->dev, NULL); | |
332 | return ib_nl_ip_send_msg(dev_addr, daddr, seq, family); | |
333 | } | |
334 | ||
20029832 MB |
335 | static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, |
336 | const void *daddr) | |
51d45974 DM |
337 | { |
338 | struct neighbour *n; | |
339 | int ret; | |
340 | ||
02b61955 DM |
341 | n = dst_neigh_lookup(dst, daddr); |
342 | ||
51d45974 | 343 | rcu_read_lock(); |
51d45974 DM |
344 | if (!n || !(n->nud_state & NUD_VALID)) { |
345 | if (n) | |
346 | neigh_event_send(n, NULL); | |
347 | ret = -ENODATA; | |
348 | } else { | |
02b61955 | 349 | ret = rdma_copy_addr(dev_addr, dst->dev, n->ha); |
51d45974 DM |
350 | } |
351 | rcu_read_unlock(); | |
352 | ||
02b61955 DM |
353 | if (n) |
354 | neigh_release(n); | |
355 | ||
51d45974 DM |
356 | return ret; |
357 | } | |
358 | ||
ae43f828 MB |
359 | static bool has_gateway(struct dst_entry *dst, sa_family_t family) |
360 | { | |
361 | struct rtable *rt; | |
362 | struct rt6_info *rt6; | |
363 | ||
364 | if (family == AF_INET) { | |
365 | rt = container_of(dst, struct rtable, dst); | |
366 | return rt->rt_uses_gateway; | |
367 | } | |
368 | ||
369 | rt6 = container_of(dst, struct rt6_info, dst); | |
370 | return rt6->rt6i_flags & RTF_GATEWAY; | |
371 | } | |
372 | ||
373 | static int fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, | |
374 | const struct sockaddr *dst_in, u32 seq) | |
375 | { | |
376 | const struct sockaddr_in *dst_in4 = | |
377 | (const struct sockaddr_in *)dst_in; | |
378 | const struct sockaddr_in6 *dst_in6 = | |
379 | (const struct sockaddr_in6 *)dst_in; | |
380 | const void *daddr = (dst_in->sa_family == AF_INET) ? | |
381 | (const void *)&dst_in4->sin_addr.s_addr : | |
382 | (const void *)&dst_in6->sin6_addr; | |
383 | sa_family_t family = dst_in->sa_family; | |
384 | ||
385 | /* Gateway + ARPHRD_INFINIBAND -> IB router */ | |
386 | if (has_gateway(dst, family) && dst->dev->type == ARPHRD_INFINIBAND) | |
387 | return ib_nl_fetch_ha(dst, dev_addr, daddr, seq, family); | |
388 | else | |
389 | return dst_fetch_ha(dst, dev_addr, daddr); | |
390 | } | |
391 | ||
923c100e | 392 | static int addr4_resolve(struct sockaddr_in *src_in, |
20029832 MB |
393 | const struct sockaddr_in *dst_in, |
394 | struct rdma_dev_addr *addr, | |
395 | struct rtable **prt) | |
7025fcd3 | 396 | { |
1b90c137 AV |
397 | __be32 src_ip = src_in->sin_addr.s_addr; |
398 | __be32 dst_ip = dst_in->sin_addr.s_addr; | |
7025fcd3 | 399 | struct rtable *rt; |
5fc3590c | 400 | struct flowi4 fl4; |
7025fcd3 SH |
401 | int ret; |
402 | ||
5fc3590c DM |
403 | memset(&fl4, 0, sizeof(fl4)); |
404 | fl4.daddr = dst_ip; | |
405 | fl4.saddr = src_ip; | |
406 | fl4.flowi4_oif = addr->bound_dev_if; | |
565edd1d | 407 | rt = ip_route_output_key(addr->net, &fl4); |
cbd09aeb MS |
408 | ret = PTR_ERR_OR_ZERO(rt); |
409 | if (ret) | |
410 | return ret; | |
411 | ||
923c100e | 412 | src_in->sin_family = AF_INET; |
5fc3590c | 413 | src_in->sin_addr.s_addr = fl4.saddr; |
923c100e | 414 | |
ae43f828 MB |
415 | /* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're |
416 | * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network | |
417 | * type accordingly. | |
c865f246 | 418 | */ |
ae43f828 | 419 | if (rt->rt_uses_gateway && rt->dst.dev->type != ARPHRD_INFINIBAND) |
c865f246 SK |
420 | addr->network = RDMA_NETWORK_IPV4; |
421 | ||
c3efe750 MB |
422 | addr->hoplimit = ip4_dst_hoplimit(&rt->dst); |
423 | ||
20029832 MB |
424 | *prt = rt; |
425 | return 0; | |
7025fcd3 SH |
426 | } |
427 | ||
d90f9b35 | 428 | #if IS_ENABLED(CONFIG_IPV6) |
d14714df | 429 | static int addr6_resolve(struct sockaddr_in6 *src_in, |
20029832 MB |
430 | const struct sockaddr_in6 *dst_in, |
431 | struct rdma_dev_addr *addr, | |
432 | struct dst_entry **pdst) | |
38617c64 | 433 | { |
4c9483b2 | 434 | struct flowi6 fl6; |
38617c64 | 435 | struct dst_entry *dst; |
c865f246 | 436 | struct rt6_info *rt; |
d14714df | 437 | int ret; |
38617c64 | 438 | |
4c9483b2 | 439 | memset(&fl6, 0, sizeof fl6); |
4e3fd7a0 AD |
440 | fl6.daddr = dst_in->sin6_addr; |
441 | fl6.saddr = src_in->sin6_addr; | |
4c9483b2 | 442 | fl6.flowi6_oif = addr->bound_dev_if; |
38617c64 | 443 | |
eea40b8f PA |
444 | ret = ipv6_stub->ipv6_dst_lookup(addr->net, NULL, &dst, &fl6); |
445 | if (ret < 0) | |
24b43c99 | 446 | return ret; |
d14714df | 447 | |
c865f246 | 448 | rt = (struct rt6_info *)dst; |
79e25959 | 449 | if (ipv6_addr_any(&src_in->sin6_addr)) { |
d14714df | 450 | src_in->sin6_family = AF_INET6; |
4e3fd7a0 | 451 | src_in->sin6_addr = fl6.saddr; |
d14714df SH |
452 | } |
453 | ||
ae43f828 MB |
454 | /* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're |
455 | * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network | |
456 | * type accordingly. | |
c865f246 | 457 | */ |
ae43f828 MB |
458 | if (rt->rt6i_flags & RTF_GATEWAY && |
459 | ip6_dst_idev(dst)->dev->type != ARPHRD_INFINIBAND) | |
c865f246 SK |
460 | addr->network = RDMA_NETWORK_IPV6; |
461 | ||
c3efe750 MB |
462 | addr->hoplimit = ip6_dst_hoplimit(dst); |
463 | ||
20029832 MB |
464 | *pdst = dst; |
465 | return 0; | |
38617c64 | 466 | } |
2c4ab624 | 467 | #else |
d14714df | 468 | static int addr6_resolve(struct sockaddr_in6 *src_in, |
20029832 MB |
469 | const struct sockaddr_in6 *dst_in, |
470 | struct rdma_dev_addr *addr, | |
471 | struct dst_entry **pdst) | |
2c4ab624 RD |
472 | { |
473 | return -EADDRNOTAVAIL; | |
474 | } | |
475 | #endif | |
38617c64 | 476 | |
20029832 MB |
477 | static int addr_resolve_neigh(struct dst_entry *dst, |
478 | const struct sockaddr *dst_in, | |
ae43f828 MB |
479 | struct rdma_dev_addr *addr, |
480 | u32 seq) | |
20029832 MB |
481 | { |
482 | if (dst->dev->flags & IFF_LOOPBACK) { | |
483 | int ret; | |
484 | ||
485 | ret = rdma_translate_ip(dst_in, addr, NULL); | |
486 | if (!ret) | |
487 | memcpy(addr->dst_dev_addr, addr->src_dev_addr, | |
488 | MAX_ADDR_LEN); | |
489 | ||
490 | return ret; | |
491 | } | |
492 | ||
493 | /* If the device doesn't do ARP internally */ | |
ae43f828 MB |
494 | if (!(dst->dev->flags & IFF_NOARP)) |
495 | return fetch_ha(dst, addr, dst_in, seq); | |
20029832 MB |
496 | |
497 | return rdma_copy_addr(addr, dst->dev, NULL); | |
498 | } | |
499 | ||
923c100e | 500 | static int addr_resolve(struct sockaddr *src_in, |
20029832 MB |
501 | const struct sockaddr *dst_in, |
502 | struct rdma_dev_addr *addr, | |
ae43f828 MB |
503 | bool resolve_neigh, |
504 | u32 seq) | |
38617c64 | 505 | { |
20029832 MB |
506 | struct net_device *ndev; |
507 | struct dst_entry *dst; | |
508 | int ret; | |
509 | ||
bebb2a47 MS |
510 | if (!addr->net) { |
511 | pr_warn_ratelimited("%s: missing namespace\n", __func__); | |
512 | return -EINVAL; | |
513 | } | |
514 | ||
38617c64 | 515 | if (src_in->sa_family == AF_INET) { |
20029832 MB |
516 | struct rtable *rt = NULL; |
517 | const struct sockaddr_in *dst_in4 = | |
518 | (const struct sockaddr_in *)dst_in; | |
519 | ||
520 | ret = addr4_resolve((struct sockaddr_in *)src_in, | |
521 | dst_in4, addr, &rt); | |
522 | if (ret) | |
523 | return ret; | |
524 | ||
525 | if (resolve_neigh) | |
ae43f828 | 526 | ret = addr_resolve_neigh(&rt->dst, dst_in, addr, seq); |
20029832 | 527 | |
cbd09aeb MS |
528 | if (addr->bound_dev_if) { |
529 | ndev = dev_get_by_index(addr->net, addr->bound_dev_if); | |
530 | } else { | |
531 | ndev = rt->dst.dev; | |
532 | dev_hold(ndev); | |
533 | } | |
20029832 MB |
534 | |
535 | ip_rt_put(rt); | |
536 | } else { | |
537 | const struct sockaddr_in6 *dst_in6 = | |
538 | (const struct sockaddr_in6 *)dst_in; | |
539 | ||
540 | ret = addr6_resolve((struct sockaddr_in6 *)src_in, | |
541 | dst_in6, addr, | |
542 | &dst); | |
543 | if (ret) | |
544 | return ret; | |
545 | ||
546 | if (resolve_neigh) | |
ae43f828 | 547 | ret = addr_resolve_neigh(dst, dst_in, addr, seq); |
20029832 | 548 | |
cbd09aeb MS |
549 | if (addr->bound_dev_if) { |
550 | ndev = dev_get_by_index(addr->net, addr->bound_dev_if); | |
551 | } else { | |
552 | ndev = dst->dev; | |
553 | dev_hold(ndev); | |
554 | } | |
20029832 MB |
555 | |
556 | dst_release(dst); | |
557 | } | |
558 | ||
cbd09aeb MS |
559 | if (ndev->flags & IFF_LOOPBACK) { |
560 | ret = rdma_translate_ip(dst_in, addr, NULL); | |
561 | /* | |
562 | * Put the loopback device and get the translated | |
563 | * device instead. | |
564 | */ | |
565 | dev_put(ndev); | |
566 | ndev = dev_get_by_index(addr->net, addr->bound_dev_if); | |
567 | } else { | |
568 | addr->bound_dev_if = ndev->ifindex; | |
569 | } | |
20029832 MB |
570 | dev_put(ndev); |
571 | ||
572 | return ret; | |
38617c64 AS |
573 | } |
574 | ||
5fff41e1 PP |
575 | static void process_one_req(struct work_struct *_work) |
576 | { | |
577 | struct addr_req *req; | |
578 | struct sockaddr *src_in, *dst_in; | |
579 | ||
580 | mutex_lock(&lock); | |
581 | req = container_of(_work, struct addr_req, work.work); | |
582 | ||
583 | if (req->status == -ENODATA) { | |
584 | src_in = (struct sockaddr *)&req->src_addr; | |
585 | dst_in = (struct sockaddr *)&req->dst_addr; | |
586 | req->status = addr_resolve(src_in, dst_in, req->addr, | |
587 | true, req->seq); | |
588 | if (req->status && time_after_eq(jiffies, req->timeout)) { | |
589 | req->status = -ETIMEDOUT; | |
590 | } else if (req->status == -ENODATA) { | |
591 | /* requeue the work for retrying again */ | |
592 | set_timeout(&req->work, req->timeout); | |
593 | mutex_unlock(&lock); | |
594 | return; | |
595 | } | |
596 | } | |
597 | list_del(&req->list); | |
598 | mutex_unlock(&lock); | |
599 | ||
600 | req->callback(req->status, (struct sockaddr *)&req->src_addr, | |
601 | req->addr, req->context); | |
602 | put_client(req->client); | |
603 | kfree(req); | |
604 | } | |
605 | ||
c4028958 | 606 | static void process_req(struct work_struct *work) |
7025fcd3 SH |
607 | { |
608 | struct addr_req *req, *temp_req; | |
38617c64 | 609 | struct sockaddr *src_in, *dst_in; |
7025fcd3 SH |
610 | struct list_head done_list; |
611 | ||
612 | INIT_LIST_HEAD(&done_list); | |
613 | ||
614 | mutex_lock(&lock); | |
615 | list_for_each_entry_safe(req, temp_req, &req_list, list) { | |
c78bb844 | 616 | if (req->status == -ENODATA) { |
38617c64 AS |
617 | src_in = (struct sockaddr *) &req->src_addr; |
618 | dst_in = (struct sockaddr *) &req->dst_addr; | |
20029832 | 619 | req->status = addr_resolve(src_in, dst_in, req->addr, |
ae43f828 | 620 | true, req->seq); |
c78bb844 KK |
621 | if (req->status && time_after_eq(jiffies, req->timeout)) |
622 | req->status = -ETIMEDOUT; | |
5fff41e1 PP |
623 | else if (req->status == -ENODATA) { |
624 | set_timeout(&req->work, req->timeout); | |
c78bb844 | 625 | continue; |
5fff41e1 | 626 | } |
7025fcd3 | 627 | } |
04699a1f | 628 | list_move_tail(&req->list, &done_list); |
7025fcd3 SH |
629 | } |
630 | ||
7025fcd3 SH |
631 | mutex_unlock(&lock); |
632 | ||
633 | list_for_each_entry_safe(req, temp_req, &done_list, list) { | |
634 | list_del(&req->list); | |
5fff41e1 PP |
635 | /* It is safe to cancel other work items from this work item |
636 | * because at a time there can be only one work item running | |
637 | * with this single threaded work queue. | |
638 | */ | |
639 | cancel_delayed_work(&req->work); | |
38617c64 AS |
640 | req->callback(req->status, (struct sockaddr *) &req->src_addr, |
641 | req->addr, req->context); | |
7a118df3 | 642 | put_client(req->client); |
7025fcd3 SH |
643 | kfree(req); |
644 | } | |
645 | } | |
646 | ||
7a118df3 SH |
647 | int rdma_resolve_ip(struct rdma_addr_client *client, |
648 | struct sockaddr *src_addr, struct sockaddr *dst_addr, | |
7025fcd3 SH |
649 | struct rdma_dev_addr *addr, int timeout_ms, |
650 | void (*callback)(int status, struct sockaddr *src_addr, | |
651 | struct rdma_dev_addr *addr, void *context), | |
652 | void *context) | |
653 | { | |
38617c64 | 654 | struct sockaddr *src_in, *dst_in; |
7025fcd3 SH |
655 | struct addr_req *req; |
656 | int ret = 0; | |
657 | ||
dd00cc48 | 658 | req = kzalloc(sizeof *req, GFP_KERNEL); |
7025fcd3 SH |
659 | if (!req) |
660 | return -ENOMEM; | |
7025fcd3 | 661 | |
d2e08862 SH |
662 | src_in = (struct sockaddr *) &req->src_addr; |
663 | dst_in = (struct sockaddr *) &req->dst_addr; | |
664 | ||
665 | if (src_addr) { | |
666 | if (src_addr->sa_family != dst_addr->sa_family) { | |
667 | ret = -EINVAL; | |
668 | goto err; | |
669 | } | |
670 | ||
ef560861 | 671 | memcpy(src_in, src_addr, rdma_addr_size(src_addr)); |
d2e08862 SH |
672 | } else { |
673 | src_in->sa_family = dst_addr->sa_family; | |
674 | } | |
675 | ||
ef560861 | 676 | memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr)); |
7025fcd3 SH |
677 | req->addr = addr; |
678 | req->callback = callback; | |
679 | req->context = context; | |
7a118df3 SH |
680 | req->client = client; |
681 | atomic_inc(&client->refcount); | |
5fff41e1 | 682 | INIT_DELAYED_WORK(&req->work, process_one_req); |
ae43f828 | 683 | req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq); |
7025fcd3 | 684 | |
ae43f828 | 685 | req->status = addr_resolve(src_in, dst_in, addr, true, req->seq); |
7025fcd3 SH |
686 | switch (req->status) { |
687 | case 0: | |
688 | req->timeout = jiffies; | |
689 | queue_req(req); | |
690 | break; | |
691 | case -ENODATA: | |
692 | req->timeout = msecs_to_jiffies(timeout_ms) + jiffies; | |
693 | queue_req(req); | |
7025fcd3 SH |
694 | break; |
695 | default: | |
696 | ret = req->status; | |
7a118df3 | 697 | atomic_dec(&client->refcount); |
d2e08862 | 698 | goto err; |
7025fcd3 SH |
699 | } |
700 | return ret; | |
d2e08862 SH |
701 | err: |
702 | kfree(req); | |
703 | return ret; | |
7025fcd3 SH |
704 | } |
705 | EXPORT_SYMBOL(rdma_resolve_ip); | |
706 | ||
20029832 MB |
707 | int rdma_resolve_ip_route(struct sockaddr *src_addr, |
708 | const struct sockaddr *dst_addr, | |
709 | struct rdma_dev_addr *addr) | |
710 | { | |
711 | struct sockaddr_storage ssrc_addr = {}; | |
712 | struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr; | |
713 | ||
9506902b MB |
714 | if (src_addr) { |
715 | if (src_addr->sa_family != dst_addr->sa_family) | |
716 | return -EINVAL; | |
20029832 | 717 | |
20029832 | 718 | memcpy(src_in, src_addr, rdma_addr_size(src_addr)); |
9506902b | 719 | } else { |
20029832 | 720 | src_in->sa_family = dst_addr->sa_family; |
9506902b | 721 | } |
20029832 | 722 | |
ae43f828 | 723 | return addr_resolve(src_in, dst_addr, addr, false, 0); |
20029832 MB |
724 | } |
725 | EXPORT_SYMBOL(rdma_resolve_ip_route); | |
726 | ||
7025fcd3 SH |
727 | void rdma_addr_cancel(struct rdma_dev_addr *addr) |
728 | { | |
729 | struct addr_req *req, *temp_req; | |
730 | ||
731 | mutex_lock(&lock); | |
732 | list_for_each_entry_safe(req, temp_req, &req_list, list) { | |
733 | if (req->addr == addr) { | |
734 | req->status = -ECANCELED; | |
735 | req->timeout = jiffies; | |
04699a1f | 736 | list_move(&req->list, &req_list); |
5fff41e1 | 737 | set_timeout(&req->work, req->timeout); |
7025fcd3 SH |
738 | break; |
739 | } | |
740 | } | |
741 | mutex_unlock(&lock); | |
742 | } | |
743 | EXPORT_SYMBOL(rdma_addr_cancel); | |
744 | ||
dd5f03be MB |
745 | struct resolve_cb_context { |
746 | struct rdma_dev_addr *addr; | |
747 | struct completion comp; | |
61c37028 | 748 | int status; |
dd5f03be MB |
749 | }; |
750 | ||
751 | static void resolve_cb(int status, struct sockaddr *src_addr, | |
752 | struct rdma_dev_addr *addr, void *context) | |
753 | { | |
61c37028 MB |
754 | if (!status) |
755 | memcpy(((struct resolve_cb_context *)context)->addr, | |
756 | addr, sizeof(struct rdma_dev_addr)); | |
757 | ((struct resolve_cb_context *)context)->status = status; | |
dd5f03be MB |
758 | complete(&((struct resolve_cb_context *)context)->comp); |
759 | } | |
760 | ||
f7f4b23e MB |
761 | int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, |
762 | const union ib_gid *dgid, | |
c3efe750 MB |
763 | u8 *dmac, u16 *vlan_id, int *if_index, |
764 | int *hoplimit) | |
dd5f03be MB |
765 | { |
766 | int ret = 0; | |
767 | struct rdma_dev_addr dev_addr; | |
768 | struct resolve_cb_context ctx; | |
769 | struct net_device *dev; | |
770 | ||
771 | union { | |
772 | struct sockaddr _sockaddr; | |
773 | struct sockaddr_in _sockaddr_in; | |
774 | struct sockaddr_in6 _sockaddr_in6; | |
775 | } sgid_addr, dgid_addr; | |
776 | ||
777 | ||
471e7058 HL |
778 | rdma_gid2ip(&sgid_addr._sockaddr, sgid); |
779 | rdma_gid2ip(&dgid_addr._sockaddr, dgid); | |
dd5f03be MB |
780 | |
781 | memset(&dev_addr, 0, sizeof(dev_addr)); | |
20029832 MB |
782 | if (if_index) |
783 | dev_addr.bound_dev_if = *if_index; | |
565edd1d | 784 | dev_addr.net = &init_net; |
dd5f03be MB |
785 | |
786 | ctx.addr = &dev_addr; | |
787 | init_completion(&ctx.comp); | |
788 | ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr, | |
789 | &dev_addr, 1000, resolve_cb, &ctx); | |
790 | if (ret) | |
791 | return ret; | |
792 | ||
793 | wait_for_completion(&ctx.comp); | |
794 | ||
61c37028 MB |
795 | ret = ctx.status; |
796 | if (ret) | |
797 | return ret; | |
798 | ||
dd5f03be MB |
799 | memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN); |
800 | dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if); | |
801 | if (!dev) | |
802 | return -ENODEV; | |
20029832 MB |
803 | if (if_index) |
804 | *if_index = dev_addr.bound_dev_if; | |
dd5f03be MB |
805 | if (vlan_id) |
806 | *vlan_id = rdma_vlan_dev_vlan_id(dev); | |
c3efe750 MB |
807 | if (hoplimit) |
808 | *hoplimit = dev_addr.hoplimit; | |
dd5f03be MB |
809 | dev_put(dev); |
810 | return ret; | |
811 | } | |
f7f4b23e | 812 | EXPORT_SYMBOL(rdma_addr_find_l2_eth_by_grh); |
dd5f03be MB |
813 | |
814 | int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id) | |
815 | { | |
816 | int ret = 0; | |
817 | struct rdma_dev_addr dev_addr; | |
818 | union { | |
819 | struct sockaddr _sockaddr; | |
820 | struct sockaddr_in _sockaddr_in; | |
821 | struct sockaddr_in6 _sockaddr_in6; | |
822 | } gid_addr; | |
823 | ||
471e7058 | 824 | rdma_gid2ip(&gid_addr._sockaddr, sgid); |
dd5f03be | 825 | |
dd5f03be | 826 | memset(&dev_addr, 0, sizeof(dev_addr)); |
565edd1d | 827 | dev_addr.net = &init_net; |
dd5f03be MB |
828 | ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id); |
829 | if (ret) | |
830 | return ret; | |
831 | ||
832 | memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN); | |
833 | return ret; | |
834 | } | |
835 | EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid); | |
836 | ||
3cd96564 | 837 | static int netevent_callback(struct notifier_block *self, unsigned long event, |
e795d092 | 838 | void *ctx) |
7025fcd3 | 839 | { |
3cd96564 | 840 | if (event == NETEVENT_NEIGH_UPDATE) { |
e795d092 | 841 | struct neighbour *neigh = ctx; |
7025fcd3 | 842 | |
5fff41e1 PP |
843 | if (neigh->nud_state & NUD_VALID) |
844 | set_timeout(&work, jiffies); | |
e795d092 | 845 | } |
7025fcd3 SH |
846 | return 0; |
847 | } | |
848 | ||
e795d092 TT |
849 | static struct notifier_block nb = { |
850 | .notifier_call = netevent_callback | |
7025fcd3 SH |
851 | }; |
852 | ||
e3f20f02 | 853 | int addr_init(void) |
7025fcd3 | 854 | { |
39baf103 | 855 | addr_wq = alloc_ordered_workqueue("ib_addr", 0); |
7025fcd3 SH |
856 | if (!addr_wq) |
857 | return -ENOMEM; | |
858 | ||
e795d092 | 859 | register_netevent_notifier(&nb); |
dd5f03be | 860 | rdma_addr_register_client(&self); |
ae43f828 | 861 | |
7025fcd3 SH |
862 | return 0; |
863 | } | |
864 | ||
e3f20f02 | 865 | void addr_cleanup(void) |
7025fcd3 | 866 | { |
dd5f03be | 867 | rdma_addr_unregister_client(&self); |
e795d092 | 868 | unregister_netevent_notifier(&nb); |
7025fcd3 SH |
869 | destroy_workqueue(addr_wq); |
870 | } |