Commit | Line | Data |
---|---|---|
7025fcd3 SH |
1 | /* |
2 | * Copyright (c) 2005 Voltaire Inc. All rights reserved. | |
3 | * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. | |
4 | * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. | |
5 | * Copyright (c) 2005 Intel Corporation. All rights reserved. | |
6 | * | |
a9474917 SH |
7 | * This software is available to you under a choice of one of two |
8 | * licenses. You may choose to be licensed under the terms of the GNU | |
9 | * General Public License (GPL) Version 2, available from the file | |
10 | * COPYING in the main directory of this source tree, or the | |
11 | * OpenIB.org BSD license below: | |
7025fcd3 | 12 | * |
a9474917 SH |
13 | * Redistribution and use in source and binary forms, with or |
14 | * without modification, are permitted provided that the following | |
15 | * conditions are met: | |
7025fcd3 | 16 | * |
a9474917 SH |
17 | * - Redistributions of source code must retain the above |
18 | * copyright notice, this list of conditions and the following | |
19 | * disclaimer. | |
7025fcd3 | 20 | * |
a9474917 SH |
21 | * - Redistributions in binary form must reproduce the above |
22 | * copyright notice, this list of conditions and the following | |
23 | * disclaimer in the documentation and/or other materials | |
24 | * provided with the distribution. | |
7025fcd3 | 25 | * |
a9474917 SH |
26 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
27 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
28 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
29 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | |
30 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | |
31 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | |
32 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
33 | * SOFTWARE. | |
7025fcd3 SH |
34 | */ |
35 | ||
36 | #include <linux/mutex.h> | |
37 | #include <linux/inetdevice.h> | |
5a0e3ad6 | 38 | #include <linux/slab.h> |
7025fcd3 | 39 | #include <linux/workqueue.h> |
e4dd23d7 | 40 | #include <linux/module.h> |
7025fcd3 SH |
41 | #include <net/arp.h> |
42 | #include <net/neighbour.h> | |
43 | #include <net/route.h> | |
e795d092 | 44 | #include <net/netevent.h> |
38617c64 AS |
45 | #include <net/addrconf.h> |
46 | #include <net/ip6_route.h> | |
7025fcd3 | 47 | #include <rdma/ib_addr.h> |
ef560861 | 48 | #include <rdma/ib.h> |
ae43f828 MB |
49 | #include <rdma/rdma_netlink.h> |
50 | #include <net/netlink.h> | |
51 | ||
52 | #include "core_priv.h" | |
7025fcd3 | 53 | |
7025fcd3 SH |
54 | struct addr_req { |
55 | struct list_head list; | |
38617c64 AS |
56 | struct sockaddr_storage src_addr; |
57 | struct sockaddr_storage dst_addr; | |
7025fcd3 | 58 | struct rdma_dev_addr *addr; |
7a118df3 | 59 | struct rdma_addr_client *client; |
7025fcd3 SH |
60 | void *context; |
61 | void (*callback)(int status, struct sockaddr *src_addr, | |
62 | struct rdma_dev_addr *addr, void *context); | |
63 | unsigned long timeout; | |
5fff41e1 | 64 | struct delayed_work work; |
7025fcd3 | 65 | int status; |
ae43f828 | 66 | u32 seq; |
7025fcd3 SH |
67 | }; |
68 | ||
ae43f828 MB |
69 | static atomic_t ib_nl_addr_request_seq = ATOMIC_INIT(0); |
70 | ||
c4028958 | 71 | static void process_req(struct work_struct *work); |
7025fcd3 SH |
72 | |
73 | static DEFINE_MUTEX(lock); | |
74 | static LIST_HEAD(req_list); | |
c4028958 | 75 | static DECLARE_DELAYED_WORK(work, process_req); |
7025fcd3 SH |
76 | static struct workqueue_struct *addr_wq; |
77 | ||
ae43f828 MB |
78 | static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = { |
79 | [LS_NLA_TYPE_DGID] = {.type = NLA_BINARY, | |
80 | .len = sizeof(struct rdma_nla_ls_gid)}, | |
81 | }; | |
82 | ||
83 | static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh) | |
84 | { | |
85 | struct nlattr *tb[LS_NLA_TYPE_MAX] = {}; | |
86 | int ret; | |
87 | ||
88 | if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR) | |
89 | return false; | |
90 | ||
91 | ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh), | |
fceb6435 | 92 | nlmsg_len(nlh), ib_nl_addr_policy, NULL); |
ae43f828 MB |
93 | if (ret) |
94 | return false; | |
95 | ||
96 | return true; | |
97 | } | |
98 | ||
99 | static void ib_nl_process_good_ip_rsep(const struct nlmsghdr *nlh) | |
100 | { | |
101 | const struct nlattr *head, *curr; | |
102 | union ib_gid gid; | |
103 | struct addr_req *req; | |
104 | int len, rem; | |
105 | int found = 0; | |
106 | ||
107 | head = (const struct nlattr *)nlmsg_data(nlh); | |
108 | len = nlmsg_len(nlh); | |
109 | ||
110 | nla_for_each_attr(curr, head, len, rem) { | |
111 | if (curr->nla_type == LS_NLA_TYPE_DGID) | |
112 | memcpy(&gid, nla_data(curr), nla_len(curr)); | |
113 | } | |
114 | ||
115 | mutex_lock(&lock); | |
116 | list_for_each_entry(req, &req_list, list) { | |
117 | if (nlh->nlmsg_seq != req->seq) | |
118 | continue; | |
119 | /* We set the DGID part, the rest was set earlier */ | |
120 | rdma_addr_set_dgid(req->addr, &gid); | |
121 | req->status = 0; | |
122 | found = 1; | |
123 | break; | |
124 | } | |
125 | mutex_unlock(&lock); | |
126 | ||
127 | if (!found) | |
128 | pr_info("Couldn't find request waiting for DGID: %pI6\n", | |
129 | &gid); | |
130 | } | |
131 | ||
132 | int ib_nl_handle_ip_res_resp(struct sk_buff *skb, | |
647c75ac LR |
133 | struct nlmsghdr *nlh, |
134 | struct netlink_ext_ack *extack) | |
ae43f828 | 135 | { |
ae43f828 | 136 | if ((nlh->nlmsg_flags & NLM_F_REQUEST) || |
e3a2b93d | 137 | !(NETLINK_CB(skb).sk)) |
ae43f828 MB |
138 | return -EPERM; |
139 | ||
140 | if (ib_nl_is_good_ip_resp(nlh)) | |
141 | ib_nl_process_good_ip_rsep(nlh); | |
142 | ||
143 | return skb->len; | |
144 | } | |
145 | ||
146 | static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr, | |
147 | const void *daddr, | |
148 | u32 seq, u16 family) | |
149 | { | |
150 | struct sk_buff *skb = NULL; | |
151 | struct nlmsghdr *nlh; | |
152 | struct rdma_ls_ip_resolve_header *header; | |
153 | void *data; | |
154 | size_t size; | |
155 | int attrtype; | |
156 | int len; | |
157 | ||
158 | if (family == AF_INET) { | |
159 | size = sizeof(struct in_addr); | |
160 | attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV4; | |
161 | } else { | |
162 | size = sizeof(struct in6_addr); | |
163 | attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV6; | |
164 | } | |
165 | ||
166 | len = nla_total_size(sizeof(size)); | |
167 | len += NLMSG_ALIGN(sizeof(*header)); | |
168 | ||
169 | skb = nlmsg_new(len, GFP_KERNEL); | |
170 | if (!skb) | |
171 | return -ENOMEM; | |
172 | ||
173 | data = ibnl_put_msg(skb, &nlh, seq, 0, RDMA_NL_LS, | |
174 | RDMA_NL_LS_OP_IP_RESOLVE, NLM_F_REQUEST); | |
175 | if (!data) { | |
176 | nlmsg_free(skb); | |
177 | return -ENODATA; | |
178 | } | |
179 | ||
180 | /* Construct the family header first */ | |
4df864c1 | 181 | header = skb_put(skb, NLMSG_ALIGN(sizeof(*header))); |
ae43f828 MB |
182 | header->ifindex = dev_addr->bound_dev_if; |
183 | nla_put(skb, attrtype, size, daddr); | |
184 | ||
185 | /* Repair the nlmsg header length */ | |
186 | nlmsg_end(skb, nlh); | |
4d7f693a | 187 | rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, GFP_KERNEL); |
ae43f828 MB |
188 | |
189 | /* Make the request retry, so when we get the response from userspace | |
190 | * we will have something. | |
191 | */ | |
192 | return -ENODATA; | |
193 | } | |
194 | ||
ef560861 SH |
195 | int rdma_addr_size(struct sockaddr *addr) |
196 | { | |
197 | switch (addr->sa_family) { | |
198 | case AF_INET: | |
199 | return sizeof(struct sockaddr_in); | |
200 | case AF_INET6: | |
201 | return sizeof(struct sockaddr_in6); | |
202 | case AF_IB: | |
203 | return sizeof(struct sockaddr_ib); | |
204 | default: | |
205 | return 0; | |
206 | } | |
207 | } | |
208 | EXPORT_SYMBOL(rdma_addr_size); | |
209 | ||
dd5f03be MB |
210 | static struct rdma_addr_client self; |
211 | ||
7a118df3 SH |
212 | void rdma_addr_register_client(struct rdma_addr_client *client) |
213 | { | |
214 | atomic_set(&client->refcount, 1); | |
215 | init_completion(&client->comp); | |
216 | } | |
217 | EXPORT_SYMBOL(rdma_addr_register_client); | |
218 | ||
219 | static inline void put_client(struct rdma_addr_client *client) | |
220 | { | |
221 | if (atomic_dec_and_test(&client->refcount)) | |
222 | complete(&client->comp); | |
223 | } | |
224 | ||
225 | void rdma_addr_unregister_client(struct rdma_addr_client *client) | |
226 | { | |
227 | put_client(client); | |
228 | wait_for_completion(&client->comp); | |
229 | } | |
230 | EXPORT_SYMBOL(rdma_addr_unregister_client); | |
231 | ||
e08ce2e8 YS |
232 | void rdma_copy_addr(struct rdma_dev_addr *dev_addr, |
233 | const struct net_device *dev, | |
234 | const unsigned char *dst_dev_addr) | |
7025fcd3 | 235 | { |
c4315d85 | 236 | dev_addr->dev_type = dev->type; |
7025fcd3 SH |
237 | memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN); |
238 | memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); | |
239 | if (dst_dev_addr) | |
240 | memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); | |
6266ed6e | 241 | dev_addr->bound_dev_if = dev->ifindex; |
7025fcd3 | 242 | } |
07ebafba | 243 | EXPORT_SYMBOL(rdma_copy_addr); |
7025fcd3 | 244 | |
20029832 MB |
245 | int rdma_translate_ip(const struct sockaddr *addr, |
246 | struct rdma_dev_addr *dev_addr, | |
dd5f03be | 247 | u16 *vlan_id) |
7025fcd3 SH |
248 | { |
249 | struct net_device *dev; | |
7025fcd3 | 250 | |
6266ed6e | 251 | if (dev_addr->bound_dev_if) { |
565edd1d | 252 | dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); |
6266ed6e SH |
253 | if (!dev) |
254 | return -ENODEV; | |
e08ce2e8 | 255 | rdma_copy_addr(dev_addr, dev, NULL); |
6266ed6e | 256 | dev_put(dev); |
e08ce2e8 | 257 | return 0; |
6266ed6e SH |
258 | } |
259 | ||
38617c64 AS |
260 | switch (addr->sa_family) { |
261 | case AF_INET: | |
565edd1d | 262 | dev = ip_dev_find(dev_addr->net, |
20029832 | 263 | ((const struct sockaddr_in *)addr)->sin_addr.s_addr); |
38617c64 AS |
264 | |
265 | if (!dev) | |
e08ce2e8 | 266 | return -EADDRNOTAVAIL; |
7025fcd3 | 267 | |
e08ce2e8 | 268 | rdma_copy_addr(dev_addr, dev, NULL); |
cbd09aeb | 269 | dev_addr->bound_dev_if = dev->ifindex; |
dd5f03be MB |
270 | if (vlan_id) |
271 | *vlan_id = rdma_vlan_dev_vlan_id(dev); | |
38617c64 AS |
272 | dev_put(dev); |
273 | break; | |
d90f9b35 | 274 | #if IS_ENABLED(CONFIG_IPV6) |
38617c64 | 275 | case AF_INET6: |
22f4fbd9 | 276 | rcu_read_lock(); |
565edd1d GS |
277 | for_each_netdev_rcu(dev_addr->net, dev) { |
278 | if (ipv6_chk_addr(dev_addr->net, | |
20029832 | 279 | &((const struct sockaddr_in6 *)addr)->sin6_addr, |
38617c64 | 280 | dev, 1)) { |
e08ce2e8 | 281 | rdma_copy_addr(dev_addr, dev, NULL); |
cbd09aeb | 282 | dev_addr->bound_dev_if = dev->ifindex; |
dd5f03be MB |
283 | if (vlan_id) |
284 | *vlan_id = rdma_vlan_dev_vlan_id(dev); | |
38617c64 AS |
285 | break; |
286 | } | |
287 | } | |
22f4fbd9 | 288 | rcu_read_unlock(); |
38617c64 | 289 | break; |
2c4ab624 | 290 | #endif |
38617c64 | 291 | } |
e08ce2e8 | 292 | return 0; |
7025fcd3 SH |
293 | } |
294 | EXPORT_SYMBOL(rdma_translate_ip); | |
295 | ||
5fff41e1 | 296 | static void set_timeout(struct delayed_work *delayed_work, unsigned long time) |
7025fcd3 SH |
297 | { |
298 | unsigned long delay; | |
299 | ||
7025fcd3 | 300 | delay = time - jiffies; |
346f98b4 OK |
301 | if ((long)delay < 0) |
302 | delay = 0; | |
7025fcd3 | 303 | |
5fff41e1 | 304 | mod_delayed_work(addr_wq, delayed_work, delay); |
7025fcd3 SH |
305 | } |
306 | ||
307 | static void queue_req(struct addr_req *req) | |
308 | { | |
309 | struct addr_req *temp_req; | |
310 | ||
311 | mutex_lock(&lock); | |
312 | list_for_each_entry_reverse(temp_req, &req_list, list) { | |
f115db48 | 313 | if (time_after_eq(req->timeout, temp_req->timeout)) |
7025fcd3 SH |
314 | break; |
315 | } | |
316 | ||
317 | list_add(&req->list, &temp_req->list); | |
318 | ||
5fff41e1 | 319 | set_timeout(&req->work, req->timeout); |
7025fcd3 SH |
320 | mutex_unlock(&lock); |
321 | } | |
322 | ||
ae43f828 MB |
323 | static int ib_nl_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, |
324 | const void *daddr, u32 seq, u16 family) | |
325 | { | |
ff61c425 | 326 | if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) |
ae43f828 MB |
327 | return -EADDRNOTAVAIL; |
328 | ||
329 | /* We fill in what we can, the response will fill the rest */ | |
330 | rdma_copy_addr(dev_addr, dst->dev, NULL); | |
331 | return ib_nl_ip_send_msg(dev_addr, daddr, seq, family); | |
332 | } | |
333 | ||
20029832 MB |
334 | static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, |
335 | const void *daddr) | |
51d45974 DM |
336 | { |
337 | struct neighbour *n; | |
e08ce2e8 | 338 | int ret = 0; |
51d45974 | 339 | |
02b61955 DM |
340 | n = dst_neigh_lookup(dst, daddr); |
341 | ||
51d45974 | 342 | rcu_read_lock(); |
51d45974 DM |
343 | if (!n || !(n->nud_state & NUD_VALID)) { |
344 | if (n) | |
345 | neigh_event_send(n, NULL); | |
346 | ret = -ENODATA; | |
347 | } else { | |
e08ce2e8 | 348 | rdma_copy_addr(dev_addr, dst->dev, n->ha); |
51d45974 DM |
349 | } |
350 | rcu_read_unlock(); | |
351 | ||
02b61955 DM |
352 | if (n) |
353 | neigh_release(n); | |
354 | ||
51d45974 DM |
355 | return ret; |
356 | } | |
357 | ||
ae43f828 MB |
358 | static bool has_gateway(struct dst_entry *dst, sa_family_t family) |
359 | { | |
360 | struct rtable *rt; | |
361 | struct rt6_info *rt6; | |
362 | ||
363 | if (family == AF_INET) { | |
364 | rt = container_of(dst, struct rtable, dst); | |
365 | return rt->rt_uses_gateway; | |
366 | } | |
367 | ||
368 | rt6 = container_of(dst, struct rt6_info, dst); | |
369 | return rt6->rt6i_flags & RTF_GATEWAY; | |
370 | } | |
371 | ||
372 | static int fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, | |
373 | const struct sockaddr *dst_in, u32 seq) | |
374 | { | |
375 | const struct sockaddr_in *dst_in4 = | |
376 | (const struct sockaddr_in *)dst_in; | |
377 | const struct sockaddr_in6 *dst_in6 = | |
378 | (const struct sockaddr_in6 *)dst_in; | |
379 | const void *daddr = (dst_in->sa_family == AF_INET) ? | |
380 | (const void *)&dst_in4->sin_addr.s_addr : | |
381 | (const void *)&dst_in6->sin6_addr; | |
382 | sa_family_t family = dst_in->sa_family; | |
383 | ||
384 | /* Gateway + ARPHRD_INFINIBAND -> IB router */ | |
385 | if (has_gateway(dst, family) && dst->dev->type == ARPHRD_INFINIBAND) | |
386 | return ib_nl_fetch_ha(dst, dev_addr, daddr, seq, family); | |
387 | else | |
388 | return dst_fetch_ha(dst, dev_addr, daddr); | |
389 | } | |
390 | ||
923c100e | 391 | static int addr4_resolve(struct sockaddr_in *src_in, |
20029832 MB |
392 | const struct sockaddr_in *dst_in, |
393 | struct rdma_dev_addr *addr, | |
394 | struct rtable **prt) | |
7025fcd3 | 395 | { |
1b90c137 AV |
396 | __be32 src_ip = src_in->sin_addr.s_addr; |
397 | __be32 dst_ip = dst_in->sin_addr.s_addr; | |
7025fcd3 | 398 | struct rtable *rt; |
5fc3590c | 399 | struct flowi4 fl4; |
7025fcd3 SH |
400 | int ret; |
401 | ||
5fc3590c DM |
402 | memset(&fl4, 0, sizeof(fl4)); |
403 | fl4.daddr = dst_ip; | |
404 | fl4.saddr = src_ip; | |
405 | fl4.flowi4_oif = addr->bound_dev_if; | |
565edd1d | 406 | rt = ip_route_output_key(addr->net, &fl4); |
cbd09aeb MS |
407 | ret = PTR_ERR_OR_ZERO(rt); |
408 | if (ret) | |
409 | return ret; | |
410 | ||
923c100e | 411 | src_in->sin_family = AF_INET; |
5fc3590c | 412 | src_in->sin_addr.s_addr = fl4.saddr; |
923c100e | 413 | |
ae43f828 MB |
414 | /* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're |
415 | * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network | |
416 | * type accordingly. | |
c865f246 | 417 | */ |
ae43f828 | 418 | if (rt->rt_uses_gateway && rt->dst.dev->type != ARPHRD_INFINIBAND) |
c865f246 SK |
419 | addr->network = RDMA_NETWORK_IPV4; |
420 | ||
c3efe750 MB |
421 | addr->hoplimit = ip4_dst_hoplimit(&rt->dst); |
422 | ||
20029832 MB |
423 | *prt = rt; |
424 | return 0; | |
7025fcd3 SH |
425 | } |
426 | ||
d90f9b35 | 427 | #if IS_ENABLED(CONFIG_IPV6) |
d14714df | 428 | static int addr6_resolve(struct sockaddr_in6 *src_in, |
20029832 MB |
429 | const struct sockaddr_in6 *dst_in, |
430 | struct rdma_dev_addr *addr, | |
431 | struct dst_entry **pdst) | |
38617c64 | 432 | { |
4c9483b2 | 433 | struct flowi6 fl6; |
38617c64 | 434 | struct dst_entry *dst; |
c865f246 | 435 | struct rt6_info *rt; |
d14714df | 436 | int ret; |
38617c64 | 437 | |
4c9483b2 | 438 | memset(&fl6, 0, sizeof fl6); |
4e3fd7a0 AD |
439 | fl6.daddr = dst_in->sin6_addr; |
440 | fl6.saddr = src_in->sin6_addr; | |
4c9483b2 | 441 | fl6.flowi6_oif = addr->bound_dev_if; |
38617c64 | 442 | |
eea40b8f PA |
443 | ret = ipv6_stub->ipv6_dst_lookup(addr->net, NULL, &dst, &fl6); |
444 | if (ret < 0) | |
24b43c99 | 445 | return ret; |
d14714df | 446 | |
c865f246 | 447 | rt = (struct rt6_info *)dst; |
79e25959 | 448 | if (ipv6_addr_any(&src_in->sin6_addr)) { |
d14714df | 449 | src_in->sin6_family = AF_INET6; |
4e3fd7a0 | 450 | src_in->sin6_addr = fl6.saddr; |
d14714df SH |
451 | } |
452 | ||
ae43f828 MB |
453 | /* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're |
454 | * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network | |
455 | * type accordingly. | |
c865f246 | 456 | */ |
ae43f828 MB |
457 | if (rt->rt6i_flags & RTF_GATEWAY && |
458 | ip6_dst_idev(dst)->dev->type != ARPHRD_INFINIBAND) | |
c865f246 SK |
459 | addr->network = RDMA_NETWORK_IPV6; |
460 | ||
c3efe750 MB |
461 | addr->hoplimit = ip6_dst_hoplimit(dst); |
462 | ||
20029832 MB |
463 | *pdst = dst; |
464 | return 0; | |
38617c64 | 465 | } |
2c4ab624 | 466 | #else |
d14714df | 467 | static int addr6_resolve(struct sockaddr_in6 *src_in, |
20029832 MB |
468 | const struct sockaddr_in6 *dst_in, |
469 | struct rdma_dev_addr *addr, | |
470 | struct dst_entry **pdst) | |
2c4ab624 RD |
471 | { |
472 | return -EADDRNOTAVAIL; | |
473 | } | |
474 | #endif | |
38617c64 | 475 | |
20029832 MB |
476 | static int addr_resolve_neigh(struct dst_entry *dst, |
477 | const struct sockaddr *dst_in, | |
ae43f828 MB |
478 | struct rdma_dev_addr *addr, |
479 | u32 seq) | |
20029832 MB |
480 | { |
481 | if (dst->dev->flags & IFF_LOOPBACK) { | |
482 | int ret; | |
483 | ||
484 | ret = rdma_translate_ip(dst_in, addr, NULL); | |
485 | if (!ret) | |
486 | memcpy(addr->dst_dev_addr, addr->src_dev_addr, | |
487 | MAX_ADDR_LEN); | |
488 | ||
489 | return ret; | |
490 | } | |
491 | ||
492 | /* If the device doesn't do ARP internally */ | |
ae43f828 MB |
493 | if (!(dst->dev->flags & IFF_NOARP)) |
494 | return fetch_ha(dst, addr, dst_in, seq); | |
20029832 | 495 | |
e08ce2e8 YS |
496 | rdma_copy_addr(addr, dst->dev, NULL); |
497 | ||
498 | return 0; | |
20029832 MB |
499 | } |
500 | ||
923c100e | 501 | static int addr_resolve(struct sockaddr *src_in, |
20029832 MB |
502 | const struct sockaddr *dst_in, |
503 | struct rdma_dev_addr *addr, | |
ae43f828 MB |
504 | bool resolve_neigh, |
505 | u32 seq) | |
38617c64 | 506 | { |
20029832 MB |
507 | struct net_device *ndev; |
508 | struct dst_entry *dst; | |
509 | int ret; | |
510 | ||
bebb2a47 MS |
511 | if (!addr->net) { |
512 | pr_warn_ratelimited("%s: missing namespace\n", __func__); | |
513 | return -EINVAL; | |
514 | } | |
515 | ||
38617c64 | 516 | if (src_in->sa_family == AF_INET) { |
20029832 MB |
517 | struct rtable *rt = NULL; |
518 | const struct sockaddr_in *dst_in4 = | |
519 | (const struct sockaddr_in *)dst_in; | |
520 | ||
521 | ret = addr4_resolve((struct sockaddr_in *)src_in, | |
522 | dst_in4, addr, &rt); | |
523 | if (ret) | |
524 | return ret; | |
525 | ||
526 | if (resolve_neigh) | |
ae43f828 | 527 | ret = addr_resolve_neigh(&rt->dst, dst_in, addr, seq); |
20029832 | 528 | |
cbd09aeb MS |
529 | if (addr->bound_dev_if) { |
530 | ndev = dev_get_by_index(addr->net, addr->bound_dev_if); | |
531 | } else { | |
532 | ndev = rt->dst.dev; | |
533 | dev_hold(ndev); | |
534 | } | |
20029832 MB |
535 | |
536 | ip_rt_put(rt); | |
537 | } else { | |
538 | const struct sockaddr_in6 *dst_in6 = | |
539 | (const struct sockaddr_in6 *)dst_in; | |
540 | ||
541 | ret = addr6_resolve((struct sockaddr_in6 *)src_in, | |
542 | dst_in6, addr, | |
543 | &dst); | |
544 | if (ret) | |
545 | return ret; | |
546 | ||
547 | if (resolve_neigh) | |
ae43f828 | 548 | ret = addr_resolve_neigh(dst, dst_in, addr, seq); |
20029832 | 549 | |
cbd09aeb MS |
550 | if (addr->bound_dev_if) { |
551 | ndev = dev_get_by_index(addr->net, addr->bound_dev_if); | |
552 | } else { | |
553 | ndev = dst->dev; | |
554 | dev_hold(ndev); | |
555 | } | |
20029832 MB |
556 | |
557 | dst_release(dst); | |
558 | } | |
559 | ||
cbd09aeb MS |
560 | if (ndev->flags & IFF_LOOPBACK) { |
561 | ret = rdma_translate_ip(dst_in, addr, NULL); | |
562 | /* | |
563 | * Put the loopback device and get the translated | |
564 | * device instead. | |
565 | */ | |
566 | dev_put(ndev); | |
567 | ndev = dev_get_by_index(addr->net, addr->bound_dev_if); | |
568 | } else { | |
569 | addr->bound_dev_if = ndev->ifindex; | |
570 | } | |
20029832 MB |
571 | dev_put(ndev); |
572 | ||
573 | return ret; | |
38617c64 AS |
574 | } |
575 | ||
5fff41e1 PP |
576 | static void process_one_req(struct work_struct *_work) |
577 | { | |
578 | struct addr_req *req; | |
579 | struct sockaddr *src_in, *dst_in; | |
580 | ||
581 | mutex_lock(&lock); | |
582 | req = container_of(_work, struct addr_req, work.work); | |
583 | ||
584 | if (req->status == -ENODATA) { | |
585 | src_in = (struct sockaddr *)&req->src_addr; | |
586 | dst_in = (struct sockaddr *)&req->dst_addr; | |
587 | req->status = addr_resolve(src_in, dst_in, req->addr, | |
588 | true, req->seq); | |
589 | if (req->status && time_after_eq(jiffies, req->timeout)) { | |
590 | req->status = -ETIMEDOUT; | |
591 | } else if (req->status == -ENODATA) { | |
592 | /* requeue the work for retrying again */ | |
593 | set_timeout(&req->work, req->timeout); | |
594 | mutex_unlock(&lock); | |
595 | return; | |
596 | } | |
597 | } | |
598 | list_del(&req->list); | |
599 | mutex_unlock(&lock); | |
600 | ||
601 | req->callback(req->status, (struct sockaddr *)&req->src_addr, | |
602 | req->addr, req->context); | |
603 | put_client(req->client); | |
604 | kfree(req); | |
605 | } | |
606 | ||
c4028958 | 607 | static void process_req(struct work_struct *work) |
7025fcd3 SH |
608 | { |
609 | struct addr_req *req, *temp_req; | |
38617c64 | 610 | struct sockaddr *src_in, *dst_in; |
7025fcd3 SH |
611 | struct list_head done_list; |
612 | ||
613 | INIT_LIST_HEAD(&done_list); | |
614 | ||
615 | mutex_lock(&lock); | |
616 | list_for_each_entry_safe(req, temp_req, &req_list, list) { | |
c78bb844 | 617 | if (req->status == -ENODATA) { |
38617c64 AS |
618 | src_in = (struct sockaddr *) &req->src_addr; |
619 | dst_in = (struct sockaddr *) &req->dst_addr; | |
20029832 | 620 | req->status = addr_resolve(src_in, dst_in, req->addr, |
ae43f828 | 621 | true, req->seq); |
c78bb844 KK |
622 | if (req->status && time_after_eq(jiffies, req->timeout)) |
623 | req->status = -ETIMEDOUT; | |
5fff41e1 PP |
624 | else if (req->status == -ENODATA) { |
625 | set_timeout(&req->work, req->timeout); | |
c78bb844 | 626 | continue; |
5fff41e1 | 627 | } |
7025fcd3 | 628 | } |
04699a1f | 629 | list_move_tail(&req->list, &done_list); |
7025fcd3 SH |
630 | } |
631 | ||
7025fcd3 SH |
632 | mutex_unlock(&lock); |
633 | ||
634 | list_for_each_entry_safe(req, temp_req, &done_list, list) { | |
635 | list_del(&req->list); | |
5fff41e1 PP |
636 | /* It is safe to cancel other work items from this work item |
637 | * because at a time there can be only one work item running | |
638 | * with this single threaded work queue. | |
639 | */ | |
640 | cancel_delayed_work(&req->work); | |
38617c64 AS |
641 | req->callback(req->status, (struct sockaddr *) &req->src_addr, |
642 | req->addr, req->context); | |
7a118df3 | 643 | put_client(req->client); |
7025fcd3 SH |
644 | kfree(req); |
645 | } | |
646 | } | |
647 | ||
7a118df3 SH |
648 | int rdma_resolve_ip(struct rdma_addr_client *client, |
649 | struct sockaddr *src_addr, struct sockaddr *dst_addr, | |
7025fcd3 SH |
650 | struct rdma_dev_addr *addr, int timeout_ms, |
651 | void (*callback)(int status, struct sockaddr *src_addr, | |
652 | struct rdma_dev_addr *addr, void *context), | |
653 | void *context) | |
654 | { | |
38617c64 | 655 | struct sockaddr *src_in, *dst_in; |
7025fcd3 SH |
656 | struct addr_req *req; |
657 | int ret = 0; | |
658 | ||
dd00cc48 | 659 | req = kzalloc(sizeof *req, GFP_KERNEL); |
7025fcd3 SH |
660 | if (!req) |
661 | return -ENOMEM; | |
7025fcd3 | 662 | |
d2e08862 SH |
663 | src_in = (struct sockaddr *) &req->src_addr; |
664 | dst_in = (struct sockaddr *) &req->dst_addr; | |
665 | ||
666 | if (src_addr) { | |
667 | if (src_addr->sa_family != dst_addr->sa_family) { | |
668 | ret = -EINVAL; | |
669 | goto err; | |
670 | } | |
671 | ||
ef560861 | 672 | memcpy(src_in, src_addr, rdma_addr_size(src_addr)); |
d2e08862 SH |
673 | } else { |
674 | src_in->sa_family = dst_addr->sa_family; | |
675 | } | |
676 | ||
ef560861 | 677 | memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr)); |
7025fcd3 SH |
678 | req->addr = addr; |
679 | req->callback = callback; | |
680 | req->context = context; | |
7a118df3 SH |
681 | req->client = client; |
682 | atomic_inc(&client->refcount); | |
5fff41e1 | 683 | INIT_DELAYED_WORK(&req->work, process_one_req); |
ae43f828 | 684 | req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq); |
7025fcd3 | 685 | |
ae43f828 | 686 | req->status = addr_resolve(src_in, dst_in, addr, true, req->seq); |
7025fcd3 SH |
687 | switch (req->status) { |
688 | case 0: | |
689 | req->timeout = jiffies; | |
690 | queue_req(req); | |
691 | break; | |
692 | case -ENODATA: | |
693 | req->timeout = msecs_to_jiffies(timeout_ms) + jiffies; | |
694 | queue_req(req); | |
7025fcd3 SH |
695 | break; |
696 | default: | |
697 | ret = req->status; | |
7a118df3 | 698 | atomic_dec(&client->refcount); |
d2e08862 | 699 | goto err; |
7025fcd3 SH |
700 | } |
701 | return ret; | |
d2e08862 SH |
702 | err: |
703 | kfree(req); | |
704 | return ret; | |
7025fcd3 SH |
705 | } |
706 | EXPORT_SYMBOL(rdma_resolve_ip); | |
707 | ||
20029832 MB |
708 | int rdma_resolve_ip_route(struct sockaddr *src_addr, |
709 | const struct sockaddr *dst_addr, | |
710 | struct rdma_dev_addr *addr) | |
711 | { | |
712 | struct sockaddr_storage ssrc_addr = {}; | |
713 | struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr; | |
714 | ||
9506902b MB |
715 | if (src_addr) { |
716 | if (src_addr->sa_family != dst_addr->sa_family) | |
717 | return -EINVAL; | |
20029832 | 718 | |
20029832 | 719 | memcpy(src_in, src_addr, rdma_addr_size(src_addr)); |
9506902b | 720 | } else { |
20029832 | 721 | src_in->sa_family = dst_addr->sa_family; |
9506902b | 722 | } |
20029832 | 723 | |
ae43f828 | 724 | return addr_resolve(src_in, dst_addr, addr, false, 0); |
20029832 MB |
725 | } |
726 | EXPORT_SYMBOL(rdma_resolve_ip_route); | |
727 | ||
7025fcd3 SH |
728 | void rdma_addr_cancel(struct rdma_dev_addr *addr) |
729 | { | |
730 | struct addr_req *req, *temp_req; | |
731 | ||
732 | mutex_lock(&lock); | |
733 | list_for_each_entry_safe(req, temp_req, &req_list, list) { | |
734 | if (req->addr == addr) { | |
735 | req->status = -ECANCELED; | |
736 | req->timeout = jiffies; | |
04699a1f | 737 | list_move(&req->list, &req_list); |
5fff41e1 | 738 | set_timeout(&req->work, req->timeout); |
7025fcd3 SH |
739 | break; |
740 | } | |
741 | } | |
742 | mutex_unlock(&lock); | |
743 | } | |
744 | EXPORT_SYMBOL(rdma_addr_cancel); | |
745 | ||
dd5f03be MB |
746 | struct resolve_cb_context { |
747 | struct rdma_dev_addr *addr; | |
748 | struct completion comp; | |
61c37028 | 749 | int status; |
dd5f03be MB |
750 | }; |
751 | ||
752 | static void resolve_cb(int status, struct sockaddr *src_addr, | |
753 | struct rdma_dev_addr *addr, void *context) | |
754 | { | |
61c37028 MB |
755 | if (!status) |
756 | memcpy(((struct resolve_cb_context *)context)->addr, | |
757 | addr, sizeof(struct rdma_dev_addr)); | |
758 | ((struct resolve_cb_context *)context)->status = status; | |
dd5f03be MB |
759 | complete(&((struct resolve_cb_context *)context)->comp); |
760 | } | |
761 | ||
f7f4b23e MB |
762 | int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, |
763 | const union ib_gid *dgid, | |
c3efe750 MB |
764 | u8 *dmac, u16 *vlan_id, int *if_index, |
765 | int *hoplimit) | |
dd5f03be MB |
766 | { |
767 | int ret = 0; | |
768 | struct rdma_dev_addr dev_addr; | |
769 | struct resolve_cb_context ctx; | |
770 | struct net_device *dev; | |
771 | ||
772 | union { | |
773 | struct sockaddr _sockaddr; | |
774 | struct sockaddr_in _sockaddr_in; | |
775 | struct sockaddr_in6 _sockaddr_in6; | |
776 | } sgid_addr, dgid_addr; | |
777 | ||
778 | ||
471e7058 HL |
779 | rdma_gid2ip(&sgid_addr._sockaddr, sgid); |
780 | rdma_gid2ip(&dgid_addr._sockaddr, dgid); | |
dd5f03be MB |
781 | |
782 | memset(&dev_addr, 0, sizeof(dev_addr)); | |
20029832 MB |
783 | if (if_index) |
784 | dev_addr.bound_dev_if = *if_index; | |
565edd1d | 785 | dev_addr.net = &init_net; |
dd5f03be MB |
786 | |
787 | ctx.addr = &dev_addr; | |
788 | init_completion(&ctx.comp); | |
789 | ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr, | |
790 | &dev_addr, 1000, resolve_cb, &ctx); | |
791 | if (ret) | |
792 | return ret; | |
793 | ||
794 | wait_for_completion(&ctx.comp); | |
795 | ||
61c37028 MB |
796 | ret = ctx.status; |
797 | if (ret) | |
798 | return ret; | |
799 | ||
dd5f03be MB |
800 | memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN); |
801 | dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if); | |
802 | if (!dev) | |
803 | return -ENODEV; | |
20029832 MB |
804 | if (if_index) |
805 | *if_index = dev_addr.bound_dev_if; | |
dd5f03be MB |
806 | if (vlan_id) |
807 | *vlan_id = rdma_vlan_dev_vlan_id(dev); | |
c3efe750 MB |
808 | if (hoplimit) |
809 | *hoplimit = dev_addr.hoplimit; | |
dd5f03be MB |
810 | dev_put(dev); |
811 | return ret; | |
812 | } | |
f7f4b23e | 813 | EXPORT_SYMBOL(rdma_addr_find_l2_eth_by_grh); |
dd5f03be MB |
814 | |
815 | int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id) | |
816 | { | |
817 | int ret = 0; | |
818 | struct rdma_dev_addr dev_addr; | |
819 | union { | |
820 | struct sockaddr _sockaddr; | |
821 | struct sockaddr_in _sockaddr_in; | |
822 | struct sockaddr_in6 _sockaddr_in6; | |
823 | } gid_addr; | |
824 | ||
471e7058 | 825 | rdma_gid2ip(&gid_addr._sockaddr, sgid); |
dd5f03be | 826 | |
dd5f03be | 827 | memset(&dev_addr, 0, sizeof(dev_addr)); |
565edd1d | 828 | dev_addr.net = &init_net; |
dd5f03be MB |
829 | ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id); |
830 | if (ret) | |
831 | return ret; | |
832 | ||
833 | memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN); | |
834 | return ret; | |
835 | } | |
836 | EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid); | |
837 | ||
3cd96564 | 838 | static int netevent_callback(struct notifier_block *self, unsigned long event, |
e795d092 | 839 | void *ctx) |
7025fcd3 | 840 | { |
3cd96564 | 841 | if (event == NETEVENT_NEIGH_UPDATE) { |
e795d092 | 842 | struct neighbour *neigh = ctx; |
7025fcd3 | 843 | |
5fff41e1 PP |
844 | if (neigh->nud_state & NUD_VALID) |
845 | set_timeout(&work, jiffies); | |
e795d092 | 846 | } |
7025fcd3 SH |
847 | return 0; |
848 | } | |
849 | ||
e795d092 TT |
850 | static struct notifier_block nb = { |
851 | .notifier_call = netevent_callback | |
7025fcd3 SH |
852 | }; |
853 | ||
e3f20f02 | 854 | int addr_init(void) |
7025fcd3 | 855 | { |
39baf103 | 856 | addr_wq = alloc_ordered_workqueue("ib_addr", 0); |
7025fcd3 SH |
857 | if (!addr_wq) |
858 | return -ENOMEM; | |
859 | ||
e795d092 | 860 | register_netevent_notifier(&nb); |
dd5f03be | 861 | rdma_addr_register_client(&self); |
ae43f828 | 862 | |
7025fcd3 SH |
863 | return 0; |
864 | } | |
865 | ||
e3f20f02 | 866 | void addr_cleanup(void) |
7025fcd3 | 867 | { |
dd5f03be | 868 | rdma_addr_unregister_client(&self); |
e795d092 | 869 | unregister_netevent_notifier(&nb); |
7025fcd3 SH |
870 | destroy_workqueue(addr_wq); |
871 | } |