Commit | Line | Data |
---|---|---|
889b7da2 JK |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * Management Component Transport Protocol (MCTP) - routing | |
4 | * implementation. | |
5 | * | |
6 | * This is currently based on a simple routing table, with no dst cache. The | |
7 | * number of routes should stay fairly small, so the lookup cost is small. | |
8 | * | |
9 | * Copyright (c) 2021 Code Construct | |
10 | * Copyright (c) 2021 Google | |
11 | */ | |
12 | ||
13 | #include <linux/idr.h> | |
161eba50 | 14 | #include <linux/kconfig.h> |
889b7da2 JK |
15 | #include <linux/mctp.h> |
16 | #include <linux/netdevice.h> | |
17 | #include <linux/rtnetlink.h> | |
18 | #include <linux/skbuff.h> | |
19 | ||
20 | #include <uapi/linux/if_arp.h> | |
21 | ||
22 | #include <net/mctp.h> | |
23 | #include <net/mctpdevice.h> | |
06d2f4c5 MJ |
24 | #include <net/netlink.h> |
25 | #include <net/sock.h> | |
889b7da2 | 26 | |
4f9e1ba6 JK |
27 | #include <trace/events/mctp.h> |
28 | ||
/* Upper bound, in bytes, on the length of a reassembled message;
 * mctp_frag_queue() rejects a fragment that would take a message past it.
 */
static const unsigned int mctp_message_maxlen = 64 * 1024;

/* Lifetime of a key in jiffies (six seconds' worth); added to the current
 * jiffies value to compute key->expiry when a key is put on the key lists.
 */
static const unsigned long mctp_key_lifetime = 6 * CONFIG_HZ;

/* defined further down; the body depends on CONFIG_MCTP_FLOWS */
static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev);
33 | ||
889b7da2 JK |
34 | /* route output callbacks */ |
/* Route output callback that simply drops the packet: @skb is freed and
 * success is reported. @route is unused.
 */
static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb)
{
	kfree_skb(skb);

	return 0;
}
40 | ||
833ef3b9 JK |
41 | static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb) |
42 | { | |
43 | struct mctp_skb_cb *cb = mctp_cb(skb); | |
44 | struct mctp_hdr *mh; | |
45 | struct sock *sk; | |
46 | u8 type; | |
47 | ||
48 | WARN_ON(!rcu_read_lock_held()); | |
49 | ||
50 | /* TODO: look up in skb->cb? */ | |
51 | mh = mctp_hdr(skb); | |
52 | ||
53 | if (!skb_headlen(skb)) | |
54 | return NULL; | |
55 | ||
56 | type = (*(u8 *)skb->data) & 0x7f; | |
57 | ||
58 | sk_for_each_rcu(sk, &net->mctp.binds) { | |
59 | struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); | |
60 | ||
61 | if (msk->bind_net != MCTP_NET_ANY && msk->bind_net != cb->net) | |
62 | continue; | |
63 | ||
64 | if (msk->bind_type != type) | |
65 | continue; | |
66 | ||
8069b22d | 67 | if (!mctp_address_matches(msk->bind_addr, mh->dest)) |
833ef3b9 JK |
68 | continue; |
69 | ||
70 | return msk; | |
71 | } | |
72 | ||
73 | return NULL; | |
74 | } | |
75 | ||
76 | static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local, | |
77 | mctp_eid_t peer, u8 tag) | |
78 | { | |
0de55a7d | 79 | if (!mctp_address_matches(key->local_addr, local)) |
833ef3b9 JK |
80 | return false; |
81 | ||
82 | if (key->peer_addr != peer) | |
83 | return false; | |
84 | ||
85 | if (key->tag != tag) | |
86 | return false; | |
87 | ||
88 | return true; | |
89 | } | |
90 | ||
73c61845 JK |
/* Look up the key for a packet.
 *
 * The lookup matches on the destination EID and the tag + tag-owner bits
 * taken from @skb's MCTP header, together with the @peer EID supplied by
 * the caller. Matching keys that are no longer valid are passed over.
 *
 * Returns a key (with key->lock held, and refcounted), or NULL if no such
 * key exists. When a key is returned, the saved interrupt state is stored
 * in *@irqflags so the caller can hand it to spin_unlock_irqrestore() when
 * it drops key->lock; *@irqflags is not written when NULL is returned.
 */
static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb,
					   mctp_eid_t peer,
					   unsigned long *irqflags)
	__acquires(&key->lock)
{
	struct mctp_sk_key *key, *ret;
	unsigned long flags;
	struct mctp_hdr *mh;
	u8 tag;

	mh = mctp_hdr(skb);
	tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);

	ret = NULL;
	spin_lock_irqsave(&net->mctp.keys_lock, flags);

	hlist_for_each_entry(key, &net->mctp.keys, hlist) {
		if (!mctp_key_match(key, mh->dest, peer, tag))
			continue;

		/* validity is only stable under the key's own lock */
		spin_lock(&key->lock);
		if (key->valid) {
			refcount_inc(&key->refs);
			ret = key;
			/* leave the loop with key->lock still held */
			break;
		}
		spin_unlock(&key->lock);
	}

	if (ret) {
		/* key->lock stays held, so only drop the list lock here and
		 * pass the saved interrupt state on to the caller
		 */
		spin_unlock(&net->mctp.keys_lock);
		*irqflags = flags;
	} else {
		spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
	}

	return ret;
}
132 | ||
4a992bbd JK |
133 | static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk, |
134 | mctp_eid_t local, mctp_eid_t peer, | |
135 | u8 tag, gfp_t gfp) | |
136 | { | |
137 | struct mctp_sk_key *key; | |
138 | ||
139 | key = kzalloc(sizeof(*key), gfp); | |
140 | if (!key) | |
141 | return NULL; | |
142 | ||
143 | key->peer_addr = peer; | |
144 | key->local_addr = local; | |
145 | key->tag = tag; | |
146 | key->sk = &msk->sk; | |
73c61845 JK |
147 | key->valid = true; |
148 | spin_lock_init(&key->lock); | |
149 | refcount_set(&key->refs, 1); | |
de8a6b15 | 150 | sock_hold(key->sk); |
4a992bbd JK |
151 | |
152 | return key; | |
153 | } | |
154 | ||
73c61845 JK |
155 | void mctp_key_unref(struct mctp_sk_key *key) |
156 | { | |
67737c45 JK |
157 | unsigned long flags; |
158 | ||
159 | if (!refcount_dec_and_test(&key->refs)) | |
160 | return; | |
161 | ||
162 | /* even though no refs exist here, the lock allows us to stay | |
163 | * consistent with the locking requirement of mctp_dev_release_key | |
164 | */ | |
165 | spin_lock_irqsave(&key->lock, flags); | |
166 | mctp_dev_release_key(key->dev, key); | |
167 | spin_unlock_irqrestore(&key->lock, flags); | |
168 | ||
de8a6b15 | 169 | sock_put(key->sk); |
67737c45 | 170 | kfree(key); |
73c61845 JK |
171 | } |
172 | ||
4a992bbd JK |
/* Publish @key on the per-net and per-socket key lists.
 *
 * Fails with -EINVAL if the socket is flagged SOCK_DEAD, and with -EEXIST
 * if a valid key matching the same (local, peer, tag) is already listed.
 * On success the lists take their own reference on the key, the key's
 * expiry is set to now + mctp_key_lifetime, and timer_reduce() is called on
 * the socket's key_expiry timer with that expiry.
 *
 * Returns 0 or a negative errno. The caller's own reference on @key is left
 * untouched in either case.
 */
static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
{
	struct net *net = sock_net(&msk->sk);
	struct mctp_sk_key *tmp;
	unsigned long flags;
	int rc = 0;

	spin_lock_irqsave(&net->mctp.keys_lock, flags);

	/* checked under keys_lock, before anything is added to msk->keys */
	if (sock_flag(&msk->sk, SOCK_DEAD)) {
		rc = -EINVAL;
		goto out_unlock;
	}

	hlist_for_each_entry(tmp, &net->mctp.keys, hlist) {
		if (mctp_key_match(tmp, key->local_addr, key->peer_addr,
				   key->tag)) {
			/* only a still-valid key counts as a conflict */
			spin_lock(&tmp->lock);
			if (tmp->valid)
				rc = -EEXIST;
			spin_unlock(&tmp->lock);
			if (rc)
				break;
		}
	}

	if (!rc) {
		/* reference held by the lists */
		refcount_inc(&key->refs);
		key->expiry = jiffies + mctp_key_lifetime;
		timer_reduce(&msk->key_expiry, key->expiry);

		hlist_add_head(&key->hlist, &net->mctp.keys);
		hlist_add_head(&key->sklist, &msk->keys);
	}

out_unlock:
	spin_unlock_irqrestore(&net->mctp.keys_lock, flags);

	return rc;
}
213 | ||
63ed1aab MJ |
/* Helper for mctp_route_input().
 * We're done with the key; unlock and unref the key.
 * For the usual case of automatic expiry we remove the key from lists.
 * In the case that manual allocation is set on a key we release the lock
 * and local ref, reset reassembly, but don't remove from lists.
 *
 * Must be entered with key->lock held; @flags is the saved interrupt state
 * that is restored when the lock is dropped. @reason is forwarded to the
 * key-release tracepoint. Any reassembly skb still attached to the key is
 * freed.
 */
static void __mctp_key_done_in(struct mctp_sk_key *key, struct net *net,
			       unsigned long flags, unsigned long reason)
	__releases(&key->lock)
{
	struct sk_buff *skb;

	trace_mctp_key_release(key, reason);
	/* detach any partial reassembly now; it is freed after unlocking */
	skb = key->reasm_head;
	key->reasm_head = NULL;

	if (!key->manual_alloc) {
		key->reasm_dead = true;
		key->valid = false;
		mctp_dev_release_key(key->dev, key);
	}
	spin_unlock_irqrestore(&key->lock, flags);

	if (!key->manual_alloc) {
		spin_lock_irqsave(&net->mctp.keys_lock, flags);
		/* unlink, and drop the lists' reference, only if the key is
		 * still hashed
		 */
		if (!hlist_unhashed(&key->hlist)) {
			hlist_del_init(&key->hlist);
			hlist_del_init(&key->sklist);
			mctp_key_unref(key);
		}
		spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
	}

	/* and one for the local reference */
	mctp_key_unref(key);

	kfree_skb(skb);
}
252 | ||
67737c45 JK |
#ifdef CONFIG_MCTP_FLOWS
/* Attach @key to @skb through the SKB_EXT_MCTP extension, taking a key
 * reference for the extension. Does nothing if the extension cannot be
 * added.
 */
static void mctp_skb_set_flow(struct sk_buff *skb, struct mctp_sk_key *key)
{
	struct mctp_flow *flow = skb_ext_add(skb, SKB_EXT_MCTP);

	if (flow) {
		refcount_inc(&key->refs);
		flow->key = key;
	}
}

/* If @skb carries a flow extension, associate its key with @dev before
 * transmit. Warns, and leaves the key alone, if the key is already tied to
 * a different device.
 */
static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev)
{
	struct mctp_flow *flow = skb_ext_find(skb, SKB_EXT_MCTP);
	struct mctp_sk_key *key;

	if (!flow)
		return;

	key = flow->key;

	if (!WARN_ON(key->dev && key->dev != dev))
		mctp_dev_set_key(dev, key);
}
#else
/* flow support compiled out: both helpers are no-ops */
static void mctp_skb_set_flow(struct sk_buff *skb, struct mctp_sk_key *key) {}
static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev) {}
#endif
286 | ||
4a992bbd JK |
287 | static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb) |
288 | { | |
289 | struct mctp_hdr *hdr = mctp_hdr(skb); | |
290 | u8 exp_seq, this_seq; | |
291 | ||
292 | this_seq = (hdr->flags_seq_tag >> MCTP_HDR_SEQ_SHIFT) | |
293 | & MCTP_HDR_SEQ_MASK; | |
294 | ||
295 | if (!key->reasm_head) { | |
296 | key->reasm_head = skb; | |
297 | key->reasm_tailp = &(skb_shinfo(skb)->frag_list); | |
298 | key->last_seq = this_seq; | |
299 | return 0; | |
300 | } | |
301 | ||
302 | exp_seq = (key->last_seq + 1) & MCTP_HDR_SEQ_MASK; | |
303 | ||
304 | if (this_seq != exp_seq) | |
305 | return -EINVAL; | |
306 | ||
307 | if (key->reasm_head->len + skb->len > mctp_message_maxlen) | |
308 | return -EINVAL; | |
309 | ||
310 | skb->next = NULL; | |
311 | skb->sk = NULL; | |
312 | *key->reasm_tailp = skb; | |
313 | key->reasm_tailp = &skb->next; | |
314 | ||
315 | key->last_seq = this_seq; | |
316 | ||
317 | key->reasm_head->data_len += skb->len; | |
318 | key->reasm_head->len += skb->len; | |
319 | key->reasm_head->truesize += skb->truesize; | |
320 | ||
321 | return 0; | |
322 | } | |
323 | ||
889b7da2 JK |
324 | static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) |
325 | { | |
6e54ea37 | 326 | struct mctp_sk_key *key, *any_key = NULL; |
833ef3b9 | 327 | struct net *net = dev_net(skb->dev); |
833ef3b9 JK |
328 | struct mctp_sock *msk; |
329 | struct mctp_hdr *mh; | |
4a992bbd JK |
330 | unsigned long f; |
331 | u8 tag, flags; | |
332 | int rc; | |
833ef3b9 JK |
333 | |
334 | msk = NULL; | |
4a992bbd | 335 | rc = -EINVAL; |
833ef3b9 JK |
336 | |
337 | /* we may be receiving a locally-routed packet; drop source sk | |
338 | * accounting | |
339 | */ | |
340 | skb_orphan(skb); | |
341 | ||
342 | /* ensure we have enough data for a header and a type */ | |
343 | if (skb->len < sizeof(struct mctp_hdr) + 1) | |
4a992bbd | 344 | goto out; |
833ef3b9 JK |
345 | |
346 | /* grab header, advance data ptr */ | |
347 | mh = mctp_hdr(skb); | |
348 | skb_pull(skb, sizeof(struct mctp_hdr)); | |
349 | ||
350 | if (mh->ver != 1) | |
4a992bbd | 351 | goto out; |
833ef3b9 | 352 | |
4a992bbd JK |
353 | flags = mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM); |
354 | tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO); | |
833ef3b9 JK |
355 | |
356 | rcu_read_lock(); | |
4a992bbd | 357 | |
73c61845 JK |
358 | /* lookup socket / reasm context, exactly matching (src,dest,tag). |
359 | * we hold a ref on the key, and key->lock held. | |
360 | */ | |
361 | key = mctp_lookup_key(net, skb, mh->src, &f); | |
833ef3b9 | 362 | |
4a992bbd JK |
363 | if (flags & MCTP_HDR_FLAG_SOM) { |
364 | if (key) { | |
365 | msk = container_of(key->sk, struct mctp_sock, sk); | |
366 | } else { | |
367 | /* first response to a broadcast? do a more general | |
368 | * key lookup to find the socket, but don't use this | |
369 | * key for reassembly - we'll create a more specific | |
370 | * one for future packets if required (ie, !EOM). | |
371 | */ | |
6e54ea37 PA |
372 | any_key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY, &f); |
373 | if (any_key) { | |
374 | msk = container_of(any_key->sk, | |
4a992bbd | 375 | struct mctp_sock, sk); |
6e54ea37 | 376 | spin_unlock_irqrestore(&any_key->lock, f); |
4a992bbd JK |
377 | } |
378 | } | |
833ef3b9 | 379 | |
4a992bbd JK |
380 | if (!key && !msk && (tag & MCTP_HDR_FLAG_TO)) |
381 | msk = mctp_lookup_bind(net, skb); | |
833ef3b9 | 382 | |
4a992bbd JK |
383 | if (!msk) { |
384 | rc = -ENOENT; | |
385 | goto out_unlock; | |
386 | } | |
833ef3b9 | 387 | |
4a992bbd JK |
388 | /* single-packet message? deliver to socket, clean up any |
389 | * pending key. | |
390 | */ | |
391 | if (flags & MCTP_HDR_FLAG_EOM) { | |
392 | sock_queue_rcv_skb(&msk->sk, skb); | |
393 | if (key) { | |
4a992bbd JK |
394 | /* we've hit a pending reassembly; not much we |
395 | * can do but drop it | |
396 | */ | |
63ed1aab MJ |
397 | __mctp_key_done_in(key, net, f, |
398 | MCTP_TRACE_KEY_REPLIED); | |
73c61845 | 399 | key = NULL; |
4a992bbd JK |
400 | } |
401 | rc = 0; | |
402 | goto out_unlock; | |
403 | } | |
833ef3b9 | 404 | |
4a992bbd JK |
405 | /* broadcast response or a bind() - create a key for further |
406 | * packets for this message | |
407 | */ | |
408 | if (!key) { | |
409 | key = mctp_key_alloc(msk, mh->dest, mh->src, | |
410 | tag, GFP_ATOMIC); | |
411 | if (!key) { | |
412 | rc = -ENOMEM; | |
413 | goto out_unlock; | |
414 | } | |
833ef3b9 | 415 | |
73c61845 | 416 | /* we can queue without the key lock here, as the |
4a992bbd JK |
417 | * key isn't observable yet |
418 | */ | |
419 | mctp_frag_queue(key, skb); | |
420 | ||
421 | /* if the key_add fails, we've raced with another | |
422 | * SOM packet with the same src, dest and tag. There's | |
423 | * no way to distinguish future packets, so all we | |
424 | * can do is drop; we'll free the skb on exit from | |
425 | * this function. | |
426 | */ | |
427 | rc = mctp_key_add(key, msk); | |
de8a6b15 | 428 | if (!rc) |
7e5b6a5c | 429 | trace_mctp_key_acquire(key); |
4a992bbd | 430 | |
de8a6b15 JK |
431 | /* we don't need to release key->lock on exit, so |
432 | * clean up here and suppress the unlock via | |
433 | * setting to NULL | |
434 | */ | |
435 | mctp_key_unref(key); | |
73c61845 | 436 | key = NULL; |
4a992bbd | 437 | |
73c61845 | 438 | } else { |
4a992bbd JK |
439 | if (key->reasm_head || key->reasm_dead) { |
440 | /* duplicate start? drop everything */ | |
63ed1aab MJ |
441 | __mctp_key_done_in(key, net, f, |
442 | MCTP_TRACE_KEY_INVALIDATED); | |
4a992bbd | 443 | rc = -EEXIST; |
73c61845 | 444 | key = NULL; |
4a992bbd JK |
445 | } else { |
446 | rc = mctp_frag_queue(key, skb); | |
4a992bbd JK |
447 | } |
448 | } | |
449 | ||
450 | } else if (key) { | |
451 | /* this packet continues a previous message; reassemble | |
452 | * using the message-specific key | |
453 | */ | |
454 | ||
4a992bbd JK |
455 | /* we need to be continuing an existing reassembly... */ |
456 | if (!key->reasm_head) | |
457 | rc = -EINVAL; | |
458 | else | |
459 | rc = mctp_frag_queue(key, skb); | |
460 | ||
461 | /* end of message? deliver to socket, and we're done with | |
462 | * the reassembly/response key | |
463 | */ | |
464 | if (!rc && flags & MCTP_HDR_FLAG_EOM) { | |
465 | sock_queue_rcv_skb(key->sk, key->reasm_head); | |
466 | key->reasm_head = NULL; | |
63ed1aab | 467 | __mctp_key_done_in(key, net, f, MCTP_TRACE_KEY_REPLIED); |
73c61845 | 468 | key = NULL; |
4a992bbd JK |
469 | } |
470 | ||
471 | } else { | |
472 | /* not a start, no matching key */ | |
473 | rc = -ENOENT; | |
474 | } | |
833ef3b9 | 475 | |
4a992bbd | 476 | out_unlock: |
833ef3b9 | 477 | rcu_read_unlock(); |
73c61845 JK |
478 | if (key) { |
479 | spin_unlock_irqrestore(&key->lock, f); | |
480 | mctp_key_unref(key); | |
481 | } | |
6e54ea37 PA |
482 | if (any_key) |
483 | mctp_key_unref(any_key); | |
4a992bbd JK |
484 | out: |
485 | if (rc) | |
486 | kfree_skb(skb); | |
487 | return rc; | |
488 | } | |
489 | ||
490 | static unsigned int mctp_route_mtu(struct mctp_route *rt) | |
491 | { | |
492 | return rt->mtu ?: READ_ONCE(rt->dev->dev->mtu); | |
889b7da2 JK |
493 | } |
494 | ||
06d2f4c5 | 495 | static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb) |
889b7da2 | 496 | { |
99ce45d5 | 497 | struct mctp_skb_cb *cb = mctp_cb(skb); |
26ab3fca MJ |
498 | struct mctp_hdr *hdr = mctp_hdr(skb); |
499 | char daddr_buf[MAX_ADDR_LEN]; | |
500 | char *daddr = NULL; | |
889b7da2 JK |
501 | unsigned int mtu; |
502 | int rc; | |
503 | ||
504 | skb->protocol = htons(ETH_P_MCTP); | |
505 | ||
506 | mtu = READ_ONCE(skb->dev->mtu); | |
507 | if (skb->len > mtu) { | |
508 | kfree_skb(skb); | |
509 | return -EMSGSIZE; | |
510 | } | |
511 | ||
99ce45d5 JK |
512 | if (cb->ifindex) { |
513 | /* direct route; use the hwaddr we stashed in sendmsg */ | |
4a9dda1c MJ |
514 | if (cb->halen != skb->dev->addr_len) { |
515 | /* sanity check, sendmsg should have already caught this */ | |
516 | kfree_skb(skb); | |
517 | return -EMSGSIZE; | |
518 | } | |
99ce45d5 JK |
519 | daddr = cb->haddr; |
520 | } else { | |
521 | /* If lookup fails let the device handle daddr==NULL */ | |
522 | if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0) | |
523 | daddr = daddr_buf; | |
524 | } | |
26ab3fca | 525 | |
889b7da2 | 526 | rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol), |
26ab3fca | 527 | daddr, skb->dev->dev_addr, skb->len); |
60be976a | 528 | if (rc < 0) { |
889b7da2 JK |
529 | kfree_skb(skb); |
530 | return -EHOSTUNREACH; | |
531 | } | |
532 | ||
67737c45 JK |
533 | mctp_flow_prepare_output(skb, route->dev); |
534 | ||
889b7da2 JK |
535 | rc = dev_queue_xmit(skb); |
536 | if (rc) | |
537 | rc = net_xmit_errno(rc); | |
538 | ||
539 | return rc; | |
540 | } | |
541 | ||
542 | /* route alloc/release */ | |
543 | static void mctp_route_release(struct mctp_route *rt) | |
544 | { | |
545 | if (refcount_dec_and_test(&rt->refs)) { | |
43f55f23 | 546 | mctp_dev_put(rt->dev); |
889b7da2 JK |
547 | kfree_rcu(rt, rcu); |
548 | } | |
549 | } | |
550 | ||
551 | /* returns a route with the refcount at 1 */ | |
552 | static struct mctp_route *mctp_route_alloc(void) | |
553 | { | |
554 | struct mctp_route *rt; | |
555 | ||
556 | rt = kzalloc(sizeof(*rt), GFP_KERNEL); | |
557 | if (!rt) | |
558 | return NULL; | |
559 | ||
560 | INIT_LIST_HEAD(&rt->list); | |
561 | refcount_set(&rt->refs, 1); | |
562 | rt->output = mctp_route_discard; | |
563 | ||
564 | return rt; | |
565 | } | |
566 | ||
03f2bbc4 MJ |
567 | unsigned int mctp_default_net(struct net *net) |
568 | { | |
569 | return READ_ONCE(net->mctp.default_net); | |
570 | } | |
571 | ||
572 | int mctp_default_net_set(struct net *net, unsigned int index) | |
573 | { | |
574 | if (index == 0) | |
575 | return -EINVAL; | |
576 | WRITE_ONCE(net->mctp.default_net, index); | |
577 | return 0; | |
578 | } | |
579 | ||
833ef3b9 JK |
580 | /* tag management */ |
581 | static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key, | |
582 | struct mctp_sock *msk) | |
583 | { | |
584 | struct netns_mctp *mns = &net->mctp; | |
585 | ||
586 | lockdep_assert_held(&mns->keys_lock); | |
587 | ||
7b14e15a JK |
588 | key->expiry = jiffies + mctp_key_lifetime; |
589 | timer_reduce(&msk->key_expiry, key->expiry); | |
590 | ||
833ef3b9 JK |
591 | /* we hold the net->key_lock here, allowing updates to both |
592 | * then net and sk | |
593 | */ | |
594 | hlist_add_head_rcu(&key->hlist, &mns->keys); | |
595 | hlist_add_head_rcu(&key->sklist, &msk->keys); | |
73c61845 | 596 | refcount_inc(&key->refs); |
833ef3b9 JK |
597 | } |
598 | ||
599 | /* Allocate a locally-owned tag value for (saddr, daddr), and reserve | |
600 | * it for the socket msk | |
601 | */ | |
63ed1aab MJ |
602 | struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, |
603 | mctp_eid_t daddr, mctp_eid_t saddr, | |
604 | bool manual, u8 *tagp) | |
833ef3b9 JK |
605 | { |
606 | struct net *net = sock_net(&msk->sk); | |
607 | struct netns_mctp *mns = &net->mctp; | |
608 | struct mctp_sk_key *key, *tmp; | |
609 | unsigned long flags; | |
833ef3b9 JK |
610 | u8 tagbits; |
611 | ||
1f6c77ac JK |
612 | /* for NULL destination EIDs, we may get a response from any peer */ |
613 | if (daddr == MCTP_ADDR_NULL) | |
614 | daddr = MCTP_ADDR_ANY; | |
615 | ||
833ef3b9 | 616 | /* be optimistic, alloc now */ |
4a992bbd | 617 | key = mctp_key_alloc(msk, saddr, daddr, 0, GFP_KERNEL); |
833ef3b9 | 618 | if (!key) |
212c10c3 | 619 | return ERR_PTR(-ENOMEM); |
833ef3b9 JK |
620 | |
621 | /* 8 possible tag values */ | |
622 | tagbits = 0xff; | |
623 | ||
624 | spin_lock_irqsave(&mns->keys_lock, flags); | |
625 | ||
626 | /* Walk through the existing keys, looking for potential conflicting | |
627 | * tags. If we find a conflict, clear that bit from tagbits | |
628 | */ | |
629 | hlist_for_each_entry(tmp, &mns->keys, hlist) { | |
73c61845 JK |
630 | /* We can check the lookup fields (*_addr, tag) without the |
631 | * lock held, they don't change over the lifetime of the key. | |
632 | */ | |
633 | ||
833ef3b9 JK |
634 | /* if we don't own the tag, it can't conflict */ |
635 | if (tmp->tag & MCTP_HDR_FLAG_TO) | |
636 | continue; | |
637 | ||
8069b22d | 638 | if (!(mctp_address_matches(tmp->peer_addr, daddr) && |
0de55a7d | 639 | mctp_address_matches(tmp->local_addr, saddr))) |
73c61845 JK |
640 | continue; |
641 | ||
642 | spin_lock(&tmp->lock); | |
643 | /* key must still be valid. If we find a match, clear the | |
644 | * potential tag value | |
645 | */ | |
646 | if (tmp->valid) | |
833ef3b9 | 647 | tagbits &= ~(1 << tmp->tag); |
73c61845 | 648 | spin_unlock(&tmp->lock); |
833ef3b9 JK |
649 | |
650 | if (!tagbits) | |
651 | break; | |
652 | } | |
653 | ||
654 | if (tagbits) { | |
655 | key->tag = __ffs(tagbits); | |
656 | mctp_reserve_tag(net, key, msk); | |
4f9e1ba6 JK |
657 | trace_mctp_key_acquire(key); |
658 | ||
63ed1aab | 659 | key->manual_alloc = manual; |
833ef3b9 | 660 | *tagp = key->tag; |
833ef3b9 JK |
661 | } |
662 | ||
663 | spin_unlock_irqrestore(&mns->keys_lock, flags); | |
664 | ||
212c10c3 | 665 | if (!tagbits) { |
833ef3b9 | 666 | kfree(key); |
212c10c3 JK |
667 | return ERR_PTR(-EBUSY); |
668 | } | |
833ef3b9 | 669 | |
212c10c3 | 670 | return key; |
833ef3b9 JK |
671 | } |
672 | ||
63ed1aab MJ |
673 | static struct mctp_sk_key *mctp_lookup_prealloc_tag(struct mctp_sock *msk, |
674 | mctp_eid_t daddr, | |
675 | u8 req_tag, u8 *tagp) | |
676 | { | |
677 | struct net *net = sock_net(&msk->sk); | |
678 | struct netns_mctp *mns = &net->mctp; | |
679 | struct mctp_sk_key *key, *tmp; | |
680 | unsigned long flags; | |
681 | ||
682 | req_tag &= ~(MCTP_TAG_PREALLOC | MCTP_TAG_OWNER); | |
683 | key = NULL; | |
684 | ||
685 | spin_lock_irqsave(&mns->keys_lock, flags); | |
686 | ||
687 | hlist_for_each_entry(tmp, &mns->keys, hlist) { | |
688 | if (tmp->tag != req_tag) | |
689 | continue; | |
690 | ||
691 | if (!mctp_address_matches(tmp->peer_addr, daddr)) | |
692 | continue; | |
693 | ||
694 | if (!tmp->manual_alloc) | |
695 | continue; | |
696 | ||
697 | spin_lock(&tmp->lock); | |
698 | if (tmp->valid) { | |
699 | key = tmp; | |
700 | refcount_inc(&key->refs); | |
701 | spin_unlock(&tmp->lock); | |
702 | break; | |
703 | } | |
704 | spin_unlock(&tmp->lock); | |
705 | } | |
706 | spin_unlock_irqrestore(&mns->keys_lock, flags); | |
707 | ||
708 | if (!key) | |
709 | return ERR_PTR(-ENOENT); | |
710 | ||
711 | if (tagp) | |
712 | *tagp = key->tag; | |
713 | ||
714 | return key; | |
715 | } | |
716 | ||
889b7da2 JK |
717 | /* routing lookups */ |
718 | static bool mctp_rt_match_eid(struct mctp_route *rt, | |
719 | unsigned int net, mctp_eid_t eid) | |
720 | { | |
721 | return READ_ONCE(rt->dev->net) == net && | |
722 | rt->min <= eid && rt->max >= eid; | |
723 | } | |
724 | ||
725 | /* compares match, used for duplicate prevention */ | |
726 | static bool mctp_rt_compare_exact(struct mctp_route *rt1, | |
727 | struct mctp_route *rt2) | |
728 | { | |
729 | ASSERT_RTNL(); | |
730 | return rt1->dev->net == rt2->dev->net && | |
731 | rt1->min == rt2->min && | |
732 | rt1->max == rt2->max; | |
733 | } | |
734 | ||
735 | struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet, | |
736 | mctp_eid_t daddr) | |
737 | { | |
738 | struct mctp_route *tmp, *rt = NULL; | |
739 | ||
740 | list_for_each_entry_rcu(tmp, &net->mctp.routes, list) { | |
741 | /* TODO: add metrics */ | |
742 | if (mctp_rt_match_eid(tmp, dnet, daddr)) { | |
743 | if (refcount_inc_not_zero(&tmp->refs)) { | |
744 | rt = tmp; | |
745 | break; | |
746 | } | |
747 | } | |
748 | } | |
749 | ||
750 | return rt; | |
751 | } | |
752 | ||
1f6c77ac JK |
753 | static struct mctp_route *mctp_route_lookup_null(struct net *net, |
754 | struct net_device *dev) | |
755 | { | |
756 | struct mctp_route *rt; | |
757 | ||
758 | list_for_each_entry_rcu(rt, &net->mctp.routes, list) { | |
759 | if (rt->dev->dev == dev && rt->type == RTN_LOCAL && | |
760 | refcount_inc_not_zero(&rt->refs)) | |
761 | return rt; | |
762 | } | |
763 | ||
764 | return NULL; | |
765 | } | |
766 | ||
4a992bbd JK |
/* Split @skb into packets of at most @mtu bytes and send each through @rt.
 *
 * @skb holds a complete message preceded by its MCTP header. Each fragment
 * is a newly allocated skb carrying a copy of the header's version,
 * destination and source, the tag and tag-owner bits from @tag, a sequence
 * number that starts at 0 and wraps under MCTP_HDR_SEQ_MASK, and the
 * SOM/EOM flag on the first/last fragment.
 *
 * @skb is always released before returning. Returns 0 if every fragment
 * was accepted by rt->output(), -EMSGSIZE if @mtu cannot hold the header
 * plus one payload byte, -ENOMEM if a fragment cannot be allocated, or the
 * first non-zero value returned by rt->output(); fragments already sent
 * are not recalled.
 */
static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
				  unsigned int mtu, u8 tag)
{
	const unsigned int hlen = sizeof(struct mctp_hdr);
	struct mctp_hdr *hdr, *hdr2;
	unsigned int pos, size, headroom;
	struct sk_buff *skb2;
	int rc;
	u8 seq;

	hdr = mctp_hdr(skb);
	seq = 0;
	rc = 0;

	if (mtu < hlen + 1) {
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	/* keep same headroom as the original skb */
	headroom = skb_headroom(skb);

	/* we've got the header */
	skb_pull(skb, hlen);

	/* pos is the payload offset of the next fragment within skb */
	for (pos = 0; pos < skb->len;) {
		/* size of message payload */
		size = min(mtu - hlen, skb->len - pos);

		skb2 = alloc_skb(headroom + hlen + size, GFP_KERNEL);
		if (!skb2) {
			rc = -ENOMEM;
			break;
		}

		/* generic skb copy */
		skb2->protocol = skb->protocol;
		skb2->priority = skb->priority;
		skb2->dev = skb->dev;
		memcpy(skb2->cb, skb->cb, sizeof(skb2->cb));

		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);

		/* establish packet */
		skb_reserve(skb2, headroom);
		skb_reset_network_header(skb2);
		skb_put(skb2, hlen + size);
		skb2->transport_header = skb2->network_header + hlen;

		/* copy header fields, calculate SOM/EOM flags & seq */
		hdr2 = mctp_hdr(skb2);
		hdr2->ver = hdr->ver;
		hdr2->dest = hdr->dest;
		hdr2->src = hdr->src;
		hdr2->flags_seq_tag = tag &
			(MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);

		if (pos == 0)
			hdr2->flags_seq_tag |= MCTP_HDR_FLAG_SOM;

		if (pos + size == skb->len)
			hdr2->flags_seq_tag |= MCTP_HDR_FLAG_EOM;

		hdr2->flags_seq_tag |= seq << MCTP_HDR_SEQ_SHIFT;

		/* copy message payload */
		skb_copy_bits(skb, pos, skb_transport_header(skb2), size);

		/* do route */
		rc = rt->output(rt, skb2);
		if (rc)
			break;

		seq = (seq + 1) & MCTP_HDR_SEQ_MASK;
		pos += size;
	}

	/* the original is released whether or not every fragment was sent */
	consume_skb(skb);
	return rc;
}
848 | ||
889b7da2 JK |
849 | int mctp_local_output(struct sock *sk, struct mctp_route *rt, |
850 | struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag) | |
851 | { | |
833ef3b9 | 852 | struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); |
889b7da2 | 853 | struct mctp_skb_cb *cb = mctp_cb(skb); |
dc121c00 | 854 | struct mctp_route tmp_rt = {0}; |
212c10c3 | 855 | struct mctp_sk_key *key; |
889b7da2 JK |
856 | struct mctp_hdr *hdr; |
857 | unsigned long flags; | |
4a992bbd | 858 | unsigned int mtu; |
889b7da2 | 859 | mctp_eid_t saddr; |
99ce45d5 | 860 | bool ext_rt; |
889b7da2 | 861 | int rc; |
833ef3b9 | 862 | u8 tag; |
889b7da2 | 863 | |
99ce45d5 JK |
864 | rc = -ENODEV; |
865 | ||
866 | if (rt) { | |
867 | ext_rt = false; | |
99ce45d5 JK |
868 | if (WARN_ON(!rt->dev)) |
869 | goto out_release; | |
870 | ||
871 | } else if (cb->ifindex) { | |
e297db3e MJ |
872 | struct net_device *dev; |
873 | ||
99ce45d5 JK |
874 | ext_rt = true; |
875 | rt = &tmp_rt; | |
876 | ||
877 | rcu_read_lock(); | |
878 | dev = dev_get_by_index_rcu(sock_net(sk), cb->ifindex); | |
879 | if (!dev) { | |
880 | rcu_read_unlock(); | |
881 | return rc; | |
882 | } | |
99ce45d5 JK |
883 | rt->dev = __mctp_dev_get(dev); |
884 | rcu_read_unlock(); | |
885 | ||
886 | if (!rt->dev) | |
887 | goto out_release; | |
888 | ||
889 | /* establish temporary route - we set up enough to keep | |
890 | * mctp_route_output happy | |
891 | */ | |
892 | rt->output = mctp_route_output; | |
893 | rt->mtu = 0; | |
894 | ||
895 | } else { | |
889b7da2 | 896 | return -EINVAL; |
99ce45d5 | 897 | } |
889b7da2 JK |
898 | |
899 | spin_lock_irqsave(&rt->dev->addrs_lock, flags); | |
900 | if (rt->dev->num_addrs == 0) { | |
901 | rc = -EHOSTUNREACH; | |
902 | } else { | |
903 | /* use the outbound interface's first address as our source */ | |
904 | saddr = rt->dev->addrs[0]; | |
905 | rc = 0; | |
906 | } | |
907 | spin_unlock_irqrestore(&rt->dev->addrs_lock, flags); | |
908 | ||
909 | if (rc) | |
99ce45d5 | 910 | goto out_release; |
889b7da2 | 911 | |
63ed1aab MJ |
912 | if (req_tag & MCTP_TAG_OWNER) { |
913 | if (req_tag & MCTP_TAG_PREALLOC) | |
914 | key = mctp_lookup_prealloc_tag(msk, daddr, | |
915 | req_tag, &tag); | |
916 | else | |
917 | key = mctp_alloc_local_tag(msk, daddr, saddr, | |
918 | false, &tag); | |
919 | ||
212c10c3 JK |
920 | if (IS_ERR(key)) { |
921 | rc = PTR_ERR(key); | |
99ce45d5 | 922 | goto out_release; |
212c10c3 | 923 | } |
67737c45 | 924 | mctp_skb_set_flow(skb, key); |
212c10c3 JK |
925 | /* done with the key in this scope */ |
926 | mctp_key_unref(key); | |
833ef3b9 JK |
927 | tag |= MCTP_HDR_FLAG_TO; |
928 | } else { | |
212c10c3 | 929 | key = NULL; |
63ed1aab | 930 | tag = req_tag & MCTP_TAG_MASK; |
833ef3b9 JK |
931 | } |
932 | ||
4a992bbd JK |
933 | skb->protocol = htons(ETH_P_MCTP); |
934 | skb->priority = 0; | |
889b7da2 JK |
935 | skb_reset_transport_header(skb); |
936 | skb_push(skb, sizeof(struct mctp_hdr)); | |
937 | skb_reset_network_header(skb); | |
4a992bbd JK |
938 | skb->dev = rt->dev->dev; |
939 | ||
940 | /* cb->net will have been set on initial ingress */ | |
941 | cb->src = saddr; | |
942 | ||
943 | /* set up common header fields */ | |
889b7da2 JK |
944 | hdr = mctp_hdr(skb); |
945 | hdr->ver = 1; | |
946 | hdr->dest = daddr; | |
947 | hdr->src = saddr; | |
889b7da2 | 948 | |
4a992bbd | 949 | mtu = mctp_route_mtu(rt); |
889b7da2 | 950 | |
4a992bbd | 951 | if (skb->len + sizeof(struct mctp_hdr) <= mtu) { |
99ce45d5 JK |
952 | hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | |
953 | MCTP_HDR_FLAG_EOM | tag; | |
954 | rc = rt->output(rt, skb); | |
4a992bbd | 955 | } else { |
99ce45d5 | 956 | rc = mctp_do_fragment_route(rt, skb, mtu, tag); |
4a992bbd | 957 | } |
99ce45d5 JK |
958 | |
959 | out_release: | |
960 | if (!ext_rt) | |
961 | mctp_route_release(rt); | |
962 | ||
dc121c00 | 963 | mctp_dev_put(tmp_rt.dev); |
99ce45d5 JK |
964 | |
965 | return rc; | |
889b7da2 JK |
966 | } |
967 | ||
968 | /* route management */ | |
06d2f4c5 MJ |
969 | static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start, |
970 | unsigned int daddr_extent, unsigned int mtu, | |
83f0a0b7 | 971 | unsigned char type) |
889b7da2 | 972 | { |
83f0a0b7 | 973 | int (*rtfn)(struct mctp_route *rt, struct sk_buff *skb); |
889b7da2 JK |
974 | struct net *net = dev_net(mdev->dev); |
975 | struct mctp_route *rt, *ert; | |
976 | ||
cb196b72 | 977 | if (!mctp_address_unicast(daddr_start)) |
06d2f4c5 MJ |
978 | return -EINVAL; |
979 | ||
980 | if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255) | |
981 | return -EINVAL; | |
982 | ||
83f0a0b7 JK |
983 | switch (type) { |
984 | case RTN_LOCAL: | |
985 | rtfn = mctp_route_input; | |
986 | break; | |
987 | case RTN_UNICAST: | |
988 | rtfn = mctp_route_output; | |
989 | break; | |
990 | default: | |
991 | return -EINVAL; | |
992 | } | |
993 | ||
889b7da2 JK |
994 | rt = mctp_route_alloc(); |
995 | if (!rt) | |
996 | return -ENOMEM; | |
997 | ||
06d2f4c5 MJ |
998 | rt->min = daddr_start; |
999 | rt->max = daddr_start + daddr_extent; | |
1000 | rt->mtu = mtu; | |
889b7da2 | 1001 | rt->dev = mdev; |
43f55f23 | 1002 | mctp_dev_hold(rt->dev); |
83f0a0b7 JK |
1003 | rt->type = type; |
1004 | rt->output = rtfn; | |
889b7da2 JK |
1005 | |
1006 | ASSERT_RTNL(); | |
1007 | /* Prevent duplicate identical routes. */ | |
1008 | list_for_each_entry(ert, &net->mctp.routes, list) { | |
1009 | if (mctp_rt_compare_exact(rt, ert)) { | |
1010 | mctp_route_release(rt); | |
1011 | return -EEXIST; | |
1012 | } | |
1013 | } | |
1014 | ||
1015 | list_add_rcu(&rt->list, &net->mctp.routes); | |
1016 | ||
1017 | return 0; | |
1018 | } | |
1019 | ||
06d2f4c5 | 1020 | static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start, |
76d00160 | 1021 | unsigned int daddr_extent, unsigned char type) |
889b7da2 JK |
1022 | { |
1023 | struct net *net = dev_net(mdev->dev); | |
1024 | struct mctp_route *rt, *tmp; | |
06d2f4c5 MJ |
1025 | mctp_eid_t daddr_end; |
1026 | bool dropped; | |
1027 | ||
1028 | if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255) | |
1029 | return -EINVAL; | |
1030 | ||
1031 | daddr_end = daddr_start + daddr_extent; | |
1032 | dropped = false; | |
889b7da2 JK |
1033 | |
1034 | ASSERT_RTNL(); | |
1035 | ||
1036 | list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) { | |
06d2f4c5 | 1037 | if (rt->dev == mdev && |
76d00160 MJ |
1038 | rt->min == daddr_start && rt->max == daddr_end && |
1039 | rt->type == type) { | |
889b7da2 JK |
1040 | list_del_rcu(&rt->list); |
1041 | /* TODO: immediate RTM_DELROUTE */ | |
1042 | mctp_route_release(rt); | |
06d2f4c5 | 1043 | dropped = true; |
889b7da2 JK |
1044 | } |
1045 | } | |
1046 | ||
06d2f4c5 MJ |
1047 | return dropped ? 0 : -ENOENT; |
1048 | } | |
1049 | ||
/* Add an RTN_LOCAL route for the single EID @addr on @mdev: extent 0 and
 * MTU 0 are passed to mctp_route_add(), whose return value is passed back
 * unchanged.
 */
int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr)
{
	return mctp_route_add(mdev, addr, 0, 0, RTN_LOCAL);
}
1054 | ||
/* Remove the RTN_LOCAL route for the single EID @addr on @mdev (extent 0);
 * the return value of mctp_route_remove() is passed back unchanged.
 */
int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr)
{
	return mctp_route_remove(mdev, addr, 0, RTN_LOCAL);
}
1059 | ||
1060 | /* removes all entries for a given device */ | |
1061 | void mctp_route_remove_dev(struct mctp_dev *mdev) | |
1062 | { | |
1063 | struct net *net = dev_net(mdev->dev); | |
1064 | struct mctp_route *rt, *tmp; | |
1065 | ||
1066 | ASSERT_RTNL(); | |
1067 | list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) { | |
1068 | if (rt->dev == mdev) { | |
1069 | list_del_rcu(&rt->list); | |
1070 | /* TODO: immediate RTM_DELROUTE */ | |
1071 | mctp_route_release(rt); | |
1072 | } | |
1073 | } | |
1074 | } | |
1075 | ||
1076 | /* Incoming packet-handling */ | |
1077 | ||
1078 | static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev, | |
1079 | struct packet_type *pt, | |
1080 | struct net_device *orig_dev) | |
1081 | { | |
1082 | struct net *net = dev_net(dev); | |
f364dd71 | 1083 | struct mctp_dev *mdev; |
889b7da2 JK |
1084 | struct mctp_skb_cb *cb; |
1085 | struct mctp_route *rt; | |
1086 | struct mctp_hdr *mh; | |
1087 | ||
f364dd71 MJ |
1088 | rcu_read_lock(); |
1089 | mdev = __mctp_dev_get(dev); | |
1090 | rcu_read_unlock(); | |
1091 | if (!mdev) { | |
1092 | /* basic non-data sanity checks */ | |
889b7da2 | 1093 | goto err_drop; |
f364dd71 | 1094 | } |
889b7da2 JK |
1095 | |
1096 | if (!pskb_may_pull(skb, sizeof(struct mctp_hdr))) | |
1097 | goto err_drop; | |
1098 | ||
1099 | skb_reset_transport_header(skb); | |
1100 | skb_reset_network_header(skb); | |
1101 | ||
1102 | /* We have enough for a header; decode and route */ | |
1103 | mh = mctp_hdr(skb); | |
1104 | if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX) | |
1105 | goto err_drop; | |
1106 | ||
86cdfd63 JK |
1107 | /* source must be valid unicast or null; drop reserved ranges and |
1108 | * broadcast | |
1109 | */ | |
1110 | if (!(mctp_address_unicast(mh->src) || mctp_address_null(mh->src))) | |
1111 | goto err_drop; | |
1112 | ||
1113 | /* dest address: as above, but allow broadcast */ | |
1114 | if (!(mctp_address_unicast(mh->dest) || mctp_address_null(mh->dest) || | |
1115 | mctp_address_broadcast(mh->dest))) | |
1116 | goto err_drop; | |
1117 | ||
99ce45d5 JK |
1118 | /* MCTP drivers must populate halen/haddr */ |
1119 | if (dev->type == ARPHRD_MCTP) { | |
1120 | cb = mctp_cb(skb); | |
1121 | } else { | |
1122 | cb = __mctp_cb(skb); | |
1123 | cb->halen = 0; | |
1124 | } | |
f364dd71 | 1125 | cb->net = READ_ONCE(mdev->net); |
99ce45d5 | 1126 | cb->ifindex = dev->ifindex; |
889b7da2 JK |
1127 | |
1128 | rt = mctp_route_lookup(net, cb->net, mh->dest); | |
1f6c77ac JK |
1129 | |
1130 | /* NULL EID, but addressed to our physical address */ | |
1131 | if (!rt && mh->dest == MCTP_ADDR_NULL && skb->pkt_type == PACKET_HOST) | |
1132 | rt = mctp_route_lookup_null(net, dev); | |
1133 | ||
889b7da2 JK |
1134 | if (!rt) |
1135 | goto err_drop; | |
1136 | ||
99ce45d5 JK |
1137 | rt->output(rt, skb); |
1138 | mctp_route_release(rt); | |
dc121c00 | 1139 | mctp_dev_put(mdev); |
889b7da2 JK |
1140 | |
1141 | return NET_RX_SUCCESS; | |
1142 | ||
1143 | err_drop: | |
1144 | kfree_skb(skb); | |
dc121c00 | 1145 | mctp_dev_put(mdev); |
889b7da2 JK |
1146 | return NET_RX_DROP; |
1147 | } | |
1148 | ||
/* Packet-type hook: frames with protocol ETH_P_MCTP are delivered to
 * mctp_pkttype_receive(). Registered in mctp_routes_init() and removed in
 * mctp_routes_exit().
 */
static struct packet_type mctp_packet_type = {
	.type = cpu_to_be16(ETH_P_MCTP),
	.func = mctp_pkttype_receive,
};
1153 | ||
06d2f4c5 MJ |
1154 | /* netlink interface */ |
1155 | ||
/* Attribute policy for MCTP route messages: RTA_DST is a single-byte EID,
 * RTA_METRICS is a nested attribute set, and RTA_OIF is a 32-bit interface
 * index.
 */
static const struct nla_policy rta_mctp_policy[RTA_MAX + 1] = {
	[RTA_DST]		= { .type = NLA_U8 },
	[RTA_METRICS]		= { .type = NLA_NESTED },
	[RTA_OIF]		= { .type = NLA_U32 },
};
1161 | ||
1162 | /* Common part for RTM_NEWROUTE and RTM_DELROUTE parsing. | |
1163 | * tb must hold RTA_MAX+1 elements. | |
1164 | */ | |
1165 | static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh, | |
1166 | struct netlink_ext_ack *extack, | |
1167 | struct nlattr **tb, struct rtmsg **rtm, | |
1168 | struct mctp_dev **mdev, mctp_eid_t *daddr_start) | |
1169 | { | |
1170 | struct net *net = sock_net(skb->sk); | |
1171 | struct net_device *dev; | |
1172 | unsigned int ifindex; | |
1173 | int rc; | |
1174 | ||
1175 | rc = nlmsg_parse(nlh, sizeof(struct rtmsg), tb, RTA_MAX, | |
1176 | rta_mctp_policy, extack); | |
1177 | if (rc < 0) { | |
1178 | NL_SET_ERR_MSG(extack, "incorrect format"); | |
1179 | return rc; | |
1180 | } | |
1181 | ||
1182 | if (!tb[RTA_DST]) { | |
1183 | NL_SET_ERR_MSG(extack, "dst EID missing"); | |
1184 | return -EINVAL; | |
1185 | } | |
1186 | *daddr_start = nla_get_u8(tb[RTA_DST]); | |
1187 | ||
1188 | if (!tb[RTA_OIF]) { | |
1189 | NL_SET_ERR_MSG(extack, "ifindex missing"); | |
1190 | return -EINVAL; | |
1191 | } | |
1192 | ifindex = nla_get_u32(tb[RTA_OIF]); | |
1193 | ||
1194 | *rtm = nlmsg_data(nlh); | |
1195 | if ((*rtm)->rtm_family != AF_MCTP) { | |
1196 | NL_SET_ERR_MSG(extack, "route family must be AF_MCTP"); | |
1197 | return -EINVAL; | |
1198 | } | |
1199 | ||
1200 | dev = __dev_get_by_index(net, ifindex); | |
1201 | if (!dev) { | |
1202 | NL_SET_ERR_MSG(extack, "bad ifindex"); | |
1203 | return -ENODEV; | |
1204 | } | |
1205 | *mdev = mctp_dev_get_rtnl(dev); | |
1206 | if (!*mdev) | |
1207 | return -ENODEV; | |
1208 | ||
1209 | if (dev->flags & IFF_LOOPBACK) { | |
1210 | NL_SET_ERR_MSG(extack, "no routes to loopback"); | |
1211 | return -EINVAL; | |
1212 | } | |
1213 | ||
1214 | return 0; | |
1215 | } | |
1216 | ||
6183569d MJ |
/* Policy for the attributes nested inside RTA_METRICS: only RTAX_MTU
 * (32-bit) is given a type here.
 */
static const struct nla_policy rta_metrics_policy[RTAX_MAX + 1] = {
	[RTAX_MTU]		= { .type = NLA_U32 },
};
1220 | ||
06d2f4c5 MJ |
1221 | static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, |
1222 | struct netlink_ext_ack *extack) | |
1223 | { | |
1224 | struct nlattr *tb[RTA_MAX + 1]; | |
6183569d | 1225 | struct nlattr *tbx[RTAX_MAX + 1]; |
06d2f4c5 MJ |
1226 | mctp_eid_t daddr_start; |
1227 | struct mctp_dev *mdev; | |
1228 | struct rtmsg *rtm; | |
1229 | unsigned int mtu; | |
1230 | int rc; | |
1231 | ||
1232 | rc = mctp_route_nlparse(skb, nlh, extack, tb, | |
1233 | &rtm, &mdev, &daddr_start); | |
1234 | if (rc < 0) | |
1235 | return rc; | |
1236 | ||
1237 | if (rtm->rtm_type != RTN_UNICAST) { | |
1238 | NL_SET_ERR_MSG(extack, "rtm_type must be RTN_UNICAST"); | |
1239 | return -EINVAL; | |
1240 | } | |
1241 | ||
06d2f4c5 | 1242 | mtu = 0; |
6183569d MJ |
1243 | if (tb[RTA_METRICS]) { |
1244 | rc = nla_parse_nested(tbx, RTAX_MAX, tb[RTA_METRICS], | |
1245 | rta_metrics_policy, NULL); | |
1246 | if (rc < 0) | |
1247 | return rc; | |
1248 | if (tbx[RTAX_MTU]) | |
1249 | mtu = nla_get_u32(tbx[RTAX_MTU]); | |
1250 | } | |
06d2f4c5 | 1251 | |
83f0a0b7 JK |
1252 | if (rtm->rtm_type != RTN_UNICAST) |
1253 | return -EINVAL; | |
1254 | ||
1255 | rc = mctp_route_add(mdev, daddr_start, rtm->rtm_dst_len, mtu, | |
1256 | rtm->rtm_type); | |
06d2f4c5 MJ |
1257 | return rc; |
1258 | } | |
1259 | ||
1260 | static int mctp_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, | |
1261 | struct netlink_ext_ack *extack) | |
1262 | { | |
1263 | struct nlattr *tb[RTA_MAX + 1]; | |
1264 | mctp_eid_t daddr_start; | |
1265 | struct mctp_dev *mdev; | |
1266 | struct rtmsg *rtm; | |
1267 | int rc; | |
1268 | ||
1269 | rc = mctp_route_nlparse(skb, nlh, extack, tb, | |
1270 | &rtm, &mdev, &daddr_start); | |
1271 | if (rc < 0) | |
1272 | return rc; | |
1273 | ||
1274 | /* we only have unicast routes */ | |
1275 | if (rtm->rtm_type != RTN_UNICAST) | |
1276 | return -EINVAL; | |
1277 | ||
76d00160 | 1278 | rc = mctp_route_remove(mdev, daddr_start, rtm->rtm_dst_len, RTN_UNICAST); |
06d2f4c5 MJ |
1279 | return rc; |
1280 | } | |
1281 | ||
1282 | static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt, | |
1283 | u32 portid, u32 seq, int event, unsigned int flags) | |
1284 | { | |
1285 | struct nlmsghdr *nlh; | |
1286 | struct rtmsg *hdr; | |
1287 | void *metrics; | |
1288 | ||
1289 | nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags); | |
1290 | if (!nlh) | |
1291 | return -EMSGSIZE; | |
1292 | ||
1293 | hdr = nlmsg_data(nlh); | |
1294 | hdr->rtm_family = AF_MCTP; | |
1295 | ||
1296 | /* we use the _len fields as a number of EIDs, rather than | |
1297 | * a number of bits in the address | |
1298 | */ | |
1299 | hdr->rtm_dst_len = rt->max - rt->min; | |
1300 | hdr->rtm_src_len = 0; | |
1301 | hdr->rtm_tos = 0; | |
1302 | hdr->rtm_table = RT_TABLE_DEFAULT; | |
1303 | hdr->rtm_protocol = RTPROT_STATIC; /* everything is user-defined */ | |
1304 | hdr->rtm_scope = RT_SCOPE_LINK; /* TODO: scope in mctp_route? */ | |
83f0a0b7 | 1305 | hdr->rtm_type = rt->type; |
06d2f4c5 MJ |
1306 | |
1307 | if (nla_put_u8(skb, RTA_DST, rt->min)) | |
1308 | goto cancel; | |
1309 | ||
1310 | metrics = nla_nest_start_noflag(skb, RTA_METRICS); | |
1311 | if (!metrics) | |
1312 | goto cancel; | |
1313 | ||
1314 | if (rt->mtu) { | |
1315 | if (nla_put_u32(skb, RTAX_MTU, rt->mtu)) | |
1316 | goto cancel; | |
1317 | } | |
1318 | ||
1319 | nla_nest_end(skb, metrics); | |
1320 | ||
1321 | if (rt->dev) { | |
1322 | if (nla_put_u32(skb, RTA_OIF, rt->dev->dev->ifindex)) | |
1323 | goto cancel; | |
1324 | } | |
1325 | ||
1326 | /* TODO: conditional neighbour physaddr? */ | |
1327 | ||
1328 | nlmsg_end(skb, nlh); | |
1329 | ||
1330 | return 0; | |
1331 | ||
1332 | cancel: | |
1333 | nlmsg_cancel(skb, nlh); | |
1334 | return -EMSGSIZE; | |
1335 | } | |
1336 | ||
1337 | static int mctp_dump_rtinfo(struct sk_buff *skb, struct netlink_callback *cb) | |
1338 | { | |
1339 | struct net *net = sock_net(skb->sk); | |
1340 | struct mctp_route *rt; | |
1341 | int s_idx, idx; | |
1342 | ||
1343 | /* TODO: allow filtering on route data, possibly under | |
1344 | * cb->strict_check | |
1345 | */ | |
1346 | ||
1347 | /* TODO: change to struct overlay */ | |
1348 | s_idx = cb->args[0]; | |
1349 | idx = 0; | |
1350 | ||
1351 | rcu_read_lock(); | |
1352 | list_for_each_entry_rcu(rt, &net->mctp.routes, list) { | |
1353 | if (idx++ < s_idx) | |
1354 | continue; | |
1355 | if (mctp_fill_rtinfo(skb, rt, | |
1356 | NETLINK_CB(cb->skb).portid, | |
1357 | cb->nlh->nlmsg_seq, | |
1358 | RTM_NEWROUTE, NLM_F_MULTI) < 0) | |
1359 | break; | |
1360 | } | |
1361 | ||
1362 | rcu_read_unlock(); | |
1363 | cb->args[0] = idx; | |
1364 | ||
1365 | return skb->len; | |
1366 | } | |
1367 | ||
889b7da2 JK |
1368 | /* net namespace implementation */ |
1369 | static int __net_init mctp_routes_net_init(struct net *net) | |
1370 | { | |
1371 | struct netns_mctp *ns = &net->mctp; | |
1372 | ||
1373 | INIT_LIST_HEAD(&ns->routes); | |
833ef3b9 JK |
1374 | INIT_HLIST_HEAD(&ns->binds); |
1375 | mutex_init(&ns->bind_lock); | |
1376 | INIT_HLIST_HEAD(&ns->keys); | |
1377 | spin_lock_init(&ns->keys_lock); | |
03f2bbc4 | 1378 | WARN_ON(mctp_default_net_set(net, MCTP_INITIAL_DEFAULT_NET)); |
889b7da2 JK |
1379 | return 0; |
1380 | } | |
1381 | ||
1382 | static void __net_exit mctp_routes_net_exit(struct net *net) | |
1383 | { | |
1384 | struct mctp_route *rt; | |
1385 | ||
581edcd0 | 1386 | rcu_read_lock(); |
889b7da2 JK |
1387 | list_for_each_entry_rcu(rt, &net->mctp.routes, list) |
1388 | mctp_route_release(rt); | |
581edcd0 | 1389 | rcu_read_unlock(); |
889b7da2 JK |
1390 | } |
1391 | ||
/* Per-net-namespace hooks for MCTP routing state; registered in
 * mctp_routes_init() and unregistered in mctp_routes_exit().
 */
static struct pernet_operations mctp_net_ops = {
	.init = mctp_routes_net_init,
	.exit = mctp_routes_net_exit,
};
1396 | ||
1397 | int __init mctp_routes_init(void) | |
1398 | { | |
1399 | dev_add_pack(&mctp_packet_type); | |
06d2f4c5 MJ |
1400 | |
1401 | rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETROUTE, | |
1402 | NULL, mctp_dump_rtinfo, 0); | |
1403 | rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWROUTE, | |
1404 | mctp_newroute, NULL, 0); | |
1405 | rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELROUTE, | |
1406 | mctp_delroute, NULL, 0); | |
1407 | ||
889b7da2 JK |
1408 | return register_pernet_subsys(&mctp_net_ops); |
1409 | } | |
1410 | ||
/* Undo mctp_routes_init(): unregister the per-namespace operations, then
 * the three route netlink handlers, and finally the packet handler - the
 * reverse of the order in which they were registered.
 */
void mctp_routes_exit(void)
{
	unregister_pernet_subsys(&mctp_net_ops);
	rtnl_unregister(PF_MCTP, RTM_DELROUTE);
	rtnl_unregister(PF_MCTP, RTM_NEWROUTE);
	rtnl_unregister(PF_MCTP, RTM_GETROUTE);
	dev_remove_pack(&mctp_packet_type);
}
161eba50 JK |
1419 | |
1420 | #if IS_ENABLED(CONFIG_MCTP_TEST) | |
1421 | #include "test/route-test.c" | |
1422 | #endif |