Commit | Line | Data |
---|---|---|
2874c5fd | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
6c8702c6 DL |
2 | /* |
3 | * SR-IPv6 implementation | |
4 | * | |
5 | * Author: | |
6 | * David Lebrun <david.lebrun@uclouvain.be> | |
6c8702c6 DL |
7 | */ |
8 | ||
9 | #include <linux/types.h> | |
10 | #include <linux/skbuff.h> | |
11 | #include <linux/net.h> | |
12 | #include <linux/module.h> | |
13 | #include <net/ip.h> | |
5807b22c | 14 | #include <net/ip_tunnels.h> |
6c8702c6 DL |
15 | #include <net/lwtunnel.h> |
16 | #include <net/netevent.h> | |
17 | #include <net/netns/generic.h> | |
18 | #include <net/ip6_fib.h> | |
19 | #include <net/route.h> | |
20 | #include <net/seg6.h> | |
21 | #include <linux/seg6.h> | |
22 | #include <linux/seg6_iptunnel.h> | |
23 | #include <net/addrconf.h> | |
24 | #include <net/ip6_route.h> | |
6c8702c6 | 25 | #include <net/dst_cache.h> |
9baee834 DL |
26 | #ifdef CONFIG_IPV6_SEG6_HMAC |
27 | #include <net/seg6_hmac.h> | |
28 | #endif | |
7a3f5b0d | 29 | #include <linux/netfilter.h> |
6c8702c6 | 30 | |
88fab21c IRS |
31 | static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo) |
32 | { | |
33 | int head = 0; | |
34 | ||
35 | switch (tuninfo->mode) { | |
36 | case SEG6_IPTUN_MODE_INLINE: | |
37 | break; | |
38 | case SEG6_IPTUN_MODE_ENCAP: | |
b07c8cdb | 39 | case SEG6_IPTUN_MODE_ENCAP_RED: |
88fab21c IRS |
40 | head = sizeof(struct ipv6hdr); |
41 | break; | |
42 | case SEG6_IPTUN_MODE_L2ENCAP: | |
13f0296b | 43 | case SEG6_IPTUN_MODE_L2ENCAP_RED: |
88fab21c IRS |
44 | return 0; |
45 | } | |
46 | ||
47 | return ((tuninfo->srh->hdrlen + 1) << 3) + head; | |
48 | } | |
49 | ||
6c8702c6 | 50 | struct seg6_lwt { |
6c8702c6 | 51 | struct dst_cache cache; |
b0c9a2d9 | 52 | struct seg6_iptunnel_encap tuninfo[]; |
6c8702c6 DL |
53 | }; |
54 | ||
55 | static inline struct seg6_lwt *seg6_lwt_lwtunnel(struct lwtunnel_state *lwt) | |
56 | { | |
57 | return (struct seg6_lwt *)lwt->data; | |
58 | } | |
59 | ||
60 | static inline struct seg6_iptunnel_encap * | |
61 | seg6_encap_lwtunnel(struct lwtunnel_state *lwt) | |
62 | { | |
63 | return seg6_lwt_lwtunnel(lwt)->tuninfo; | |
64 | } | |
65 | ||
66 | static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = { | |
67 | [SEG6_IPTUNNEL_SRH] = { .type = NLA_BINARY }, | |
68 | }; | |
69 | ||
bb4005ba WY |
70 | static int nla_put_srh(struct sk_buff *skb, int attrtype, |
71 | struct seg6_iptunnel_encap *tuninfo) | |
6c8702c6 DL |
72 | { |
73 | struct seg6_iptunnel_encap *data; | |
74 | struct nlattr *nla; | |
75 | int len; | |
76 | ||
77 | len = SEG6_IPTUN_ENCAP_SIZE(tuninfo); | |
78 | ||
79 | nla = nla_reserve(skb, attrtype, len); | |
80 | if (!nla) | |
81 | return -EMSGSIZE; | |
82 | ||
83 | data = nla_data(nla); | |
84 | memcpy(data, tuninfo, len); | |
85 | ||
86 | return 0; | |
87 | } | |
88 | ||
89 | static void set_tun_src(struct net *net, struct net_device *dev, | |
90 | struct in6_addr *daddr, struct in6_addr *saddr) | |
91 | { | |
92 | struct seg6_pernet_data *sdata = seg6_pernet(net); | |
93 | struct in6_addr *tun_src; | |
94 | ||
95 | rcu_read_lock(); | |
96 | ||
97 | tun_src = rcu_dereference(sdata->tun_src); | |
98 | ||
99 | if (!ipv6_addr_any(tun_src)) { | |
100 | memcpy(saddr, tun_src, sizeof(struct in6_addr)); | |
101 | } else { | |
102 | ipv6_dev_get_saddr(net, dev, daddr, IPV6_PREFER_SRC_PUBLIC, | |
103 | saddr); | |
104 | } | |
105 | ||
106 | rcu_read_unlock(); | |
107 | } | |
108 | ||
b5facfdb AA |
109 | /* Compute flowlabel for outer IPv6 header */ |
110 | static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb, | |
111 | struct ipv6hdr *inner_hdr) | |
112 | { | |
113 | int do_flowlabel = net->ipv6.sysctl.seg6_flowlabel; | |
114 | __be32 flowlabel = 0; | |
115 | u32 hash; | |
116 | ||
117 | if (do_flowlabel > 0) { | |
118 | hash = skb_get_hash(skb); | |
3ee593ad | 119 | hash = rol32(hash, 16); |
b5facfdb AA |
120 | flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK; |
121 | } else if (!do_flowlabel && skb->protocol == htons(ETH_P_IPV6)) { | |
122 | flowlabel = ip6_flowlabel(inner_hdr); | |
123 | } | |
124 | return flowlabel; | |
125 | } | |
126 | ||
6c8702c6 | 127 | /* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */ |
32d99d0b | 128 | int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) |
6c8702c6 | 129 | { |
8936ef76 DL |
130 | struct dst_entry *dst = skb_dst(skb); |
131 | struct net *net = dev_net(dst->dev); | |
6c8702c6 DL |
132 | struct ipv6hdr *hdr, *inner_hdr; |
133 | struct ipv6_sr_hdr *isrh; | |
134 | int hdrlen, tot_len, err; | |
b5facfdb | 135 | __be32 flowlabel; |
6c8702c6 DL |
136 | |
137 | hdrlen = (osrh->hdrlen + 1) << 3; | |
138 | tot_len = hdrlen + sizeof(*hdr); | |
139 | ||
bbb40a0b | 140 | err = skb_cow_head(skb, tot_len + skb->mac_len); |
6c8702c6 DL |
141 | if (unlikely(err)) |
142 | return err; | |
143 | ||
144 | inner_hdr = ipv6_hdr(skb); | |
6df93462 | 145 | flowlabel = seg6_make_flowlabel(net, skb, inner_hdr); |
6c8702c6 DL |
146 | |
147 | skb_push(skb, tot_len); | |
148 | skb_reset_network_header(skb); | |
149 | skb_mac_header_rebuild(skb); | |
150 | hdr = ipv6_hdr(skb); | |
151 | ||
152 | /* inherit tc, flowlabel and hlim | |
153 | * hlim will be decremented in ip6_forward() afterwards and | |
154 | * decapsulation will overwrite inner hlim with outer hlim | |
155 | */ | |
32d99d0b DL |
156 | |
157 | if (skb->protocol == htons(ETH_P_IPV6)) { | |
158 | ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)), | |
b5facfdb | 159 | flowlabel); |
32d99d0b DL |
160 | hdr->hop_limit = inner_hdr->hop_limit; |
161 | } else { | |
b5facfdb | 162 | ip6_flow_hdr(hdr, 0, flowlabel); |
32d99d0b | 163 | hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb)); |
ef489749 YK |
164 | |
165 | memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); | |
ae68d933 AM |
166 | |
167 | /* the control block has been erased, so we have to set the | |
168 | * iif once again. | |
169 | * We read the receiving interface index directly from the | |
170 | * skb->skb_iif as it is done in the IPv4 receiving path (i.e.: | |
171 | * ip_rcv_core(...)). | |
172 | */ | |
173 | IP6CB(skb)->iif = skb->skb_iif; | |
32d99d0b DL |
174 | } |
175 | ||
6c8702c6 DL |
176 | hdr->nexthdr = NEXTHDR_ROUTING; |
177 | ||
178 | isrh = (void *)hdr + sizeof(*hdr); | |
179 | memcpy(isrh, osrh, hdrlen); | |
180 | ||
32d99d0b | 181 | isrh->nexthdr = proto; |
6c8702c6 DL |
182 | |
183 | hdr->daddr = isrh->segments[isrh->first_segment]; | |
a957fa19 | 184 | set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr); |
6c8702c6 | 185 | |
9baee834 DL |
186 | #ifdef CONFIG_IPV6_SEG6_HMAC |
187 | if (sr_has_hmac(isrh)) { | |
188 | err = seg6_push_hmac(net, &hdr->saddr, isrh); | |
189 | if (unlikely(err)) | |
190 | return err; | |
191 | } | |
192 | #endif | |
193 | ||
df8386d1 AM |
194 | hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); |
195 | ||
6c8702c6 DL |
196 | skb_postpush_rcsum(skb, hdr, tot_len); |
197 | ||
198 | return 0; | |
199 | } | |
b04c80d3 | 200 | EXPORT_SYMBOL_GPL(seg6_do_srh_encap); |
6c8702c6 | 201 | |
b07c8cdb AM |
202 | /* encapsulate an IPv6 packet within an outer IPv6 header with reduced SRH */ |
203 | static int seg6_do_srh_encap_red(struct sk_buff *skb, | |
204 | struct ipv6_sr_hdr *osrh, int proto) | |
205 | { | |
206 | __u8 first_seg = osrh->first_segment; | |
207 | struct dst_entry *dst = skb_dst(skb); | |
208 | struct net *net = dev_net(dst->dev); | |
209 | struct ipv6hdr *hdr, *inner_hdr; | |
210 | int hdrlen = ipv6_optlen(osrh); | |
211 | int red_tlv_offset, tlv_offset; | |
212 | struct ipv6_sr_hdr *isrh; | |
213 | bool skip_srh = false; | |
214 | __be32 flowlabel; | |
215 | int tot_len, err; | |
216 | int red_hdrlen; | |
217 | int tlvs_len; | |
218 | ||
219 | if (first_seg > 0) { | |
220 | red_hdrlen = hdrlen - sizeof(struct in6_addr); | |
221 | } else { | |
222 | /* NOTE: if tag/flags and/or other TLVs are introduced in the | |
223 | * seg6_iptunnel infrastructure, they should be considered when | |
224 | * deciding to skip the SRH. | |
225 | */ | |
226 | skip_srh = !sr_has_hmac(osrh); | |
227 | ||
228 | red_hdrlen = skip_srh ? 0 : hdrlen; | |
229 | } | |
230 | ||
231 | tot_len = red_hdrlen + sizeof(struct ipv6hdr); | |
232 | ||
233 | err = skb_cow_head(skb, tot_len + skb->mac_len); | |
234 | if (unlikely(err)) | |
235 | return err; | |
236 | ||
237 | inner_hdr = ipv6_hdr(skb); | |
238 | flowlabel = seg6_make_flowlabel(net, skb, inner_hdr); | |
239 | ||
240 | skb_push(skb, tot_len); | |
241 | skb_reset_network_header(skb); | |
242 | skb_mac_header_rebuild(skb); | |
243 | hdr = ipv6_hdr(skb); | |
244 | ||
245 | /* based on seg6_do_srh_encap() */ | |
246 | if (skb->protocol == htons(ETH_P_IPV6)) { | |
247 | ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)), | |
248 | flowlabel); | |
249 | hdr->hop_limit = inner_hdr->hop_limit; | |
250 | } else { | |
251 | ip6_flow_hdr(hdr, 0, flowlabel); | |
252 | hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb)); | |
253 | ||
254 | memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); | |
255 | IP6CB(skb)->iif = skb->skb_iif; | |
256 | } | |
257 | ||
258 | /* no matter if we have to skip the SRH or not, the first segment | |
259 | * always comes in the pushed IPv6 header. | |
260 | */ | |
261 | hdr->daddr = osrh->segments[first_seg]; | |
262 | ||
263 | if (skip_srh) { | |
264 | hdr->nexthdr = proto; | |
265 | ||
266 | set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr); | |
267 | goto out; | |
268 | } | |
269 | ||
270 | /* we cannot skip the SRH, slow path */ | |
271 | ||
272 | hdr->nexthdr = NEXTHDR_ROUTING; | |
273 | isrh = (void *)hdr + sizeof(struct ipv6hdr); | |
274 | ||
275 | if (unlikely(!first_seg)) { | |
276 | /* this is a very rare case; we have only one SID but | |
277 | * we cannot skip the SRH since we are carrying some | |
278 | * other info. | |
279 | */ | |
280 | memcpy(isrh, osrh, hdrlen); | |
281 | goto srcaddr; | |
282 | } | |
283 | ||
284 | tlv_offset = sizeof(*osrh) + (first_seg + 1) * sizeof(struct in6_addr); | |
285 | red_tlv_offset = tlv_offset - sizeof(struct in6_addr); | |
286 | ||
287 | memcpy(isrh, osrh, red_tlv_offset); | |
288 | ||
289 | tlvs_len = hdrlen - tlv_offset; | |
290 | if (unlikely(tlvs_len > 0)) { | |
291 | const void *s = (const void *)osrh + tlv_offset; | |
292 | void *d = (void *)isrh + red_tlv_offset; | |
293 | ||
294 | memcpy(d, s, tlvs_len); | |
295 | } | |
296 | ||
297 | --isrh->first_segment; | |
298 | isrh->hdrlen -= 2; | |
299 | ||
300 | srcaddr: | |
301 | isrh->nexthdr = proto; | |
302 | set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr); | |
303 | ||
304 | #ifdef CONFIG_IPV6_SEG6_HMAC | |
305 | if (unlikely(!skip_srh && sr_has_hmac(isrh))) { | |
306 | err = seg6_push_hmac(net, &hdr->saddr, isrh); | |
307 | if (unlikely(err)) | |
308 | return err; | |
309 | } | |
310 | #endif | |
311 | ||
312 | out: | |
313 | hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); | |
314 | ||
315 | skb_postpush_rcsum(skb, hdr, tot_len); | |
316 | ||
317 | return 0; | |
318 | } | |
319 | ||
6c8702c6 | 320 | /* insert an SRH within an IPv6 packet, just after the IPv6 header */ |
b04c80d3 | 321 | int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) |
6c8702c6 DL |
322 | { |
323 | struct ipv6hdr *hdr, *oldhdr; | |
324 | struct ipv6_sr_hdr *isrh; | |
325 | int hdrlen, err; | |
326 | ||
327 | hdrlen = (osrh->hdrlen + 1) << 3; | |
328 | ||
bbb40a0b | 329 | err = skb_cow_head(skb, hdrlen + skb->mac_len); |
6c8702c6 DL |
330 | if (unlikely(err)) |
331 | return err; | |
332 | ||
333 | oldhdr = ipv6_hdr(skb); | |
334 | ||
335 | skb_pull(skb, sizeof(struct ipv6hdr)); | |
336 | skb_postpull_rcsum(skb, skb_network_header(skb), | |
337 | sizeof(struct ipv6hdr)); | |
338 | ||
339 | skb_push(skb, sizeof(struct ipv6hdr) + hdrlen); | |
340 | skb_reset_network_header(skb); | |
341 | skb_mac_header_rebuild(skb); | |
342 | ||
343 | hdr = ipv6_hdr(skb); | |
344 | ||
345 | memmove(hdr, oldhdr, sizeof(*hdr)); | |
346 | ||
347 | isrh = (void *)hdr + sizeof(*hdr); | |
348 | memcpy(isrh, osrh, hdrlen); | |
349 | ||
350 | isrh->nexthdr = hdr->nexthdr; | |
351 | hdr->nexthdr = NEXTHDR_ROUTING; | |
352 | ||
353 | isrh->segments[0] = hdr->daddr; | |
354 | hdr->daddr = isrh->segments[isrh->first_segment]; | |
355 | ||
9baee834 DL |
356 | #ifdef CONFIG_IPV6_SEG6_HMAC |
357 | if (sr_has_hmac(isrh)) { | |
358 | struct net *net = dev_net(skb_dst(skb)->dev); | |
359 | ||
360 | err = seg6_push_hmac(net, &hdr->saddr, isrh); | |
361 | if (unlikely(err)) | |
362 | return err; | |
363 | } | |
364 | #endif | |
365 | ||
df8386d1 AM |
366 | hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); |
367 | ||
6c8702c6 DL |
368 | skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen); |
369 | ||
370 | return 0; | |
371 | } | |
b04c80d3 | 372 | EXPORT_SYMBOL_GPL(seg6_do_srh_inline); |
6c8702c6 DL |
373 | |
374 | static int seg6_do_srh(struct sk_buff *skb) | |
375 | { | |
376 | struct dst_entry *dst = skb_dst(skb); | |
377 | struct seg6_iptunnel_encap *tinfo; | |
32d99d0b | 378 | int proto, err = 0; |
6c8702c6 DL |
379 | |
380 | tinfo = seg6_encap_lwtunnel(dst->lwtstate); | |
381 | ||
6c8702c6 | 382 | switch (tinfo->mode) { |
6c8702c6 | 383 | case SEG6_IPTUN_MODE_INLINE: |
32d99d0b DL |
384 | if (skb->protocol != htons(ETH_P_IPV6)) |
385 | return -EINVAL; | |
386 | ||
6c8702c6 | 387 | err = seg6_do_srh_inline(skb, tinfo->srh); |
32d99d0b DL |
388 | if (err) |
389 | return err; | |
6c8702c6 | 390 | break; |
6c8702c6 | 391 | case SEG6_IPTUN_MODE_ENCAP: |
b07c8cdb | 392 | case SEG6_IPTUN_MODE_ENCAP_RED: |
5807b22c DL |
393 | err = iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6); |
394 | if (err) | |
395 | return err; | |
396 | ||
32d99d0b DL |
397 | if (skb->protocol == htons(ETH_P_IPV6)) |
398 | proto = IPPROTO_IPV6; | |
399 | else if (skb->protocol == htons(ETH_P_IP)) | |
400 | proto = IPPROTO_IPIP; | |
401 | else | |
402 | return -EINVAL; | |
403 | ||
b07c8cdb AM |
404 | if (tinfo->mode == SEG6_IPTUN_MODE_ENCAP) |
405 | err = seg6_do_srh_encap(skb, tinfo->srh, proto); | |
406 | else | |
407 | err = seg6_do_srh_encap_red(skb, tinfo->srh, proto); | |
408 | ||
32d99d0b DL |
409 | if (err) |
410 | return err; | |
411 | ||
5807b22c DL |
412 | skb_set_inner_transport_header(skb, skb_transport_offset(skb)); |
413 | skb_set_inner_protocol(skb, skb->protocol); | |
38ee7f2d DL |
414 | skb->protocol = htons(ETH_P_IPV6); |
415 | break; | |
416 | case SEG6_IPTUN_MODE_L2ENCAP: | |
13f0296b | 417 | case SEG6_IPTUN_MODE_L2ENCAP_RED: |
38ee7f2d DL |
418 | if (!skb_mac_header_was_set(skb)) |
419 | return -EINVAL; | |
420 | ||
421 | if (pskb_expand_head(skb, skb->mac_len, 0, GFP_ATOMIC) < 0) | |
422 | return -ENOMEM; | |
423 | ||
424 | skb_mac_header_rebuild(skb); | |
425 | skb_push(skb, skb->mac_len); | |
426 | ||
13f0296b AM |
427 | if (tinfo->mode == SEG6_IPTUN_MODE_L2ENCAP) |
428 | err = seg6_do_srh_encap(skb, tinfo->srh, | |
429 | IPPROTO_ETHERNET); | |
430 | else | |
431 | err = seg6_do_srh_encap_red(skb, tinfo->srh, | |
432 | IPPROTO_ETHERNET); | |
433 | ||
38ee7f2d DL |
434 | if (err) |
435 | return err; | |
436 | ||
32d99d0b | 437 | skb->protocol = htons(ETH_P_IPV6); |
6c8702c6 DL |
438 | break; |
439 | } | |
440 | ||
6c8702c6 | 441 | skb_set_transport_header(skb, sizeof(struct ipv6hdr)); |
7a3f5b0d | 442 | nf_reset_ct(skb); |
6c8702c6 | 443 | |
6c8702c6 DL |
444 | return 0; |
445 | } | |
446 | ||
7a3f5b0d RS |
/*
 * Final step of the input path: hand @skb to the input handler of its dst.
 * @net and @sk are unused; the signature matches what NF_HOOK() expects.
 */
static int seg6_input_finish(struct net *net, struct sock *sk,
			     struct sk_buff *skb)
{
	return dst_input(skb);
}
452 | ||
453 | static int seg6_input_core(struct net *net, struct sock *sk, | |
454 | struct sk_buff *skb) | |
6c8702c6 | 455 | { |
af4a2209 DL |
456 | struct dst_entry *orig_dst = skb_dst(skb); |
457 | struct dst_entry *dst = NULL; | |
458 | struct seg6_lwt *slwt; | |
6c8702c6 DL |
459 | int err; |
460 | ||
461 | err = seg6_do_srh(skb); | |
462 | if (unlikely(err)) { | |
463 | kfree_skb(skb); | |
464 | return err; | |
465 | } | |
466 | ||
af4a2209 DL |
467 | slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); |
468 | ||
af4a2209 DL |
469 | preempt_disable(); |
470 | dst = dst_cache_get(&slwt->cache); | |
471 | preempt_enable(); | |
af4a2209 | 472 | |
af4a2209 DL |
473 | if (!dst) { |
474 | ip6_route_input(skb); | |
af4a2209 DL |
475 | dst = skb_dst(skb); |
476 | if (!dst->error) { | |
477 | preempt_disable(); | |
478 | dst_cache_set_ip6(&slwt->cache, dst, | |
479 | &ipv6_hdr(skb)->saddr); | |
480 | preempt_enable(); | |
481 | } | |
af4a2209 | 482 | } else { |
fa0583c2 | 483 | skb_dst_drop(skb); |
af4a2209 DL |
484 | skb_dst_set(skb, dst); |
485 | } | |
6c8702c6 | 486 | |
af3b5158 DL |
487 | err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); |
488 | if (unlikely(err)) | |
489 | return err; | |
490 | ||
7a3f5b0d RS |
491 | if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) |
492 | return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, | |
493 | dev_net(skb->dev), NULL, skb, NULL, | |
494 | skb_dst(skb)->dev, seg6_input_finish); | |
495 | ||
496 | return seg6_input_finish(dev_net(skb->dev), NULL, skb); | |
6c8702c6 DL |
497 | } |
498 | ||
7a3f5b0d RS |
499 | static int seg6_input_nf(struct sk_buff *skb) |
500 | { | |
501 | struct net_device *dev = skb_dst(skb)->dev; | |
502 | struct net *net = dev_net(skb->dev); | |
503 | ||
504 | switch (skb->protocol) { | |
505 | case htons(ETH_P_IP): | |
506 | return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, NULL, | |
507 | skb, NULL, dev, seg6_input_core); | |
508 | case htons(ETH_P_IPV6): | |
509 | return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, NULL, | |
510 | skb, NULL, dev, seg6_input_core); | |
511 | } | |
512 | ||
513 | return -EINVAL; | |
514 | } | |
515 | ||
516 | static int seg6_input(struct sk_buff *skb) | |
517 | { | |
518 | if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) | |
519 | return seg6_input_nf(skb); | |
520 | ||
521 | return seg6_input_core(dev_net(skb->dev), NULL, skb); | |
522 | } | |
523 | ||
524 | static int seg6_output_core(struct net *net, struct sock *sk, | |
525 | struct sk_buff *skb) | |
6c8702c6 DL |
526 | { |
527 | struct dst_entry *orig_dst = skb_dst(skb); | |
528 | struct dst_entry *dst = NULL; | |
529 | struct seg6_lwt *slwt; | |
bf0df73a | 530 | int err; |
6c8702c6 DL |
531 | |
532 | err = seg6_do_srh(skb); | |
533 | if (unlikely(err)) | |
534 | goto drop; | |
535 | ||
536 | slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); | |
537 | ||
fa79581e | 538 | preempt_disable(); |
6c8702c6 | 539 | dst = dst_cache_get(&slwt->cache); |
fa79581e | 540 | preempt_enable(); |
6c8702c6 DL |
541 | |
542 | if (unlikely(!dst)) { | |
543 | struct ipv6hdr *hdr = ipv6_hdr(skb); | |
544 | struct flowi6 fl6; | |
545 | ||
1b4e5ad5 | 546 | memset(&fl6, 0, sizeof(fl6)); |
6c8702c6 DL |
547 | fl6.daddr = hdr->daddr; |
548 | fl6.saddr = hdr->saddr; | |
549 | fl6.flowlabel = ip6_flowinfo(hdr); | |
550 | fl6.flowi6_mark = skb->mark; | |
551 | fl6.flowi6_proto = hdr->nexthdr; | |
552 | ||
553 | dst = ip6_route_output(net, NULL, &fl6); | |
554 | if (dst->error) { | |
555 | err = dst->error; | |
556 | dst_release(dst); | |
557 | goto drop; | |
558 | } | |
559 | ||
fa79581e | 560 | preempt_disable(); |
6c8702c6 | 561 | dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); |
fa79581e | 562 | preempt_enable(); |
6c8702c6 DL |
563 | } |
564 | ||
565 | skb_dst_drop(skb); | |
566 | skb_dst_set(skb, dst); | |
567 | ||
af3b5158 DL |
568 | err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); |
569 | if (unlikely(err)) | |
570 | goto drop; | |
571 | ||
7a3f5b0d RS |
572 | if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) |
573 | return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, | |
574 | NULL, skb_dst(skb)->dev, dst_output); | |
575 | ||
6c8702c6 DL |
576 | return dst_output(net, sk, skb); |
577 | drop: | |
578 | kfree_skb(skb); | |
579 | return err; | |
580 | } | |
581 | ||
7a3f5b0d RS |
582 | static int seg6_output_nf(struct net *net, struct sock *sk, struct sk_buff *skb) |
583 | { | |
584 | struct net_device *dev = skb_dst(skb)->dev; | |
585 | ||
586 | switch (skb->protocol) { | |
587 | case htons(ETH_P_IP): | |
588 | return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb, | |
589 | NULL, dev, seg6_output_core); | |
590 | case htons(ETH_P_IPV6): | |
591 | return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb, | |
592 | NULL, dev, seg6_output_core); | |
593 | } | |
594 | ||
595 | return -EINVAL; | |
596 | } | |
597 | ||
598 | static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) | |
599 | { | |
600 | if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) | |
601 | return seg6_output_nf(net, sk, skb); | |
602 | ||
603 | return seg6_output_core(net, sk, skb); | |
604 | } | |
605 | ||
faee6769 | 606 | static int seg6_build_state(struct net *net, struct nlattr *nla, |
6c8702c6 | 607 | unsigned int family, const void *cfg, |
9ae28727 DA |
608 | struct lwtunnel_state **ts, |
609 | struct netlink_ext_ack *extack) | |
6c8702c6 DL |
610 | { |
611 | struct nlattr *tb[SEG6_IPTUNNEL_MAX + 1]; | |
612 | struct seg6_iptunnel_encap *tuninfo; | |
613 | struct lwtunnel_state *newts; | |
614 | int tuninfo_len, min_size; | |
615 | struct seg6_lwt *slwt; | |
616 | int err; | |
617 | ||
32d99d0b DL |
618 | if (family != AF_INET && family != AF_INET6) |
619 | return -EINVAL; | |
620 | ||
8cb08174 JB |
621 | err = nla_parse_nested_deprecated(tb, SEG6_IPTUNNEL_MAX, nla, |
622 | seg6_iptunnel_policy, extack); | |
6c8702c6 DL |
623 | |
624 | if (err < 0) | |
625 | return err; | |
626 | ||
627 | if (!tb[SEG6_IPTUNNEL_SRH]) | |
628 | return -EINVAL; | |
629 | ||
630 | tuninfo = nla_data(tb[SEG6_IPTUNNEL_SRH]); | |
631 | tuninfo_len = nla_len(tb[SEG6_IPTUNNEL_SRH]); | |
632 | ||
633 | /* tuninfo must contain at least the iptunnel encap structure, | |
634 | * the SRH and one segment | |
635 | */ | |
636 | min_size = sizeof(*tuninfo) + sizeof(struct ipv6_sr_hdr) + | |
637 | sizeof(struct in6_addr); | |
638 | if (tuninfo_len < min_size) | |
639 | return -EINVAL; | |
640 | ||
641 | switch (tuninfo->mode) { | |
6c8702c6 | 642 | case SEG6_IPTUN_MODE_INLINE: |
32d99d0b DL |
643 | if (family != AF_INET6) |
644 | return -EINVAL; | |
645 | ||
6c8702c6 | 646 | break; |
6c8702c6 DL |
647 | case SEG6_IPTUN_MODE_ENCAP: |
648 | break; | |
38ee7f2d DL |
649 | case SEG6_IPTUN_MODE_L2ENCAP: |
650 | break; | |
b07c8cdb AM |
651 | case SEG6_IPTUN_MODE_ENCAP_RED: |
652 | break; | |
13f0296b AM |
653 | case SEG6_IPTUN_MODE_L2ENCAP_RED: |
654 | break; | |
6c8702c6 DL |
655 | default: |
656 | return -EINVAL; | |
657 | } | |
658 | ||
659 | /* verify that SRH is consistent */ | |
bb986a50 | 660 | if (!seg6_validate_srh(tuninfo->srh, tuninfo_len - sizeof(*tuninfo), false)) |
6c8702c6 DL |
661 | return -EINVAL; |
662 | ||
663 | newts = lwtunnel_state_alloc(tuninfo_len + sizeof(*slwt)); | |
664 | if (!newts) | |
665 | return -ENOMEM; | |
666 | ||
667 | slwt = seg6_lwt_lwtunnel(newts); | |
668 | ||
191f86ca | 669 | err = dst_cache_init(&slwt->cache, GFP_ATOMIC); |
6c8702c6 DL |
670 | if (err) { |
671 | kfree(newts); | |
672 | return err; | |
673 | } | |
6c8702c6 DL |
674 | |
675 | memcpy(&slwt->tuninfo, tuninfo, tuninfo_len); | |
676 | ||
677 | newts->type = LWTUNNEL_ENCAP_SEG6; | |
38ee7f2d DL |
678 | newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT; |
679 | ||
680 | if (tuninfo->mode != SEG6_IPTUN_MODE_L2ENCAP) | |
681 | newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT; | |
682 | ||
6c8702c6 DL |
683 | newts->headroom = seg6_lwt_headroom(tuninfo); |
684 | ||
685 | *ts = newts; | |
686 | ||
687 | return 0; | |
688 | } | |
689 | ||
6c8702c6 DL |
690 | static void seg6_destroy_state(struct lwtunnel_state *lwt) |
691 | { | |
692 | dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache); | |
693 | } | |
6c8702c6 DL |
694 | |
695 | static int seg6_fill_encap_info(struct sk_buff *skb, | |
696 | struct lwtunnel_state *lwtstate) | |
697 | { | |
698 | struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate); | |
699 | ||
700 | if (nla_put_srh(skb, SEG6_IPTUNNEL_SRH, tuninfo)) | |
701 | return -EMSGSIZE; | |
702 | ||
703 | return 0; | |
704 | } | |
705 | ||
/* Return the netlink attribute size needed to dump the encap configuration. */
static int seg6_encap_nlsize(struct lwtunnel_state *lwtstate)
{
	struct seg6_iptunnel_encap *encap = seg6_encap_lwtunnel(lwtstate);
	int payload = SEG6_IPTUN_ENCAP_SIZE(encap);

	return nla_total_size(payload);
}
712 | ||
/*
 * Compare the encap configuration of two seg6 tunnel states.
 *
 * Returns 0 when both have the same size and identical bytes, non-zero
 * otherwise.
 */
static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
{
	struct seg6_iptunnel_encap *lhs = seg6_encap_lwtunnel(a);
	struct seg6_iptunnel_encap *rhs = seg6_encap_lwtunnel(b);
	int lhs_len = SEG6_IPTUN_ENCAP_SIZE(lhs);

	/* different sizes can never match */
	if (lhs_len != SEG6_IPTUN_ENCAP_SIZE(rhs))
		return 1;

	return memcmp(lhs, rhs, lhs_len);
}
724 | ||
725 | static const struct lwtunnel_encap_ops seg6_iptun_ops = { | |
726 | .build_state = seg6_build_state, | |
6c8702c6 | 727 | .destroy_state = seg6_destroy_state, |
6c8702c6 DL |
728 | .output = seg6_output, |
729 | .input = seg6_input, | |
730 | .fill_encap = seg6_fill_encap_info, | |
731 | .get_encap_size = seg6_encap_nlsize, | |
732 | .cmp_encap = seg6_encap_cmp, | |
88ff7334 | 733 | .owner = THIS_MODULE, |
6c8702c6 DL |
734 | }; |
735 | ||
736 | int __init seg6_iptunnel_init(void) | |
737 | { | |
738 | return lwtunnel_encap_add_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6); | |
739 | } | |
740 | ||
741 | void seg6_iptunnel_exit(void) | |
742 | { | |
743 | lwtunnel_encap_del_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6); | |
744 | } |