2 * SR-IPv6 implementation
5 * David Lebrun <david.lebrun@uclouvain.be>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
14 #include <linux/types.h>
15 #include <linux/skbuff.h>
16 #include <linux/net.h>
17 #include <linux/module.h>
19 #include <net/lwtunnel.h>
20 #include <net/netevent.h>
21 #include <net/netns/generic.h>
22 #include <net/ip6_fib.h>
23 #include <net/route.h>
25 #include <linux/seg6.h>
26 #include <linux/seg6_local.h>
27 #include <net/addrconf.h>
28 #include <net/ip6_route.h>
29 #include <net/dst_cache.h>
30 #ifdef CONFIG_IPV6_SEG6_HMAC
31 #include <net/seg6_hmac.h>
33 #include <linux/etherdevice.h>
35 struct seg6_local_lwt;
/* Descriptor of one seg6local action.
 *
 * Only the input callback is visible in this excerpt. The initializers in
 * seg6_action_table also set members named action, attrs and
 * static_headroom, so those members are declared on lines not shown here.
 */
37 struct seg6_action_desc {
/* per-packet handler, invoked as desc->input(skb, slwt) */
40 int (*input)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
/* Per-route state of a seg6local tunnel, stored in lwtunnel_state->data.
 *
 * Only srh and desc are visible in this excerpt. Other code in this file
 * also accesses members named action, table, nh4, nh6, iif, oif and
 * headroom, which are declared on lines not shown here.
 */
44 struct seg6_local_lwt {
/* SRH copied from the SEG6_LOCAL_SRH netlink attribute (kmalloc'ed) */
46 struct ipv6_sr_hdr *srh;
/* descriptor of the configured action; supplies attrs and input */
54 struct seg6_action_desc *desc;
/* Return the seg6local private state embedded in a lwtunnel state, i.e.
 * lwt->data viewed as a struct seg6_local_lwt.
 */
57 static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt)
59 return (struct seg6_local_lwt *)lwt->data;
62 static struct ipv6_sr_hdr *get_srh(struct sk_buff *skb)
64 struct ipv6_sr_hdr *srh;
67 if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
70 if (!pskb_may_pull(skb, srhoff + sizeof(*srh)))
73 srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
75 /* make sure it's a Segment Routing header (Routing Type 4) */
76 if (srh->type != IPV6_SRCRT_TYPE_4)
79 len = (srh->hdrlen + 1) << 3;
81 if (!pskb_may_pull(skb, srhoff + len))
84 if (!seg6_validate_srh(srh, len))
/* Fetch the SRH of a packet and apply the checks needed before a segment
 * endpoint action processes it.
 *
 * Visible checks: segments_left == 0 is tested, and when
 * CONFIG_IPV6_SEG6_HMAC is enabled the packet goes through
 * seg6_hmac_validate_skb(). The statements taken when a check fails and the
 * call that obtains srh are not visible in this excerpt; presumably srh
 * comes from get_srh() and NULL is returned on failure (TODO confirm).
 */
90 static struct ipv6_sr_hdr *get_and_validate_srh(struct sk_buff *skb)
92 struct ipv6_sr_hdr *srh;
/* no segment left to advance to */
98 if (srh->segments_left == 0)
101 #ifdef CONFIG_IPV6_SEG6_HMAC
102 if (!seg6_hmac_validate_skb(skb))
/* Remove the outer headers of an encapsulated packet so that skb->data
 * points at the inner header of type proto.
 *
 * Visible steps: a packet whose SRH still has segments left is tested for,
 * the HMAC is validated when CONFIG_IPV6_SEG6_HMAC is set, the offset of the
 * proto header is found with ipv6_find_hdr(), that many bytes are pulled,
 * the receive checksum is adjusted for the pulled bytes, the network and
 * transport header offsets are reset and the encapsulation flag is cleared.
 *
 * The return statements are not visible here; callers test the result with
 * a logical not, so false is treated as failure.
 */
109 static bool decap_and_validate(struct sk_buff *skb, int proto)
111 struct ipv6_sr_hdr *srh;
112 unsigned int off = 0;
/* srh may be NULL here: a packet without SRH is accepted by this test */
115 if (srh && srh->segments_left > 0)
118 #ifdef CONFIG_IPV6_SEG6_HMAC
119 if (srh && !seg6_hmac_validate_skb(skb))
/* off receives the offset of the inner header */
123 if (ipv6_find_hdr(skb, &off, proto, NULL, NULL) < 0)
126 if (!pskb_pull(skb, off))
129 skb_postpull_rcsum(skb, skb_network_header(skb), off);
131 skb_reset_network_header(skb);
132 skb_reset_transport_header(skb);
133 skb->encapsulation = 0;
/* Move the SRH to its next segment: segments_left is decremented and addr
 * is pointed at the segment entry with that new index.
 *
 * The statement that uses addr is not visible in this excerpt; presumably
 * the selected segment is copied into daddr (TODO confirm). Callers pass
 * the destination address field of the IPv6 header as daddr.
 */
138 static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr)
140 struct in6_addr *addr;
142 srh->segments_left--;
143 addr = srh->segments + srh->segments_left;
/* Perform an IPv6 route lookup for skb and attach the result with
 * skb_dst_set().
 *
 * nhaddr, when not NULL, replaces the packet destination address as the
 * lookup key. tbl_id (declared on a signature line not shown here) is used
 * with fib6_get_table() and ip6_pol_route(); callers pass 0 when no specific
 * table is wanted. The conditions selecting between the two lookup paths,
 * and the bodies of the loopback and blackhole branches, are not visible in
 * this excerpt.
 */
147 static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
150 struct net *net = dev_net(skb->dev);
151 struct ipv6hdr *hdr = ipv6_hdr(skb);
152 int flags = RT6_LOOKUP_F_HAS_SADDR;
153 struct dst_entry *dst = NULL;
/* build the flow key from the current IPv6 header and skb metadata */
157 fl6.flowi6_iif = skb->dev->ifindex;
158 fl6.daddr = nhaddr ? *nhaddr : hdr->daddr;
159 fl6.saddr = hdr->saddr;
160 fl6.flowlabel = ip6_flowinfo(hdr);
161 fl6.flowi6_mark = skb->mark;
162 fl6.flowi6_proto = hdr->nexthdr;
165 fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
/* regular input route lookup */
168 dst = ip6_route_input_lookup(net, skb->dev, &fl6, flags);
/* lookup restricted to the table identified by tbl_id */
170 struct fib6_table *table;
172 table = fib6_get_table(net, tbl_id);
176 rt = ip6_pol_route(net, table, 0, &fl6, flags);
/* a usable route through a loopback device gets special handling */
180 if (dst && dst->dev->flags & IFF_LOOPBACK && !dst->error) {
187 rt = net->ipv6.ip6_blk_hole_entry;
193 skb_dst_set(skb, dst);
196 /* regular endpoint function */
/* Handler for SEG6_LOCAL_ACTION_END (see seg6_action_table): validate the
 * SRH, advance to the next segment by rewriting the IPv6 destination
 * address, look up a route for the new destination in the default way
 * (no explicit nexthop, table 0) and hand the packet to dst_input().
 * The handling of a failed SRH validation is not visible in this excerpt.
 */
197 static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)
199 struct ipv6_sr_hdr *srh;
201 srh = get_and_validate_srh(skb);
205 advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
207 lookup_nexthop(skb, NULL, 0);
209 return dst_input(skb);
216 /* regular endpoint, and forward to specified nexthop */
/* Handler for SEG6_LOCAL_ACTION_END_X: same SRH processing as the plain
 * endpoint, but the route lookup uses the configured IPv6 nexthop slwt->nh6
 * instead of the packet destination address.
 * The handling of a failed SRH validation is not visible in this excerpt.
 */
217 static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt)
219 struct ipv6_sr_hdr *srh;
221 srh = get_and_validate_srh(skb);
225 advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
227 lookup_nexthop(skb, &slwt->nh6, 0);
229 return dst_input(skb);
/* Handler for SEG6_LOCAL_ACTION_END_T: same SRH processing as the plain
 * endpoint, but the route lookup for the new destination is done in the
 * routing table configured in slwt->table.
 * The handling of a failed SRH validation is not visible in this excerpt.
 */
236 static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt)
238 struct ipv6_sr_hdr *srh;
240 srh = get_and_validate_srh(skb);
244 advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
246 lookup_nexthop(skb, NULL, slwt->table);
248 return dst_input(skb);
255 /* decapsulate and forward inner L2 frame on specified interface */
/* Handler for SEG6_LOCAL_ACTION_END_DX2.
 *
 * Visible steps: the outer headers are removed up to NEXTHDR_NONE, an
 * Ethernet header must be pullable, the MAC header is reset to the start of
 * the inner frame, the EtherType must satisfy eth_proto_is_802_3(), the
 * egress device is looked up from slwt->oif under RCU and must be of type
 * ARPHRD_ETHER, up and with carrier, LRO packets are checked for, the
 * payload length (frame minus ETH_HLEN) is compared against the device MTU,
 * skb->protocol is taken from the inner EtherType and the frame is queued
 * with dev_queue_xmit().
 *
 * The failure branches, and the assignment of the egress device to the skb,
 * are not visible in this excerpt.
 */
256 static int input_action_end_dx2(struct sk_buff *skb,
257 struct seg6_local_lwt *slwt)
259 struct net *net = dev_net(skb->dev);
260 struct net_device *odev;
263 if (!decap_and_validate(skb, NEXTHDR_NONE))
266 if (!pskb_may_pull(skb, ETH_HLEN))
269 skb_reset_mac_header(skb);
270 eth = (struct ethhdr *)skb->data;
272 /* To determine the frame's protocol, we assume it is 802.3. This avoids
273 * a call to eth_type_trans(), which is not really relevant for our
276 if (!eth_proto_is_802_3(eth->h_proto))
279 odev = dev_get_by_index_rcu(net, slwt->oif);
283 /* As we accept Ethernet frames, make sure the egress device is of
286 if (odev->type != ARPHRD_ETHER)
289 if (!(odev->flags & IFF_UP) || !netif_carrier_ok(odev))
294 if (skb_warn_if_lro(skb))
297 skb_forward_csum(skb);
299 if (skb->len - ETH_HLEN > odev->mtu)
303 skb->protocol = eth->h_proto;
305 return dev_queue_xmit(skb);
312 /* decapsulate and forward to specified nexthop */
/* Handler for SEG6_LOCAL_ACTION_END_DX6.
 *
 * Visible steps: the outer headers are removed up to the inner IPv6 header,
 * a full inner IPv6 header must be pullable, slwt->nh6 is tested with
 * ipv6_addr_any(), a route lookup is done with nhaddr (NULL by default) in
 * table 0, and the packet is handed to dst_input().
 * The assignment made when nh6 is not the unspecified address and the
 * failure branches are not visible in this excerpt.
 */
313 static int input_action_end_dx6(struct sk_buff *skb,
314 struct seg6_local_lwt *slwt)
316 struct in6_addr *nhaddr = NULL;
318 /* this function accepts IPv6 encapsulated packets, with either
319 * an SRH with SL=0, or no SRH.
322 if (!decap_and_validate(skb, IPPROTO_IPV6))
325 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
328 /* The inner packet is not associated to any local interface,
329 * so we do not call netif_rx().
331 * If slwt->nh6 is set to ::, then lookup the nexthop for the
332 * inner packet's DA. Otherwise, use the specified nexthop.
335 if (!ipv6_addr_any(&slwt->nh6))
338 lookup_nexthop(skb, nhaddr, 0);
340 return dst_input(skb);
// Handler for SEG6_LOCAL_ACTION_END_DX4 (see seg6_action_table).
//
// Visible steps: the outer headers are removed up to the inner IPv4 header
// (IPPROTO_IPIP), a full IPv4 header must be pullable, skb->protocol is
// switched to ETH_P_IP, the lookup address is the configured slwt->nh4 when
// it is non-zero and the inner destination address otherwise, an IPv4 input
// route is resolved with ip_route_input() and the packet is handed to
// dst_input().
// The failure branches and the statement that loads iph are not visible in
// this excerpt.
346 static int input_action_end_dx4(struct sk_buff *skb,
347 struct seg6_local_lwt *slwt)
353 if (!decap_and_validate(skb, IPPROTO_IPIP))
356 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
359 skb->protocol = htons(ETH_P_IP);
363 nhaddr = slwt->nh4.s_addr ?: iph->daddr;
367 err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
371 return dst_input(skb);
// Handler for SEG6_LOCAL_ACTION_END_DT6 (see seg6_action_table).
//
// Visible steps: the outer headers are removed up to the inner IPv6 header,
// a full inner IPv6 header must be pullable, the inner destination is looked
// up in the routing table configured in slwt->table, and the packet is
// handed to dst_input().
// The failure branches are not visible in this excerpt.
378 static int input_action_end_dt6(struct sk_buff *skb,
379 struct seg6_local_lwt *slwt)
381 if (!decap_and_validate(skb, IPPROTO_IPV6))
384 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
387 lookup_nexthop(skb, NULL, slwt->table);
389 return dst_input(skb);
396 /* push an SRH on top of the current one */
/* Handler for SEG6_LOCAL_ACTION_END_B6.
 *
 * Visible steps: the existing SRH is validated, the configured slwt->srh is
 * inserted with seg6_do_srh_inline(), the IPv6 payload length is recomputed
 * from the new skb length, the transport header offset is placed right
 * after the fixed IPv6 header, a default route lookup is done and the packet
 * is handed to dst_input().
 * The handling of a failed validation or insertion is not visible in this
 * excerpt.
 */
397 static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
399 struct ipv6_sr_hdr *srh;
402 srh = get_and_validate_srh(skb);
406 err = seg6_do_srh_inline(skb, slwt->srh);
410 ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
411 skb_set_transport_header(skb, sizeof(struct ipv6hdr));
413 lookup_nexthop(skb, NULL, 0);
415 return dst_input(skb);
422 /* encapsulate within an outer IPv6 header and a specified SRH */
/* Handler for SEG6_LOCAL_ACTION_END_B6_ENCAP.
 *
 * Visible steps: the existing SRH is validated and advanced to its next
 * segment, the current headers are recorded as inner headers and the skb is
 * flagged as encapsulated, a new outer IPv6 header plus the configured
 * slwt->srh is pushed with seg6_do_srh_encap(), the outer payload length and
 * transport header offset are set, a default route lookup is done and the
 * packet is handed to dst_input().
 * The handling of a failed validation or encapsulation is not visible in
 * this excerpt.
 */
423 static int input_action_end_b6_encap(struct sk_buff *skb,
424 struct seg6_local_lwt *slwt)
426 struct ipv6_sr_hdr *srh;
429 srh = get_and_validate_srh(skb);
433 advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
435 skb_reset_inner_headers(skb);
436 skb->encapsulation = 1;
438 err = seg6_do_srh_encap(skb, slwt->srh, IPPROTO_IPV6);
442 ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
443 skb_set_transport_header(skb, sizeof(struct ipv6hdr));
445 lookup_nexthop(skb, NULL, 0);
447 return dst_input(skb);
/* Table of supported seg6local actions.
 *
 * Each entry binds an action identifier to the bitmask of netlink attributes
 * it uses (attrs, one bit per SEG6_LOCAL_* attribute index) and to its
 * packet handler. Of the visible lines, only the B6_ENCAP entry sets
 * static_headroom (room for one extra IPv6 header). The attrs line of the
 * END entry is not visible in this excerpt.
 */
454 static struct seg6_action_desc seg6_action_table[] = {
456 .action = SEG6_LOCAL_ACTION_END,
458 .input = input_action_end,
461 .action = SEG6_LOCAL_ACTION_END_X,
462 .attrs = (1 << SEG6_LOCAL_NH6),
463 .input = input_action_end_x,
466 .action = SEG6_LOCAL_ACTION_END_T,
467 .attrs = (1 << SEG6_LOCAL_TABLE),
468 .input = input_action_end_t,
471 .action = SEG6_LOCAL_ACTION_END_DX2,
472 .attrs = (1 << SEG6_LOCAL_OIF),
473 .input = input_action_end_dx2,
476 .action = SEG6_LOCAL_ACTION_END_DX6,
477 .attrs = (1 << SEG6_LOCAL_NH6),
478 .input = input_action_end_dx6,
481 .action = SEG6_LOCAL_ACTION_END_DX4,
482 .attrs = (1 << SEG6_LOCAL_NH4),
483 .input = input_action_end_dx4,
486 .action = SEG6_LOCAL_ACTION_END_DT6,
487 .attrs = (1 << SEG6_LOCAL_TABLE),
488 .input = input_action_end_dt6,
491 .action = SEG6_LOCAL_ACTION_END_B6,
492 .attrs = (1 << SEG6_LOCAL_SRH),
493 .input = input_action_end_b6,
496 .action = SEG6_LOCAL_ACTION_END_B6_ENCAP,
497 .attrs = (1 << SEG6_LOCAL_SRH),
498 .input = input_action_end_b6_encap,
499 .static_headroom = sizeof(struct ipv6hdr),
/* Find the descriptor of an action by linear search over seg6_action_table.
 *
 * The entry count is derived from the table size, so new table entries are
 * picked up automatically. The return statements are not visible in this
 * excerpt; presumably the matching descriptor is returned, and NULL when no
 * entry matches (parse_nla_action stores the result in desc; TODO confirm).
 */
503 static struct seg6_action_desc *__get_action_desc(int action)
505 struct seg6_action_desc *desc;
508 count = sizeof(seg6_action_table) / sizeof(struct seg6_action_desc);
509 for (i = 0; i < count; i++) {
510 desc = &seg6_action_table[i];
511 if (desc->action == action)
/* lwtunnel input hook (installed as .input in seg6_local_ops).
 *
 * Packets whose protocol is not IPv6 take a separate branch whose body is
 * not visible in this excerpt. Otherwise the seg6local state is taken from
 * the lwtunnel state attached to the packet's current dst entry and the
 * packet is dispatched to the input handler of the configured action.
 * The statement that loads desc is not visible; presumably it is slwt->desc.
 */
518 static int seg6_local_input(struct sk_buff *skb)
520 struct dst_entry *orig_dst = skb_dst(skb);
521 struct seg6_action_desc *desc;
522 struct seg6_local_lwt *slwt;
524 if (skb->protocol != htons(ETH_P_IPV6)) {
529 slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
532 return desc->input(skb, slwt);
/* Netlink attribute policy used by seg6_local_build_state() when parsing
 * the nested SEG6_LOCAL_* attributes. The action, table and interface
 * indexes are 32-bit values; the SRH and the two nexthop addresses are
 * binary blobs, with the nexthops bounded by the size of an IPv4 and an
 * IPv6 address respectively. No length bound is given for the SRH here.
 */
535 static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
536 [SEG6_LOCAL_ACTION] = { .type = NLA_U32 },
537 [SEG6_LOCAL_SRH] = { .type = NLA_BINARY },
538 [SEG6_LOCAL_TABLE] = { .type = NLA_U32 },
539 [SEG6_LOCAL_NH4] = { .type = NLA_BINARY,
540 .len = sizeof(struct in_addr) },
541 [SEG6_LOCAL_NH6] = { .type = NLA_BINARY,
542 .len = sizeof(struct in6_addr) },
543 [SEG6_LOCAL_IIF] = { .type = NLA_U32 },
544 [SEG6_LOCAL_OIF] = { .type = NLA_U32 },
/* Parse the SEG6_LOCAL_SRH attribute into slwt->srh.
 *
 * The attribute payload must be large enough for the fixed SRH plus one
 * segment address and must pass seg6_validate_srh(). A private copy of len
 * bytes is then allocated with kmalloc() and filled from the attribute, and
 * len is added to slwt->headroom. The return statements (including the
 * allocation failure path) are not visible in this excerpt.
 */
547 static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt)
549 struct ipv6_sr_hdr *srh;
552 srh = nla_data(attrs[SEG6_LOCAL_SRH]);
553 len = nla_len(attrs[SEG6_LOCAL_SRH]);
555 /* SRH must contain at least one segment */
556 if (len < sizeof(*srh) + sizeof(struct in6_addr))
559 if (!seg6_validate_srh(srh, len))
562 slwt->srh = kmalloc(len, GFP_KERNEL);
566 memcpy(slwt->srh, srh, len);
/* account for the room this SRH needs when it is pushed on a packet */
568 slwt->headroom += len;
/* Dump the configured SRH as a SEG6_LOCAL_SRH attribute.
 *
 * The length is recomputed from the hdrlen field of the SRH (units of
 * 8 bytes, not counting the first 8), attribute space is reserved in skb
 * and the SRH is copied into it. The statement that loads srh and the
 * return statements are not visible in this excerpt.
 */
573 static int put_nla_srh(struct sk_buff *skb, struct seg6_local_lwt *slwt)
575 struct ipv6_sr_hdr *srh;
580 len = (srh->hdrlen + 1) << 3;
582 nla = nla_reserve(skb, SEG6_LOCAL_SRH, len);
586 memcpy(nla_data(nla), srh, len);
/* Compare the SRHs of two states. The byte lengths derived from hdrlen are
 * compared first (the statement taken on mismatch is not visible here);
 * for equal lengths the result is that of memcmp() over the whole SRH, so
 * zero means identical.
 */
591 static int cmp_nla_srh(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
593 int len = (a->srh->hdrlen + 1) << 3;
595 if (len != ((b->srh->hdrlen + 1) << 3))
598 return memcmp(a->srh, b->srh, len);
/* Store the 32-bit SEG6_LOCAL_TABLE attribute in slwt->table.
 * The return statement is not visible in this excerpt.
 */
601 static int parse_nla_table(struct nlattr **attrs, struct seg6_local_lwt *slwt)
603 slwt->table = nla_get_u32(attrs[SEG6_LOCAL_TABLE]);
/* Dump slwt->table as a 32-bit SEG6_LOCAL_TABLE attribute. A non-zero
 * result of nla_put_u32() selects a failure branch that is not visible in
 * this excerpt.
 */
608 static int put_nla_table(struct sk_buff *skb, struct seg6_local_lwt *slwt)
610 if (nla_put_u32(skb, SEG6_LOCAL_TABLE, slwt->table))
/* Compare the table identifiers of two states. The returned values are not
 * visible in this excerpt; seg6_local_cmp_encap() treats a non-zero result
 * as a difference.
 */
616 static int cmp_nla_table(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
618 if (a->table != b->table)
/* Copy the IPv4 nexthop carried by SEG6_LOCAL_NH4 into slwt->nh4. Exactly
 * sizeof(struct in_addr) bytes are copied from the attribute payload.
 * NOTE(review): the visible policy entry only gives an upper bound for this
 * binary attribute; confirm that a shorter payload cannot reach this copy.
 */
624 static int parse_nla_nh4(struct nlattr **attrs, struct seg6_local_lwt *slwt)
626 memcpy(&slwt->nh4, nla_data(attrs[SEG6_LOCAL_NH4]),
627 sizeof(struct in_addr));
/* Dump slwt->nh4 as a SEG6_LOCAL_NH4 attribute: space for one IPv4 address
 * is reserved in skb and the address is copied into it. The handling of a
 * failed reservation is not visible in this excerpt.
 */
632 static int put_nla_nh4(struct sk_buff *skb, struct seg6_local_lwt *slwt)
636 nla = nla_reserve(skb, SEG6_LOCAL_NH4, sizeof(struct in_addr));
640 memcpy(nla_data(nla), &slwt->nh4, sizeof(struct in_addr));
/* Compare the IPv4 nexthops of two states with memcmp(); zero means the
 * addresses are identical.
 */
645 static int cmp_nla_nh4(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
647 return memcmp(&a->nh4, &b->nh4, sizeof(struct in_addr));
/* Copy the IPv6 nexthop carried by SEG6_LOCAL_NH6 into slwt->nh6. Exactly
 * sizeof(struct in6_addr) bytes are copied from the attribute payload.
 * NOTE(review): the visible policy entry only gives an upper bound for this
 * binary attribute; confirm that a shorter payload cannot reach this copy.
 */
650 static int parse_nla_nh6(struct nlattr **attrs, struct seg6_local_lwt *slwt)
652 memcpy(&slwt->nh6, nla_data(attrs[SEG6_LOCAL_NH6]),
653 sizeof(struct in6_addr));
/* Dump slwt->nh6 as a SEG6_LOCAL_NH6 attribute: space for one IPv6 address
 * is reserved in skb and the address is copied into it. The handling of a
 * failed reservation is not visible in this excerpt.
 */
658 static int put_nla_nh6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
662 nla = nla_reserve(skb, SEG6_LOCAL_NH6, sizeof(struct in6_addr));
666 memcpy(nla_data(nla), &slwt->nh6, sizeof(struct in6_addr));
/* Compare the IPv6 nexthops of two states with memcmp(); zero means the
 * addresses are identical.
 */
671 static int cmp_nla_nh6(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
673 return memcmp(&a->nh6, &b->nh6, sizeof(struct in6_addr));
/* Store the 32-bit SEG6_LOCAL_IIF attribute in slwt->iif.
 * The return statement is not visible in this excerpt.
 */
676 static int parse_nla_iif(struct nlattr **attrs, struct seg6_local_lwt *slwt)
678 slwt->iif = nla_get_u32(attrs[SEG6_LOCAL_IIF]);
/* Dump slwt->iif as a 32-bit SEG6_LOCAL_IIF attribute. A non-zero result of
 * nla_put_u32() selects a failure branch that is not visible in this
 * excerpt.
 */
683 static int put_nla_iif(struct sk_buff *skb, struct seg6_local_lwt *slwt)
685 if (nla_put_u32(skb, SEG6_LOCAL_IIF, slwt->iif))
/* Compare the input interface indexes of two states. The returned values
 * are not visible in this excerpt; seg6_local_cmp_encap() treats a non-zero
 * result as a difference.
 */
691 static int cmp_nla_iif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
693 if (a->iif != b->iif)
/* Store the 32-bit SEG6_LOCAL_OIF attribute in slwt->oif.
 * The return statement is not visible in this excerpt.
 */
699 static int parse_nla_oif(struct nlattr **attrs, struct seg6_local_lwt *slwt)
701 slwt->oif = nla_get_u32(attrs[SEG6_LOCAL_OIF]);
/* Dump slwt->oif as a 32-bit SEG6_LOCAL_OIF attribute. A non-zero result of
 * nla_put_u32() selects a failure branch that is not visible in this
 * excerpt.
 */
706 static int put_nla_oif(struct sk_buff *skb, struct seg6_local_lwt *slwt)
708 if (nla_put_u32(skb, SEG6_LOCAL_OIF, slwt->oif))
/* Compare the output interface indexes of two states. The returned values
 * are not visible in this excerpt; seg6_local_cmp_encap() treats a non-zero
 * result as a difference.
 */
714 static int cmp_nla_oif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
716 if (a->oif != b->oif)
/* Per-attribute operations used to handle one SEG6_LOCAL_* attribute:
 * parse reads it from netlink into the state, put writes it from the state
 * into a netlink message, cmp compares it between two states.
 */
722 struct seg6_action_param {
723 int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt);
724 int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
725 int (*cmp)(struct seg6_local_lwt *a, struct seg6_local_lwt *b);
/* Operations table indexed by SEG6_LOCAL_* attribute number. The generic
 * parse, dump and compare loops in this file index it with every attribute
 * bit set in the attrs mask of an action. In this excerpt the put member is
 * only visible for the TABLE entry; the other put lines are not shown.
 */
728 static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
729 [SEG6_LOCAL_SRH] = { .parse = parse_nla_srh,
731 .cmp = cmp_nla_srh },
733 [SEG6_LOCAL_TABLE] = { .parse = parse_nla_table,
734 .put = put_nla_table,
735 .cmp = cmp_nla_table },
737 [SEG6_LOCAL_NH4] = { .parse = parse_nla_nh4,
739 .cmp = cmp_nla_nh4 },
741 [SEG6_LOCAL_NH6] = { .parse = parse_nla_nh6,
743 .cmp = cmp_nla_nh6 },
745 [SEG6_LOCAL_IIF] = { .parse = parse_nla_iif,
747 .cmp = cmp_nla_iif },
749 [SEG6_LOCAL_OIF] = { .parse = parse_nla_oif,
751 .cmp = cmp_nla_oif },
/* Resolve the action stored in slwt->action and parse its attributes.
 *
 * The descriptor is looked up with __get_action_desc(), its static headroom
 * is added to slwt->headroom, and for every attribute bit set in
 * desc->attrs the matching parse callback from seg6_action_params is run.
 * The checks on desc, the check that each required attribute is present and
 * the error returns are on lines not visible in this excerpt.
 */
754 static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt)
756 struct seg6_action_param *param;
757 struct seg6_action_desc *desc;
760 desc = __get_action_desc(slwt->action);
768 slwt->headroom += desc->static_headroom;
/* walk every possible attribute index and handle the ones this action uses */
770 for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
771 if (desc->attrs & (1 << i)) {
775 param = &seg6_action_params[i];
777 err = param->parse(attrs, slwt);
/* lwtunnel build_state hook: create the seg6local state for a route from
 * its nested netlink configuration.
 *
 * Visible steps: the address family is compared against AF_INET6, the
 * nested attributes are parsed against seg6_local_policy, the
 * SEG6_LOCAL_ACTION attribute is required, a lwtunnel state with room for a
 * struct seg6_local_lwt is allocated, the action is stored and its
 * attributes parsed with parse_nla_action(), and the new state is marked as
 * an input-redirecting LWTUNNEL_ENCAP_SEG6_LOCAL state with the accumulated
 * headroom. The error paths and the store into ts are not visible in this
 * excerpt.
 */
786 static int seg6_local_build_state(struct nlattr *nla, unsigned int family,
787 const void *cfg, struct lwtunnel_state **ts,
788 struct netlink_ext_ack *extack)
790 struct nlattr *tb[SEG6_LOCAL_MAX + 1];
791 struct lwtunnel_state *newts;
792 struct seg6_local_lwt *slwt;
795 if (family != AF_INET6)
798 err = nla_parse_nested(tb, SEG6_LOCAL_MAX, nla, seg6_local_policy,
804 if (!tb[SEG6_LOCAL_ACTION])
807 newts = lwtunnel_state_alloc(sizeof(*slwt));
811 slwt = seg6_local_lwtunnel(newts);
812 slwt->action = nla_get_u32(tb[SEG6_LOCAL_ACTION]);
814 err = parse_nla_action(tb, slwt);
818 newts->type = LWTUNNEL_ENCAP_SEG6_LOCAL;
819 newts->flags = LWTUNNEL_STATE_INPUT_REDIRECT;
820 newts->headroom = slwt->headroom;
/* lwtunnel destroy_state hook. Only the lookup of the private state is
 * visible in this excerpt; presumably the SRH copy allocated by
 * parse_nla_srh() is released here (TODO confirm).
 */
832 static void seg6_local_destroy_state(struct lwtunnel_state *lwt)
834 struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
/* lwtunnel fill_encap hook: dump the state into a netlink message.
 *
 * The action is written first as SEG6_LOCAL_ACTION, then the put callback
 * is run for every attribute bit set in the attrs mask of the action.
 * The failure branches and the final return are not visible in this excerpt.
 */
839 static int seg6_local_fill_encap(struct sk_buff *skb,
840 struct lwtunnel_state *lwt)
842 struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
843 struct seg6_action_param *param;
846 if (nla_put_u32(skb, SEG6_LOCAL_ACTION, slwt->action))
849 for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
850 if (slwt->desc->attrs & (1 << i)) {
851 param = &seg6_action_params[i];
852 err = param->put(skb, slwt);
/* lwtunnel get_encap_size hook: accumulate in nlsize the netlink space
 * needed to dump this state.
 *
 * The sum starts with the 4-byte action attribute and adds one term per
 * attribute bit set for the action: the SRH length derived from hdrlen,
 * 4 bytes for the table, the IPv4 nexthop and each interface index, and
 * 16 bytes for the IPv6 nexthop. The return statement is not visible in
 * this excerpt.
 */
861 static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
863 struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
867 nlsize = nla_total_size(4); /* action */
869 attrs = slwt->desc->attrs;
871 if (attrs & (1 << SEG6_LOCAL_SRH))
872 nlsize += nla_total_size((slwt->srh->hdrlen + 1) << 3);
874 if (attrs & (1 << SEG6_LOCAL_TABLE))
875 nlsize += nla_total_size(4);
877 if (attrs & (1 << SEG6_LOCAL_NH4))
878 nlsize += nla_total_size(4);
880 if (attrs & (1 << SEG6_LOCAL_NH6))
881 nlsize += nla_total_size(16);
883 if (attrs & (1 << SEG6_LOCAL_IIF))
884 nlsize += nla_total_size(4);
886 if (attrs & (1 << SEG6_LOCAL_OIF))
887 nlsize += nla_total_size(4);
/* lwtunnel cmp_encap hook: compare two seg6local states.
 *
 * The action identifiers and the attribute masks are compared first; then
 * the cmp callback of every attribute used by the action is run and a
 * non-zero callback result is treated as a difference. The returned values
 * are not visible in this excerpt.
 */
892 static int seg6_local_cmp_encap(struct lwtunnel_state *a,
893 struct lwtunnel_state *b)
895 struct seg6_local_lwt *slwt_a, *slwt_b;
896 struct seg6_action_param *param;
899 slwt_a = seg6_local_lwtunnel(a);
900 slwt_b = seg6_local_lwtunnel(b);
902 if (slwt_a->action != slwt_b->action)
905 if (slwt_a->desc->attrs != slwt_b->desc->attrs)
908 for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
909 if (slwt_a->desc->attrs & (1 << i)) {
910 param = &seg6_action_params[i];
911 if (param->cmp(slwt_a, slwt_b))
/* lwtunnel operations registered for LWTUNNEL_ENCAP_SEG6_LOCAL by
 * seg6_local_init(). Only an input hook is provided; no output or xmit
 * member is set in the visible lines.
 */
919 static const struct lwtunnel_encap_ops seg6_local_ops = {
920 .build_state = seg6_local_build_state,
921 .destroy_state = seg6_local_destroy_state,
922 .input = seg6_local_input,
923 .fill_encap = seg6_local_fill_encap,
924 .get_encap_size = seg6_local_get_encap_size,
925 .cmp_encap = seg6_local_cmp_encap,
926 .owner = THIS_MODULE,
/* Register the seg6local lwtunnel operations. Returns the result of
 * lwtunnel_encap_add_ops().
 */
929 int __init seg6_local_init(void)
931 return lwtunnel_encap_add_ops(&seg6_local_ops,
932 LWTUNNEL_ENCAP_SEG6_LOCAL);
/* Unregister the seg6local lwtunnel operations added by seg6_local_init(). */
935 void seg6_local_exit(void)
937 lwtunnel_encap_del_ops(&seg6_local_ops, LWTUNNEL_ENCAP_SEG6_LOCAL);