| 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
| 2 | #ifndef __NET_IP_TUNNELS_H |
| 3 | #define __NET_IP_TUNNELS_H 1 |
| 4 | |
| 5 | #include <linux/if_tunnel.h> |
| 6 | #include <linux/netdevice.h> |
| 7 | #include <linux/skbuff.h> |
| 8 | #include <linux/socket.h> |
| 9 | #include <linux/types.h> |
| 10 | #include <linux/u64_stats_sync.h> |
| 11 | #include <linux/bitops.h> |
| 12 | |
| 13 | #include <net/dsfield.h> |
| 14 | #include <net/gro_cells.h> |
| 15 | #include <net/inet_ecn.h> |
| 16 | #include <net/netns/generic.h> |
| 17 | #include <net/rtnetlink.h> |
| 18 | #include <net/lwtunnel.h> |
| 19 | #include <net/dst_cache.h> |
| 20 | |
| 21 | #if IS_ENABLED(CONFIG_IPV6) |
| 22 | #include <net/ipv6.h> |
| 23 | #include <net/ip6_fib.h> |
| 24 | #include <net/ip6_route.h> |
| 25 | #endif |
| 26 | |
| 27 | /* Keep error state on tunnel for 30 seconds (expressed in jiffies via HZ). */ |
| 28 | #define IPTUNNEL_ERR_TIMEO (30*HZ) |
| 29 | |
| 30 | /* Offset just past tp_dst in struct ip_tunnel_key; ip_tunnel_key_init() zeroes every byte from here to the end of the struct. */ |
| 31 | #define IP_TUNNEL_KEY_SIZE offsetofend(struct ip_tunnel_key, tp_dst) |
| 32 | |
| 33 | /* Offset and length of the part of the address union not covered by the IPv4 addresses; ip_tunnel_key_init() zeroes it. */ |
| 34 | #define IP_TUNNEL_KEY_IPV4_PAD offsetofend(struct ip_tunnel_key, u.ipv4.dst) |
| 35 | #define IP_TUNNEL_KEY_IPV4_PAD_LEN \ |
| 36 | (sizeof_field(struct ip_tunnel_key, u) - \ |
| 37 | sizeof_field(struct ip_tunnel_key, u.ipv4)) |
| 38 | |
/*
 * Invoke @op with the caller's arguments followed by __IP_TUNNEL_FLAG_NUM,
 * i.e. the bit count of a tunnel flag bitmap is always supplied implicitly.
 */
| 39 | #define __ipt_flag_op(op, ...) \ |
| 40 | op(__VA_ARGS__, __IP_TUNNEL_FLAG_NUM) |
| 41 | |
/* Declare a tunnel flag bitmap (DECLARE_BITMAP of __IP_TUNNEL_FLAG_NUM bits). */
| 42 | #define IP_TUNNEL_DECLARE_FLAGS(...) \ |
| 43 | __ipt_flag_op(DECLARE_BITMAP, __VA_ARGS__) |
| 44 | |
/* Thin wrappers around the bitmap_*() helpers, sized for tunnel flag bitmaps. */
| 45 | #define ip_tunnel_flags_zero(...) __ipt_flag_op(bitmap_zero, __VA_ARGS__) |
| 46 | #define ip_tunnel_flags_copy(...) __ipt_flag_op(bitmap_copy, __VA_ARGS__) |
| 47 | #define ip_tunnel_flags_and(...) __ipt_flag_op(bitmap_and, __VA_ARGS__) |
| 48 | #define ip_tunnel_flags_or(...) __ipt_flag_op(bitmap_or, __VA_ARGS__) |
| 49 | |
/* Predicates: wrap bitmap_empty(), bitmap_intersects() and bitmap_subset(). */
| 50 | #define ip_tunnel_flags_empty(...) \ |
| 51 | __ipt_flag_op(bitmap_empty, __VA_ARGS__) |
| 52 | #define ip_tunnel_flags_intersect(...) \ |
| 53 | __ipt_flag_op(bitmap_intersects, __VA_ARGS__) |
| 54 | #define ip_tunnel_flags_subset(...) \ |
| 55 | __ipt_flag_op(bitmap_subset, __VA_ARGS__) |
| 56 | |
/*
 * Tunnel key: tunnel id, outer addresses (IPv4 or IPv6, sharing one union),
 * a tunnel flag bitmap and outer header fields.
 *
 * Member order matters: IP_TUNNEL_KEY_SIZE is the end of tp_dst, and
 * ip_tunnel_key_init() zeroes everything located after that offset
 * (currently flow_flags plus any trailing padding).
 */
| 57 | struct ip_tunnel_key { |
| 58 | __be64 tun_id; |
| 59 | union { |
| 60 | struct { |
| 61 | __be32 src; |
| 62 | __be32 dst; |
| 63 | } ipv4; |
| 64 | struct { |
| 65 | struct in6_addr src; |
| 66 | struct in6_addr dst; |
| 67 | } ipv6; |
| 68 | } u; |
| 69 | IP_TUNNEL_DECLARE_FLAGS(tun_flags); |
| 70 | __be32 label; /* Flow Label for IPv6 */ |
/* NOTE(review): nhid is not written by ip_tunnel_key_init(); presumably a nexthop id - confirm. */
| 71 | u32 nhid; |
| 72 | u8 tos; /* TOS for IPv4, TC for IPv6 */ |
| 73 | u8 ttl; /* TTL for IPv4, HL for IPv6 */ |
| 74 | __be16 tp_src; |
| 75 | __be16 tp_dst; |
| 76 | __u8 flow_flags; |
| 77 | }; |
| 78 | |
/*
 * Additional encapsulation parameters of a tunnel.
 * @type selects the handler in iptun_encaps[] (see ip_encap_hlen() and
 * ip_tunnel_encap()); TUNNEL_ENCAP_NONE means no extra encapsulation.
 * @sport/@dport are big-endian port numbers.
 */
| 79 | struct ip_tunnel_encap { |
| 80 | u16 type; |
| 81 | u16 flags; |
| 82 | __be16 sport; |
| 83 | __be16 dport; |
| 84 | }; |
| 85 | |
| 86 | /* Flags for ip_tunnel_info mode. */ |
| 87 | #define IP_TUNNEL_INFO_TX 0x01 /* represents tx tunnel parameters */ |
| 88 | #define IP_TUNNEL_INFO_IPV6 0x02 /* key contains IPv6 addresses */ |
| 89 | #define IP_TUNNEL_INFO_BRIDGE 0x04 /* represents a bridged tunnel id */ |
| 90 | |
| 91 | /* Maximum tunnel options length: the largest value the options_len member can hold. */ |
| 92 | #define IP_TUNNEL_OPTS_MAX \ |
| 93 | GENMASK((sizeof_field(struct ip_tunnel_info, \ |
| 94 | options_len) * BITS_PER_BYTE) - 1, 0) |
| 95 | |
/*
 * Pointer to the memory immediately following @info, where the options are
 * stored. The result is const void * for a const @info and void * otherwise.
 */
| 96 | #define ip_tunnel_info_opts(info) \ |
| 97 | _Generic(info, \ |
| 98 | const struct ip_tunnel_info * : ((const void *)((info) + 1)),\ |
| 99 | struct ip_tunnel_info * : ((void *)((info) + 1))\ |
| 100 | ) |
| 101 | |
/*
 * Tunnel metadata: key, encapsulation parameters and (with CONFIG_DST_CACHE)
 * a dst cache. Options, if any, are laid out directly after this struct
 * (see ip_tunnel_info_opts()); @options_len is their length in bytes and
 * @mode holds IP_TUNNEL_INFO_* flags.
 */
| 102 | struct ip_tunnel_info { |
| 103 | struct ip_tunnel_key key; |
| 104 | struct ip_tunnel_encap encap; |
| 105 | #ifdef CONFIG_DST_CACHE |
| 106 | struct dst_cache dst_cache; |
| 107 | #endif |
| 108 | u8 options_len; |
| 109 | u8 mode; |
| 110 | }; |
| 111 | |
| 112 | /* 6rd prefix/relay information; only defined when CONFIG_IPV6_SIT_6RD is set. */ |
| 113 | #ifdef CONFIG_IPV6_SIT_6RD |
| 114 | struct ip_tunnel_6rd_parm { |
| 115 | struct in6_addr prefix; |
| 116 | __be32 relay_prefix; |
| 117 | u16 prefixlen; |
| 118 | u16 relay_prefixlen; |
| 119 | }; |
| 120 | #endif |
| 121 | |
/*
 * Node of the singly linked list hanging off ip_tunnel::prl ("potential
 * router list"). The @next link is RCU-annotated.
 * NOTE(review): @rcu_head is presumably used for deferred freeing - confirm.
 */
| 122 | struct ip_tunnel_prl_entry { |
| 123 | struct ip_tunnel_prl_entry __rcu *next; |
| 124 | __be32 addr; |
| 125 | u16 flags; |
| 126 | struct rcu_head rcu_head; |
| 127 | }; |
| 128 | |
/* Forward declaration; only pointers to it are used in this header. */
| 129 | struct metadata_dst; |
| 130 | |
| 131 | /* Kernel-side variant of ip_tunnel_parm: i_flags/o_flags are tunnel flag bitmaps (__IP_TUNNEL_FLAG_NUM bits). */ |
| 132 | struct ip_tunnel_parm_kern { |
| 133 | char name[IFNAMSIZ]; |
| 134 | IP_TUNNEL_DECLARE_FLAGS(i_flags); |
| 135 | IP_TUNNEL_DECLARE_FLAGS(o_flags); |
| 136 | __be32 i_key; |
| 137 | __be32 o_key; |
| 138 | int link; |
| 139 | struct iphdr iph; |
| 140 | }; |
| 141 | |
/*
 * Per-device tunnel state: the owning net_device, ICMP error bookkeeping,
 * protocol-specific fields (GRE, ERSPAN, SIT), tunnel parameters and
 * encapsulation settings.
 */
| 142 | struct ip_tunnel { |
| 143 | struct ip_tunnel __rcu *next; |
| 144 | struct hlist_node hash_node; |
| 145 | |
| 146 | struct net_device *dev; |
| 147 | netdevice_tracker dev_tracker; |
| 148 | |
| 149 | struct net *net; /* netns for packet i/o */ |
| 150 | |
| 151 | unsigned long err_time; /* Time when the last ICMP error |
| 152 | * arrived */ |
| 153 | int err_count; /* Number of arrived ICMP errors */ |
| 154 | |
| 155 | /* These three fields used only by GRE */ |
| 156 | u32 i_seqno; /* The last seen seqno */ |
| 157 | atomic_t o_seqno; /* The last output seqno */ |
| 158 | int tun_hlen; /* Precalculated header length */ |
| 159 | |
| 160 | /* These four fields used only by ERSPAN */ |
| 161 | u32 index; /* ERSPAN type II index */ |
| 162 | u8 erspan_ver; /* ERSPAN version */ |
| 163 | u8 dir; /* ERSPAN direction */ |
| 164 | u16 hwid; /* ERSPAN hardware ID */ |
| 165 | |
| 166 | struct dst_cache dst_cache; |
| 167 | |
| 168 | struct ip_tunnel_parm_kern parms; |
| 169 | |
| 170 | int mlink; |
| 171 | int encap_hlen; /* Encap header length (FOU,GUE) */ |
| 172 | int hlen; /* tun_hlen + encap_hlen */ |
| 173 | struct ip_tunnel_encap encap; |
| 174 | |
| 175 | /* for SIT */ |
| 176 | #ifdef CONFIG_IPV6_SIT_6RD |
| 177 | struct ip_tunnel_6rd_parm ip6rd; |
| 178 | #endif |
| 179 | struct ip_tunnel_prl_entry __rcu *prl; /* potential router list */ |
| 180 | unsigned int prl_count; /* # of entries in PRL */ |
| 181 | unsigned int ip_tnl_net_id; |
| 182 | struct gro_cells gro_cells; |
| 183 | __u32 fwmark; |
| 184 | bool collect_md; |
| 185 | bool ignore_df; |
| 186 | }; |
| 187 | |
/*
 * Per-packet tunnel header information; ip_tunnel_rcv() takes a pointer to
 * one as its @tpi argument. @flags is a tunnel flag bitmap.
 * NOTE(review): @key/@seq presumably carry the parsed key and sequence
 * number, @hdr_len the tunnel header length - confirm against the parsers.
 */
| 188 | struct tnl_ptk_info { |
| 189 | IP_TUNNEL_DECLARE_FLAGS(flags); |
| 190 | __be16 proto; |
| 191 | __be32 key; |
| 192 | __be32 seq; |
| 193 | int hdr_len; |
| 194 | }; |
| 195 | |
/* NOTE(review): presumably result codes of tunnel receive handlers - their users are not in this header, confirm. */
| 196 | #define PACKET_RCVD 0 |
| 197 | #define PACKET_REJECT 1 |
| 198 | #define PACKET_NEXT 2 |
| 199 | |
/* Size of the tunnels[] hash table in struct ip_tunnel_net: 2^IP_TNL_HASH_BITS buckets. */
| 200 | #define IP_TNL_HASH_BITS 7 |
| 201 | #define IP_TNL_HASH_SIZE (1 << IP_TNL_HASH_BITS) |
| 202 | |
/*
 * Tunnel bookkeeping: a fallback tunnel device, the rtnl link ops, a hash
 * table of tunnels (IP_TNL_HASH_SIZE buckets) and an RCU-annotated pointer
 * to the collect_md tunnel.
 * NOTE(review): presumably one instance per netns and tunnel type - confirm.
 */
| 203 | struct ip_tunnel_net { |
| 204 | struct net_device *fb_tunnel_dev; |
| 205 | struct rtnl_link_ops *rtnl_link_ops; |
| 206 | struct hlist_head tunnels[IP_TNL_HASH_SIZE]; |
| 207 | struct ip_tunnel __rcu *collect_md_tun; |
| 208 | int type; |
| 209 | }; |
| 210 | |
| 211 | static inline void ip_tunnel_set_options_present(unsigned long *flags) |
| 212 | { |
| 213 | IP_TUNNEL_DECLARE_FLAGS(present) = { }; |
| 214 | |
| 215 | __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, present); |
| 216 | __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present); |
| 217 | __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present); |
| 218 | __set_bit(IP_TUNNEL_GTP_OPT_BIT, present); |
| 219 | __set_bit(IP_TUNNEL_PFCP_OPT_BIT, present); |
| 220 | |
| 221 | ip_tunnel_flags_or(flags, flags, present); |
| 222 | } |
| 223 | |
| 224 | static inline void ip_tunnel_clear_options_present(unsigned long *flags) |
| 225 | { |
| 226 | IP_TUNNEL_DECLARE_FLAGS(present) = { }; |
| 227 | |
| 228 | __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, present); |
| 229 | __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present); |
| 230 | __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present); |
| 231 | __set_bit(IP_TUNNEL_GTP_OPT_BIT, present); |
| 232 | __set_bit(IP_TUNNEL_PFCP_OPT_BIT, present); |
| 233 | |
| 234 | __ipt_flag_op(bitmap_andnot, flags, flags, present); |
| 235 | } |
| 236 | |
| 237 | static inline bool ip_tunnel_is_options_present(const unsigned long *flags) |
| 238 | { |
| 239 | IP_TUNNEL_DECLARE_FLAGS(present) = { }; |
| 240 | |
| 241 | __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, present); |
| 242 | __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present); |
| 243 | __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present); |
| 244 | __set_bit(IP_TUNNEL_GTP_OPT_BIT, present); |
| 245 | __set_bit(IP_TUNNEL_PFCP_OPT_BIT, present); |
| 246 | |
| 247 | return ip_tunnel_flags_intersect(flags, present); |
| 248 | } |
| 249 | |
| 250 | static inline bool ip_tunnel_flags_is_be16_compat(const unsigned long *flags) |
| 251 | { |
| 252 | IP_TUNNEL_DECLARE_FLAGS(supp) = { }; |
| 253 | |
| 254 | bitmap_set(supp, 0, BITS_PER_TYPE(__be16)); |
| 255 | __set_bit(IP_TUNNEL_VTI_BIT, supp); |
| 256 | |
| 257 | return ip_tunnel_flags_subset(flags, supp); |
| 258 | } |
| 259 | |
| 260 | static inline void ip_tunnel_flags_from_be16(unsigned long *dst, __be16 flags) |
| 261 | { |
| 262 | ip_tunnel_flags_zero(dst); |
| 263 | |
| 264 | bitmap_write(dst, be16_to_cpu(flags), 0, BITS_PER_TYPE(__be16)); |
| 265 | __assign_bit(IP_TUNNEL_VTI_BIT, dst, flags & VTI_ISVTI); |
| 266 | } |
| 267 | |
| 268 | static inline __be16 ip_tunnel_flags_to_be16(const unsigned long *flags) |
| 269 | { |
| 270 | __be16 ret; |
| 271 | |
| 272 | ret = cpu_to_be16(bitmap_read(flags, 0, BITS_PER_TYPE(__be16))); |
| 273 | if (test_bit(IP_TUNNEL_VTI_BIT, flags)) |
| 274 | ret |= VTI_ISVTI; |
| 275 | |
| 276 | return ret; |
| 277 | } |
| 278 | |
| 279 | static inline void ip_tunnel_key_init(struct ip_tunnel_key *key, |
| 280 | __be32 saddr, __be32 daddr, |
| 281 | u8 tos, u8 ttl, __be32 label, |
| 282 | __be16 tp_src, __be16 tp_dst, |
| 283 | __be64 tun_id, |
| 284 | const unsigned long *tun_flags) |
| 285 | { |
| 286 | key->tun_id = tun_id; |
| 287 | key->u.ipv4.src = saddr; |
| 288 | key->u.ipv4.dst = daddr; |
| 289 | memset((unsigned char *)key + IP_TUNNEL_KEY_IPV4_PAD, |
| 290 | 0, IP_TUNNEL_KEY_IPV4_PAD_LEN); |
| 291 | key->tos = tos; |
| 292 | key->ttl = ttl; |
| 293 | key->label = label; |
| 294 | ip_tunnel_flags_copy(key->tun_flags, tun_flags); |
| 295 | |
| 296 | /* For the tunnel types on the top of IPsec, the tp_src and tp_dst of |
| 297 | * the upper tunnel are used. |
| 298 | * E.g: GRE over IPSEC, the tp_src and tp_port are zero. |
| 299 | */ |
| 300 | key->tp_src = tp_src; |
| 301 | key->tp_dst = tp_dst; |
| 302 | |
| 303 | /* Clear struct padding. */ |
| 304 | if (sizeof(*key) != IP_TUNNEL_KEY_SIZE) |
| 305 | memset((unsigned char *)key + IP_TUNNEL_KEY_SIZE, |
| 306 | 0, sizeof(*key) - IP_TUNNEL_KEY_SIZE); |
| 307 | } |
| 308 | |
| 309 | static inline bool |
| 310 | ip_tunnel_dst_cache_usable(const struct sk_buff *skb, |
| 311 | const struct ip_tunnel_info *info) |
| 312 | { |
| 313 | if (skb->mark) |
| 314 | return false; |
| 315 | |
| 316 | return !info || !test_bit(IP_TUNNEL_NOCACHE_BIT, info->key.tun_flags); |
| 317 | } |
| 318 | |
| 319 | static inline unsigned short ip_tunnel_info_af(const struct ip_tunnel_info |
| 320 | *tun_info) |
| 321 | { |
| 322 | return tun_info->mode & IP_TUNNEL_INFO_IPV6 ? AF_INET6 : AF_INET; |
| 323 | } |
| 324 | |
| 325 | static inline __be64 key32_to_tunnel_id(__be32 key) |
| 326 | { |
| 327 | #ifdef __BIG_ENDIAN |
| 328 | return (__force __be64)key; |
| 329 | #else |
| 330 | return (__force __be64)((__force u64)key << 32); |
| 331 | #endif |
| 332 | } |
| 333 | |
| 334 | /* Returns the least-significant 32 bits of a __be64. */ |
| 335 | static inline __be32 tunnel_id_to_key32(__be64 tun_id) |
| 336 | { |
| 337 | #ifdef __BIG_ENDIAN |
| 338 | return (__force __be32)tun_id; |
| 339 | #else |
| 340 | return (__force __be32)((__force u64)tun_id >> 32); |
| 341 | #endif |
| 342 | } |
| 343 | |
| 344 | #ifdef CONFIG_INET |
| 345 | |
| 346 | static inline void ip_tunnel_init_flow(struct flowi4 *fl4, |
| 347 | int proto, |
| 348 | __be32 daddr, __be32 saddr, |
| 349 | __be32 key, __u8 tos, |
| 350 | struct net *net, int oif, |
| 351 | __u32 mark, __u32 tun_inner_hash, |
| 352 | __u8 flow_flags) |
| 353 | { |
| 354 | memset(fl4, 0, sizeof(*fl4)); |
| 355 | |
| 356 | if (oif) { |
| 357 | fl4->flowi4_l3mdev = l3mdev_master_upper_ifindex_by_index_rcu(net, oif); |
| 358 | /* Legacy VRF/l3mdev use case */ |
| 359 | fl4->flowi4_oif = fl4->flowi4_l3mdev ? 0 : oif; |
| 360 | } |
| 361 | |
| 362 | fl4->daddr = daddr; |
| 363 | fl4->saddr = saddr; |
| 364 | fl4->flowi4_tos = tos; |
| 365 | fl4->flowi4_proto = proto; |
| 366 | fl4->fl4_gre_key = key; |
| 367 | fl4->flowi4_mark = mark; |
| 368 | fl4->flowi4_multipath_hash = tun_inner_hash; |
| 369 | fl4->flowi4_flags = flow_flags; |
| 370 | } |
| 371 | |
/*
 * Core IP tunnel API. These are declarations only; the implementations are
 * not part of this header.
 */
/* Device and per-netns lifetime. */
| 372 | int ip_tunnel_init(struct net_device *dev); |
| 373 | void ip_tunnel_uninit(struct net_device *dev); |
| 374 | void ip_tunnel_dellink(struct net_device *dev, struct list_head *head); |
| 375 | struct net *ip_tunnel_get_link_net(const struct net_device *dev); |
| 376 | int ip_tunnel_get_iflink(const struct net_device *dev); |
| 377 | int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id, |
| 378 | struct rtnl_link_ops *ops, char *devname); |
| 379 | |
| 380 | void ip_tunnel_delete_nets(struct list_head *list_net, unsigned int id, |
| 381 | struct rtnl_link_ops *ops, |
| 382 | struct list_head *dev_to_kill); |
| 383 | |
/* Transmit paths, ioctl-style control and MTU handling. */
| 384 | void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, |
| 385 | const struct iphdr *tnl_params, const u8 protocol); |
| 386 | void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, |
| 387 | const u8 proto, int tunnel_hlen); |
| 388 | int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p, |
| 389 | int cmd); |
| 390 | bool ip_tunnel_parm_from_user(struct ip_tunnel_parm_kern *kp, |
| 391 | const void __user *data); |
| 392 | bool ip_tunnel_parm_to_user(void __user *data, struct ip_tunnel_parm_kern *kp); |
| 393 | int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr, |
| 394 | void __user *data, int cmd); |
| 395 | int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict); |
| 396 | int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); |
| 397 | |
/* Tunnel lookup and receive path. */
| 398 | struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, |
| 399 | int link, const unsigned long *flags, |
| 400 | __be32 remote, __be32 local, |
| 401 | __be32 key); |
| 402 | |
| 403 | void ip_tunnel_md_udp_encap(struct sk_buff *skb, struct ip_tunnel_info *info); |
| 404 | int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, |
| 405 | const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst, |
| 406 | bool log_ecn_error); |
/* rtnetlink integration. */
| 407 | int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], |
| 408 | struct ip_tunnel_parm_kern *p, __u32 fwmark); |
| 409 | int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], |
| 410 | struct ip_tunnel_parm_kern *p, __u32 fwmark); |
| 411 | void ip_tunnel_setup(struct net_device *dev, unsigned int net_id); |
| 412 | |
| 413 | bool ip_tunnel_netlink_encap_parms(struct nlattr *data[], |
| 414 | struct ip_tunnel_encap *encap); |
| 415 | |
| 416 | void ip_tunnel_netlink_parms(struct nlattr *data[], |
| 417 | struct ip_tunnel_parm_kern *parms); |
| 418 | |
/* Link-layer header ops shared by tunnel devices, and their protocol parser. */
| 419 | extern const struct header_ops ip_tunnel_header_ops; |
| 420 | __be16 ip_tunnel_parse_protocol(const struct sk_buff *skb); |
| 421 | |
/*
 * Callbacks of one additional-encapsulation type, stored in iptun_encaps[].
 * @encap_hlen is invoked by ip_encap_hlen() and @build_header by
 * ip_tunnel_encap(); both callers treat a missing callback as -EINVAL.
 * @err_handler is not called from this header.
 */
| 422 | struct ip_tunnel_encap_ops { |
| 423 | size_t (*encap_hlen)(struct ip_tunnel_encap *e); |
| 424 | int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e, |
| 425 | u8 *protocol, struct flowi4 *fl4); |
| 426 | int (*err_handler)(struct sk_buff *skb, u32 info); |
| 427 | }; |
| 428 | |
/* Number of slots in iptun_encaps[]; ip_tunnel_encap::type values at or above it are rejected with -EINVAL. */
| 429 | #define MAX_IPTUN_ENCAP_OPS 8 |
| 430 | |
/* RCU-annotated table of encapsulation handlers, indexed by ip_tunnel_encap::type. */
| 431 | extern const struct ip_tunnel_encap_ops __rcu * |
| 432 | iptun_encaps[MAX_IPTUN_ENCAP_OPS]; |
| 433 | |
/* NOTE(review): presumably install/remove @op in slot @num of iptun_encaps[] - confirm in the implementation. */
| 434 | int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *op, |
| 435 | unsigned int num); |
| 436 | int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *op, |
| 437 | unsigned int num); |
| 438 | |
| 439 | int ip_tunnel_encap_setup(struct ip_tunnel *t, |
| 440 | struct ip_tunnel_encap *ipencap); |
| 441 | |
| 442 | static inline bool pskb_inet_may_pull(struct sk_buff *skb) |
| 443 | { |
| 444 | int nhlen; |
| 445 | |
| 446 | switch (skb->protocol) { |
| 447 | #if IS_ENABLED(CONFIG_IPV6) |
| 448 | case htons(ETH_P_IPV6): |
| 449 | nhlen = sizeof(struct ipv6hdr); |
| 450 | break; |
| 451 | #endif |
| 452 | case htons(ETH_P_IP): |
| 453 | nhlen = sizeof(struct iphdr); |
| 454 | break; |
| 455 | default: |
| 456 | nhlen = 0; |
| 457 | } |
| 458 | |
| 459 | return pskb_network_may_pull(skb, nhlen); |
| 460 | } |
| 461 | |
| 462 | /* Variant of pskb_inet_may_pull(). |
| 463 | */ |
| 464 | static inline bool skb_vlan_inet_prepare(struct sk_buff *skb, |
| 465 | bool inner_proto_inherit) |
| 466 | { |
| 467 | int nhlen = 0, maclen = inner_proto_inherit ? 0 : ETH_HLEN; |
| 468 | __be16 type = skb->protocol; |
| 469 | |
| 470 | /* Essentially this is skb_protocol(skb, true) |
| 471 | * And we get MAC len. |
| 472 | */ |
| 473 | if (eth_type_vlan(type)) |
| 474 | type = __vlan_get_protocol(skb, type, &maclen); |
| 475 | |
| 476 | switch (type) { |
| 477 | #if IS_ENABLED(CONFIG_IPV6) |
| 478 | case htons(ETH_P_IPV6): |
| 479 | nhlen = sizeof(struct ipv6hdr); |
| 480 | break; |
| 481 | #endif |
| 482 | case htons(ETH_P_IP): |
| 483 | nhlen = sizeof(struct iphdr); |
| 484 | break; |
| 485 | } |
| 486 | /* For ETH_P_IPV6/ETH_P_IP we make sure to pull |
| 487 | * a base network header in skb->head. |
| 488 | */ |
| 489 | if (!pskb_may_pull(skb, maclen + nhlen)) |
| 490 | return false; |
| 491 | |
| 492 | skb_set_network_header(skb, maclen); |
| 493 | return true; |
| 494 | } |
| 495 | |
| 496 | static inline int ip_encap_hlen(struct ip_tunnel_encap *e) |
| 497 | { |
| 498 | const struct ip_tunnel_encap_ops *ops; |
| 499 | int hlen = -EINVAL; |
| 500 | |
| 501 | if (e->type == TUNNEL_ENCAP_NONE) |
| 502 | return 0; |
| 503 | |
| 504 | if (e->type >= MAX_IPTUN_ENCAP_OPS) |
| 505 | return -EINVAL; |
| 506 | |
| 507 | rcu_read_lock(); |
| 508 | ops = rcu_dereference(iptun_encaps[e->type]); |
| 509 | if (likely(ops && ops->encap_hlen)) |
| 510 | hlen = ops->encap_hlen(e); |
| 511 | rcu_read_unlock(); |
| 512 | |
| 513 | return hlen; |
| 514 | } |
| 515 | |
| 516 | static inline int ip_tunnel_encap(struct sk_buff *skb, |
| 517 | struct ip_tunnel_encap *e, |
| 518 | u8 *protocol, struct flowi4 *fl4) |
| 519 | { |
| 520 | const struct ip_tunnel_encap_ops *ops; |
| 521 | int ret = -EINVAL; |
| 522 | |
| 523 | if (e->type == TUNNEL_ENCAP_NONE) |
| 524 | return 0; |
| 525 | |
| 526 | if (e->type >= MAX_IPTUN_ENCAP_OPS) |
| 527 | return -EINVAL; |
| 528 | |
| 529 | rcu_read_lock(); |
| 530 | ops = rcu_dereference(iptun_encaps[e->type]); |
| 531 | if (likely(ops && ops->build_header)) |
| 532 | ret = ops->build_header(skb, e, protocol, fl4); |
| 533 | rcu_read_unlock(); |
| 534 | |
| 535 | return ret; |
| 536 | } |
| 537 | |
| 538 | /* Extract dsfield from inner protocol */ |
| 539 | static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph, |
| 540 | const struct sk_buff *skb) |
| 541 | { |
| 542 | __be16 payload_protocol = skb_protocol(skb, true); |
| 543 | |
| 544 | if (payload_protocol == htons(ETH_P_IP)) |
| 545 | return iph->tos; |
| 546 | else if (payload_protocol == htons(ETH_P_IPV6)) |
| 547 | return ipv6_get_dsfield((const struct ipv6hdr *)iph); |
| 548 | else |
| 549 | return 0; |
| 550 | } |
| 551 | |
| 552 | static inline __be32 ip_tunnel_get_flowlabel(const struct iphdr *iph, |
| 553 | const struct sk_buff *skb) |
| 554 | { |
| 555 | __be16 payload_protocol = skb_protocol(skb, true); |
| 556 | |
| 557 | if (payload_protocol == htons(ETH_P_IPV6)) |
| 558 | return ip6_flowlabel((const struct ipv6hdr *)iph); |
| 559 | else |
| 560 | return 0; |
| 561 | } |
| 562 | |
| 563 | static inline u8 ip_tunnel_get_ttl(const struct iphdr *iph, |
| 564 | const struct sk_buff *skb) |
| 565 | { |
| 566 | __be16 payload_protocol = skb_protocol(skb, true); |
| 567 | |
| 568 | if (payload_protocol == htons(ETH_P_IP)) |
| 569 | return iph->ttl; |
| 570 | else if (payload_protocol == htons(ETH_P_IPV6)) |
| 571 | return ((const struct ipv6hdr *)iph)->hop_limit; |
| 572 | else |
| 573 | return 0; |
| 574 | } |
| 575 | |
| 576 | /* Propagate ECN bits out */ |
| 577 | static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph, |
| 578 | const struct sk_buff *skb) |
| 579 | { |
| 580 | u8 inner = ip_tunnel_get_dsfield(iph, skb); |
| 581 | |
| 582 | return INET_ECN_encapsulate(tos, inner); |
| 583 | } |
| 584 | |
| 585 | int __iptunnel_pull_header(struct sk_buff *skb, int hdr_len, |
| 586 | __be16 inner_proto, bool raw_proto, bool xnet); |
| 587 | |
| 588 | static inline int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, |
| 589 | __be16 inner_proto, bool xnet) |
| 590 | { |
| 591 | return __iptunnel_pull_header(skb, hdr_len, inner_proto, false, xnet); |
| 592 | } |
| 593 | |
/* Transmit, metadata-reply, PMTU and offload helpers; declarations only, implemented outside this header. */
| 594 | void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, |
| 595 | __be32 src, __be32 dst, u8 proto, |
| 596 | u8 tos, u8 ttl, __be16 df, bool xnet); |
| 597 | struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, |
| 598 | gfp_t flags); |
| 599 | int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst, |
| 600 | int headroom, bool reply); |
| 601 | |
| 602 | int iptunnel_handle_offloads(struct sk_buff *skb, int gso_type_mask); |
| 603 | |
| 604 | static inline int iptunnel_pull_offloads(struct sk_buff *skb) |
| 605 | { |
| 606 | if (skb_is_gso(skb)) { |
| 607 | int err; |
| 608 | |
| 609 | err = skb_unclone(skb, GFP_ATOMIC); |
| 610 | if (unlikely(err)) |
| 611 | return err; |
| 612 | skb_shinfo(skb)->gso_type &= ~(NETIF_F_GSO_ENCAP_ALL >> |
| 613 | NETIF_F_GSO_SHIFT); |
| 614 | } |
| 615 | |
| 616 | skb->encapsulation = 0; |
| 617 | return 0; |
| 618 | } |
| 619 | |
| 620 | static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len) |
| 621 | { |
| 622 | if (pkt_len > 0) { |
| 623 | struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats); |
| 624 | |
| 625 | u64_stats_update_begin(&tstats->syncp); |
| 626 | u64_stats_add(&tstats->tx_bytes, pkt_len); |
| 627 | u64_stats_inc(&tstats->tx_packets); |
| 628 | u64_stats_update_end(&tstats->syncp); |
| 629 | put_cpu_ptr(tstats); |
| 630 | return; |
| 631 | } |
| 632 | |
| 633 | if (pkt_len < 0) { |
| 634 | DEV_STATS_INC(dev, tx_errors); |
| 635 | DEV_STATS_INC(dev, tx_aborted_errors); |
| 636 | } else { |
| 637 | DEV_STATS_INC(dev, tx_dropped); |
| 638 | } |
| 639 | } |
| 640 | |
| 641 | static inline void ip_tunnel_info_opts_get(void *to, |
| 642 | const struct ip_tunnel_info *info) |
| 643 | { |
| 644 | memcpy(to, info + 1, info->options_len); |
| 645 | } |
| 646 | |
| 647 | static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, |
| 648 | const void *from, int len, |
| 649 | const unsigned long *flags) |
| 650 | { |
| 651 | info->options_len = len; |
| 652 | if (len > 0) { |
| 653 | memcpy(ip_tunnel_info_opts(info), from, len); |
| 654 | ip_tunnel_flags_or(info->key.tun_flags, info->key.tun_flags, |
| 655 | flags); |
| 656 | } |
| 657 | } |
| 658 | |
| 659 | static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate) |
| 660 | { |
| 661 | return (struct ip_tunnel_info *)lwtstate->data; |
| 662 | } |
| 663 | |
| 664 | DECLARE_STATIC_KEY_FALSE(ip_tunnel_metadata_cnt); |
| 665 | |
| 666 | /* Returns > 0 if metadata should be collected */ |
| 667 | static inline int ip_tunnel_collect_metadata(void) |
| 668 | { |
| 669 | return static_branch_unlikely(&ip_tunnel_metadata_cnt); |
| 670 | } |
| 671 | |
/* Boot-time (__init) setup of the tunnel core; implemented outside this header. */
| 672 | void __init ip_tunnel_core_init(void); |
| 673 | |
/* NOTE(review): presumably raise/drop the ip_tunnel_metadata_cnt static key tested by ip_tunnel_collect_metadata() - confirm in the implementation. */
| 674 | void ip_tunnel_need_metadata(void); |
| 675 | void ip_tunnel_unneed_metadata(void); |
| 676 | |
| 677 | #else /* CONFIG_INET */ |
| 678 | |
/* !CONFIG_INET stub: there is never any tunnel info, always returns NULL. */
static inline struct ip_tunnel_info *
lwt_tun_info(struct lwtunnel_state *lwtstate)
{
	return NULL;
}
| 683 | |
/* !CONFIG_INET stub: intentionally does nothing. */
static inline void ip_tunnel_need_metadata(void)
{
}
| 687 | |
/* !CONFIG_INET stub: intentionally does nothing. */
static inline void ip_tunnel_unneed_metadata(void)
{
}
| 691 | |
/* !CONFIG_INET stub: copies nothing, @to is left untouched. */
static inline void ip_tunnel_info_opts_get(void *to,
					   const struct ip_tunnel_info *info)
{
}
| 696 | |
| 697 | static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, |
| 698 | const void *from, int len, |
| 699 | const unsigned long *flags) |
| 700 | { |
| 701 | info->options_len = 0; |
| 702 | } |
| 703 | |
| 704 | #endif /* CONFIG_INET */ |
| 705 | |
| 706 | #endif /* __NET_IP_TUNNELS_H */ |