| 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
| 2 | /* |
| 3 | * xfrm_output.c - Common IPsec encapsulation code. |
| 4 | * |
| 5 | * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au> |
| 6 | */ |
| 7 | |
| 8 | #include <linux/errno.h> |
| 9 | #include <linux/module.h> |
| 10 | #include <linux/netdevice.h> |
| 11 | #include <linux/netfilter.h> |
| 12 | #include <linux/skbuff.h> |
| 13 | #include <linux/slab.h> |
| 14 | #include <linux/spinlock.h> |
| 15 | #include <net/dst.h> |
| 16 | #include <net/gso.h> |
| 17 | #include <net/icmp.h> |
| 18 | #include <net/inet_ecn.h> |
| 19 | #include <net/xfrm.h> |
| 20 | |
| 21 | #if IS_ENABLED(CONFIG_IPV6) |
| 22 | #include <net/ip6_route.h> |
| 23 | #include <net/ipv6_stubs.h> |
| 24 | #endif |
| 25 | |
| 26 | #include "xfrm_inout.h" |
| 27 | |
/* Forward declarations for helpers defined further down in this file:
 * xfrm_output2() is handed to nf_hook() as the continuation callback in
 * xfrm_output_resume(), and xfrm_inner_extract_output() is called from
 * the xfrm4/xfrm6 prepare_output helpers.
 */
static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb);
static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb);
| 30 | |
| 31 | static int xfrm_skb_check_space(struct sk_buff *skb) |
| 32 | { |
| 33 | struct dst_entry *dst = skb_dst(skb); |
| 34 | int nhead = dst->header_len + LL_RESERVED_SPACE(dst->dev) |
| 35 | - skb_headroom(skb); |
| 36 | int ntail = dst->dev->needed_tailroom - skb_tailroom(skb); |
| 37 | |
| 38 | if (nhead <= 0) { |
| 39 | if (ntail <= 0) |
| 40 | return 0; |
| 41 | nhead = 0; |
| 42 | } else if (ntail < 0) |
| 43 | ntail = 0; |
| 44 | |
| 45 | return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC); |
| 46 | } |
| 47 | |
/* Destinations are stackable: each xfrm dst has a child that describes
 * the next step of the packet's path through the networking stack.
 *
 * Replace nothing on the skb itself; just hand back the child of the
 * skb's current dst (passed through dst_clone()) after dropping the
 * skb's dst with skb_dst_drop().  The caller installs the result.
 */
static struct dst_entry *skb_dst_pop(struct sk_buff *skb)
{
	struct dst_entry *next = xfrm_dst_child(skb_dst(skb));

	/* Clone the child before the parent is dropped from the skb. */
	next = dst_clone(next);
	skb_dst_drop(skb);

	return next;
}
| 59 | |
| 60 | /* Add encapsulation header. |
| 61 | * |
| 62 | * The IP header will be moved forward to make space for the encapsulation |
| 63 | * header. |
| 64 | */ |
| 65 | static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) |
| 66 | { |
| 67 | struct iphdr *iph = ip_hdr(skb); |
| 68 | int ihl = iph->ihl * 4; |
| 69 | |
| 70 | skb_set_inner_transport_header(skb, skb_transport_offset(skb)); |
| 71 | |
| 72 | skb_set_network_header(skb, -x->props.header_len); |
| 73 | skb->mac_header = skb->network_header + |
| 74 | offsetof(struct iphdr, protocol); |
| 75 | skb->transport_header = skb->network_header + ihl; |
| 76 | __skb_pull(skb, ihl); |
| 77 | memmove(skb_network_header(skb), iph, ihl); |
| 78 | return 0; |
| 79 | } |
| 80 | |
| 81 | #if IS_ENABLED(CONFIG_IPV6_MIP6) |
/* Walk the IPv6 extension header chain to find an insertion offset.
 *
 * The walk starts right after the fixed IPv6 header, at offset
 * sizeof(struct ipv6hdr) from the network header.
 *
 * @skb:     packet whose network header is the IPv6 header
 * @nexthdr: out; points at the nexthdr byte of the last header stepped
 *           over (initially the one in the fixed IPv6 header)
 * @type:    when IPPROTO_ROUTING, the walk also stops at a routing
 *           header whose type field is non-zero
 *
 * Returns the offset, relative to the network header, at which the walk
 * stopped, or -EINVAL when an extension header does not fit before
 * skb_tail_pointer() or the offset grows beyond IPV6_MAXPLEN.
 */
static int mip6_rthdr_offset(struct sk_buff *skb, u8 **nexthdr, int type)
{
	const unsigned char *nh = skb_network_header(skb);
	unsigned int offset = sizeof(struct ipv6hdr);
	unsigned int packet_len;
	int found_rhdr = 0;

	/* Bytes available from the network header up to the tail pointer. */
	packet_len = skb_tail_pointer(skb) - nh;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset <= packet_len) {
		struct ipv6_opt_hdr *exthdr;

		switch (**nexthdr) {
		case NEXTHDR_HOP:
			/* Hop-by-hop options are always stepped over. */
			break;
		case NEXTHDR_ROUTING:
			/* Only peek at rt->type if its bytes are in range. */
			if (type == IPPROTO_ROUTING && offset + 3 <= packet_len) {
				struct ipv6_rt_hdr *rt;

				rt = (struct ipv6_rt_hdr *)(nh + offset);
				if (rt->type != 0)
					return offset;
			}
			/* Remember it: a later dest-opts header ends the walk. */
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
			/* HAO MUST NOT appear more than once.
			 * XXX: It is better to try to find by the end of
			 * XXX: packet if HAO exists.
			 */
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) {
				net_dbg_ratelimited("mip6: hao exists already, override\n");
				return offset;
			}

			/* Destination options after a routing header: stop. */
			if (found_rhdr)
				return offset;

			break;
		default:
			/* Any other next-header value ends the walk here. */
			return offset;
		}

		/* The generic option header itself must be within range. */
		if (offset + sizeof(struct ipv6_opt_hdr) > packet_len)
			return -EINVAL;

		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
		/* Advance past this extension header. */
		offset += ipv6_optlen(exthdr);
		if (offset > IPV6_MAXPLEN)
			return -EINVAL;
		*nexthdr = &exthdr->nexthdr;
	}

	return -EINVAL;
}
| 139 | #endif |
| 140 | |
| 141 | #if IS_ENABLED(CONFIG_IPV6) |
| 142 | static int xfrm6_hdr_offset(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr) |
| 143 | { |
| 144 | switch (x->type->proto) { |
| 145 | #if IS_ENABLED(CONFIG_IPV6_MIP6) |
| 146 | case IPPROTO_DSTOPTS: |
| 147 | case IPPROTO_ROUTING: |
| 148 | return mip6_rthdr_offset(skb, prevhdr, x->type->proto); |
| 149 | #endif |
| 150 | default: |
| 151 | break; |
| 152 | } |
| 153 | |
| 154 | return ip6_find_1stfragopt(skb, prevhdr); |
| 155 | } |
| 156 | #endif |
| 157 | |
| 158 | /* Add encapsulation header. |
| 159 | * |
| 160 | * The IP header and mutable extension headers will be moved forward to make |
| 161 | * space for the encapsulation header. |
| 162 | */ |
| 163 | static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) |
| 164 | { |
| 165 | #if IS_ENABLED(CONFIG_IPV6) |
| 166 | struct ipv6hdr *iph; |
| 167 | u8 *prevhdr; |
| 168 | int hdr_len; |
| 169 | |
| 170 | iph = ipv6_hdr(skb); |
| 171 | skb_set_inner_transport_header(skb, skb_transport_offset(skb)); |
| 172 | |
| 173 | hdr_len = xfrm6_hdr_offset(x, skb, &prevhdr); |
| 174 | if (hdr_len < 0) |
| 175 | return hdr_len; |
| 176 | skb_set_mac_header(skb, |
| 177 | (prevhdr - x->props.header_len) - skb->data); |
| 178 | skb_set_network_header(skb, -x->props.header_len); |
| 179 | skb->transport_header = skb->network_header + hdr_len; |
| 180 | __skb_pull(skb, hdr_len); |
| 181 | memmove(ipv6_hdr(skb), iph, hdr_len); |
| 182 | return 0; |
| 183 | #else |
| 184 | WARN_ON_ONCE(1); |
| 185 | return -EAFNOSUPPORT; |
| 186 | #endif |
| 187 | } |
| 188 | |
| 189 | /* Add route optimization header space. |
| 190 | * |
| 191 | * The IP header and mutable extension headers will be moved forward to make |
| 192 | * space for the route optimization header. |
| 193 | */ |
| 194 | static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb) |
| 195 | { |
| 196 | #if IS_ENABLED(CONFIG_IPV6) |
| 197 | struct ipv6hdr *iph; |
| 198 | u8 *prevhdr; |
| 199 | int hdr_len; |
| 200 | |
| 201 | iph = ipv6_hdr(skb); |
| 202 | |
| 203 | hdr_len = xfrm6_hdr_offset(x, skb, &prevhdr); |
| 204 | if (hdr_len < 0) |
| 205 | return hdr_len; |
| 206 | skb_set_mac_header(skb, |
| 207 | (prevhdr - x->props.header_len) - skb->data); |
| 208 | skb_set_network_header(skb, -x->props.header_len); |
| 209 | skb->transport_header = skb->network_header + hdr_len; |
| 210 | __skb_pull(skb, hdr_len); |
| 211 | memmove(ipv6_hdr(skb), iph, hdr_len); |
| 212 | |
| 213 | return 0; |
| 214 | #else |
| 215 | WARN_ON_ONCE(1); |
| 216 | return -EAFNOSUPPORT; |
| 217 | #endif |
| 218 | } |
| 219 | |
/* Add encapsulation header.
 *
 * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
 *
 * When the control block records a non-zero optlen, a BEET pseudo
 * header (struct ip_beet_phdr) is filled in at the pulled data pointer,
 * the top header's protocol becomes IPPROTO_BEETPH and its ihl is reset
 * to that of an option-less IPv4 header.
 *
 * Returns 0 on success, or -EINVAL (after a WARN) if optlen is negative.
 */
static int xfrm4_beet_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	struct ip_beet_phdr *ph;
	struct iphdr *top_iph;
	int hdrlen, optlen;

	hdrlen = 0;
	optlen = XFRM_MODE_SKB_CB(skb)->optlen;
	/* Pseudo header size: the maximum, less 4 when bit 2 of optlen is set. */
	if (unlikely(optlen))
		hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4);

	/* Place the top header before skb->data; the (ihl - sizeof(iphdr))
	 * term shifts it by the number of option bytes in the inner header.
	 */
	skb_set_network_header(skb, -x->props.header_len - hdrlen +
			       (XFRM_MODE_SKB_CB(skb)->ihl - sizeof(*top_iph)));
	/* NOTE(review): presumably header_len already reserves
	 * IPV4_BEET_PHMAXLEN for non-IPv6 selectors - confirm against the
	 * code that computes x->props.header_len.
	 */
	if (x->sel.family != AF_INET6)
		skb->network_header += IPV4_BEET_PHMAXLEN;
	/* mac_header is reused to point at the IPv4 protocol field. */
	skb->mac_header = skb->network_header +
			  offsetof(struct iphdr, protocol);
	skb->transport_header = skb->network_header + sizeof(*top_iph);

	xfrm4_beet_make_header(skb);

	/* ph is the data pointer returned by the pull. */
	ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdrlen);

	top_iph = ip_hdr(skb);

	if (unlikely(optlen)) {
		if (WARN_ON(optlen < 0))
			return -EINVAL;

		ph->padlen = 4 - (optlen & 4);
		ph->hdrlen = optlen / 8;
		/* The pseudo header takes over the original protocol value. */
		ph->nexthdr = top_iph->protocol;
		/* Padding bytes directly after the pseudo header are NOPs. */
		if (ph->padlen)
			memset(ph + 1, IPOPT_NOP, ph->padlen);

		top_iph->protocol = IPPROTO_BEETPH;
		top_iph->ihl = sizeof(struct iphdr) / 4;
	}

	/* Outer addresses come from the state, not from the inner packet. */
	top_iph->saddr = x->props.saddr.a4;
	top_iph->daddr = x->id.daddr.a4;

	return 0;
}
| 268 | |
| 269 | /* Add encapsulation header. |
| 270 | * |
| 271 | * The top IP header will be constructed per RFC 2401. |
| 272 | */ |
| 273 | static int xfrm4_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb) |
| 274 | { |
| 275 | bool small_ipv6 = (skb->protocol == htons(ETH_P_IPV6)) && (skb->len <= IPV6_MIN_MTU); |
| 276 | struct dst_entry *dst = skb_dst(skb); |
| 277 | struct iphdr *top_iph; |
| 278 | int flags; |
| 279 | |
| 280 | skb_set_inner_network_header(skb, skb_network_offset(skb)); |
| 281 | skb_set_inner_transport_header(skb, skb_transport_offset(skb)); |
| 282 | |
| 283 | skb_set_network_header(skb, -x->props.header_len); |
| 284 | skb->mac_header = skb->network_header + |
| 285 | offsetof(struct iphdr, protocol); |
| 286 | skb->transport_header = skb->network_header + sizeof(*top_iph); |
| 287 | top_iph = ip_hdr(skb); |
| 288 | |
| 289 | top_iph->ihl = 5; |
| 290 | top_iph->version = 4; |
| 291 | |
| 292 | top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family); |
| 293 | |
| 294 | /* DS disclosing depends on XFRM_SA_XFLAG_DONT_ENCAP_DSCP */ |
| 295 | if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP) |
| 296 | top_iph->tos = 0; |
| 297 | else |
| 298 | top_iph->tos = XFRM_MODE_SKB_CB(skb)->tos; |
| 299 | top_iph->tos = INET_ECN_encapsulate(top_iph->tos, |
| 300 | XFRM_MODE_SKB_CB(skb)->tos); |
| 301 | |
| 302 | flags = x->props.flags; |
| 303 | if (flags & XFRM_STATE_NOECN) |
| 304 | IP_ECN_clear(top_iph); |
| 305 | |
| 306 | top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) || small_ipv6 ? |
| 307 | 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF)); |
| 308 | |
| 309 | top_iph->ttl = ip4_dst_hoplimit(xfrm_dst_child(dst)); |
| 310 | |
| 311 | top_iph->saddr = x->props.saddr.a4; |
| 312 | top_iph->daddr = x->id.daddr.a4; |
| 313 | ip_select_ident(dev_net(dst->dev), skb, NULL); |
| 314 | |
| 315 | return 0; |
| 316 | } |
| 317 | |
| 318 | #if IS_ENABLED(CONFIG_IPV6) |
| 319 | static int xfrm6_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb) |
| 320 | { |
| 321 | struct dst_entry *dst = skb_dst(skb); |
| 322 | struct ipv6hdr *top_iph; |
| 323 | int dsfield; |
| 324 | |
| 325 | skb_set_inner_network_header(skb, skb_network_offset(skb)); |
| 326 | skb_set_inner_transport_header(skb, skb_transport_offset(skb)); |
| 327 | |
| 328 | skb_set_network_header(skb, -x->props.header_len); |
| 329 | skb->mac_header = skb->network_header + |
| 330 | offsetof(struct ipv6hdr, nexthdr); |
| 331 | skb->transport_header = skb->network_header + sizeof(*top_iph); |
| 332 | top_iph = ipv6_hdr(skb); |
| 333 | |
| 334 | top_iph->version = 6; |
| 335 | |
| 336 | memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl, |
| 337 | sizeof(top_iph->flow_lbl)); |
| 338 | top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family); |
| 339 | |
| 340 | if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP) |
| 341 | dsfield = 0; |
| 342 | else |
| 343 | dsfield = XFRM_MODE_SKB_CB(skb)->tos; |
| 344 | dsfield = INET_ECN_encapsulate(dsfield, XFRM_MODE_SKB_CB(skb)->tos); |
| 345 | if (x->props.flags & XFRM_STATE_NOECN) |
| 346 | dsfield &= ~INET_ECN_MASK; |
| 347 | ipv6_change_dsfield(top_iph, 0, dsfield); |
| 348 | top_iph->hop_limit = ip6_dst_hoplimit(xfrm_dst_child(dst)); |
| 349 | top_iph->saddr = *(struct in6_addr *)&x->props.saddr; |
| 350 | top_iph->daddr = *(struct in6_addr *)&x->id.daddr; |
| 351 | return 0; |
| 352 | } |
| 353 | |
/* IPv6 BEET mode: build the top IPv6 header.
 *
 * When the control block records a non-zero optlen, a BEET pseudo
 * header (struct ip_beet_phdr) is filled in at the pulled data pointer
 * and the top header's nexthdr becomes IPPROTO_BEETPH.
 *
 * Returns 0 on success, or -EINVAL (after a WARN) if optlen is negative.
 */
static int xfrm6_beet_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	struct ipv6hdr *top_iph;
	struct ip_beet_phdr *ph;
	int optlen, hdr_len;

	hdr_len = 0;
	optlen = XFRM_MODE_SKB_CB(skb)->optlen;
	/* Pseudo header size: the maximum, less 4 when bit 2 of optlen is set. */
	if (unlikely(optlen))
		hdr_len += IPV4_BEET_PHMAXLEN - (optlen & 4);

	skb_set_network_header(skb, -x->props.header_len - hdr_len);
	/* NOTE(review): presumably header_len already reserves
	 * IPV4_BEET_PHMAXLEN for non-IPv6 selectors - confirm against the
	 * code that computes x->props.header_len.
	 */
	if (x->sel.family != AF_INET6)
		skb->network_header += IPV4_BEET_PHMAXLEN;
	/* mac_header is reused to point at the IPv6 nexthdr field. */
	skb->mac_header = skb->network_header +
			  offsetof(struct ipv6hdr, nexthdr);
	skb->transport_header = skb->network_header + sizeof(*top_iph);
	/* ph is the data pointer returned by the pull. */
	ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdr_len);

	xfrm6_beet_make_header(skb);

	top_iph = ipv6_hdr(skb);
	if (unlikely(optlen)) {
		if (WARN_ON(optlen < 0))
			return -EINVAL;

		ph->padlen = 4 - (optlen & 4);
		ph->hdrlen = optlen / 8;
		/* The pseudo header takes over the original nexthdr value. */
		ph->nexthdr = top_iph->nexthdr;
		/* Padding bytes directly after the pseudo header are NOPs. */
		if (ph->padlen)
			memset(ph + 1, IPOPT_NOP, ph->padlen);

		top_iph->nexthdr = IPPROTO_BEETPH;
	}

	/* Outer addresses come from the state, not from the inner packet. */
	top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
	top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
	return 0;
}
| 393 | #endif |
| 394 | |
| 395 | /* Add encapsulation header. |
| 396 | * |
| 397 | * On exit, the transport header will be set to the start of the |
| 398 | * encapsulation header to be filled in by x->type->output and the mac |
| 399 | * header will be set to the nextheader (protocol for IPv4) field of the |
| 400 | * extension header directly preceding the encapsulation header, or in |
| 401 | * its absence, that of the top IP header. |
| 402 | * The value of the network header will always point to the top IP header |
| 403 | * while skb->data will point to the payload. |
| 404 | */ |
| 405 | static int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb) |
| 406 | { |
| 407 | int err; |
| 408 | |
| 409 | err = xfrm_inner_extract_output(x, skb); |
| 410 | if (err) |
| 411 | return err; |
| 412 | |
| 413 | IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE; |
| 414 | skb->protocol = htons(ETH_P_IP); |
| 415 | |
| 416 | switch (x->props.mode) { |
| 417 | case XFRM_MODE_BEET: |
| 418 | return xfrm4_beet_encap_add(x, skb); |
| 419 | case XFRM_MODE_TUNNEL: |
| 420 | return xfrm4_tunnel_encap_add(x, skb); |
| 421 | } |
| 422 | |
| 423 | WARN_ON_ONCE(1); |
| 424 | return -EOPNOTSUPP; |
| 425 | } |
| 426 | |
| 427 | static int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) |
| 428 | { |
| 429 | #if IS_ENABLED(CONFIG_IPV6) |
| 430 | int err; |
| 431 | |
| 432 | err = xfrm_inner_extract_output(x, skb); |
| 433 | if (err) |
| 434 | return err; |
| 435 | |
| 436 | skb->ignore_df = 1; |
| 437 | skb->protocol = htons(ETH_P_IPV6); |
| 438 | |
| 439 | switch (x->props.mode) { |
| 440 | case XFRM_MODE_BEET: |
| 441 | return xfrm6_beet_encap_add(x, skb); |
| 442 | case XFRM_MODE_TUNNEL: |
| 443 | return xfrm6_tunnel_encap_add(x, skb); |
| 444 | default: |
| 445 | WARN_ON_ONCE(1); |
| 446 | return -EOPNOTSUPP; |
| 447 | } |
| 448 | #endif |
| 449 | WARN_ON_ONCE(1); |
| 450 | return -EAFNOSUPPORT; |
| 451 | } |
| 452 | |
| 453 | static int xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb) |
| 454 | { |
| 455 | switch (x->props.mode) { |
| 456 | case XFRM_MODE_BEET: |
| 457 | case XFRM_MODE_TUNNEL: |
| 458 | if (x->props.family == AF_INET) |
| 459 | return xfrm4_prepare_output(x, skb); |
| 460 | if (x->props.family == AF_INET6) |
| 461 | return xfrm6_prepare_output(x, skb); |
| 462 | break; |
| 463 | case XFRM_MODE_TRANSPORT: |
| 464 | if (x->props.family == AF_INET) |
| 465 | return xfrm4_transport_output(x, skb); |
| 466 | if (x->props.family == AF_INET6) |
| 467 | return xfrm6_transport_output(x, skb); |
| 468 | break; |
| 469 | case XFRM_MODE_ROUTEOPTIMIZATION: |
| 470 | if (x->props.family == AF_INET6) |
| 471 | return xfrm6_ro_output(x, skb); |
| 472 | WARN_ON_ONCE(1); |
| 473 | break; |
| 474 | default: |
| 475 | if (x->mode_cbs && x->mode_cbs->prepare_output) |
| 476 | return x->mode_cbs->prepare_output(x, skb); |
| 477 | WARN_ON_ONCE(1); |
| 478 | break; |
| 479 | } |
| 480 | |
| 481 | return -EOPNOTSUPP; |
| 482 | } |
| 483 | |
| 484 | #if IS_ENABLED(CONFIG_NET_PKTGEN) |
/* Exported wrapper around the file-local xfrm_outer_mode_output(); only
 * built when CONFIG_NET_PKTGEN is enabled.  Arguments and return value
 * are passed through unchanged.
 */
int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb)
{
	return xfrm_outer_mode_output(x, skb);
}
EXPORT_SYMBOL_GPL(pktgen_xfrm_outer_mode_output);
| 490 | #endif |
| 491 | |
/* Apply the transforms stacked on @skb's dst, one state per iteration.
 *
 * @skb: packet whose dst carries the xfrm state to apply
 * @err: > 0 starts a fresh pass; <= 0 (or a packet-offload state)
 *       enters the loop at the "resume" label, skipping the
 *       encapsulation steps
 *
 * Each iteration pops the dst to its child and continues while that
 * child has an xfrm state whose outer mode lacks XFRM_MODE_FLAG_TUNNEL.
 *
 * Returns 0 when the loop completes.  Returns -EINPROGRESS, without
 * freeing @skb, when x->type->output() reports it.  On any other error
 * the skb is freed, a MIB counter is bumped and the error is returned.
 */
static int xfrm_output_one(struct sk_buff *skb, int err)
{
	struct dst_entry *dst = skb_dst(skb);
	struct xfrm_state *x = dst->xfrm;
	struct net *net = xs_net(x);

	/* Jumps into the middle of the loop body below. */
	if (err <= 0 || x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
		goto resume;

	do {
		/* Make room for the headers/trailers about to be added. */
		err = xfrm_skb_check_space(skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			goto error_nolock;
		}

		skb->mark = xfrm_smark_get(skb->mark, x);

		/* Mode-specific header layout (tunnel, transport, BEET, ...). */
		err = xfrm_outer_mode_output(x, skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR);
			goto error_nolock;
		}

		/* State validity, lifetime and replay checks, plus the
		 * lifetime counters, are done under x->lock.
		 */
		spin_lock_bh(&x->lock);

		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEINVALID);
			err = -EINVAL;
			goto error;
		}

		err = xfrm_state_check_expire(x);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEEXPIRED);
			goto error;
		}

		err = xfrm_replay_overflow(x, skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR);
			goto error;
		}

		x->curlft.bytes += skb->len;
		x->curlft.packets++;
		x->lastused = ktime_get_real_seconds();

		spin_unlock_bh(&x->lock);

		/* skb_dst_force() may leave the skb without a dst. */
		skb_dst_force(skb);
		if (!skb_dst(skb)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			err = -EHOSTUNREACH;
			goto error_nolock;
		}

		if (xfrm_offload(skb)) {
			/* Offload path: only the offload encap hook runs. */
			x->type_offload->encap(x, skb);
		} else {
			/* Inner headers are invalid now. */
			skb->encapsulation = 0;

			err = x->type->output(x, skb);
			/* Not an error: return without freeing the skb. */
			if (err == -EINPROGRESS)
				goto out;
		}

resume:
		/* On entry via "resume", err is the caller-supplied value. */
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEPROTOERROR);
			goto error_nolock;
		}

		/* Step down to the child dst for the next transform. */
		dst = skb_dst_pop(skb);
		if (!dst) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			err = -EHOSTUNREACH;
			goto error_nolock;
		}
		skb_dst_set(skb, dst);
		x = dst->xfrm;
	} while (x && !(x->outer_mode.flags & XFRM_MODE_FLAG_TUNNEL));

	return 0;

error:
	/* Reached only with x->lock held. */
	spin_unlock_bh(&x->lock);
error_nolock:
	kfree_skb(skb);
out:
	return err;
}
| 585 | |
| 586 | int xfrm_output_resume(struct sock *sk, struct sk_buff *skb, int err) |
| 587 | { |
| 588 | struct net *net = xs_net(skb_dst(skb)->xfrm); |
| 589 | |
| 590 | while (likely((err = xfrm_output_one(skb, err)) == 0)) { |
| 591 | nf_reset_ct(skb); |
| 592 | |
| 593 | err = skb_dst(skb)->ops->local_out(net, sk, skb); |
| 594 | if (unlikely(err != 1)) |
| 595 | goto out; |
| 596 | |
| 597 | if (!skb_dst(skb)->xfrm) |
| 598 | return dst_output(net, sk, skb); |
| 599 | |
| 600 | err = nf_hook(skb_dst(skb)->ops->family, |
| 601 | NF_INET_POST_ROUTING, net, sk, skb, |
| 602 | NULL, skb_dst(skb)->dev, xfrm_output2); |
| 603 | if (unlikely(err != 1)) |
| 604 | goto out; |
| 605 | } |
| 606 | |
| 607 | if (err == -EINPROGRESS) |
| 608 | err = 0; |
| 609 | |
| 610 | out: |
| 611 | return err; |
| 612 | } |
| 613 | EXPORT_SYMBOL_GPL(xfrm_output_resume); |
| 614 | |
| 615 | static int xfrm_dev_direct_output(struct sock *sk, struct xfrm_state *x, |
| 616 | struct sk_buff *skb) |
| 617 | { |
| 618 | struct dst_entry *dst = skb_dst(skb); |
| 619 | struct net *net = xs_net(x); |
| 620 | int err; |
| 621 | |
| 622 | dst = skb_dst_pop(skb); |
| 623 | if (!dst) { |
| 624 | XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); |
| 625 | kfree_skb(skb); |
| 626 | return -EHOSTUNREACH; |
| 627 | } |
| 628 | skb_dst_set(skb, dst); |
| 629 | nf_reset_ct(skb); |
| 630 | |
| 631 | err = skb_dst(skb)->ops->local_out(net, sk, skb); |
| 632 | if (unlikely(err != 1)) { |
| 633 | kfree_skb(skb); |
| 634 | return err; |
| 635 | } |
| 636 | |
| 637 | /* In transport mode, network destination is |
| 638 | * directly reachable, while in tunnel mode, |
| 639 | * inner packet network may not be. In packet |
| 640 | * offload type, HW is responsible for hard |
| 641 | * header packet mangling so directly xmit skb |
| 642 | * to netdevice. |
| 643 | */ |
| 644 | skb->dev = x->xso.dev; |
| 645 | __skb_push(skb, skb->dev->hard_header_len); |
| 646 | return dev_queue_xmit(skb); |
| 647 | } |
| 648 | |
/* Continuation with the (net, sk, skb) signature used for nf_hook() in
 * xfrm_output_resume(): starts a fresh pass by calling
 * xfrm_output_resume() with err = 1.  @net is not used.
 */
static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	return xfrm_output_resume(sk, skb, 1);
}
| 653 | |
| 654 | static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb) |
| 655 | { |
| 656 | struct sk_buff *segs, *nskb; |
| 657 | |
| 658 | BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_GSO_CB_OFFSET); |
| 659 | BUILD_BUG_ON(sizeof(*IP6CB(skb)) > SKB_GSO_CB_OFFSET); |
| 660 | segs = skb_gso_segment(skb, 0); |
| 661 | kfree_skb(skb); |
| 662 | if (IS_ERR(segs)) |
| 663 | return PTR_ERR(segs); |
| 664 | if (segs == NULL) |
| 665 | return -EINVAL; |
| 666 | |
| 667 | skb_list_walk_safe(segs, segs, nskb) { |
| 668 | int err; |
| 669 | |
| 670 | skb_mark_not_on_list(segs); |
| 671 | err = xfrm_output2(net, sk, segs); |
| 672 | |
| 673 | if (unlikely(err)) { |
| 674 | kfree_skb_list(nskb); |
| 675 | return err; |
| 676 | } |
| 677 | } |
| 678 | |
| 679 | return 0; |
| 680 | } |
| 681 | |
| 682 | /* For partial checksum offload, the outer header checksum is calculated |
| 683 | * by software and the inner header checksum is calculated by hardware. |
| 684 | * This requires hardware to know the inner packet type to calculate |
| 685 | * the inner header checksum. Save inner ip protocol here to avoid |
| 686 | * traversing the packet in the vendor's xmit code. |
| 687 | * For IPsec tunnel mode save the ip protocol from the IP header of the |
| 688 | * plain text packet. Otherwise If the encap type is IPIP, just save |
| 689 | * skb->inner_ipproto in any other case get the ip protocol from the IP |
| 690 | * header. |
| 691 | */ |
| 692 | static void xfrm_get_inner_ipproto(struct sk_buff *skb, struct xfrm_state *x) |
| 693 | { |
| 694 | struct xfrm_offload *xo = xfrm_offload(skb); |
| 695 | const struct ethhdr *eth; |
| 696 | |
| 697 | if (!xo) |
| 698 | return; |
| 699 | |
| 700 | if (x->outer_mode.encap == XFRM_MODE_TUNNEL) { |
| 701 | switch (x->outer_mode.family) { |
| 702 | case AF_INET: |
| 703 | xo->inner_ipproto = ip_hdr(skb)->protocol; |
| 704 | break; |
| 705 | case AF_INET6: |
| 706 | xo->inner_ipproto = ipv6_hdr(skb)->nexthdr; |
| 707 | break; |
| 708 | default: |
| 709 | break; |
| 710 | } |
| 711 | |
| 712 | return; |
| 713 | } |
| 714 | if (x->outer_mode.encap == XFRM_MODE_IPTFS) { |
| 715 | xo->inner_ipproto = IPPROTO_AGGFRAG; |
| 716 | return; |
| 717 | } |
| 718 | |
| 719 | /* non-Tunnel Mode */ |
| 720 | if (!skb->encapsulation) |
| 721 | return; |
| 722 | |
| 723 | if (skb->inner_protocol_type == ENCAP_TYPE_IPPROTO) { |
| 724 | xo->inner_ipproto = skb->inner_ipproto; |
| 725 | return; |
| 726 | } |
| 727 | |
| 728 | if (skb->inner_protocol_type != ENCAP_TYPE_ETHER) |
| 729 | return; |
| 730 | |
| 731 | eth = (struct ethhdr *)skb_inner_mac_header(skb); |
| 732 | |
| 733 | switch (ntohs(eth->h_proto)) { |
| 734 | case ETH_P_IPV6: |
| 735 | xo->inner_ipproto = inner_ipv6_hdr(skb)->nexthdr; |
| 736 | break; |
| 737 | case ETH_P_IP: |
| 738 | xo->inner_ipproto = inner_ip_hdr(skb)->protocol; |
| 739 | break; |
| 740 | } |
| 741 | } |
| 742 | |
/* Entry point for transforming an outgoing packet.
 *
 * @sk:  socket passed down the output path
 * @skb: packet whose dst carries the xfrm state to apply
 *
 * Resets the IPv4/IPv6 control block and marks it as transformed, then
 * picks between the packet-offload path, the crypto-offload path
 * (secpath set up, GSO kept or segmented in software) and the plain
 * software path (software GSO and checksum completion) before handing
 * over to xfrm_output2().
 *
 * Returns the result of the chosen output helper, or a negative errno
 * (skb freed) when offload validation, secpath allocation or checksum
 * completion fails.
 */
int xfrm_output(struct sock *sk, struct sk_buff *skb)
{
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct xfrm_state *x = skb_dst(skb)->xfrm;
	int family;
	int err;

	/* Packet offload takes the family from the dst ops instead of the
	 * state's outer mode.
	 */
	family = (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) ? x->outer_mode.family
		: skb_dst(skb)->ops->family;

	/* Start from a clean control block for the chosen family. */
	switch (family) {
	case AF_INET:
		memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
		IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
		break;
	case AF_INET6:
		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));

		IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
		break;
	}

	if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET) {
		if (!xfrm_dev_offload_ok(skb, x)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			kfree_skb(skb);
			return -EHOSTUNREACH;
		}

		/* Exclusive direct xmit for tunnel mode, as
		 * some filtering or matching rules may apply
		 * in transport mode.
		 */
		if (x->props.mode == XFRM_MODE_TUNNEL)
			return xfrm_dev_direct_output(sk, x, skb);

		/* err = 0 enters xfrm_output_one() at its resume label. */
		return xfrm_output_resume(sk, skb, 0);
	}

	secpath_reset(skb);

	if (xfrm_dev_offload_ok(skb, x)) {
		struct sec_path *sp;

		sp = secpath_set(skb);
		if (!sp) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			kfree_skb(skb);
			return -ENOMEM;
		}

		/* Record the state in the secpath and take a reference on it. */
		sp->olen++;
		sp->xvec[sp->len++] = x;
		xfrm_state_hold(x);

		xfrm_get_inner_ipproto(skb, x);
		skb->encapsulation = 1;

		if (skb_is_gso(skb)) {
			/* Tunnel mode over an already encapsulated packet is
			 * segmented in software.
			 */
			if (skb->inner_protocol && x->props.mode == XFRM_MODE_TUNNEL)
				return xfrm_output_gso(net, sk, skb);

			skb_shinfo(skb)->gso_type |= SKB_GSO_ESP;
			goto out;
		}

		/* Device advertises ESP TX checksum: skip the software one. */
		if (x->xso.dev && x->xso.dev->features & NETIF_F_HW_ESP_TX_CSUM)
			goto out;
	} else {
		/* No offload: GSO packets are segmented in software. */
		if (skb_is_gso(skb))
			return xfrm_output_gso(net, sk, skb);
	}

	/* Complete a pending partial checksum in software. */
	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		err = skb_checksum_help(skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			kfree_skb(skb);
			return err;
		}
	}

out:
	return xfrm_output2(net, sk, skb);
}
EXPORT_SYMBOL_GPL(xfrm_output);
| 829 | |
| 830 | int xfrm4_tunnel_check_size(struct sk_buff *skb) |
| 831 | { |
| 832 | int mtu, ret = 0; |
| 833 | |
| 834 | if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE) |
| 835 | goto out; |
| 836 | |
| 837 | if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->ignore_df) |
| 838 | goto out; |
| 839 | |
| 840 | mtu = dst_mtu(skb_dst(skb)); |
| 841 | if ((!skb_is_gso(skb) && skb->len > mtu) || |
| 842 | (skb_is_gso(skb) && |
| 843 | !skb_gso_validate_network_len(skb, ip_skb_dst_mtu(skb->sk, skb)))) { |
| 844 | skb->protocol = htons(ETH_P_IP); |
| 845 | |
| 846 | if (skb->sk && sk_fullsock(skb->sk)) |
| 847 | xfrm_local_error(skb, mtu); |
| 848 | else |
| 849 | icmp_send(skb, ICMP_DEST_UNREACH, |
| 850 | ICMP_FRAG_NEEDED, htonl(mtu)); |
| 851 | ret = -EMSGSIZE; |
| 852 | } |
| 853 | out: |
| 854 | return ret; |
| 855 | } |
| 856 | EXPORT_SYMBOL_GPL(xfrm4_tunnel_check_size); |
| 857 | |
| 858 | static int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb) |
| 859 | { |
| 860 | int err; |
| 861 | |
| 862 | if (x->outer_mode.encap == XFRM_MODE_BEET && |
| 863 | ip_is_fragment(ip_hdr(skb))) { |
| 864 | net_warn_ratelimited("BEET mode doesn't support inner IPv4 fragments\n"); |
| 865 | return -EAFNOSUPPORT; |
| 866 | } |
| 867 | |
| 868 | err = xfrm4_tunnel_check_size(skb); |
| 869 | if (err) |
| 870 | return err; |
| 871 | |
| 872 | XFRM_MODE_SKB_CB(skb)->protocol = ip_hdr(skb)->protocol; |
| 873 | |
| 874 | xfrm4_extract_header(skb); |
| 875 | return 0; |
| 876 | } |
| 877 | |
| 878 | #if IS_ENABLED(CONFIG_IPV6) |
| 879 | int xfrm6_tunnel_check_size(struct sk_buff *skb) |
| 880 | { |
| 881 | int mtu, ret = 0; |
| 882 | struct dst_entry *dst = skb_dst(skb); |
| 883 | struct sock *sk = skb_to_full_sk(skb); |
| 884 | |
| 885 | if (skb->ignore_df) |
| 886 | goto out; |
| 887 | |
| 888 | mtu = dst_mtu(dst); |
| 889 | if (mtu < IPV6_MIN_MTU) |
| 890 | mtu = IPV6_MIN_MTU; |
| 891 | |
| 892 | if ((!skb_is_gso(skb) && skb->len > mtu) || |
| 893 | (skb_is_gso(skb) && |
| 894 | !skb_gso_validate_network_len(skb, ip6_skb_dst_mtu(skb)))) { |
| 895 | skb->dev = dst->dev; |
| 896 | skb->protocol = htons(ETH_P_IPV6); |
| 897 | |
| 898 | if (xfrm6_local_dontfrag(sk)) |
| 899 | ipv6_stub->xfrm6_local_rxpmtu(skb, mtu); |
| 900 | else if (sk) |
| 901 | xfrm_local_error(skb, mtu); |
| 902 | else |
| 903 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); |
| 904 | ret = -EMSGSIZE; |
| 905 | } |
| 906 | out: |
| 907 | return ret; |
| 908 | } |
| 909 | EXPORT_SYMBOL_GPL(xfrm6_tunnel_check_size); |
| 910 | #endif |
| 911 | |
| 912 | static int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb) |
| 913 | { |
| 914 | #if IS_ENABLED(CONFIG_IPV6) |
| 915 | int err; |
| 916 | |
| 917 | err = xfrm6_tunnel_check_size(skb); |
| 918 | if (err) |
| 919 | return err; |
| 920 | |
| 921 | XFRM_MODE_SKB_CB(skb)->protocol = ipv6_hdr(skb)->nexthdr; |
| 922 | |
| 923 | xfrm6_extract_header(skb); |
| 924 | return 0; |
| 925 | #else |
| 926 | WARN_ON_ONCE(1); |
| 927 | return -EAFNOSUPPORT; |
| 928 | #endif |
| 929 | } |
| 930 | |
| 931 | static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) |
| 932 | { |
| 933 | switch (skb->protocol) { |
| 934 | case htons(ETH_P_IP): |
| 935 | return xfrm4_extract_output(x, skb); |
| 936 | case htons(ETH_P_IPV6): |
| 937 | return xfrm6_extract_output(x, skb); |
| 938 | } |
| 939 | |
| 940 | return -EAFNOSUPPORT; |
| 941 | } |
| 942 | |
| 943 | void xfrm_local_error(struct sk_buff *skb, int mtu) |
| 944 | { |
| 945 | unsigned int proto; |
| 946 | struct xfrm_state_afinfo *afinfo; |
| 947 | |
| 948 | if (skb->protocol == htons(ETH_P_IP)) |
| 949 | proto = AF_INET; |
| 950 | else if (skb->protocol == htons(ETH_P_IPV6) && |
| 951 | skb->sk->sk_family == AF_INET6) |
| 952 | proto = AF_INET6; |
| 953 | else |
| 954 | return; |
| 955 | |
| 956 | afinfo = xfrm_state_get_afinfo(proto); |
| 957 | if (afinfo) { |
| 958 | afinfo->local_error(skb, mtu); |
| 959 | rcu_read_unlock(); |
| 960 | } |
| 961 | } |
| 962 | EXPORT_SYMBOL_GPL(xfrm_local_error); |