Merge branch 'ip6_gre-Fixes-in-headroom-handling'
[linux-block.git] / net / ipv6 / ip6_output.c
CommitLineData
1da177e4
LT
1/*
2 * IPv6 output functions
1ab1457c 3 * Linux INET6 implementation
1da177e4
LT
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : airthmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
67ba4152 23 * Imran Patel : frag id should be in NBO
1da177e4
LT
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
1da177e4 29#include <linux/errno.h>
ef76bc23 30#include <linux/kernel.h>
1da177e4
LT
31#include <linux/string.h>
32#include <linux/socket.h>
33#include <linux/net.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/in6.h>
37#include <linux/tcp.h>
38#include <linux/route.h>
b59f45d0 39#include <linux/module.h>
5a0e3ad6 40#include <linux/slab.h>
1da177e4 41
33b48679 42#include <linux/bpf-cgroup.h>
1da177e4
LT
43#include <linux/netfilter.h>
44#include <linux/netfilter_ipv6.h>
45
46#include <net/sock.h>
47#include <net/snmp.h>
48
49#include <net/ipv6.h>
50#include <net/ndisc.h>
51#include <net/protocol.h>
52#include <net/ip6_route.h>
53#include <net/addrconf.h>
54#include <net/rawv6.h>
55#include <net/icmp.h>
56#include <net/xfrm.h>
57#include <net/checksum.h>
7bc570c8 58#include <linux/mroute6.h>
ca254490 59#include <net/l3mdev.h>
14972cbd 60#include <net/lwtunnel.h>
1da177e4 61
/* Final L2 transmission step: resolve (or create) the neighbour entry for
 * the route's nexthop and hand the skb to neigh_output().  Multicast
 * packets are first looped back to local listeners / the mrouted socket
 * when required, and link-local-scoped multicast is never sent on the
 * wire.  Returns the neigh_output() result, 0 for locally-consumed
 * packets, or -EINVAL when no neighbour can be created.
 */
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;
	struct in6_addr *nexthop;
	int ret;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		/* Loop a copy back to local listeners when multicast
		 * loopback is enabled on the socket, either for the
		 * multicast-router socket (unless already forwarded) or
		 * when a local member of the group exists on this device.
		 */
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_is_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			/* hop_limit 0 means the packet must not leave the
			 * node; it was only meant for local delivery.
			 */
			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		/* Node-local scope multicast must never hit the wire. */
		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	/* Lightweight tunnel may take over transmission entirely. */
	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
			return res;
	}

	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		sock_confirm_neigh(skb, neigh);
		/* neigh_output() consumes the skb; unlock only after. */
		ret = neigh_output(neigh, skb);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}
130
/* Post-routing output stage: run the cgroup BPF egress program, re-enter
 * dst_output() if SNAT attached a new xfrm policy, then either fragment
 * the packet or pass it straight to ip6_finish_output2().
 */
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	int ret;

	/* cgroup eBPF egress filter may veto the packet. */
	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
	if (ret) {
		kfree_skb(skb);
		return ret;
	}

#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
	if (skb_dst(skb)->xfrm) {
		IPCB(skb)->flags |= IPSKB_REROUTED;
		return dst_output(net, sk, skb);
	}
#endif

	/* Fragment when the (non-GSO) packet exceeds the path MTU, the
	 * route demands fragmentation of everything (dst_allfrag), or
	 * conntrack recorded a smaller max fragment size on input.
	 */
	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(net, sk, skb, ip6_finish_output2);
	else
		return ip6_finish_output2(net, sk, skb);
}
156
/* Generic IPv6 dst output handler: drop when IPv6 is administratively
 * disabled on the egress device, otherwise traverse the POST_ROUTING
 * netfilter hook into ip6_finish_output().  The hook is skipped for
 * packets already rerouted through it (IP6SKB_REROUTED).
 */
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
176
e9191ffb 177bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
513674b5
SL
178{
179 if (!np->autoflowlabel_set)
180 return ip6_default_np_autolabel(net);
181 else
182 return np->autoflowlabel;
183}
184
/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note : socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks.
 *
 * Builds the IPv6 header (pushing any extension headers in @opt first),
 * then sends the packet through the LOCAL_OUT netfilter hook, or reports
 * EMSGSIZE back to the socket when the packet exceeds the path MTU and
 * may not be fragmented.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     __u32 mark, struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (!skb2) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			consume_skb(skb);
			skb = skb2;
			/* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
			 * it is safe to call in our context (socket lock not held)
			 */
			skb_set_owner_w(skb, (struct sock *)sk);
		}
		/* Fragmentable options go after the fragment header;
		 * non-fragmentable ones may also rewrite first_hop
		 * (e.g. for routing headers).
		 */
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
					     &fl6->saddr);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
				ip6_autoflowlabel(net, np), fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = sk->sk_priority;
	skb->mark = mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);

		/* if egress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dst->dev,
			       dst_output);
	}

	/* Packet too big and DF honored: report the MTU to the socket. */
	skb->dev = dst->dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);
293
1da177e4
LT
/* Deliver a Router Alert packet to every raw socket registered on the
 * ip6_ra_chain with a matching selector and (if bound) matching device.
 * Each recipient but the last gets a clone; the last gets the original
 * skb.  Returns 1 when at least one socket consumed the packet, else 0
 * (caller still owns the skb in that case).
 */
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			/* Hand the previous match a clone; keep the
			 * original for the final recipient.
			 */
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
322
e21e0b5f
VN
/* Decide how to handle a packet whose destination is a proxied neighbour
 * address.  Returns 1 when the packet is a unicast NDISC message that
 * must be handed to local input, -1 when it targets a link-local address
 * (not forwardable by a proxy; link failure is signalled), and 0 when
 * normal forwarding should proceed.
 */
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	/* Walk any extension headers to find the transport protocol. */
	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		/* Need at least the ICMPv6 type byte in linear data. */
		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}
374
0c4b51f0
EB
/* Final step of the forwarding path after the FORWARD netfilter hook:
 * account the forwarded datagram and its octets, then transmit via the
 * route's output function.
 */
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

	return dst_output(net, sk, skb);
}
385
/* MTU to use when forwarding over @dst: a locked route metric wins;
 * otherwise use the egress device's IPv6 MTU, falling back to
 * IPV6_MIN_MTU when the device has no inet6 state.
 */
unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
{
	unsigned int mtu;
	struct inet6_dev *idev;

	if (dst_metric_locked(dst, RTAX_MTU)) {
		mtu = dst_metric_raw(dst, RTAX_MTU);
		if (mtu)
			return mtu;
	}

	mtu = IPV6_MIN_MTU;
	/* idev is RCU-protected; read mtu6 under the read lock. */
	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();

	return mtu;
}
EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward);
0954cf9c 407
fe6cc55f
FW
/* Return true when @skb cannot be forwarded at @mtu and an ICMPV6
 * "packet too big" should be generated.  The order of checks matters:
 * a conntrack-recorded frag_max_size above the MTU overrides ignore_df.
 */
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb->ignore_df)
		return false;

	/* A GSO packet whose segments fit the MTU is acceptable. */
	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
		return false;

	return true;
}
425
1da177e4
LT
/* Forward an IPv6 packet received on one interface towards its route's
 * egress device: validate forwarding policy, handle Router Alert and
 * NDISC proxying, enforce hop limit and path MTU, optionally emit a
 * redirect, then decrement hop_limit and traverse the FORWARD netfilter
 * hook into ip6_forward_finish().
 */
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	/* Only packets addressed to us at L2 are forwarded. */
	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	/* Locally-owned skbs must not re-enter the forwarding path. */
	if (unlikely(skb->sk))
		goto drop;

	/* LRO-merged frames cannot be forwarded faithfully. */
	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without ane WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			__IP6_INC_STATS(net, ip6_dst_idev(dst),
					IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	/* xfrm6_route_forward() may have swapped the dst; reload it. */
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	/* skb_cow() may have relocated the header; reload the pointer. */
	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
587
/* Copy per-packet metadata (type, priority, protocol, dst reference,
 * device, mark, tc index, netfilter and security state) from @from into
 * a freshly-built fragment @to so it is routed and classified the same.
 */
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	/* Replace any dst on the fragment with a new ref on the original's. */
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_copy_secmark(to, from);
}
604
7d8c6e39
EB
/* Fragment @skb to fit the path MTU and emit each fragment via @output.
 * A fast path converts an existing frag_list into fragments in place
 * when the geometry already matches; otherwise the slow path allocates
 * and copies each fragment.  Sends ICMPV6_PKT_TOOBIG and returns
 * -EMSGSIZE when fragmentation is not permitted.
 */
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	/* Per-socket frag_size only applies to genuinely local traffic
	 * (not when re-entered via a tunnel, hence dev_recursion_level).
	 */
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	int hroom, troom;
	__be32 frag_id;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;

	/* Locate the insertion point for the fragment header; hlen is
	 * the unfragmentable part's length.
	 */
	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb it not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	/* From here on, mtu is the per-fragment payload budget. */
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	/* Offloaded checksums cannot survive fragmentation; resolve now. */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		/* Fast path only when the existing frag_list geometry
		 * already matches valid fragments.
		 */
		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			err = -ENOMEM;
			goto fail;
		}
		/* Detach the frag list; each entry becomes its own fragment. */
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);

		__skb_pull(skb, hlen);
		fh = __skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		fh->identification = frag_id;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = __skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		/* Free the fragments not yet transmitted. */
		kfree_skb_list(frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		return err;

slow_path_clean:
		/* Undo ownership transfer done on earlier list entries. */
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	troom = rt->dst.dev->needed_tailroom;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0)	{
		u8 *fragnexthdr_offset;

		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)	{
			len &= ~7;
		}

		/* Allocate buffer */
		frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				 hroom + troom, GFP_ATOMIC);
		if (!frag) {
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, hroom);
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/* Patch the copied header's next-header byte to point at
		 * the fragment header we are inserting.
		 */
		fragnexthdr_offset = skb_network_header(frag);
		fragnexthdr_offset += prevhdr - skb_network_header(skb);
		*fragnexthdr_offset = NEXTHDR_FRAGMENT;

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
				     len));
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
893
b71d1d42
ED
894static inline int ip6_rt_check(const struct rt6key *rt_key,
895 const struct in6_addr *fl_addr,
896 const struct in6_addr *addr_cache)
cf6b1982 897{
a02cec21 898 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
63159f29 899 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
cf6b1982
YH
900}
901
497c615a
HX
/* Validate a socket's cached dst against the flow @fl6: release it and
 * return NULL when it is not IPv6, no longer matches the destination
 * (or, with subtrees, the source), or was bound to a different egress
 * interface.  Returns the dst (possibly NULL) the caller should use.
 */
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	/* A cached dst of another family (e.g. v4-mapped) is unusable. */
	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
	     (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}
948
/* Core of the dst lookup: resolve the route for @fl6 into *@dst,
 * selecting a source address first when the flow has none, optionally
 * rerouting via the default router when optimistic DAD leaves the
 * nexthop neighbour unusable, and rejecting mixed v4-mapped/v6
 * source-destination pairs.  On error the dst is released, *@dst is
 * set to NULL and a negative errno is returned.
 */
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
		struct rt6_info *rt;
		bool had_dst = *dst != NULL;

		if (!had_dst)
			*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if (!had_dst && (*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	/* A v4-mapped source is only valid with a v4-mapped (or any)
	 * destination.
	 */
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}
34a0b3cd 1060
497c615a
HX
1061/**
1062 * ip6_dst_lookup - perform route lookup on flow
1063 * @sk: socket which provides route info
1064 * @dst: pointer to dst_entry * for result
4c9483b2 1065 * @fl6: flow to lookup
497c615a
HX
1066 *
1067 * This function performs a route lookup on the given flow.
1068 *
1069 * It returns zero on success, or a standard errno code on error.
1070 */
343d60aa
RP
1071int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1072 struct flowi6 *fl6)
497c615a
HX
1073{
1074 *dst = NULL;
343d60aa 1075 return ip6_dst_lookup_tail(net, sk, dst, fl6);
497c615a 1076}
3cf3dc6c
ACM
1077EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1078
497c615a 1079/**
68d0c6d3
DM
1080 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1081 * @sk: socket which provides route info
4c9483b2 1082 * @fl6: flow to lookup
68d0c6d3 1083 * @final_dst: final destination address for ipsec lookup
68d0c6d3
DM
1084 *
1085 * This function performs a route lookup on the given flow.
1086 *
1087 * It returns a valid dst pointer on success, or a pointer encoded
1088 * error code.
1089 */
3aef934f 1090struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
0e0d44ab 1091 const struct in6_addr *final_dst)
68d0c6d3
DM
1092{
1093 struct dst_entry *dst = NULL;
1094 int err;
1095
343d60aa 1096 err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
68d0c6d3
DM
1097 if (err)
1098 return ERR_PTR(err);
1099 if (final_dst)
4e3fd7a0 1100 fl6->daddr = *final_dst;
2774c131 1101
f92ee619 1102 return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
68d0c6d3
DM
1103}
1104EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1105
1106/**
1107 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
497c615a 1108 * @sk: socket which provides the dst cache and route info
4c9483b2 1109 * @fl6: flow to lookup
68d0c6d3 1110 * @final_dst: final destination address for ipsec lookup
96818159 1111 * @connected: whether @sk is connected or not
497c615a
HX
1112 *
1113 * This function performs a route lookup on the given flow with the
1114 * possibility of using the cached route in the socket if it is valid.
1115 * It will take the socket dst lock when operating on the dst cache.
1116 * As a result, this function can only be used in process context.
1117 *
96818159
AK
1118 * In addition, for a connected socket, cache the dst in the socket
1119 * if the current cache is not valid.
1120 *
68d0c6d3
DM
1121 * It returns a valid dst pointer on success, or a pointer encoded
1122 * error code.
497c615a 1123 */
4c9483b2 1124struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
96818159
AK
1125 const struct in6_addr *final_dst,
1126 bool connected)
497c615a 1127{
68d0c6d3 1128 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
497c615a 1129
4c9483b2 1130 dst = ip6_sk_dst_check(sk, dst, fl6);
96818159
AK
1131 if (dst)
1132 return dst;
1133
1134 dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
1135 if (connected && !IS_ERR(dst))
1136 ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
68d0c6d3 1137
00bc0ef5 1138 return dst;
497c615a 1139}
68d0c6d3 1140EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
497c615a 1141
0178b695
HX
1142static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1143 gfp_t gfp)
1144{
1145 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1146}
1147
1148static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1149 gfp_t gfp)
1150{
1151 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1152}
1153
75a493e6 1154static void ip6_append_data_mtu(unsigned int *mtu,
0c183379
G
1155 int *maxfraglen,
1156 unsigned int fragheaderlen,
1157 struct sk_buff *skb,
75a493e6 1158 struct rt6_info *rt,
e367c2d0 1159 unsigned int orig_mtu)
0c183379
G
1160{
1161 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
63159f29 1162 if (!skb) {
0c183379 1163 /* first fragment, reserve header_len */
e367c2d0 1164 *mtu = orig_mtu - rt->dst.header_len;
0c183379
G
1165
1166 } else {
1167 /*
1168 * this fragment is not first, the headers
1169 * space is regarded as data space.
1170 */
e367c2d0 1171 *mtu = orig_mtu;
0c183379
G
1172 }
1173 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1174 + fragheaderlen - sizeof(struct frag_hdr);
1175 }
1176}
1177
/* Initialize the cork state (@cork/@v6_cork) for a corked transmission:
 * deep-copy the tx options, pin the route, snapshot the flow, and derive
 * the fragmentation MTU. Returns 0 or a negative errno.
 *
 * NOTE(review): on -ENOBUFS the partially duplicated options stay in
 * v6_cork->opt; callers appear to rely on a later ip6_cork_release()
 * to free them (ip6_make_skb() does so explicitly) — confirm for all
 * callers.
 */
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;
	struct ipv6_txoptions *opt = ipc6->opt;

	/*
	 * setup for corking
	 */
	if (opt) {
		/* A previous cork must have been released first. */
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = sizeof(*opt);
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		/* Deep-copy each extension header so the cork owns them
		 * independently of the caller's ipcm6 cookie.
		 */
		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa*/
	}
	/* The cork takes its own reference on the route. */
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	/* Pick the MTU: device MTU when probing PMTU, otherwise the
	 * path MTU (of the inner route for xfrm-bundled dsts).
	 */
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
	/* A smaller user-requested fragment size overrides the path MTU. */
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < IPV6_MIN_MTU)
		return -EINVAL;
	cork->base.fragsize = mtu;
	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	return 0;
}
1247
/* Core of ip6_append_data()/ip6_make_skb(): copy @length bytes obtained
 * via @getfrag onto @queue, growing the tail skb or allocating new ones
 * sized to the corked MTU so that later fragmentation (if any) falls on
 * 8-byte boundaries. Returns 0 or a negative errno; on error the queued
 * data stays on @queue for the caller to flush.
 */
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6,
			     const struct sockcm_cookie *sockc)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;
	unsigned int wmem_alloc_delta = 0;

	/* Extension-header space is only accounted on the very first
	 * skb of the cork; later appends continue an existing packet.
	 */
	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	mtu = cork->fragsize;
	orig_mtu = mtu;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	/* Per-fragment header: IPv6 header, non-fragmentable route
	 * headers, and non-fragmentable tx options.
	 */
	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	/* Full first-fragment header chain length, for RFC 7112 and
	 * EMSGSIZE accounting below.
	 */
	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	/* IPV6_DONTFRAG on UDP/RAW: report the path MTU to the socket
	 * instead of fragmenting.
	 */
	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    !(flags & MSG_MORE) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	/* Hardware/software tx timestamp flags; the key is consumed by
	 * the first fragment only (see below where both are zeroed).
	 */
	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
		sock_tx_timestamp(sk, sockc->tsflags, &tx_flags);
		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
			tskey = sk->sk_tskey++;
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			/* Bytes to pull via getfrag into this skb; the
			 * fraggap bytes are moved from the previous skb.
			 */
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				/* Later fragments: bypass sock_alloc_send_skb
				 * but still honor (twice) the send buffer,
				 * counting not-yet-charged truesize.
				 */
				skb = NULL;
				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
				    2 * sk->sk_sndbuf)
					skb = alloc_skb(alloclen + hh_len,
							sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = tx_flags;
			tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the 8-byte-alignment overhang from
				 * the previous skb, keeping its checksum
				 * consistent.
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			if (!skb->destructor) {
				skb->destructor = sock_wfree;
				skb->sk = sk;
				wmem_alloc_delta += skb->truesize;
			}
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			/* Non-SG device: copy into the linear area. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* SG device: append to (or coalesce with) page frags. */
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			wmem_alloc_delta += copy;
		}
		offset += copy;
		length -= copy;
	}

	/* Charge all accumulated truesize to the socket in one step. */
	if (wmem_alloc_delta)
		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return 0;

error_efault:
	err = -EFAULT;
error:
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	/* Queued skbs remain owned by the socket; account them even on
	 * error so a later flush releases the right amount.
	 */
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return err;
}
0bbe84a6
VY
1563
1564int ip6_append_data(struct sock *sk,
1565 int getfrag(void *from, char *to, int offset, int len,
1566 int odd, struct sk_buff *skb),
26879da5
WW
1567 void *from, int length, int transhdrlen,
1568 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1569 struct rt6_info *rt, unsigned int flags,
c14ac945 1570 const struct sockcm_cookie *sockc)
0bbe84a6
VY
1571{
1572 struct inet_sock *inet = inet_sk(sk);
1573 struct ipv6_pinfo *np = inet6_sk(sk);
1574 int exthdrlen;
1575 int err;
1576
1577 if (flags&MSG_PROBE)
1578 return 0;
1579 if (skb_queue_empty(&sk->sk_write_queue)) {
1580 /*
1581 * setup for corking
1582 */
26879da5
WW
1583 err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1584 ipc6, rt, fl6);
0bbe84a6
VY
1585 if (err)
1586 return err;
1587
26879da5 1588 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
0bbe84a6
VY
1589 length += exthdrlen;
1590 transhdrlen += exthdrlen;
1591 } else {
1592 fl6 = &inet->cork.fl.u.ip6;
1593 transhdrlen = 0;
1594 }
1595
1596 return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1597 &np->cork, sk_page_frag(sk), getfrag,
26879da5 1598 from, length, transhdrlen, flags, ipc6, sockc);
0bbe84a6 1599}
a495f836 1600EXPORT_SYMBOL_GPL(ip6_append_data);
1da177e4 1601
366e41d9
VY
1602static void ip6_cork_release(struct inet_cork_full *cork,
1603 struct inet6_cork *v6_cork)
bf138862 1604{
366e41d9
VY
1605 if (v6_cork->opt) {
1606 kfree(v6_cork->opt->dst0opt);
1607 kfree(v6_cork->opt->dst1opt);
1608 kfree(v6_cork->opt->hopopt);
1609 kfree(v6_cork->opt->srcrt);
1610 kfree(v6_cork->opt);
1611 v6_cork->opt = NULL;
0178b695
HX
1612 }
1613
366e41d9
VY
1614 if (cork->base.dst) {
1615 dst_release(cork->base.dst);
1616 cork->base.dst = NULL;
1617 cork->base.flags &= ~IPCORK_ALLFRAG;
bf138862 1618 }
366e41d9 1619 memset(&cork->fl, 0, sizeof(cork->fl));
bf138862
PE
1620}
1621
/* Collapse all skbs queued on @queue into a single packet (extra skbs
 * become the frag_list of the first), push the extension headers and the
 * IPv6 header, attach the corked route, bump output stats, and release
 * the cork. Returns the finished skb, or NULL if the queue was empty.
 */
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Chain the remaining queued skbs onto the first one's frag_list,
	 * transferring length/truesize and dropping their socket
	 * ownership (the head skb keeps the accounting).
	 */
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	/* Routing headers may rewrite the on-wire destination; keep the
	 * flow's daddr as the final destination for the header below.
	 */
	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, np), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}
1695
1696int ip6_send_skb(struct sk_buff *skb)
1697{
1698 struct net *net = sock_net(skb->sk);
1699 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1700 int err;
1701
33224b16 1702 err = ip6_local_out(net, skb->sk, skb);
1da177e4
LT
1703 if (err) {
1704 if (err > 0)
6ce9e7b5 1705 err = net_xmit_errno(err);
1da177e4 1706 if (err)
6422398c
VY
1707 IP6_INC_STATS(net, rt->rt6i_idev,
1708 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1709 }
1710
1da177e4 1711 return err;
6422398c
VY
1712}
1713
/* Finalize and transmit whatever is corked on @sk's write queue.
 * An empty queue is not an error.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	return skb ? ip6_send_skb(skb) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1da177e4 1725
0bbe84a6 1726static void __ip6_flush_pending_frames(struct sock *sk,
6422398c
VY
1727 struct sk_buff_head *queue,
1728 struct inet_cork_full *cork,
1729 struct inet6_cork *v6_cork)
1da177e4 1730{
1da177e4
LT
1731 struct sk_buff *skb;
1732
0bbe84a6 1733 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
adf30907
ED
1734 if (skb_dst(skb))
1735 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
e1f52208 1736 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1737 kfree_skb(skb);
1738 }
1739
6422398c 1740 ip6_cork_release(cork, v6_cork);
1da177e4 1741}
0bbe84a6
VY
1742
1743void ip6_flush_pending_frames(struct sock *sk)
1744{
6422398c
VY
1745 __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1746 &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
0bbe84a6 1747}
a495f836 1748EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
6422398c
VY
1749
1750struct sk_buff *ip6_make_skb(struct sock *sk,
1751 int getfrag(void *from, char *to, int offset,
1752 int len, int odd, struct sk_buff *skb),
1753 void *from, int length, int transhdrlen,
26879da5 1754 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
6422398c 1755 struct rt6_info *rt, unsigned int flags,
26879da5 1756 const struct sockcm_cookie *sockc)
6422398c
VY
1757{
1758 struct inet_cork_full cork;
1759 struct inet6_cork v6_cork;
1760 struct sk_buff_head queue;
26879da5 1761 int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
6422398c
VY
1762 int err;
1763
1764 if (flags & MSG_PROBE)
1765 return NULL;
1766
1767 __skb_queue_head_init(&queue);
1768
1769 cork.base.flags = 0;
1770 cork.base.addr = 0;
1771 cork.base.opt = NULL;
95ef498d 1772 cork.base.dst = NULL;
6422398c 1773 v6_cork.opt = NULL;
26879da5 1774 err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
862c03ee
ED
1775 if (err) {
1776 ip6_cork_release(&cork, &v6_cork);
6422398c 1777 return ERR_PTR(err);
862c03ee 1778 }
26879da5
WW
1779 if (ipc6->dontfrag < 0)
1780 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
6422398c
VY
1781
1782 err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
1783 &current->task_frag, getfrag, from,
1784 length + exthdrlen, transhdrlen + exthdrlen,
26879da5 1785 flags, ipc6, sockc);
6422398c
VY
1786 if (err) {
1787 __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
1788 return ERR_PTR(err);
1789 }
1790
1791 return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
1792}