ipv6: add a wrapper for ip6_dst_store() with flowi6 checks
[linux-block.git] / net / ipv6 / ip6_output.c
CommitLineData
1da177e4
LT
1/*
2 * IPv6 output functions
1ab1457c 3 * Linux INET6 implementation
1da177e4
LT
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : airthmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
67ba4152 23 * Imran Patel : frag id should be in NBO
1da177e4
LT
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
1da177e4 29#include <linux/errno.h>
ef76bc23 30#include <linux/kernel.h>
1da177e4
LT
31#include <linux/string.h>
32#include <linux/socket.h>
33#include <linux/net.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/in6.h>
37#include <linux/tcp.h>
38#include <linux/route.h>
b59f45d0 39#include <linux/module.h>
5a0e3ad6 40#include <linux/slab.h>
1da177e4 41
33b48679 42#include <linux/bpf-cgroup.h>
1da177e4
LT
43#include <linux/netfilter.h>
44#include <linux/netfilter_ipv6.h>
45
46#include <net/sock.h>
47#include <net/snmp.h>
48
49#include <net/ipv6.h>
50#include <net/ndisc.h>
51#include <net/protocol.h>
52#include <net/ip6_route.h>
53#include <net/addrconf.h>
54#include <net/rawv6.h>
55#include <net/icmp.h>
56#include <net/xfrm.h>
57#include <net/checksum.h>
7bc570c8 58#include <linux/mroute6.h>
ca254490 59#include <net/l3mdev.h>
14972cbd 60#include <net/lwtunnel.h>
1da177e4 61
7d8c6e39 62static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 63{
adf30907 64 struct dst_entry *dst = skb_dst(skb);
1da177e4 65 struct net_device *dev = dst->dev;
f6b72b62 66 struct neighbour *neigh;
6fd6ce20
YH
67 struct in6_addr *nexthop;
68 int ret;
1da177e4 69
0660e03f 70 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
adf30907 71 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1da177e4 72
7026b1dd 73 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
8571ab47 74 ((mroute6_is_socket(net, skb) &&
bd91b8bf 75 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
7bc570c8
YH
76 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
77 &ipv6_hdr(skb)->saddr))) {
1da177e4
LT
78 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
79
80 /* Do not check for IFF_ALLMULTI; multicast routing
81 is not supported in any case.
82 */
83 if (newskb)
b2e0b385 84 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
29a26a56 85 net, sk, newskb, NULL, newskb->dev,
95603e22 86 dev_loopback_xmit);
1da177e4 87
0660e03f 88 if (ipv6_hdr(skb)->hop_limit == 0) {
78126c41 89 IP6_INC_STATS(net, idev,
3bd653c8 90 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
91 kfree_skb(skb);
92 return 0;
93 }
94 }
95
78126c41 96 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
dd408515
HFS
97
98 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
99 IPV6_ADDR_SCOPE_NODELOCAL &&
100 !(dev->flags & IFF_LOOPBACK)) {
101 kfree_skb(skb);
102 return 0;
103 }
1da177e4
LT
104 }
105
14972cbd
RP
106 if (lwtunnel_xmit_redirect(dst->lwtstate)) {
107 int res = lwtunnel_xmit(skb);
108
109 if (res < 0 || res == LWTUNNEL_XMIT_DONE)
110 return res;
111 }
112
6fd6ce20 113 rcu_read_lock_bh();
2647a9b0 114 nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
6fd6ce20
YH
115 neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
116 if (unlikely(!neigh))
117 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
118 if (!IS_ERR(neigh)) {
4ff06203 119 sock_confirm_neigh(skb, neigh);
c16ec185 120 ret = neigh_output(neigh, skb);
6fd6ce20
YH
121 rcu_read_unlock_bh();
122 return ret;
123 }
124 rcu_read_unlock_bh();
05e3aa09 125
78126c41 126 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
9e508490
JE
127 kfree_skb(skb);
128 return -EINVAL;
1da177e4
LT
129}
130
0c4b51f0 131static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
9e508490 132{
33b48679
DM
133 int ret;
134
135 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
136 if (ret) {
137 kfree_skb(skb);
138 return ret;
139 }
140
09ee9dba
TB
141#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
142 /* Policy lookup after SNAT yielded a new policy */
143 if (skb_dst(skb)->xfrm) {
144 IPCB(skb)->flags |= IPSKB_REROUTED;
145 return dst_output(net, sk, skb);
146 }
147#endif
148
9e508490 149 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
9037c357
JP
150 dst_allfrag(skb_dst(skb)) ||
151 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
7d8c6e39 152 return ip6_fragment(net, sk, skb, ip6_finish_output2);
9e508490 153 else
7d8c6e39 154 return ip6_finish_output2(net, sk, skb);
9e508490
JE
155}
156
ede2059d 157int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 158{
9e508490 159 struct net_device *dev = skb_dst(skb)->dev;
adf30907 160 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
be10de0a 161
97a7a37a
CF
162 skb->protocol = htons(ETH_P_IPV6);
163 skb->dev = dev;
164
778d80be 165 if (unlikely(idev->cnf.disable_ipv6)) {
19a0644c 166 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
778d80be
YH
167 kfree_skb(skb);
168 return 0;
169 }
170
29a26a56
EB
171 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
172 net, sk, skb, NULL, dev,
9c6eb28a
JE
173 ip6_finish_output,
174 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
1da177e4
LT
175}
176
e9191ffb 177bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
513674b5
SL
178{
179 if (!np->autoflowlabel_set)
180 return ip6_default_np_autolabel(net);
181 else
182 return np->autoflowlabel;
183}
184
1da177e4 185/*
1c1e9d2b
ED
186 * xmit an sk_buff (used by TCP, SCTP and DCCP)
187 * Note : socket lock is not held for SYNACK packets, but might be modified
188 * by calls to skb_set_owner_w() and ipv6_local_error(),
189 * which are using proper atomic operations or spinlocks.
1da177e4 190 */
1c1e9d2b 191int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
92e55f41 192 __u32 mark, struct ipv6_txoptions *opt, int tclass)
1da177e4 193{
3bd653c8 194 struct net *net = sock_net(sk);
1c1e9d2b 195 const struct ipv6_pinfo *np = inet6_sk(sk);
4c9483b2 196 struct in6_addr *first_hop = &fl6->daddr;
adf30907 197 struct dst_entry *dst = skb_dst(skb);
1da177e4 198 struct ipv6hdr *hdr;
4c9483b2 199 u8 proto = fl6->flowi6_proto;
1da177e4 200 int seg_len = skb->len;
e651f03a 201 int hlimit = -1;
1da177e4
LT
202 u32 mtu;
203
204 if (opt) {
c2636b4d 205 unsigned int head_room;
1da177e4
LT
206
207 /* First: exthdrs may take lots of space (~8K for now)
208 MAX_HEADER is not enough.
209 */
210 head_room = opt->opt_nflen + opt->opt_flen;
211 seg_len += head_room;
212 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
213
214 if (skb_headroom(skb) < head_room) {
215 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
63159f29 216 if (!skb2) {
adf30907 217 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d
YH
218 IPSTATS_MIB_OUTDISCARDS);
219 kfree_skb(skb);
1da177e4
LT
220 return -ENOBUFS;
221 }
808db80a 222 consume_skb(skb);
a11d206d 223 skb = skb2;
1c1e9d2b
ED
224 /* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
225 * it is safe to call in our context (socket lock not held)
226 */
227 skb_set_owner_w(skb, (struct sock *)sk);
1da177e4
LT
228 }
229 if (opt->opt_flen)
230 ipv6_push_frag_opts(skb, opt, &proto);
231 if (opt->opt_nflen)
613fa3ca
DL
232 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
233 &fl6->saddr);
1da177e4
LT
234 }
235
e2d1bca7
ACM
236 skb_push(skb, sizeof(struct ipv6hdr));
237 skb_reset_network_header(skb);
0660e03f 238 hdr = ipv6_hdr(skb);
1da177e4
LT
239
240 /*
241 * Fill in the IPv6 header
242 */
b903d324 243 if (np)
1da177e4
LT
244 hlimit = np->hop_limit;
245 if (hlimit < 0)
6b75d090 246 hlimit = ip6_dst_hoplimit(dst);
1da177e4 247
cb1ce2ef 248 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
513674b5 249 ip6_autoflowlabel(net, np), fl6));
41a1f8ea 250
1da177e4
LT
251 hdr->payload_len = htons(seg_len);
252 hdr->nexthdr = proto;
253 hdr->hop_limit = hlimit;
254
4e3fd7a0
AD
255 hdr->saddr = fl6->saddr;
256 hdr->daddr = *first_hop;
1da177e4 257
9c9c9ad5 258 skb->protocol = htons(ETH_P_IPV6);
a2c2064f 259 skb->priority = sk->sk_priority;
92e55f41 260 skb->mark = mark;
a2c2064f 261
1da177e4 262 mtu = dst_mtu(dst);
60ff7467 263 if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
adf30907 264 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
edf391ff 265 IPSTATS_MIB_OUT, skb->len);
a8e3e1a9
DA
266
267 /* if egress device is enslaved to an L3 master device pass the
268 * skb to its handler for processing
269 */
270 skb = l3mdev_ip6_out((struct sock *)sk, skb);
271 if (unlikely(!skb))
272 return 0;
273
1c1e9d2b
ED
274 /* hooks should never assume socket lock is held.
275 * we promote our socket to non const
276 */
29a26a56 277 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
1c1e9d2b 278 net, (struct sock *)sk, skb, NULL, dst->dev,
13206b6b 279 dst_output);
1da177e4
LT
280 }
281
1da177e4 282 skb->dev = dst->dev;
1c1e9d2b
ED
283 /* ipv6_local_error() does not require socket lock,
284 * we promote our socket to non const
285 */
286 ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
287
adf30907 288 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
289 kfree_skb(skb);
290 return -EMSGSIZE;
291}
7159039a
YH
292EXPORT_SYMBOL(ip6_xmit);
293
1da177e4
LT
294static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
295{
296 struct ip6_ra_chain *ra;
297 struct sock *last = NULL;
298
299 read_lock(&ip6_ra_lock);
300 for (ra = ip6_ra_chain; ra; ra = ra->next) {
301 struct sock *sk = ra->sk;
0bd1b59b
AM
302 if (sk && ra->sel == sel &&
303 (!sk->sk_bound_dev_if ||
304 sk->sk_bound_dev_if == skb->dev->ifindex)) {
1da177e4
LT
305 if (last) {
306 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
307 if (skb2)
308 rawv6_rcv(last, skb2);
309 }
310 last = sk;
311 }
312 }
313
314 if (last) {
315 rawv6_rcv(last, skb);
316 read_unlock(&ip6_ra_lock);
317 return 1;
318 }
319 read_unlock(&ip6_ra_lock);
320 return 0;
321}
322
e21e0b5f
VN
323static int ip6_forward_proxy_check(struct sk_buff *skb)
324{
0660e03f 325 struct ipv6hdr *hdr = ipv6_hdr(skb);
e21e0b5f 326 u8 nexthdr = hdr->nexthdr;
75f2811c 327 __be16 frag_off;
e21e0b5f
VN
328 int offset;
329
330 if (ipv6_ext_hdr(nexthdr)) {
75f2811c 331 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
e21e0b5f
VN
332 if (offset < 0)
333 return 0;
334 } else
335 offset = sizeof(struct ipv6hdr);
336
337 if (nexthdr == IPPROTO_ICMPV6) {
338 struct icmp6hdr *icmp6;
339
d56f90a7
ACM
340 if (!pskb_may_pull(skb, (skb_network_header(skb) +
341 offset + 1 - skb->data)))
e21e0b5f
VN
342 return 0;
343
d56f90a7 344 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
e21e0b5f
VN
345
346 switch (icmp6->icmp6_type) {
347 case NDISC_ROUTER_SOLICITATION:
348 case NDISC_ROUTER_ADVERTISEMENT:
349 case NDISC_NEIGHBOUR_SOLICITATION:
350 case NDISC_NEIGHBOUR_ADVERTISEMENT:
351 case NDISC_REDIRECT:
352 /* For reaction involving unicast neighbor discovery
353 * message destined to the proxied address, pass it to
354 * input function.
355 */
356 return 1;
357 default:
358 break;
359 }
360 }
361
74553b09
VN
362 /*
363 * The proxying router can't forward traffic sent to a link-local
364 * address, so signal the sender and discard the packet. This
365 * behavior is clarified by the MIPv6 specification.
366 */
367 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
368 dst_link_failure(skb);
369 return -1;
370 }
371
e21e0b5f
VN
372 return 0;
373}
374
0c4b51f0
EB
/*
 * Continuation run after the FORWARD netfilter hook: hand the packet
 * to the output method of its route and report that method's result.
 */
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	int rc = dst_output(net, sk, skb);

	return rc;
}
380
09952107 381unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
0954cf9c
HFS
382{
383 unsigned int mtu;
384 struct inet6_dev *idev;
385
386 if (dst_metric_locked(dst, RTAX_MTU)) {
387 mtu = dst_metric_raw(dst, RTAX_MTU);
388 if (mtu)
389 return mtu;
390 }
391
392 mtu = IPV6_MIN_MTU;
393 rcu_read_lock();
394 idev = __in6_dev_get(dst->dev);
395 if (idev)
396 mtu = idev->cnf.mtu6;
397 rcu_read_unlock();
398
399 return mtu;
400}
09952107 401EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward);
0954cf9c 402
fe6cc55f
FW
403static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
404{
418a3156 405 if (skb->len <= mtu)
fe6cc55f
FW
406 return false;
407
60ff7467 408 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
fe6cc55f
FW
409 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
410 return true;
411
60ff7467 412 if (skb->ignore_df)
418a3156
FW
413 return false;
414
779b7931 415 if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
fe6cc55f
FW
416 return false;
417
418 return true;
419}
420
1da177e4
LT
421int ip6_forward(struct sk_buff *skb)
422{
adf30907 423 struct dst_entry *dst = skb_dst(skb);
0660e03f 424 struct ipv6hdr *hdr = ipv6_hdr(skb);
1da177e4 425 struct inet6_skb_parm *opt = IP6CB(skb);
c346dca1 426 struct net *net = dev_net(dst->dev);
14f3ad6f 427 u32 mtu;
1ab1457c 428
53b7997f 429 if (net->ipv6.devconf_all->forwarding == 0)
1da177e4
LT
430 goto error;
431
090f1166
LR
432 if (skb->pkt_type != PACKET_HOST)
433 goto drop;
434
9ef2e965
HFS
435 if (unlikely(skb->sk))
436 goto drop;
437
4497b076
BH
438 if (skb_warn_if_lro(skb))
439 goto drop;
440
1da177e4 441 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
1d015503
ED
442 __IP6_INC_STATS(net, ip6_dst_idev(dst),
443 IPSTATS_MIB_INDISCARDS);
1da177e4
LT
444 goto drop;
445 }
446
35fc92a9 447 skb_forward_csum(skb);
1da177e4
LT
448
449 /*
450 * We DO NOT make any processing on
451 * RA packets, pushing them to user level AS IS
452 * without ane WARRANTY that application will be able
453 * to interpret them. The reason is that we
454 * cannot make anything clever here.
455 *
456 * We are not end-node, so that if packet contains
457 * AH/ESP, we cannot make anything.
458 * Defragmentation also would be mistake, RA packets
459 * cannot be fragmented, because there is no warranty
460 * that different fragments will go along one path. --ANK
461 */
ab4eb353
YH
462 if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
463 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
1da177e4
LT
464 return 0;
465 }
466
467 /*
468 * check and decrement ttl
469 */
470 if (hdr->hop_limit <= 1) {
471 /* Force OUTPUT device used as source address */
472 skb->dev = dst->dev;
3ffe533c 473 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
1d015503
ED
474 __IP6_INC_STATS(net, ip6_dst_idev(dst),
475 IPSTATS_MIB_INHDRERRORS);
1da177e4
LT
476
477 kfree_skb(skb);
478 return -ETIMEDOUT;
479 }
480
fbea49e1 481 /* XXX: idev->cnf.proxy_ndp? */
53b7997f 482 if (net->ipv6.devconf_all->proxy_ndp &&
8a3edd80 483 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
74553b09
VN
484 int proxied = ip6_forward_proxy_check(skb);
485 if (proxied > 0)
e21e0b5f 486 return ip6_input(skb);
74553b09 487 else if (proxied < 0) {
1d015503
ED
488 __IP6_INC_STATS(net, ip6_dst_idev(dst),
489 IPSTATS_MIB_INDISCARDS);
74553b09
VN
490 goto drop;
491 }
e21e0b5f
VN
492 }
493
1da177e4 494 if (!xfrm6_route_forward(skb)) {
1d015503
ED
495 __IP6_INC_STATS(net, ip6_dst_idev(dst),
496 IPSTATS_MIB_INDISCARDS);
1da177e4
LT
497 goto drop;
498 }
adf30907 499 dst = skb_dst(skb);
1da177e4
LT
500
501 /* IPv6 specs say nothing about it, but it is clear that we cannot
502 send redirects to source routed frames.
1e5dc146 503 We don't send redirects to frames decapsulated from IPsec.
1da177e4 504 */
c45a3dfb 505 if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
1da177e4 506 struct in6_addr *target = NULL;
fbfe95a4 507 struct inet_peer *peer;
1da177e4 508 struct rt6_info *rt;
1da177e4
LT
509
510 /*
511 * incoming and outgoing devices are the same
512 * send a redirect.
513 */
514
515 rt = (struct rt6_info *) dst;
c45a3dfb
DM
516 if (rt->rt6i_flags & RTF_GATEWAY)
517 target = &rt->rt6i_gateway;
1da177e4
LT
518 else
519 target = &hdr->daddr;
520
fd0273d7 521 peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
92d86829 522
1da177e4
LT
523 /* Limit redirects both by destination (here)
524 and by source (inside ndisc_send_redirect)
525 */
fbfe95a4 526 if (inet_peer_xrlim_allow(peer, 1*HZ))
4991969a 527 ndisc_send_redirect(skb, target);
1d861aa4
DM
528 if (peer)
529 inet_putpeer(peer);
5bb1ab09
DS
530 } else {
531 int addrtype = ipv6_addr_type(&hdr->saddr);
532
1da177e4 533 /* This check is security critical. */
f81b2e7d
YH
534 if (addrtype == IPV6_ADDR_ANY ||
535 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
5bb1ab09
DS
536 goto error;
537 if (addrtype & IPV6_ADDR_LINKLOCAL) {
538 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
3ffe533c 539 ICMPV6_NOT_NEIGHBOUR, 0);
5bb1ab09
DS
540 goto error;
541 }
1da177e4
LT
542 }
543
0954cf9c 544 mtu = ip6_dst_mtu_forward(dst);
14f3ad6f
UW
545 if (mtu < IPV6_MIN_MTU)
546 mtu = IPV6_MIN_MTU;
547
fe6cc55f 548 if (ip6_pkt_too_big(skb, mtu)) {
1da177e4
LT
549 /* Again, force OUTPUT device used as source address */
550 skb->dev = dst->dev;
14f3ad6f 551 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1d015503
ED
552 __IP6_INC_STATS(net, ip6_dst_idev(dst),
553 IPSTATS_MIB_INTOOBIGERRORS);
554 __IP6_INC_STATS(net, ip6_dst_idev(dst),
555 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
556 kfree_skb(skb);
557 return -EMSGSIZE;
558 }
559
560 if (skb_cow(skb, dst->dev->hard_header_len)) {
1d015503
ED
561 __IP6_INC_STATS(net, ip6_dst_idev(dst),
562 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
563 goto drop;
564 }
565
0660e03f 566 hdr = ipv6_hdr(skb);
1da177e4
LT
567
568 /* Mangling hops number delayed to point after skb COW */
1ab1457c 569
1da177e4
LT
570 hdr->hop_limit--;
571
1d015503
ED
572 __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
573 __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
29a26a56
EB
574 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
575 net, NULL, skb, skb->dev, dst->dev,
6e23ae2a 576 ip6_forward_finish);
1da177e4
LT
577
578error:
1d015503 579 __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
1da177e4
LT
580drop:
581 kfree_skb(skb);
582 return -EINVAL;
583}
584
585static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
586{
587 to->pkt_type = from->pkt_type;
588 to->priority = from->priority;
589 to->protocol = from->protocol;
adf30907
ED
590 skb_dst_drop(to);
591 skb_dst_set(to, dst_clone(skb_dst(from)));
1da177e4 592 to->dev = from->dev;
82e91ffe 593 to->mark = from->mark;
1da177e4
LT
594
595#ifdef CONFIG_NET_SCHED
596 to->tc_index = from->tc_index;
597#endif
e7ac05f3 598 nf_copy(to, from);
984bc16c 599 skb_copy_secmark(to, from);
1da177e4
LT
600}
601
7d8c6e39
EB
602int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
603 int (*output)(struct net *, struct sock *, struct sk_buff *))
1da177e4 604{
1da177e4 605 struct sk_buff *frag;
67ba4152 606 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
f60e5990 607 struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
608 inet6_sk(skb->sk) : NULL;
1da177e4
LT
609 struct ipv6hdr *tmp_hdr;
610 struct frag_hdr *fh;
611 unsigned int mtu, hlen, left, len;
a7ae1992 612 int hroom, troom;
286c2349 613 __be32 frag_id;
67ba4152 614 int ptr, offset = 0, err = 0;
1da177e4
LT
615 u8 *prevhdr, nexthdr = 0;
616
7dd7eb95
DM
617 err = ip6_find_1stfragopt(skb, &prevhdr);
618 if (err < 0)
2423496a 619 goto fail;
7dd7eb95 620 hlen = err;
1da177e4
LT
621 nexthdr = *prevhdr;
622
628a5c56 623 mtu = ip6_skb_dst_mtu(skb);
b881ef76
JH
624
625 /* We must not fragment if the socket is set to force MTU discovery
14f3ad6f 626 * or if the skb it not generated by a local socket.
b881ef76 627 */
485fca66
FW
628 if (unlikely(!skb->ignore_df && skb->len > mtu))
629 goto fail_toobig;
a34a101e 630
485fca66
FW
631 if (IP6CB(skb)->frag_max_size) {
632 if (IP6CB(skb)->frag_max_size > mtu)
633 goto fail_toobig;
634
635 /* don't send fragments larger than what we received */
636 mtu = IP6CB(skb)->frag_max_size;
637 if (mtu < IPV6_MIN_MTU)
638 mtu = IPV6_MIN_MTU;
b881ef76
JH
639 }
640
d91675f9
YH
641 if (np && np->frag_size < mtu) {
642 if (np->frag_size)
643 mtu = np->frag_size;
644 }
89bc7848 645 if (mtu < hlen + sizeof(struct frag_hdr) + 8)
b72a2b01 646 goto fail_toobig;
1e0d69a9 647 mtu -= hlen + sizeof(struct frag_hdr);
1da177e4 648
fd0273d7
MKL
649 frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
650 &ipv6_hdr(skb)->saddr);
286c2349 651
405c92f7
HFS
652 if (skb->ip_summed == CHECKSUM_PARTIAL &&
653 (err = skb_checksum_help(skb)))
654 goto fail;
655
1d325d21 656 hroom = LL_RESERVED_SPACE(rt->dst.dev);
21dc3301 657 if (skb_has_frag_list(skb)) {
c72d8cda 658 unsigned int first_len = skb_pagelen(skb);
3d13008e 659 struct sk_buff *frag2;
1da177e4
LT
660
661 if (first_len - hlen > mtu ||
662 ((first_len - hlen) & 7) ||
1d325d21
FW
663 skb_cloned(skb) ||
664 skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
1da177e4
LT
665 goto slow_path;
666
4d9092bb 667 skb_walk_frags(skb, frag) {
1da177e4
LT
668 /* Correct geometry. */
669 if (frag->len > mtu ||
670 ((frag->len & 7) && frag->next) ||
1d325d21 671 skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
3d13008e 672 goto slow_path_clean;
1da177e4 673
1da177e4
LT
674 /* Partially cloned skb? */
675 if (skb_shared(frag))
3d13008e 676 goto slow_path_clean;
2fdba6b0
HX
677
678 BUG_ON(frag->sk);
679 if (skb->sk) {
2fdba6b0
HX
680 frag->sk = skb->sk;
681 frag->destructor = sock_wfree;
2fdba6b0 682 }
3d13008e 683 skb->truesize -= frag->truesize;
1da177e4
LT
684 }
685
686 err = 0;
687 offset = 0;
1da177e4
LT
688 /* BUILD HEADER */
689
9a217a1c 690 *prevhdr = NEXTHDR_FRAGMENT;
d56f90a7 691 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
1da177e4 692 if (!tmp_hdr) {
1d325d21
FW
693 err = -ENOMEM;
694 goto fail;
1da177e4 695 }
1d325d21
FW
696 frag = skb_shinfo(skb)->frag_list;
697 skb_frag_list_init(skb);
1da177e4 698
1da177e4 699 __skb_pull(skb, hlen);
d58ff351 700 fh = __skb_push(skb, sizeof(struct frag_hdr));
e2d1bca7
ACM
701 __skb_push(skb, hlen);
702 skb_reset_network_header(skb);
d56f90a7 703 memcpy(skb_network_header(skb), tmp_hdr, hlen);
1da177e4 704
1da177e4
LT
705 fh->nexthdr = nexthdr;
706 fh->reserved = 0;
707 fh->frag_off = htons(IP6_MF);
286c2349 708 fh->identification = frag_id;
1da177e4
LT
709
710 first_len = skb_pagelen(skb);
711 skb->data_len = first_len - skb_headlen(skb);
712 skb->len = first_len;
0660e03f
ACM
713 ipv6_hdr(skb)->payload_len = htons(first_len -
714 sizeof(struct ipv6hdr));
a11d206d 715
1da177e4
LT
716 for (;;) {
717 /* Prepare header of the next frame,
718 * before previous one went down. */
719 if (frag) {
720 frag->ip_summed = CHECKSUM_NONE;
badff6d0 721 skb_reset_transport_header(frag);
d58ff351 722 fh = __skb_push(frag, sizeof(struct frag_hdr));
e2d1bca7
ACM
723 __skb_push(frag, hlen);
724 skb_reset_network_header(frag);
d56f90a7
ACM
725 memcpy(skb_network_header(frag), tmp_hdr,
726 hlen);
1da177e4
LT
727 offset += skb->len - hlen - sizeof(struct frag_hdr);
728 fh->nexthdr = nexthdr;
729 fh->reserved = 0;
730 fh->frag_off = htons(offset);
53b24b8f 731 if (frag->next)
1da177e4
LT
732 fh->frag_off |= htons(IP6_MF);
733 fh->identification = frag_id;
0660e03f
ACM
734 ipv6_hdr(frag)->payload_len =
735 htons(frag->len -
736 sizeof(struct ipv6hdr));
1da177e4
LT
737 ip6_copy_metadata(frag, skb);
738 }
1ab1457c 739
7d8c6e39 740 err = output(net, sk, skb);
67ba4152 741 if (!err)
d8d1f30b 742 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 743 IPSTATS_MIB_FRAGCREATES);
dafee490 744
1da177e4
LT
745 if (err || !frag)
746 break;
747
748 skb = frag;
749 frag = skb->next;
750 skb->next = NULL;
751 }
752
a51482bd 753 kfree(tmp_hdr);
1da177e4
LT
754
755 if (err == 0) {
d8d1f30b 756 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 757 IPSTATS_MIB_FRAGOKS);
1da177e4
LT
758 return 0;
759 }
760
46cfd725 761 kfree_skb_list(frag);
1da177e4 762
d8d1f30b 763 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 764 IPSTATS_MIB_FRAGFAILS);
1da177e4 765 return err;
3d13008e
ED
766
767slow_path_clean:
768 skb_walk_frags(skb, frag2) {
769 if (frag2 == frag)
770 break;
771 frag2->sk = NULL;
772 frag2->destructor = NULL;
773 skb->truesize += frag2->truesize;
774 }
1da177e4
LT
775 }
776
777slow_path:
778 left = skb->len - hlen; /* Space per frame */
779 ptr = hlen; /* Where to start from */
780
781 /*
782 * Fragment the datagram.
783 */
784
a7ae1992 785 troom = rt->dst.dev->needed_tailroom;
1da177e4
LT
786
787 /*
788 * Keep copying data until we run out.
789 */
67ba4152 790 while (left > 0) {
79e49503
FW
791 u8 *fragnexthdr_offset;
792
1da177e4
LT
793 len = left;
794 /* IF: it doesn't fit, use 'mtu' - the data space left */
795 if (len > mtu)
796 len = mtu;
25985edc 797 /* IF: we are not sending up to and including the packet end
1da177e4
LT
798 then align the next start on an eight byte boundary */
799 if (len < left) {
800 len &= ~7;
801 }
1da177e4 802
cbffccc9
JP
803 /* Allocate buffer */
804 frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
805 hroom + troom, GFP_ATOMIC);
806 if (!frag) {
1da177e4
LT
807 err = -ENOMEM;
808 goto fail;
809 }
810
811 /*
812 * Set up data on packet
813 */
814
815 ip6_copy_metadata(frag, skb);
a7ae1992 816 skb_reserve(frag, hroom);
1da177e4 817 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
c1d2bbe1 818 skb_reset_network_header(frag);
badff6d0 819 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
b0e380b1
ACM
820 frag->transport_header = (frag->network_header + hlen +
821 sizeof(struct frag_hdr));
1da177e4
LT
822
823 /*
824 * Charge the memory for the fragment to any owner
825 * it might possess
826 */
827 if (skb->sk)
828 skb_set_owner_w(frag, skb->sk);
829
830 /*
831 * Copy the packet header into the new buffer.
832 */
d626f62b 833 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
1da177e4 834
79e49503
FW
835 fragnexthdr_offset = skb_network_header(frag);
836 fragnexthdr_offset += prevhdr - skb_network_header(skb);
837 *fragnexthdr_offset = NEXTHDR_FRAGMENT;
838
1da177e4
LT
839 /*
840 * Build fragment header.
841 */
842 fh->nexthdr = nexthdr;
843 fh->reserved = 0;
286c2349 844 fh->identification = frag_id;
1da177e4
LT
845
846 /*
847 * Copy a block of the IP datagram.
848 */
e3f0b86b
HS
849 BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
850 len));
1da177e4
LT
851 left -= len;
852
853 fh->frag_off = htons(offset);
854 if (left > 0)
855 fh->frag_off |= htons(IP6_MF);
0660e03f
ACM
856 ipv6_hdr(frag)->payload_len = htons(frag->len -
857 sizeof(struct ipv6hdr));
1da177e4
LT
858
859 ptr += len;
860 offset += len;
861
862 /*
863 * Put this fragment into the sending queue.
864 */
7d8c6e39 865 err = output(net, sk, frag);
1da177e4
LT
866 if (err)
867 goto fail;
dafee490 868
adf30907 869 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 870 IPSTATS_MIB_FRAGCREATES);
1da177e4 871 }
adf30907 872 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 873 IPSTATS_MIB_FRAGOKS);
808db80a 874 consume_skb(skb);
1da177e4
LT
875 return err;
876
485fca66
FW
877fail_toobig:
878 if (skb->sk && dst_allfrag(skb_dst(skb)))
879 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
880
485fca66
FW
881 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
882 err = -EMSGSIZE;
883
1da177e4 884fail:
adf30907 885 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 886 IPSTATS_MIB_FRAGFAILS);
1ab1457c 887 kfree_skb(skb);
1da177e4
LT
888 return err;
889}
890
b71d1d42
ED
891static inline int ip6_rt_check(const struct rt6key *rt_key,
892 const struct in6_addr *fl_addr,
893 const struct in6_addr *addr_cache)
cf6b1982 894{
a02cec21 895 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
63159f29 896 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
cf6b1982
YH
897}
898
497c615a
HX
899static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
900 struct dst_entry *dst,
b71d1d42 901 const struct flowi6 *fl6)
1da177e4 902{
497c615a 903 struct ipv6_pinfo *np = inet6_sk(sk);
a963a37d 904 struct rt6_info *rt;
1da177e4 905
497c615a
HX
906 if (!dst)
907 goto out;
908
a963a37d
ED
909 if (dst->ops->family != AF_INET6) {
910 dst_release(dst);
911 return NULL;
912 }
913
914 rt = (struct rt6_info *)dst;
497c615a
HX
915 /* Yes, checking route validity in not connected
916 * case is not very simple. Take into account,
917 * that we do not support routing by source, TOS,
67ba4152 918 * and MSG_DONTROUTE --ANK (980726)
497c615a 919 *
cf6b1982
YH
920 * 1. ip6_rt_check(): If route was host route,
921 * check that cached destination is current.
497c615a
HX
922 * If it is network route, we still may
923 * check its validity using saved pointer
924 * to the last used address: daddr_cache.
925 * We do not want to save whole address now,
926 * (because main consumer of this service
927 * is tcp, which has not this problem),
928 * so that the last trick works only on connected
929 * sockets.
930 * 2. oif also should be the same.
931 */
4c9483b2 932 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
8e1ef0a9 933#ifdef CONFIG_IPV6_SUBTREES
4c9483b2 934 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
8e1ef0a9 935#endif
ca254490
DA
936 (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
937 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
497c615a
HX
938 dst_release(dst);
939 dst = NULL;
1da177e4
LT
940 }
941
497c615a
HX
942out:
943 return dst;
944}
945
3aef934f 946static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
4c9483b2 947 struct dst_entry **dst, struct flowi6 *fl6)
497c615a 948{
69cce1d1
DM
949#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
950 struct neighbour *n;
97cac082 951 struct rt6_info *rt;
69cce1d1
DM
952#endif
953 int err;
6f21c96a 954 int flags = 0;
497c615a 955
e16e888b
MS
956 /* The correct way to handle this would be to do
957 * ip6_route_get_saddr, and then ip6_route_output; however,
958 * the route-specific preferred source forces the
959 * ip6_route_output call _before_ ip6_route_get_saddr.
960 *
961 * In source specific routing (no src=any default route),
962 * ip6_route_output will fail given src=any saddr, though, so
963 * that's why we try it again later.
964 */
965 if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
966 struct rt6_info *rt;
967 bool had_dst = *dst != NULL;
1da177e4 968
e16e888b
MS
969 if (!had_dst)
970 *dst = ip6_route_output(net, sk, fl6);
971 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
c3968a85
DW
972 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
973 sk ? inet6_sk(sk)->srcprefs : 0,
974 &fl6->saddr);
44456d37 975 if (err)
1da177e4 976 goto out_err_release;
e16e888b
MS
977
978 /* If we had an erroneous initial result, pretend it
979 * never existed and let the SA-enabled version take
980 * over.
981 */
982 if (!had_dst && (*dst)->error) {
983 dst_release(*dst);
984 *dst = NULL;
985 }
6f21c96a
PA
986
987 if (fl6->flowi6_oif)
988 flags |= RT6_LOOKUP_F_IFACE;
1da177e4
LT
989 }
990
e16e888b 991 if (!*dst)
6f21c96a 992 *dst = ip6_route_output_flags(net, sk, fl6, flags);
e16e888b
MS
993
994 err = (*dst)->error;
995 if (err)
996 goto out_err_release;
997
95c385b4 998#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
e550dfb0
NH
999 /*
1000 * Here if the dst entry we've looked up
1001 * has a neighbour entry that is in the INCOMPLETE
1002 * state and the src address from the flow is
1003 * marked as OPTIMISTIC, we release the found
1004 * dst entry and replace it instead with the
1005 * dst entry of the nexthop router
1006 */
c56bf6fe 1007 rt = (struct rt6_info *) *dst;
707be1ff 1008 rcu_read_lock_bh();
2647a9b0
MKL
1009 n = __ipv6_neigh_lookup_noref(rt->dst.dev,
1010 rt6_nexthop(rt, &fl6->daddr));
707be1ff
YH
1011 err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
1012 rcu_read_unlock_bh();
1013
1014 if (err) {
e550dfb0 1015 struct inet6_ifaddr *ifp;
4c9483b2 1016 struct flowi6 fl_gw6;
e550dfb0
NH
1017 int redirect;
1018
4c9483b2 1019 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
e550dfb0
NH
1020 (*dst)->dev, 1);
1021
1022 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1023 if (ifp)
1024 in6_ifa_put(ifp);
1025
1026 if (redirect) {
1027 /*
1028 * We need to get the dst entry for the
1029 * default router instead
1030 */
1031 dst_release(*dst);
4c9483b2
DM
1032 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1033 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1034 *dst = ip6_route_output(net, sk, &fl_gw6);
e5d08d71
IM
1035 err = (*dst)->error;
1036 if (err)
e550dfb0 1037 goto out_err_release;
95c385b4 1038 }
e550dfb0 1039 }
95c385b4 1040#endif
ec5e3b0a 1041 if (ipv6_addr_v4mapped(&fl6->saddr) &&
00ea1cee
WB
1042 !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
1043 err = -EAFNOSUPPORT;
1044 goto out_err_release;
1045 }
95c385b4 1046
1da177e4
LT
1047 return 0;
1048
1049out_err_release:
1050 dst_release(*dst);
1051 *dst = NULL;
8a966fc0 1052
0d240e78
DA
1053 if (err == -ENETUNREACH)
1054 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1055 return err;
1056}
34a0b3cd 1057
497c615a
HX
/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@net: network namespace, handed unchanged to ip6_dst_lookup_tail()
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *	*@dst is reset to NULL before the lookup, so whatever the caller
 *	left in it is ignored.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1075
497c615a 1076/**
68d0c6d3
DM
1077 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1078 * @sk: socket which provides route info
4c9483b2 1079 * @fl6: flow to lookup
68d0c6d3 1080 * @final_dst: final destination address for ipsec lookup
68d0c6d3
DM
1081 *
1082 * This function performs a route lookup on the given flow.
1083 *
1084 * It returns a valid dst pointer on success, or a pointer encoded
1085 * error code.
1086 */
3aef934f 1087struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
0e0d44ab 1088 const struct in6_addr *final_dst)
68d0c6d3
DM
1089{
1090 struct dst_entry *dst = NULL;
1091 int err;
1092
343d60aa 1093 err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
68d0c6d3
DM
1094 if (err)
1095 return ERR_PTR(err);
1096 if (final_dst)
4e3fd7a0 1097 fl6->daddr = *final_dst;
2774c131 1098
f92ee619 1099 return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
68d0c6d3
DM
1100}
1101EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1102
1103/**
1104 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
497c615a 1105 * @sk: socket which provides the dst cache and route info
4c9483b2 1106 * @fl6: flow to lookup
68d0c6d3 1107 * @final_dst: final destination address for ipsec lookup
497c615a
HX
1108 *
1109 * This function performs a route lookup on the given flow with the
1110 * possibility of using the cached route in the socket if it is valid.
1111 * It will take the socket dst lock when operating on the dst cache.
1112 * As a result, this function can only be used in process context.
1113 *
68d0c6d3
DM
1114 * It returns a valid dst pointer on success, or a pointer encoded
1115 * error code.
497c615a 1116 */
4c9483b2 1117struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
0e0d44ab 1118 const struct in6_addr *final_dst)
497c615a 1119{
68d0c6d3 1120 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
497c615a 1121
4c9483b2 1122 dst = ip6_sk_dst_check(sk, dst, fl6);
00bc0ef5
JS
1123 if (!dst)
1124 dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
68d0c6d3 1125
00bc0ef5 1126 return dst;
497c615a 1127}
68d0c6d3 1128EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
497c615a 1129
0178b695
HX
1130static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1131 gfp_t gfp)
1132{
1133 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1134}
1135
1136static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1137 gfp_t gfp)
1138{
1139 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1140}
1141
75a493e6 1142static void ip6_append_data_mtu(unsigned int *mtu,
0c183379
G
1143 int *maxfraglen,
1144 unsigned int fragheaderlen,
1145 struct sk_buff *skb,
75a493e6 1146 struct rt6_info *rt,
e367c2d0 1147 unsigned int orig_mtu)
0c183379
G
1148{
1149 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
63159f29 1150 if (!skb) {
0c183379 1151 /* first fragment, reserve header_len */
e367c2d0 1152 *mtu = orig_mtu - rt->dst.header_len;
0c183379
G
1153
1154 } else {
1155 /*
1156 * this fragment is not first, the headers
1157 * space is regarded as data space.
1158 */
e367c2d0 1159 *mtu = orig_mtu;
0c183379
G
1160 }
1161 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1162 + fragheaderlen - sizeof(struct frag_hdr);
1163 }
1164}
1165
366e41d9 1166static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
26879da5 1167 struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
366e41d9
VY
1168 struct rt6_info *rt, struct flowi6 *fl6)
1169{
1170 struct ipv6_pinfo *np = inet6_sk(sk);
1171 unsigned int mtu;
26879da5 1172 struct ipv6_txoptions *opt = ipc6->opt;
366e41d9
VY
1173
1174 /*
1175 * setup for corking
1176 */
1177 if (opt) {
1178 if (WARN_ON(v6_cork->opt))
1179 return -EINVAL;
1180
864e2a1f 1181 v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
63159f29 1182 if (unlikely(!v6_cork->opt))
366e41d9
VY
1183 return -ENOBUFS;
1184
864e2a1f 1185 v6_cork->opt->tot_len = sizeof(*opt);
366e41d9
VY
1186 v6_cork->opt->opt_flen = opt->opt_flen;
1187 v6_cork->opt->opt_nflen = opt->opt_nflen;
1188
1189 v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1190 sk->sk_allocation);
1191 if (opt->dst0opt && !v6_cork->opt->dst0opt)
1192 return -ENOBUFS;
1193
1194 v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1195 sk->sk_allocation);
1196 if (opt->dst1opt && !v6_cork->opt->dst1opt)
1197 return -ENOBUFS;
1198
1199 v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1200 sk->sk_allocation);
1201 if (opt->hopopt && !v6_cork->opt->hopopt)
1202 return -ENOBUFS;
1203
1204 v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1205 sk->sk_allocation);
1206 if (opt->srcrt && !v6_cork->opt->srcrt)
1207 return -ENOBUFS;
1208
1209 /* need source address above miyazawa*/
1210 }
1211 dst_hold(&rt->dst);
1212 cork->base.dst = &rt->dst;
1213 cork->fl.u.ip6 = *fl6;
26879da5
WW
1214 v6_cork->hop_limit = ipc6->hlimit;
1215 v6_cork->tclass = ipc6->tclass;
366e41d9
VY
1216 if (rt->dst.flags & DST_XFRM_TUNNEL)
1217 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
749439bf 1218 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
366e41d9
VY
1219 else
1220 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
c02b3741 1221 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
366e41d9
VY
1222 if (np->frag_size < mtu) {
1223 if (np->frag_size)
1224 mtu = np->frag_size;
1225 }
749439bf
MM
1226 if (mtu < IPV6_MIN_MTU)
1227 return -EINVAL;
366e41d9 1228 cork->base.fragsize = mtu;
0f6c480f 1229 if (dst_allfrag(xfrm_dst_path(&rt->dst)))
366e41d9
VY
1230 cork->base.flags |= IPCORK_ALLFRAG;
1231 cork->base.length = 0;
1232
1233 return 0;
1234}
1235
0bbe84a6
VY
/*
 * Append @length bytes, fetched through @getfrag from @from, to the packets
 * pending on @queue.  Data goes into the tail skb while it has room; when
 * it does not (or the queue is empty) a new skb is allocated and queued.
 *
 * Returns 0 on success.  -EMSGSIZE is returned, after ipv6_local_error(),
 * when the size checks at the top fail.  Any later failure returns a
 * negative errno after subtracting the unconsumed part of @length from
 * cork->length and bumping IPSTATS_MIB_OUTDISCARDS.
 */
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6,
			     const struct sockcm_cookie *sockc)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;
	/* bytes to add to sk_wmem_alloc, applied in one go on exit */
	unsigned int wmem_alloc_delta = 0;

	/* extension header lengths only matter for the first skb queued */
	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	mtu = cork->fragsize;
	orig_mtu = mtu;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	/* dontfrag on UDP/RAW: report the path mtu and refuse to fragment */
	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
		/* also the target of the two "goto emsgsize" above */
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    !(flags & MSG_MORE) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
		sock_tx_timestamp(sk, sockc->tsflags, &tx_flags);
		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
			tskey = sk->sk_tskey++;
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	/* accounted up front; the error path takes the unsent part back */
	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			/* fraggap: bytes of the current skb beyond maxfraglen,
			 * moved into the new skb further down
			 */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			/* sock_alloc_send_skb() is used only while transhdrlen
			 * is non-zero; otherwise alloc_skb(), limited by sndbuf
			 */
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
				    2 * sk->sk_sndbuf)
					skb = alloc_skb(alloclen + hh_len,
							sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = tx_flags;
			tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* move the overhang of the previous skb into
				 * this one and fix up both checksums
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			/* these apply to the first skb only */
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			if (!skb->destructor) {
				skb->destructor = sock_wfree;
				skb->sk = sk;
				wmem_alloc_delta += skb->truesize;
			}
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			/* no scatter-gather: copy into the skb's linear area */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* scatter-gather: copy into @pfrag and attach it as
			 * a page fragment of the skb
			 */
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			wmem_alloc_delta += copy;
		}
		offset += copy;
		length -= copy;
	}

	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return 0;

error_efault:
	err = -EFAULT;
error:
	/* @length now holds only what was not consumed */
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	/* skbs already queued stay queued, so their charge is still applied */
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return err;
}
0bbe84a6
VY
1550
1551int ip6_append_data(struct sock *sk,
1552 int getfrag(void *from, char *to, int offset, int len,
1553 int odd, struct sk_buff *skb),
26879da5
WW
1554 void *from, int length, int transhdrlen,
1555 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1556 struct rt6_info *rt, unsigned int flags,
c14ac945 1557 const struct sockcm_cookie *sockc)
0bbe84a6
VY
1558{
1559 struct inet_sock *inet = inet_sk(sk);
1560 struct ipv6_pinfo *np = inet6_sk(sk);
1561 int exthdrlen;
1562 int err;
1563
1564 if (flags&MSG_PROBE)
1565 return 0;
1566 if (skb_queue_empty(&sk->sk_write_queue)) {
1567 /*
1568 * setup for corking
1569 */
26879da5
WW
1570 err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1571 ipc6, rt, fl6);
0bbe84a6
VY
1572 if (err)
1573 return err;
1574
26879da5 1575 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
0bbe84a6
VY
1576 length += exthdrlen;
1577 transhdrlen += exthdrlen;
1578 } else {
1579 fl6 = &inet->cork.fl.u.ip6;
1580 transhdrlen = 0;
1581 }
1582
1583 return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1584 &np->cork, sk_page_frag(sk), getfrag,
26879da5 1585 from, length, transhdrlen, flags, ipc6, sockc);
0bbe84a6 1586}
a495f836 1587EXPORT_SYMBOL_GPL(ip6_append_data);
1da177e4 1588
366e41d9
VY
1589static void ip6_cork_release(struct inet_cork_full *cork,
1590 struct inet6_cork *v6_cork)
bf138862 1591{
366e41d9
VY
1592 if (v6_cork->opt) {
1593 kfree(v6_cork->opt->dst0opt);
1594 kfree(v6_cork->opt->dst1opt);
1595 kfree(v6_cork->opt->hopopt);
1596 kfree(v6_cork->opt->srcrt);
1597 kfree(v6_cork->opt);
1598 v6_cork->opt = NULL;
0178b695
HX
1599 }
1600
366e41d9
VY
1601 if (cork->base.dst) {
1602 dst_release(cork->base.dst);
1603 cork->base.dst = NULL;
1604 cork->base.flags &= ~IPCORK_ALLFRAG;
bf138862 1605 }
366e41d9 1606 memset(&cork->fl, 0, sizeof(cork->fl));
bf138862
PE
1607}
1608
6422398c
VY
/*
 * Turn everything pending on @queue into a single skb ready for output.
 *
 * The first skb dequeued becomes the head; every further skb is chained
 * onto its frag_list and detached from the socket.  Extension headers and
 * the IPv6 header are then pushed in front, a reference to the corked
 * route is attached, output statistics are updated and the cork is
 * released.
 *
 * Returns the head skb, or NULL when @queue is empty - in which case the
 * cork is left untouched.
 */
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		/* the head skb accounts for the chained data ... */
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		/* ... so the chained skb no longer references the socket */
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	/* final_dst is passed by reference below, so the nfrag option push
	 * may point it at a different address than fl6->daddr
	 */
	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, np), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	/* proto may have been rewritten by the option pushes above */
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	/* the skb gets its own route reference; the cork's one is dropped
	 * by ip6_cork_release() below
	 */
	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}
1682
1683int ip6_send_skb(struct sk_buff *skb)
1684{
1685 struct net *net = sock_net(skb->sk);
1686 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1687 int err;
1688
33224b16 1689 err = ip6_local_out(net, skb->sk, skb);
1da177e4
LT
1690 if (err) {
1691 if (err > 0)
6ce9e7b5 1692 err = net_xmit_errno(err);
1da177e4 1693 if (err)
6422398c
VY
1694 IP6_INC_STATS(net, rt->rt6i_idev,
1695 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1696 }
1697
1da177e4 1698 return err;
6422398c
VY
1699}
1700
/* Build one skb from the socket's pending frames and send it.
 * Returns 0 when ip6_finish_skb() yields no skb.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	return skb ? ip6_send_skb(skb) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1da177e4 1712
0bbe84a6 1713static void __ip6_flush_pending_frames(struct sock *sk,
6422398c
VY
1714 struct sk_buff_head *queue,
1715 struct inet_cork_full *cork,
1716 struct inet6_cork *v6_cork)
1da177e4 1717{
1da177e4
LT
1718 struct sk_buff *skb;
1719
0bbe84a6 1720 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
adf30907
ED
1721 if (skb_dst(skb))
1722 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
e1f52208 1723 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1724 kfree_skb(skb);
1725 }
1726
6422398c 1727 ip6_cork_release(cork, v6_cork);
1da177e4 1728}
0bbe84a6
VY
1729
1730void ip6_flush_pending_frames(struct sock *sk)
1731{
6422398c
VY
1732 __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1733 &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
0bbe84a6 1734}
a495f836 1735EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
6422398c
VY
1736
1737struct sk_buff *ip6_make_skb(struct sock *sk,
1738 int getfrag(void *from, char *to, int offset,
1739 int len, int odd, struct sk_buff *skb),
1740 void *from, int length, int transhdrlen,
26879da5 1741 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
6422398c 1742 struct rt6_info *rt, unsigned int flags,
26879da5 1743 const struct sockcm_cookie *sockc)
6422398c
VY
1744{
1745 struct inet_cork_full cork;
1746 struct inet6_cork v6_cork;
1747 struct sk_buff_head queue;
26879da5 1748 int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
6422398c
VY
1749 int err;
1750
1751 if (flags & MSG_PROBE)
1752 return NULL;
1753
1754 __skb_queue_head_init(&queue);
1755
1756 cork.base.flags = 0;
1757 cork.base.addr = 0;
1758 cork.base.opt = NULL;
95ef498d 1759 cork.base.dst = NULL;
6422398c 1760 v6_cork.opt = NULL;
26879da5 1761 err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
862c03ee
ED
1762 if (err) {
1763 ip6_cork_release(&cork, &v6_cork);
6422398c 1764 return ERR_PTR(err);
862c03ee 1765 }
26879da5
WW
1766 if (ipc6->dontfrag < 0)
1767 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
6422398c
VY
1768
1769 err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
1770 &current->task_frag, getfrag, from,
1771 length + exthdrlen, transhdrlen + exthdrlen,
26879da5 1772 flags, ipc6, sockc);
6422398c
VY
1773 if (err) {
1774 __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
1775 return ERR_PTR(err);
1776 }
1777
1778 return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
1779}