Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
[linux-block.git] / net / ipv6 / ip6_output.c
CommitLineData
1da177e4
LT
1/*
2 * IPv6 output functions
1ab1457c 3 * Linux INET6 implementation
1da177e4
LT
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : airthmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
67ba4152 23 * Imran Patel : frag id should be in NBO
1da177e4
LT
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
1da177e4 29#include <linux/errno.h>
ef76bc23 30#include <linux/kernel.h>
1da177e4
LT
31#include <linux/string.h>
32#include <linux/socket.h>
33#include <linux/net.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/in6.h>
37#include <linux/tcp.h>
38#include <linux/route.h>
b59f45d0 39#include <linux/module.h>
5a0e3ad6 40#include <linux/slab.h>
1da177e4 41
33b48679 42#include <linux/bpf-cgroup.h>
1da177e4
LT
43#include <linux/netfilter.h>
44#include <linux/netfilter_ipv6.h>
45
46#include <net/sock.h>
47#include <net/snmp.h>
48
49#include <net/ipv6.h>
50#include <net/ndisc.h>
51#include <net/protocol.h>
52#include <net/ip6_route.h>
53#include <net/addrconf.h>
54#include <net/rawv6.h>
55#include <net/icmp.h>
56#include <net/xfrm.h>
57#include <net/checksum.h>
7bc570c8 58#include <linux/mroute6.h>
ca254490 59#include <net/l3mdev.h>
14972cbd 60#include <net/lwtunnel.h>
1da177e4 61
7d8c6e39 62static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 63{
adf30907 64 struct dst_entry *dst = skb_dst(skb);
1da177e4 65 struct net_device *dev = dst->dev;
f6b72b62 66 struct neighbour *neigh;
6fd6ce20
YH
67 struct in6_addr *nexthop;
68 int ret;
1da177e4 69
0660e03f 70 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
adf30907 71 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1da177e4 72
7026b1dd 73 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
78126c41 74 ((mroute6_socket(net, skb) &&
bd91b8bf 75 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
7bc570c8
YH
76 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
77 &ipv6_hdr(skb)->saddr))) {
1da177e4
LT
78 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
79
80 /* Do not check for IFF_ALLMULTI; multicast routing
81 is not supported in any case.
82 */
83 if (newskb)
b2e0b385 84 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
29a26a56 85 net, sk, newskb, NULL, newskb->dev,
95603e22 86 dev_loopback_xmit);
1da177e4 87
0660e03f 88 if (ipv6_hdr(skb)->hop_limit == 0) {
78126c41 89 IP6_INC_STATS(net, idev,
3bd653c8 90 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
91 kfree_skb(skb);
92 return 0;
93 }
94 }
95
78126c41 96 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
dd408515
HFS
97
98 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
99 IPV6_ADDR_SCOPE_NODELOCAL &&
100 !(dev->flags & IFF_LOOPBACK)) {
101 kfree_skb(skb);
102 return 0;
103 }
1da177e4
LT
104 }
105
14972cbd
RP
106 if (lwtunnel_xmit_redirect(dst->lwtstate)) {
107 int res = lwtunnel_xmit(skb);
108
109 if (res < 0 || res == LWTUNNEL_XMIT_DONE)
110 return res;
111 }
112
6fd6ce20 113 rcu_read_lock_bh();
2647a9b0 114 nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
6fd6ce20
YH
115 neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
116 if (unlikely(!neigh))
117 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
118 if (!IS_ERR(neigh)) {
4ff06203 119 sock_confirm_neigh(skb, neigh);
c16ec185 120 ret = neigh_output(neigh, skb);
6fd6ce20
YH
121 rcu_read_unlock_bh();
122 return ret;
123 }
124 rcu_read_unlock_bh();
05e3aa09 125
78126c41 126 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
9e508490
JE
127 kfree_skb(skb);
128 return -EINVAL;
1da177e4
LT
129}
130
0c4b51f0 131static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
9e508490 132{
33b48679
DM
133 int ret;
134
135 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
136 if (ret) {
137 kfree_skb(skb);
138 return ret;
139 }
140
09ee9dba
TB
141#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
142 /* Policy lookup after SNAT yielded a new policy */
143 if (skb_dst(skb)->xfrm) {
144 IPCB(skb)->flags |= IPSKB_REROUTED;
145 return dst_output(net, sk, skb);
146 }
147#endif
148
9e508490 149 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
9037c357
JP
150 dst_allfrag(skb_dst(skb)) ||
151 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
7d8c6e39 152 return ip6_fragment(net, sk, skb, ip6_finish_output2);
9e508490 153 else
7d8c6e39 154 return ip6_finish_output2(net, sk, skb);
9e508490
JE
155}
156
ede2059d 157int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 158{
9e508490 159 struct net_device *dev = skb_dst(skb)->dev;
adf30907 160 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
be10de0a 161
97a7a37a
CF
162 skb->protocol = htons(ETH_P_IPV6);
163 skb->dev = dev;
164
778d80be 165 if (unlikely(idev->cnf.disable_ipv6)) {
19a0644c 166 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
778d80be
YH
167 kfree_skb(skb);
168 return 0;
169 }
170
29a26a56
EB
171 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
172 net, sk, skb, NULL, dev,
9c6eb28a
JE
173 ip6_finish_output,
174 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
1da177e4
LT
175}
176
513674b5
SL
177static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
178{
179 if (!np->autoflowlabel_set)
180 return ip6_default_np_autolabel(net);
181 else
182 return np->autoflowlabel;
183}
184
1da177e4 185/*
1c1e9d2b
ED
186 * xmit an sk_buff (used by TCP, SCTP and DCCP)
187 * Note : socket lock is not held for SYNACK packets, but might be modified
188 * by calls to skb_set_owner_w() and ipv6_local_error(),
189 * which are using proper atomic operations or spinlocks.
1da177e4 190 */
1c1e9d2b 191int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
92e55f41 192 __u32 mark, struct ipv6_txoptions *opt, int tclass)
1da177e4 193{
3bd653c8 194 struct net *net = sock_net(sk);
1c1e9d2b 195 const struct ipv6_pinfo *np = inet6_sk(sk);
4c9483b2 196 struct in6_addr *first_hop = &fl6->daddr;
adf30907 197 struct dst_entry *dst = skb_dst(skb);
1da177e4 198 struct ipv6hdr *hdr;
4c9483b2 199 u8 proto = fl6->flowi6_proto;
1da177e4 200 int seg_len = skb->len;
e651f03a 201 int hlimit = -1;
1da177e4
LT
202 u32 mtu;
203
204 if (opt) {
c2636b4d 205 unsigned int head_room;
1da177e4
LT
206
207 /* First: exthdrs may take lots of space (~8K for now)
208 MAX_HEADER is not enough.
209 */
210 head_room = opt->opt_nflen + opt->opt_flen;
211 seg_len += head_room;
212 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
213
214 if (skb_headroom(skb) < head_room) {
215 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
63159f29 216 if (!skb2) {
adf30907 217 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d
YH
218 IPSTATS_MIB_OUTDISCARDS);
219 kfree_skb(skb);
1da177e4
LT
220 return -ENOBUFS;
221 }
808db80a 222 consume_skb(skb);
a11d206d 223 skb = skb2;
1c1e9d2b
ED
224 /* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
225 * it is safe to call in our context (socket lock not held)
226 */
227 skb_set_owner_w(skb, (struct sock *)sk);
1da177e4
LT
228 }
229 if (opt->opt_flen)
230 ipv6_push_frag_opts(skb, opt, &proto);
231 if (opt->opt_nflen)
613fa3ca
DL
232 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
233 &fl6->saddr);
1da177e4
LT
234 }
235
e2d1bca7
ACM
236 skb_push(skb, sizeof(struct ipv6hdr));
237 skb_reset_network_header(skb);
0660e03f 238 hdr = ipv6_hdr(skb);
1da177e4
LT
239
240 /*
241 * Fill in the IPv6 header
242 */
b903d324 243 if (np)
1da177e4
LT
244 hlimit = np->hop_limit;
245 if (hlimit < 0)
6b75d090 246 hlimit = ip6_dst_hoplimit(dst);
1da177e4 247
cb1ce2ef 248 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
513674b5 249 ip6_autoflowlabel(net, np), fl6));
41a1f8ea 250
1da177e4
LT
251 hdr->payload_len = htons(seg_len);
252 hdr->nexthdr = proto;
253 hdr->hop_limit = hlimit;
254
4e3fd7a0
AD
255 hdr->saddr = fl6->saddr;
256 hdr->daddr = *first_hop;
1da177e4 257
9c9c9ad5 258 skb->protocol = htons(ETH_P_IPV6);
a2c2064f 259 skb->priority = sk->sk_priority;
92e55f41 260 skb->mark = mark;
a2c2064f 261
1da177e4 262 mtu = dst_mtu(dst);
60ff7467 263 if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
adf30907 264 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
edf391ff 265 IPSTATS_MIB_OUT, skb->len);
a8e3e1a9
DA
266
267 /* if egress device is enslaved to an L3 master device pass the
268 * skb to its handler for processing
269 */
270 skb = l3mdev_ip6_out((struct sock *)sk, skb);
271 if (unlikely(!skb))
272 return 0;
273
1c1e9d2b
ED
274 /* hooks should never assume socket lock is held.
275 * we promote our socket to non const
276 */
29a26a56 277 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
1c1e9d2b 278 net, (struct sock *)sk, skb, NULL, dst->dev,
13206b6b 279 dst_output);
1da177e4
LT
280 }
281
1da177e4 282 skb->dev = dst->dev;
1c1e9d2b
ED
283 /* ipv6_local_error() does not require socket lock,
284 * we promote our socket to non const
285 */
286 ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
287
adf30907 288 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
289 kfree_skb(skb);
290 return -EMSGSIZE;
291}
7159039a
YH
292EXPORT_SYMBOL(ip6_xmit);
293
1da177e4
LT
294static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
295{
296 struct ip6_ra_chain *ra;
297 struct sock *last = NULL;
298
299 read_lock(&ip6_ra_lock);
300 for (ra = ip6_ra_chain; ra; ra = ra->next) {
301 struct sock *sk = ra->sk;
0bd1b59b
AM
302 if (sk && ra->sel == sel &&
303 (!sk->sk_bound_dev_if ||
304 sk->sk_bound_dev_if == skb->dev->ifindex)) {
1da177e4
LT
305 if (last) {
306 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
307 if (skb2)
308 rawv6_rcv(last, skb2);
309 }
310 last = sk;
311 }
312 }
313
314 if (last) {
315 rawv6_rcv(last, skb);
316 read_unlock(&ip6_ra_lock);
317 return 1;
318 }
319 read_unlock(&ip6_ra_lock);
320 return 0;
321}
322
e21e0b5f
VN
323static int ip6_forward_proxy_check(struct sk_buff *skb)
324{
0660e03f 325 struct ipv6hdr *hdr = ipv6_hdr(skb);
e21e0b5f 326 u8 nexthdr = hdr->nexthdr;
75f2811c 327 __be16 frag_off;
e21e0b5f
VN
328 int offset;
329
330 if (ipv6_ext_hdr(nexthdr)) {
75f2811c 331 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
e21e0b5f
VN
332 if (offset < 0)
333 return 0;
334 } else
335 offset = sizeof(struct ipv6hdr);
336
337 if (nexthdr == IPPROTO_ICMPV6) {
338 struct icmp6hdr *icmp6;
339
d56f90a7
ACM
340 if (!pskb_may_pull(skb, (skb_network_header(skb) +
341 offset + 1 - skb->data)))
e21e0b5f
VN
342 return 0;
343
d56f90a7 344 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
e21e0b5f
VN
345
346 switch (icmp6->icmp6_type) {
347 case NDISC_ROUTER_SOLICITATION:
348 case NDISC_ROUTER_ADVERTISEMENT:
349 case NDISC_NEIGHBOUR_SOLICITATION:
350 case NDISC_NEIGHBOUR_ADVERTISEMENT:
351 case NDISC_REDIRECT:
352 /* For reaction involving unicast neighbor discovery
353 * message destined to the proxied address, pass it to
354 * input function.
355 */
356 return 1;
357 default:
358 break;
359 }
360 }
361
74553b09
VN
362 /*
363 * The proxying router can't forward traffic sent to a link-local
364 * address, so signal the sender and discard the packet. This
365 * behavior is clarified by the MIPv6 specification.
366 */
367 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
368 dst_link_failure(skb);
369 return -1;
370 }
371
e21e0b5f
VN
372 return 0;
373}
374
0c4b51f0
EB
375static inline int ip6_forward_finish(struct net *net, struct sock *sk,
376 struct sk_buff *skb)
1da177e4 377{
13206b6b 378 return dst_output(net, sk, skb);
1da177e4
LT
379}
380
09952107 381unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
0954cf9c
HFS
382{
383 unsigned int mtu;
384 struct inet6_dev *idev;
385
386 if (dst_metric_locked(dst, RTAX_MTU)) {
387 mtu = dst_metric_raw(dst, RTAX_MTU);
388 if (mtu)
389 return mtu;
390 }
391
392 mtu = IPV6_MIN_MTU;
393 rcu_read_lock();
394 idev = __in6_dev_get(dst->dev);
395 if (idev)
396 mtu = idev->cnf.mtu6;
397 rcu_read_unlock();
398
399 return mtu;
400}
09952107 401EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward);
0954cf9c 402
fe6cc55f
FW
403static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
404{
418a3156 405 if (skb->len <= mtu)
fe6cc55f
FW
406 return false;
407
60ff7467 408 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
fe6cc55f
FW
409 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
410 return true;
411
60ff7467 412 if (skb->ignore_df)
418a3156
FW
413 return false;
414
ae7ef81e 415 if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
fe6cc55f
FW
416 return false;
417
418 return true;
419}
420
1da177e4
LT
421int ip6_forward(struct sk_buff *skb)
422{
adf30907 423 struct dst_entry *dst = skb_dst(skb);
0660e03f 424 struct ipv6hdr *hdr = ipv6_hdr(skb);
1da177e4 425 struct inet6_skb_parm *opt = IP6CB(skb);
c346dca1 426 struct net *net = dev_net(dst->dev);
14f3ad6f 427 u32 mtu;
1ab1457c 428
53b7997f 429 if (net->ipv6.devconf_all->forwarding == 0)
1da177e4
LT
430 goto error;
431
090f1166
LR
432 if (skb->pkt_type != PACKET_HOST)
433 goto drop;
434
9ef2e965
HFS
435 if (unlikely(skb->sk))
436 goto drop;
437
4497b076
BH
438 if (skb_warn_if_lro(skb))
439 goto drop;
440
1da177e4 441 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
1d015503
ED
442 __IP6_INC_STATS(net, ip6_dst_idev(dst),
443 IPSTATS_MIB_INDISCARDS);
1da177e4
LT
444 goto drop;
445 }
446
35fc92a9 447 skb_forward_csum(skb);
1da177e4
LT
448
449 /*
450 * We DO NOT make any processing on
451 * RA packets, pushing them to user level AS IS
452 * without ane WARRANTY that application will be able
453 * to interpret them. The reason is that we
454 * cannot make anything clever here.
455 *
456 * We are not end-node, so that if packet contains
457 * AH/ESP, we cannot make anything.
458 * Defragmentation also would be mistake, RA packets
459 * cannot be fragmented, because there is no warranty
460 * that different fragments will go along one path. --ANK
461 */
ab4eb353
YH
462 if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
463 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
1da177e4
LT
464 return 0;
465 }
466
467 /*
468 * check and decrement ttl
469 */
470 if (hdr->hop_limit <= 1) {
471 /* Force OUTPUT device used as source address */
472 skb->dev = dst->dev;
3ffe533c 473 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
1d015503
ED
474 __IP6_INC_STATS(net, ip6_dst_idev(dst),
475 IPSTATS_MIB_INHDRERRORS);
1da177e4
LT
476
477 kfree_skb(skb);
478 return -ETIMEDOUT;
479 }
480
fbea49e1 481 /* XXX: idev->cnf.proxy_ndp? */
53b7997f 482 if (net->ipv6.devconf_all->proxy_ndp &&
8a3edd80 483 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
74553b09
VN
484 int proxied = ip6_forward_proxy_check(skb);
485 if (proxied > 0)
e21e0b5f 486 return ip6_input(skb);
74553b09 487 else if (proxied < 0) {
1d015503
ED
488 __IP6_INC_STATS(net, ip6_dst_idev(dst),
489 IPSTATS_MIB_INDISCARDS);
74553b09
VN
490 goto drop;
491 }
e21e0b5f
VN
492 }
493
1da177e4 494 if (!xfrm6_route_forward(skb)) {
1d015503
ED
495 __IP6_INC_STATS(net, ip6_dst_idev(dst),
496 IPSTATS_MIB_INDISCARDS);
1da177e4
LT
497 goto drop;
498 }
adf30907 499 dst = skb_dst(skb);
1da177e4
LT
500
501 /* IPv6 specs say nothing about it, but it is clear that we cannot
502 send redirects to source routed frames.
1e5dc146 503 We don't send redirects to frames decapsulated from IPsec.
1da177e4 504 */
c45a3dfb 505 if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
1da177e4 506 struct in6_addr *target = NULL;
fbfe95a4 507 struct inet_peer *peer;
1da177e4 508 struct rt6_info *rt;
1da177e4
LT
509
510 /*
511 * incoming and outgoing devices are the same
512 * send a redirect.
513 */
514
515 rt = (struct rt6_info *) dst;
c45a3dfb
DM
516 if (rt->rt6i_flags & RTF_GATEWAY)
517 target = &rt->rt6i_gateway;
1da177e4
LT
518 else
519 target = &hdr->daddr;
520
fd0273d7 521 peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
92d86829 522
1da177e4
LT
523 /* Limit redirects both by destination (here)
524 and by source (inside ndisc_send_redirect)
525 */
fbfe95a4 526 if (inet_peer_xrlim_allow(peer, 1*HZ))
4991969a 527 ndisc_send_redirect(skb, target);
1d861aa4
DM
528 if (peer)
529 inet_putpeer(peer);
5bb1ab09
DS
530 } else {
531 int addrtype = ipv6_addr_type(&hdr->saddr);
532
1da177e4 533 /* This check is security critical. */
f81b2e7d
YH
534 if (addrtype == IPV6_ADDR_ANY ||
535 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
5bb1ab09
DS
536 goto error;
537 if (addrtype & IPV6_ADDR_LINKLOCAL) {
538 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
3ffe533c 539 ICMPV6_NOT_NEIGHBOUR, 0);
5bb1ab09
DS
540 goto error;
541 }
1da177e4
LT
542 }
543
0954cf9c 544 mtu = ip6_dst_mtu_forward(dst);
14f3ad6f
UW
545 if (mtu < IPV6_MIN_MTU)
546 mtu = IPV6_MIN_MTU;
547
fe6cc55f 548 if (ip6_pkt_too_big(skb, mtu)) {
1da177e4
LT
549 /* Again, force OUTPUT device used as source address */
550 skb->dev = dst->dev;
14f3ad6f 551 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1d015503
ED
552 __IP6_INC_STATS(net, ip6_dst_idev(dst),
553 IPSTATS_MIB_INTOOBIGERRORS);
554 __IP6_INC_STATS(net, ip6_dst_idev(dst),
555 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
556 kfree_skb(skb);
557 return -EMSGSIZE;
558 }
559
560 if (skb_cow(skb, dst->dev->hard_header_len)) {
1d015503
ED
561 __IP6_INC_STATS(net, ip6_dst_idev(dst),
562 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
563 goto drop;
564 }
565
0660e03f 566 hdr = ipv6_hdr(skb);
1da177e4
LT
567
568 /* Mangling hops number delayed to point after skb COW */
1ab1457c 569
1da177e4
LT
570 hdr->hop_limit--;
571
1d015503
ED
572 __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
573 __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
29a26a56
EB
574 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
575 net, NULL, skb, skb->dev, dst->dev,
6e23ae2a 576 ip6_forward_finish);
1da177e4
LT
577
578error:
1d015503 579 __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
1da177e4
LT
580drop:
581 kfree_skb(skb);
582 return -EINVAL;
583}
584
585static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
586{
587 to->pkt_type = from->pkt_type;
588 to->priority = from->priority;
589 to->protocol = from->protocol;
adf30907
ED
590 skb_dst_drop(to);
591 skb_dst_set(to, dst_clone(skb_dst(from)));
1da177e4 592 to->dev = from->dev;
82e91ffe 593 to->mark = from->mark;
1da177e4
LT
594
595#ifdef CONFIG_NET_SCHED
596 to->tc_index = from->tc_index;
597#endif
e7ac05f3 598 nf_copy(to, from);
984bc16c 599 skb_copy_secmark(to, from);
1da177e4
LT
600}
601
7d8c6e39
EB
602int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
603 int (*output)(struct net *, struct sock *, struct sk_buff *))
1da177e4 604{
1da177e4 605 struct sk_buff *frag;
67ba4152 606 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
f60e5990 607 struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
608 inet6_sk(skb->sk) : NULL;
1da177e4
LT
609 struct ipv6hdr *tmp_hdr;
610 struct frag_hdr *fh;
611 unsigned int mtu, hlen, left, len;
a7ae1992 612 int hroom, troom;
286c2349 613 __be32 frag_id;
67ba4152 614 int ptr, offset = 0, err = 0;
1da177e4
LT
615 u8 *prevhdr, nexthdr = 0;
616
7dd7eb95
DM
617 err = ip6_find_1stfragopt(skb, &prevhdr);
618 if (err < 0)
2423496a 619 goto fail;
7dd7eb95 620 hlen = err;
1da177e4
LT
621 nexthdr = *prevhdr;
622
628a5c56 623 mtu = ip6_skb_dst_mtu(skb);
b881ef76
JH
624
625 /* We must not fragment if the socket is set to force MTU discovery
14f3ad6f 626 * or if the skb it not generated by a local socket.
b881ef76 627 */
485fca66
FW
628 if (unlikely(!skb->ignore_df && skb->len > mtu))
629 goto fail_toobig;
a34a101e 630
485fca66
FW
631 if (IP6CB(skb)->frag_max_size) {
632 if (IP6CB(skb)->frag_max_size > mtu)
633 goto fail_toobig;
634
635 /* don't send fragments larger than what we received */
636 mtu = IP6CB(skb)->frag_max_size;
637 if (mtu < IPV6_MIN_MTU)
638 mtu = IPV6_MIN_MTU;
b881ef76
JH
639 }
640
d91675f9
YH
641 if (np && np->frag_size < mtu) {
642 if (np->frag_size)
643 mtu = np->frag_size;
644 }
89bc7848 645 if (mtu < hlen + sizeof(struct frag_hdr) + 8)
b72a2b01 646 goto fail_toobig;
1e0d69a9 647 mtu -= hlen + sizeof(struct frag_hdr);
1da177e4 648
fd0273d7
MKL
649 frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
650 &ipv6_hdr(skb)->saddr);
286c2349 651
405c92f7
HFS
652 if (skb->ip_summed == CHECKSUM_PARTIAL &&
653 (err = skb_checksum_help(skb)))
654 goto fail;
655
1d325d21 656 hroom = LL_RESERVED_SPACE(rt->dst.dev);
21dc3301 657 if (skb_has_frag_list(skb)) {
c72d8cda 658 unsigned int first_len = skb_pagelen(skb);
3d13008e 659 struct sk_buff *frag2;
1da177e4
LT
660
661 if (first_len - hlen > mtu ||
662 ((first_len - hlen) & 7) ||
1d325d21
FW
663 skb_cloned(skb) ||
664 skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
1da177e4
LT
665 goto slow_path;
666
4d9092bb 667 skb_walk_frags(skb, frag) {
1da177e4
LT
668 /* Correct geometry. */
669 if (frag->len > mtu ||
670 ((frag->len & 7) && frag->next) ||
1d325d21 671 skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
3d13008e 672 goto slow_path_clean;
1da177e4 673
1da177e4
LT
674 /* Partially cloned skb? */
675 if (skb_shared(frag))
3d13008e 676 goto slow_path_clean;
2fdba6b0
HX
677
678 BUG_ON(frag->sk);
679 if (skb->sk) {
2fdba6b0
HX
680 frag->sk = skb->sk;
681 frag->destructor = sock_wfree;
2fdba6b0 682 }
3d13008e 683 skb->truesize -= frag->truesize;
1da177e4
LT
684 }
685
686 err = 0;
687 offset = 0;
1da177e4
LT
688 /* BUILD HEADER */
689
9a217a1c 690 *prevhdr = NEXTHDR_FRAGMENT;
d56f90a7 691 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
1da177e4 692 if (!tmp_hdr) {
1d325d21
FW
693 err = -ENOMEM;
694 goto fail;
1da177e4 695 }
1d325d21
FW
696 frag = skb_shinfo(skb)->frag_list;
697 skb_frag_list_init(skb);
1da177e4 698
1da177e4 699 __skb_pull(skb, hlen);
d58ff351 700 fh = __skb_push(skb, sizeof(struct frag_hdr));
e2d1bca7
ACM
701 __skb_push(skb, hlen);
702 skb_reset_network_header(skb);
d56f90a7 703 memcpy(skb_network_header(skb), tmp_hdr, hlen);
1da177e4 704
1da177e4
LT
705 fh->nexthdr = nexthdr;
706 fh->reserved = 0;
707 fh->frag_off = htons(IP6_MF);
286c2349 708 fh->identification = frag_id;
1da177e4
LT
709
710 first_len = skb_pagelen(skb);
711 skb->data_len = first_len - skb_headlen(skb);
712 skb->len = first_len;
0660e03f
ACM
713 ipv6_hdr(skb)->payload_len = htons(first_len -
714 sizeof(struct ipv6hdr));
a11d206d 715
1da177e4
LT
716 for (;;) {
717 /* Prepare header of the next frame,
718 * before previous one went down. */
719 if (frag) {
720 frag->ip_summed = CHECKSUM_NONE;
badff6d0 721 skb_reset_transport_header(frag);
d58ff351 722 fh = __skb_push(frag, sizeof(struct frag_hdr));
e2d1bca7
ACM
723 __skb_push(frag, hlen);
724 skb_reset_network_header(frag);
d56f90a7
ACM
725 memcpy(skb_network_header(frag), tmp_hdr,
726 hlen);
1da177e4
LT
727 offset += skb->len - hlen - sizeof(struct frag_hdr);
728 fh->nexthdr = nexthdr;
729 fh->reserved = 0;
730 fh->frag_off = htons(offset);
53b24b8f 731 if (frag->next)
1da177e4
LT
732 fh->frag_off |= htons(IP6_MF);
733 fh->identification = frag_id;
0660e03f
ACM
734 ipv6_hdr(frag)->payload_len =
735 htons(frag->len -
736 sizeof(struct ipv6hdr));
1da177e4
LT
737 ip6_copy_metadata(frag, skb);
738 }
1ab1457c 739
7d8c6e39 740 err = output(net, sk, skb);
67ba4152 741 if (!err)
d8d1f30b 742 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 743 IPSTATS_MIB_FRAGCREATES);
dafee490 744
1da177e4
LT
745 if (err || !frag)
746 break;
747
748 skb = frag;
749 frag = skb->next;
750 skb->next = NULL;
751 }
752
a51482bd 753 kfree(tmp_hdr);
1da177e4
LT
754
755 if (err == 0) {
d8d1f30b 756 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 757 IPSTATS_MIB_FRAGOKS);
1da177e4
LT
758 return 0;
759 }
760
46cfd725 761 kfree_skb_list(frag);
1da177e4 762
d8d1f30b 763 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 764 IPSTATS_MIB_FRAGFAILS);
1da177e4 765 return err;
3d13008e
ED
766
767slow_path_clean:
768 skb_walk_frags(skb, frag2) {
769 if (frag2 == frag)
770 break;
771 frag2->sk = NULL;
772 frag2->destructor = NULL;
773 skb->truesize += frag2->truesize;
774 }
1da177e4
LT
775 }
776
777slow_path:
778 left = skb->len - hlen; /* Space per frame */
779 ptr = hlen; /* Where to start from */
780
781 /*
782 * Fragment the datagram.
783 */
784
a7ae1992 785 troom = rt->dst.dev->needed_tailroom;
1da177e4
LT
786
787 /*
788 * Keep copying data until we run out.
789 */
67ba4152 790 while (left > 0) {
79e49503
FW
791 u8 *fragnexthdr_offset;
792
1da177e4
LT
793 len = left;
794 /* IF: it doesn't fit, use 'mtu' - the data space left */
795 if (len > mtu)
796 len = mtu;
25985edc 797 /* IF: we are not sending up to and including the packet end
1da177e4
LT
798 then align the next start on an eight byte boundary */
799 if (len < left) {
800 len &= ~7;
801 }
1da177e4 802
cbffccc9
JP
803 /* Allocate buffer */
804 frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
805 hroom + troom, GFP_ATOMIC);
806 if (!frag) {
1da177e4
LT
807 err = -ENOMEM;
808 goto fail;
809 }
810
811 /*
812 * Set up data on packet
813 */
814
815 ip6_copy_metadata(frag, skb);
a7ae1992 816 skb_reserve(frag, hroom);
1da177e4 817 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
c1d2bbe1 818 skb_reset_network_header(frag);
badff6d0 819 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
b0e380b1
ACM
820 frag->transport_header = (frag->network_header + hlen +
821 sizeof(struct frag_hdr));
1da177e4
LT
822
823 /*
824 * Charge the memory for the fragment to any owner
825 * it might possess
826 */
827 if (skb->sk)
828 skb_set_owner_w(frag, skb->sk);
829
830 /*
831 * Copy the packet header into the new buffer.
832 */
d626f62b 833 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
1da177e4 834
79e49503
FW
835 fragnexthdr_offset = skb_network_header(frag);
836 fragnexthdr_offset += prevhdr - skb_network_header(skb);
837 *fragnexthdr_offset = NEXTHDR_FRAGMENT;
838
1da177e4
LT
839 /*
840 * Build fragment header.
841 */
842 fh->nexthdr = nexthdr;
843 fh->reserved = 0;
286c2349 844 fh->identification = frag_id;
1da177e4
LT
845
846 /*
847 * Copy a block of the IP datagram.
848 */
e3f0b86b
HS
849 BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
850 len));
1da177e4
LT
851 left -= len;
852
853 fh->frag_off = htons(offset);
854 if (left > 0)
855 fh->frag_off |= htons(IP6_MF);
0660e03f
ACM
856 ipv6_hdr(frag)->payload_len = htons(frag->len -
857 sizeof(struct ipv6hdr));
1da177e4
LT
858
859 ptr += len;
860 offset += len;
861
862 /*
863 * Put this fragment into the sending queue.
864 */
7d8c6e39 865 err = output(net, sk, frag);
1da177e4
LT
866 if (err)
867 goto fail;
dafee490 868
adf30907 869 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 870 IPSTATS_MIB_FRAGCREATES);
1da177e4 871 }
adf30907 872 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 873 IPSTATS_MIB_FRAGOKS);
808db80a 874 consume_skb(skb);
1da177e4
LT
875 return err;
876
485fca66
FW
877fail_toobig:
878 if (skb->sk && dst_allfrag(skb_dst(skb)))
879 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
880
485fca66
FW
881 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
882 err = -EMSGSIZE;
883
1da177e4 884fail:
adf30907 885 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 886 IPSTATS_MIB_FRAGFAILS);
1ab1457c 887 kfree_skb(skb);
1da177e4
LT
888 return err;
889}
890
b71d1d42
ED
891static inline int ip6_rt_check(const struct rt6key *rt_key,
892 const struct in6_addr *fl_addr,
893 const struct in6_addr *addr_cache)
cf6b1982 894{
a02cec21 895 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
63159f29 896 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
cf6b1982
YH
897}
898
497c615a
HX
899static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
900 struct dst_entry *dst,
b71d1d42 901 const struct flowi6 *fl6)
1da177e4 902{
497c615a 903 struct ipv6_pinfo *np = inet6_sk(sk);
a963a37d 904 struct rt6_info *rt;
1da177e4 905
497c615a
HX
906 if (!dst)
907 goto out;
908
a963a37d
ED
909 if (dst->ops->family != AF_INET6) {
910 dst_release(dst);
911 return NULL;
912 }
913
914 rt = (struct rt6_info *)dst;
497c615a
HX
915 /* Yes, checking route validity in not connected
916 * case is not very simple. Take into account,
917 * that we do not support routing by source, TOS,
67ba4152 918 * and MSG_DONTROUTE --ANK (980726)
497c615a 919 *
cf6b1982
YH
920 * 1. ip6_rt_check(): If route was host route,
921 * check that cached destination is current.
497c615a
HX
922 * If it is network route, we still may
923 * check its validity using saved pointer
924 * to the last used address: daddr_cache.
925 * We do not want to save whole address now,
926 * (because main consumer of this service
927 * is tcp, which has not this problem),
928 * so that the last trick works only on connected
929 * sockets.
930 * 2. oif also should be the same.
931 */
4c9483b2 932 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
8e1ef0a9 933#ifdef CONFIG_IPV6_SUBTREES
4c9483b2 934 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
8e1ef0a9 935#endif
ca254490
DA
936 (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
937 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
497c615a
HX
938 dst_release(dst);
939 dst = NULL;
1da177e4
LT
940 }
941
497c615a
HX
942out:
943 return dst;
944}
945
3aef934f 946static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
4c9483b2 947 struct dst_entry **dst, struct flowi6 *fl6)
497c615a 948{
69cce1d1
DM
949#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
950 struct neighbour *n;
97cac082 951 struct rt6_info *rt;
69cce1d1
DM
952#endif
953 int err;
6f21c96a 954 int flags = 0;
497c615a 955
e16e888b
MS
956 /* The correct way to handle this would be to do
957 * ip6_route_get_saddr, and then ip6_route_output; however,
958 * the route-specific preferred source forces the
959 * ip6_route_output call _before_ ip6_route_get_saddr.
960 *
961 * In source specific routing (no src=any default route),
962 * ip6_route_output will fail given src=any saddr, though, so
963 * that's why we try it again later.
964 */
965 if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
966 struct rt6_info *rt;
967 bool had_dst = *dst != NULL;
1da177e4 968
e16e888b
MS
969 if (!had_dst)
970 *dst = ip6_route_output(net, sk, fl6);
971 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
c3968a85
DW
972 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
973 sk ? inet6_sk(sk)->srcprefs : 0,
974 &fl6->saddr);
44456d37 975 if (err)
1da177e4 976 goto out_err_release;
e16e888b
MS
977
978 /* If we had an erroneous initial result, pretend it
979 * never existed and let the SA-enabled version take
980 * over.
981 */
982 if (!had_dst && (*dst)->error) {
983 dst_release(*dst);
984 *dst = NULL;
985 }
6f21c96a
PA
986
987 if (fl6->flowi6_oif)
988 flags |= RT6_LOOKUP_F_IFACE;
1da177e4
LT
989 }
990
e16e888b 991 if (!*dst)
6f21c96a 992 *dst = ip6_route_output_flags(net, sk, fl6, flags);
e16e888b
MS
993
994 err = (*dst)->error;
995 if (err)
996 goto out_err_release;
997
95c385b4 998#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
e550dfb0
NH
999 /*
1000 * Here if the dst entry we've looked up
1001 * has a neighbour entry that is in the INCOMPLETE
1002 * state and the src address from the flow is
1003 * marked as OPTIMISTIC, we release the found
1004 * dst entry and replace it instead with the
1005 * dst entry of the nexthop router
1006 */
c56bf6fe 1007 rt = (struct rt6_info *) *dst;
707be1ff 1008 rcu_read_lock_bh();
2647a9b0
MKL
1009 n = __ipv6_neigh_lookup_noref(rt->dst.dev,
1010 rt6_nexthop(rt, &fl6->daddr));
707be1ff
YH
1011 err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
1012 rcu_read_unlock_bh();
1013
1014 if (err) {
e550dfb0 1015 struct inet6_ifaddr *ifp;
4c9483b2 1016 struct flowi6 fl_gw6;
e550dfb0
NH
1017 int redirect;
1018
4c9483b2 1019 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
e550dfb0
NH
1020 (*dst)->dev, 1);
1021
1022 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1023 if (ifp)
1024 in6_ifa_put(ifp);
1025
1026 if (redirect) {
1027 /*
1028 * We need to get the dst entry for the
1029 * default router instead
1030 */
1031 dst_release(*dst);
4c9483b2
DM
1032 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1033 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1034 *dst = ip6_route_output(net, sk, &fl_gw6);
e5d08d71
IM
1035 err = (*dst)->error;
1036 if (err)
e550dfb0 1037 goto out_err_release;
95c385b4 1038 }
e550dfb0 1039 }
95c385b4 1040#endif
ec5e3b0a 1041 if (ipv6_addr_v4mapped(&fl6->saddr) &&
00ea1cee
WB
1042 !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
1043 err = -EAFNOSUPPORT;
1044 goto out_err_release;
1045 }
95c385b4 1046
1da177e4
LT
1047 return 0;
1048
1049out_err_release:
1050 dst_release(*dst);
1051 *dst = NULL;
8a966fc0 1052
0d240e78
DA
1053 if (err == -ENETUNREACH)
1054 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1055 return err;
1056}
/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@net: network namespace the lookup is performed in
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *	Any cached dst the caller may hold is ignored: *@dst is reset
 *	to NULL before the lookup so ip6_dst_lookup_tail() starts from
 *	scratch.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1075
497c615a 1076/**
68d0c6d3
DM
1077 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1078 * @sk: socket which provides route info
4c9483b2 1079 * @fl6: flow to lookup
68d0c6d3 1080 * @final_dst: final destination address for ipsec lookup
68d0c6d3
DM
1081 *
1082 * This function performs a route lookup on the given flow.
1083 *
1084 * It returns a valid dst pointer on success, or a pointer encoded
1085 * error code.
1086 */
3aef934f 1087struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
0e0d44ab 1088 const struct in6_addr *final_dst)
68d0c6d3
DM
1089{
1090 struct dst_entry *dst = NULL;
1091 int err;
1092
343d60aa 1093 err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
68d0c6d3
DM
1094 if (err)
1095 return ERR_PTR(err);
1096 if (final_dst)
4e3fd7a0 1097 fl6->daddr = *final_dst;
2774c131 1098
f92ee619 1099 return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
68d0c6d3
DM
1100}
1101EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1102
1103/**
1104 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
497c615a 1105 * @sk: socket which provides the dst cache and route info
4c9483b2 1106 * @fl6: flow to lookup
68d0c6d3 1107 * @final_dst: final destination address for ipsec lookup
497c615a
HX
1108 *
1109 * This function performs a route lookup on the given flow with the
1110 * possibility of using the cached route in the socket if it is valid.
1111 * It will take the socket dst lock when operating on the dst cache.
1112 * As a result, this function can only be used in process context.
1113 *
68d0c6d3
DM
1114 * It returns a valid dst pointer on success, or a pointer encoded
1115 * error code.
497c615a 1116 */
4c9483b2 1117struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
0e0d44ab 1118 const struct in6_addr *final_dst)
497c615a 1119{
68d0c6d3 1120 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
497c615a 1121
4c9483b2 1122 dst = ip6_sk_dst_check(sk, dst, fl6);
00bc0ef5
JS
1123 if (!dst)
1124 dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
68d0c6d3 1125
00bc0ef5 1126 return dst;
497c615a 1127}
68d0c6d3 1128EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
497c615a 1129
0178b695
HX
1130static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1131 gfp_t gfp)
1132{
1133 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1134}
1135
1136static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1137 gfp_t gfp)
1138{
1139 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1140}
1141
75a493e6 1142static void ip6_append_data_mtu(unsigned int *mtu,
0c183379
G
1143 int *maxfraglen,
1144 unsigned int fragheaderlen,
1145 struct sk_buff *skb,
75a493e6 1146 struct rt6_info *rt,
e367c2d0 1147 unsigned int orig_mtu)
0c183379
G
1148{
1149 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
63159f29 1150 if (!skb) {
0c183379 1151 /* first fragment, reserve header_len */
e367c2d0 1152 *mtu = orig_mtu - rt->dst.header_len;
0c183379
G
1153
1154 } else {
1155 /*
1156 * this fragment is not first, the headers
1157 * space is regarded as data space.
1158 */
e367c2d0 1159 *mtu = orig_mtu;
0c183379
G
1160 }
1161 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1162 + fragheaderlen - sizeof(struct frag_hdr);
1163 }
1164}
/* Initialise the cork state for a corked send: deep-copy the tx options
 * into @v6_cork, take a reference on the route, snapshot the flow, and
 * compute the path MTU used for fragmentation.
 *
 * Returns 0 on success or a negative errno.  On a partial failure the
 * already-duplicated option blocks remain attached to v6_cork->opt and
 * are expected to be freed by ip6_cork_release() — TODO(review) confirm
 * every caller releases the cork on error.
 */
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;
	struct ipv6_txoptions *opt = ipc6->opt;

	/*
	 * setup for corking
	 */
	if (opt) {
		/* A cork must not already carry options. */
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = sizeof(*opt);
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		/* Deep-copy each extension header so the caller's option
		 * buffer may be released while the cork is pending.
		 */
		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa*/
	}
	/* The cork owns a reference on the route until ip6_cork_release(). */
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	/* Pick the MTU: with PMTU probing use the device MTU, otherwise the
	 * (possibly xfrm-reduced) path MTU of the route.
	 */
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(xfrm_dst_path(&rt->dst));
	/* A smaller user-configured IPV6_MTU fragment size wins. */
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	cork->base.fragsize = mtu;
	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	return 0;
}
/* Core of ip6_append_data()/ip6_make_skb(): append @length bytes supplied
 * through @getfrag to the queue of pending fragments for this cork,
 * allocating new skbs as the MTU boundary is crossed.
 *
 * Returns 0 on success or a negative errno; on error the appended length
 * is rolled back from cork->length and OUTDISCARDS is bumped.
 */
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6,
			     const struct sockcm_cookie *sockc)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;

	skb = skb_peek_tail(queue);
	if (!skb) {
		/* First skb of the cork: account for destination options
		 * and the dst's extra (e.g. IPsec) header space.
		 */
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	mtu = cork->fragsize;
	orig_mtu = mtu;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	/* Largest payload end for a fragment: 8-byte aligned per RFC 8200. */
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* With IPV6_DONTFRAG on UDP/RAW, report the path MTU to the
	 * application instead of fragmenting.
	 */
	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		ipv6_local_error(sk, EMSGSIZE, fl6,
				 mtu - headersize +
				 sizeof(struct ipv6hdr));
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    !(flags & MSG_MORE) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
		sock_tx_timestamp(sk, sockc->tsflags, &tx_flags);
		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
			tskey = sk->sk_tskey++;
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail of
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				/* First skb: may block for memory. */
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				/* Later skbs: honour a soft 2*sndbuf cap on
				 * queued wmem instead of blocking.
				 */
				if (refcount_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = tx_flags;
			tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;

			/*
			 * Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the overhanging tail of the previous
				 * fragment into this one, keeping checksums
				 * consistent.
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			/* Header space is consumed by the first skb only. */
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			/* No scatter-gather: copy into the skb's linear area,
			 * trimming back on a failed getfrag.
			 */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* Scatter-gather: append data as page fragments,
			 * coalescing into the last frag when possible.
			 */
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			refcount_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	/* Roll back the optimistic length accounting done above. */
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
/* Append data to the socket's pending send queue, setting up the cork
 * on the first call of a corked sequence.  On subsequent calls the flow
 * stored in the cork is used and no transport header space is reserved.
 *
 * Returns 0 on success or a negative errno.
 */
int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen,
		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags,
		    const struct sockcm_cookie *sockc)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
				     ipc6, rt, fl6);
		if (err)
			return err;

		/* Destination options count toward the first skb's length. */
		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		/* Cork already set up: reuse its flow, no transport header. */
		fl6 = &inet->cork.fl.u.ip6;
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, ipc6, sockc);
}
EXPORT_SYMBOL_GPL(ip6_append_data);
1da177e4 1574
366e41d9
VY
1575static void ip6_cork_release(struct inet_cork_full *cork,
1576 struct inet6_cork *v6_cork)
bf138862 1577{
366e41d9
VY
1578 if (v6_cork->opt) {
1579 kfree(v6_cork->opt->dst0opt);
1580 kfree(v6_cork->opt->dst1opt);
1581 kfree(v6_cork->opt->hopopt);
1582 kfree(v6_cork->opt->srcrt);
1583 kfree(v6_cork->opt);
1584 v6_cork->opt = NULL;
0178b695
HX
1585 }
1586
366e41d9
VY
1587 if (cork->base.dst) {
1588 dst_release(cork->base.dst);
1589 cork->base.dst = NULL;
1590 cork->base.flags &= ~IPCORK_ALLFRAG;
bf138862 1591 }
366e41d9 1592 memset(&cork->fl, 0, sizeof(cork->fl));
bf138862
PE
1593}
/* Collapse the queued fragments of a cork into a single skb (extra
 * fragments are chained on frag_list), push extension headers and the
 * IPv6 header, and release the cork.  Returns the finished skb or NULL
 * if the queue was empty.
 */
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Chain the remaining fragments onto the head skb's frag_list,
	 * transferring their length/truesize accounting to the head.
	 */
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	/* Push extension headers; a routing header may rewrite final_dst. */
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, np), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}
1668
1669int ip6_send_skb(struct sk_buff *skb)
1670{
1671 struct net *net = sock_net(skb->sk);
1672 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1673 int err;
1674
33224b16 1675 err = ip6_local_out(net, skb->sk, skb);
1da177e4
LT
1676 if (err) {
1677 if (err > 0)
6ce9e7b5 1678 err = net_xmit_errno(err);
1da177e4 1679 if (err)
6422398c
VY
1680 IP6_INC_STATS(net, rt->rt6i_idev,
1681 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1682 }
1683
1da177e4 1684 return err;
6422398c
VY
1685}
/* Finalise the socket's pending corked data into one skb and send it.
 * Returns 0 when there was nothing queued.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	return skb ? ip6_send_skb(skb) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1da177e4 1698
0bbe84a6 1699static void __ip6_flush_pending_frames(struct sock *sk,
6422398c
VY
1700 struct sk_buff_head *queue,
1701 struct inet_cork_full *cork,
1702 struct inet6_cork *v6_cork)
1da177e4 1703{
1da177e4
LT
1704 struct sk_buff *skb;
1705
0bbe84a6 1706 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
adf30907
ED
1707 if (skb_dst(skb))
1708 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
e1f52208 1709 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1710 kfree_skb(skb);
1711 }
1712
6422398c 1713 ip6_cork_release(cork, v6_cork);
1da177e4 1714}
/* Discard all pending corked data on the socket's write queue and
 * release its cork state.
 */
void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
/* One-shot (non-corked) variant: build a complete packet from @getfrag
 * data on a private queue with a stack-local cork, and return the
 * finished skb.  Returns NULL for MSG_PROBE, or an ERR_PTR on failure
 * (with all temporary state released).
 */
struct sk_buff *ip6_make_skb(struct sock *sk,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
			     struct rt6_info *rt, unsigned int flags,
			     const struct sockcm_cookie *sockc)
{
	struct inet_cork_full cork;
	struct inet6_cork v6_cork;
	struct sk_buff_head queue;
	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE)
		return NULL;

	__skb_queue_head_init(&queue);

	/* Stack-local cork: initialise the fields ip6_setup_cork() and
	 * ip6_cork_release() read before they are otherwise set.
	 */
	cork.base.flags = 0;
	cork.base.addr = 0;
	cork.base.opt = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
	if (err) {
		/* Free any partially duplicated options and the dst ref. */
		ip6_cork_release(&cork, &v6_cork);
		return ERR_PTR(err);
	}
	if (ipc6->dontfrag < 0)
		ipc6->dontfrag = inet6_sk(sk)->dontfrag;

	err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags, ipc6, sockc);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
}