Merge branch 'mlx4-fixes'
[linux-block.git] / net / ipv6 / ip6_output.c
CommitLineData
1da177e4
LT
1/*
2 * IPv6 output functions
1ab1457c 3 * Linux INET6 implementation
1da177e4
LT
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : airthmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
67ba4152 23 * Imran Patel : frag id should be in NBO
1da177e4
LT
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
1da177e4 29#include <linux/errno.h>
ef76bc23 30#include <linux/kernel.h>
b72a2b01 31#include <linux/overflow-arith.h>
1da177e4
LT
32#include <linux/string.h>
33#include <linux/socket.h>
34#include <linux/net.h>
35#include <linux/netdevice.h>
36#include <linux/if_arp.h>
37#include <linux/in6.h>
38#include <linux/tcp.h>
39#include <linux/route.h>
b59f45d0 40#include <linux/module.h>
5a0e3ad6 41#include <linux/slab.h>
1da177e4
LT
42
43#include <linux/netfilter.h>
44#include <linux/netfilter_ipv6.h>
45
46#include <net/sock.h>
47#include <net/snmp.h>
48
49#include <net/ipv6.h>
50#include <net/ndisc.h>
51#include <net/protocol.h>
52#include <net/ip6_route.h>
53#include <net/addrconf.h>
54#include <net/rawv6.h>
55#include <net/icmp.h>
56#include <net/xfrm.h>
57#include <net/checksum.h>
7bc570c8 58#include <linux/mroute6.h>
1da177e4 59
7026b1dd 60static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb)
1da177e4 61{
adf30907 62 struct dst_entry *dst = skb_dst(skb);
1da177e4 63 struct net_device *dev = dst->dev;
f6b72b62 64 struct neighbour *neigh;
6fd6ce20
YH
65 struct in6_addr *nexthop;
66 int ret;
1da177e4
LT
67
68 skb->protocol = htons(ETH_P_IPV6);
69 skb->dev = dev;
70
0660e03f 71 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
adf30907 72 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1da177e4 73
7026b1dd 74 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
d1db275d 75 ((mroute6_socket(dev_net(dev), skb) &&
bd91b8bf 76 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
7bc570c8
YH
77 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
78 &ipv6_hdr(skb)->saddr))) {
1da177e4
LT
79 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
80
81 /* Do not check for IFF_ALLMULTI; multicast routing
82 is not supported in any case.
83 */
84 if (newskb)
b2e0b385 85 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
7026b1dd 86 sk, newskb, NULL, newskb->dev,
95603e22 87 dev_loopback_xmit);
1da177e4 88
0660e03f 89 if (ipv6_hdr(skb)->hop_limit == 0) {
3bd653c8
DL
90 IP6_INC_STATS(dev_net(dev), idev,
91 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
92 kfree_skb(skb);
93 return 0;
94 }
95 }
96
edf391ff
NH
97 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
98 skb->len);
dd408515
HFS
99
100 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
101 IPV6_ADDR_SCOPE_NODELOCAL &&
102 !(dev->flags & IFF_LOOPBACK)) {
103 kfree_skb(skb);
104 return 0;
105 }
1da177e4
LT
106 }
107
6fd6ce20 108 rcu_read_lock_bh();
2647a9b0 109 nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
6fd6ce20
YH
110 neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
111 if (unlikely(!neigh))
112 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
113 if (!IS_ERR(neigh)) {
114 ret = dst_neigh_output(dst, neigh, skb);
115 rcu_read_unlock_bh();
116 return ret;
117 }
118 rcu_read_unlock_bh();
05e3aa09 119
7f88c6b2
HFS
120 IP6_INC_STATS(dev_net(dst->dev),
121 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
9e508490
JE
122 kfree_skb(skb);
123 return -EINVAL;
1da177e4
LT
124}
125
7026b1dd 126static int ip6_finish_output(struct sock *sk, struct sk_buff *skb)
9e508490
JE
127{
128 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
9037c357
JP
129 dst_allfrag(skb_dst(skb)) ||
130 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
7026b1dd 131 return ip6_fragment(sk, skb, ip6_finish_output2);
9e508490 132 else
7026b1dd 133 return ip6_finish_output2(sk, skb);
9e508490
JE
134}
135
aad88724 136int ip6_output(struct sock *sk, struct sk_buff *skb)
1da177e4 137{
9e508490 138 struct net_device *dev = skb_dst(skb)->dev;
adf30907 139 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
778d80be 140 if (unlikely(idev->cnf.disable_ipv6)) {
9e508490 141 IP6_INC_STATS(dev_net(dev), idev,
3bd653c8 142 IPSTATS_MIB_OUTDISCARDS);
778d80be
YH
143 kfree_skb(skb);
144 return 0;
145 }
146
7026b1dd
DM
147 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, sk, skb,
148 NULL, dev,
9c6eb28a
JE
149 ip6_finish_output,
150 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
1da177e4
LT
151}
152
1da177e4 153/*
b5d43998 154 * xmit an sk_buff (used by TCP, SCTP and DCCP)
1da177e4
LT
155 */
156
4c9483b2 157int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
b903d324 158 struct ipv6_txoptions *opt, int tclass)
1da177e4 159{
3bd653c8 160 struct net *net = sock_net(sk);
b30bd282 161 struct ipv6_pinfo *np = inet6_sk(sk);
4c9483b2 162 struct in6_addr *first_hop = &fl6->daddr;
adf30907 163 struct dst_entry *dst = skb_dst(skb);
1da177e4 164 struct ipv6hdr *hdr;
4c9483b2 165 u8 proto = fl6->flowi6_proto;
1da177e4 166 int seg_len = skb->len;
e651f03a 167 int hlimit = -1;
1da177e4
LT
168 u32 mtu;
169
170 if (opt) {
c2636b4d 171 unsigned int head_room;
1da177e4
LT
172
173 /* First: exthdrs may take lots of space (~8K for now)
174 MAX_HEADER is not enough.
175 */
176 head_room = opt->opt_nflen + opt->opt_flen;
177 seg_len += head_room;
178 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
179
180 if (skb_headroom(skb) < head_room) {
181 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
63159f29 182 if (!skb2) {
adf30907 183 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d
YH
184 IPSTATS_MIB_OUTDISCARDS);
185 kfree_skb(skb);
1da177e4
LT
186 return -ENOBUFS;
187 }
808db80a 188 consume_skb(skb);
a11d206d 189 skb = skb2;
83d7eb29 190 skb_set_owner_w(skb, sk);
1da177e4
LT
191 }
192 if (opt->opt_flen)
193 ipv6_push_frag_opts(skb, opt, &proto);
194 if (opt->opt_nflen)
195 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
196 }
197
e2d1bca7
ACM
198 skb_push(skb, sizeof(struct ipv6hdr));
199 skb_reset_network_header(skb);
0660e03f 200 hdr = ipv6_hdr(skb);
1da177e4
LT
201
202 /*
203 * Fill in the IPv6 header
204 */
b903d324 205 if (np)
1da177e4
LT
206 hlimit = np->hop_limit;
207 if (hlimit < 0)
6b75d090 208 hlimit = ip6_dst_hoplimit(dst);
1da177e4 209
cb1ce2ef 210 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
67800f9b 211 np->autoflowlabel, fl6));
41a1f8ea 212
1da177e4
LT
213 hdr->payload_len = htons(seg_len);
214 hdr->nexthdr = proto;
215 hdr->hop_limit = hlimit;
216
4e3fd7a0
AD
217 hdr->saddr = fl6->saddr;
218 hdr->daddr = *first_hop;
1da177e4 219
9c9c9ad5 220 skb->protocol = htons(ETH_P_IPV6);
a2c2064f 221 skb->priority = sk->sk_priority;
4a19ec58 222 skb->mark = sk->sk_mark;
a2c2064f 223
1da177e4 224 mtu = dst_mtu(dst);
60ff7467 225 if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
adf30907 226 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
edf391ff 227 IPSTATS_MIB_OUT, skb->len);
7026b1dd
DM
228 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb,
229 NULL, dst->dev, dst_output_sk);
1da177e4
LT
230 }
231
1da177e4 232 skb->dev = dst->dev;
f4e53e29 233 ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
adf30907 234 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
235 kfree_skb(skb);
236 return -EMSGSIZE;
237}
7159039a
YH
238EXPORT_SYMBOL(ip6_xmit);
239
1da177e4
LT
240static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
241{
242 struct ip6_ra_chain *ra;
243 struct sock *last = NULL;
244
245 read_lock(&ip6_ra_lock);
246 for (ra = ip6_ra_chain; ra; ra = ra->next) {
247 struct sock *sk = ra->sk;
0bd1b59b
AM
248 if (sk && ra->sel == sel &&
249 (!sk->sk_bound_dev_if ||
250 sk->sk_bound_dev_if == skb->dev->ifindex)) {
1da177e4
LT
251 if (last) {
252 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
253 if (skb2)
254 rawv6_rcv(last, skb2);
255 }
256 last = sk;
257 }
258 }
259
260 if (last) {
261 rawv6_rcv(last, skb);
262 read_unlock(&ip6_ra_lock);
263 return 1;
264 }
265 read_unlock(&ip6_ra_lock);
266 return 0;
267}
268
e21e0b5f
VN
269static int ip6_forward_proxy_check(struct sk_buff *skb)
270{
0660e03f 271 struct ipv6hdr *hdr = ipv6_hdr(skb);
e21e0b5f 272 u8 nexthdr = hdr->nexthdr;
75f2811c 273 __be16 frag_off;
e21e0b5f
VN
274 int offset;
275
276 if (ipv6_ext_hdr(nexthdr)) {
75f2811c 277 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
e21e0b5f
VN
278 if (offset < 0)
279 return 0;
280 } else
281 offset = sizeof(struct ipv6hdr);
282
283 if (nexthdr == IPPROTO_ICMPV6) {
284 struct icmp6hdr *icmp6;
285
d56f90a7
ACM
286 if (!pskb_may_pull(skb, (skb_network_header(skb) +
287 offset + 1 - skb->data)))
e21e0b5f
VN
288 return 0;
289
d56f90a7 290 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
e21e0b5f
VN
291
292 switch (icmp6->icmp6_type) {
293 case NDISC_ROUTER_SOLICITATION:
294 case NDISC_ROUTER_ADVERTISEMENT:
295 case NDISC_NEIGHBOUR_SOLICITATION:
296 case NDISC_NEIGHBOUR_ADVERTISEMENT:
297 case NDISC_REDIRECT:
298 /* For reaction involving unicast neighbor discovery
299 * message destined to the proxied address, pass it to
300 * input function.
301 */
302 return 1;
303 default:
304 break;
305 }
306 }
307
74553b09
VN
308 /*
309 * The proxying router can't forward traffic sent to a link-local
310 * address, so signal the sender and discard the packet. This
311 * behavior is clarified by the MIPv6 specification.
312 */
313 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
314 dst_link_failure(skb);
315 return -1;
316 }
317
e21e0b5f
VN
318 return 0;
319}
320
7026b1dd 321static inline int ip6_forward_finish(struct sock *sk, struct sk_buff *skb)
1da177e4 322{
c29390c6 323 skb_sender_cpu_clear(skb);
7026b1dd 324 return dst_output_sk(sk, skb);
1da177e4
LT
325}
326
0954cf9c
HFS
327static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
328{
329 unsigned int mtu;
330 struct inet6_dev *idev;
331
332 if (dst_metric_locked(dst, RTAX_MTU)) {
333 mtu = dst_metric_raw(dst, RTAX_MTU);
334 if (mtu)
335 return mtu;
336 }
337
338 mtu = IPV6_MIN_MTU;
339 rcu_read_lock();
340 idev = __in6_dev_get(dst->dev);
341 if (idev)
342 mtu = idev->cnf.mtu6;
343 rcu_read_unlock();
344
345 return mtu;
346}
347
fe6cc55f
FW
348static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
349{
418a3156 350 if (skb->len <= mtu)
fe6cc55f
FW
351 return false;
352
60ff7467 353 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
fe6cc55f
FW
354 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
355 return true;
356
60ff7467 357 if (skb->ignore_df)
418a3156
FW
358 return false;
359
fe6cc55f
FW
360 if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
361 return false;
362
363 return true;
364}
365
1da177e4
LT
366int ip6_forward(struct sk_buff *skb)
367{
adf30907 368 struct dst_entry *dst = skb_dst(skb);
0660e03f 369 struct ipv6hdr *hdr = ipv6_hdr(skb);
1da177e4 370 struct inet6_skb_parm *opt = IP6CB(skb);
c346dca1 371 struct net *net = dev_net(dst->dev);
14f3ad6f 372 u32 mtu;
1ab1457c 373
53b7997f 374 if (net->ipv6.devconf_all->forwarding == 0)
1da177e4
LT
375 goto error;
376
090f1166
LR
377 if (skb->pkt_type != PACKET_HOST)
378 goto drop;
379
9ef2e965
HFS
380 if (unlikely(skb->sk))
381 goto drop;
382
4497b076
BH
383 if (skb_warn_if_lro(skb))
384 goto drop;
385
1da177e4 386 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
15c77d8b
ED
387 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
388 IPSTATS_MIB_INDISCARDS);
1da177e4
LT
389 goto drop;
390 }
391
35fc92a9 392 skb_forward_csum(skb);
1da177e4
LT
393
394 /*
395 * We DO NOT make any processing on
396 * RA packets, pushing them to user level AS IS
397 * without ane WARRANTY that application will be able
398 * to interpret them. The reason is that we
399 * cannot make anything clever here.
400 *
401 * We are not end-node, so that if packet contains
402 * AH/ESP, we cannot make anything.
403 * Defragmentation also would be mistake, RA packets
404 * cannot be fragmented, because there is no warranty
405 * that different fragments will go along one path. --ANK
406 */
ab4eb353
YH
407 if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
408 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
1da177e4
LT
409 return 0;
410 }
411
412 /*
413 * check and decrement ttl
414 */
415 if (hdr->hop_limit <= 1) {
416 /* Force OUTPUT device used as source address */
417 skb->dev = dst->dev;
3ffe533c 418 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
15c77d8b
ED
419 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
420 IPSTATS_MIB_INHDRERRORS);
1da177e4
LT
421
422 kfree_skb(skb);
423 return -ETIMEDOUT;
424 }
425
fbea49e1 426 /* XXX: idev->cnf.proxy_ndp? */
53b7997f 427 if (net->ipv6.devconf_all->proxy_ndp &&
8a3edd80 428 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
74553b09
VN
429 int proxied = ip6_forward_proxy_check(skb);
430 if (proxied > 0)
e21e0b5f 431 return ip6_input(skb);
74553b09 432 else if (proxied < 0) {
15c77d8b
ED
433 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
434 IPSTATS_MIB_INDISCARDS);
74553b09
VN
435 goto drop;
436 }
e21e0b5f
VN
437 }
438
1da177e4 439 if (!xfrm6_route_forward(skb)) {
15c77d8b
ED
440 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
441 IPSTATS_MIB_INDISCARDS);
1da177e4
LT
442 goto drop;
443 }
adf30907 444 dst = skb_dst(skb);
1da177e4
LT
445
446 /* IPv6 specs say nothing about it, but it is clear that we cannot
447 send redirects to source routed frames.
1e5dc146 448 We don't send redirects to frames decapsulated from IPsec.
1da177e4 449 */
c45a3dfb 450 if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
1da177e4 451 struct in6_addr *target = NULL;
fbfe95a4 452 struct inet_peer *peer;
1da177e4 453 struct rt6_info *rt;
1da177e4
LT
454
455 /*
456 * incoming and outgoing devices are the same
457 * send a redirect.
458 */
459
460 rt = (struct rt6_info *) dst;
c45a3dfb
DM
461 if (rt->rt6i_flags & RTF_GATEWAY)
462 target = &rt->rt6i_gateway;
1da177e4
LT
463 else
464 target = &hdr->daddr;
465
fd0273d7 466 peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
92d86829 467
1da177e4
LT
468 /* Limit redirects both by destination (here)
469 and by source (inside ndisc_send_redirect)
470 */
fbfe95a4 471 if (inet_peer_xrlim_allow(peer, 1*HZ))
4991969a 472 ndisc_send_redirect(skb, target);
1d861aa4
DM
473 if (peer)
474 inet_putpeer(peer);
5bb1ab09
DS
475 } else {
476 int addrtype = ipv6_addr_type(&hdr->saddr);
477
1da177e4 478 /* This check is security critical. */
f81b2e7d
YH
479 if (addrtype == IPV6_ADDR_ANY ||
480 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
5bb1ab09
DS
481 goto error;
482 if (addrtype & IPV6_ADDR_LINKLOCAL) {
483 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
3ffe533c 484 ICMPV6_NOT_NEIGHBOUR, 0);
5bb1ab09
DS
485 goto error;
486 }
1da177e4
LT
487 }
488
0954cf9c 489 mtu = ip6_dst_mtu_forward(dst);
14f3ad6f
UW
490 if (mtu < IPV6_MIN_MTU)
491 mtu = IPV6_MIN_MTU;
492
fe6cc55f 493 if (ip6_pkt_too_big(skb, mtu)) {
1da177e4
LT
494 /* Again, force OUTPUT device used as source address */
495 skb->dev = dst->dev;
14f3ad6f 496 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
15c77d8b
ED
497 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
498 IPSTATS_MIB_INTOOBIGERRORS);
499 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
500 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
501 kfree_skb(skb);
502 return -EMSGSIZE;
503 }
504
505 if (skb_cow(skb, dst->dev->hard_header_len)) {
15c77d8b
ED
506 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
507 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
508 goto drop;
509 }
510
0660e03f 511 hdr = ipv6_hdr(skb);
1da177e4
LT
512
513 /* Mangling hops number delayed to point after skb COW */
1ab1457c 514
1da177e4
LT
515 hdr->hop_limit--;
516
483a47d2 517 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
2d8dbb04 518 IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
7026b1dd
DM
519 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, NULL, skb,
520 skb->dev, dst->dev,
6e23ae2a 521 ip6_forward_finish);
1da177e4
LT
522
523error:
483a47d2 524 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
1da177e4
LT
525drop:
526 kfree_skb(skb);
527 return -EINVAL;
528}
529
530static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
531{
532 to->pkt_type = from->pkt_type;
533 to->priority = from->priority;
534 to->protocol = from->protocol;
adf30907
ED
535 skb_dst_drop(to);
536 skb_dst_set(to, dst_clone(skb_dst(from)));
1da177e4 537 to->dev = from->dev;
82e91ffe 538 to->mark = from->mark;
1da177e4
LT
539
540#ifdef CONFIG_NET_SCHED
541 to->tc_index = from->tc_index;
542#endif
e7ac05f3 543 nf_copy(to, from);
984bc16c 544 skb_copy_secmark(to, from);
1da177e4
LT
545}
546
7026b1dd
DM
547int ip6_fragment(struct sock *sk, struct sk_buff *skb,
548 int (*output)(struct sock *, struct sk_buff *))
1da177e4 549{
1da177e4 550 struct sk_buff *frag;
67ba4152 551 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
f60e5990 552 struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
553 inet6_sk(skb->sk) : NULL;
1da177e4
LT
554 struct ipv6hdr *tmp_hdr;
555 struct frag_hdr *fh;
556 unsigned int mtu, hlen, left, len;
a7ae1992 557 int hroom, troom;
286c2349 558 __be32 frag_id;
67ba4152 559 int ptr, offset = 0, err = 0;
1da177e4 560 u8 *prevhdr, nexthdr = 0;
adf30907 561 struct net *net = dev_net(skb_dst(skb)->dev);
1da177e4 562
1da177e4
LT
563 hlen = ip6_find_1stfragopt(skb, &prevhdr);
564 nexthdr = *prevhdr;
565
628a5c56 566 mtu = ip6_skb_dst_mtu(skb);
b881ef76
JH
567
568 /* We must not fragment if the socket is set to force MTU discovery
14f3ad6f 569 * or if the skb it not generated by a local socket.
b881ef76 570 */
485fca66
FW
571 if (unlikely(!skb->ignore_df && skb->len > mtu))
572 goto fail_toobig;
a34a101e 573
485fca66
FW
574 if (IP6CB(skb)->frag_max_size) {
575 if (IP6CB(skb)->frag_max_size > mtu)
576 goto fail_toobig;
577
578 /* don't send fragments larger than what we received */
579 mtu = IP6CB(skb)->frag_max_size;
580 if (mtu < IPV6_MIN_MTU)
581 mtu = IPV6_MIN_MTU;
b881ef76
JH
582 }
583
d91675f9
YH
584 if (np && np->frag_size < mtu) {
585 if (np->frag_size)
586 mtu = np->frag_size;
587 }
b72a2b01
HFS
588
589 if (overflow_usub(mtu, hlen + sizeof(struct frag_hdr), &mtu) ||
590 mtu <= 7)
591 goto fail_toobig;
1da177e4 592
fd0273d7
MKL
593 frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
594 &ipv6_hdr(skb)->saddr);
286c2349 595
1d325d21 596 hroom = LL_RESERVED_SPACE(rt->dst.dev);
21dc3301 597 if (skb_has_frag_list(skb)) {
1da177e4 598 int first_len = skb_pagelen(skb);
3d13008e 599 struct sk_buff *frag2;
1da177e4
LT
600
601 if (first_len - hlen > mtu ||
602 ((first_len - hlen) & 7) ||
1d325d21
FW
603 skb_cloned(skb) ||
604 skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
1da177e4
LT
605 goto slow_path;
606
4d9092bb 607 skb_walk_frags(skb, frag) {
1da177e4
LT
608 /* Correct geometry. */
609 if (frag->len > mtu ||
610 ((frag->len & 7) && frag->next) ||
1d325d21 611 skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
3d13008e 612 goto slow_path_clean;
1da177e4 613
1da177e4
LT
614 /* Partially cloned skb? */
615 if (skb_shared(frag))
3d13008e 616 goto slow_path_clean;
2fdba6b0
HX
617
618 BUG_ON(frag->sk);
619 if (skb->sk) {
2fdba6b0
HX
620 frag->sk = skb->sk;
621 frag->destructor = sock_wfree;
2fdba6b0 622 }
3d13008e 623 skb->truesize -= frag->truesize;
1da177e4
LT
624 }
625
626 err = 0;
627 offset = 0;
1da177e4
LT
628 /* BUILD HEADER */
629
9a217a1c 630 *prevhdr = NEXTHDR_FRAGMENT;
d56f90a7 631 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
1da177e4 632 if (!tmp_hdr) {
adf30907 633 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 634 IPSTATS_MIB_FRAGFAILS);
1d325d21
FW
635 err = -ENOMEM;
636 goto fail;
1da177e4 637 }
1d325d21
FW
638 frag = skb_shinfo(skb)->frag_list;
639 skb_frag_list_init(skb);
1da177e4 640
1da177e4 641 __skb_pull(skb, hlen);
67ba4152 642 fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
e2d1bca7
ACM
643 __skb_push(skb, hlen);
644 skb_reset_network_header(skb);
d56f90a7 645 memcpy(skb_network_header(skb), tmp_hdr, hlen);
1da177e4 646
1da177e4
LT
647 fh->nexthdr = nexthdr;
648 fh->reserved = 0;
649 fh->frag_off = htons(IP6_MF);
286c2349 650 fh->identification = frag_id;
1da177e4
LT
651
652 first_len = skb_pagelen(skb);
653 skb->data_len = first_len - skb_headlen(skb);
654 skb->len = first_len;
0660e03f
ACM
655 ipv6_hdr(skb)->payload_len = htons(first_len -
656 sizeof(struct ipv6hdr));
a11d206d 657
d8d1f30b 658 dst_hold(&rt->dst);
1da177e4
LT
659
660 for (;;) {
661 /* Prepare header of the next frame,
662 * before previous one went down. */
663 if (frag) {
664 frag->ip_summed = CHECKSUM_NONE;
badff6d0 665 skb_reset_transport_header(frag);
67ba4152 666 fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
e2d1bca7
ACM
667 __skb_push(frag, hlen);
668 skb_reset_network_header(frag);
d56f90a7
ACM
669 memcpy(skb_network_header(frag), tmp_hdr,
670 hlen);
1da177e4
LT
671 offset += skb->len - hlen - sizeof(struct frag_hdr);
672 fh->nexthdr = nexthdr;
673 fh->reserved = 0;
674 fh->frag_off = htons(offset);
53b24b8f 675 if (frag->next)
1da177e4
LT
676 fh->frag_off |= htons(IP6_MF);
677 fh->identification = frag_id;
0660e03f
ACM
678 ipv6_hdr(frag)->payload_len =
679 htons(frag->len -
680 sizeof(struct ipv6hdr));
1da177e4
LT
681 ip6_copy_metadata(frag, skb);
682 }
1ab1457c 683
7026b1dd 684 err = output(sk, skb);
67ba4152 685 if (!err)
d8d1f30b 686 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 687 IPSTATS_MIB_FRAGCREATES);
dafee490 688
1da177e4
LT
689 if (err || !frag)
690 break;
691
692 skb = frag;
693 frag = skb->next;
694 skb->next = NULL;
695 }
696
a51482bd 697 kfree(tmp_hdr);
1da177e4
LT
698
699 if (err == 0) {
d8d1f30b 700 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 701 IPSTATS_MIB_FRAGOKS);
94e187c0 702 ip6_rt_put(rt);
1da177e4
LT
703 return 0;
704 }
705
46cfd725 706 kfree_skb_list(frag);
1da177e4 707
d8d1f30b 708 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 709 IPSTATS_MIB_FRAGFAILS);
94e187c0 710 ip6_rt_put(rt);
1da177e4 711 return err;
3d13008e
ED
712
713slow_path_clean:
714 skb_walk_frags(skb, frag2) {
715 if (frag2 == frag)
716 break;
717 frag2->sk = NULL;
718 frag2->destructor = NULL;
719 skb->truesize += frag2->truesize;
720 }
1da177e4
LT
721 }
722
723slow_path:
72e843bb
ED
724 if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
725 skb_checksum_help(skb))
726 goto fail;
727
1da177e4
LT
728 left = skb->len - hlen; /* Space per frame */
729 ptr = hlen; /* Where to start from */
730
731 /*
732 * Fragment the datagram.
733 */
734
735 *prevhdr = NEXTHDR_FRAGMENT;
a7ae1992 736 troom = rt->dst.dev->needed_tailroom;
1da177e4
LT
737
738 /*
739 * Keep copying data until we run out.
740 */
67ba4152 741 while (left > 0) {
1da177e4
LT
742 len = left;
743 /* IF: it doesn't fit, use 'mtu' - the data space left */
744 if (len > mtu)
745 len = mtu;
25985edc 746 /* IF: we are not sending up to and including the packet end
1da177e4
LT
747 then align the next start on an eight byte boundary */
748 if (len < left) {
749 len &= ~7;
750 }
1da177e4 751
cbffccc9
JP
752 /* Allocate buffer */
753 frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
754 hroom + troom, GFP_ATOMIC);
755 if (!frag) {
adf30907 756 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 757 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
758 err = -ENOMEM;
759 goto fail;
760 }
761
762 /*
763 * Set up data on packet
764 */
765
766 ip6_copy_metadata(frag, skb);
a7ae1992 767 skb_reserve(frag, hroom);
1da177e4 768 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
c1d2bbe1 769 skb_reset_network_header(frag);
badff6d0 770 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
b0e380b1
ACM
771 frag->transport_header = (frag->network_header + hlen +
772 sizeof(struct frag_hdr));
1da177e4
LT
773
774 /*
775 * Charge the memory for the fragment to any owner
776 * it might possess
777 */
778 if (skb->sk)
779 skb_set_owner_w(frag, skb->sk);
780
781 /*
782 * Copy the packet header into the new buffer.
783 */
d626f62b 784 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
1da177e4
LT
785
786 /*
787 * Build fragment header.
788 */
789 fh->nexthdr = nexthdr;
790 fh->reserved = 0;
286c2349 791 fh->identification = frag_id;
1da177e4
LT
792
793 /*
794 * Copy a block of the IP datagram.
795 */
e3f0b86b
HS
796 BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
797 len));
1da177e4
LT
798 left -= len;
799
800 fh->frag_off = htons(offset);
801 if (left > 0)
802 fh->frag_off |= htons(IP6_MF);
0660e03f
ACM
803 ipv6_hdr(frag)->payload_len = htons(frag->len -
804 sizeof(struct ipv6hdr));
1da177e4
LT
805
806 ptr += len;
807 offset += len;
808
809 /*
810 * Put this fragment into the sending queue.
811 */
7026b1dd 812 err = output(sk, frag);
1da177e4
LT
813 if (err)
814 goto fail;
dafee490 815
adf30907 816 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 817 IPSTATS_MIB_FRAGCREATES);
1da177e4 818 }
adf30907 819 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 820 IPSTATS_MIB_FRAGOKS);
808db80a 821 consume_skb(skb);
1da177e4
LT
822 return err;
823
485fca66
FW
824fail_toobig:
825 if (skb->sk && dst_allfrag(skb_dst(skb)))
826 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
827
828 skb->dev = skb_dst(skb)->dev;
829 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
830 err = -EMSGSIZE;
831
1da177e4 832fail:
adf30907 833 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 834 IPSTATS_MIB_FRAGFAILS);
1ab1457c 835 kfree_skb(skb);
1da177e4
LT
836 return err;
837}
838
b71d1d42
ED
839static inline int ip6_rt_check(const struct rt6key *rt_key,
840 const struct in6_addr *fl_addr,
841 const struct in6_addr *addr_cache)
cf6b1982 842{
a02cec21 843 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
63159f29 844 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
cf6b1982
YH
845}
846
497c615a
HX
847static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
848 struct dst_entry *dst,
b71d1d42 849 const struct flowi6 *fl6)
1da177e4 850{
497c615a 851 struct ipv6_pinfo *np = inet6_sk(sk);
a963a37d 852 struct rt6_info *rt;
1da177e4 853
497c615a
HX
854 if (!dst)
855 goto out;
856
a963a37d
ED
857 if (dst->ops->family != AF_INET6) {
858 dst_release(dst);
859 return NULL;
860 }
861
862 rt = (struct rt6_info *)dst;
497c615a
HX
863 /* Yes, checking route validity in not connected
864 * case is not very simple. Take into account,
865 * that we do not support routing by source, TOS,
67ba4152 866 * and MSG_DONTROUTE --ANK (980726)
497c615a 867 *
cf6b1982
YH
868 * 1. ip6_rt_check(): If route was host route,
869 * check that cached destination is current.
497c615a
HX
870 * If it is network route, we still may
871 * check its validity using saved pointer
872 * to the last used address: daddr_cache.
873 * We do not want to save whole address now,
874 * (because main consumer of this service
875 * is tcp, which has not this problem),
876 * so that the last trick works only on connected
877 * sockets.
878 * 2. oif also should be the same.
879 */
4c9483b2 880 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
8e1ef0a9 881#ifdef CONFIG_IPV6_SUBTREES
4c9483b2 882 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
8e1ef0a9 883#endif
f1900fb5
DA
884 (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
885 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
497c615a
HX
886 dst_release(dst);
887 dst = NULL;
1da177e4
LT
888 }
889
497c615a
HX
890out:
891 return dst;
892}
893
343d60aa 894static int ip6_dst_lookup_tail(struct net *net, struct sock *sk,
4c9483b2 895 struct dst_entry **dst, struct flowi6 *fl6)
497c615a 896{
69cce1d1
DM
897#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
898 struct neighbour *n;
97cac082 899 struct rt6_info *rt;
69cce1d1
DM
900#endif
901 int err;
497c615a 902
e16e888b
MS
903 /* The correct way to handle this would be to do
904 * ip6_route_get_saddr, and then ip6_route_output; however,
905 * the route-specific preferred source forces the
906 * ip6_route_output call _before_ ip6_route_get_saddr.
907 *
908 * In source specific routing (no src=any default route),
909 * ip6_route_output will fail given src=any saddr, though, so
910 * that's why we try it again later.
911 */
912 if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
913 struct rt6_info *rt;
914 bool had_dst = *dst != NULL;
1da177e4 915
e16e888b
MS
916 if (!had_dst)
917 *dst = ip6_route_output(net, sk, fl6);
918 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
c3968a85
DW
919 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
920 sk ? inet6_sk(sk)->srcprefs : 0,
921 &fl6->saddr);
44456d37 922 if (err)
1da177e4 923 goto out_err_release;
e16e888b
MS
924
925 /* If we had an erroneous initial result, pretend it
926 * never existed and let the SA-enabled version take
927 * over.
928 */
929 if (!had_dst && (*dst)->error) {
930 dst_release(*dst);
931 *dst = NULL;
932 }
1da177e4
LT
933 }
934
e16e888b
MS
935 if (!*dst)
936 *dst = ip6_route_output(net, sk, fl6);
937
938 err = (*dst)->error;
939 if (err)
940 goto out_err_release;
941
95c385b4 942#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
e550dfb0
NH
943 /*
944 * Here if the dst entry we've looked up
945 * has a neighbour entry that is in the INCOMPLETE
946 * state and the src address from the flow is
947 * marked as OPTIMISTIC, we release the found
948 * dst entry and replace it instead with the
949 * dst entry of the nexthop router
950 */
c56bf6fe 951 rt = (struct rt6_info *) *dst;
707be1ff 952 rcu_read_lock_bh();
2647a9b0
MKL
953 n = __ipv6_neigh_lookup_noref(rt->dst.dev,
954 rt6_nexthop(rt, &fl6->daddr));
707be1ff
YH
955 err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
956 rcu_read_unlock_bh();
957
958 if (err) {
e550dfb0 959 struct inet6_ifaddr *ifp;
4c9483b2 960 struct flowi6 fl_gw6;
e550dfb0
NH
961 int redirect;
962
4c9483b2 963 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
e550dfb0
NH
964 (*dst)->dev, 1);
965
966 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
967 if (ifp)
968 in6_ifa_put(ifp);
969
970 if (redirect) {
971 /*
972 * We need to get the dst entry for the
973 * default router instead
974 */
975 dst_release(*dst);
4c9483b2
DM
976 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
977 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
978 *dst = ip6_route_output(net, sk, &fl_gw6);
e5d08d71
IM
979 err = (*dst)->error;
980 if (err)
e550dfb0 981 goto out_err_release;
95c385b4 982 }
e550dfb0 983 }
95c385b4
NH
984#endif
985
1da177e4
LT
986 return 0;
987
988out_err_release:
ca46f9c8 989 if (err == -ENETUNREACH)
5ac68e7c 990 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
991 dst_release(*dst);
992 *dst = NULL;
993 return err;
994}
34a0b3cd 995
497c615a
HX
996/**
997 * ip6_dst_lookup - perform route lookup on flow
998 * @sk: socket which provides route info
999 * @dst: pointer to dst_entry * for result
4c9483b2 1000 * @fl6: flow to lookup
497c615a
HX
1001 *
1002 * This function performs a route lookup on the given flow.
1003 *
1004 * It returns zero on success, or a standard errno code on error.
1005 */
343d60aa
RP
1006int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1007 struct flowi6 *fl6)
497c615a
HX
1008{
1009 *dst = NULL;
343d60aa 1010 return ip6_dst_lookup_tail(net, sk, dst, fl6);
497c615a 1011}
3cf3dc6c
ACM
1012EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1013
497c615a 1014/**
68d0c6d3
DM
1015 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1016 * @sk: socket which provides route info
4c9483b2 1017 * @fl6: flow to lookup
68d0c6d3 1018 * @final_dst: final destination address for ipsec lookup
68d0c6d3
DM
1019 *
1020 * This function performs a route lookup on the given flow.
1021 *
1022 * It returns a valid dst pointer on success, or a pointer encoded
1023 * error code.
1024 */
4c9483b2 1025struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
0e0d44ab 1026 const struct in6_addr *final_dst)
68d0c6d3
DM
1027{
1028 struct dst_entry *dst = NULL;
1029 int err;
1030
343d60aa 1031 err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
68d0c6d3
DM
1032 if (err)
1033 return ERR_PTR(err);
1034 if (final_dst)
4e3fd7a0 1035 fl6->daddr = *final_dst;
a0a9f33b
PS
1036 if (!fl6->flowi6_oif)
1037 fl6->flowi6_oif = dst->dev->ifindex;
2774c131 1038
f92ee619 1039 return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
68d0c6d3
DM
1040}
1041EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1042
1043/**
1044 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
497c615a 1045 * @sk: socket which provides the dst cache and route info
4c9483b2 1046 * @fl6: flow to lookup
68d0c6d3 1047 * @final_dst: final destination address for ipsec lookup
497c615a
HX
1048 *
1049 * This function performs a route lookup on the given flow with the
1050 * possibility of using the cached route in the socket if it is valid.
1051 * It will take the socket dst lock when operating on the dst cache.
1052 * As a result, this function can only be used in process context.
1053 *
68d0c6d3
DM
1054 * It returns a valid dst pointer on success, or a pointer encoded
1055 * error code.
497c615a 1056 */
4c9483b2 1057struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
0e0d44ab 1058 const struct in6_addr *final_dst)
497c615a 1059{
68d0c6d3
DM
1060 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1061 int err;
497c615a 1062
4c9483b2 1063 dst = ip6_sk_dst_check(sk, dst, fl6);
68d0c6d3 1064
343d60aa 1065 err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
68d0c6d3
DM
1066 if (err)
1067 return ERR_PTR(err);
1068 if (final_dst)
4e3fd7a0 1069 fl6->daddr = *final_dst;
2774c131 1070
f92ee619 1071 return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
497c615a 1072}
68d0c6d3 1073EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
497c615a 1074
34a0b3cd 1075static inline int ip6_ufo_append_data(struct sock *sk,
0bbe84a6 1076 struct sk_buff_head *queue,
e89e9cf5
AR
1077 int getfrag(void *from, char *to, int offset, int len,
1078 int odd, struct sk_buff *skb),
1079 void *from, int length, int hh_len, int fragheaderlen,
67ba4152 1080 int transhdrlen, int mtu, unsigned int flags,
fd0273d7 1081 const struct flowi6 *fl6)
e89e9cf5
AR
1082
1083{
1084 struct sk_buff *skb;
1085 int err;
1086
1087 /* There is support for UDP large send offload by network
1088 * device, so create one single skb packet containing complete
1089 * udp datagram
1090 */
0bbe84a6 1091 skb = skb_peek_tail(queue);
63159f29 1092 if (!skb) {
e89e9cf5
AR
1093 skb = sock_alloc_send_skb(sk,
1094 hh_len + fragheaderlen + transhdrlen + 20,
1095 (flags & MSG_DONTWAIT), &err);
63159f29 1096 if (!skb)
504744e4 1097 return err;
e89e9cf5
AR
1098
1099 /* reserve space for Hardware header */
1100 skb_reserve(skb, hh_len);
1101
1102 /* create space for UDP/IP header */
67ba4152 1103 skb_put(skb, fragheaderlen + transhdrlen);
e89e9cf5
AR
1104
1105 /* initialize network header pointer */
c1d2bbe1 1106 skb_reset_network_header(skb);
e89e9cf5
AR
1107
1108 /* initialize protocol header pointer */
b0e380b1 1109 skb->transport_header = skb->network_header + fragheaderlen;
e89e9cf5 1110
9c9c9ad5 1111 skb->protocol = htons(ETH_P_IPV6);
e89e9cf5 1112 skb->csum = 0;
e89e9cf5 1113
0bbe84a6 1114 __skb_queue_tail(queue, skb);
c547dbf5
JP
1115 } else if (skb_is_gso(skb)) {
1116 goto append;
e89e9cf5 1117 }
e89e9cf5 1118
c547dbf5
JP
1119 skb->ip_summed = CHECKSUM_PARTIAL;
1120 /* Specify the length of each IPv6 datagram fragment.
1121 * It has to be a multiple of 8.
1122 */
1123 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1124 sizeof(struct frag_hdr)) & ~7;
1125 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
fd0273d7
MKL
1126 skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk),
1127 &fl6->daddr,
1128 &fl6->saddr);
c547dbf5
JP
1129
1130append:
2811ebac
HFS
1131 return skb_append_datato_frags(sk, skb, getfrag, from,
1132 (length - transhdrlen));
e89e9cf5 1133}
1da177e4 1134
0178b695
HX
1135static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1136 gfp_t gfp)
1137{
1138 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1139}
1140
1141static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1142 gfp_t gfp)
1143{
1144 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1145}
1146
75a493e6 1147static void ip6_append_data_mtu(unsigned int *mtu,
0c183379
G
1148 int *maxfraglen,
1149 unsigned int fragheaderlen,
1150 struct sk_buff *skb,
75a493e6 1151 struct rt6_info *rt,
e367c2d0 1152 unsigned int orig_mtu)
0c183379
G
1153{
1154 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
63159f29 1155 if (!skb) {
0c183379 1156 /* first fragment, reserve header_len */
e367c2d0 1157 *mtu = orig_mtu - rt->dst.header_len;
0c183379
G
1158
1159 } else {
1160 /*
1161 * this fragment is not first, the headers
1162 * space is regarded as data space.
1163 */
e367c2d0 1164 *mtu = orig_mtu;
0c183379
G
1165 }
1166 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1167 + fragheaderlen - sizeof(struct frag_hdr);
1168 }
1169}
1170
366e41d9
VY
1171static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1172 struct inet6_cork *v6_cork,
1173 int hlimit, int tclass, struct ipv6_txoptions *opt,
1174 struct rt6_info *rt, struct flowi6 *fl6)
1175{
1176 struct ipv6_pinfo *np = inet6_sk(sk);
1177 unsigned int mtu;
1178
1179 /*
1180 * setup for corking
1181 */
1182 if (opt) {
1183 if (WARN_ON(v6_cork->opt))
1184 return -EINVAL;
1185
1186 v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation);
63159f29 1187 if (unlikely(!v6_cork->opt))
366e41d9
VY
1188 return -ENOBUFS;
1189
1190 v6_cork->opt->tot_len = opt->tot_len;
1191 v6_cork->opt->opt_flen = opt->opt_flen;
1192 v6_cork->opt->opt_nflen = opt->opt_nflen;
1193
1194 v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1195 sk->sk_allocation);
1196 if (opt->dst0opt && !v6_cork->opt->dst0opt)
1197 return -ENOBUFS;
1198
1199 v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1200 sk->sk_allocation);
1201 if (opt->dst1opt && !v6_cork->opt->dst1opt)
1202 return -ENOBUFS;
1203
1204 v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1205 sk->sk_allocation);
1206 if (opt->hopopt && !v6_cork->opt->hopopt)
1207 return -ENOBUFS;
1208
1209 v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1210 sk->sk_allocation);
1211 if (opt->srcrt && !v6_cork->opt->srcrt)
1212 return -ENOBUFS;
1213
1214 /* need source address above miyazawa*/
1215 }
1216 dst_hold(&rt->dst);
1217 cork->base.dst = &rt->dst;
1218 cork->fl.u.ip6 = *fl6;
1219 v6_cork->hop_limit = hlimit;
1220 v6_cork->tclass = tclass;
1221 if (rt->dst.flags & DST_XFRM_TUNNEL)
1222 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1223 rt->dst.dev->mtu : dst_mtu(&rt->dst);
1224 else
1225 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1226 rt->dst.dev->mtu : dst_mtu(rt->dst.path);
1227 if (np->frag_size < mtu) {
1228 if (np->frag_size)
1229 mtu = np->frag_size;
1230 }
1231 cork->base.fragsize = mtu;
1232 if (dst_allfrag(rt->dst.path))
1233 cork->base.flags |= IPCORK_ALLFRAG;
1234 cork->base.length = 0;
1235
1236 return 0;
1237}
1238
0bbe84a6
VY
1239static int __ip6_append_data(struct sock *sk,
1240 struct flowi6 *fl6,
1241 struct sk_buff_head *queue,
1242 struct inet_cork *cork,
1243 struct inet6_cork *v6_cork,
1244 struct page_frag *pfrag,
1245 int getfrag(void *from, char *to, int offset,
1246 int len, int odd, struct sk_buff *skb),
1247 void *from, int length, int transhdrlen,
1248 unsigned int flags, int dontfrag)
1da177e4 1249{
0c183379 1250 struct sk_buff *skb, *skb_prev = NULL;
e367c2d0 1251 unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
0bbe84a6
VY
1252 int exthdrlen = 0;
1253 int dst_exthdrlen = 0;
1da177e4 1254 int hh_len;
1da177e4
LT
1255 int copy;
1256 int err;
1257 int offset = 0;
a693e698 1258 __u8 tx_flags = 0;
09c2d251 1259 u32 tskey = 0;
0bbe84a6
VY
1260 struct rt6_info *rt = (struct rt6_info *)cork->dst;
1261 struct ipv6_txoptions *opt = v6_cork->opt;
32dce968 1262 int csummode = CHECKSUM_NONE;
1da177e4 1263
0bbe84a6
VY
1264 skb = skb_peek_tail(queue);
1265 if (!skb) {
1266 exthdrlen = opt ? opt->opt_flen : 0;
7efdba5b 1267 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1da177e4 1268 }
0bbe84a6 1269
366e41d9 1270 mtu = cork->fragsize;
e367c2d0 1271 orig_mtu = mtu;
1da177e4 1272
d8d1f30b 1273 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1da177e4 1274
a1b05140 1275 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
b4ce9277 1276 (opt ? opt->opt_nflen : 0);
4df98e76
HFS
1277 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1278 sizeof(struct frag_hdr);
1da177e4
LT
1279
1280 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
4df98e76
HFS
1281 unsigned int maxnonfragsize, headersize;
1282
1283 headersize = sizeof(struct ipv6hdr) +
3a1cebe7 1284 (opt ? opt->opt_flen + opt->opt_nflen : 0) +
4df98e76
HFS
1285 (dst_allfrag(&rt->dst) ?
1286 sizeof(struct frag_hdr) : 0) +
1287 rt->rt6i_nfheader_len;
1288
60ff7467 1289 if (ip6_sk_ignore_df(sk))
0b95227a
HFS
1290 maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1291 else
1292 maxnonfragsize = mtu;
4df98e76
HFS
1293
1294 /* dontfrag active */
1295 if ((cork->length + length > mtu - headersize) && dontfrag &&
1296 (sk->sk_protocol == IPPROTO_UDP ||
1297 sk->sk_protocol == IPPROTO_RAW)) {
1298 ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1299 sizeof(struct ipv6hdr));
1300 goto emsgsize;
1301 }
1302
1303 if (cork->length + length > maxnonfragsize - headersize) {
1304emsgsize:
1305 ipv6_local_error(sk, EMSGSIZE, fl6,
1306 mtu - headersize +
1307 sizeof(struct ipv6hdr));
1da177e4
LT
1308 return -EMSGSIZE;
1309 }
1310 }
1311
09c2d251 1312 if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
bf84a010 1313 sock_tx_timestamp(sk, &tx_flags);
09c2d251
WB
1314 if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
1315 sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1316 tskey = sk->sk_tskey++;
1317 }
a693e698 1318
32dce968
VY
1319 /* If this is the first and only packet and device
1320 * supports checksum offloading, let's use it.
e87a468e
VY
1321 * Use transhdrlen, same as IPv4, because partial
1322 * sums only work when transhdrlen is set.
32dce968 1323 */
e87a468e 1324 if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
32dce968
VY
1325 length + fragheaderlen < mtu &&
1326 rt->dst.dev->features & NETIF_F_V6_CSUM &&
1327 !exthdrlen)
1328 csummode = CHECKSUM_PARTIAL;
1da177e4
LT
1329 /*
1330 * Let's try using as much space as possible.
1331 * Use MTU if total length of the message fits into the MTU.
1332 * Otherwise, we need to reserve fragment header and
1333 * fragment alignment (= 8-15 octects, in total).
1334 *
1335 * Note that we may need to "move" the data from the tail of
1ab1457c 1336 * of the buffer to the new fragment when we split
1da177e4
LT
1337 * the message.
1338 *
1ab1457c 1339 * FIXME: It may be fragmented into multiple chunks
1da177e4
LT
1340 * at once if non-fragmentable extension headers
1341 * are too large.
1ab1457c 1342 * --yoshfuji
1da177e4
LT
1343 */
1344
2811ebac
HFS
1345 cork->length += length;
1346 if (((length > mtu) ||
1347 (skb && skb_is_gso(skb))) &&
1348 (sk->sk_protocol == IPPROTO_UDP) &&
acf8dd0a
MK
1349 (rt->dst.dev->features & NETIF_F_UFO) &&
1350 (sk->sk_type == SOCK_DGRAM)) {
0bbe84a6 1351 err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
2811ebac 1352 hh_len, fragheaderlen,
fd0273d7 1353 transhdrlen, mtu, flags, fl6);
2811ebac
HFS
1354 if (err)
1355 goto error;
1356 return 0;
e89e9cf5 1357 }
1da177e4 1358
2811ebac 1359 if (!skb)
1da177e4
LT
1360 goto alloc_new_skb;
1361
1362 while (length > 0) {
1363 /* Check if the remaining data fits into current packet. */
bdc712b4 1364 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1da177e4
LT
1365 if (copy < length)
1366 copy = maxfraglen - skb->len;
1367
1368 if (copy <= 0) {
1369 char *data;
1370 unsigned int datalen;
1371 unsigned int fraglen;
1372 unsigned int fraggap;
1373 unsigned int alloclen;
1da177e4 1374alloc_new_skb:
1da177e4 1375 /* There's no room in the current skb */
0c183379
G
1376 if (skb)
1377 fraggap = skb->len - maxfraglen;
1da177e4
LT
1378 else
1379 fraggap = 0;
0c183379 1380 /* update mtu and maxfraglen if necessary */
63159f29 1381 if (!skb || !skb_prev)
0c183379 1382 ip6_append_data_mtu(&mtu, &maxfraglen,
75a493e6 1383 fragheaderlen, skb, rt,
e367c2d0 1384 orig_mtu);
0c183379
G
1385
1386 skb_prev = skb;
1da177e4
LT
1387
1388 /*
1389 * If remaining data exceeds the mtu,
1390 * we know we need more fragment(s).
1391 */
1392 datalen = length + fraggap;
1da177e4 1393
0c183379
G
1394 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1395 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1da177e4 1396 if ((flags & MSG_MORE) &&
d8d1f30b 1397 !(rt->dst.dev->features&NETIF_F_SG))
1da177e4
LT
1398 alloclen = mtu;
1399 else
1400 alloclen = datalen + fragheaderlen;
1401
299b0767
SK
1402 alloclen += dst_exthdrlen;
1403
0c183379
G
1404 if (datalen != length + fraggap) {
1405 /*
1406 * this is not the last fragment, the trailer
1407 * space is regarded as data space.
1408 */
1409 datalen += rt->dst.trailer_len;
1410 }
1411
1412 alloclen += rt->dst.trailer_len;
1413 fraglen = datalen + fragheaderlen;
1da177e4
LT
1414
1415 /*
1416 * We just reserve space for fragment header.
1ab1457c 1417 * Note: this may be overallocation if the message
1da177e4
LT
1418 * (without MSG_MORE) fits into the MTU.
1419 */
1420 alloclen += sizeof(struct frag_hdr);
1421
1422 if (transhdrlen) {
1423 skb = sock_alloc_send_skb(sk,
1424 alloclen + hh_len,
1425 (flags & MSG_DONTWAIT), &err);
1426 } else {
1427 skb = NULL;
1428 if (atomic_read(&sk->sk_wmem_alloc) <=
1429 2 * sk->sk_sndbuf)
1430 skb = sock_wmalloc(sk,
1431 alloclen + hh_len, 1,
1432 sk->sk_allocation);
63159f29 1433 if (unlikely(!skb))
1da177e4
LT
1434 err = -ENOBUFS;
1435 }
63159f29 1436 if (!skb)
1da177e4
LT
1437 goto error;
1438 /*
1439 * Fill in the control structures
1440 */
9c9c9ad5 1441 skb->protocol = htons(ETH_P_IPV6);
32dce968 1442 skb->ip_summed = csummode;
1da177e4 1443 skb->csum = 0;
1f85851e
G
1444 /* reserve for fragmentation and ipsec header */
1445 skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1446 dst_exthdrlen);
1da177e4 1447
11878b40
WB
1448 /* Only the initial fragment is time stamped */
1449 skb_shinfo(skb)->tx_flags = tx_flags;
1450 tx_flags = 0;
09c2d251
WB
1451 skb_shinfo(skb)->tskey = tskey;
1452 tskey = 0;
a693e698 1453
1da177e4
LT
1454 /*
1455 * Find where to start putting bytes
1456 */
1f85851e
G
1457 data = skb_put(skb, fraglen);
1458 skb_set_network_header(skb, exthdrlen);
1459 data += fragheaderlen;
b0e380b1
ACM
1460 skb->transport_header = (skb->network_header +
1461 fragheaderlen);
1da177e4
LT
1462 if (fraggap) {
1463 skb->csum = skb_copy_and_csum_bits(
1464 skb_prev, maxfraglen,
1465 data + transhdrlen, fraggap, 0);
1466 skb_prev->csum = csum_sub(skb_prev->csum,
1467 skb->csum);
1468 data += fraggap;
e9fa4f7b 1469 pskb_trim_unique(skb_prev, maxfraglen);
1da177e4
LT
1470 }
1471 copy = datalen - transhdrlen - fraggap;
299b0767 1472
1da177e4
LT
1473 if (copy < 0) {
1474 err = -EINVAL;
1475 kfree_skb(skb);
1476 goto error;
1477 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1478 err = -EFAULT;
1479 kfree_skb(skb);
1480 goto error;
1481 }
1482
1483 offset += copy;
1484 length -= datalen - fraggap;
1485 transhdrlen = 0;
1486 exthdrlen = 0;
299b0767 1487 dst_exthdrlen = 0;
1da177e4
LT
1488
1489 /*
1490 * Put the packet on the pending queue
1491 */
0bbe84a6 1492 __skb_queue_tail(queue, skb);
1da177e4
LT
1493 continue;
1494 }
1495
1496 if (copy > length)
1497 copy = length;
1498
d8d1f30b 1499 if (!(rt->dst.dev->features&NETIF_F_SG)) {
1da177e4
LT
1500 unsigned int off;
1501
1502 off = skb->len;
1503 if (getfrag(from, skb_put(skb, copy),
1504 offset, copy, off, skb) < 0) {
1505 __skb_trim(skb, off);
1506 err = -EFAULT;
1507 goto error;
1508 }
1509 } else {
1510 int i = skb_shinfo(skb)->nr_frags;
1da177e4 1511
5640f768
ED
1512 err = -ENOMEM;
1513 if (!sk_page_frag_refill(sk, pfrag))
1da177e4 1514 goto error;
5640f768
ED
1515
1516 if (!skb_can_coalesce(skb, i, pfrag->page,
1517 pfrag->offset)) {
1518 err = -EMSGSIZE;
1519 if (i == MAX_SKB_FRAGS)
1520 goto error;
1521
1522 __skb_fill_page_desc(skb, i, pfrag->page,
1523 pfrag->offset, 0);
1524 skb_shinfo(skb)->nr_frags = ++i;
1525 get_page(pfrag->page);
1da177e4 1526 }
5640f768 1527 copy = min_t(int, copy, pfrag->size - pfrag->offset);
9e903e08 1528 if (getfrag(from,
5640f768
ED
1529 page_address(pfrag->page) + pfrag->offset,
1530 offset, copy, skb->len, skb) < 0)
1531 goto error_efault;
1532
1533 pfrag->offset += copy;
1534 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1da177e4
LT
1535 skb->len += copy;
1536 skb->data_len += copy;
f945fa7a
HX
1537 skb->truesize += copy;
1538 atomic_add(copy, &sk->sk_wmem_alloc);
1da177e4
LT
1539 }
1540 offset += copy;
1541 length -= copy;
1542 }
5640f768 1543
1da177e4 1544 return 0;
5640f768
ED
1545
1546error_efault:
1547 err = -EFAULT;
1da177e4 1548error:
bdc712b4 1549 cork->length -= length;
3bd653c8 1550 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1551 return err;
1552}
0bbe84a6
VY
1553
1554int ip6_append_data(struct sock *sk,
1555 int getfrag(void *from, char *to, int offset, int len,
1556 int odd, struct sk_buff *skb),
1557 void *from, int length, int transhdrlen, int hlimit,
1558 int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
1559 struct rt6_info *rt, unsigned int flags, int dontfrag)
1560{
1561 struct inet_sock *inet = inet_sk(sk);
1562 struct ipv6_pinfo *np = inet6_sk(sk);
1563 int exthdrlen;
1564 int err;
1565
1566 if (flags&MSG_PROBE)
1567 return 0;
1568 if (skb_queue_empty(&sk->sk_write_queue)) {
1569 /*
1570 * setup for corking
1571 */
1572 err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit,
1573 tclass, opt, rt, fl6);
1574 if (err)
1575 return err;
1576
1577 exthdrlen = (opt ? opt->opt_flen : 0);
1578 length += exthdrlen;
1579 transhdrlen += exthdrlen;
1580 } else {
1581 fl6 = &inet->cork.fl.u.ip6;
1582 transhdrlen = 0;
1583 }
1584
1585 return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1586 &np->cork, sk_page_frag(sk), getfrag,
1587 from, length, transhdrlen, flags, dontfrag);
1588}
a495f836 1589EXPORT_SYMBOL_GPL(ip6_append_data);
1da177e4 1590
366e41d9
VY
1591static void ip6_cork_release(struct inet_cork_full *cork,
1592 struct inet6_cork *v6_cork)
bf138862 1593{
366e41d9
VY
1594 if (v6_cork->opt) {
1595 kfree(v6_cork->opt->dst0opt);
1596 kfree(v6_cork->opt->dst1opt);
1597 kfree(v6_cork->opt->hopopt);
1598 kfree(v6_cork->opt->srcrt);
1599 kfree(v6_cork->opt);
1600 v6_cork->opt = NULL;
0178b695
HX
1601 }
1602
366e41d9
VY
1603 if (cork->base.dst) {
1604 dst_release(cork->base.dst);
1605 cork->base.dst = NULL;
1606 cork->base.flags &= ~IPCORK_ALLFRAG;
bf138862 1607 }
366e41d9 1608 memset(&cork->fl, 0, sizeof(cork->fl));
bf138862
PE
1609}
1610
6422398c
VY
1611struct sk_buff *__ip6_make_skb(struct sock *sk,
1612 struct sk_buff_head *queue,
1613 struct inet_cork_full *cork,
1614 struct inet6_cork *v6_cork)
1da177e4
LT
1615{
1616 struct sk_buff *skb, *tmp_skb;
1617 struct sk_buff **tail_skb;
1618 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1da177e4 1619 struct ipv6_pinfo *np = inet6_sk(sk);
3bd653c8 1620 struct net *net = sock_net(sk);
1da177e4 1621 struct ipv6hdr *hdr;
6422398c
VY
1622 struct ipv6_txoptions *opt = v6_cork->opt;
1623 struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1624 struct flowi6 *fl6 = &cork->fl.u.ip6;
4c9483b2 1625 unsigned char proto = fl6->flowi6_proto;
1da177e4 1626
6422398c 1627 skb = __skb_dequeue(queue);
63159f29 1628 if (!skb)
1da177e4
LT
1629 goto out;
1630 tail_skb = &(skb_shinfo(skb)->frag_list);
1631
1632 /* move skb->data to ip header from ext header */
d56f90a7 1633 if (skb->data < skb_network_header(skb))
bbe735e4 1634 __skb_pull(skb, skb_network_offset(skb));
6422398c 1635 while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
cfe1fc77 1636 __skb_pull(tmp_skb, skb_network_header_len(skb));
1da177e4
LT
1637 *tail_skb = tmp_skb;
1638 tail_skb = &(tmp_skb->next);
1639 skb->len += tmp_skb->len;
1640 skb->data_len += tmp_skb->len;
1da177e4 1641 skb->truesize += tmp_skb->truesize;
1da177e4
LT
1642 tmp_skb->destructor = NULL;
1643 tmp_skb->sk = NULL;
1da177e4
LT
1644 }
1645
28a89453 1646 /* Allow local fragmentation. */
60ff7467 1647 skb->ignore_df = ip6_sk_ignore_df(sk);
28a89453 1648
4e3fd7a0 1649 *final_dst = fl6->daddr;
cfe1fc77 1650 __skb_pull(skb, skb_network_header_len(skb));
1da177e4
LT
1651 if (opt && opt->opt_flen)
1652 ipv6_push_frag_opts(skb, opt, &proto);
1653 if (opt && opt->opt_nflen)
1654 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1655
e2d1bca7
ACM
1656 skb_push(skb, sizeof(struct ipv6hdr));
1657 skb_reset_network_header(skb);
0660e03f 1658 hdr = ipv6_hdr(skb);
1ab1457c 1659
6422398c 1660 ip6_flow_hdr(hdr, v6_cork->tclass,
cb1ce2ef 1661 ip6_make_flowlabel(net, skb, fl6->flowlabel,
67800f9b 1662 np->autoflowlabel, fl6));
6422398c 1663 hdr->hop_limit = v6_cork->hop_limit;
1da177e4 1664 hdr->nexthdr = proto;
4e3fd7a0
AD
1665 hdr->saddr = fl6->saddr;
1666 hdr->daddr = *final_dst;
1da177e4 1667
a2c2064f 1668 skb->priority = sk->sk_priority;
4a19ec58 1669 skb->mark = sk->sk_mark;
a2c2064f 1670
d8d1f30b 1671 skb_dst_set(skb, dst_clone(&rt->dst));
edf391ff 1672 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
14878f75 1673 if (proto == IPPROTO_ICMPV6) {
adf30907 1674 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
14878f75 1675
43a43b60
HFS
1676 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1677 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
14878f75
DS
1678 }
1679
6422398c
VY
1680 ip6_cork_release(cork, v6_cork);
1681out:
1682 return skb;
1683}
1684
1685int ip6_send_skb(struct sk_buff *skb)
1686{
1687 struct net *net = sock_net(skb->sk);
1688 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1689 int err;
1690
ef76bc23 1691 err = ip6_local_out(skb);
1da177e4
LT
1692 if (err) {
1693 if (err > 0)
6ce9e7b5 1694 err = net_xmit_errno(err);
1da177e4 1695 if (err)
6422398c
VY
1696 IP6_INC_STATS(net, rt->rt6i_idev,
1697 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1698 }
1699
1da177e4 1700 return err;
6422398c
VY
1701}
1702
/* Finish the socket's corked data into one skb and transmit it.
 * An empty cork is not an error.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	return skb ? ip6_send_skb(skb) : 0;
}
a495f836 1713EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1da177e4 1714
0bbe84a6 1715static void __ip6_flush_pending_frames(struct sock *sk,
6422398c
VY
1716 struct sk_buff_head *queue,
1717 struct inet_cork_full *cork,
1718 struct inet6_cork *v6_cork)
1da177e4 1719{
1da177e4
LT
1720 struct sk_buff *skb;
1721
0bbe84a6 1722 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
adf30907
ED
1723 if (skb_dst(skb))
1724 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
e1f52208 1725 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1726 kfree_skb(skb);
1727 }
1728
6422398c 1729 ip6_cork_release(cork, v6_cork);
1da177e4 1730}
0bbe84a6
VY
1731
1732void ip6_flush_pending_frames(struct sock *sk)
1733{
6422398c
VY
1734 __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1735 &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
0bbe84a6 1736}
a495f836 1737EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
6422398c
VY
1738
1739struct sk_buff *ip6_make_skb(struct sock *sk,
1740 int getfrag(void *from, char *to, int offset,
1741 int len, int odd, struct sk_buff *skb),
1742 void *from, int length, int transhdrlen,
1743 int hlimit, int tclass,
1744 struct ipv6_txoptions *opt, struct flowi6 *fl6,
1745 struct rt6_info *rt, unsigned int flags,
1746 int dontfrag)
1747{
1748 struct inet_cork_full cork;
1749 struct inet6_cork v6_cork;
1750 struct sk_buff_head queue;
1751 int exthdrlen = (opt ? opt->opt_flen : 0);
1752 int err;
1753
1754 if (flags & MSG_PROBE)
1755 return NULL;
1756
1757 __skb_queue_head_init(&queue);
1758
1759 cork.base.flags = 0;
1760 cork.base.addr = 0;
1761 cork.base.opt = NULL;
1762 v6_cork.opt = NULL;
1763 err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6);
1764 if (err)
1765 return ERR_PTR(err);
1766
1767 if (dontfrag < 0)
1768 dontfrag = inet6_sk(sk)->dontfrag;
1769
1770 err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
1771 &current->task_frag, getfrag, from,
1772 length + exthdrlen, transhdrlen + exthdrlen,
1773 flags, dontfrag);
1774 if (err) {
1775 __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
1776 return ERR_PTR(err);
1777 }
1778
1779 return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
1780}