net/ipv6/ip6_output.c

/*
 *      IPv6 output functions
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      Based on linux/net/ipv4/ip_output.c
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *      Changes:
 *      A.N.Kuznetsov           :       arithmetic in fragmentation.
 *                                      extension headers are implemented.
 *                                      route changes now work.
 *                                      ip6_forward does not confuse sniffers.
 *                                      etc.
 *
 *      H. von Brand            :       Added missing #include <linux/string.h>
 *      Imran Patel             :       frag id should be in NBO
 *      Kazunori MIYAZAWA @USAGI
 *                              :       add ip6_append_data and related functions
 *                                      for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

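/* __ip6_local_out() fixes up payload_len and runs the LOCAL_OUT netfilter
 * hook.  nf_hook() returns 1 when every hook accepted the packet and the
 * caller should continue with dst_output() itself, which is exactly the
 * case ip6_local_out() below checks for.
 */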
int __ip6_local_out(struct sk_buff *skb)
{
        int len;

        len = skb->len - sizeof(struct ipv6hdr);
        if (len > IPV6_MAXPLEN)
                len = 0;
        ipv6_hdr(skb)->payload_len = htons(len);

        return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
                       skb_dst(skb)->dev, dst_output);
}

int ip6_local_out(struct sk_buff *skb)
{
        int err;

        err = __ip6_local_out(skb);
        if (likely(err == 1))
                err = dst_output(skb);

        return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);

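/* Final L2 transmit step: resolve the neighbour for the route's nexthop
 * under rcu_read_lock_bh() and hand the skb to it.  Multicast packets may
 * additionally be looped back to local listeners first.
 */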
static int ip6_finish_output2(struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct net_device *dev = dst->dev;
        struct neighbour *neigh;
        struct in6_addr *nexthop;
        int ret;

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

                if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
                    ((mroute6_socket(dev_net(dev), skb) &&
                     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
                     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
                                         &ipv6_hdr(skb)->saddr))) {
                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

                        /* Do not check for IFF_ALLMULTI; multicast routing
                           is not supported in any case.
                         */
                        if (newskb)
                                NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
                                        newskb, NULL, newskb->dev,
                                        dev_loopback_xmit);

                        if (ipv6_hdr(skb)->hop_limit == 0) {
                                IP6_INC_STATS(dev_net(dev), idev,
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return 0;
                        }
                }

                IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
                                 skb->len);
        }

        rcu_read_lock_bh();
        nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
        neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
        if (unlikely(!neigh))
                neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
        if (!IS_ERR(neigh)) {
                ret = dst_neigh_output(dst, neigh, skb);
                rcu_read_unlock_bh();
                return ret;
        }
        rcu_read_unlock_bh();

        IP6_INC_STATS_BH(dev_net(dst->dev),
                         ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
        kfree_skb(skb);
        return -EINVAL;
}

static int ip6_finish_output(struct sk_buff *skb)
{
        if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
            dst_allfrag(skb_dst(skb)))
                return ip6_fragment(skb, ip6_finish_output2);
        else
                return ip6_finish_output2(skb);
}

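/* ip6_output() is the usual dst_output() target for IPv6 routes.  The
 * NF_HOOK_COND() below skips the POST_ROUTING hook for packets that
 * netfilter already rerouted (IP6SKB_REROUTED), so the chain is not
 * traversed twice.
 */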
int ip6_output(struct sk_buff *skb)
{
        struct net_device *dev = skb_dst(skb)->dev;
        struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

        if (unlikely(idev->cnf.disable_ipv6)) {
                IP6_INC_STATS(dev_net(dev), idev,
                              IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
                return 0;
        }

        return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
                            ip6_finish_output,
                            !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

/*
 *      xmit an sk_buff (used by TCP, SCTP and DCCP)
 */

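/* Note that ip6_xmit() never fragments: a packet that exceeds the path
 * MTU and is neither allowed to be fragmented locally (local_df) nor GSO
 * is bounced back to the socket with EMSGSIZE via ipv6_local_error().
 */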
int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
             struct ipv6_txoptions *opt, int tclass)
{
        struct net *net = sock_net(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *first_hop = &fl6->daddr;
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr;
        u8 proto = fl6->flowi6_proto;
        int seg_len = skb->len;
        int hlimit = -1;
        u32 mtu;

        if (opt) {
                unsigned int head_room;

                /* First: exthdrs may take lots of space (~8K for now)
                   MAX_HEADER is not enough.
                 */
                head_room = opt->opt_nflen + opt->opt_flen;
                seg_len += head_room;
                head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

                if (skb_headroom(skb) < head_room) {
                        struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
                        if (skb2 == NULL) {
                                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return -ENOBUFS;
                        }
                        consume_skb(skb);
                        skb = skb2;
                        skb_set_owner_w(skb, sk);
                }
                if (opt->opt_flen)
                        ipv6_push_frag_opts(skb, opt, &proto);
                if (opt->opt_nflen)
                        ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
        }

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        /*
         *      Fill in the IPv6 header
         */
        if (np)
                hlimit = np->hop_limit;
        if (hlimit < 0)
                hlimit = ip6_dst_hoplimit(dst);

        ip6_flow_hdr(hdr, tclass, fl6->flowlabel);

        hdr->payload_len = htons(seg_len);
        hdr->nexthdr = proto;
        hdr->hop_limit = hlimit;

        hdr->saddr = fl6->saddr;
        hdr->daddr = *first_hop;

        skb->priority = sk->sk_priority;
        skb->mark = sk->sk_mark;

        mtu = dst_mtu(dst);
        if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
                IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                 IPSTATS_MIB_OUT, skb->len);
                return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
                               dst->dev, dst_output);
        }

        skb->dev = dst->dev;
        ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

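/* Deliver a packet carrying a Router Alert option to every raw socket
 * registered for this alert value (see the IPV6_ROUTER_ALERT socket
 * option).  Each listener but the last gets a clone; returns 1 when the
 * original skb has been consumed by a listener.
 */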
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
        struct ip6_ra_chain *ra;
        struct sock *last = NULL;

        read_lock(&ip6_ra_lock);
        for (ra = ip6_ra_chain; ra; ra = ra->next) {
                struct sock *sk = ra->sk;
                if (sk && ra->sel == sel &&
                    (!sk->sk_bound_dev_if ||
                     sk->sk_bound_dev_if == skb->dev->ifindex)) {
                        if (last) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
                                        rawv6_rcv(last, skb2);
                        }
                        last = sk;
                }
        }

        if (last) {
                rawv6_rcv(last, skb);
                read_unlock(&ip6_ra_lock);
                return 1;
        }
        read_unlock(&ip6_ra_lock);
        return 0;
}

static int ip6_forward_proxy_check(struct sk_buff *skb)
{
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        u8 nexthdr = hdr->nexthdr;
        __be16 frag_off;
        int offset;

        if (ipv6_ext_hdr(nexthdr)) {
                offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
                if (offset < 0)
                        return 0;
        } else
                offset = sizeof(struct ipv6hdr);

        if (nexthdr == IPPROTO_ICMPV6) {
                struct icmp6hdr *icmp6;

                if (!pskb_may_pull(skb, (skb_network_header(skb) +
                                         offset + 1 - skb->data)))
                        return 0;

                icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

                switch (icmp6->icmp6_type) {
                case NDISC_ROUTER_SOLICITATION:
                case NDISC_ROUTER_ADVERTISEMENT:
                case NDISC_NEIGHBOUR_SOLICITATION:
                case NDISC_NEIGHBOUR_ADVERTISEMENT:
                case NDISC_REDIRECT:
                        /* For reaction involving unicast neighbor discovery
                         * message destined to the proxied address, pass it to
                         * input function.
                         */
                        return 1;
                default:
                        break;
                }
        }

        /*
         * The proxying router can't forward traffic sent to a link-local
         * address, so signal the sender and discard the packet. This
         * behavior is clarified by the MIPv6 specification.
         */
        if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
                dst_link_failure(skb);
                return -1;
        }

        return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
        return dst_output(skb);
}

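/* Forwarding path, in rough order: obey the forwarding sysctl, the xfrm
 * forward policy and the hop limit, divert proxy-NDP traffic to local
 * input, send a redirect when the packet leaves on the interface it
 * arrived on, enforce the path MTU, and only after skb_cow() decrement
 * hop_limit and pass the packet to the NF_INET_FORWARD hook.
 */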
int ip6_forward(struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        struct inet6_skb_parm *opt = IP6CB(skb);
        struct net *net = dev_net(dst->dev);
        u32 mtu;

        if (net->ipv6.devconf_all->forwarding == 0)
                goto error;

        if (skb_warn_if_lro(skb))
                goto drop;

        if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
                goto drop;
        }

        if (skb->pkt_type != PACKET_HOST)
                goto drop;

        skb_forward_csum(skb);

        /*
         *      We DO NOT make any processing on
         *      RA packets, pushing them to user level AS IS
         *      without any WARRANTY that application will be able
         *      to interpret them. The reason is that we
         *      cannot make anything clever here.
         *
         *      We are not end-node, so that if packet contains
         *      AH/ESP, we cannot make anything.
         *      Defragmentation also would be mistake, RA packets
         *      cannot be fragmented, because there is no warranty
         *      that different fragments will go along one path. --ANK
         */
        if (opt->ra) {
                u8 *ptr = skb_network_header(skb) + opt->ra;
                if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
                        return 0;
        }

        /*
         *      check and decrement ttl
         */
        if (hdr->hop_limit <= 1) {
                /* Force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
                IP6_INC_STATS_BH(net,
                                 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

                kfree_skb(skb);
                return -ETIMEDOUT;
        }

        /* XXX: idev->cnf.proxy_ndp? */
        if (net->ipv6.devconf_all->proxy_ndp &&
            pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
                int proxied = ip6_forward_proxy_check(skb);
                if (proxied > 0)
                        return ip6_input(skb);
                else if (proxied < 0) {
                        IP6_INC_STATS(net, ip6_dst_idev(dst),
                                      IPSTATS_MIB_INDISCARDS);
                        goto drop;
                }
        }

        if (!xfrm6_route_forward(skb)) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
                goto drop;
        }
        dst = skb_dst(skb);

        /* IPv6 specs say nothing about it, but it is clear that we cannot
           send redirects to source routed frames.
           We don't send redirects to frames decapsulated from IPsec.
         */
        if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
                struct in6_addr *target = NULL;
                struct inet_peer *peer;
                struct rt6_info *rt;

                /*
                 *      incoming and outgoing devices are the same
                 *      send a redirect.
                 */

                rt = (struct rt6_info *) dst;
                if (rt->rt6i_flags & RTF_GATEWAY)
                        target = &rt->rt6i_gateway;
                else
                        target = &hdr->daddr;

                peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);

                /* Limit redirects both by destination (here)
                   and by source (inside ndisc_send_redirect)
                 */
                if (inet_peer_xrlim_allow(peer, 1*HZ))
                        ndisc_send_redirect(skb, target);
                if (peer)
                        inet_putpeer(peer);
        } else {
                int addrtype = ipv6_addr_type(&hdr->saddr);

                /* This check is security critical. */
                if (addrtype == IPV6_ADDR_ANY ||
                    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
                        goto error;
                if (addrtype & IPV6_ADDR_LINKLOCAL) {
                        icmpv6_send(skb, ICMPV6_DEST_UNREACH,
                                    ICMPV6_NOT_NEIGHBOUR, 0);
                        goto error;
                }
        }

        mtu = dst_mtu(dst);
        if (mtu < IPV6_MIN_MTU)
                mtu = IPV6_MIN_MTU;

        if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) ||
            (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) {
                /* Again, force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                IP6_INC_STATS_BH(net,
                                 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
                IP6_INC_STATS_BH(net,
                                 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (skb_cow(skb, dst->dev->hard_header_len)) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
                goto drop;
        }

        hdr = ipv6_hdr(skb);

        /* Mangling hops number delayed to point after skb COW */

        hdr->hop_limit--;

        IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
        IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
                       ip6_forward_finish);

error:
        IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
        kfree_skb(skb);
        return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
        to->pkt_type = from->pkt_type;
        to->priority = from->priority;
        to->protocol = from->protocol;
        skb_dst_drop(to);
        skb_dst_set(to, dst_clone(skb_dst(from)));
        to->dev = from->dev;
        to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
        to->tc_index = from->tc_index;
#endif
        nf_copy(to, from);
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
        to->nf_trace = from->nf_trace;
#endif
        skb_copy_secmark(to, from);
}

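/* Fragmentation runs one of two paths.  The fast path reuses an existing
 * frag_list whose geometry already fits the MTU (every fragment a
 * multiple of 8 bytes, with headroom for the headers); otherwise the slow
 * path allocates fresh skbs and copies the payload block by block.  All
 * fragments share a single Fragment header identification value.
 */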
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
        struct sk_buff *frag;
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
        struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
        struct ipv6hdr *tmp_hdr;
        struct frag_hdr *fh;
        unsigned int mtu, hlen, left, len;
        int hroom, troom;
        __be32 frag_id = 0;
        int ptr, offset = 0, err = 0;
        u8 *prevhdr, nexthdr = 0;
        struct net *net = dev_net(skb_dst(skb)->dev);

        hlen = ip6_find_1stfragopt(skb, &prevhdr);
        nexthdr = *prevhdr;

        mtu = ip6_skb_dst_mtu(skb);

        /* We must not fragment if the socket is set to force MTU discovery
         * or if the skb is not generated by a local socket.
         */
        if (unlikely(!skb->local_df && skb->len > mtu) ||
                     (IP6CB(skb)->frag_max_size &&
                      IP6CB(skb)->frag_max_size > mtu)) {
                if (skb->sk && dst_allfrag(skb_dst(skb)))
                        sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

                skb->dev = skb_dst(skb)->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (np && np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
        mtu -= hlen + sizeof(struct frag_hdr);

        if (skb_has_frag_list(skb)) {
                int first_len = skb_pagelen(skb);
                struct sk_buff *frag2;

                if (first_len - hlen > mtu ||
                    ((first_len - hlen) & 7) ||
                    skb_cloned(skb))
                        goto slow_path;

                skb_walk_frags(skb, frag) {
                        /* Correct geometry. */
                        if (frag->len > mtu ||
                            ((frag->len & 7) && frag->next) ||
                            skb_headroom(frag) < hlen)
                                goto slow_path_clean;

                        /* Partially cloned skb? */
                        if (skb_shared(frag))
                                goto slow_path_clean;

                        BUG_ON(frag->sk);
                        if (skb->sk) {
                                frag->sk = skb->sk;
                                frag->destructor = sock_wfree;
                        }
                        skb->truesize -= frag->truesize;
                }

                err = 0;
                offset = 0;
                frag = skb_shinfo(skb)->frag_list;
                skb_frag_list_init(skb);
                /* BUILD HEADER */

                *prevhdr = NEXTHDR_FRAGMENT;
                tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
                if (!tmp_hdr) {
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_FRAGFAILS);
                        return -ENOMEM;
                }

                __skb_pull(skb, hlen);
                fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
                __skb_push(skb, hlen);
                skb_reset_network_header(skb);
                memcpy(skb_network_header(skb), tmp_hdr, hlen);

                ipv6_select_ident(fh, rt);
                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                fh->frag_off = htons(IP6_MF);
                frag_id = fh->identification;

                first_len = skb_pagelen(skb);
                skb->data_len = first_len - skb_headlen(skb);
                skb->len = first_len;
                ipv6_hdr(skb)->payload_len = htons(first_len -
                                                   sizeof(struct ipv6hdr));

                dst_hold(&rt->dst);

                for (;;) {
                        /* Prepare header of the next frame,
                         * before previous one went down. */
                        if (frag) {
                                frag->ip_summed = CHECKSUM_NONE;
                                skb_reset_transport_header(frag);
                                fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
                                __skb_push(frag, hlen);
                                skb_reset_network_header(frag);
                                memcpy(skb_network_header(frag), tmp_hdr,
                                       hlen);
                                offset += skb->len - hlen - sizeof(struct frag_hdr);
                                fh->nexthdr = nexthdr;
                                fh->reserved = 0;
                                fh->frag_off = htons(offset);
                                if (frag->next != NULL)
                                        fh->frag_off |= htons(IP6_MF);
                                fh->identification = frag_id;
                                ipv6_hdr(frag)->payload_len =
                                                htons(frag->len -
                                                      sizeof(struct ipv6hdr));
                                ip6_copy_metadata(frag, skb);
                        }

                        err = output(skb);
                        if (!err)
                                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                              IPSTATS_MIB_FRAGCREATES);

                        if (err || !frag)
                                break;

                        skb = frag;
                        frag = skb->next;
                        skb->next = NULL;
                }

                kfree(tmp_hdr);

                if (err == 0) {
                        IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                      IPSTATS_MIB_FRAGOKS);
                        ip6_rt_put(rt);
                        return 0;
                }

                while (frag) {
                        skb = frag->next;
                        kfree_skb(frag);
                        frag = skb;
                }

                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                              IPSTATS_MIB_FRAGFAILS);
                ip6_rt_put(rt);
                return err;

slow_path_clean:
                skb_walk_frags(skb, frag2) {
                        if (frag2 == frag)
                                break;
                        frag2->sk = NULL;
                        frag2->destructor = NULL;
                        skb->truesize += frag2->truesize;
                }
        }

slow_path:
        if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
            skb_checksum_help(skb))
                goto fail;

        left = skb->len - hlen;         /* Space per frame */
        ptr = hlen;                     /* Where to start from */

        /*
         *      Fragment the datagram.
         */

        *prevhdr = NEXTHDR_FRAGMENT;
        hroom = LL_RESERVED_SPACE(rt->dst.dev);
        troom = rt->dst.dev->needed_tailroom;

        /*
         *      Keep copying data until we run out.
         */
        while (left > 0) {
                len = left;
                /* IF: it doesn't fit, use 'mtu' - the data space left */
                if (len > mtu)
                        len = mtu;
                /* IF: we are not sending up to and including the packet end
                   then align the next start on an eight byte boundary */
                if (len < left) {
                        len &= ~7;
                }
                /*
                 *      Allocate buffer.
                 */

                if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
                                      hroom + troom, GFP_ATOMIC)) == NULL) {
                        NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_FRAGFAILS);
                        err = -ENOMEM;
                        goto fail;
                }

                /*
                 *      Set up data on packet
                 */

                ip6_copy_metadata(frag, skb);
                skb_reserve(frag, hroom);
                skb_put(frag, len + hlen + sizeof(struct frag_hdr));
                skb_reset_network_header(frag);
                fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
                frag->transport_header = (frag->network_header + hlen +
                                          sizeof(struct frag_hdr));

                /*
                 *      Charge the memory for the fragment to any owner
                 *      it might possess
                 */
                if (skb->sk)
                        skb_set_owner_w(frag, skb->sk);

                /*
                 *      Copy the packet header into the new buffer.
                 */
                skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

                /*
                 *      Build fragment header.
                 */
                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                if (!frag_id) {
                        ipv6_select_ident(fh, rt);
                        frag_id = fh->identification;
                } else
                        fh->identification = frag_id;

                /*
                 *      Copy a block of the IP datagram.
                 */
                if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
                        BUG();
                left -= len;

                fh->frag_off = htons(offset);
                if (left > 0)
                        fh->frag_off |= htons(IP6_MF);
                ipv6_hdr(frag)->payload_len = htons(frag->len -
                                                    sizeof(struct ipv6hdr));

                ptr += len;
                offset += len;

                /*
                 *      Put this fragment into the sending queue.
                 */
                err = output(frag);
                if (err)
                        goto fail;

                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_FRAGCREATES);
        }
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGOKS);
        consume_skb(skb);
        return err;

fail:
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return err;
}

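/* Returns nonzero when a cached route can no longer be trusted for this
 * flow: its key is not a matching host route, and the saved last-used
 * address does not match the flow address either.
 */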
static inline int ip6_rt_check(const struct rt6key *rt_key,
                               const struct in6_addr *fl_addr,
                               const struct in6_addr *addr_cache)
{
        return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
                (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
                                          struct dst_entry *dst,
                                          const struct flowi6 *fl6)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct rt6_info *rt = (struct rt6_info *)dst;

        if (!dst)
                goto out;

        /* Yes, checking route validity in not connected
         * case is not very simple. Take into account,
         * that we do not support routing by source, TOS,
         * and MSG_DONTROUTE            --ANK (980726)
         *
         * 1. ip6_rt_check(): If route was host route,
         *    check that cached destination is current.
         *    If it is network route, we still may
         *    check its validity using saved pointer
         *    to the last used address: daddr_cache.
         *    We do not want to save whole address now,
         *    (because main consumer of this service
         *    is tcp, which has not this problem),
         *    so that the last trick works only on connected
         *    sockets.
         * 2. oif also should be the same.
         */
        if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
            ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
            (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
                dst_release(dst);
                dst = NULL;
        }

out:
        return dst;
}

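/* Common tail of the lookup helpers below: do the routing lookup if
 * needed, fill in an unspecified source address via source address
 * selection and, with CONFIG_IPV6_OPTIMISTIC_DAD, retry through the
 * default router while an optimistic source address is still being
 * validated (see the comment in the #ifdef block).
 */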
static int ip6_dst_lookup_tail(struct sock *sk,
                               struct dst_entry **dst, struct flowi6 *fl6)
{
        struct net *net = sock_net(sk);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        struct neighbour *n;
        struct rt6_info *rt;
#endif
        int err;

        if (*dst == NULL)
                *dst = ip6_route_output(net, sk, fl6);

        if ((err = (*dst)->error))
                goto out_err_release;

        if (ipv6_addr_any(&fl6->saddr)) {
                struct rt6_info *rt = (struct rt6_info *) *dst;
                err = ip6_route_get_saddr(net, rt, &fl6->daddr,
                                          sk ? inet6_sk(sk)->srcprefs : 0,
                                          &fl6->saddr);
                if (err)
                        goto out_err_release;
        }

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        /*
         * Here if the dst entry we've looked up
         * has a neighbour entry that is in the INCOMPLETE
         * state and the src address from the flow is
         * marked as OPTIMISTIC, we release the found
         * dst entry and replace it instead with the
         * dst entry of the nexthop router
         */
        rt = (struct rt6_info *) *dst;
        rcu_read_lock_bh();
        n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr));
        err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
        rcu_read_unlock_bh();

        if (err) {
                struct inet6_ifaddr *ifp;
                struct flowi6 fl_gw6;
                int redirect;

                ifp = ipv6_get_ifaddr(net, &fl6->saddr,
                                      (*dst)->dev, 1);

                redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
                if (ifp)
                        in6_ifa_put(ifp);

                if (redirect) {
                        /*
                         * We need to get the dst entry for the
                         * default router instead
                         */
                        dst_release(*dst);
                        memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
                        memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
                        *dst = ip6_route_output(net, sk, &fl_gw6);
                        if ((err = (*dst)->error))
                                goto out_err_release;
                }
        }
#endif

        return 0;

out_err_release:
        if (err == -ENETUNREACH)
                IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
        dst_release(*dst);
        *dst = NULL;
        return err;
}

/**
 *      ip6_dst_lookup - perform route lookup on flow
 *      @sk: socket which provides route info
 *      @dst: pointer to dst_entry * for result
 *      @fl6: flow to lookup
 *
 *      This function performs a route lookup on the given flow.
 *
 *      It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
{
        *dst = NULL;
        return ip6_dst_lookup_tail(sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *      ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *      @sk: socket which provides route info
 *      @fl6: flow to lookup
 *      @final_dst: final destination address for ipsec lookup
 *      @can_sleep: we are in a sleepable context
 *
 *      This function performs a route lookup on the given flow.
 *
 *      It returns a valid dst pointer on success, or a pointer encoded
 *      error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
                                      const struct in6_addr *final_dst,
                                      bool can_sleep)
{
        struct dst_entry *dst = NULL;
        int err;

        err = ip6_dst_lookup_tail(sk, &dst, fl6);
        if (err)
                return ERR_PTR(err);
        if (final_dst)
                fl6->daddr = *final_dst;
        if (can_sleep)
                fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

        return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *      ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *      @sk: socket which provides the dst cache and route info
 *      @fl6: flow to lookup
 *      @final_dst: final destination address for ipsec lookup
 *      @can_sleep: we are in a sleepable context
 *
 *      This function performs a route lookup on the given flow with the
 *      possibility of using the cached route in the socket if it is valid.
 *      It will take the socket dst lock when operating on the dst cache.
 *      As a result, this function can only be used in process context.
 *
 *      It returns a valid dst pointer on success, or a pointer encoded
 *      error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
                                         const struct in6_addr *final_dst,
                                         bool can_sleep)
{
        struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
        int err;

        dst = ip6_sk_dst_check(sk, dst, fl6);

        err = ip6_dst_lookup_tail(sk, &dst, fl6);
        if (err)
                return ERR_PTR(err);
        if (final_dst)
                fl6->daddr = *final_dst;
        if (can_sleep)
                fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

        return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

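/* UFO path for ip6_append_data(): instead of queueing MTU-sized skbs,
 * build one large skb and let the device (or the GSO layer) segment it.
 * gso_size is the per-fragment payload size, rounded down to a multiple
 * of 8 as the Fragment header offset encoding requires.
 */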
static inline int ip6_ufo_append_data(struct sock *sk,
                        int getfrag(void *from, char *to, int offset, int len,
                        int odd, struct sk_buff *skb),
                        void *from, int length, int hh_len, int fragheaderlen,
                        int transhdrlen, int mtu, unsigned int flags,
                        struct rt6_info *rt)

{
        struct sk_buff *skb;
        int err;

        /* There is support for UDP large send offload by network
         * device, so create one single skb packet containing complete
         * udp datagram
         */
        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
                skb = sock_alloc_send_skb(sk,
                        hh_len + fragheaderlen + transhdrlen + 20,
                        (flags & MSG_DONTWAIT), &err);
                if (skb == NULL)
                        return err;

                /* reserve space for Hardware header */
                skb_reserve(skb, hh_len);

                /* create space for UDP/IP header */
                skb_put(skb, fragheaderlen + transhdrlen);

                /* initialize network header pointer */
                skb_reset_network_header(skb);

                /* initialize protocol header pointer */
                skb->transport_header = skb->network_header + fragheaderlen;

                skb->ip_summed = CHECKSUM_PARTIAL;
                skb->csum = 0;
        }

        err = skb_append_datato_frags(sk, skb, getfrag, from,
                                      (length - transhdrlen));
        if (!err) {
                struct frag_hdr fhdr;

                /* Specify the length of each IPv6 datagram fragment.
                 * It has to be a multiple of 8.
                 */
                skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
                                             sizeof(struct frag_hdr)) & ~7;
                skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
                ipv6_select_ident(&fhdr, rt);
                skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
                __skb_queue_tail(&sk->sk_write_queue, skb);

                return 0;
        }
        /* There is not enough support to do UDP LSO,
         * so follow normal path
         */
        kfree_skb(skb);

        return err;
}

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
                                               gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
                                                gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static void ip6_append_data_mtu(int *mtu,
                                int *maxfraglen,
                                unsigned int fragheaderlen,
                                struct sk_buff *skb,
                                struct rt6_info *rt)
{
        if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
                if (skb == NULL) {
                        /* first fragment, reserve header_len */
                        *mtu = *mtu - rt->dst.header_len;

                } else {
                        /*
                         * this fragment is not first, the headers
                         * space is regarded as data space.
                         */
                        *mtu = dst_mtu(rt->dst.path);
                }
                *maxfraglen = ((*mtu - fragheaderlen) & ~7)
                              + fragheaderlen - sizeof(struct frag_hdr);
        }
}

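/* Queue user data on sk->sk_write_queue so that each resulting skb (plus
 * headers) fits the path MTU.  The first call on an empty queue sets up
 * the cork: the txoptions are duplicated and the dst and flow are saved,
 * so that later appends and the final push see consistent state.  A
 * datagram sender drives it roughly like this (a sketch, not a literal
 * caller):
 *
 *      err = ip6_append_data(sk, getfrag, msg, len, ...);
 *      if (err)
 *              ip6_flush_pending_frames(sk);
 *      else if (!corked)
 *              err = ip6_push_pending_frames(sk);
 *
 * which matches the pattern used by udpv6_sendmsg().
 */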
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
        int offset, int len, int odd, struct sk_buff *skb),
        void *from, int length, int transhdrlen,
        int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
        struct rt6_info *rt, unsigned int flags, int dontfrag)
{
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct inet_cork *cork;
        struct sk_buff *skb, *skb_prev = NULL;
        unsigned int maxfraglen, fragheaderlen;
        int exthdrlen;
        int dst_exthdrlen;
        int hh_len;
        int mtu;
        int copy;
        int err;
        int offset = 0;
        __u8 tx_flags = 0;

        if (flags & MSG_PROBE)
                return 0;
        cork = &inet->cork.base;
        if (skb_queue_empty(&sk->sk_write_queue)) {
                /*
                 * setup for corking
                 */
                if (opt) {
                        if (WARN_ON(np->cork.opt))
                                return -EINVAL;

                        np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
                        if (unlikely(np->cork.opt == NULL))
                                return -ENOBUFS;

                        np->cork.opt->tot_len = opt->tot_len;
                        np->cork.opt->opt_flen = opt->opt_flen;
                        np->cork.opt->opt_nflen = opt->opt_nflen;

                        np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
                                                            sk->sk_allocation);
                        if (opt->dst0opt && !np->cork.opt->dst0opt)
                                return -ENOBUFS;

                        np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
                                                            sk->sk_allocation);
                        if (opt->dst1opt && !np->cork.opt->dst1opt)
                                return -ENOBUFS;

                        np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
                                                           sk->sk_allocation);
                        if (opt->hopopt && !np->cork.opt->hopopt)
                                return -ENOBUFS;

                        np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
                                                            sk->sk_allocation);
                        if (opt->srcrt && !np->cork.opt->srcrt)
                                return -ENOBUFS;

                        /* need source address above miyazawa*/
                }
                dst_hold(&rt->dst);
                cork->dst = &rt->dst;
                inet->cork.fl.u.ip6 = *fl6;
                np->cork.hop_limit = hlimit;
                np->cork.tclass = tclass;
                if (rt->dst.flags & DST_XFRM_TUNNEL)
                        mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
                              rt->dst.dev->mtu : dst_mtu(&rt->dst);
                else
                        mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
                              rt->dst.dev->mtu : dst_mtu(rt->dst.path);
                if (np->frag_size < mtu) {
                        if (np->frag_size)
                                mtu = np->frag_size;
                }
                cork->fragsize = mtu;
                if (dst_allfrag(rt->dst.path))
                        cork->flags |= IPCORK_ALLFRAG;
                cork->length = 0;
                exthdrlen = (opt ? opt->opt_flen : 0);
                length += exthdrlen;
                transhdrlen += exthdrlen;
                dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
        } else {
                rt = (struct rt6_info *)cork->dst;
                fl6 = &inet->cork.fl.u.ip6;
                opt = np->cork.opt;
                transhdrlen = 0;
                exthdrlen = 0;
                dst_exthdrlen = 0;
                mtu = cork->fragsize;
        }

        hh_len = LL_RESERVED_SPACE(rt->dst.dev);

        fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
                        (opt ? opt->opt_nflen : 0);
        maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

        if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
                if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
                        ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
                        return -EMSGSIZE;
                }
        }

        /* For UDP, check if TX timestamp is enabled */
        if (sk->sk_type == SOCK_DGRAM) {
                err = sock_tx_timestamp(sk, &tx_flags);
                if (err)
                        goto error;
        }

        /*
         * Let's try using as much space as possible.
         * Use MTU if total length of the message fits into the MTU.
         * Otherwise, we need to reserve fragment header and
         * fragment alignment (= 8-15 octets, in total).
         *
         * Note that we may need to "move" the data from the tail
         * of the buffer to the new fragment when we split
         * the message.
         *
         * FIXME: It may be fragmented into multiple chunks
         *        at once if non-fragmentable extension headers
         *        are too large.
         * --yoshfuji
         */

        cork->length += length;
        if (length > mtu) {
                int proto = sk->sk_protocol;
                if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)) {
                        ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
                        return -EMSGSIZE;
                }

                if (proto == IPPROTO_UDP &&
                    (rt->dst.dev->features & NETIF_F_UFO)) {

                        err = ip6_ufo_append_data(sk, getfrag, from, length,
                                                  hh_len, fragheaderlen,
                                                  transhdrlen, mtu, flags, rt);
                        if (err)
                                goto error;
                        return 0;
                }
        }

        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
                goto alloc_new_skb;

        while (length > 0) {
                /* Check if the remaining data fits into current packet. */
                copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
                if (copy < length)
                        copy = maxfraglen - skb->len;

                if (copy <= 0) {
                        char *data;
                        unsigned int datalen;
                        unsigned int fraglen;
                        unsigned int fraggap;
                        unsigned int alloclen;
alloc_new_skb:
                        /* There's no room in the current skb */
                        if (skb)
                                fraggap = skb->len - maxfraglen;
                        else
                                fraggap = 0;
                        /* update mtu and maxfraglen if necessary */
                        if (skb == NULL || skb_prev == NULL)
                                ip6_append_data_mtu(&mtu, &maxfraglen,
                                                    fragheaderlen, skb, rt);

                        skb_prev = skb;

                        /*
                         * If remaining data exceeds the mtu,
                         * we know we need more fragment(s).
                         */
                        datalen = length + fraggap;

                        if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
                                datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
                        if ((flags & MSG_MORE) &&
                            !(rt->dst.dev->features & NETIF_F_SG))
                                alloclen = mtu;
                        else
                                alloclen = datalen + fragheaderlen;

                        alloclen += dst_exthdrlen;

                        if (datalen != length + fraggap) {
                                /*
                                 * this is not the last fragment, the trailer
                                 * space is regarded as data space.
                                 */
                                datalen += rt->dst.trailer_len;
                        }

                        alloclen += rt->dst.trailer_len;
                        fraglen = datalen + fragheaderlen;

                        /*
                         * We just reserve space for fragment header.
                         * Note: this may be overallocation if the message
                         * (without MSG_MORE) fits into the MTU.
                         */
                        alloclen += sizeof(struct frag_hdr);

                        if (transhdrlen) {
                                skb = sock_alloc_send_skb(sk,
                                                alloclen + hh_len,
                                                (flags & MSG_DONTWAIT), &err);
                        } else {
                                skb = NULL;
                                if (atomic_read(&sk->sk_wmem_alloc) <=
                                    2 * sk->sk_sndbuf)
                                        skb = sock_wmalloc(sk,
                                                           alloclen + hh_len, 1,
                                                           sk->sk_allocation);
                                if (unlikely(skb == NULL))
                                        err = -ENOBUFS;
                                else {
                                        /* Only the initial fragment
                                         * is time stamped.
                                         */
                                        tx_flags = 0;
                                }
                        }
                        if (skb == NULL)
                                goto error;
                        /*
                         *      Fill in the control structures
                         */
                        skb->ip_summed = CHECKSUM_NONE;
                        skb->csum = 0;
                        /* reserve for fragmentation and ipsec header */
                        skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
                                    dst_exthdrlen);

                        if (sk->sk_type == SOCK_DGRAM)
                                skb_shinfo(skb)->tx_flags = tx_flags;

                        /*
                         *      Find where to start putting bytes
                         */
                        data = skb_put(skb, fraglen);
                        skb_set_network_header(skb, exthdrlen);
                        data += fragheaderlen;
                        skb->transport_header = (skb->network_header +
                                                 fragheaderlen);
                        if (fraggap) {
                                skb->csum = skb_copy_and_csum_bits(
                                        skb_prev, maxfraglen,
                                        data + transhdrlen, fraggap, 0);
                                skb_prev->csum = csum_sub(skb_prev->csum,
                                                          skb->csum);
                                data += fraggap;
                                pskb_trim_unique(skb_prev, maxfraglen);
                        }
                        copy = datalen - transhdrlen - fraggap;

                        if (copy < 0) {
                                err = -EINVAL;
                                kfree_skb(skb);
                                goto error;
                        } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
                                err = -EFAULT;
                                kfree_skb(skb);
                                goto error;
                        }

                        offset += copy;
                        length -= datalen - fraggap;
                        transhdrlen = 0;
                        exthdrlen = 0;
                        dst_exthdrlen = 0;

                        /*
                         * Put the packet on the pending queue
                         */
                        __skb_queue_tail(&sk->sk_write_queue, skb);
                        continue;
                }

                if (copy > length)
                        copy = length;

                if (!(rt->dst.dev->features & NETIF_F_SG)) {
                        unsigned int off;

                        off = skb->len;
                        if (getfrag(from, skb_put(skb, copy),
                                    offset, copy, off, skb) < 0) {
                                __skb_trim(skb, off);
                                err = -EFAULT;
                                goto error;
                        }
                } else {
                        int i = skb_shinfo(skb)->nr_frags;
                        struct page_frag *pfrag = sk_page_frag(sk);

                        err = -ENOMEM;
                        if (!sk_page_frag_refill(sk, pfrag))
                                goto error;

                        if (!skb_can_coalesce(skb, i, pfrag->page,
                                              pfrag->offset)) {
                                err = -EMSGSIZE;
                                if (i == MAX_SKB_FRAGS)
                                        goto error;

                                __skb_fill_page_desc(skb, i, pfrag->page,
                                                     pfrag->offset, 0);
                                skb_shinfo(skb)->nr_frags = ++i;
                                get_page(pfrag->page);
                        }
                        copy = min_t(int, copy, pfrag->size - pfrag->offset);
                        if (getfrag(from,
                                    page_address(pfrag->page) + pfrag->offset,
                                    offset, copy, skb->len, skb) < 0)
                                goto error_efault;

                        pfrag->offset += copy;
                        skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
                        skb->len += copy;
                        skb->data_len += copy;
                        skb->truesize += copy;
                        atomic_add(copy, &sk->sk_wmem_alloc);
                }
                offset += copy;
                length -= copy;
        }

        return 0;

error_efault:
        err = -EFAULT;
error:
        cork->length -= length;
        IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
        return err;
}
EXPORT_SYMBOL_GPL(ip6_append_data);

static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
        if (np->cork.opt) {
                kfree(np->cork.opt->dst0opt);
                kfree(np->cork.opt->dst1opt);
                kfree(np->cork.opt->hopopt);
                kfree(np->cork.opt->srcrt);
                kfree(np->cork.opt);
                np->cork.opt = NULL;
        }

        if (inet->cork.base.dst) {
                dst_release(inet->cork.base.dst);
                inet->cork.base.dst = NULL;
                inet->cork.base.flags &= ~IPCORK_ALLFRAG;
        }
        memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

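/* Collapse everything queued by ip6_append_data() into one skb (the tail
 * skbs become its frag_list), prepend the extension headers and the IPv6
 * header from the corked state, and send the result through
 * ip6_local_out().  The cork is released on both success and error.
 */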
int ip6_push_pending_frames(struct sock *sk)
{
        struct sk_buff *skb, *tmp_skb;
        struct sk_buff **tail_skb;
        struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct net *net = sock_net(sk);
        struct ipv6hdr *hdr;
        struct ipv6_txoptions *opt = np->cork.opt;
        struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
        struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
        unsigned char proto = fl6->flowi6_proto;
        int err = 0;

        if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
                goto out;
        tail_skb = &(skb_shinfo(skb)->frag_list);

        /* move skb->data to ip header from ext header */
        if (skb->data < skb_network_header(skb))
                __skb_pull(skb, skb_network_offset(skb));
        while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
                __skb_pull(tmp_skb, skb_network_header_len(skb));
                *tail_skb = tmp_skb;
                tail_skb = &(tmp_skb->next);
                skb->len += tmp_skb->len;
                skb->data_len += tmp_skb->len;
                skb->truesize += tmp_skb->truesize;
                tmp_skb->destructor = NULL;
                tmp_skb->sk = NULL;
        }

        /* Allow local fragmentation. */
        if (np->pmtudisc < IPV6_PMTUDISC_DO)
                skb->local_df = 1;

        *final_dst = fl6->daddr;
        __skb_pull(skb, skb_network_header_len(skb));
        if (opt && opt->opt_flen)
                ipv6_push_frag_opts(skb, opt, &proto);
        if (opt && opt->opt_nflen)
                ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel);
        hdr->hop_limit = np->cork.hop_limit;
        hdr->nexthdr = proto;
        hdr->saddr = fl6->saddr;
        hdr->daddr = *final_dst;

        skb->priority = sk->sk_priority;
        skb->mark = sk->sk_mark;

        skb_dst_set(skb, dst_clone(&rt->dst));
        IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
        if (proto == IPPROTO_ICMPV6) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

                ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
                ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
        }

        err = ip6_local_out(skb);
        if (err) {
                if (err > 0)
                        err = net_xmit_errno(err);
                if (err)
                        goto error;
        }

out:
        ip6_cork_release(inet, np);
        return err;
error:
        IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
        goto out;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);

void ip6_flush_pending_frames(struct sock *sk)
{
        struct sk_buff *skb;

        while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
                if (skb_dst(skb))
                        IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
        }

        ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);