ipv6: Assume the loopback address in link-local scope.
net/ipv6/ip6_output.c
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetics in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));

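/*
 * Assign the Identification field for a fresh fragment header.  One
 * global counter is shared by all flows, so a spinlock serialises the
 * read-modify-write; the counter skips zero, which lets callers treat
 * a frag_id of 0 as "not selected yet" (see the slow path below).
 */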
static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
{
	static u32 ipv6_fragmentation_id = 1;
	static DEFINE_SPINLOCK(ip6_id_lock);

	spin_lock_bh(&ip6_id_lock);
	fhdr->identification = htonl(ipv6_fragmentation_id);
	if (++ipv6_fragmentation_id == 0)
		ipv6_fragmentation_id = 1;
	spin_unlock_bh(&ip6_id_lock);
}

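/*
 * __ip6_local_out() finalises payload_len and runs the LOCAL_OUT
 * netfilter hook.  nf_hook() returns 1 when the packet is allowed to
 * continue, which is why ip6_local_out() only invokes dst_output()
 * on that return value.
 */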
int __ip6_local_out(struct sk_buff *skb)
{
	int len;

	len = skb->len - sizeof(struct ipv6hdr);
	if (len > IPV6_MAXPLEN)
		len = 0;
	ipv6_hdr(skb)->payload_len = htons(len);

	return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
		       dst_output);
}

int ip6_local_out(struct sk_buff *skb)
{
	int err;

	err = __ip6_local_out(skb);
	if (likely(err == 1))
		err = dst_output(skb);

	return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);

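/*
 * Final step of transmission: use the cached hardware header if the
 * dst has one, otherwise go through the neighbour's output function.
 */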
static int ip6_output_finish(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;

	if (dst->hh)
		return neigh_hh_output(dst->hh, skb);
	else if (dst->neighbour)
		return dst->neighbour->output(skb);

	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
	skb_reset_mac_header(newskb);
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	BUG_TRAP(newskb->dst);

	netif_rx(newskb);
	return 0;
}

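/*
 * ip6_output2() handles multicast loopback before the POST_ROUTING
 * hook: packets for groups this host has joined (or that a multicast
 * routing socket must see) are looped back via a clone, and a hop
 * limit of 0 then suppresses the on-the-wire copy entirely.
 */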
static int ip6_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct net_device *dev = dst->dev;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
		struct inet6_dev *idev = ip6_dst_idev(skb->dst);

		if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
		    ((mroute6_socket && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
					NULL, newskb->dev,
					ip6_dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
	}

	return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
		       ip6_output_finish);
}

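/*
 * MTU used for fragmentation decisions: the device MTU if the socket
 * does its own path MTU probing (IPV6_PMTUDISC_PROBE), otherwise the
 * MTU cached in the dst entry.
 */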
static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
{
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;

	return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
	       skb->dst->dev->mtu : dst_mtu(skb->dst);
}

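/*
 * Entry point from dst_output(): fragment if the packet exceeds the
 * path MTU and is not GSO, or if the route requires fragmentation of
 * every packet (dst_allfrag); otherwise transmit directly.
 */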
int ip6_output(struct sk_buff *skb)
{
	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb->dst))
		return ip6_fragment(skb, ip6_output2);
	else
		return ip6_output2(skb);
}

/*
 *	xmit an sk_buff (used by TCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
	     struct ipv6_txoptions *opt, int ipfragok)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl->fl6_dst;
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr;
	u8 proto = fl->proto;
	int seg_len = skb->len;
	int hlimit, tclass;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(ip6_dst_idev(skb->dst),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			kfree_skb(skb);
			skb = skb2;
			if (sk)
				skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */

	hlimit = -1;
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	tclass = -1;
	if (np)
		tclass = np->tclass;
	if (tclass < 0)
		tclass = 0;

	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, first_hop);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
		IP6_INC_STATS(ip6_dst_idev(skb->dst),
			      IPSTATS_MIB_OUTREQUESTS);
		return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
			       dst_output);
	}

	if (net_ratelimit())
		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
	IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);

/*
 *	To avoid extra problems ND packets are sent through this
 *	routine. It's code duplication but I really want to avoid
 *	extra checks since ipv6_build_header is used by TCP (which
 *	is for us performance critical)
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
	       const struct in6_addr *saddr, const struct in6_addr *daddr,
	       int proto, int len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	int totlen;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	totlen = len + sizeof(struct ipv6hdr);

	skb_reset_network_header(skb);
	skb_put(skb, sizeof(struct ipv6hdr));
	hdr = ipv6_hdr(skb);

	*(__be32 *)hdr = htonl(0x60000000);

	hdr->payload_len = htons(len);
	hdr->nexthdr = proto;
	hdr->hop_limit = np->hop_limit;

	ipv6_addr_copy(&hdr->saddr, saddr);
	ipv6_addr_copy(&hdr->daddr, daddr);

	return 0;
}

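/*
 * Deliver a packet carrying a Router Alert option to every raw socket
 * registered for that alert value.  Each matching socket but the last
 * receives a clone; the last consumes the original skb, and the return
 * value tells ip6_forward() whether the packet was absorbed.
 */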
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

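/*
 * Decide what to do with a packet destined to an address we proxy NDP
 * for: returns 1 to hand it to local input (unicast neighbour
 * discovery messages), -1 to drop it (link-local destination, see the
 * comment below), and 0 to forward it normally.
 */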
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}

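/*
 * The forwarding path proper: checks hop limit, IPsec policy, proxy
 * NDP and source address sanity, emits redirects or ICMPv6 errors
 * where required, then decrements hop_limit and hands the packet to
 * the FORWARD netfilter hook.
 */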
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);

	if (ipv6_devconf.forwarding == 0)
		goto error;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (opt->ra) {
		u8 *ptr = skb_network_header(skb) + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			    0, skb->dev);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (ipv6_devconf.proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb->dst;

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
	    !skb->sp) {
		struct in6_addr *target = NULL;
		struct rt6_info *rt;
		struct neighbour *n = dst->neighbour;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = (struct in6_addr *)&n->primary_key;
		else
			target = &hdr->daddr;

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (xrlim_allow(dst, 1*HZ))
			ndisc_send_redirect(skb, n, target);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
			goto error;
		}
	}

	if (skb->len > dst_mtu(dst)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	dst_release(to->dst);
	to->dst = dst_clone(from->dst);
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}

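/*
 * Walk the extension header chain to find where a fragment header has
 * to be inserted: after any hop-by-hop, routing and (with Mobile IPv6)
 * HAO-carrying destination options headers.  Returns that offset and
 * leaves *nexthdr pointing at the field to patch to NEXTHDR_FRAGMENT.
 */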
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr =
				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
	unsigned int packet_len = skb->tail - skb->network_header;
	int found_rhdr = 0;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				break;
#endif
			if (found_rhdr)
				return offset;
			break;
		default:
			return offset;
		}

		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
	}

	return offset;
}

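/*
 * Two fragmentation strategies: the fast path reuses an existing
 * frag_list whose geometry already fits (each fragment but the last
 * 8-byte aligned and within the MTU), only prepending headers; the
 * slow path allocates a new skb per fragment and copies the payload.
 */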
static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct net_device *dev;
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb->dst;
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	__be32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;

	dev = rt->u.dst.dev;
	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket. (This last
	 * check should be redundant, but it's free.)
	 */
	if (!skb->local_df) {
		skb->dev = skb->dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);

	if (skb_shinfo(skb)->frag_list) {
		int first_len = skb_pagelen(skb);
		int truesizes = 0;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path;

			BUG_ON(frag->sk);
			if (skb->sk) {
				sock_hold(skb->sk);
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
				truesizes += frag->truesize;
			}
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_shinfo(skb)->frag_list = NULL;
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(skb, fh);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->truesize -= truesizes;
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->u.dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (!err)
				IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
			dst_release(&rt->u.dst);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
		dst_release(&rt->u.dst);
		return err;
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				      LL_ALLOCATED_SPACE(rt->u.dst.dev),
				      GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(ip6_dst_idev(skb->dst),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(skb, fh);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(ip6_dst_idev(skb->dst),
		      IPSTATS_MIB_FRAGOKS);
	kfree_skb(skb);
	return err;

fail:
	IP6_INC_STATS(ip6_dst_idev(skb->dst),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

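/*
 * Helper for ip6_sk_dst_check(): a cached route is stale for this flow
 * unless it is a host route to exactly fl_addr or the last destination
 * the socket used (addr_cache) still matches.
 */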
static inline int ip6_rt_check(struct rt6key *rt_key,
			       struct in6_addr *fl_addr,
			       struct in6_addr *addr_cache)
{
	return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  struct flowi *fl)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (!dst)
		goto out;

	/* Yes, checking route validity in the unconnected
	 * case is not very simple. Take into account
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
#endif
	    (fl->oif && fl->oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

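/*
 * Common tail of the dst lookup functions: do the routing lookup if no
 * dst is cached, select a source address if the flow leaves it
 * unspecified and, with optimistic DAD, fall back to the default
 * router's dst entry while our tentative address is being verified.
 */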
static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi *fl)
{
	int err;
	struct net *net = sock_net(sk);

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl->fl6_src)) {
		err = ipv6_dev_get_saddr(ip6_dst_idev(*dst)->dev,
					 &fl->fl6_dst,
					 sk ? inet6_sk(sk)->srcprefs : 0,
					 &fl->fl6_src);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi fl_gw;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw, fl, sizeof(struct flowi));
			memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_sk_dst_lookup - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	if (sk) {
		*dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
		*dst = ip6_sk_dst_check(sk, *dst, fl);
	}

	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);

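/*
 * UDP fragmentation offload path for ip6_append_data(): build (or
 * extend) a single oversized skb holding the whole datagram and record
 * the fragment size in gso_size, leaving the actual splitting to the
 * device or to the software GSO code.
 */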
static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags)

{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return -ENOMEM;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
		sk->sk_sndmsg_off = 0;
	}

	err = skb_append_datato_frags(sk, skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* specify the length of each IP datagram fragment */
		skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
					    sizeof(struct frag_hdr);
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		ipv6_select_ident(skb, &fhdr);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support to do UDP LSO,
	 * so follow normal path
	 */
	kfree_skb(skb);

	return err;
}

int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
	struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;

	if (flags & MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (np->cork.opt == NULL) {
				np->cork.opt = kmalloc(opt->tot_len,
						       sk->sk_allocation);
				if (unlikely(np->cork.opt == NULL))
					return -ENOBUFS;
			} else if (np->cork.opt->tot_len < opt->tot_len) {
				printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
				return -EINVAL;
			}
			memcpy(np->cork.opt, opt, opt->tot_len);
			inet->cork.flags |= IPCORK_OPT;
			/* need source address above miyazawa*/
		}
		dst_hold(&rt->u.dst);
		inet->cork.dst = &rt->u.dst;
		inet->cork.fl = *fl;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		inet->cork.fragsize = mtu;
		if (dst_allfrag(rt->u.dst.path))
			inet->cork.flags |= IPCORK_ALLFRAG;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
			    rt->rt6i_nfheader_len;
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		rt = (struct rt6_info *)inet->cork.dst;
		fl = &inet->cork.fl;
		if (inet->cork.flags & IPCORK_OPT)
			opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu - exthdrlen);
			return -EMSGSIZE;
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	inet->cork.length += length;
	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
	    (rt->u.dst.dev->features & NETIF_F_UFO)) {

		err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
					  fragheaderlen, transhdrlen, mtu,
					  flags);
		if (err)
			goto error;
		return 0;
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->u.dst.dev->features & NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MORE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->u.dst.trailer_len;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr));

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->u.dst.dev->features & NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page) + frag->page_offset + frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	inet->cork.length -= length;
	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}

static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
	inet->cork.flags &= ~IPCORK_OPT;
	kfree(np->cork.opt);
	np->cork.opt = NULL;
	if (inet->cork.dst) {
		dst_release(inet->cork.dst);
		inet->cork.dst = NULL;
		inet->cork.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

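/*
 * Collapse the write queue into one skb (the queued skbs become its
 * frag_list), prepend the extension headers and the IPv6 header, and
 * hand the result to ip6_local_out().
 */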
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
	struct flowi *fl = &inet->cork.fl;
	unsigned char proto = fl->proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		__sock_put(tmp_skb->sk);
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	ipv6_addr_copy(final_dst, &fl->fl6_dst);
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	*(__be32 *)hdr = fl->fl6_flowlabel |
		     htonl(0x60000000 | ((int)np->cork.tclass << 20));

	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb->dst = dst_clone(&rt->u.dst);
	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb->dst);

		ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		if (err > 0)
			err = np->recverr ? net_xmit_errno(err) : 0;
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	goto out;
}

void ip6_flush_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
		if (skb->dst)
			IP6_INC_STATS(ip6_dst_idev(skb->dst),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}