net: Set LL_MAX_HEADER properly for wireless.
[linux-2.6-block.git] / net / ipv6 / ip6_output.c
CommitLineData
1da177e4
LT
1/*
2 * IPv6 output functions
1ab1457c 3 * Linux INET6 implementation
1da177e4
LT
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4
LT
7 *
8 * $Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
9 *
10 * Based on linux/net/ipv4/ip_output.c
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 * A.N.Kuznetsov : arithmetics in fragmentation.
19 * extension headers are implemented.
20 * route changes now work.
21 * ip6_forward does not confuse sniffers.
22 * etc.
23 *
24 * H. von Brand : Added missing #include <linux/string.h>
25 * Imran Patel : frag id should be in NBO
26 * Kazunori MIYAZAWA @USAGI
27 * : add ip6_append_data and related functions
28 * for datagram xmit
29 */
30
1da177e4 31#include <linux/errno.h>
ef76bc23 32#include <linux/kernel.h>
1da177e4
LT
33#include <linux/string.h>
34#include <linux/socket.h>
35#include <linux/net.h>
36#include <linux/netdevice.h>
37#include <linux/if_arp.h>
38#include <linux/in6.h>
39#include <linux/tcp.h>
40#include <linux/route.h>
b59f45d0 41#include <linux/module.h>
1da177e4
LT
42
43#include <linux/netfilter.h>
44#include <linux/netfilter_ipv6.h>
45
46#include <net/sock.h>
47#include <net/snmp.h>
48
49#include <net/ipv6.h>
50#include <net/ndisc.h>
51#include <net/protocol.h>
52#include <net/ip6_route.h>
53#include <net/addrconf.h>
54#include <net/rawv6.h>
55#include <net/icmp.h>
56#include <net/xfrm.h>
57#include <net/checksum.h>
7bc570c8 58#include <linux/mroute6.h>
1da177e4
LT
59
60static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
61
62static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
63{
64 static u32 ipv6_fragmentation_id = 1;
65 static DEFINE_SPINLOCK(ip6_id_lock);
66
67 spin_lock_bh(&ip6_id_lock);
68 fhdr->identification = htonl(ipv6_fragmentation_id);
69 if (++ipv6_fragmentation_id == 0)
70 ipv6_fragmentation_id = 1;
71 spin_unlock_bh(&ip6_id_lock);
72}
73
ef76bc23
HX
74int __ip6_local_out(struct sk_buff *skb)
75{
76 int len;
77
78 len = skb->len - sizeof(struct ipv6hdr);
79 if (len > IPV6_MAXPLEN)
80 len = 0;
81 ipv6_hdr(skb)->payload_len = htons(len);
82
6e23ae2a 83 return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
ef76bc23
HX
84 dst_output);
85}
86
/*
 * Send a locally generated packet: run __ip6_local_out() and, when it
 * returns 1, continue with dst_output().  Any other value is returned to
 * the caller unchanged.
 */
int ip6_local_out(struct sk_buff *skb)
{
	int rc = __ip6_local_out(skb);

	if (unlikely(rc != 1))
		return rc;

	return dst_output(skb);
}
EXPORT_SYMBOL_GPL(ip6_local_out);
98
ad643a79 99static int ip6_output_finish(struct sk_buff *skb)
1da177e4 100{
1da177e4 101 struct dst_entry *dst = skb->dst;
1da177e4 102
3644f0ce
SH
103 if (dst->hh)
104 return neigh_hh_output(dst->hh, skb);
105 else if (dst->neighbour)
1da177e4
LT
106 return dst->neighbour->output(skb);
107
a11d206d 108 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
109 kfree_skb(skb);
110 return -EINVAL;
111
112}
113
114/* dev_loopback_xmit for use with netfilter. */
115static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
116{
459a98ed 117 skb_reset_mac_header(newskb);
bbe735e4 118 __skb_pull(newskb, skb_network_offset(newskb));
1da177e4
LT
119 newskb->pkt_type = PACKET_LOOPBACK;
120 newskb->ip_summed = CHECKSUM_UNNECESSARY;
121 BUG_TRAP(newskb->dst);
122
123 netif_rx(newskb);
124 return 0;
125}
126
127
128static int ip6_output2(struct sk_buff *skb)
129{
130 struct dst_entry *dst = skb->dst;
131 struct net_device *dev = dst->dev;
132
133 skb->protocol = htons(ETH_P_IPV6);
134 skb->dev = dev;
135
0660e03f 136 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
1da177e4 137 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
a11d206d 138 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
1da177e4
LT
139
140 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
7bc570c8
YH
141 ((mroute6_socket && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
142 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
143 &ipv6_hdr(skb)->saddr))) {
1da177e4
LT
144 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
145
146 /* Do not check for IFF_ALLMULTI; multicast routing
147 is not supported in any case.
148 */
149 if (newskb)
6e23ae2a
PM
150 NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
151 NULL, newskb->dev,
1da177e4
LT
152 ip6_dev_loopback_xmit);
153
0660e03f 154 if (ipv6_hdr(skb)->hop_limit == 0) {
a11d206d 155 IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
156 kfree_skb(skb);
157 return 0;
158 }
159 }
160
a11d206d 161 IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
1da177e4
LT
162 }
163
6e23ae2a
PM
164 return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
165 ip6_output_finish);
1da177e4
LT
166}
167
628a5c56
JH
168static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
169{
170 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
171
172 return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
173 skb->dst->dev->mtu : dst_mtu(skb->dst);
174}
175
1da177e4
LT
176int ip6_output(struct sk_buff *skb)
177{
628a5c56 178 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
e89e9cf5 179 dst_allfrag(skb->dst))
1da177e4
LT
180 return ip6_fragment(skb, ip6_output2);
181 else
182 return ip6_output2(skb);
183}
184
1da177e4
LT
185/*
186 * xmit an sk_buff (used by TCP)
187 */
188
189int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
190 struct ipv6_txoptions *opt, int ipfragok)
191{
b30bd282 192 struct ipv6_pinfo *np = inet6_sk(sk);
1da177e4
LT
193 struct in6_addr *first_hop = &fl->fl6_dst;
194 struct dst_entry *dst = skb->dst;
195 struct ipv6hdr *hdr;
196 u8 proto = fl->proto;
197 int seg_len = skb->len;
41a1f8ea 198 int hlimit, tclass;
1da177e4
LT
199 u32 mtu;
200
201 if (opt) {
c2636b4d 202 unsigned int head_room;
1da177e4
LT
203
204 /* First: exthdrs may take lots of space (~8K for now)
205 MAX_HEADER is not enough.
206 */
207 head_room = opt->opt_nflen + opt->opt_flen;
208 seg_len += head_room;
209 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
210
211 if (skb_headroom(skb) < head_room) {
212 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
a11d206d
YH
213 if (skb2 == NULL) {
214 IP6_INC_STATS(ip6_dst_idev(skb->dst),
215 IPSTATS_MIB_OUTDISCARDS);
216 kfree_skb(skb);
1da177e4
LT
217 return -ENOBUFS;
218 }
a11d206d
YH
219 kfree_skb(skb);
220 skb = skb2;
1da177e4
LT
221 if (sk)
222 skb_set_owner_w(skb, sk);
223 }
224 if (opt->opt_flen)
225 ipv6_push_frag_opts(skb, opt, &proto);
226 if (opt->opt_nflen)
227 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
228 }
229
e2d1bca7
ACM
230 skb_push(skb, sizeof(struct ipv6hdr));
231 skb_reset_network_header(skb);
0660e03f 232 hdr = ipv6_hdr(skb);
1da177e4
LT
233
234 /*
235 * Fill in the IPv6 header
236 */
237
1da177e4
LT
238 hlimit = -1;
239 if (np)
240 hlimit = np->hop_limit;
241 if (hlimit < 0)
6b75d090 242 hlimit = ip6_dst_hoplimit(dst);
1da177e4 243
41a1f8ea
YH
244 tclass = -1;
245 if (np)
246 tclass = np->tclass;
247 if (tclass < 0)
248 tclass = 0;
249
90bcaf7b 250 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
41a1f8ea 251
1da177e4
LT
252 hdr->payload_len = htons(seg_len);
253 hdr->nexthdr = proto;
254 hdr->hop_limit = hlimit;
255
256 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
257 ipv6_addr_copy(&hdr->daddr, first_hop);
258
a2c2064f 259 skb->priority = sk->sk_priority;
4a19ec58 260 skb->mark = sk->sk_mark;
a2c2064f 261
1da177e4 262 mtu = dst_mtu(dst);
89114afd 263 if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
a11d206d
YH
264 IP6_INC_STATS(ip6_dst_idev(skb->dst),
265 IPSTATS_MIB_OUTREQUESTS);
6e23ae2a 266 return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
6869c4d8 267 dst_output);
1da177e4
LT
268 }
269
270 if (net_ratelimit())
271 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
272 skb->dev = dst->dev;
273 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
a11d206d 274 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
275 kfree_skb(skb);
276 return -EMSGSIZE;
277}
278
7159039a
YH
279EXPORT_SYMBOL(ip6_xmit);
280
1da177e4
LT
281/*
282 * To avoid extra problems ND packets are send through this
283 * routine. It's code duplication but I really want to avoid
284 * extra checks since ipv6_build_header is used by TCP (which
285 * is for us performance critical)
286 */
287
288int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
9acd9f3a 289 const struct in6_addr *saddr, const struct in6_addr *daddr,
1da177e4
LT
290 int proto, int len)
291{
292 struct ipv6_pinfo *np = inet6_sk(sk);
293 struct ipv6hdr *hdr;
294 int totlen;
295
296 skb->protocol = htons(ETH_P_IPV6);
297 skb->dev = dev;
298
299 totlen = len + sizeof(struct ipv6hdr);
300
55f79cc0
ACM
301 skb_reset_network_header(skb);
302 skb_put(skb, sizeof(struct ipv6hdr));
0660e03f 303 hdr = ipv6_hdr(skb);
1da177e4 304
ae08e1f0 305 *(__be32*)hdr = htonl(0x60000000);
1da177e4
LT
306
307 hdr->payload_len = htons(len);
308 hdr->nexthdr = proto;
309 hdr->hop_limit = np->hop_limit;
310
311 ipv6_addr_copy(&hdr->saddr, saddr);
312 ipv6_addr_copy(&hdr->daddr, daddr);
313
314 return 0;
315}
316
317static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
318{
319 struct ip6_ra_chain *ra;
320 struct sock *last = NULL;
321
322 read_lock(&ip6_ra_lock);
323 for (ra = ip6_ra_chain; ra; ra = ra->next) {
324 struct sock *sk = ra->sk;
0bd1b59b
AM
325 if (sk && ra->sel == sel &&
326 (!sk->sk_bound_dev_if ||
327 sk->sk_bound_dev_if == skb->dev->ifindex)) {
1da177e4
LT
328 if (last) {
329 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
330 if (skb2)
331 rawv6_rcv(last, skb2);
332 }
333 last = sk;
334 }
335 }
336
337 if (last) {
338 rawv6_rcv(last, skb);
339 read_unlock(&ip6_ra_lock);
340 return 1;
341 }
342 read_unlock(&ip6_ra_lock);
343 return 0;
344}
345
e21e0b5f
VN
346static int ip6_forward_proxy_check(struct sk_buff *skb)
347{
0660e03f 348 struct ipv6hdr *hdr = ipv6_hdr(skb);
e21e0b5f
VN
349 u8 nexthdr = hdr->nexthdr;
350 int offset;
351
352 if (ipv6_ext_hdr(nexthdr)) {
353 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
354 if (offset < 0)
355 return 0;
356 } else
357 offset = sizeof(struct ipv6hdr);
358
359 if (nexthdr == IPPROTO_ICMPV6) {
360 struct icmp6hdr *icmp6;
361
d56f90a7
ACM
362 if (!pskb_may_pull(skb, (skb_network_header(skb) +
363 offset + 1 - skb->data)))
e21e0b5f
VN
364 return 0;
365
d56f90a7 366 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
e21e0b5f
VN
367
368 switch (icmp6->icmp6_type) {
369 case NDISC_ROUTER_SOLICITATION:
370 case NDISC_ROUTER_ADVERTISEMENT:
371 case NDISC_NEIGHBOUR_SOLICITATION:
372 case NDISC_NEIGHBOUR_ADVERTISEMENT:
373 case NDISC_REDIRECT:
374 /* For reaction involving unicast neighbor discovery
375 * message destined to the proxied address, pass it to
376 * input function.
377 */
378 return 1;
379 default:
380 break;
381 }
382 }
383
74553b09
VN
384 /*
385 * The proxying router can't forward traffic sent to a link-local
386 * address, so signal the sender and discard the packet. This
387 * behavior is clarified by the MIPv6 specification.
388 */
389 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
390 dst_link_failure(skb);
391 return -1;
392 }
393
e21e0b5f
VN
394 return 0;
395}
396
1da177e4
LT
/*
 * Continuation run after the NF_INET_FORWARD hook accepts a packet:
 * hand it to the route's output method.
 */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}
401
402int ip6_forward(struct sk_buff *skb)
403{
404 struct dst_entry *dst = skb->dst;
0660e03f 405 struct ipv6hdr *hdr = ipv6_hdr(skb);
1da177e4 406 struct inet6_skb_parm *opt = IP6CB(skb);
c346dca1 407 struct net *net = dev_net(dst->dev);
1ab1457c 408
1da177e4
LT
409 if (ipv6_devconf.forwarding == 0)
410 goto error;
411
412 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
a11d206d 413 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
1da177e4
LT
414 goto drop;
415 }
416
35fc92a9 417 skb_forward_csum(skb);
1da177e4
LT
418
419 /*
420 * We DO NOT make any processing on
421 * RA packets, pushing them to user level AS IS
422 * without ane WARRANTY that application will be able
423 * to interpret them. The reason is that we
424 * cannot make anything clever here.
425 *
426 * We are not end-node, so that if packet contains
427 * AH/ESP, we cannot make anything.
428 * Defragmentation also would be mistake, RA packets
429 * cannot be fragmented, because there is no warranty
430 * that different fragments will go along one path. --ANK
431 */
432 if (opt->ra) {
d56f90a7 433 u8 *ptr = skb_network_header(skb) + opt->ra;
1da177e4
LT
434 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
435 return 0;
436 }
437
438 /*
439 * check and decrement ttl
440 */
441 if (hdr->hop_limit <= 1) {
442 /* Force OUTPUT device used as source address */
443 skb->dev = dst->dev;
444 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
445 0, skb->dev);
a11d206d 446 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
1da177e4
LT
447
448 kfree_skb(skb);
449 return -ETIMEDOUT;
450 }
451
fbea49e1
YH
452 /* XXX: idev->cnf.proxy_ndp? */
453 if (ipv6_devconf.proxy_ndp &&
8a3edd80 454 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
74553b09
VN
455 int proxied = ip6_forward_proxy_check(skb);
456 if (proxied > 0)
e21e0b5f 457 return ip6_input(skb);
74553b09 458 else if (proxied < 0) {
a11d206d 459 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
74553b09
VN
460 goto drop;
461 }
e21e0b5f
VN
462 }
463
1da177e4 464 if (!xfrm6_route_forward(skb)) {
a11d206d 465 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
1da177e4
LT
466 goto drop;
467 }
468 dst = skb->dst;
469
470 /* IPv6 specs say nothing about it, but it is clear that we cannot
471 send redirects to source routed frames.
1e5dc146 472 We don't send redirects to frames decapsulated from IPsec.
1da177e4 473 */
1e5dc146
MN
474 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
475 !skb->sp) {
1da177e4
LT
476 struct in6_addr *target = NULL;
477 struct rt6_info *rt;
478 struct neighbour *n = dst->neighbour;
479
480 /*
481 * incoming and outgoing devices are the same
482 * send a redirect.
483 */
484
485 rt = (struct rt6_info *) dst;
486 if ((rt->rt6i_flags & RTF_GATEWAY))
487 target = (struct in6_addr*)&n->primary_key;
488 else
489 target = &hdr->daddr;
490
491 /* Limit redirects both by destination (here)
492 and by source (inside ndisc_send_redirect)
493 */
494 if (xrlim_allow(dst, 1*HZ))
495 ndisc_send_redirect(skb, n, target);
5bb1ab09
DS
496 } else {
497 int addrtype = ipv6_addr_type(&hdr->saddr);
498
1da177e4 499 /* This check is security critical. */
5bb1ab09
DS
500 if (addrtype & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK))
501 goto error;
502 if (addrtype & IPV6_ADDR_LINKLOCAL) {
503 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
504 ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
505 goto error;
506 }
1da177e4
LT
507 }
508
509 if (skb->len > dst_mtu(dst)) {
510 /* Again, force OUTPUT device used as source address */
511 skb->dev = dst->dev;
512 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
a11d206d
YH
513 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
514 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
515 kfree_skb(skb);
516 return -EMSGSIZE;
517 }
518
519 if (skb_cow(skb, dst->dev->hard_header_len)) {
a11d206d 520 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
521 goto drop;
522 }
523
0660e03f 524 hdr = ipv6_hdr(skb);
1da177e4
LT
525
526 /* Mangling hops number delayed to point after skb COW */
1ab1457c 527
1da177e4
LT
528 hdr->hop_limit--;
529
a11d206d 530 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
6e23ae2a
PM
531 return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
532 ip6_forward_finish);
1da177e4
LT
533
534error:
a11d206d 535 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
1da177e4
LT
536drop:
537 kfree_skb(skb);
538 return -EINVAL;
539}
540
541static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
542{
543 to->pkt_type = from->pkt_type;
544 to->priority = from->priority;
545 to->protocol = from->protocol;
1da177e4
LT
546 dst_release(to->dst);
547 to->dst = dst_clone(from->dst);
548 to->dev = from->dev;
82e91ffe 549 to->mark = from->mark;
1da177e4
LT
550
551#ifdef CONFIG_NET_SCHED
552 to->tc_index = from->tc_index;
553#endif
e7ac05f3 554 nf_copy(to, from);
ba9dda3a
JK
555#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
556 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
557 to->nf_trace = from->nf_trace;
558#endif
984bc16c 559 skb_copy_secmark(to, from);
1da177e4
LT
560}
561
562int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
563{
564 u16 offset = sizeof(struct ipv6hdr);
0660e03f
ACM
565 struct ipv6_opt_hdr *exthdr =
566 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
27a884dc 567 unsigned int packet_len = skb->tail - skb->network_header;
1da177e4 568 int found_rhdr = 0;
0660e03f 569 *nexthdr = &ipv6_hdr(skb)->nexthdr;
1da177e4
LT
570
571 while (offset + 1 <= packet_len) {
572
573 switch (**nexthdr) {
574
575 case NEXTHDR_HOP:
27637df9 576 break;
1da177e4 577 case NEXTHDR_ROUTING:
27637df9
MN
578 found_rhdr = 1;
579 break;
1da177e4 580 case NEXTHDR_DEST:
59fbb3a6 581#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
27637df9
MN
582 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
583 break;
584#endif
585 if (found_rhdr)
586 return offset;
1da177e4
LT
587 break;
588 default :
589 return offset;
590 }
27637df9
MN
591
592 offset += ipv6_optlen(exthdr);
593 *nexthdr = &exthdr->nexthdr;
d56f90a7
ACM
594 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
595 offset);
1da177e4
LT
596 }
597
598 return offset;
599}
600
601static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
602{
603 struct net_device *dev;
604 struct sk_buff *frag;
605 struct rt6_info *rt = (struct rt6_info*)skb->dst;
d91675f9 606 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
1da177e4
LT
607 struct ipv6hdr *tmp_hdr;
608 struct frag_hdr *fh;
609 unsigned int mtu, hlen, left, len;
ae08e1f0 610 __be32 frag_id = 0;
1da177e4
LT
611 int ptr, offset = 0, err=0;
612 u8 *prevhdr, nexthdr = 0;
613
614 dev = rt->u.dst.dev;
615 hlen = ip6_find_1stfragopt(skb, &prevhdr);
616 nexthdr = *prevhdr;
617
628a5c56 618 mtu = ip6_skb_dst_mtu(skb);
b881ef76
JH
619
620 /* We must not fragment if the socket is set to force MTU discovery
621 * or if the skb it not generated by a local socket. (This last
622 * check should be redundant, but it's free.)
623 */
b5c15fc0 624 if (!skb->local_df) {
b881ef76
JH
625 skb->dev = skb->dst->dev;
626 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
627 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
628 kfree_skb(skb);
629 return -EMSGSIZE;
630 }
631
d91675f9
YH
632 if (np && np->frag_size < mtu) {
633 if (np->frag_size)
634 mtu = np->frag_size;
635 }
636 mtu -= hlen + sizeof(struct frag_hdr);
1da177e4
LT
637
638 if (skb_shinfo(skb)->frag_list) {
639 int first_len = skb_pagelen(skb);
29ffe1a5 640 int truesizes = 0;
1da177e4
LT
641
642 if (first_len - hlen > mtu ||
643 ((first_len - hlen) & 7) ||
644 skb_cloned(skb))
645 goto slow_path;
646
647 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
648 /* Correct geometry. */
649 if (frag->len > mtu ||
650 ((frag->len & 7) && frag->next) ||
651 skb_headroom(frag) < hlen)
652 goto slow_path;
653
1da177e4
LT
654 /* Partially cloned skb? */
655 if (skb_shared(frag))
656 goto slow_path;
2fdba6b0
HX
657
658 BUG_ON(frag->sk);
659 if (skb->sk) {
660 sock_hold(skb->sk);
661 frag->sk = skb->sk;
662 frag->destructor = sock_wfree;
29ffe1a5 663 truesizes += frag->truesize;
2fdba6b0 664 }
1da177e4
LT
665 }
666
667 err = 0;
668 offset = 0;
669 frag = skb_shinfo(skb)->frag_list;
670 skb_shinfo(skb)->frag_list = NULL;
671 /* BUILD HEADER */
672
9a217a1c 673 *prevhdr = NEXTHDR_FRAGMENT;
d56f90a7 674 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
1da177e4 675 if (!tmp_hdr) {
a11d206d 676 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
677 return -ENOMEM;
678 }
679
1da177e4
LT
680 __skb_pull(skb, hlen);
681 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
e2d1bca7
ACM
682 __skb_push(skb, hlen);
683 skb_reset_network_header(skb);
d56f90a7 684 memcpy(skb_network_header(skb), tmp_hdr, hlen);
1da177e4
LT
685
686 ipv6_select_ident(skb, fh);
687 fh->nexthdr = nexthdr;
688 fh->reserved = 0;
689 fh->frag_off = htons(IP6_MF);
690 frag_id = fh->identification;
691
692 first_len = skb_pagelen(skb);
693 skb->data_len = first_len - skb_headlen(skb);
29ffe1a5 694 skb->truesize -= truesizes;
1da177e4 695 skb->len = first_len;
0660e03f
ACM
696 ipv6_hdr(skb)->payload_len = htons(first_len -
697 sizeof(struct ipv6hdr));
a11d206d
YH
698
699 dst_hold(&rt->u.dst);
1da177e4
LT
700
701 for (;;) {
702 /* Prepare header of the next frame,
703 * before previous one went down. */
704 if (frag) {
705 frag->ip_summed = CHECKSUM_NONE;
badff6d0 706 skb_reset_transport_header(frag);
1da177e4 707 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
e2d1bca7
ACM
708 __skb_push(frag, hlen);
709 skb_reset_network_header(frag);
d56f90a7
ACM
710 memcpy(skb_network_header(frag), tmp_hdr,
711 hlen);
1da177e4
LT
712 offset += skb->len - hlen - sizeof(struct frag_hdr);
713 fh->nexthdr = nexthdr;
714 fh->reserved = 0;
715 fh->frag_off = htons(offset);
716 if (frag->next != NULL)
717 fh->frag_off |= htons(IP6_MF);
718 fh->identification = frag_id;
0660e03f
ACM
719 ipv6_hdr(frag)->payload_len =
720 htons(frag->len -
721 sizeof(struct ipv6hdr));
1da177e4
LT
722 ip6_copy_metadata(frag, skb);
723 }
1ab1457c 724
1da177e4 725 err = output(skb);
dafee490 726 if(!err)
a11d206d 727 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);
dafee490 728
1da177e4
LT
729 if (err || !frag)
730 break;
731
732 skb = frag;
733 frag = skb->next;
734 skb->next = NULL;
735 }
736
a51482bd 737 kfree(tmp_hdr);
1da177e4
LT
738
739 if (err == 0) {
a11d206d
YH
740 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
741 dst_release(&rt->u.dst);
1da177e4
LT
742 return 0;
743 }
744
745 while (frag) {
746 skb = frag->next;
747 kfree_skb(frag);
748 frag = skb;
749 }
750
a11d206d
YH
751 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
752 dst_release(&rt->u.dst);
1da177e4
LT
753 return err;
754 }
755
756slow_path:
757 left = skb->len - hlen; /* Space per frame */
758 ptr = hlen; /* Where to start from */
759
760 /*
761 * Fragment the datagram.
762 */
763
764 *prevhdr = NEXTHDR_FRAGMENT;
765
766 /*
767 * Keep copying data until we run out.
768 */
769 while(left > 0) {
770 len = left;
771 /* IF: it doesn't fit, use 'mtu' - the data space left */
772 if (len > mtu)
773 len = mtu;
774 /* IF: we are not sending upto and including the packet end
775 then align the next start on an eight byte boundary */
776 if (len < left) {
777 len &= ~7;
778 }
779 /*
780 * Allocate buffer.
781 */
782
783 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
64ce2073 784 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
a11d206d
YH
785 IP6_INC_STATS(ip6_dst_idev(skb->dst),
786 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
787 err = -ENOMEM;
788 goto fail;
789 }
790
791 /*
792 * Set up data on packet
793 */
794
795 ip6_copy_metadata(frag, skb);
796 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
797 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
c1d2bbe1 798 skb_reset_network_header(frag);
badff6d0 799 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
b0e380b1
ACM
800 frag->transport_header = (frag->network_header + hlen +
801 sizeof(struct frag_hdr));
1da177e4
LT
802
803 /*
804 * Charge the memory for the fragment to any owner
805 * it might possess
806 */
807 if (skb->sk)
808 skb_set_owner_w(frag, skb->sk);
809
810 /*
811 * Copy the packet header into the new buffer.
812 */
d626f62b 813 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
1da177e4
LT
814
815 /*
816 * Build fragment header.
817 */
818 fh->nexthdr = nexthdr;
819 fh->reserved = 0;
f36d6ab1 820 if (!frag_id) {
1da177e4
LT
821 ipv6_select_ident(skb, fh);
822 frag_id = fh->identification;
823 } else
824 fh->identification = frag_id;
825
826 /*
827 * Copy a block of the IP datagram.
828 */
8984e41d 829 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
1da177e4
LT
830 BUG();
831 left -= len;
832
833 fh->frag_off = htons(offset);
834 if (left > 0)
835 fh->frag_off |= htons(IP6_MF);
0660e03f
ACM
836 ipv6_hdr(frag)->payload_len = htons(frag->len -
837 sizeof(struct ipv6hdr));
1da177e4
LT
838
839 ptr += len;
840 offset += len;
841
842 /*
843 * Put this fragment into the sending queue.
844 */
1da177e4
LT
845 err = output(frag);
846 if (err)
847 goto fail;
dafee490 848
a11d206d 849 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
1da177e4 850 }
a11d206d
YH
851 IP6_INC_STATS(ip6_dst_idev(skb->dst),
852 IPSTATS_MIB_FRAGOKS);
1da177e4 853 kfree_skb(skb);
1da177e4
LT
854 return err;
855
856fail:
a11d206d
YH
857 IP6_INC_STATS(ip6_dst_idev(skb->dst),
858 IPSTATS_MIB_FRAGFAILS);
1ab1457c 859 kfree_skb(skb);
1da177e4
LT
860 return err;
861}
862
cf6b1982
YH
863static inline int ip6_rt_check(struct rt6key *rt_key,
864 struct in6_addr *fl_addr,
865 struct in6_addr *addr_cache)
866{
867 return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
868 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
869}
870
497c615a
HX
871static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
872 struct dst_entry *dst,
873 struct flowi *fl)
1da177e4 874{
497c615a
HX
875 struct ipv6_pinfo *np = inet6_sk(sk);
876 struct rt6_info *rt = (struct rt6_info *)dst;
1da177e4 877
497c615a
HX
878 if (!dst)
879 goto out;
880
881 /* Yes, checking route validity in not connected
882 * case is not very simple. Take into account,
883 * that we do not support routing by source, TOS,
884 * and MSG_DONTROUTE --ANK (980726)
885 *
cf6b1982
YH
886 * 1. ip6_rt_check(): If route was host route,
887 * check that cached destination is current.
497c615a
HX
888 * If it is network route, we still may
889 * check its validity using saved pointer
890 * to the last used address: daddr_cache.
891 * We do not want to save whole address now,
892 * (because main consumer of this service
893 * is tcp, which has not this problem),
894 * so that the last trick works only on connected
895 * sockets.
896 * 2. oif also should be the same.
897 */
cf6b1982 898 if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
8e1ef0a9
YH
899#ifdef CONFIG_IPV6_SUBTREES
900 ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
901#endif
cf6b1982 902 (fl->oif && fl->oif != dst->dev->ifindex)) {
497c615a
HX
903 dst_release(dst);
904 dst = NULL;
1da177e4
LT
905 }
906
497c615a
HX
907out:
908 return dst;
909}
910
911static int ip6_dst_lookup_tail(struct sock *sk,
912 struct dst_entry **dst, struct flowi *fl)
913{
914 int err;
3b1e0a65 915 struct net *net = sock_net(sk);
497c615a 916
1da177e4 917 if (*dst == NULL)
8a3edd80 918 *dst = ip6_route_output(net, sk, fl);
1da177e4
LT
919
920 if ((err = (*dst)->error))
921 goto out_err_release;
922
923 if (ipv6_addr_any(&fl->fl6_src)) {
5e5f3f0f 924 err = ipv6_dev_get_saddr(ip6_dst_idev(*dst)->dev,
7cbca67c
YH
925 &fl->fl6_dst,
926 sk ? inet6_sk(sk)->srcprefs : 0,
927 &fl->fl6_src);
44456d37 928 if (err)
1da177e4 929 goto out_err_release;
1da177e4
LT
930 }
931
95c385b4
NH
932#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
933 /*
934 * Here if the dst entry we've looked up
935 * has a neighbour entry that is in the INCOMPLETE
936 * state and the src address from the flow is
937 * marked as OPTIMISTIC, we release the found
938 * dst entry and replace it instead with the
939 * dst entry of the nexthop router
940 */
941 if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
942 struct inet6_ifaddr *ifp;
943 struct flowi fl_gw;
944 int redirect;
945
8a3edd80 946 ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
1cab3da6 947 (*dst)->dev, 1);
95c385b4
NH
948
949 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
950 if (ifp)
951 in6_ifa_put(ifp);
952
953 if (redirect) {
954 /*
955 * We need to get the dst entry for the
956 * default router instead
957 */
958 dst_release(*dst);
959 memcpy(&fl_gw, fl, sizeof(struct flowi));
960 memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
8a3edd80 961 *dst = ip6_route_output(net, sk, &fl_gw);
95c385b4
NH
962 if ((err = (*dst)->error))
963 goto out_err_release;
964 }
965 }
966#endif
967
1da177e4
LT
968 return 0;
969
970out_err_release:
ca46f9c8
MC
971 if (err == -ENETUNREACH)
972 IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
973 dst_release(*dst);
974 *dst = NULL;
975 return err;
976}
34a0b3cd 977
497c615a
HX
978/**
979 * ip6_dst_lookup - perform route lookup on flow
980 * @sk: socket which provides route info
981 * @dst: pointer to dst_entry * for result
982 * @fl: flow to lookup
983 *
984 * This function performs a route lookup on the given flow.
985 *
986 * It returns zero on success, or a standard errno code on error.
987 */
988int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
989{
990 *dst = NULL;
991 return ip6_dst_lookup_tail(sk, dst, fl);
992}
3cf3dc6c
ACM
993EXPORT_SYMBOL_GPL(ip6_dst_lookup);
994
497c615a
HX
995/**
996 * ip6_sk_dst_lookup - perform socket cached route lookup on flow
997 * @sk: socket which provides the dst cache and route info
998 * @dst: pointer to dst_entry * for result
999 * @fl: flow to lookup
1000 *
1001 * This function performs a route lookup on the given flow with the
1002 * possibility of using the cached route in the socket if it is valid.
1003 * It will take the socket dst lock when operating on the dst cache.
1004 * As a result, this function can only be used in process context.
1005 *
1006 * It returns zero on success, or a standard errno code on error.
1007 */
1008int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1009{
1010 *dst = NULL;
1011 if (sk) {
1012 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1013 *dst = ip6_sk_dst_check(sk, *dst, fl);
1014 }
1015
1016 return ip6_dst_lookup_tail(sk, dst, fl);
1017}
1018EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
1019
34a0b3cd 1020static inline int ip6_ufo_append_data(struct sock *sk,
e89e9cf5
AR
1021 int getfrag(void *from, char *to, int offset, int len,
1022 int odd, struct sk_buff *skb),
1023 void *from, int length, int hh_len, int fragheaderlen,
1024 int transhdrlen, int mtu,unsigned int flags)
1025
1026{
1027 struct sk_buff *skb;
1028 int err;
1029
1030 /* There is support for UDP large send offload by network
1031 * device, so create one single skb packet containing complete
1032 * udp datagram
1033 */
1034 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1035 skb = sock_alloc_send_skb(sk,
1036 hh_len + fragheaderlen + transhdrlen + 20,
1037 (flags & MSG_DONTWAIT), &err);
1038 if (skb == NULL)
1039 return -ENOMEM;
1040
1041 /* reserve space for Hardware header */
1042 skb_reserve(skb, hh_len);
1043
1044 /* create space for UDP/IP header */
1045 skb_put(skb,fragheaderlen + transhdrlen);
1046
1047 /* initialize network header pointer */
c1d2bbe1 1048 skb_reset_network_header(skb);
e89e9cf5
AR
1049
1050 /* initialize protocol header pointer */
b0e380b1 1051 skb->transport_header = skb->network_header + fragheaderlen;
e89e9cf5 1052
84fa7933 1053 skb->ip_summed = CHECKSUM_PARTIAL;
e89e9cf5
AR
1054 skb->csum = 0;
1055 sk->sk_sndmsg_off = 0;
1056 }
1057
1058 err = skb_append_datato_frags(sk,skb, getfrag, from,
1059 (length - transhdrlen));
1060 if (!err) {
1061 struct frag_hdr fhdr;
1062
1063 /* specify the length of each IP datagram fragment*/
1ab1457c 1064 skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
7967168c 1065 sizeof(struct frag_hdr);
f83ef8c0 1066 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
e89e9cf5
AR
1067 ipv6_select_ident(skb, &fhdr);
1068 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1069 __skb_queue_tail(&sk->sk_write_queue, skb);
1070
1071 return 0;
1072 }
1073 /* There is not enough support do UPD LSO,
1074 * so follow normal path
1075 */
1076 kfree_skb(skb);
1077
1078 return err;
1079}
1da177e4 1080
41a1f8ea
YH
1081int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1082 int offset, int len, int odd, struct sk_buff *skb),
1083 void *from, int length, int transhdrlen,
1084 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
1085 struct rt6_info *rt, unsigned int flags)
1da177e4
LT
1086{
1087 struct inet_sock *inet = inet_sk(sk);
1088 struct ipv6_pinfo *np = inet6_sk(sk);
1089 struct sk_buff *skb;
1090 unsigned int maxfraglen, fragheaderlen;
1091 int exthdrlen;
1092 int hh_len;
1093 int mtu;
1094 int copy;
1095 int err;
1096 int offset = 0;
1097 int csummode = CHECKSUM_NONE;
1098
1099 if (flags&MSG_PROBE)
1100 return 0;
1101 if (skb_queue_empty(&sk->sk_write_queue)) {
1102 /*
1103 * setup for corking
1104 */
1105 if (opt) {
1106 if (np->cork.opt == NULL) {
1107 np->cork.opt = kmalloc(opt->tot_len,
1108 sk->sk_allocation);
1109 if (unlikely(np->cork.opt == NULL))
1110 return -ENOBUFS;
1111 } else if (np->cork.opt->tot_len < opt->tot_len) {
1112 printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
1113 return -EINVAL;
1114 }
1115 memcpy(np->cork.opt, opt, opt->tot_len);
1116 inet->cork.flags |= IPCORK_OPT;
1117 /* need source address above miyazawa*/
1118 }
1119 dst_hold(&rt->u.dst);
c8cdaf99 1120 inet->cork.dst = &rt->u.dst;
1da177e4
LT
1121 inet->cork.fl = *fl;
1122 np->cork.hop_limit = hlimit;
41a1f8ea 1123 np->cork.tclass = tclass;
628a5c56
JH
1124 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1125 rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
c7503609 1126 if (np->frag_size < mtu) {
d91675f9
YH
1127 if (np->frag_size)
1128 mtu = np->frag_size;
1129 }
1130 inet->cork.fragsize = mtu;
1da177e4
LT
1131 if (dst_allfrag(rt->u.dst.path))
1132 inet->cork.flags |= IPCORK_ALLFRAG;
1133 inet->cork.length = 0;
1134 sk->sk_sndmsg_page = NULL;
1135 sk->sk_sndmsg_off = 0;
01488942 1136 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
a1b05140 1137 rt->rt6i_nfheader_len;
1da177e4
LT
1138 length += exthdrlen;
1139 transhdrlen += exthdrlen;
1140 } else {
c8cdaf99 1141 rt = (struct rt6_info *)inet->cork.dst;
1da177e4
LT
1142 fl = &inet->cork.fl;
1143 if (inet->cork.flags & IPCORK_OPT)
1144 opt = np->cork.opt;
1145 transhdrlen = 0;
1146 exthdrlen = 0;
1147 mtu = inet->cork.fragsize;
1148 }
1149
1150 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1151
a1b05140 1152 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
b4ce9277 1153 (opt ? opt->opt_nflen : 0);
1da177e4
LT
1154 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1155
1156 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1157 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1158 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
1159 return -EMSGSIZE;
1160 }
1161 }
1162
1163 /*
1164 * Let's try using as much space as possible.
1165 * Use MTU if total length of the message fits into the MTU.
1166 * Otherwise, we need to reserve fragment header and
1167 * fragment alignment (= 8-15 octects, in total).
1168 *
1169 * Note that we may need to "move" the data from the tail of
1ab1457c 1170 * of the buffer to the new fragment when we split
1da177e4
LT
1171 * the message.
1172 *
1ab1457c 1173 * FIXME: It may be fragmented into multiple chunks
1da177e4
LT
1174 * at once if non-fragmentable extension headers
1175 * are too large.
1ab1457c 1176 * --yoshfuji
1da177e4
LT
1177 */
1178
1179 inet->cork.length += length;
e89e9cf5
AR
1180 if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
1181 (rt->u.dst.dev->features & NETIF_F_UFO)) {
1182
baa829d8
PM
1183 err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
1184 fragheaderlen, transhdrlen, mtu,
1185 flags);
1186 if (err)
e89e9cf5 1187 goto error;
e89e9cf5
AR
1188 return 0;
1189 }
1da177e4
LT
1190
1191 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1192 goto alloc_new_skb;
1193
1194 while (length > 0) {
1195 /* Check if the remaining data fits into current packet. */
1196 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1197 if (copy < length)
1198 copy = maxfraglen - skb->len;
1199
1200 if (copy <= 0) {
1201 char *data;
1202 unsigned int datalen;
1203 unsigned int fraglen;
1204 unsigned int fraggap;
1205 unsigned int alloclen;
1206 struct sk_buff *skb_prev;
1207alloc_new_skb:
1208 skb_prev = skb;
1209
1210 /* There's no room in the current skb */
1211 if (skb_prev)
1212 fraggap = skb_prev->len - maxfraglen;
1213 else
1214 fraggap = 0;
1215
1216 /*
1217 * If remaining data exceeds the mtu,
1218 * we know we need more fragment(s).
1219 */
1220 datalen = length + fraggap;
1221 if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1222 datalen = maxfraglen - fragheaderlen;
1223
1224 fraglen = datalen + fragheaderlen;
1225 if ((flags & MSG_MORE) &&
1226 !(rt->u.dst.dev->features&NETIF_F_SG))
1227 alloclen = mtu;
1228 else
1229 alloclen = datalen + fragheaderlen;
1230
1231 /*
1232 * The last fragment gets additional space at tail.
1233 * Note: we overallocate on fragments with MSG_MODE
1234 * because we have no idea if we're the last one.
1235 */
1236 if (datalen == length + fraggap)
1237 alloclen += rt->u.dst.trailer_len;
1238
1239 /*
1240 * We just reserve space for fragment header.
1ab1457c 1241 * Note: this may be overallocation if the message
1da177e4
LT
1242 * (without MSG_MORE) fits into the MTU.
1243 */
1244 alloclen += sizeof(struct frag_hdr);
1245
1246 if (transhdrlen) {
1247 skb = sock_alloc_send_skb(sk,
1248 alloclen + hh_len,
1249 (flags & MSG_DONTWAIT), &err);
1250 } else {
1251 skb = NULL;
1252 if (atomic_read(&sk->sk_wmem_alloc) <=
1253 2 * sk->sk_sndbuf)
1254 skb = sock_wmalloc(sk,
1255 alloclen + hh_len, 1,
1256 sk->sk_allocation);
1257 if (unlikely(skb == NULL))
1258 err = -ENOBUFS;
1259 }
1260 if (skb == NULL)
1261 goto error;
1262 /*
1263 * Fill in the control structures
1264 */
1265 skb->ip_summed = csummode;
1266 skb->csum = 0;
1267 /* reserve for fragmentation */
1268 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1269
1270 /*
1271 * Find where to start putting bytes
1272 */
1273 data = skb_put(skb, fraglen);
c14d2450 1274 skb_set_network_header(skb, exthdrlen);
1da177e4 1275 data += fragheaderlen;
b0e380b1
ACM
1276 skb->transport_header = (skb->network_header +
1277 fragheaderlen);
1da177e4
LT
1278 if (fraggap) {
1279 skb->csum = skb_copy_and_csum_bits(
1280 skb_prev, maxfraglen,
1281 data + transhdrlen, fraggap, 0);
1282 skb_prev->csum = csum_sub(skb_prev->csum,
1283 skb->csum);
1284 data += fraggap;
e9fa4f7b 1285 pskb_trim_unique(skb_prev, maxfraglen);
1da177e4
LT
1286 }
1287 copy = datalen - transhdrlen - fraggap;
1288 if (copy < 0) {
1289 err = -EINVAL;
1290 kfree_skb(skb);
1291 goto error;
1292 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1293 err = -EFAULT;
1294 kfree_skb(skb);
1295 goto error;
1296 }
1297
1298 offset += copy;
1299 length -= datalen - fraggap;
1300 transhdrlen = 0;
1301 exthdrlen = 0;
1302 csummode = CHECKSUM_NONE;
1303
1304 /*
1305 * Put the packet on the pending queue
1306 */
1307 __skb_queue_tail(&sk->sk_write_queue, skb);
1308 continue;
1309 }
1310
1311 if (copy > length)
1312 copy = length;
1313
1314 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1315 unsigned int off;
1316
1317 off = skb->len;
1318 if (getfrag(from, skb_put(skb, copy),
1319 offset, copy, off, skb) < 0) {
1320 __skb_trim(skb, off);
1321 err = -EFAULT;
1322 goto error;
1323 }
1324 } else {
1325 int i = skb_shinfo(skb)->nr_frags;
1326 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1327 struct page *page = sk->sk_sndmsg_page;
1328 int off = sk->sk_sndmsg_off;
1329 unsigned int left;
1330
1331 if (page && (left = PAGE_SIZE - off) > 0) {
1332 if (copy >= left)
1333 copy = left;
1334 if (page != frag->page) {
1335 if (i == MAX_SKB_FRAGS) {
1336 err = -EMSGSIZE;
1337 goto error;
1338 }
1339 get_page(page);
1340 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1341 frag = &skb_shinfo(skb)->frags[i];
1342 }
1343 } else if(i < MAX_SKB_FRAGS) {
1344 if (copy > PAGE_SIZE)
1345 copy = PAGE_SIZE;
1346 page = alloc_pages(sk->sk_allocation, 0);
1347 if (page == NULL) {
1348 err = -ENOMEM;
1349 goto error;
1350 }
1351 sk->sk_sndmsg_page = page;
1352 sk->sk_sndmsg_off = 0;
1353
1354 skb_fill_page_desc(skb, i, page, 0, 0);
1355 frag = &skb_shinfo(skb)->frags[i];
1da177e4
LT
1356 } else {
1357 err = -EMSGSIZE;
1358 goto error;
1359 }
1360 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1361 err = -EFAULT;
1362 goto error;
1363 }
1364 sk->sk_sndmsg_off += copy;
1365 frag->size += copy;
1366 skb->len += copy;
1367 skb->data_len += copy;
f945fa7a
HX
1368 skb->truesize += copy;
1369 atomic_add(copy, &sk->sk_wmem_alloc);
1da177e4
LT
1370 }
1371 offset += copy;
1372 length -= copy;
1373 }
1374 return 0;
1375error:
1376 inet->cork.length -= length;
a11d206d 1377 IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1378 return err;
1379}
1380
bf138862
PE
1381static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1382{
1383 inet->cork.flags &= ~IPCORK_OPT;
1384 kfree(np->cork.opt);
1385 np->cork.opt = NULL;
c8cdaf99
YH
1386 if (inet->cork.dst) {
1387 dst_release(inet->cork.dst);
1388 inet->cork.dst = NULL;
bf138862
PE
1389 inet->cork.flags &= ~IPCORK_ALLFRAG;
1390 }
1391 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1392}
1393
1da177e4
LT
1394int ip6_push_pending_frames(struct sock *sk)
1395{
1396 struct sk_buff *skb, *tmp_skb;
1397 struct sk_buff **tail_skb;
1398 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1399 struct inet_sock *inet = inet_sk(sk);
1400 struct ipv6_pinfo *np = inet6_sk(sk);
1401 struct ipv6hdr *hdr;
1402 struct ipv6_txoptions *opt = np->cork.opt;
c8cdaf99 1403 struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
1da177e4
LT
1404 struct flowi *fl = &inet->cork.fl;
1405 unsigned char proto = fl->proto;
1406 int err = 0;
1407
1408 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1409 goto out;
1410 tail_skb = &(skb_shinfo(skb)->frag_list);
1411
1412 /* move skb->data to ip header from ext header */
d56f90a7 1413 if (skb->data < skb_network_header(skb))
bbe735e4 1414 __skb_pull(skb, skb_network_offset(skb));
1da177e4 1415 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
cfe1fc77 1416 __skb_pull(tmp_skb, skb_network_header_len(skb));
1da177e4
LT
1417 *tail_skb = tmp_skb;
1418 tail_skb = &(tmp_skb->next);
1419 skb->len += tmp_skb->len;
1420 skb->data_len += tmp_skb->len;
1da177e4
LT
1421 skb->truesize += tmp_skb->truesize;
1422 __sock_put(tmp_skb->sk);
1423 tmp_skb->destructor = NULL;
1424 tmp_skb->sk = NULL;
1da177e4
LT
1425 }
1426
28a89453 1427 /* Allow local fragmentation. */
b5c15fc0 1428 if (np->pmtudisc < IPV6_PMTUDISC_DO)
28a89453
HX
1429 skb->local_df = 1;
1430
1da177e4 1431 ipv6_addr_copy(final_dst, &fl->fl6_dst);
cfe1fc77 1432 __skb_pull(skb, skb_network_header_len(skb));
1da177e4
LT
1433 if (opt && opt->opt_flen)
1434 ipv6_push_frag_opts(skb, opt, &proto);
1435 if (opt && opt->opt_nflen)
1436 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1437
e2d1bca7
ACM
1438 skb_push(skb, sizeof(struct ipv6hdr));
1439 skb_reset_network_header(skb);
0660e03f 1440 hdr = ipv6_hdr(skb);
1ab1457c 1441
90bcaf7b 1442 *(__be32*)hdr = fl->fl6_flowlabel |
41a1f8ea 1443 htonl(0x60000000 | ((int)np->cork.tclass << 20));
1da177e4 1444
1da177e4
LT
1445 hdr->hop_limit = np->cork.hop_limit;
1446 hdr->nexthdr = proto;
1447 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1448 ipv6_addr_copy(&hdr->daddr, final_dst);
1449
a2c2064f 1450 skb->priority = sk->sk_priority;
4a19ec58 1451 skb->mark = sk->sk_mark;
a2c2064f 1452
1da177e4 1453 skb->dst = dst_clone(&rt->u.dst);
a11d206d 1454 IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
14878f75
DS
1455 if (proto == IPPROTO_ICMPV6) {
1456 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
1457
1458 ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type);
1459 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
1460 }
1461
ef76bc23 1462 err = ip6_local_out(skb);
1da177e4
LT
1463 if (err) {
1464 if (err > 0)
3320da89 1465 err = np->recverr ? net_xmit_errno(err) : 0;
1da177e4
LT
1466 if (err)
1467 goto error;
1468 }
1469
1470out:
bf138862 1471 ip6_cork_release(inet, np);
1da177e4
LT
1472 return err;
1473error:
1474 goto out;
1475}
1476
1477void ip6_flush_pending_frames(struct sock *sk)
1478{
1da177e4
LT
1479 struct sk_buff *skb;
1480
1481 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
e1f52208
YH
1482 if (skb->dst)
1483 IP6_INC_STATS(ip6_dst_idev(skb->dst),
1484 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1485 kfree_skb(skb);
1486 }
1487
bf138862 1488 ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1da177e4 1489}