netfilter: Pass net into okfn
[linux-2.6-block.git] / net / ipv6 / ip6_output.c
CommitLineData
1da177e4
LT
1/*
2 * IPv6 output functions
1ab1457c 3 * Linux INET6 implementation
1da177e4
LT
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : arithmetic in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
67ba4152 23 * Imran Patel : frag id should be in NBO
1da177e4
LT
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
1da177e4 29#include <linux/errno.h>
ef76bc23 30#include <linux/kernel.h>
1da177e4
LT
31#include <linux/string.h>
32#include <linux/socket.h>
33#include <linux/net.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/in6.h>
37#include <linux/tcp.h>
38#include <linux/route.h>
b59f45d0 39#include <linux/module.h>
5a0e3ad6 40#include <linux/slab.h>
1da177e4
LT
41
42#include <linux/netfilter.h>
43#include <linux/netfilter_ipv6.h>
44
45#include <net/sock.h>
46#include <net/snmp.h>
47
48#include <net/ipv6.h>
49#include <net/ndisc.h>
50#include <net/protocol.h>
51#include <net/ip6_route.h>
52#include <net/addrconf.h>
53#include <net/rawv6.h>
54#include <net/icmp.h>
55#include <net/xfrm.h>
56#include <net/checksum.h>
7bc570c8 57#include <linux/mroute6.h>
1da177e4 58
7026b1dd 59static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb)
1da177e4 60{
adf30907 61 struct dst_entry *dst = skb_dst(skb);
1da177e4 62 struct net_device *dev = dst->dev;
78126c41 63 struct net *net = dev_net(dev);
f6b72b62 64 struct neighbour *neigh;
6fd6ce20
YH
65 struct in6_addr *nexthop;
66 int ret;
1da177e4
LT
67
68 skb->protocol = htons(ETH_P_IPV6);
69 skb->dev = dev;
70
0660e03f 71 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
adf30907 72 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1da177e4 73
7026b1dd 74 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
78126c41 75 ((mroute6_socket(net, skb) &&
bd91b8bf 76 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
7bc570c8
YH
77 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
78 &ipv6_hdr(skb)->saddr))) {
1da177e4
LT
79 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
80
81 /* Do not check for IFF_ALLMULTI; multicast routing
82 is not supported in any case.
83 */
84 if (newskb)
b2e0b385 85 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
29a26a56 86 net, sk, newskb, NULL, newskb->dev,
95603e22 87 dev_loopback_xmit);
1da177e4 88
0660e03f 89 if (ipv6_hdr(skb)->hop_limit == 0) {
78126c41 90 IP6_INC_STATS(net, idev,
3bd653c8 91 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
92 kfree_skb(skb);
93 return 0;
94 }
95 }
96
78126c41 97 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
dd408515
HFS
98
99 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
100 IPV6_ADDR_SCOPE_NODELOCAL &&
101 !(dev->flags & IFF_LOOPBACK)) {
102 kfree_skb(skb);
103 return 0;
104 }
1da177e4
LT
105 }
106
6fd6ce20 107 rcu_read_lock_bh();
2647a9b0 108 nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
6fd6ce20
YH
109 neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
110 if (unlikely(!neigh))
111 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
112 if (!IS_ERR(neigh)) {
113 ret = dst_neigh_output(dst, neigh, skb);
114 rcu_read_unlock_bh();
115 return ret;
116 }
117 rcu_read_unlock_bh();
05e3aa09 118
78126c41 119 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
9e508490
JE
120 kfree_skb(skb);
121 return -EINVAL;
1da177e4
LT
122}
123
0c4b51f0 124static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
9e508490
JE
125{
126 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
9037c357
JP
127 dst_allfrag(skb_dst(skb)) ||
128 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
7026b1dd 129 return ip6_fragment(sk, skb, ip6_finish_output2);
9e508490 130 else
7026b1dd 131 return ip6_finish_output2(sk, skb);
9e508490
JE
132}
133
aad88724 134int ip6_output(struct sock *sk, struct sk_buff *skb)
1da177e4 135{
9e508490 136 struct net_device *dev = skb_dst(skb)->dev;
adf30907 137 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
19a0644c 138 struct net *net = dev_net(dev);
778d80be 139 if (unlikely(idev->cnf.disable_ipv6)) {
19a0644c 140 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
778d80be
YH
141 kfree_skb(skb);
142 return 0;
143 }
144
29a26a56
EB
145 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
146 net, sk, skb, NULL, dev,
9c6eb28a
JE
147 ip6_finish_output,
148 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
1da177e4
LT
149}
150
1da177e4 151/*
b5d43998 152 * xmit an sk_buff (used by TCP, SCTP and DCCP)
1da177e4
LT
153 */
154
4c9483b2 155int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
b903d324 156 struct ipv6_txoptions *opt, int tclass)
1da177e4 157{
3bd653c8 158 struct net *net = sock_net(sk);
b30bd282 159 struct ipv6_pinfo *np = inet6_sk(sk);
4c9483b2 160 struct in6_addr *first_hop = &fl6->daddr;
adf30907 161 struct dst_entry *dst = skb_dst(skb);
1da177e4 162 struct ipv6hdr *hdr;
4c9483b2 163 u8 proto = fl6->flowi6_proto;
1da177e4 164 int seg_len = skb->len;
e651f03a 165 int hlimit = -1;
1da177e4
LT
166 u32 mtu;
167
168 if (opt) {
c2636b4d 169 unsigned int head_room;
1da177e4
LT
170
171 /* First: exthdrs may take lots of space (~8K for now)
172 MAX_HEADER is not enough.
173 */
174 head_room = opt->opt_nflen + opt->opt_flen;
175 seg_len += head_room;
176 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
177
178 if (skb_headroom(skb) < head_room) {
179 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
63159f29 180 if (!skb2) {
adf30907 181 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d
YH
182 IPSTATS_MIB_OUTDISCARDS);
183 kfree_skb(skb);
1da177e4
LT
184 return -ENOBUFS;
185 }
808db80a 186 consume_skb(skb);
a11d206d 187 skb = skb2;
83d7eb29 188 skb_set_owner_w(skb, sk);
1da177e4
LT
189 }
190 if (opt->opt_flen)
191 ipv6_push_frag_opts(skb, opt, &proto);
192 if (opt->opt_nflen)
193 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
194 }
195
e2d1bca7
ACM
196 skb_push(skb, sizeof(struct ipv6hdr));
197 skb_reset_network_header(skb);
0660e03f 198 hdr = ipv6_hdr(skb);
1da177e4
LT
199
200 /*
201 * Fill in the IPv6 header
202 */
b903d324 203 if (np)
1da177e4
LT
204 hlimit = np->hop_limit;
205 if (hlimit < 0)
6b75d090 206 hlimit = ip6_dst_hoplimit(dst);
1da177e4 207
cb1ce2ef 208 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
67800f9b 209 np->autoflowlabel, fl6));
41a1f8ea 210
1da177e4
LT
211 hdr->payload_len = htons(seg_len);
212 hdr->nexthdr = proto;
213 hdr->hop_limit = hlimit;
214
4e3fd7a0
AD
215 hdr->saddr = fl6->saddr;
216 hdr->daddr = *first_hop;
1da177e4 217
9c9c9ad5 218 skb->protocol = htons(ETH_P_IPV6);
a2c2064f 219 skb->priority = sk->sk_priority;
4a19ec58 220 skb->mark = sk->sk_mark;
a2c2064f 221
1da177e4 222 mtu = dst_mtu(dst);
60ff7467 223 if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
adf30907 224 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
edf391ff 225 IPSTATS_MIB_OUT, skb->len);
29a26a56
EB
226 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
227 net, sk, skb, NULL, dst->dev,
0c4b51f0 228 dst_output_okfn);
1da177e4
LT
229 }
230
1da177e4 231 skb->dev = dst->dev;
f4e53e29 232 ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
adf30907 233 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
234 kfree_skb(skb);
235 return -EMSGSIZE;
236}
7159039a
YH
237EXPORT_SYMBOL(ip6_xmit);
238
1da177e4
LT
239static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
240{
241 struct ip6_ra_chain *ra;
242 struct sock *last = NULL;
243
244 read_lock(&ip6_ra_lock);
245 for (ra = ip6_ra_chain; ra; ra = ra->next) {
246 struct sock *sk = ra->sk;
0bd1b59b
AM
247 if (sk && ra->sel == sel &&
248 (!sk->sk_bound_dev_if ||
249 sk->sk_bound_dev_if == skb->dev->ifindex)) {
1da177e4
LT
250 if (last) {
251 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
252 if (skb2)
253 rawv6_rcv(last, skb2);
254 }
255 last = sk;
256 }
257 }
258
259 if (last) {
260 rawv6_rcv(last, skb);
261 read_unlock(&ip6_ra_lock);
262 return 1;
263 }
264 read_unlock(&ip6_ra_lock);
265 return 0;
266}
267
e21e0b5f
VN
268static int ip6_forward_proxy_check(struct sk_buff *skb)
269{
0660e03f 270 struct ipv6hdr *hdr = ipv6_hdr(skb);
e21e0b5f 271 u8 nexthdr = hdr->nexthdr;
75f2811c 272 __be16 frag_off;
e21e0b5f
VN
273 int offset;
274
275 if (ipv6_ext_hdr(nexthdr)) {
75f2811c 276 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
e21e0b5f
VN
277 if (offset < 0)
278 return 0;
279 } else
280 offset = sizeof(struct ipv6hdr);
281
282 if (nexthdr == IPPROTO_ICMPV6) {
283 struct icmp6hdr *icmp6;
284
d56f90a7
ACM
285 if (!pskb_may_pull(skb, (skb_network_header(skb) +
286 offset + 1 - skb->data)))
e21e0b5f
VN
287 return 0;
288
d56f90a7 289 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
e21e0b5f
VN
290
291 switch (icmp6->icmp6_type) {
292 case NDISC_ROUTER_SOLICITATION:
293 case NDISC_ROUTER_ADVERTISEMENT:
294 case NDISC_NEIGHBOUR_SOLICITATION:
295 case NDISC_NEIGHBOUR_ADVERTISEMENT:
296 case NDISC_REDIRECT:
297 /* For reaction involving unicast neighbor discovery
298 * message destined to the proxied address, pass it to
299 * input function.
300 */
301 return 1;
302 default:
303 break;
304 }
305 }
306
74553b09
VN
307 /*
308 * The proxying router can't forward traffic sent to a link-local
309 * address, so signal the sender and discard the packet. This
310 * behavior is clarified by the MIPv6 specification.
311 */
312 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
313 dst_link_failure(skb);
314 return -1;
315 }
316
e21e0b5f
VN
317 return 0;
318}
319
0c4b51f0
EB
/*
 * Continuation passed by ip6_forward() to the NF_INET_FORWARD hook:
 * clear the sender-cpu record on the skb and send it out via its dst.
 * @net is part of the okfn signature and is not used here.
 */
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	int rc;

	skb_sender_cpu_clear(skb);
	rc = dst_output(sk, skb);

	return rc;
}
326
0954cf9c
HFS
327static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
328{
329 unsigned int mtu;
330 struct inet6_dev *idev;
331
332 if (dst_metric_locked(dst, RTAX_MTU)) {
333 mtu = dst_metric_raw(dst, RTAX_MTU);
334 if (mtu)
335 return mtu;
336 }
337
338 mtu = IPV6_MIN_MTU;
339 rcu_read_lock();
340 idev = __in6_dev_get(dst->dev);
341 if (idev)
342 mtu = idev->cnf.mtu6;
343 rcu_read_unlock();
344
345 return mtu;
346}
347
fe6cc55f
FW
348static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
349{
418a3156 350 if (skb->len <= mtu)
fe6cc55f
FW
351 return false;
352
60ff7467 353 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
fe6cc55f
FW
354 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
355 return true;
356
60ff7467 357 if (skb->ignore_df)
418a3156
FW
358 return false;
359
fe6cc55f
FW
360 if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
361 return false;
362
363 return true;
364}
365
1da177e4
LT
366int ip6_forward(struct sk_buff *skb)
367{
adf30907 368 struct dst_entry *dst = skb_dst(skb);
0660e03f 369 struct ipv6hdr *hdr = ipv6_hdr(skb);
1da177e4 370 struct inet6_skb_parm *opt = IP6CB(skb);
c346dca1 371 struct net *net = dev_net(dst->dev);
14f3ad6f 372 u32 mtu;
1ab1457c 373
53b7997f 374 if (net->ipv6.devconf_all->forwarding == 0)
1da177e4
LT
375 goto error;
376
090f1166
LR
377 if (skb->pkt_type != PACKET_HOST)
378 goto drop;
379
4497b076
BH
380 if (skb_warn_if_lro(skb))
381 goto drop;
382
1da177e4 383 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
15c77d8b
ED
384 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
385 IPSTATS_MIB_INDISCARDS);
1da177e4
LT
386 goto drop;
387 }
388
35fc92a9 389 skb_forward_csum(skb);
1da177e4
LT
390
391 /*
392 * We DO NOT make any processing on
393 * RA packets, pushing them to user level AS IS
394 * without ane WARRANTY that application will be able
395 * to interpret them. The reason is that we
396 * cannot make anything clever here.
397 *
398 * We are not end-node, so that if packet contains
399 * AH/ESP, we cannot make anything.
400 * Defragmentation also would be mistake, RA packets
401 * cannot be fragmented, because there is no warranty
402 * that different fragments will go along one path. --ANK
403 */
ab4eb353
YH
404 if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
405 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
1da177e4
LT
406 return 0;
407 }
408
409 /*
410 * check and decrement ttl
411 */
412 if (hdr->hop_limit <= 1) {
413 /* Force OUTPUT device used as source address */
414 skb->dev = dst->dev;
3ffe533c 415 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
15c77d8b
ED
416 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
417 IPSTATS_MIB_INHDRERRORS);
1da177e4
LT
418
419 kfree_skb(skb);
420 return -ETIMEDOUT;
421 }
422
fbea49e1 423 /* XXX: idev->cnf.proxy_ndp? */
53b7997f 424 if (net->ipv6.devconf_all->proxy_ndp &&
8a3edd80 425 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
74553b09
VN
426 int proxied = ip6_forward_proxy_check(skb);
427 if (proxied > 0)
e21e0b5f 428 return ip6_input(skb);
74553b09 429 else if (proxied < 0) {
15c77d8b
ED
430 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
431 IPSTATS_MIB_INDISCARDS);
74553b09
VN
432 goto drop;
433 }
e21e0b5f
VN
434 }
435
1da177e4 436 if (!xfrm6_route_forward(skb)) {
15c77d8b
ED
437 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
438 IPSTATS_MIB_INDISCARDS);
1da177e4
LT
439 goto drop;
440 }
adf30907 441 dst = skb_dst(skb);
1da177e4
LT
442
443 /* IPv6 specs say nothing about it, but it is clear that we cannot
444 send redirects to source routed frames.
1e5dc146 445 We don't send redirects to frames decapsulated from IPsec.
1da177e4 446 */
c45a3dfb 447 if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
1da177e4 448 struct in6_addr *target = NULL;
fbfe95a4 449 struct inet_peer *peer;
1da177e4 450 struct rt6_info *rt;
1da177e4
LT
451
452 /*
453 * incoming and outgoing devices are the same
454 * send a redirect.
455 */
456
457 rt = (struct rt6_info *) dst;
c45a3dfb
DM
458 if (rt->rt6i_flags & RTF_GATEWAY)
459 target = &rt->rt6i_gateway;
1da177e4
LT
460 else
461 target = &hdr->daddr;
462
fd0273d7 463 peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
92d86829 464
1da177e4
LT
465 /* Limit redirects both by destination (here)
466 and by source (inside ndisc_send_redirect)
467 */
fbfe95a4 468 if (inet_peer_xrlim_allow(peer, 1*HZ))
4991969a 469 ndisc_send_redirect(skb, target);
1d861aa4
DM
470 if (peer)
471 inet_putpeer(peer);
5bb1ab09
DS
472 } else {
473 int addrtype = ipv6_addr_type(&hdr->saddr);
474
1da177e4 475 /* This check is security critical. */
f81b2e7d
YH
476 if (addrtype == IPV6_ADDR_ANY ||
477 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
5bb1ab09
DS
478 goto error;
479 if (addrtype & IPV6_ADDR_LINKLOCAL) {
480 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
3ffe533c 481 ICMPV6_NOT_NEIGHBOUR, 0);
5bb1ab09
DS
482 goto error;
483 }
1da177e4
LT
484 }
485
0954cf9c 486 mtu = ip6_dst_mtu_forward(dst);
14f3ad6f
UW
487 if (mtu < IPV6_MIN_MTU)
488 mtu = IPV6_MIN_MTU;
489
fe6cc55f 490 if (ip6_pkt_too_big(skb, mtu)) {
1da177e4
LT
491 /* Again, force OUTPUT device used as source address */
492 skb->dev = dst->dev;
14f3ad6f 493 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
15c77d8b
ED
494 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
495 IPSTATS_MIB_INTOOBIGERRORS);
496 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
497 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
498 kfree_skb(skb);
499 return -EMSGSIZE;
500 }
501
502 if (skb_cow(skb, dst->dev->hard_header_len)) {
15c77d8b
ED
503 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
504 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
505 goto drop;
506 }
507
0660e03f 508 hdr = ipv6_hdr(skb);
1da177e4
LT
509
510 /* Mangling hops number delayed to point after skb COW */
1ab1457c 511
1da177e4
LT
512 hdr->hop_limit--;
513
483a47d2 514 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
2d8dbb04 515 IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
29a26a56
EB
516 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
517 net, NULL, skb, skb->dev, dst->dev,
6e23ae2a 518 ip6_forward_finish);
1da177e4
LT
519
520error:
483a47d2 521 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
1da177e4
LT
522drop:
523 kfree_skb(skb);
524 return -EINVAL;
525}
526
527static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
528{
529 to->pkt_type = from->pkt_type;
530 to->priority = from->priority;
531 to->protocol = from->protocol;
adf30907
ED
532 skb_dst_drop(to);
533 skb_dst_set(to, dst_clone(skb_dst(from)));
1da177e4 534 to->dev = from->dev;
82e91ffe 535 to->mark = from->mark;
1da177e4
LT
536
537#ifdef CONFIG_NET_SCHED
538 to->tc_index = from->tc_index;
539#endif
e7ac05f3 540 nf_copy(to, from);
984bc16c 541 skb_copy_secmark(to, from);
1da177e4
LT
542}
543
7026b1dd
DM
544int ip6_fragment(struct sock *sk, struct sk_buff *skb,
545 int (*output)(struct sock *, struct sk_buff *))
1da177e4 546{
1da177e4 547 struct sk_buff *frag;
67ba4152 548 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
f60e5990 549 struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
550 inet6_sk(skb->sk) : NULL;
1da177e4
LT
551 struct ipv6hdr *tmp_hdr;
552 struct frag_hdr *fh;
553 unsigned int mtu, hlen, left, len;
a7ae1992 554 int hroom, troom;
286c2349 555 __be32 frag_id;
67ba4152 556 int ptr, offset = 0, err = 0;
1da177e4 557 u8 *prevhdr, nexthdr = 0;
adf30907 558 struct net *net = dev_net(skb_dst(skb)->dev);
1da177e4 559
1da177e4
LT
560 hlen = ip6_find_1stfragopt(skb, &prevhdr);
561 nexthdr = *prevhdr;
562
628a5c56 563 mtu = ip6_skb_dst_mtu(skb);
b881ef76
JH
564
565 /* We must not fragment if the socket is set to force MTU discovery
14f3ad6f 566 * or if the skb it not generated by a local socket.
b881ef76 567 */
485fca66
FW
568 if (unlikely(!skb->ignore_df && skb->len > mtu))
569 goto fail_toobig;
a34a101e 570
485fca66
FW
571 if (IP6CB(skb)->frag_max_size) {
572 if (IP6CB(skb)->frag_max_size > mtu)
573 goto fail_toobig;
574
575 /* don't send fragments larger than what we received */
576 mtu = IP6CB(skb)->frag_max_size;
577 if (mtu < IPV6_MIN_MTU)
578 mtu = IPV6_MIN_MTU;
b881ef76
JH
579 }
580
d91675f9
YH
581 if (np && np->frag_size < mtu) {
582 if (np->frag_size)
583 mtu = np->frag_size;
584 }
585 mtu -= hlen + sizeof(struct frag_hdr);
1da177e4 586
fd0273d7
MKL
587 frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
588 &ipv6_hdr(skb)->saddr);
286c2349 589
21dc3301 590 if (skb_has_frag_list(skb)) {
1da177e4 591 int first_len = skb_pagelen(skb);
3d13008e 592 struct sk_buff *frag2;
1da177e4
LT
593
594 if (first_len - hlen > mtu ||
595 ((first_len - hlen) & 7) ||
596 skb_cloned(skb))
597 goto slow_path;
598
4d9092bb 599 skb_walk_frags(skb, frag) {
1da177e4
LT
600 /* Correct geometry. */
601 if (frag->len > mtu ||
602 ((frag->len & 7) && frag->next) ||
603 skb_headroom(frag) < hlen)
3d13008e 604 goto slow_path_clean;
1da177e4 605
1da177e4
LT
606 /* Partially cloned skb? */
607 if (skb_shared(frag))
3d13008e 608 goto slow_path_clean;
2fdba6b0
HX
609
610 BUG_ON(frag->sk);
611 if (skb->sk) {
2fdba6b0
HX
612 frag->sk = skb->sk;
613 frag->destructor = sock_wfree;
2fdba6b0 614 }
3d13008e 615 skb->truesize -= frag->truesize;
1da177e4
LT
616 }
617
618 err = 0;
619 offset = 0;
620 frag = skb_shinfo(skb)->frag_list;
4d9092bb 621 skb_frag_list_init(skb);
1da177e4
LT
622 /* BUILD HEADER */
623
9a217a1c 624 *prevhdr = NEXTHDR_FRAGMENT;
d56f90a7 625 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
1da177e4 626 if (!tmp_hdr) {
adf30907 627 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 628 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
629 return -ENOMEM;
630 }
631
1da177e4 632 __skb_pull(skb, hlen);
67ba4152 633 fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
e2d1bca7
ACM
634 __skb_push(skb, hlen);
635 skb_reset_network_header(skb);
d56f90a7 636 memcpy(skb_network_header(skb), tmp_hdr, hlen);
1da177e4 637
1da177e4
LT
638 fh->nexthdr = nexthdr;
639 fh->reserved = 0;
640 fh->frag_off = htons(IP6_MF);
286c2349 641 fh->identification = frag_id;
1da177e4
LT
642
643 first_len = skb_pagelen(skb);
644 skb->data_len = first_len - skb_headlen(skb);
645 skb->len = first_len;
0660e03f
ACM
646 ipv6_hdr(skb)->payload_len = htons(first_len -
647 sizeof(struct ipv6hdr));
a11d206d 648
d8d1f30b 649 dst_hold(&rt->dst);
1da177e4
LT
650
651 for (;;) {
652 /* Prepare header of the next frame,
653 * before previous one went down. */
654 if (frag) {
655 frag->ip_summed = CHECKSUM_NONE;
badff6d0 656 skb_reset_transport_header(frag);
67ba4152 657 fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
e2d1bca7
ACM
658 __skb_push(frag, hlen);
659 skb_reset_network_header(frag);
d56f90a7
ACM
660 memcpy(skb_network_header(frag), tmp_hdr,
661 hlen);
1da177e4
LT
662 offset += skb->len - hlen - sizeof(struct frag_hdr);
663 fh->nexthdr = nexthdr;
664 fh->reserved = 0;
665 fh->frag_off = htons(offset);
53b24b8f 666 if (frag->next)
1da177e4
LT
667 fh->frag_off |= htons(IP6_MF);
668 fh->identification = frag_id;
0660e03f
ACM
669 ipv6_hdr(frag)->payload_len =
670 htons(frag->len -
671 sizeof(struct ipv6hdr));
1da177e4
LT
672 ip6_copy_metadata(frag, skb);
673 }
1ab1457c 674
7026b1dd 675 err = output(sk, skb);
67ba4152 676 if (!err)
d8d1f30b 677 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 678 IPSTATS_MIB_FRAGCREATES);
dafee490 679
1da177e4
LT
680 if (err || !frag)
681 break;
682
683 skb = frag;
684 frag = skb->next;
685 skb->next = NULL;
686 }
687
a51482bd 688 kfree(tmp_hdr);
1da177e4
LT
689
690 if (err == 0) {
d8d1f30b 691 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 692 IPSTATS_MIB_FRAGOKS);
94e187c0 693 ip6_rt_put(rt);
1da177e4
LT
694 return 0;
695 }
696
46cfd725 697 kfree_skb_list(frag);
1da177e4 698
d8d1f30b 699 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 700 IPSTATS_MIB_FRAGFAILS);
94e187c0 701 ip6_rt_put(rt);
1da177e4 702 return err;
3d13008e
ED
703
704slow_path_clean:
705 skb_walk_frags(skb, frag2) {
706 if (frag2 == frag)
707 break;
708 frag2->sk = NULL;
709 frag2->destructor = NULL;
710 skb->truesize += frag2->truesize;
711 }
1da177e4
LT
712 }
713
714slow_path:
72e843bb
ED
715 if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
716 skb_checksum_help(skb))
717 goto fail;
718
1da177e4
LT
719 left = skb->len - hlen; /* Space per frame */
720 ptr = hlen; /* Where to start from */
721
722 /*
723 * Fragment the datagram.
724 */
725
726 *prevhdr = NEXTHDR_FRAGMENT;
a7ae1992
HX
727 hroom = LL_RESERVED_SPACE(rt->dst.dev);
728 troom = rt->dst.dev->needed_tailroom;
1da177e4
LT
729
730 /*
731 * Keep copying data until we run out.
732 */
67ba4152 733 while (left > 0) {
1da177e4
LT
734 len = left;
735 /* IF: it doesn't fit, use 'mtu' - the data space left */
736 if (len > mtu)
737 len = mtu;
25985edc 738 /* IF: we are not sending up to and including the packet end
1da177e4
LT
739 then align the next start on an eight byte boundary */
740 if (len < left) {
741 len &= ~7;
742 }
1da177e4 743
cbffccc9
JP
744 /* Allocate buffer */
745 frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
746 hroom + troom, GFP_ATOMIC);
747 if (!frag) {
adf30907 748 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 749 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
750 err = -ENOMEM;
751 goto fail;
752 }
753
754 /*
755 * Set up data on packet
756 */
757
758 ip6_copy_metadata(frag, skb);
a7ae1992 759 skb_reserve(frag, hroom);
1da177e4 760 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
c1d2bbe1 761 skb_reset_network_header(frag);
badff6d0 762 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
b0e380b1
ACM
763 frag->transport_header = (frag->network_header + hlen +
764 sizeof(struct frag_hdr));
1da177e4
LT
765
766 /*
767 * Charge the memory for the fragment to any owner
768 * it might possess
769 */
770 if (skb->sk)
771 skb_set_owner_w(frag, skb->sk);
772
773 /*
774 * Copy the packet header into the new buffer.
775 */
d626f62b 776 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
1da177e4
LT
777
778 /*
779 * Build fragment header.
780 */
781 fh->nexthdr = nexthdr;
782 fh->reserved = 0;
286c2349 783 fh->identification = frag_id;
1da177e4
LT
784
785 /*
786 * Copy a block of the IP datagram.
787 */
e3f0b86b
HS
788 BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
789 len));
1da177e4
LT
790 left -= len;
791
792 fh->frag_off = htons(offset);
793 if (left > 0)
794 fh->frag_off |= htons(IP6_MF);
0660e03f
ACM
795 ipv6_hdr(frag)->payload_len = htons(frag->len -
796 sizeof(struct ipv6hdr));
1da177e4
LT
797
798 ptr += len;
799 offset += len;
800
801 /*
802 * Put this fragment into the sending queue.
803 */
7026b1dd 804 err = output(sk, frag);
1da177e4
LT
805 if (err)
806 goto fail;
dafee490 807
adf30907 808 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 809 IPSTATS_MIB_FRAGCREATES);
1da177e4 810 }
adf30907 811 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 812 IPSTATS_MIB_FRAGOKS);
808db80a 813 consume_skb(skb);
1da177e4
LT
814 return err;
815
485fca66
FW
816fail_toobig:
817 if (skb->sk && dst_allfrag(skb_dst(skb)))
818 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
819
820 skb->dev = skb_dst(skb)->dev;
821 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
822 err = -EMSGSIZE;
823
1da177e4 824fail:
adf30907 825 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 826 IPSTATS_MIB_FRAGFAILS);
1ab1457c 827 kfree_skb(skb);
1da177e4
LT
828 return err;
829}
830
b71d1d42
ED
831static inline int ip6_rt_check(const struct rt6key *rt_key,
832 const struct in6_addr *fl_addr,
833 const struct in6_addr *addr_cache)
cf6b1982 834{
a02cec21 835 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
63159f29 836 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
cf6b1982
YH
837}
838
497c615a
HX
839static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
840 struct dst_entry *dst,
b71d1d42 841 const struct flowi6 *fl6)
1da177e4 842{
497c615a 843 struct ipv6_pinfo *np = inet6_sk(sk);
a963a37d 844 struct rt6_info *rt;
1da177e4 845
497c615a
HX
846 if (!dst)
847 goto out;
848
a963a37d
ED
849 if (dst->ops->family != AF_INET6) {
850 dst_release(dst);
851 return NULL;
852 }
853
854 rt = (struct rt6_info *)dst;
497c615a
HX
855 /* Yes, checking route validity in not connected
856 * case is not very simple. Take into account,
857 * that we do not support routing by source, TOS,
67ba4152 858 * and MSG_DONTROUTE --ANK (980726)
497c615a 859 *
cf6b1982
YH
860 * 1. ip6_rt_check(): If route was host route,
861 * check that cached destination is current.
497c615a
HX
862 * If it is network route, we still may
863 * check its validity using saved pointer
864 * to the last used address: daddr_cache.
865 * We do not want to save whole address now,
866 * (because main consumer of this service
867 * is tcp, which has not this problem),
868 * so that the last trick works only on connected
869 * sockets.
870 * 2. oif also should be the same.
871 */
4c9483b2 872 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
8e1ef0a9 873#ifdef CONFIG_IPV6_SUBTREES
4c9483b2 874 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
8e1ef0a9 875#endif
4c9483b2 876 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
497c615a
HX
877 dst_release(dst);
878 dst = NULL;
1da177e4
LT
879 }
880
497c615a
HX
881out:
882 return dst;
883}
884
343d60aa 885static int ip6_dst_lookup_tail(struct net *net, struct sock *sk,
4c9483b2 886 struct dst_entry **dst, struct flowi6 *fl6)
497c615a 887{
69cce1d1
DM
888#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
889 struct neighbour *n;
97cac082 890 struct rt6_info *rt;
69cce1d1
DM
891#endif
892 int err;
497c615a 893
e16e888b
MS
894 /* The correct way to handle this would be to do
895 * ip6_route_get_saddr, and then ip6_route_output; however,
896 * the route-specific preferred source forces the
897 * ip6_route_output call _before_ ip6_route_get_saddr.
898 *
899 * In source specific routing (no src=any default route),
900 * ip6_route_output will fail given src=any saddr, though, so
901 * that's why we try it again later.
902 */
903 if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
904 struct rt6_info *rt;
905 bool had_dst = *dst != NULL;
1da177e4 906
e16e888b
MS
907 if (!had_dst)
908 *dst = ip6_route_output(net, sk, fl6);
909 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
c3968a85
DW
910 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
911 sk ? inet6_sk(sk)->srcprefs : 0,
912 &fl6->saddr);
44456d37 913 if (err)
1da177e4 914 goto out_err_release;
e16e888b
MS
915
916 /* If we had an erroneous initial result, pretend it
917 * never existed and let the SA-enabled version take
918 * over.
919 */
920 if (!had_dst && (*dst)->error) {
921 dst_release(*dst);
922 *dst = NULL;
923 }
1da177e4
LT
924 }
925
e16e888b
MS
926 if (!*dst)
927 *dst = ip6_route_output(net, sk, fl6);
928
929 err = (*dst)->error;
930 if (err)
931 goto out_err_release;
932
95c385b4 933#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
e550dfb0
NH
934 /*
935 * Here if the dst entry we've looked up
936 * has a neighbour entry that is in the INCOMPLETE
937 * state and the src address from the flow is
938 * marked as OPTIMISTIC, we release the found
939 * dst entry and replace it instead with the
940 * dst entry of the nexthop router
941 */
c56bf6fe 942 rt = (struct rt6_info *) *dst;
707be1ff 943 rcu_read_lock_bh();
2647a9b0
MKL
944 n = __ipv6_neigh_lookup_noref(rt->dst.dev,
945 rt6_nexthop(rt, &fl6->daddr));
707be1ff
YH
946 err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
947 rcu_read_unlock_bh();
948
949 if (err) {
e550dfb0 950 struct inet6_ifaddr *ifp;
4c9483b2 951 struct flowi6 fl_gw6;
e550dfb0
NH
952 int redirect;
953
4c9483b2 954 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
e550dfb0
NH
955 (*dst)->dev, 1);
956
957 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
958 if (ifp)
959 in6_ifa_put(ifp);
960
961 if (redirect) {
962 /*
963 * We need to get the dst entry for the
964 * default router instead
965 */
966 dst_release(*dst);
4c9483b2
DM
967 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
968 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
969 *dst = ip6_route_output(net, sk, &fl_gw6);
e5d08d71
IM
970 err = (*dst)->error;
971 if (err)
e550dfb0 972 goto out_err_release;
95c385b4 973 }
e550dfb0 974 }
95c385b4
NH
975#endif
976
1da177e4
LT
977 return 0;
978
979out_err_release:
ca46f9c8 980 if (err == -ENETUNREACH)
5ac68e7c 981 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
982 dst_release(*dst);
983 *dst = NULL;
984 return err;
985}
34a0b3cd 986
497c615a
HX
987/**
988 * ip6_dst_lookup - perform route lookup on flow
989 * @sk: socket which provides route info
990 * @dst: pointer to dst_entry * for result
4c9483b2 991 * @fl6: flow to lookup
497c615a
HX
992 *
993 * This function performs a route lookup on the given flow.
994 *
995 * It returns zero on success, or a standard errno code on error.
996 */
343d60aa
RP
997int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
998 struct flowi6 *fl6)
497c615a
HX
999{
1000 *dst = NULL;
343d60aa 1001 return ip6_dst_lookup_tail(net, sk, dst, fl6);
497c615a 1002}
3cf3dc6c
ACM
1003EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1004
497c615a 1005/**
68d0c6d3
DM
1006 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1007 * @sk: socket which provides route info
4c9483b2 1008 * @fl6: flow to lookup
68d0c6d3 1009 * @final_dst: final destination address for ipsec lookup
68d0c6d3
DM
1010 *
1011 * This function performs a route lookup on the given flow.
1012 *
1013 * It returns a valid dst pointer on success, or a pointer encoded
1014 * error code.
1015 */
4c9483b2 1016struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
0e0d44ab 1017 const struct in6_addr *final_dst)
68d0c6d3
DM
1018{
1019 struct dst_entry *dst = NULL;
1020 int err;
1021
343d60aa 1022 err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
68d0c6d3
DM
1023 if (err)
1024 return ERR_PTR(err);
1025 if (final_dst)
4e3fd7a0 1026 fl6->daddr = *final_dst;
a0a9f33b
PS
1027 if (!fl6->flowi6_oif)
1028 fl6->flowi6_oif = dst->dev->ifindex;
2774c131 1029
f92ee619 1030 return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
68d0c6d3
DM
1031}
1032EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1033
1034/**
1035 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
497c615a 1036 * @sk: socket which provides the dst cache and route info
4c9483b2 1037 * @fl6: flow to lookup
68d0c6d3 1038 * @final_dst: final destination address for ipsec lookup
497c615a
HX
1039 *
1040 * This function performs a route lookup on the given flow with the
1041 * possibility of using the cached route in the socket if it is valid.
1042 * It will take the socket dst lock when operating on the dst cache.
1043 * As a result, this function can only be used in process context.
1044 *
68d0c6d3
DM
1045 * It returns a valid dst pointer on success, or a pointer encoded
1046 * error code.
497c615a 1047 */
4c9483b2 1048struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
0e0d44ab 1049 const struct in6_addr *final_dst)
497c615a 1050{
68d0c6d3
DM
1051 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1052 int err;
497c615a 1053
4c9483b2 1054 dst = ip6_sk_dst_check(sk, dst, fl6);
68d0c6d3 1055
343d60aa 1056 err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
68d0c6d3
DM
1057 if (err)
1058 return ERR_PTR(err);
1059 if (final_dst)
4e3fd7a0 1060 fl6->daddr = *final_dst;
2774c131 1061
f92ee619 1062 return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
497c615a 1063}
68d0c6d3 1064EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
497c615a 1065
34a0b3cd 1066static inline int ip6_ufo_append_data(struct sock *sk,
0bbe84a6 1067 struct sk_buff_head *queue,
e89e9cf5
AR
1068 int getfrag(void *from, char *to, int offset, int len,
1069 int odd, struct sk_buff *skb),
1070 void *from, int length, int hh_len, int fragheaderlen,
67ba4152 1071 int transhdrlen, int mtu, unsigned int flags,
fd0273d7 1072 const struct flowi6 *fl6)
e89e9cf5
AR
1073
1074{
1075 struct sk_buff *skb;
1076 int err;
1077
1078 /* There is support for UDP large send offload by network
1079 * device, so create one single skb packet containing complete
1080 * udp datagram
1081 */
0bbe84a6 1082 skb = skb_peek_tail(queue);
63159f29 1083 if (!skb) {
e89e9cf5
AR
1084 skb = sock_alloc_send_skb(sk,
1085 hh_len + fragheaderlen + transhdrlen + 20,
1086 (flags & MSG_DONTWAIT), &err);
63159f29 1087 if (!skb)
504744e4 1088 return err;
e89e9cf5
AR
1089
1090 /* reserve space for Hardware header */
1091 skb_reserve(skb, hh_len);
1092
1093 /* create space for UDP/IP header */
67ba4152 1094 skb_put(skb, fragheaderlen + transhdrlen);
e89e9cf5
AR
1095
1096 /* initialize network header pointer */
c1d2bbe1 1097 skb_reset_network_header(skb);
e89e9cf5
AR
1098
1099 /* initialize protocol header pointer */
b0e380b1 1100 skb->transport_header = skb->network_header + fragheaderlen;
e89e9cf5 1101
9c9c9ad5 1102 skb->protocol = htons(ETH_P_IPV6);
e89e9cf5 1103 skb->csum = 0;
e89e9cf5 1104
0bbe84a6 1105 __skb_queue_tail(queue, skb);
c547dbf5
JP
1106 } else if (skb_is_gso(skb)) {
1107 goto append;
e89e9cf5 1108 }
e89e9cf5 1109
c547dbf5
JP
1110 skb->ip_summed = CHECKSUM_PARTIAL;
1111 /* Specify the length of each IPv6 datagram fragment.
1112 * It has to be a multiple of 8.
1113 */
1114 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1115 sizeof(struct frag_hdr)) & ~7;
1116 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
fd0273d7
MKL
1117 skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk),
1118 &fl6->daddr,
1119 &fl6->saddr);
c547dbf5
JP
1120
1121append:
2811ebac
HFS
1122 return skb_append_datato_frags(sk, skb, getfrag, from,
1123 (length - transhdrlen));
e89e9cf5 1124}
1da177e4 1125
0178b695
HX
1126static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1127 gfp_t gfp)
1128{
1129 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1130}
1131
1132static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1133 gfp_t gfp)
1134{
1135 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1136}
1137
75a493e6 1138static void ip6_append_data_mtu(unsigned int *mtu,
0c183379
G
1139 int *maxfraglen,
1140 unsigned int fragheaderlen,
1141 struct sk_buff *skb,
75a493e6 1142 struct rt6_info *rt,
e367c2d0 1143 unsigned int orig_mtu)
0c183379
G
1144{
1145 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
63159f29 1146 if (!skb) {
0c183379 1147 /* first fragment, reserve header_len */
e367c2d0 1148 *mtu = orig_mtu - rt->dst.header_len;
0c183379
G
1149
1150 } else {
1151 /*
1152 * this fragment is not first, the headers
1153 * space is regarded as data space.
1154 */
e367c2d0 1155 *mtu = orig_mtu;
0c183379
G
1156 }
1157 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1158 + fragheaderlen - sizeof(struct frag_hdr);
1159 }
1160}
1161
366e41d9
VY
1162static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1163 struct inet6_cork *v6_cork,
1164 int hlimit, int tclass, struct ipv6_txoptions *opt,
1165 struct rt6_info *rt, struct flowi6 *fl6)
1166{
1167 struct ipv6_pinfo *np = inet6_sk(sk);
1168 unsigned int mtu;
1169
1170 /*
1171 * setup for corking
1172 */
1173 if (opt) {
1174 if (WARN_ON(v6_cork->opt))
1175 return -EINVAL;
1176
1177 v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation);
63159f29 1178 if (unlikely(!v6_cork->opt))
366e41d9
VY
1179 return -ENOBUFS;
1180
1181 v6_cork->opt->tot_len = opt->tot_len;
1182 v6_cork->opt->opt_flen = opt->opt_flen;
1183 v6_cork->opt->opt_nflen = opt->opt_nflen;
1184
1185 v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1186 sk->sk_allocation);
1187 if (opt->dst0opt && !v6_cork->opt->dst0opt)
1188 return -ENOBUFS;
1189
1190 v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1191 sk->sk_allocation);
1192 if (opt->dst1opt && !v6_cork->opt->dst1opt)
1193 return -ENOBUFS;
1194
1195 v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1196 sk->sk_allocation);
1197 if (opt->hopopt && !v6_cork->opt->hopopt)
1198 return -ENOBUFS;
1199
1200 v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1201 sk->sk_allocation);
1202 if (opt->srcrt && !v6_cork->opt->srcrt)
1203 return -ENOBUFS;
1204
1205 /* need source address above miyazawa*/
1206 }
1207 dst_hold(&rt->dst);
1208 cork->base.dst = &rt->dst;
1209 cork->fl.u.ip6 = *fl6;
1210 v6_cork->hop_limit = hlimit;
1211 v6_cork->tclass = tclass;
1212 if (rt->dst.flags & DST_XFRM_TUNNEL)
1213 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1214 rt->dst.dev->mtu : dst_mtu(&rt->dst);
1215 else
1216 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1217 rt->dst.dev->mtu : dst_mtu(rt->dst.path);
1218 if (np->frag_size < mtu) {
1219 if (np->frag_size)
1220 mtu = np->frag_size;
1221 }
1222 cork->base.fragsize = mtu;
1223 if (dst_allfrag(rt->dst.path))
1224 cork->base.flags |= IPCORK_ALLFRAG;
1225 cork->base.length = 0;
1226
1227 return 0;
1228}
1229
0bbe84a6
VY
1230static int __ip6_append_data(struct sock *sk,
1231 struct flowi6 *fl6,
1232 struct sk_buff_head *queue,
1233 struct inet_cork *cork,
1234 struct inet6_cork *v6_cork,
1235 struct page_frag *pfrag,
1236 int getfrag(void *from, char *to, int offset,
1237 int len, int odd, struct sk_buff *skb),
1238 void *from, int length, int transhdrlen,
1239 unsigned int flags, int dontfrag)
1da177e4 1240{
0c183379 1241 struct sk_buff *skb, *skb_prev = NULL;
e367c2d0 1242 unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
0bbe84a6
VY
1243 int exthdrlen = 0;
1244 int dst_exthdrlen = 0;
1da177e4 1245 int hh_len;
1da177e4
LT
1246 int copy;
1247 int err;
1248 int offset = 0;
a693e698 1249 __u8 tx_flags = 0;
09c2d251 1250 u32 tskey = 0;
0bbe84a6
VY
1251 struct rt6_info *rt = (struct rt6_info *)cork->dst;
1252 struct ipv6_txoptions *opt = v6_cork->opt;
32dce968 1253 int csummode = CHECKSUM_NONE;
1da177e4 1254
0bbe84a6
VY
1255 skb = skb_peek_tail(queue);
1256 if (!skb) {
1257 exthdrlen = opt ? opt->opt_flen : 0;
7efdba5b 1258 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1da177e4 1259 }
0bbe84a6 1260
366e41d9 1261 mtu = cork->fragsize;
e367c2d0 1262 orig_mtu = mtu;
1da177e4 1263
d8d1f30b 1264 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1da177e4 1265
a1b05140 1266 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
b4ce9277 1267 (opt ? opt->opt_nflen : 0);
4df98e76
HFS
1268 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1269 sizeof(struct frag_hdr);
1da177e4
LT
1270
1271 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
4df98e76
HFS
1272 unsigned int maxnonfragsize, headersize;
1273
1274 headersize = sizeof(struct ipv6hdr) +
3a1cebe7 1275 (opt ? opt->opt_flen + opt->opt_nflen : 0) +
4df98e76
HFS
1276 (dst_allfrag(&rt->dst) ?
1277 sizeof(struct frag_hdr) : 0) +
1278 rt->rt6i_nfheader_len;
1279
60ff7467 1280 if (ip6_sk_ignore_df(sk))
0b95227a
HFS
1281 maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1282 else
1283 maxnonfragsize = mtu;
4df98e76
HFS
1284
1285 /* dontfrag active */
1286 if ((cork->length + length > mtu - headersize) && dontfrag &&
1287 (sk->sk_protocol == IPPROTO_UDP ||
1288 sk->sk_protocol == IPPROTO_RAW)) {
1289 ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1290 sizeof(struct ipv6hdr));
1291 goto emsgsize;
1292 }
1293
1294 if (cork->length + length > maxnonfragsize - headersize) {
1295emsgsize:
1296 ipv6_local_error(sk, EMSGSIZE, fl6,
1297 mtu - headersize +
1298 sizeof(struct ipv6hdr));
1da177e4
LT
1299 return -EMSGSIZE;
1300 }
1301 }
1302
09c2d251 1303 if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
bf84a010 1304 sock_tx_timestamp(sk, &tx_flags);
09c2d251
WB
1305 if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
1306 sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1307 tskey = sk->sk_tskey++;
1308 }
a693e698 1309
32dce968
VY
1310 /* If this is the first and only packet and device
1311 * supports checksum offloading, let's use it.
e87a468e
VY
1312 * Use transhdrlen, same as IPv4, because partial
1313 * sums only work when transhdrlen is set.
32dce968 1314 */
e87a468e 1315 if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
32dce968
VY
1316 length + fragheaderlen < mtu &&
1317 rt->dst.dev->features & NETIF_F_V6_CSUM &&
1318 !exthdrlen)
1319 csummode = CHECKSUM_PARTIAL;
1da177e4
LT
1320 /*
1321 * Let's try using as much space as possible.
1322 * Use MTU if total length of the message fits into the MTU.
1323 * Otherwise, we need to reserve fragment header and
1324 * fragment alignment (= 8-15 octects, in total).
1325 *
1326 * Note that we may need to "move" the data from the tail of
1ab1457c 1327 * of the buffer to the new fragment when we split
1da177e4
LT
1328 * the message.
1329 *
1ab1457c 1330 * FIXME: It may be fragmented into multiple chunks
1da177e4
LT
1331 * at once if non-fragmentable extension headers
1332 * are too large.
1ab1457c 1333 * --yoshfuji
1da177e4
LT
1334 */
1335
2811ebac
HFS
1336 cork->length += length;
1337 if (((length > mtu) ||
1338 (skb && skb_is_gso(skb))) &&
1339 (sk->sk_protocol == IPPROTO_UDP) &&
acf8dd0a
MK
1340 (rt->dst.dev->features & NETIF_F_UFO) &&
1341 (sk->sk_type == SOCK_DGRAM)) {
0bbe84a6 1342 err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
2811ebac 1343 hh_len, fragheaderlen,
fd0273d7 1344 transhdrlen, mtu, flags, fl6);
2811ebac
HFS
1345 if (err)
1346 goto error;
1347 return 0;
e89e9cf5 1348 }
1da177e4 1349
2811ebac 1350 if (!skb)
1da177e4
LT
1351 goto alloc_new_skb;
1352
1353 while (length > 0) {
1354 /* Check if the remaining data fits into current packet. */
bdc712b4 1355 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1da177e4
LT
1356 if (copy < length)
1357 copy = maxfraglen - skb->len;
1358
1359 if (copy <= 0) {
1360 char *data;
1361 unsigned int datalen;
1362 unsigned int fraglen;
1363 unsigned int fraggap;
1364 unsigned int alloclen;
1da177e4 1365alloc_new_skb:
1da177e4 1366 /* There's no room in the current skb */
0c183379
G
1367 if (skb)
1368 fraggap = skb->len - maxfraglen;
1da177e4
LT
1369 else
1370 fraggap = 0;
0c183379 1371 /* update mtu and maxfraglen if necessary */
63159f29 1372 if (!skb || !skb_prev)
0c183379 1373 ip6_append_data_mtu(&mtu, &maxfraglen,
75a493e6 1374 fragheaderlen, skb, rt,
e367c2d0 1375 orig_mtu);
0c183379
G
1376
1377 skb_prev = skb;
1da177e4
LT
1378
1379 /*
1380 * If remaining data exceeds the mtu,
1381 * we know we need more fragment(s).
1382 */
1383 datalen = length + fraggap;
1da177e4 1384
0c183379
G
1385 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1386 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1da177e4 1387 if ((flags & MSG_MORE) &&
d8d1f30b 1388 !(rt->dst.dev->features&NETIF_F_SG))
1da177e4
LT
1389 alloclen = mtu;
1390 else
1391 alloclen = datalen + fragheaderlen;
1392
299b0767
SK
1393 alloclen += dst_exthdrlen;
1394
0c183379
G
1395 if (datalen != length + fraggap) {
1396 /*
1397 * this is not the last fragment, the trailer
1398 * space is regarded as data space.
1399 */
1400 datalen += rt->dst.trailer_len;
1401 }
1402
1403 alloclen += rt->dst.trailer_len;
1404 fraglen = datalen + fragheaderlen;
1da177e4
LT
1405
1406 /*
1407 * We just reserve space for fragment header.
1ab1457c 1408 * Note: this may be overallocation if the message
1da177e4
LT
1409 * (without MSG_MORE) fits into the MTU.
1410 */
1411 alloclen += sizeof(struct frag_hdr);
1412
1413 if (transhdrlen) {
1414 skb = sock_alloc_send_skb(sk,
1415 alloclen + hh_len,
1416 (flags & MSG_DONTWAIT), &err);
1417 } else {
1418 skb = NULL;
1419 if (atomic_read(&sk->sk_wmem_alloc) <=
1420 2 * sk->sk_sndbuf)
1421 skb = sock_wmalloc(sk,
1422 alloclen + hh_len, 1,
1423 sk->sk_allocation);
63159f29 1424 if (unlikely(!skb))
1da177e4
LT
1425 err = -ENOBUFS;
1426 }
63159f29 1427 if (!skb)
1da177e4
LT
1428 goto error;
1429 /*
1430 * Fill in the control structures
1431 */
9c9c9ad5 1432 skb->protocol = htons(ETH_P_IPV6);
32dce968 1433 skb->ip_summed = csummode;
1da177e4 1434 skb->csum = 0;
1f85851e
G
1435 /* reserve for fragmentation and ipsec header */
1436 skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1437 dst_exthdrlen);
1da177e4 1438
11878b40
WB
1439 /* Only the initial fragment is time stamped */
1440 skb_shinfo(skb)->tx_flags = tx_flags;
1441 tx_flags = 0;
09c2d251
WB
1442 skb_shinfo(skb)->tskey = tskey;
1443 tskey = 0;
a693e698 1444
1da177e4
LT
1445 /*
1446 * Find where to start putting bytes
1447 */
1f85851e
G
1448 data = skb_put(skb, fraglen);
1449 skb_set_network_header(skb, exthdrlen);
1450 data += fragheaderlen;
b0e380b1
ACM
1451 skb->transport_header = (skb->network_header +
1452 fragheaderlen);
1da177e4
LT
1453 if (fraggap) {
1454 skb->csum = skb_copy_and_csum_bits(
1455 skb_prev, maxfraglen,
1456 data + transhdrlen, fraggap, 0);
1457 skb_prev->csum = csum_sub(skb_prev->csum,
1458 skb->csum);
1459 data += fraggap;
e9fa4f7b 1460 pskb_trim_unique(skb_prev, maxfraglen);
1da177e4
LT
1461 }
1462 copy = datalen - transhdrlen - fraggap;
299b0767 1463
1da177e4
LT
1464 if (copy < 0) {
1465 err = -EINVAL;
1466 kfree_skb(skb);
1467 goto error;
1468 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1469 err = -EFAULT;
1470 kfree_skb(skb);
1471 goto error;
1472 }
1473
1474 offset += copy;
1475 length -= datalen - fraggap;
1476 transhdrlen = 0;
1477 exthdrlen = 0;
299b0767 1478 dst_exthdrlen = 0;
1da177e4
LT
1479
1480 /*
1481 * Put the packet on the pending queue
1482 */
0bbe84a6 1483 __skb_queue_tail(queue, skb);
1da177e4
LT
1484 continue;
1485 }
1486
1487 if (copy > length)
1488 copy = length;
1489
d8d1f30b 1490 if (!(rt->dst.dev->features&NETIF_F_SG)) {
1da177e4
LT
1491 unsigned int off;
1492
1493 off = skb->len;
1494 if (getfrag(from, skb_put(skb, copy),
1495 offset, copy, off, skb) < 0) {
1496 __skb_trim(skb, off);
1497 err = -EFAULT;
1498 goto error;
1499 }
1500 } else {
1501 int i = skb_shinfo(skb)->nr_frags;
1da177e4 1502
5640f768
ED
1503 err = -ENOMEM;
1504 if (!sk_page_frag_refill(sk, pfrag))
1da177e4 1505 goto error;
5640f768
ED
1506
1507 if (!skb_can_coalesce(skb, i, pfrag->page,
1508 pfrag->offset)) {
1509 err = -EMSGSIZE;
1510 if (i == MAX_SKB_FRAGS)
1511 goto error;
1512
1513 __skb_fill_page_desc(skb, i, pfrag->page,
1514 pfrag->offset, 0);
1515 skb_shinfo(skb)->nr_frags = ++i;
1516 get_page(pfrag->page);
1da177e4 1517 }
5640f768 1518 copy = min_t(int, copy, pfrag->size - pfrag->offset);
9e903e08 1519 if (getfrag(from,
5640f768
ED
1520 page_address(pfrag->page) + pfrag->offset,
1521 offset, copy, skb->len, skb) < 0)
1522 goto error_efault;
1523
1524 pfrag->offset += copy;
1525 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1da177e4
LT
1526 skb->len += copy;
1527 skb->data_len += copy;
f945fa7a
HX
1528 skb->truesize += copy;
1529 atomic_add(copy, &sk->sk_wmem_alloc);
1da177e4
LT
1530 }
1531 offset += copy;
1532 length -= copy;
1533 }
5640f768 1534
1da177e4 1535 return 0;
5640f768
ED
1536
1537error_efault:
1538 err = -EFAULT;
1da177e4 1539error:
bdc712b4 1540 cork->length -= length;
3bd653c8 1541 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1542 return err;
1543}
0bbe84a6
VY
1544
1545int ip6_append_data(struct sock *sk,
1546 int getfrag(void *from, char *to, int offset, int len,
1547 int odd, struct sk_buff *skb),
1548 void *from, int length, int transhdrlen, int hlimit,
1549 int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
1550 struct rt6_info *rt, unsigned int flags, int dontfrag)
1551{
1552 struct inet_sock *inet = inet_sk(sk);
1553 struct ipv6_pinfo *np = inet6_sk(sk);
1554 int exthdrlen;
1555 int err;
1556
1557 if (flags&MSG_PROBE)
1558 return 0;
1559 if (skb_queue_empty(&sk->sk_write_queue)) {
1560 /*
1561 * setup for corking
1562 */
1563 err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit,
1564 tclass, opt, rt, fl6);
1565 if (err)
1566 return err;
1567
1568 exthdrlen = (opt ? opt->opt_flen : 0);
1569 length += exthdrlen;
1570 transhdrlen += exthdrlen;
1571 } else {
1572 fl6 = &inet->cork.fl.u.ip6;
1573 transhdrlen = 0;
1574 }
1575
1576 return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1577 &np->cork, sk_page_frag(sk), getfrag,
1578 from, length, transhdrlen, flags, dontfrag);
1579}
a495f836 1580EXPORT_SYMBOL_GPL(ip6_append_data);
1da177e4 1581
366e41d9
VY
1582static void ip6_cork_release(struct inet_cork_full *cork,
1583 struct inet6_cork *v6_cork)
bf138862 1584{
366e41d9
VY
1585 if (v6_cork->opt) {
1586 kfree(v6_cork->opt->dst0opt);
1587 kfree(v6_cork->opt->dst1opt);
1588 kfree(v6_cork->opt->hopopt);
1589 kfree(v6_cork->opt->srcrt);
1590 kfree(v6_cork->opt);
1591 v6_cork->opt = NULL;
0178b695
HX
1592 }
1593
366e41d9
VY
1594 if (cork->base.dst) {
1595 dst_release(cork->base.dst);
1596 cork->base.dst = NULL;
1597 cork->base.flags &= ~IPCORK_ALLFRAG;
bf138862 1598 }
366e41d9 1599 memset(&cork->fl, 0, sizeof(cork->fl));
bf138862
PE
1600}
1601
6422398c
VY
1602struct sk_buff *__ip6_make_skb(struct sock *sk,
1603 struct sk_buff_head *queue,
1604 struct inet_cork_full *cork,
1605 struct inet6_cork *v6_cork)
1da177e4
LT
1606{
1607 struct sk_buff *skb, *tmp_skb;
1608 struct sk_buff **tail_skb;
1609 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1da177e4 1610 struct ipv6_pinfo *np = inet6_sk(sk);
3bd653c8 1611 struct net *net = sock_net(sk);
1da177e4 1612 struct ipv6hdr *hdr;
6422398c
VY
1613 struct ipv6_txoptions *opt = v6_cork->opt;
1614 struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1615 struct flowi6 *fl6 = &cork->fl.u.ip6;
4c9483b2 1616 unsigned char proto = fl6->flowi6_proto;
1da177e4 1617
6422398c 1618 skb = __skb_dequeue(queue);
63159f29 1619 if (!skb)
1da177e4
LT
1620 goto out;
1621 tail_skb = &(skb_shinfo(skb)->frag_list);
1622
1623 /* move skb->data to ip header from ext header */
d56f90a7 1624 if (skb->data < skb_network_header(skb))
bbe735e4 1625 __skb_pull(skb, skb_network_offset(skb));
6422398c 1626 while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
cfe1fc77 1627 __skb_pull(tmp_skb, skb_network_header_len(skb));
1da177e4
LT
1628 *tail_skb = tmp_skb;
1629 tail_skb = &(tmp_skb->next);
1630 skb->len += tmp_skb->len;
1631 skb->data_len += tmp_skb->len;
1da177e4 1632 skb->truesize += tmp_skb->truesize;
1da177e4
LT
1633 tmp_skb->destructor = NULL;
1634 tmp_skb->sk = NULL;
1da177e4
LT
1635 }
1636
28a89453 1637 /* Allow local fragmentation. */
60ff7467 1638 skb->ignore_df = ip6_sk_ignore_df(sk);
28a89453 1639
4e3fd7a0 1640 *final_dst = fl6->daddr;
cfe1fc77 1641 __skb_pull(skb, skb_network_header_len(skb));
1da177e4
LT
1642 if (opt && opt->opt_flen)
1643 ipv6_push_frag_opts(skb, opt, &proto);
1644 if (opt && opt->opt_nflen)
1645 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1646
e2d1bca7
ACM
1647 skb_push(skb, sizeof(struct ipv6hdr));
1648 skb_reset_network_header(skb);
0660e03f 1649 hdr = ipv6_hdr(skb);
1ab1457c 1650
6422398c 1651 ip6_flow_hdr(hdr, v6_cork->tclass,
cb1ce2ef 1652 ip6_make_flowlabel(net, skb, fl6->flowlabel,
67800f9b 1653 np->autoflowlabel, fl6));
6422398c 1654 hdr->hop_limit = v6_cork->hop_limit;
1da177e4 1655 hdr->nexthdr = proto;
4e3fd7a0
AD
1656 hdr->saddr = fl6->saddr;
1657 hdr->daddr = *final_dst;
1da177e4 1658
a2c2064f 1659 skb->priority = sk->sk_priority;
4a19ec58 1660 skb->mark = sk->sk_mark;
a2c2064f 1661
d8d1f30b 1662 skb_dst_set(skb, dst_clone(&rt->dst));
edf391ff 1663 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
14878f75 1664 if (proto == IPPROTO_ICMPV6) {
adf30907 1665 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
14878f75 1666
43a43b60
HFS
1667 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1668 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
14878f75
DS
1669 }
1670
6422398c
VY
1671 ip6_cork_release(cork, v6_cork);
1672out:
1673 return skb;
1674}
1675
1676int ip6_send_skb(struct sk_buff *skb)
1677{
1678 struct net *net = sock_net(skb->sk);
1679 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1680 int err;
1681
ef76bc23 1682 err = ip6_local_out(skb);
1da177e4
LT
1683 if (err) {
1684 if (err > 0)
6ce9e7b5 1685 err = net_xmit_errno(err);
1da177e4 1686 if (err)
6422398c
VY
1687 IP6_INC_STATS(net, rt->rt6i_idev,
1688 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1689 }
1690
1da177e4 1691 return err;
6422398c
VY
1692}
1693
/*
 * Build the pending datagram with ip6_finish_skb() and send it.
 * Returns 0 when nothing was pending, otherwise the result of
 * ip6_send_skb().
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	return skb ? ip6_send_skb(skb) : 0;
}
a495f836 1704EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1da177e4 1705
0bbe84a6 1706static void __ip6_flush_pending_frames(struct sock *sk,
6422398c
VY
1707 struct sk_buff_head *queue,
1708 struct inet_cork_full *cork,
1709 struct inet6_cork *v6_cork)
1da177e4 1710{
1da177e4
LT
1711 struct sk_buff *skb;
1712
0bbe84a6 1713 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
adf30907
ED
1714 if (skb_dst(skb))
1715 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
e1f52208 1716 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1717 kfree_skb(skb);
1718 }
1719
6422398c 1720 ip6_cork_release(cork, v6_cork);
1da177e4 1721}
0bbe84a6
VY
1722
1723void ip6_flush_pending_frames(struct sock *sk)
1724{
6422398c
VY
1725 __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1726 &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
0bbe84a6 1727}
a495f836 1728EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
6422398c
VY
1729
1730struct sk_buff *ip6_make_skb(struct sock *sk,
1731 int getfrag(void *from, char *to, int offset,
1732 int len, int odd, struct sk_buff *skb),
1733 void *from, int length, int transhdrlen,
1734 int hlimit, int tclass,
1735 struct ipv6_txoptions *opt, struct flowi6 *fl6,
1736 struct rt6_info *rt, unsigned int flags,
1737 int dontfrag)
1738{
1739 struct inet_cork_full cork;
1740 struct inet6_cork v6_cork;
1741 struct sk_buff_head queue;
1742 int exthdrlen = (opt ? opt->opt_flen : 0);
1743 int err;
1744
1745 if (flags & MSG_PROBE)
1746 return NULL;
1747
1748 __skb_queue_head_init(&queue);
1749
1750 cork.base.flags = 0;
1751 cork.base.addr = 0;
1752 cork.base.opt = NULL;
1753 v6_cork.opt = NULL;
1754 err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6);
1755 if (err)
1756 return ERR_PTR(err);
1757
1758 if (dontfrag < 0)
1759 dontfrag = inet6_sk(sk)->dontfrag;
1760
1761 err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
1762 &current->task_frag, getfrag, from,
1763 length + exthdrlen, transhdrlen + exthdrlen,
1764 flags, dontfrag);
1765 if (err) {
1766 __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
1767 return ERR_PTR(err);
1768 }
1769
1770 return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
1771}