Merge branch 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
[linux-2.6-block.git] / net / ipv6 / ip6_output.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * IPv6 output functions
1ab1457c 4 * Linux INET6 implementation
1da177e4
LT
5 *
6 * Authors:
1ab1457c 7 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 8 *
1da177e4
LT
9 * Based on linux/net/ipv4/ip_output.c
10 *
1da177e4
LT
11 * Changes:
12 * A.N.Kuznetsov : airthmetics in fragmentation.
13 * extension headers are implemented.
14 * route changes now work.
15 * ip6_forward does not confuse sniffers.
16 * etc.
17 *
18 * H. von Brand : Added missing #include <linux/string.h>
67ba4152 19 * Imran Patel : frag id should be in NBO
1da177e4
LT
20 * Kazunori MIYAZAWA @USAGI
21 * : add ip6_append_data and related functions
22 * for datagram xmit
23 */
24
1da177e4 25#include <linux/errno.h>
ef76bc23 26#include <linux/kernel.h>
1da177e4
LT
27#include <linux/string.h>
28#include <linux/socket.h>
29#include <linux/net.h>
30#include <linux/netdevice.h>
31#include <linux/if_arp.h>
32#include <linux/in6.h>
33#include <linux/tcp.h>
34#include <linux/route.h>
b59f45d0 35#include <linux/module.h>
5a0e3ad6 36#include <linux/slab.h>
1da177e4 37
33b48679 38#include <linux/bpf-cgroup.h>
1da177e4
LT
39#include <linux/netfilter.h>
40#include <linux/netfilter_ipv6.h>
41
42#include <net/sock.h>
43#include <net/snmp.h>
44
45#include <net/ipv6.h>
46#include <net/ndisc.h>
47#include <net/protocol.h>
48#include <net/ip6_route.h>
49#include <net/addrconf.h>
50#include <net/rawv6.h>
51#include <net/icmp.h>
52#include <net/xfrm.h>
53#include <net/checksum.h>
7bc570c8 54#include <linux/mroute6.h>
ca254490 55#include <net/l3mdev.h>
14972cbd 56#include <net/lwtunnel.h>
1da177e4 57
/* Final transmission step for an IPv6 skb whose route is already set:
 * handle multicast loopback and scope filtering, give lwtunnel a chance
 * to take over, then resolve the nexthop neighbour and queue the skb to
 * it. Consumes the skb on every path. Returns the neigh_output() result,
 * 0 when the packet is absorbed locally, or -EINVAL when no neighbour
 * entry could be created.
 */
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	const struct in6_addr *nexthop;
	struct neighbour *neigh;
	int ret;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		/* Loop a copy back to local listeners when the socket asked
		 * for multicast loopback, unless an mrouted socket already
		 * forwarded this skb (IP6SKB_FORWARDED).
		 */
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_is_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			/* hop_limit 0: deliver the looped copy only, never
			 * put the original on the wire.
			 */
			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		/* Node-local scope multicast must never leave the host. */
		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	/* A lightweight tunnel on the route may take over transmission. */
	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
			return res;
	}

	/* rcu_read_lock_bh() protects the noref neighbour lookup below. */
	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		sock_confirm_neigh(skb, neigh);
		ret = neigh_output(neigh, skb, false);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}
126
/* Post-routing output step: reinject through dst_output() when IPsec
 * policy lookup after SNAT attached a new xfrm dst, otherwise fragment
 * if the skb exceeds the path MTU (or must be fragmented due to
 * dst_allfrag / conntrack frag_max_size) and pass to ip6_finish_output2().
 */
static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
	if (skb_dst(skb)->xfrm) {
		/* NOTE(review): this sets the IPv4 control block flag
		 * (IPCB/IPSKB_REROUTED) on an IPv6 skb, mirroring the IPv4
		 * path — verify against upstream intent before changing.
		 */
		IPCB(skb)->flags |= IPSKB_REROUTED;
		return dst_output(net, sk, skb);
	}
#endif

	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(net, sk, skb, ip6_finish_output2);
	else
		return ip6_finish_output2(net, sk, skb);
}
144
956fe219 145static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
146{
147 int ret;
148
149 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
150 switch (ret) {
151 case NET_XMIT_SUCCESS:
152 return __ip6_finish_output(net, sk, skb);
153 case NET_XMIT_CN:
154 return __ip6_finish_output(net, sk, skb) ? : ret;
155 default:
156 kfree_skb(skb);
157 return ret;
158 }
159}
160
/* Entry point for locally generated IPv6 packets after routing: stamp
 * protocol/device, honour the per-device disable_ipv6 knob, then run the
 * NF_INET_POST_ROUTING hook (skipped when the skb was already rerouted,
 * i.e. IP6SKB_REROUTED is set) before ip6_finish_output().
 */
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	/* IPv6 administratively disabled on the egress device: count and drop. */
	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, indev, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
180
e9191ffb 181bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
513674b5
SL
182{
183 if (!np->autoflowlabel_set)
184 return ip6_default_np_autolabel(net);
185 else
186 return np->autoflowlabel;
187}
188
/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note : socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks.
 *
 * Pushes extension headers (opt) and the IPv6 header onto the skb, then
 * hands it to NF_INET_LOCAL_OUT / dst_output(). Consumes the skb; returns
 * 0 or NF_HOOK's result on success, -ENOBUFS on headroom reallocation
 * failure, -EMSGSIZE when the packet exceeds the path MTU and may not be
 * fragmented.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	unsigned int head_room;
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	/* Room needed in front of the payload: IPv6 header, link-layer
	 * headroom, and any extension headers from opt.
	 */
	head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
	if (opt)
		head_room += opt->opt_nflen + opt->opt_flen;

	if (unlikely(skb_headroom(skb) < head_room)) {
		struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
		if (!skb2) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
			kfree_skb(skb);
			return -ENOBUFS;
		}
		/* Keep socket accounting attached to the replacement skb. */
		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);
		consume_skb(skb);
		skb = skb2;
	}

	if (opt) {
		seg_len += opt->opt_nflen + opt->opt_flen;

		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);

		/* May rewrite first_hop when a routing header is present. */
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
					     &fl6->saddr);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
				ip6_autoflowlabel(net, np), fl6));

	/* NOTE(review): payload_len is a 16-bit field; callers are expected
	 * not to pass payloads (incl. options) above 64 KiB — confirm.
	 */
	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = priority;
	skb->mark = mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);

		/* if egress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dst->dev,
			       dst_output);
	}

	/* Too big and DF in effect: report EMSGSIZE to the socket. */
	skb->dev = dst->dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);
295
/* Deliver a Router Alert packet to every raw socket registered on the
 * ip6_ra_chain for the given RA value (sel), honouring device binding
 * and the rtalert_isolate netns restriction. Each listener but the last
 * gets a clone; the last consumes the original skb. Returns 1 when at
 * least one socket took the packet, 0 otherwise (caller keeps the skb).
 */
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			struct ipv6_pinfo *np = inet6_sk(sk);

			/* Skip listeners isolated to a different netns. */
			if (np && np->rtalert_isolate &&
			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
				continue;
			}
			/* Clone for the previous match; defer the original
			 * so the final recipient avoids a copy.
			 */
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
330
/* Decide what to do with a packet destined to an address we proxy NDP
 * for. Returns 1 when the packet is a unicast neighbour-discovery
 * message that must be handed to local input, -1 when it targets a
 * link-local address (proxying router cannot forward it; link failure
 * is signalled), and 0 to let normal forwarding proceed.
 */
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	/* Walk past extension headers to find the upper-layer protocol. */
	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		/* Need at least the ICMPv6 type byte in linear data. */
		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}
382
0c4b51f0
EB
383static inline int ip6_forward_finish(struct net *net, struct sock *sk,
384 struct sk_buff *skb)
1da177e4 385{
71a1c915
JB
386 struct dst_entry *dst = skb_dst(skb);
387
388 __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
389 __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
390
f839a6c9
IS
391#ifdef CONFIG_NET_SWITCHDEV
392 if (skb->offload_l3_fwd_mark) {
393 consume_skb(skb);
394 return 0;
395 }
396#endif
397
8203e2d8 398 skb->tstamp = 0;
13206b6b 399 return dst_output(net, sk, skb);
1da177e4
LT
400}
401
fe6cc55f
FW
402static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
403{
418a3156 404 if (skb->len <= mtu)
fe6cc55f
FW
405 return false;
406
60ff7467 407 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
fe6cc55f
FW
408 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
409 return true;
410
60ff7467 411 if (skb->ignore_df)
418a3156
FW
412 return false;
413
779b7931 414 if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
fe6cc55f
FW
415 return false;
416
417 return true;
418}
419
/* Forward an IPv6 packet received on one interface toward its route's
 * egress device: validate forwarding policy and packet sanity, handle
 * Router Alert and NDP proxying, decrement the hop limit and pass the
 * skb through NF_INET_FORWARD to ip6_forward_finish(). Consumes the skb
 * on every path; returns 0/NF_HOOK result, -ETIMEDOUT on hop-limit
 * expiry, -EMSGSIZE when too big, -EINVAL otherwise.
 */
int ip6_forward(struct sk_buff *skb)
{
	struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	/* Only packets addressed to us at L2 are candidates. */
	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	/* An attached socket means the skb is locally owned, not transit. */
	if (unlikely(skb->sk))
		goto drop;

	/* LRO-merged skbs must not be forwarded (sizes are not on-wire). */
	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without ane WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	/* xfrm may have swapped the dst; re-read it. */
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (IP6CB(skb)->iif == dst->dev->ifindex &&
	    opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	/* Make the header writable before mangling the hop limit. */
	if (skb_cow(skb, dst->dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
578
/* Copy per-packet metadata from the original skb onto a freshly built
 * fragment so it traverses the rest of the stack identically: packet
 * type, priority, protocol, route (refcounted clone), device, mark,
 * flow hash, tc index, netfilter/extension/secmark state.
 */
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	/* Drop any stale dst on the target before attaching the clone. */
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

	skb_copy_hash(to, from);

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_ext_copy(to, from);
	skb_copy_secmark(to, from);
}
598
/* Set up fast-path fragmentation over an existing frag_list: snapshot
 * the unfragmentable header into iter->tmp_hdr, detach the frag list
 * into the iterator, and rewrite the head skb in place as the first
 * fragment (fragment header inserted, lengths fixed up). Returns 0 or
 * -ENOMEM; on success the caller must eventually kfree(iter->tmp_hdr).
 */
int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
		      u8 nexthdr, __be32 frag_id,
		      struct ip6_fraglist_iter *iter)
{
	unsigned int first_len;
	struct frag_hdr *fh;

	/* BUILD HEADER */
	*prevhdr = NEXTHDR_FRAGMENT;
	iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
	if (!iter->tmp_hdr)
		return -ENOMEM;

	/* The iterator takes ownership of the frag list. */
	iter->frag = skb_shinfo(skb)->frag_list;
	skb_frag_list_init(skb);

	iter->offset = 0;
	iter->hlen = hlen;
	iter->frag_id = frag_id;
	iter->nexthdr = nexthdr;

	/* Open a gap for the fragment header between the unfragmentable
	 * part and the payload, then restore the saved header above it.
	 */
	__skb_pull(skb, hlen);
	fh = __skb_push(skb, sizeof(struct frag_hdr));
	__skb_push(skb, hlen);
	skb_reset_network_header(skb);
	memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);

	fh->nexthdr = nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(IP6_MF);	/* first fragment: offset 0, more follow */
	fh->identification = frag_id;

	first_len = skb_pagelen(skb);
	skb->data_len = first_len - skb_headlen(skb);
	skb->len = first_len;
	ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));

	return 0;
}
EXPORT_SYMBOL(ip6_fraglist_init);
639
/* Turn the next skb on the frag list (iter->frag) into a proper
 * fragment: prepend fragment header plus the saved unfragmentable
 * header, advance iter->offset by the payload just sent in 'skb', and
 * fix up offsets, MF bit, payload length and metadata.
 */
void ip6_fraglist_prepare(struct sk_buff *skb,
			  struct ip6_fraglist_iter *iter)
{
	struct sk_buff *frag = iter->frag;
	unsigned int hlen = iter->hlen;
	struct frag_hdr *fh;

	frag->ip_summed = CHECKSUM_NONE;
	skb_reset_transport_header(frag);
	fh = __skb_push(frag, sizeof(struct frag_hdr));
	__skb_push(frag, hlen);
	skb_reset_network_header(frag);
	memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
	/* Offset advances by the previous fragment's payload size. */
	iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
	fh->nexthdr = iter->nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(iter->offset);
	/* MF set on all but the last fragment. */
	if (frag->next)
		fh->frag_off |= htons(IP6_MF);
	fh->identification = iter->frag_id;
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
	ip6_copy_metadata(frag, skb);
}
EXPORT_SYMBOL(ip6_fraglist_prepare);
664
8a6a1f17
PNA
665void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
666 unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
667 u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
668{
669 state->prevhdr = prevhdr;
670 state->nexthdr = nexthdr;
671 state->frag_id = frag_id;
672
673 state->hlen = hlen;
674 state->mtu = mtu;
675
676 state->left = skb->len - hlen; /* Space per frame */
677 state->ptr = hlen; /* Where to start from */
678
679 state->hroom = hdr_room;
680 state->troom = needed_tailroom;
681
682 state->offset = 0;
683}
684EXPORT_SYMBOL(ip6_frag_init);
685
/* Allocate and fill the next slow-path fragment according to 'state':
 * copies the unfragmentable header, builds the fragment header, and
 * copies up to state->mtu payload bytes (8-byte aligned except for the
 * final fragment). Advances state->ptr/offset/left. Returns the new skb
 * or ERR_PTR(-ENOMEM).
 */
struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
{
	u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
	struct sk_buff *frag;
	struct frag_hdr *fh;
	unsigned int len;

	len = state->left;
	/* IF: it doesn't fit, use 'mtu' - the data space left */
	if (len > state->mtu)
		len = state->mtu;
	/* IF: we are not sending up to and including the packet end
	   then align the next start on an eight byte boundary */
	if (len < state->left)
		len &= ~7;

	/* Allocate buffer */
	frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
			 state->hroom + state->troom, GFP_ATOMIC);
	if (!frag)
		return ERR_PTR(-ENOMEM);

	/*
	 *	Set up data on packet
	 */

	ip6_copy_metadata(frag, skb);
	skb_reserve(frag, state->hroom);
	skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
	skb_reset_network_header(frag);
	fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
	frag->transport_header = (frag->network_header + state->hlen +
				  sizeof(struct frag_hdr));

	/*
	 *	Charge the memory for the fragment to any owner
	 *	it might possess
	 */
	if (skb->sk)
		skb_set_owner_w(frag, skb->sk);

	/*
	 *	Copy the packet header into the new buffer.
	 */
	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);

	/* Patch the copied header's next-header byte to FRAGMENT at the
	 * same offset prevhdr had in the original header.
	 */
	fragnexthdr_offset = skb_network_header(frag);
	fragnexthdr_offset += prevhdr - skb_network_header(skb);
	*fragnexthdr_offset = NEXTHDR_FRAGMENT;

	/*
	 *	Build fragment header.
	 */
	fh->nexthdr = state->nexthdr;
	fh->reserved = 0;
	fh->identification = state->frag_id;

	/*
	 *	Copy a block of the IP datagram.
	 */
	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
			     len));
	state->left -= len;

	fh->frag_off = htons(state->offset);
	if (state->left > 0)
		fh->frag_off |= htons(IP6_MF);
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

	state->ptr += len;
	state->offset += len;

	return frag;
}
EXPORT_SYMBOL(ip6_frag_next);
761
/* Fragment an oversized IPv6 skb and emit each fragment via 'output'.
 * Uses the fast path (rewrite the existing frag_list in place) when the
 * skb geometry allows it, otherwise the slow path (allocate and copy
 * each fragment). Consumes the skb; returns 0 on success or a negative
 * errno, sending ICMPV6_PKT_TOOBIG where fragmentation is forbidden.
 */
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	/* Only trust inet6_sk() for genuinely local traffic (no tunnel
	 * recursion in progress).
	 */
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ip6_frag_state state;
	unsigned int mtu, hlen, nexthdr_offset;
	ktime_t tstamp = skb->tstamp;
	int hroom, err = 0;
	__be32 frag_id;
	u8 *prevhdr, nexthdr = 0;

	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;
	/* Save prevhdr as an offset: skb_checksum_help() below may
	 * reallocate the header and invalidate the pointer.
	 */
	nexthdr_offset = prevhdr - skb_network_header(skb);

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb it not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	/* Honour a smaller per-socket IPV6_MTU if configured. */
	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	/* From here on, mtu is the per-fragment payload budget. */
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	/* Hardware checksum offload cannot span fragments; resolve now. */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	prevhdr = skb_network_header(skb) + nexthdr_offset;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct ip6_fraglist_iter iter;
		struct sk_buff *frag2;

		/* Fast path only if the existing list already has the right
		 * geometry: aligned sizes, no sharing, enough headroom.
		 */
		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
					&iter);
		if (err < 0)
			goto fail;

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (iter.frag)
				ip6_fraglist_prepare(skb, &iter);

			skb->tstamp = tstamp;
			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !iter.frag)
				break;

			skb = ip6_fraglist_next(&iter);
		}

		kfree(iter.tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		/* Free the fragments not yet handed to 'output'. */
		kfree_skb_list(iter.frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		return err;

slow_path_clean:
		/* Undo the ownership transfer done above before falling
		 * back to the slow path.
		 */
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	/*
	 *	Fragment the datagram.
	 */

	ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
		      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
		      &state);

	/*
	 *	Keep copying data until we run out.
	 */

	while (state.left > 0) {
		frag = ip6_frag_next(skb, &state);
		if (IS_ERR(frag)) {
			err = PTR_ERR(frag);
			goto fail;
		}

		/*
		 *	Put this fragment into the sending queue.
		 */
		frag->tstamp = tstamp;
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
944
b71d1d42
ED
945static inline int ip6_rt_check(const struct rt6key *rt_key,
946 const struct in6_addr *fl_addr,
947 const struct in6_addr *addr_cache)
cf6b1982 948{
a02cec21 949 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
63159f29 950 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
cf6b1982
YH
951}
952
/* Validate a socket's cached dst against the flow about to be sent:
 * returns the dst if still usable, otherwise releases it and returns
 * NULL so the caller performs a fresh route lookup.
 */
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	/* A v4-mapped dst (e.g. after IPv6->IPv4 fallback) is never valid
	 * for an IPv6 flow.
	 */
	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
	     (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}
999
/* Core of the dst lookup: resolve a route for @fl6 into *@dst, filling in a
 * source address if the flow left it unspecified.  On success returns 0 with
 * *@dst holding a referenced route; on failure returns a negative errno with
 * *@dst set to NULL (the erroneous dst reference is dropped here).
 */
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
		struct fib6_info *from;
		struct rt6_info *rt;
		bool had_dst = *dst != NULL;

		if (!had_dst)
			*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;

		/* rt->from is RCU-protected; hold the read lock across the
		 * saddr selection that dereferences it.
		 */
		rcu_read_lock();
		from = rt ? rcu_dereference(rt->from) : NULL;
		err = ip6_route_get_saddr(net, from, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		rcu_read_unlock();

		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if (!had_dst && (*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	/* Second attempt, now that a source address is selected. */
	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	/* -EINVAL only flags "neighbour not yet valid"; it is not returned
	 * directly but triggers the optimistic-DAD redirect below.
	 */
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	/* A v4-mapped source is only usable towards a v4-mapped (or
	 * unspecified) destination.
	 */
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}
34a0b3cd 1117
497c615a
HX
/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@net: network namespace to do the lookup in
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	/* Start from scratch; ip6_dst_lookup_tail() fills *dst. */
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1135
497c615a 1136/**
68d0c6d3
DM
1137 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1138 * @sk: socket which provides route info
4c9483b2 1139 * @fl6: flow to lookup
68d0c6d3 1140 * @final_dst: final destination address for ipsec lookup
68d0c6d3
DM
1141 *
1142 * This function performs a route lookup on the given flow.
1143 *
1144 * It returns a valid dst pointer on success, or a pointer encoded
1145 * error code.
1146 */
3aef934f 1147struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
0e0d44ab 1148 const struct in6_addr *final_dst)
68d0c6d3
DM
1149{
1150 struct dst_entry *dst = NULL;
1151 int err;
1152
343d60aa 1153 err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
68d0c6d3
DM
1154 if (err)
1155 return ERR_PTR(err);
1156 if (final_dst)
4e3fd7a0 1157 fl6->daddr = *final_dst;
2774c131 1158
f92ee619 1159 return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
68d0c6d3
DM
1160}
1161EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1162
1163/**
1164 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
497c615a 1165 * @sk: socket which provides the dst cache and route info
4c9483b2 1166 * @fl6: flow to lookup
68d0c6d3 1167 * @final_dst: final destination address for ipsec lookup
96818159 1168 * @connected: whether @sk is connected or not
497c615a
HX
1169 *
1170 * This function performs a route lookup on the given flow with the
1171 * possibility of using the cached route in the socket if it is valid.
1172 * It will take the socket dst lock when operating on the dst cache.
1173 * As a result, this function can only be used in process context.
1174 *
96818159
AK
1175 * In addition, for a connected socket, cache the dst in the socket
1176 * if the current cache is not valid.
1177 *
68d0c6d3
DM
1178 * It returns a valid dst pointer on success, or a pointer encoded
1179 * error code.
497c615a 1180 */
4c9483b2 1181struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
96818159
AK
1182 const struct in6_addr *final_dst,
1183 bool connected)
497c615a 1184{
68d0c6d3 1185 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
497c615a 1186
4c9483b2 1187 dst = ip6_sk_dst_check(sk, dst, fl6);
96818159
AK
1188 if (dst)
1189 return dst;
1190
1191 dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
1192 if (connected && !IS_ERR(dst))
1193 ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
68d0c6d3 1194
00bc0ef5 1195 return dst;
497c615a 1196}
68d0c6d3 1197EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
497c615a 1198
0178b695
HX
1199static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1200 gfp_t gfp)
1201{
1202 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1203}
1204
1205static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1206 gfp_t gfp)
1207{
1208 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1209}
1210
75a493e6 1211static void ip6_append_data_mtu(unsigned int *mtu,
0c183379
G
1212 int *maxfraglen,
1213 unsigned int fragheaderlen,
1214 struct sk_buff *skb,
75a493e6 1215 struct rt6_info *rt,
e367c2d0 1216 unsigned int orig_mtu)
0c183379
G
1217{
1218 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
63159f29 1219 if (!skb) {
0c183379 1220 /* first fragment, reserve header_len */
e367c2d0 1221 *mtu = orig_mtu - rt->dst.header_len;
0c183379
G
1222
1223 } else {
1224 /*
1225 * this fragment is not first, the headers
1226 * space is regarded as data space.
1227 */
e367c2d0 1228 *mtu = orig_mtu;
0c183379
G
1229 }
1230 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1231 + fragheaderlen - sizeof(struct frag_hdr);
1232 }
1233}
1234
/* Initialise cork state for a new corking cycle: duplicate the caller's
 * extension-header options into @v6_cork, take a reference on @rt, capture
 * the flow and per-packet parameters, and derive the fragmentation MTU.
 * Returns 0 or a negative errno.  On -ENOBUFS, partially duplicated options
 * remain in v6_cork->opt; callers free them via ip6_cork_release() (as
 * ip6_make_skb() visibly does on error).
 */
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;
	struct ipv6_txoptions *opt = ipc6->opt;

	/*
	 * setup for corking
	 */
	if (opt) {
		/* A second setup without an intervening release is a bug. */
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = sizeof(*opt);
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		/* Deep-copy each option block so the cork outlives the
		 * caller's ipc6->opt.
		 */
		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa*/
	}
	/* The cork owns a reference on the route until release. */
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	/* With PMTUDISC_PROBE use the raw device MTU; otherwise the path
	 * MTU (of the xfrm path's inner route when not a tunnel).
	 */
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
			READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < IPV6_MIN_MTU)
		return -EINVAL;
	cork->base.fragsize = mtu;
	cork->base.gso_size = ipc6->gso_size;
	cork->base.tx_flags = 0;
	cork->base.mark = ipc6->sockc.mark;
	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);

	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	cork->base.transmit_time = ipc6->sockc.transmit_time;

	return 0;
}
1311
/* Append @length bytes (pulled via @getfrag) to the cork's pending @queue,
 * growing the tail skb or allocating new fragment-sized skbs as needed.
 * Handles the size checks (RFC 7112 header-chain limit, EMSGSIZE),
 * checksum-offload eligibility, MSG_ZEROCOPY, and per-socket write-memory
 * accounting (batched in wmem_alloc_delta).  Returns 0 or a negative errno;
 * on error, already-queued data stays on @queue for the caller to flush.
 */
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	struct ubuf_info *uarg = NULL;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;
	unsigned int wmem_alloc_delta = 0;
	bool paged, extra_uref = false;

	skb = skb_peek_tail(queue);
	if (!skb) {
		/* First call for this cork: account option and dst header
		 * space that only the first skb carries.
		 */
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	/* With GSO, build super-sized skbs up to IP6_MAX_MTU and let
	 * segmentation split them later.
	 */
	paged = !!cork->gso_size;
	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
	orig_mtu = mtu;

	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
		tskey = sk->sk_tskey++;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	/* IPV6_DONTFRAG for UDP/RAW: report the path MTU to the socket
	 * instead of fragmenting.
	 */
	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    (!(flags & MSG_MORE) || cork->gso_size) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
		uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
		if (!uarg)
			return -ENOBUFS;
		extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
		if (rt->dst.dev->features & NETIF_F_SG &&
		    csummode == CHECKSUM_PARTIAL) {
			paged = true;
		} else {
			/* Device can't take zerocopy pages; fall back to
			 * copying but keep the uarg for notification.
			 */
			uarg->zerocopy = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail of
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			unsigned int pagedlen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;
			pagedlen = 0;

			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else if (!paged)
				alloclen = fraglen;
			else {
				/* Paged (GSO/zerocopy): only headers go in
				 * the linear area; the rest lands in frags.
				 */
				alloclen = min_t(int, fraglen, MAX_HEADER);
				pagedlen = fraglen - alloclen;
			}

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			copy = datalen - transhdrlen - fraggap - pagedlen;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				/* First skb: may block on sndbuf space. */
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
				    2 * sk->sk_sndbuf)
					skb = alloc_skb(alloclen + hh_len,
							sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/*
			 * Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen - pagedlen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the overhang from the previous skb
				 * into this one, keeping its checksum
				 * consistent.
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= copy + transhdrlen;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = cork->tx_flags;
			cork->tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			if (!skb->destructor) {
				skb->destructor = sock_wfree;
				skb->sk = sk;
				wmem_alloc_delta += skb->truesize;
			}
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			/* No scatter-gather: copy into the skb's tailroom. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else if (!uarg || !uarg->zerocopy) {
			/* SG path: copy into the per-socket page frag and
			 * attach (or coalesce) it as a paged fragment.
			 */
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			wmem_alloc_delta += copy;
		} else {
			/* True zerocopy: map the user pages directly. */
			err = skb_zerocopy_iter_dgram(skb, from, copy);
			if (err < 0)
				goto error;
		}
		offset += copy;
		length -= copy;
	}

	if (wmem_alloc_delta)
		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return 0;

error_efault:
	err = -EFAULT;
error:
	if (uarg)
		sock_zerocopy_put_abort(uarg, extra_uref);
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	/* Charge whatever was queued before failing. */
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return err;
}
/* Public corked-append entry: on the first call for a cork cycle (empty
 * write queue) set up cork state from @ipc6/@rt/@fl6; on subsequent calls
 * reuse the flow stored in the cork and append only payload.  Returns 0 or
 * a negative errno from ip6_setup_cork()/__ip6_append_data().
 */
int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen,
		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
				     ipc6, rt, fl6);
		if (err)
			return err;

		/* The first chunk also carries the extension headers. */
		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		/* Continue an existing cork: the flow captured at setup
		 * wins over the caller's.
		 */
		fl6 = &inet->cork.fl.u.ip6;
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, ipc6);
}
EXPORT_SYMBOL_GPL(ip6_append_data);
1da177e4 1692
366e41d9
VY
1693static void ip6_cork_release(struct inet_cork_full *cork,
1694 struct inet6_cork *v6_cork)
bf138862 1695{
366e41d9
VY
1696 if (v6_cork->opt) {
1697 kfree(v6_cork->opt->dst0opt);
1698 kfree(v6_cork->opt->dst1opt);
1699 kfree(v6_cork->opt->hopopt);
1700 kfree(v6_cork->opt->srcrt);
1701 kfree(v6_cork->opt);
1702 v6_cork->opt = NULL;
0178b695
HX
1703 }
1704
366e41d9
VY
1705 if (cork->base.dst) {
1706 dst_release(cork->base.dst);
1707 cork->base.dst = NULL;
1708 cork->base.flags &= ~IPCORK_ALLFRAG;
bf138862 1709 }
366e41d9 1710 memset(&cork->fl, 0, sizeof(cork->fl));
bf138862
PE
1711}
1712
/* Collapse all pending skbs on @queue into a single packet: chain the
 * followers onto the head's frag_list, push extension headers and the IPv6
 * header, fill per-packet metadata, bump MIB counters, and release the cork.
 * Returns the finished skb or NULL if the queue was empty.
 */
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		/* The head skb alone accounts the socket write memory. */
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	/* Options may rewrite the destination (routing header); keep the
	 * flow's daddr in a local and let ipv6_push_nfrag_opts() adjust it.
	 */
	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, np), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = cork->base.mark;

	skb->tstamp = cork->base.transmit_time;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}
1788
1789int ip6_send_skb(struct sk_buff *skb)
1790{
1791 struct net *net = sock_net(skb->sk);
1792 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1793 int err;
1794
33224b16 1795 err = ip6_local_out(net, skb->sk, skb);
1da177e4
LT
1796 if (err) {
1797 if (err > 0)
6ce9e7b5 1798 err = net_xmit_errno(err);
1da177e4 1799 if (err)
6422398c
VY
1800 IP6_INC_STATS(net, rt->rt6i_idev,
1801 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1802 }
1803
1da177e4 1804 return err;
6422398c
VY
1805}
1806
/* Finalise the corked write queue into one packet and transmit it.
 * Returns 0 when there was nothing to send or on success, else a negative
 * errno from ip6_send_skb().
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	return skb ? ip6_send_skb(skb) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1da177e4 1818
0bbe84a6 1819static void __ip6_flush_pending_frames(struct sock *sk,
6422398c
VY
1820 struct sk_buff_head *queue,
1821 struct inet_cork_full *cork,
1822 struct inet6_cork *v6_cork)
1da177e4 1823{
1da177e4
LT
1824 struct sk_buff *skb;
1825
0bbe84a6 1826 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
adf30907
ED
1827 if (skb_dst(skb))
1828 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
e1f52208 1829 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1830 kfree_skb(skb);
1831 }
1832
6422398c 1833 ip6_cork_release(cork, v6_cork);
1da177e4 1834}
0bbe84a6
VY
1835
/* Public flush: discard the socket's corked write queue and its cork. */
void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
/* One-shot (uncorked) packet build: set up a private cork on the stack +
 * caller-supplied @cork, append the whole payload to a local queue, and
 * return the finished skb.  Returns NULL for MSG_PROBE, or an ERR_PTR on
 * failure (cork state is released on every error path).
 */
struct sk_buff *ip6_make_skb(struct sock *sk,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
			     struct rt6_info *rt, unsigned int flags,
			     struct inet_cork_full *cork)
{
	struct inet6_cork v6_cork;
	struct sk_buff_head queue;
	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE)
		return NULL;

	__skb_queue_head_init(&queue);

	/* Fresh cork state; ip6_setup_cork() fills in dst and options. */
	cork->base.flags = 0;
	cork->base.addr = 0;
	cork->base.opt = NULL;
	cork->base.dst = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
	if (err) {
		/* Frees any partially duplicated options. */
		ip6_cork_release(cork, &v6_cork);
		return ERR_PTR(err);
	}
	if (ipc6->dontfrag < 0)
		ipc6->dontfrag = inet6_sk(sk)->dontfrag;

	err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags, ipc6);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, cork, &v6_cork);
}