netxen_nic: fix MSI/MSI-x interrupts
[linux-block.git] / net / ipv6 / ip6_output.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * IPv6 output functions
1ab1457c 4 * Linux INET6 implementation
1da177e4
LT
5 *
6 * Authors:
1ab1457c 7 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 8 *
1da177e4
LT
9 * Based on linux/net/ipv4/ip_output.c
10 *
1da177e4
LT
11 * Changes:
12 * A.N.Kuznetsov : arithmetics in fragmentation.
13 * extension headers are implemented.
14 * route changes now work.
15 * ip6_forward does not confuse sniffers.
16 * etc.
17 *
18 * H. von Brand : Added missing #include <linux/string.h>
67ba4152 19 * Imran Patel : frag id should be in NBO
1da177e4
LT
20 * Kazunori MIYAZAWA @USAGI
21 * : add ip6_append_data and related functions
22 * for datagram xmit
23 */
24
1da177e4 25#include <linux/errno.h>
ef76bc23 26#include <linux/kernel.h>
1da177e4
LT
27#include <linux/string.h>
28#include <linux/socket.h>
29#include <linux/net.h>
30#include <linux/netdevice.h>
31#include <linux/if_arp.h>
32#include <linux/in6.h>
33#include <linux/tcp.h>
34#include <linux/route.h>
b59f45d0 35#include <linux/module.h>
5a0e3ad6 36#include <linux/slab.h>
1da177e4 37
33b48679 38#include <linux/bpf-cgroup.h>
1da177e4
LT
39#include <linux/netfilter.h>
40#include <linux/netfilter_ipv6.h>
41
42#include <net/sock.h>
43#include <net/snmp.h>
44
45#include <net/ipv6.h>
46#include <net/ndisc.h>
47#include <net/protocol.h>
48#include <net/ip6_route.h>
49#include <net/addrconf.h>
50#include <net/rawv6.h>
51#include <net/icmp.h>
52#include <net/xfrm.h>
53#include <net/checksum.h>
7bc570c8 54#include <linux/mroute6.h>
ca254490 55#include <net/l3mdev.h>
14972cbd 56#include <net/lwtunnel.h>
571912c6 57#include <net/ip_tunnels.h>
1da177e4 58
7d8c6e39 59static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 60{
adf30907 61 struct dst_entry *dst = skb_dst(skb);
1da177e4 62 struct net_device *dev = dst->dev;
9b1c1ef1 63 const struct in6_addr *nexthop;
f6b72b62 64 struct neighbour *neigh;
6fd6ce20 65 int ret;
1da177e4 66
0660e03f 67 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
adf30907 68 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1da177e4 69
7026b1dd 70 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
8571ab47 71 ((mroute6_is_socket(net, skb) &&
bd91b8bf 72 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
7bc570c8
YH
73 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
74 &ipv6_hdr(skb)->saddr))) {
1da177e4
LT
75 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
76
77 /* Do not check for IFF_ALLMULTI; multicast routing
78 is not supported in any case.
79 */
80 if (newskb)
b2e0b385 81 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
29a26a56 82 net, sk, newskb, NULL, newskb->dev,
95603e22 83 dev_loopback_xmit);
1da177e4 84
0660e03f 85 if (ipv6_hdr(skb)->hop_limit == 0) {
78126c41 86 IP6_INC_STATS(net, idev,
3bd653c8 87 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
88 kfree_skb(skb);
89 return 0;
90 }
91 }
92
78126c41 93 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
dd408515
HFS
94
95 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
96 IPV6_ADDR_SCOPE_NODELOCAL &&
97 !(dev->flags & IFF_LOOPBACK)) {
98 kfree_skb(skb);
99 return 0;
100 }
1da177e4
LT
101 }
102
14972cbd
RP
103 if (lwtunnel_xmit_redirect(dst->lwtstate)) {
104 int res = lwtunnel_xmit(skb);
105
106 if (res < 0 || res == LWTUNNEL_XMIT_DONE)
107 return res;
108 }
109
6fd6ce20 110 rcu_read_lock_bh();
2647a9b0 111 nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
6fd6ce20
YH
112 neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
113 if (unlikely(!neigh))
114 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
115 if (!IS_ERR(neigh)) {
4ff06203 116 sock_confirm_neigh(skb, neigh);
0353f282 117 ret = neigh_output(neigh, skb, false);
6fd6ce20
YH
118 rcu_read_unlock_bh();
119 return ret;
120 }
121 rcu_read_unlock_bh();
05e3aa09 122
78126c41 123 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
9e508490
JE
124 kfree_skb(skb);
125 return -EINVAL;
1da177e4
LT
126}
127
956fe219 128static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
9e508490 129{
09ee9dba
TB
130#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
131 /* Policy lookup after SNAT yielded a new policy */
132 if (skb_dst(skb)->xfrm) {
133 IPCB(skb)->flags |= IPSKB_REROUTED;
134 return dst_output(net, sk, skb);
135 }
136#endif
137
9e508490 138 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
9037c357
JP
139 dst_allfrag(skb_dst(skb)) ||
140 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
7d8c6e39 141 return ip6_fragment(net, sk, skb, ip6_finish_output2);
9e508490 142 else
7d8c6e39 143 return ip6_finish_output2(net, sk, skb);
9e508490
JE
144}
145
956fe219 146static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
147{
148 int ret;
149
150 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
151 switch (ret) {
152 case NET_XMIT_SUCCESS:
153 return __ip6_finish_output(net, sk, skb);
154 case NET_XMIT_CN:
155 return __ip6_finish_output(net, sk, skb) ? : ret;
156 default:
157 kfree_skb(skb);
158 return ret;
159 }
160}
161
ede2059d 162int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 163{
28f8bfd1 164 struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
adf30907 165 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
be10de0a 166
97a7a37a
CF
167 skb->protocol = htons(ETH_P_IPV6);
168 skb->dev = dev;
169
778d80be 170 if (unlikely(idev->cnf.disable_ipv6)) {
19a0644c 171 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
778d80be
YH
172 kfree_skb(skb);
173 return 0;
174 }
175
29a26a56 176 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
28f8bfd1 177 net, sk, skb, indev, dev,
9c6eb28a
JE
178 ip6_finish_output,
179 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
1da177e4
LT
180}
181
e9191ffb 182bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
513674b5
SL
183{
184 if (!np->autoflowlabel_set)
185 return ip6_default_np_autolabel(net);
186 else
187 return np->autoflowlabel;
188}
189
1da177e4 190/*
1c1e9d2b
ED
191 * xmit an sk_buff (used by TCP, SCTP and DCCP)
192 * Note : socket lock is not held for SYNACK packets, but might be modified
193 * by calls to skb_set_owner_w() and ipv6_local_error(),
194 * which are using proper atomic operations or spinlocks.
1da177e4 195 */
1c1e9d2b 196int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
4f6570d7 197 __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
1da177e4 198{
3bd653c8 199 struct net *net = sock_net(sk);
1c1e9d2b 200 const struct ipv6_pinfo *np = inet6_sk(sk);
4c9483b2 201 struct in6_addr *first_hop = &fl6->daddr;
adf30907 202 struct dst_entry *dst = skb_dst(skb);
66033f47 203 unsigned int head_room;
1da177e4 204 struct ipv6hdr *hdr;
4c9483b2 205 u8 proto = fl6->flowi6_proto;
1da177e4 206 int seg_len = skb->len;
e651f03a 207 int hlimit = -1;
1da177e4
LT
208 u32 mtu;
209
66033f47
SB
210 head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
211 if (opt)
212 head_room += opt->opt_nflen + opt->opt_flen;
213
214 if (unlikely(skb_headroom(skb) < head_room)) {
215 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
216 if (!skb2) {
217 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
218 IPSTATS_MIB_OUTDISCARDS);
219 kfree_skb(skb);
220 return -ENOBUFS;
1da177e4 221 }
66033f47
SB
222 if (skb->sk)
223 skb_set_owner_w(skb2, skb->sk);
224 consume_skb(skb);
225 skb = skb2;
226 }
227
228 if (opt) {
229 seg_len += opt->opt_nflen + opt->opt_flen;
230
1da177e4
LT
231 if (opt->opt_flen)
232 ipv6_push_frag_opts(skb, opt, &proto);
66033f47 233
1da177e4 234 if (opt->opt_nflen)
613fa3ca
DL
235 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
236 &fl6->saddr);
1da177e4
LT
237 }
238
e2d1bca7
ACM
239 skb_push(skb, sizeof(struct ipv6hdr));
240 skb_reset_network_header(skb);
0660e03f 241 hdr = ipv6_hdr(skb);
1da177e4
LT
242
243 /*
244 * Fill in the IPv6 header
245 */
b903d324 246 if (np)
1da177e4
LT
247 hlimit = np->hop_limit;
248 if (hlimit < 0)
6b75d090 249 hlimit = ip6_dst_hoplimit(dst);
1da177e4 250
cb1ce2ef 251 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
513674b5 252 ip6_autoflowlabel(net, np), fl6));
41a1f8ea 253
1da177e4
LT
254 hdr->payload_len = htons(seg_len);
255 hdr->nexthdr = proto;
256 hdr->hop_limit = hlimit;
257
4e3fd7a0
AD
258 hdr->saddr = fl6->saddr;
259 hdr->daddr = *first_hop;
1da177e4 260
9c9c9ad5 261 skb->protocol = htons(ETH_P_IPV6);
4f6570d7 262 skb->priority = priority;
92e55f41 263 skb->mark = mark;
a2c2064f 264
1da177e4 265 mtu = dst_mtu(dst);
60ff7467 266 if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
adf30907 267 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
edf391ff 268 IPSTATS_MIB_OUT, skb->len);
a8e3e1a9
DA
269
270 /* if egress device is enslaved to an L3 master device pass the
271 * skb to its handler for processing
272 */
273 skb = l3mdev_ip6_out((struct sock *)sk, skb);
274 if (unlikely(!skb))
275 return 0;
276
1c1e9d2b
ED
277 /* hooks should never assume socket lock is held.
278 * we promote our socket to non const
279 */
29a26a56 280 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
1c1e9d2b 281 net, (struct sock *)sk, skb, NULL, dst->dev,
13206b6b 282 dst_output);
1da177e4
LT
283 }
284
1da177e4 285 skb->dev = dst->dev;
1c1e9d2b
ED
286 /* ipv6_local_error() does not require socket lock,
287 * we promote our socket to non const
288 */
289 ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
290
adf30907 291 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
292 kfree_skb(skb);
293 return -EMSGSIZE;
294}
7159039a
YH
295EXPORT_SYMBOL(ip6_xmit);
296
1da177e4
LT
297static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
298{
299 struct ip6_ra_chain *ra;
300 struct sock *last = NULL;
301
302 read_lock(&ip6_ra_lock);
303 for (ra = ip6_ra_chain; ra; ra = ra->next) {
304 struct sock *sk = ra->sk;
0bd1b59b
AM
305 if (sk && ra->sel == sel &&
306 (!sk->sk_bound_dev_if ||
307 sk->sk_bound_dev_if == skb->dev->ifindex)) {
9036b2fe
FR
308 struct ipv6_pinfo *np = inet6_sk(sk);
309
310 if (np && np->rtalert_isolate &&
311 !net_eq(sock_net(sk), dev_net(skb->dev))) {
312 continue;
313 }
1da177e4
LT
314 if (last) {
315 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
316 if (skb2)
317 rawv6_rcv(last, skb2);
318 }
319 last = sk;
320 }
321 }
322
323 if (last) {
324 rawv6_rcv(last, skb);
325 read_unlock(&ip6_ra_lock);
326 return 1;
327 }
328 read_unlock(&ip6_ra_lock);
329 return 0;
330}
331
e21e0b5f
VN
332static int ip6_forward_proxy_check(struct sk_buff *skb)
333{
0660e03f 334 struct ipv6hdr *hdr = ipv6_hdr(skb);
e21e0b5f 335 u8 nexthdr = hdr->nexthdr;
75f2811c 336 __be16 frag_off;
e21e0b5f
VN
337 int offset;
338
339 if (ipv6_ext_hdr(nexthdr)) {
75f2811c 340 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
e21e0b5f
VN
341 if (offset < 0)
342 return 0;
343 } else
344 offset = sizeof(struct ipv6hdr);
345
346 if (nexthdr == IPPROTO_ICMPV6) {
347 struct icmp6hdr *icmp6;
348
d56f90a7
ACM
349 if (!pskb_may_pull(skb, (skb_network_header(skb) +
350 offset + 1 - skb->data)))
e21e0b5f
VN
351 return 0;
352
d56f90a7 353 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
e21e0b5f
VN
354
355 switch (icmp6->icmp6_type) {
356 case NDISC_ROUTER_SOLICITATION:
357 case NDISC_ROUTER_ADVERTISEMENT:
358 case NDISC_NEIGHBOUR_SOLICITATION:
359 case NDISC_NEIGHBOUR_ADVERTISEMENT:
360 case NDISC_REDIRECT:
361 /* For reaction involving unicast neighbor discovery
362 * message destined to the proxied address, pass it to
363 * input function.
364 */
365 return 1;
366 default:
367 break;
368 }
369 }
370
74553b09
VN
371 /*
372 * The proxying router can't forward traffic sent to a link-local
373 * address, so signal the sender and discard the packet. This
374 * behavior is clarified by the MIPv6 specification.
375 */
376 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
377 dst_link_failure(skb);
378 return -1;
379 }
380
e21e0b5f
VN
381 return 0;
382}
383
0c4b51f0
EB
384static inline int ip6_forward_finish(struct net *net, struct sock *sk,
385 struct sk_buff *skb)
1da177e4 386{
71a1c915
JB
387 struct dst_entry *dst = skb_dst(skb);
388
389 __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
390 __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
391
f839a6c9
IS
392#ifdef CONFIG_NET_SWITCHDEV
393 if (skb->offload_l3_fwd_mark) {
394 consume_skb(skb);
395 return 0;
396 }
397#endif
398
8203e2d8 399 skb->tstamp = 0;
13206b6b 400 return dst_output(net, sk, skb);
1da177e4
LT
401}
402
fe6cc55f
FW
403static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
404{
418a3156 405 if (skb->len <= mtu)
fe6cc55f
FW
406 return false;
407
60ff7467 408 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
fe6cc55f
FW
409 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
410 return true;
411
60ff7467 412 if (skb->ignore_df)
418a3156
FW
413 return false;
414
779b7931 415 if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
fe6cc55f
FW
416 return false;
417
418 return true;
419}
420
1da177e4
LT
421int ip6_forward(struct sk_buff *skb)
422{
bdb7cc64 423 struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
adf30907 424 struct dst_entry *dst = skb_dst(skb);
0660e03f 425 struct ipv6hdr *hdr = ipv6_hdr(skb);
1da177e4 426 struct inet6_skb_parm *opt = IP6CB(skb);
c346dca1 427 struct net *net = dev_net(dst->dev);
14f3ad6f 428 u32 mtu;
1ab1457c 429
53b7997f 430 if (net->ipv6.devconf_all->forwarding == 0)
1da177e4
LT
431 goto error;
432
090f1166
LR
433 if (skb->pkt_type != PACKET_HOST)
434 goto drop;
435
9ef2e965
HFS
436 if (unlikely(skb->sk))
437 goto drop;
438
4497b076
BH
439 if (skb_warn_if_lro(skb))
440 goto drop;
441
1da177e4 442 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
bdb7cc64 443 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
1da177e4
LT
444 goto drop;
445 }
446
35fc92a9 447 skb_forward_csum(skb);
1da177e4
LT
448
449 /*
450 * We DO NOT make any processing on
451 * RA packets, pushing them to user level AS IS
452 * without ane WARRANTY that application will be able
453 * to interpret them. The reason is that we
454 * cannot make anything clever here.
455 *
456 * We are not end-node, so that if packet contains
457 * AH/ESP, we cannot make anything.
458 * Defragmentation also would be mistake, RA packets
459 * cannot be fragmented, because there is no warranty
460 * that different fragments will go along one path. --ANK
461 */
ab4eb353
YH
462 if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
463 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
1da177e4
LT
464 return 0;
465 }
466
467 /*
468 * check and decrement ttl
469 */
470 if (hdr->hop_limit <= 1) {
3ffe533c 471 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
bdb7cc64 472 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
1da177e4
LT
473
474 kfree_skb(skb);
475 return -ETIMEDOUT;
476 }
477
fbea49e1 478 /* XXX: idev->cnf.proxy_ndp? */
53b7997f 479 if (net->ipv6.devconf_all->proxy_ndp &&
8a3edd80 480 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
74553b09
VN
481 int proxied = ip6_forward_proxy_check(skb);
482 if (proxied > 0)
e21e0b5f 483 return ip6_input(skb);
74553b09 484 else if (proxied < 0) {
bdb7cc64 485 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
74553b09
VN
486 goto drop;
487 }
e21e0b5f
VN
488 }
489
1da177e4 490 if (!xfrm6_route_forward(skb)) {
bdb7cc64 491 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
1da177e4
LT
492 goto drop;
493 }
adf30907 494 dst = skb_dst(skb);
1da177e4
LT
495
496 /* IPv6 specs say nothing about it, but it is clear that we cannot
497 send redirects to source routed frames.
1e5dc146 498 We don't send redirects to frames decapsulated from IPsec.
1da177e4 499 */
2f17becf
SS
500 if (IP6CB(skb)->iif == dst->dev->ifindex &&
501 opt->srcrt == 0 && !skb_sec_path(skb)) {
1da177e4 502 struct in6_addr *target = NULL;
fbfe95a4 503 struct inet_peer *peer;
1da177e4 504 struct rt6_info *rt;
1da177e4
LT
505
506 /*
507 * incoming and outgoing devices are the same
508 * send a redirect.
509 */
510
511 rt = (struct rt6_info *) dst;
c45a3dfb
DM
512 if (rt->rt6i_flags & RTF_GATEWAY)
513 target = &rt->rt6i_gateway;
1da177e4
LT
514 else
515 target = &hdr->daddr;
516
fd0273d7 517 peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
92d86829 518
1da177e4
LT
519 /* Limit redirects both by destination (here)
520 and by source (inside ndisc_send_redirect)
521 */
fbfe95a4 522 if (inet_peer_xrlim_allow(peer, 1*HZ))
4991969a 523 ndisc_send_redirect(skb, target);
1d861aa4
DM
524 if (peer)
525 inet_putpeer(peer);
5bb1ab09
DS
526 } else {
527 int addrtype = ipv6_addr_type(&hdr->saddr);
528
1da177e4 529 /* This check is security critical. */
f81b2e7d
YH
530 if (addrtype == IPV6_ADDR_ANY ||
531 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
5bb1ab09
DS
532 goto error;
533 if (addrtype & IPV6_ADDR_LINKLOCAL) {
534 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
3ffe533c 535 ICMPV6_NOT_NEIGHBOUR, 0);
5bb1ab09
DS
536 goto error;
537 }
1da177e4
LT
538 }
539
0954cf9c 540 mtu = ip6_dst_mtu_forward(dst);
14f3ad6f
UW
541 if (mtu < IPV6_MIN_MTU)
542 mtu = IPV6_MIN_MTU;
543
fe6cc55f 544 if (ip6_pkt_too_big(skb, mtu)) {
1da177e4
LT
545 /* Again, force OUTPUT device used as source address */
546 skb->dev = dst->dev;
14f3ad6f 547 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
bdb7cc64 548 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
1d015503
ED
549 __IP6_INC_STATS(net, ip6_dst_idev(dst),
550 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
551 kfree_skb(skb);
552 return -EMSGSIZE;
553 }
554
555 if (skb_cow(skb, dst->dev->hard_header_len)) {
1d015503
ED
556 __IP6_INC_STATS(net, ip6_dst_idev(dst),
557 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
558 goto drop;
559 }
560
0660e03f 561 hdr = ipv6_hdr(skb);
1da177e4
LT
562
563 /* Mangling hops number delayed to point after skb COW */
1ab1457c 564
1da177e4
LT
565 hdr->hop_limit--;
566
29a26a56
EB
567 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
568 net, NULL, skb, skb->dev, dst->dev,
6e23ae2a 569 ip6_forward_finish);
1da177e4
LT
570
571error:
bdb7cc64 572 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
1da177e4
LT
573drop:
574 kfree_skb(skb);
575 return -EINVAL;
576}
577
578static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
579{
580 to->pkt_type = from->pkt_type;
581 to->priority = from->priority;
582 to->protocol = from->protocol;
adf30907
ED
583 skb_dst_drop(to);
584 skb_dst_set(to, dst_clone(skb_dst(from)));
1da177e4 585 to->dev = from->dev;
82e91ffe 586 to->mark = from->mark;
1da177e4 587
3dd1c9a1
PA
588 skb_copy_hash(to, from);
589
1da177e4
LT
590#ifdef CONFIG_NET_SCHED
591 to->tc_index = from->tc_index;
592#endif
e7ac05f3 593 nf_copy(to, from);
df5042f4 594 skb_ext_copy(to, from);
984bc16c 595 skb_copy_secmark(to, from);
1da177e4
LT
596}
597
0feca619
PNA
598int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
599 u8 nexthdr, __be32 frag_id,
600 struct ip6_fraglist_iter *iter)
601{
602 unsigned int first_len;
603 struct frag_hdr *fh;
604
605 /* BUILD HEADER */
606 *prevhdr = NEXTHDR_FRAGMENT;
607 iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
608 if (!iter->tmp_hdr)
609 return -ENOMEM;
610
b7034146 611 iter->frag = skb_shinfo(skb)->frag_list;
0feca619
PNA
612 skb_frag_list_init(skb);
613
614 iter->offset = 0;
615 iter->hlen = hlen;
616 iter->frag_id = frag_id;
617 iter->nexthdr = nexthdr;
618
619 __skb_pull(skb, hlen);
620 fh = __skb_push(skb, sizeof(struct frag_hdr));
621 __skb_push(skb, hlen);
622 skb_reset_network_header(skb);
623 memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);
624
625 fh->nexthdr = nexthdr;
626 fh->reserved = 0;
627 fh->frag_off = htons(IP6_MF);
628 fh->identification = frag_id;
629
630 first_len = skb_pagelen(skb);
631 skb->data_len = first_len - skb_headlen(skb);
632 skb->len = first_len;
633 ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));
634
635 return 0;
636}
637EXPORT_SYMBOL(ip6_fraglist_init);
638
639void ip6_fraglist_prepare(struct sk_buff *skb,
640 struct ip6_fraglist_iter *iter)
641{
642 struct sk_buff *frag = iter->frag;
643 unsigned int hlen = iter->hlen;
644 struct frag_hdr *fh;
645
646 frag->ip_summed = CHECKSUM_NONE;
647 skb_reset_transport_header(frag);
648 fh = __skb_push(frag, sizeof(struct frag_hdr));
649 __skb_push(frag, hlen);
650 skb_reset_network_header(frag);
651 memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
652 iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
653 fh->nexthdr = iter->nexthdr;
654 fh->reserved = 0;
655 fh->frag_off = htons(iter->offset);
656 if (frag->next)
657 fh->frag_off |= htons(IP6_MF);
658 fh->identification = iter->frag_id;
659 ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
660 ip6_copy_metadata(frag, skb);
661}
662EXPORT_SYMBOL(ip6_fraglist_prepare);
663
8a6a1f17
PNA
664void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
665 unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
666 u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
667{
668 state->prevhdr = prevhdr;
669 state->nexthdr = nexthdr;
670 state->frag_id = frag_id;
671
672 state->hlen = hlen;
673 state->mtu = mtu;
674
675 state->left = skb->len - hlen; /* Space per frame */
676 state->ptr = hlen; /* Where to start from */
677
678 state->hroom = hdr_room;
679 state->troom = needed_tailroom;
680
681 state->offset = 0;
682}
683EXPORT_SYMBOL(ip6_frag_init);
684
685struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
686{
687 u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
688 struct sk_buff *frag;
689 struct frag_hdr *fh;
690 unsigned int len;
691
692 len = state->left;
693 /* IF: it doesn't fit, use 'mtu' - the data space left */
694 if (len > state->mtu)
695 len = state->mtu;
696 /* IF: we are not sending up to and including the packet end
697 then align the next start on an eight byte boundary */
698 if (len < state->left)
699 len &= ~7;
700
701 /* Allocate buffer */
702 frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
703 state->hroom + state->troom, GFP_ATOMIC);
704 if (!frag)
705 return ERR_PTR(-ENOMEM);
706
707 /*
708 * Set up data on packet
709 */
710
711 ip6_copy_metadata(frag, skb);
712 skb_reserve(frag, state->hroom);
713 skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
714 skb_reset_network_header(frag);
715 fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
716 frag->transport_header = (frag->network_header + state->hlen +
717 sizeof(struct frag_hdr));
718
719 /*
720 * Charge the memory for the fragment to any owner
721 * it might possess
722 */
723 if (skb->sk)
724 skb_set_owner_w(frag, skb->sk);
725
726 /*
727 * Copy the packet header into the new buffer.
728 */
729 skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);
730
731 fragnexthdr_offset = skb_network_header(frag);
732 fragnexthdr_offset += prevhdr - skb_network_header(skb);
733 *fragnexthdr_offset = NEXTHDR_FRAGMENT;
734
735 /*
736 * Build fragment header.
737 */
738 fh->nexthdr = state->nexthdr;
739 fh->reserved = 0;
740 fh->identification = state->frag_id;
741
742 /*
743 * Copy a block of the IP datagram.
744 */
745 BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
746 len));
747 state->left -= len;
748
749 fh->frag_off = htons(state->offset);
750 if (state->left > 0)
751 fh->frag_off |= htons(IP6_MF);
752 ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
753
754 state->ptr += len;
755 state->offset += len;
756
757 return frag;
758}
759EXPORT_SYMBOL(ip6_frag_next);
760
7d8c6e39
EB
761int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
762 int (*output)(struct net *, struct sock *, struct sk_buff *))
1da177e4 763{
1da177e4 764 struct sk_buff *frag;
67ba4152 765 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
f60e5990 766 struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
767 inet6_sk(skb->sk) : NULL;
8a6a1f17
PNA
768 struct ip6_frag_state state;
769 unsigned int mtu, hlen, nexthdr_offset;
9669fffc 770 ktime_t tstamp = skb->tstamp;
8a6a1f17 771 int hroom, err = 0;
286c2349 772 __be32 frag_id;
1da177e4
LT
773 u8 *prevhdr, nexthdr = 0;
774
7dd7eb95
DM
775 err = ip6_find_1stfragopt(skb, &prevhdr);
776 if (err < 0)
2423496a 777 goto fail;
7dd7eb95 778 hlen = err;
1da177e4 779 nexthdr = *prevhdr;
ef0efcd3 780 nexthdr_offset = prevhdr - skb_network_header(skb);
1da177e4 781
628a5c56 782 mtu = ip6_skb_dst_mtu(skb);
b881ef76
JH
783
784 /* We must not fragment if the socket is set to force MTU discovery
14f3ad6f 785 * or if the skb it not generated by a local socket.
b881ef76 786 */
485fca66
FW
787 if (unlikely(!skb->ignore_df && skb->len > mtu))
788 goto fail_toobig;
a34a101e 789
485fca66
FW
790 if (IP6CB(skb)->frag_max_size) {
791 if (IP6CB(skb)->frag_max_size > mtu)
792 goto fail_toobig;
793
794 /* don't send fragments larger than what we received */
795 mtu = IP6CB(skb)->frag_max_size;
796 if (mtu < IPV6_MIN_MTU)
797 mtu = IPV6_MIN_MTU;
b881ef76
JH
798 }
799
d91675f9
YH
800 if (np && np->frag_size < mtu) {
801 if (np->frag_size)
802 mtu = np->frag_size;
803 }
89bc7848 804 if (mtu < hlen + sizeof(struct frag_hdr) + 8)
b72a2b01 805 goto fail_toobig;
1e0d69a9 806 mtu -= hlen + sizeof(struct frag_hdr);
1da177e4 807
fd0273d7
MKL
808 frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
809 &ipv6_hdr(skb)->saddr);
286c2349 810
405c92f7
HFS
811 if (skb->ip_summed == CHECKSUM_PARTIAL &&
812 (err = skb_checksum_help(skb)))
813 goto fail;
814
ef0efcd3 815 prevhdr = skb_network_header(skb) + nexthdr_offset;
1d325d21 816 hroom = LL_RESERVED_SPACE(rt->dst.dev);
21dc3301 817 if (skb_has_frag_list(skb)) {
c72d8cda 818 unsigned int first_len = skb_pagelen(skb);
0feca619 819 struct ip6_fraglist_iter iter;
3d13008e 820 struct sk_buff *frag2;
1da177e4
LT
821
822 if (first_len - hlen > mtu ||
823 ((first_len - hlen) & 7) ||
1d325d21
FW
824 skb_cloned(skb) ||
825 skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
1da177e4
LT
826 goto slow_path;
827
4d9092bb 828 skb_walk_frags(skb, frag) {
1da177e4
LT
829 /* Correct geometry. */
830 if (frag->len > mtu ||
831 ((frag->len & 7) && frag->next) ||
1d325d21 832 skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
3d13008e 833 goto slow_path_clean;
1da177e4 834
1da177e4
LT
835 /* Partially cloned skb? */
836 if (skb_shared(frag))
3d13008e 837 goto slow_path_clean;
2fdba6b0
HX
838
839 BUG_ON(frag->sk);
840 if (skb->sk) {
2fdba6b0
HX
841 frag->sk = skb->sk;
842 frag->destructor = sock_wfree;
2fdba6b0 843 }
3d13008e 844 skb->truesize -= frag->truesize;
1da177e4
LT
845 }
846
0feca619
PNA
847 err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
848 &iter);
849 if (err < 0)
1d325d21 850 goto fail;
a11d206d 851
1da177e4
LT
852 for (;;) {
853 /* Prepare header of the next frame,
854 * before previous one went down. */
0feca619
PNA
855 if (iter.frag)
856 ip6_fraglist_prepare(skb, &iter);
1ab1457c 857
9669fffc 858 skb->tstamp = tstamp;
7d8c6e39 859 err = output(net, sk, skb);
67ba4152 860 if (!err)
d8d1f30b 861 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 862 IPSTATS_MIB_FRAGCREATES);
dafee490 863
0feca619 864 if (err || !iter.frag)
1da177e4
LT
865 break;
866
0feca619 867 skb = ip6_fraglist_next(&iter);
1da177e4
LT
868 }
869
0feca619 870 kfree(iter.tmp_hdr);
1da177e4
LT
871
872 if (err == 0) {
d8d1f30b 873 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 874 IPSTATS_MIB_FRAGOKS);
1da177e4
LT
875 return 0;
876 }
877
b7034146 878 kfree_skb_list(iter.frag);
1da177e4 879
d8d1f30b 880 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 881 IPSTATS_MIB_FRAGFAILS);
1da177e4 882 return err;
3d13008e
ED
883
884slow_path_clean:
885 skb_walk_frags(skb, frag2) {
886 if (frag2 == frag)
887 break;
888 frag2->sk = NULL;
889 frag2->destructor = NULL;
890 skb->truesize += frag2->truesize;
891 }
1da177e4
LT
892 }
893
894slow_path:
1da177e4
LT
895 /*
896 * Fragment the datagram.
897 */
898
8a6a1f17
PNA
899 ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
900 LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
901 &state);
1da177e4
LT
902
903 /*
904 * Keep copying data until we run out.
905 */
1da177e4 906
8a6a1f17
PNA
907 while (state.left > 0) {
908 frag = ip6_frag_next(skb, &state);
909 if (IS_ERR(frag)) {
910 err = PTR_ERR(frag);
1da177e4
LT
911 goto fail;
912 }
913
1da177e4
LT
914 /*
915 * Put this fragment into the sending queue.
916 */
9669fffc 917 frag->tstamp = tstamp;
7d8c6e39 918 err = output(net, sk, frag);
1da177e4
LT
919 if (err)
920 goto fail;
dafee490 921
adf30907 922 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 923 IPSTATS_MIB_FRAGCREATES);
1da177e4 924 }
adf30907 925 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 926 IPSTATS_MIB_FRAGOKS);
808db80a 927 consume_skb(skb);
1da177e4
LT
928 return err;
929
485fca66
FW
930fail_toobig:
931 if (skb->sk && dst_allfrag(skb_dst(skb)))
932 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
933
485fca66
FW
934 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
935 err = -EMSGSIZE;
936
1da177e4 937fail:
adf30907 938 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 939 IPSTATS_MIB_FRAGFAILS);
1ab1457c 940 kfree_skb(skb);
1da177e4
LT
941 return err;
942}
943
b71d1d42
ED
944static inline int ip6_rt_check(const struct rt6key *rt_key,
945 const struct in6_addr *fl_addr,
946 const struct in6_addr *addr_cache)
cf6b1982 947{
a02cec21 948 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
63159f29 949 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
cf6b1982
YH
950}
951
497c615a
HX
952static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
953 struct dst_entry *dst,
b71d1d42 954 const struct flowi6 *fl6)
1da177e4 955{
497c615a 956 struct ipv6_pinfo *np = inet6_sk(sk);
a963a37d 957 struct rt6_info *rt;
1da177e4 958
497c615a
HX
959 if (!dst)
960 goto out;
961
a963a37d
ED
962 if (dst->ops->family != AF_INET6) {
963 dst_release(dst);
964 return NULL;
965 }
966
967 rt = (struct rt6_info *)dst;
497c615a
HX
968 /* Yes, checking route validity in not connected
969 * case is not very simple. Take into account,
970 * that we do not support routing by source, TOS,
67ba4152 971 * and MSG_DONTROUTE --ANK (980726)
497c615a 972 *
cf6b1982
YH
973 * 1. ip6_rt_check(): If route was host route,
974 * check that cached destination is current.
497c615a
HX
975 * If it is network route, we still may
976 * check its validity using saved pointer
977 * to the last used address: daddr_cache.
978 * We do not want to save whole address now,
979 * (because main consumer of this service
980 * is tcp, which has not this problem),
981 * so that the last trick works only on connected
982 * sockets.
983 * 2. oif also should be the same.
984 */
4c9483b2 985 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
8e1ef0a9 986#ifdef CONFIG_IPV6_SUBTREES
4c9483b2 987 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
8e1ef0a9 988#endif
ca254490
DA
989 (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
990 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
497c615a
HX
991 dst_release(dst);
992 dst = NULL;
1da177e4
LT
993 }
994
497c615a
HX
995out:
996 return dst;
997}
998
/*
 * ip6_dst_lookup_tail - common back end of the ip6_dst_lookup*() helpers
 * @net: network namespace to perform the lookup in
 * @sk: socket providing source address preferences (may be NULL)
 * @dst: in/out route pointer; a NULL *dst requests a fresh lookup
 * @fl6: flow to look up; fl6->saddr is filled in when it is unspecified
 *
 * Returns 0 with a valid route in *dst, or a negative errno.  On every
 * error path *dst is released and reset to NULL.
 */
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
		struct fib6_info *from;
		struct rt6_info *rt;
		bool had_dst = *dst != NULL;

		if (!had_dst)
			*dst = ip6_route_output(net, sk, fl6);
		/* An erroneous route contributes no preferred source. */
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;

		/* rt->from is only dereferenced inside the RCU read section. */
		rcu_read_lock();
		from = rt ? rcu_dereference(rt->from) : NULL;
		err = ip6_route_get_saddr(net, from, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		rcu_read_unlock();

		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if (!had_dst && (*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	/* Look up (again) now that the source address is known. */
	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	/* err is only a local "neighbour not valid" marker here. */
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	/* A v4-mapped source is only accepted with a v4-mapped or
	 * unspecified destination.
	 */
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	/* Only "network unreachable" is accounted as a no-route event. */
	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}
34a0b3cd 1116
497c615a
HX
1117/**
1118 * ip6_dst_lookup - perform route lookup on flow
b51cd7c8 1119 * @net: Network namespace to perform lookup in
497c615a
HX
1120 * @sk: socket which provides route info
1121 * @dst: pointer to dst_entry * for result
4c9483b2 1122 * @fl6: flow to lookup
497c615a
HX
1123 *
1124 * This function performs a route lookup on the given flow.
1125 *
1126 * It returns zero on success, or a standard errno code on error.
1127 */
343d60aa
RP
1128int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1129 struct flowi6 *fl6)
497c615a
HX
1130{
1131 *dst = NULL;
343d60aa 1132 return ip6_dst_lookup_tail(net, sk, dst, fl6);
497c615a 1133}
3cf3dc6c
ACM
1134EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1135
497c615a 1136/**
68d0c6d3 1137 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
b51cd7c8 1138 * @net: Network namespace to perform lookup in
68d0c6d3 1139 * @sk: socket which provides route info
4c9483b2 1140 * @fl6: flow to lookup
68d0c6d3 1141 * @final_dst: final destination address for ipsec lookup
68d0c6d3
DM
1142 *
1143 * This function performs a route lookup on the given flow.
1144 *
1145 * It returns a valid dst pointer on success, or a pointer encoded
1146 * error code.
1147 */
c4e85f73 1148struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
0e0d44ab 1149 const struct in6_addr *final_dst)
68d0c6d3
DM
1150{
1151 struct dst_entry *dst = NULL;
1152 int err;
1153
c4e85f73 1154 err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
68d0c6d3
DM
1155 if (err)
1156 return ERR_PTR(err);
1157 if (final_dst)
4e3fd7a0 1158 fl6->daddr = *final_dst;
2774c131 1159
c4e85f73 1160 return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
68d0c6d3
DM
1161}
1162EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1163
1164/**
1165 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
497c615a 1166 * @sk: socket which provides the dst cache and route info
4c9483b2 1167 * @fl6: flow to lookup
68d0c6d3 1168 * @final_dst: final destination address for ipsec lookup
96818159 1169 * @connected: whether @sk is connected or not
497c615a
HX
1170 *
1171 * This function performs a route lookup on the given flow with the
1172 * possibility of using the cached route in the socket if it is valid.
1173 * It will take the socket dst lock when operating on the dst cache.
1174 * As a result, this function can only be used in process context.
1175 *
96818159
AK
1176 * In addition, for a connected socket, cache the dst in the socket
1177 * if the current cache is not valid.
1178 *
68d0c6d3
DM
1179 * It returns a valid dst pointer on success, or a pointer encoded
1180 * error code.
497c615a 1181 */
4c9483b2 1182struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
96818159
AK
1183 const struct in6_addr *final_dst,
1184 bool connected)
497c615a 1185{
68d0c6d3 1186 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
497c615a 1187
4c9483b2 1188 dst = ip6_sk_dst_check(sk, dst, fl6);
96818159
AK
1189 if (dst)
1190 return dst;
1191
c4e85f73 1192 dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
96818159
AK
1193 if (connected && !IS_ERR(dst))
1194 ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
68d0c6d3 1195
00bc0ef5 1196 return dst;
497c615a 1197}
68d0c6d3 1198EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
497c615a 1199
571912c6
MV
1200/**
1201 * ip6_dst_lookup_tunnel - perform route lookup on tunnel
1202 * @skb: Packet for which lookup is done
1203 * @dev: Tunnel device
1204 * @net: Network namespace of tunnel device
b51cd7c8 1205 * @sock: Socket which provides route info
571912c6
MV
1206 * @saddr: Memory to store the src ip address
1207 * @info: Tunnel information
1208 * @protocol: IP protocol
b51cd7c8 1209 * @use_cache: Flag to enable cache usage
571912c6
MV
1210 * This function performs a route lookup on a tunnel
1211 *
1212 * It returns a valid dst pointer and stores src address to be used in
1213 * tunnel in param saddr on success, else a pointer encoded error code.
1214 */
1215
1216struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
1217 struct net_device *dev,
1218 struct net *net,
1219 struct socket *sock,
1220 struct in6_addr *saddr,
1221 const struct ip_tunnel_info *info,
1222 u8 protocol,
1223 bool use_cache)
1224{
1225 struct dst_entry *dst = NULL;
1226#ifdef CONFIG_DST_CACHE
1227 struct dst_cache *dst_cache;
1228#endif
1229 struct flowi6 fl6;
1230 __u8 prio;
1231
1232#ifdef CONFIG_DST_CACHE
1233 dst_cache = (struct dst_cache *)&info->dst_cache;
1234 if (use_cache) {
1235 dst = dst_cache_get_ip6(dst_cache, saddr);
1236 if (dst)
1237 return dst;
1238 }
1239#endif
1240 memset(&fl6, 0, sizeof(fl6));
1241 fl6.flowi6_mark = skb->mark;
1242 fl6.flowi6_proto = protocol;
1243 fl6.daddr = info->key.u.ipv6.dst;
1244 fl6.saddr = info->key.u.ipv6.src;
1245 prio = info->key.tos;
1246 fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio),
1247 info->key.label);
1248
1249 dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
1250 NULL);
1251 if (IS_ERR(dst)) {
1252 netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
1253 return ERR_PTR(-ENETUNREACH);
1254 }
1255 if (dst->dev == dev) { /* is this necessary? */
1256 netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
1257 dst_release(dst);
1258 return ERR_PTR(-ELOOP);
1259 }
1260#ifdef CONFIG_DST_CACHE
1261 if (use_cache)
1262 dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
1263#endif
1264 *saddr = fl6.saddr;
1265 return dst;
1266}
1267EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);
1268
0178b695
HX
1269static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1270 gfp_t gfp)
1271{
1272 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1273}
1274
1275static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1276 gfp_t gfp)
1277{
1278 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1279}
1280
75a493e6 1281static void ip6_append_data_mtu(unsigned int *mtu,
0c183379
G
1282 int *maxfraglen,
1283 unsigned int fragheaderlen,
1284 struct sk_buff *skb,
75a493e6 1285 struct rt6_info *rt,
e367c2d0 1286 unsigned int orig_mtu)
0c183379
G
1287{
1288 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
63159f29 1289 if (!skb) {
0c183379 1290 /* first fragment, reserve header_len */
e367c2d0 1291 *mtu = orig_mtu - rt->dst.header_len;
0c183379
G
1292
1293 } else {
1294 /*
1295 * this fragment is not first, the headers
1296 * space is regarded as data space.
1297 */
e367c2d0 1298 *mtu = orig_mtu;
0c183379
G
1299 }
1300 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1301 + fragheaderlen - sizeof(struct frag_hdr);
1302 }
1303}
1304
366e41d9 1305static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
26879da5 1306 struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
5fdaa88d 1307 struct rt6_info *rt, struct flowi6 *fl6)
366e41d9
VY
1308{
1309 struct ipv6_pinfo *np = inet6_sk(sk);
1310 unsigned int mtu;
26879da5 1311 struct ipv6_txoptions *opt = ipc6->opt;
366e41d9
VY
1312
1313 /*
1314 * setup for corking
1315 */
1316 if (opt) {
1317 if (WARN_ON(v6_cork->opt))
1318 return -EINVAL;
1319
864e2a1f 1320 v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
63159f29 1321 if (unlikely(!v6_cork->opt))
366e41d9
VY
1322 return -ENOBUFS;
1323
864e2a1f 1324 v6_cork->opt->tot_len = sizeof(*opt);
366e41d9
VY
1325 v6_cork->opt->opt_flen = opt->opt_flen;
1326 v6_cork->opt->opt_nflen = opt->opt_nflen;
1327
1328 v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1329 sk->sk_allocation);
1330 if (opt->dst0opt && !v6_cork->opt->dst0opt)
1331 return -ENOBUFS;
1332
1333 v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1334 sk->sk_allocation);
1335 if (opt->dst1opt && !v6_cork->opt->dst1opt)
1336 return -ENOBUFS;
1337
1338 v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1339 sk->sk_allocation);
1340 if (opt->hopopt && !v6_cork->opt->hopopt)
1341 return -ENOBUFS;
1342
1343 v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1344 sk->sk_allocation);
1345 if (opt->srcrt && !v6_cork->opt->srcrt)
1346 return -ENOBUFS;
1347
1348 /* need source address above miyazawa*/
1349 }
1350 dst_hold(&rt->dst);
1351 cork->base.dst = &rt->dst;
1352 cork->fl.u.ip6 = *fl6;
26879da5
WW
1353 v6_cork->hop_limit = ipc6->hlimit;
1354 v6_cork->tclass = ipc6->tclass;
366e41d9
VY
1355 if (rt->dst.flags & DST_XFRM_TUNNEL)
1356 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
749439bf 1357 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
366e41d9
VY
1358 else
1359 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
c02b3741 1360 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
366e41d9
VY
1361 if (np->frag_size < mtu) {
1362 if (np->frag_size)
1363 mtu = np->frag_size;
1364 }
749439bf
MM
1365 if (mtu < IPV6_MIN_MTU)
1366 return -EINVAL;
366e41d9 1367 cork->base.fragsize = mtu;
fbf47813 1368 cork->base.gso_size = ipc6->gso_size;
678ca42d 1369 cork->base.tx_flags = 0;
c6af0c22 1370 cork->base.mark = ipc6->sockc.mark;
678ca42d 1371 sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
bec1f6f6 1372
0f6c480f 1373 if (dst_allfrag(xfrm_dst_path(&rt->dst)))
366e41d9
VY
1374 cork->base.flags |= IPCORK_ALLFRAG;
1375 cork->base.length = 0;
1376
5fdaa88d 1377 cork->base.transmit_time = ipc6->sockc.transmit_time;
a818f75e 1378
366e41d9
VY
1379 return 0;
1380}
1381
0bbe84a6
VY
/*
 * __ip6_append_data - append user data to the skbs pending on @queue
 * @sk: sending socket
 * @fl6: flow of the datagram, used for local error reporting
 * @queue: queue holding the pending skbs of the corked datagram
 * @cork: generic cork state (route, fragment size, gso size, length, ...)
 * @v6_cork: IPv6 cork state (tx options)
 * @pfrag: page fragment used when data is appended as paged data
 * @getfrag: callback that copies @len bytes at @offset of @from into @to
 * @from: opaque source handed to @getfrag
 * @length: number of bytes to append (including @transhdrlen)
 * @transhdrlen: transport header length; non-zero only for the first call
 * @flags: MSG_* flags
 * @ipc6: per-call control data (only ->dontfrag is read here)
 *
 * Data is first packed into the tail skb; when that is full a new skb is
 * allocated, sized so that every skb but the last ends on an 8 byte
 * fragment boundary (maxfraglen).
 *
 * Returns 0 on success or a negative errno.  On the error paths below the
 * "error" label, cork->length is reduced by the part of @length that was
 * not appended and IPSTATS_MIB_OUTDISCARDS is incremented.
 */
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	struct ubuf_info *uarg = NULL;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;
	unsigned int wmem_alloc_delta = 0;
	bool paged, extra_uref = false;

	/* Extension header room is only accounted for on the very first skb. */
	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	/* With a gso size set, build one large paged skb up to IP6_MAX_MTU. */
	paged = !!cork->gso_size;
	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
	orig_mtu = mtu;

	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
		tskey = sk->sk_tskey++;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	/* Headers repeated in every fragment: IPv6 header plus the
	 * non-fragmentable part.
	 */
	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	/* Largest skb length whose payload past fragheaderlen is a
	 * multiple of 8, after reserving room for a fragment header.
	 */
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	/* UDP/RAW sockets that asked not to fragment get an rxpmtu
	 * notification in addition to the EMSGSIZE error.
	 */
	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		/* Clamp at zero: headersize may exceed mtu on this path. */
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    (!(flags & MSG_MORE) || cork->gso_size) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
		uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
		if (!uarg)
			return -ENOBUFS;
		extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
		if (rt->dst.dev->features & NETIF_F_SG &&
		    csummode == CHECKSUM_PARTIAL) {
			paged = true;
		} else {
			/* Data will be copied; keep uarg for the notification. */
			uarg->zerocopy = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	/* Nothing queued yet: jump straight into the allocation branch. */
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			unsigned int pagedlen;
alloc_new_skb:
			/* There's no room in the current skb */
			/* fraggap: bytes of the current skb beyond maxfraglen,
			 * moved into the new skb further down.
			 */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;
			pagedlen = 0;

			/* Choose the linear allocation size: a full mtu when
			 * more data is expected and the device cannot do
			 * scatter-gather, the exact fragment when not paged,
			 * otherwise at most MAX_HEADER with the rest paged.
			 */
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else if (!paged)
				alloclen = fraglen;
			else {
				alloclen = min_t(int, fraglen, MAX_HEADER);
				pagedlen = fraglen - alloclen;
			}

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			/* Bytes to fetch from the caller into the linear area. */
			copy = datalen - transhdrlen - fraggap - pagedlen;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				/* Later skbs are allocated directly, bounded
				 * by twice the socket send buffer.
				 */
				skb = NULL;
				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
				    2 * sk->sk_sndbuf)
					skb = alloc_skb(alloclen + hh_len,
							sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen - pagedlen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the overhang of the previous skb into
				 * this one and fix up both checksums.
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= copy + transhdrlen;
			/* Header room applies to the first skb only. */
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = cork->tx_flags;
			cork->tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			if (!skb->destructor) {
				/* Charge the skb to the socket; sk_wmem_alloc
				 * itself is updated once, on exit.
				 */
				skb->destructor = sock_wfree;
				skb->sk = sk;
				wmem_alloc_delta += skb->truesize;
			}
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			/* No scatter-gather: copy into the linear tailroom. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else if (!uarg || !uarg->zerocopy) {
			/* Copy into the page fragment and attach it as
			 * paged data.
			 */
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			wmem_alloc_delta += copy;
		} else {
			err = skb_zerocopy_iter_dgram(skb, from, copy);
			if (err < 0)
				goto error;
		}
		offset += copy;
		length -= copy;
	}

	if (wmem_alloc_delta)
		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return 0;

error_efault:
	err = -EFAULT;
error:
	if (uarg)
		sock_zerocopy_put_abort(uarg, extra_uref);
	/* Undo the accounting for the bytes that were not appended. */
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	/* skbs queued before the failure stay charged to the socket. */
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return err;
}
0bbe84a6
VY
1725
1726int ip6_append_data(struct sock *sk,
1727 int getfrag(void *from, char *to, int offset, int len,
1728 int odd, struct sk_buff *skb),
26879da5
WW
1729 void *from, int length, int transhdrlen,
1730 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
5fdaa88d 1731 struct rt6_info *rt, unsigned int flags)
0bbe84a6
VY
1732{
1733 struct inet_sock *inet = inet_sk(sk);
1734 struct ipv6_pinfo *np = inet6_sk(sk);
1735 int exthdrlen;
1736 int err;
1737
1738 if (flags&MSG_PROBE)
1739 return 0;
1740 if (skb_queue_empty(&sk->sk_write_queue)) {
1741 /*
1742 * setup for corking
1743 */
26879da5 1744 err = ip6_setup_cork(sk, &inet->cork, &np->cork,
5fdaa88d 1745 ipc6, rt, fl6);
0bbe84a6
VY
1746 if (err)
1747 return err;
1748
26879da5 1749 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
0bbe84a6
VY
1750 length += exthdrlen;
1751 transhdrlen += exthdrlen;
1752 } else {
1753 fl6 = &inet->cork.fl.u.ip6;
1754 transhdrlen = 0;
1755 }
1756
1757 return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1758 &np->cork, sk_page_frag(sk), getfrag,
5fdaa88d 1759 from, length, transhdrlen, flags, ipc6);
0bbe84a6 1760}
a495f836 1761EXPORT_SYMBOL_GPL(ip6_append_data);
1da177e4 1762
366e41d9
VY
1763static void ip6_cork_release(struct inet_cork_full *cork,
1764 struct inet6_cork *v6_cork)
bf138862 1765{
366e41d9
VY
1766 if (v6_cork->opt) {
1767 kfree(v6_cork->opt->dst0opt);
1768 kfree(v6_cork->opt->dst1opt);
1769 kfree(v6_cork->opt->hopopt);
1770 kfree(v6_cork->opt->srcrt);
1771 kfree(v6_cork->opt);
1772 v6_cork->opt = NULL;
0178b695
HX
1773 }
1774
366e41d9
VY
1775 if (cork->base.dst) {
1776 dst_release(cork->base.dst);
1777 cork->base.dst = NULL;
1778 cork->base.flags &= ~IPCORK_ALLFRAG;
bf138862 1779 }
366e41d9 1780 memset(&cork->fl, 0, sizeof(cork->fl));
bf138862
PE
1781}
1782
6422398c
VY
/*
 * __ip6_make_skb - turn the pending queue into one finished IPv6 packet
 * @sk: sending socket
 * @queue: queue of pending skbs built by __ip6_append_data()
 * @cork: generic cork state (route, flow, mark, transmit time)
 * @v6_cork: IPv6 cork state (tx options, hop limit, traffic class)
 *
 * The first skb becomes the head; all following skbs are chained on its
 * frag_list.  Extension headers and the IPv6 header are then pushed in
 * front of the data and the cork is released.
 *
 * Returns the finished skb, or NULL when @queue is empty (in which case
 * the cork is left untouched).
 */
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		/* Strip the per-fragment header room, using the head's length. */
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		/* The head skb now carries the accounting for this fragment. */
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	/* The push helpers update proto (and may redirect final_dst). */
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, np), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = cork->base.mark;

	skb->tstamp = cork->base.transmit_time;

	/* The skb gets its own route reference; the cork's is dropped below. */
	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}
1858
1859int ip6_send_skb(struct sk_buff *skb)
1860{
1861 struct net *net = sock_net(skb->sk);
1862 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1863 int err;
1864
33224b16 1865 err = ip6_local_out(net, skb->sk, skb);
1da177e4
LT
1866 if (err) {
1867 if (err > 0)
6ce9e7b5 1868 err = net_xmit_errno(err);
1da177e4 1869 if (err)
6422398c
VY
1870 IP6_INC_STATS(net, rt->rt6i_idev,
1871 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1872 }
1873
1da177e4 1874 return err;
6422398c
VY
1875}
1876
/*
 * Finish the datagram pending on @sk and send it.
 * Returns 0 when nothing was pending, otherwise the ip6_send_skb() result.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	return skb ? ip6_send_skb(skb) : 0;
}
a495f836 1887EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1da177e4 1888
0bbe84a6 1889static void __ip6_flush_pending_frames(struct sock *sk,
6422398c
VY
1890 struct sk_buff_head *queue,
1891 struct inet_cork_full *cork,
1892 struct inet6_cork *v6_cork)
1da177e4 1893{
1da177e4
LT
1894 struct sk_buff *skb;
1895
0bbe84a6 1896 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
adf30907
ED
1897 if (skb_dst(skb))
1898 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
e1f52208 1899 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1900 kfree_skb(skb);
1901 }
1902
6422398c 1903 ip6_cork_release(cork, v6_cork);
1da177e4 1904}
0bbe84a6
VY
1905
1906void ip6_flush_pending_frames(struct sock *sk)
1907{
6422398c
VY
1908 __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1909 &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
0bbe84a6 1910}
a495f836 1911EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
6422398c
VY
1912
1913struct sk_buff *ip6_make_skb(struct sock *sk,
1914 int getfrag(void *from, char *to, int offset,
1915 int len, int odd, struct sk_buff *skb),
1916 void *from, int length, int transhdrlen,
26879da5 1917 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
6422398c 1918 struct rt6_info *rt, unsigned int flags,
5fdaa88d 1919 struct inet_cork_full *cork)
6422398c 1920{
6422398c
VY
1921 struct inet6_cork v6_cork;
1922 struct sk_buff_head queue;
26879da5 1923 int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
6422398c
VY
1924 int err;
1925
1926 if (flags & MSG_PROBE)
1927 return NULL;
1928
1929 __skb_queue_head_init(&queue);
1930
1cd7884d
WB
1931 cork->base.flags = 0;
1932 cork->base.addr = 0;
1933 cork->base.opt = NULL;
1934 cork->base.dst = NULL;
6422398c 1935 v6_cork.opt = NULL;
5fdaa88d 1936 err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
862c03ee 1937 if (err) {
1cd7884d 1938 ip6_cork_release(cork, &v6_cork);
6422398c 1939 return ERR_PTR(err);
862c03ee 1940 }
26879da5
WW
1941 if (ipc6->dontfrag < 0)
1942 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
6422398c 1943
1cd7884d 1944 err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
6422398c
VY
1945 &current->task_frag, getfrag, from,
1946 length + exthdrlen, transhdrlen + exthdrlen,
5fdaa88d 1947 flags, ipc6);
6422398c 1948 if (err) {
1cd7884d 1949 __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
6422398c
VY
1950 return ERR_PTR(err);
1951 }
1952
1cd7884d 1953 return __ip6_make_skb(sk, &queue, cork, &v6_cork);
6422398c 1954}