Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf
[linux-2.6-block.git] / net / netfilter / nf_nat_proto.c
CommitLineData
d2912cb1 1// SPDX-License-Identifier: GPL-2.0-only
faec18db
FW
2/* (C) 1999-2001 Paul `Rusty' Russell
3 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
faec18db
FW
4 */
5
6#include <linux/types.h>
7#include <linux/export.h>
8#include <linux/init.h>
9#include <linux/udp.h>
10#include <linux/tcp.h>
11#include <linux/icmp.h>
12#include <linux/icmpv6.h>
13
14#include <linux/dccp.h>
15#include <linux/sctp.h>
16#include <net/sctp/checksum.h>
17
18#include <linux/netfilter.h>
19#include <net/netfilter/nf_nat.h>
faec18db 20
3bf195ae
FW
21#include <linux/ipv6.h>
22#include <linux/netfilter_ipv6.h>
23#include <net/checksum.h>
24#include <net/ip6_checksum.h>
25#include <net/ip6_route.h>
26#include <net/xfrm.h>
27#include <net/ipv6.h>
28
29#include <net/netfilter/nf_conntrack_core.h>
30#include <net/netfilter/nf_conntrack.h>
14cb1a6e 31#include <linux/netfilter/nfnetlink_conntrack.h>
3bf195ae 32
03fe5efc
FW
33static void nf_csum_update(struct sk_buff *skb,
34 unsigned int iphdroff, __sum16 *check,
35 const struct nf_conntrack_tuple *t,
36 enum nf_nat_manip_type maniptype);
37
faec18db
FW
38static void
39__udp_manip_pkt(struct sk_buff *skb,
faec18db
FW
40 unsigned int iphdroff, struct udphdr *hdr,
41 const struct nf_conntrack_tuple *tuple,
42 enum nf_nat_manip_type maniptype, bool do_csum)
43{
44 __be16 *portptr, newport;
45
46 if (maniptype == NF_NAT_MANIP_SRC) {
47 /* Get rid of src port */
48 newport = tuple->src.u.udp.port;
49 portptr = &hdr->source;
50 } else {
51 /* Get rid of dst port */
52 newport = tuple->dst.u.udp.port;
53 portptr = &hdr->dest;
54 }
55 if (do_csum) {
03fe5efc 56 nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
faec18db
FW
57 inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
58 false);
59 if (!hdr->check)
60 hdr->check = CSUM_MANGLED_0;
61 }
62 *portptr = newport;
63}
64
65static bool udp_manip_pkt(struct sk_buff *skb,
faec18db
FW
66 unsigned int iphdroff, unsigned int hdroff,
67 const struct nf_conntrack_tuple *tuple,
68 enum nf_nat_manip_type maniptype)
69{
70 struct udphdr *hdr;
71 bool do_csum;
72
86f04538 73 if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
faec18db
FW
74 return false;
75
76 hdr = (struct udphdr *)(skb->data + hdroff);
77 do_csum = hdr->check || skb->ip_summed == CHECKSUM_PARTIAL;
78
03fe5efc 79 __udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, do_csum);
faec18db
FW
80 return true;
81}
82
83static bool udplite_manip_pkt(struct sk_buff *skb,
faec18db
FW
84 unsigned int iphdroff, unsigned int hdroff,
85 const struct nf_conntrack_tuple *tuple,
86 enum nf_nat_manip_type maniptype)
87{
88#ifdef CONFIG_NF_CT_PROTO_UDPLITE
89 struct udphdr *hdr;
90
86f04538 91 if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
faec18db
FW
92 return false;
93
94 hdr = (struct udphdr *)(skb->data + hdroff);
03fe5efc 95 __udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, true);
faec18db
FW
96#endif
97 return true;
98}
99
100static bool
101sctp_manip_pkt(struct sk_buff *skb,
faec18db
FW
102 unsigned int iphdroff, unsigned int hdroff,
103 const struct nf_conntrack_tuple *tuple,
104 enum nf_nat_manip_type maniptype)
105{
106#ifdef CONFIG_NF_CT_PROTO_SCTP
107 struct sctphdr *hdr;
108 int hdrsize = 8;
109
110 /* This could be an inner header returned in imcp packet; in such
111 * cases we cannot update the checksum field since it is outside
112 * of the 8 bytes of transport layer headers we are guaranteed.
113 */
114 if (skb->len >= hdroff + sizeof(*hdr))
115 hdrsize = sizeof(*hdr);
116
86f04538 117 if (skb_ensure_writable(skb, hdroff + hdrsize))
faec18db
FW
118 return false;
119
120 hdr = (struct sctphdr *)(skb->data + hdroff);
121
122 if (maniptype == NF_NAT_MANIP_SRC) {
123 /* Get rid of src port */
124 hdr->source = tuple->src.u.sctp.port;
125 } else {
126 /* Get rid of dst port */
127 hdr->dest = tuple->dst.u.sctp.port;
128 }
129
130 if (hdrsize < sizeof(*hdr))
131 return true;
132
133 if (skb->ip_summed != CHECKSUM_PARTIAL) {
134 hdr->checksum = sctp_compute_cksum(skb, hdroff);
135 skb->ip_summed = CHECKSUM_NONE;
136 }
137
138#endif
139 return true;
140}
141
142static bool
143tcp_manip_pkt(struct sk_buff *skb,
faec18db
FW
144 unsigned int iphdroff, unsigned int hdroff,
145 const struct nf_conntrack_tuple *tuple,
146 enum nf_nat_manip_type maniptype)
147{
148 struct tcphdr *hdr;
149 __be16 *portptr, newport, oldport;
150 int hdrsize = 8; /* TCP connection tracking guarantees this much */
151
152 /* this could be a inner header returned in icmp packet; in such
153 cases we cannot update the checksum field since it is outside of
154 the 8 bytes of transport layer headers we are guaranteed */
155 if (skb->len >= hdroff + sizeof(struct tcphdr))
156 hdrsize = sizeof(struct tcphdr);
157
86f04538 158 if (skb_ensure_writable(skb, hdroff + hdrsize))
faec18db
FW
159 return false;
160
161 hdr = (struct tcphdr *)(skb->data + hdroff);
162
163 if (maniptype == NF_NAT_MANIP_SRC) {
164 /* Get rid of src port */
165 newport = tuple->src.u.tcp.port;
166 portptr = &hdr->source;
167 } else {
168 /* Get rid of dst port */
169 newport = tuple->dst.u.tcp.port;
170 portptr = &hdr->dest;
171 }
172
173 oldport = *portptr;
174 *portptr = newport;
175
176 if (hdrsize < sizeof(*hdr))
177 return true;
178
03fe5efc 179 nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
faec18db
FW
180 inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, false);
181 return true;
182}
183
184static bool
185dccp_manip_pkt(struct sk_buff *skb,
faec18db
FW
186 unsigned int iphdroff, unsigned int hdroff,
187 const struct nf_conntrack_tuple *tuple,
188 enum nf_nat_manip_type maniptype)
189{
190#ifdef CONFIG_NF_CT_PROTO_DCCP
191 struct dccp_hdr *hdr;
192 __be16 *portptr, oldport, newport;
193 int hdrsize = 8; /* DCCP connection tracking guarantees this much */
194
195 if (skb->len >= hdroff + sizeof(struct dccp_hdr))
196 hdrsize = sizeof(struct dccp_hdr);
197
86f04538 198 if (skb_ensure_writable(skb, hdroff + hdrsize))
faec18db
FW
199 return false;
200
201 hdr = (struct dccp_hdr *)(skb->data + hdroff);
202
203 if (maniptype == NF_NAT_MANIP_SRC) {
204 newport = tuple->src.u.dccp.port;
205 portptr = &hdr->dccph_sport;
206 } else {
207 newport = tuple->dst.u.dccp.port;
208 portptr = &hdr->dccph_dport;
209 }
210
211 oldport = *portptr;
212 *portptr = newport;
213
214 if (hdrsize < sizeof(*hdr))
215 return true;
216
03fe5efc 217 nf_csum_update(skb, iphdroff, &hdr->dccph_checksum, tuple, maniptype);
faec18db
FW
218 inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport,
219 false);
220#endif
221 return true;
222}
223
224static bool
225icmp_manip_pkt(struct sk_buff *skb,
faec18db
FW
226 unsigned int iphdroff, unsigned int hdroff,
227 const struct nf_conntrack_tuple *tuple,
228 enum nf_nat_manip_type maniptype)
229{
230 struct icmphdr *hdr;
231
86f04538 232 if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
faec18db
FW
233 return false;
234
235 hdr = (struct icmphdr *)(skb->data + hdroff);
236 inet_proto_csum_replace2(&hdr->checksum, skb,
237 hdr->un.echo.id, tuple->src.u.icmp.id, false);
238 hdr->un.echo.id = tuple->src.u.icmp.id;
239 return true;
240}
241
242static bool
243icmpv6_manip_pkt(struct sk_buff *skb,
faec18db
FW
244 unsigned int iphdroff, unsigned int hdroff,
245 const struct nf_conntrack_tuple *tuple,
246 enum nf_nat_manip_type maniptype)
247{
248 struct icmp6hdr *hdr;
249
86f04538 250 if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
faec18db
FW
251 return false;
252
253 hdr = (struct icmp6hdr *)(skb->data + hdroff);
03fe5efc 254 nf_csum_update(skb, iphdroff, &hdr->icmp6_cksum, tuple, maniptype);
faec18db
FW
255 if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST ||
256 hdr->icmp6_type == ICMPV6_ECHO_REPLY) {
257 inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
258 hdr->icmp6_identifier,
259 tuple->src.u.icmp.id, false);
260 hdr->icmp6_identifier = tuple->src.u.icmp.id;
261 }
262 return true;
263}
264
265/* manipulate a GRE packet according to maniptype */
266static bool
267gre_manip_pkt(struct sk_buff *skb,
faec18db
FW
268 unsigned int iphdroff, unsigned int hdroff,
269 const struct nf_conntrack_tuple *tuple,
270 enum nf_nat_manip_type maniptype)
271{
272#if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE)
273 const struct gre_base_hdr *greh;
274 struct pptp_gre_header *pgreh;
275
276 /* pgreh includes two optional 32bit fields which are not required
277 * to be there. That's where the magic '8' comes from */
86f04538 278 if (skb_ensure_writable(skb, hdroff + sizeof(*pgreh) - 8))
faec18db
FW
279 return false;
280
281 greh = (void *)skb->data + hdroff;
282 pgreh = (struct pptp_gre_header *)greh;
283
284 /* we only have destination manip of a packet, since 'source key'
285 * is not present in the packet itself */
286 if (maniptype != NF_NAT_MANIP_DST)
287 return true;
288
289 switch (greh->flags & GRE_VERSION) {
290 case GRE_VERSION_0:
291 /* We do not currently NAT any GREv0 packets.
292 * Try to behave like "nf_nat_proto_unknown" */
293 break;
294 case GRE_VERSION_1:
295 pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
296 pgreh->call_id = tuple->dst.u.gre.key;
297 break;
298 default:
299 pr_debug("can't nat unknown GRE version\n");
300 return false;
301 }
302#endif
303 return true;
304}
305
14cb1a6e 306static bool l4proto_manip_pkt(struct sk_buff *skb,
faec18db
FW
307 unsigned int iphdroff, unsigned int hdroff,
308 const struct nf_conntrack_tuple *tuple,
309 enum nf_nat_manip_type maniptype)
310{
311 switch (tuple->dst.protonum) {
312 case IPPROTO_TCP:
03fe5efc 313 return tcp_manip_pkt(skb, iphdroff, hdroff,
faec18db
FW
314 tuple, maniptype);
315 case IPPROTO_UDP:
03fe5efc 316 return udp_manip_pkt(skb, iphdroff, hdroff,
faec18db
FW
317 tuple, maniptype);
318 case IPPROTO_UDPLITE:
03fe5efc 319 return udplite_manip_pkt(skb, iphdroff, hdroff,
faec18db
FW
320 tuple, maniptype);
321 case IPPROTO_SCTP:
03fe5efc 322 return sctp_manip_pkt(skb, iphdroff, hdroff,
faec18db
FW
323 tuple, maniptype);
324 case IPPROTO_ICMP:
03fe5efc 325 return icmp_manip_pkt(skb, iphdroff, hdroff,
faec18db
FW
326 tuple, maniptype);
327 case IPPROTO_ICMPV6:
03fe5efc 328 return icmpv6_manip_pkt(skb, iphdroff, hdroff,
faec18db
FW
329 tuple, maniptype);
330 case IPPROTO_DCCP:
03fe5efc 331 return dccp_manip_pkt(skb, iphdroff, hdroff,
faec18db
FW
332 tuple, maniptype);
333 case IPPROTO_GRE:
03fe5efc 334 return gre_manip_pkt(skb, iphdroff, hdroff,
faec18db
FW
335 tuple, maniptype);
336 }
337
338 /* If we don't know protocol -- no error, pass it unmodified. */
339 return true;
340}
3bf195ae
FW
341
342static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
343 unsigned int iphdroff,
344 const struct nf_conntrack_tuple *target,
345 enum nf_nat_manip_type maniptype)
346{
347 struct iphdr *iph;
348 unsigned int hdroff;
349
86f04538 350 if (skb_ensure_writable(skb, iphdroff + sizeof(*iph)))
3bf195ae
FW
351 return false;
352
353 iph = (void *)skb->data + iphdroff;
354 hdroff = iphdroff + iph->ihl * 4;
355
03fe5efc 356 if (!l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype))
3bf195ae
FW
357 return false;
358 iph = (void *)skb->data + iphdroff;
359
360 if (maniptype == NF_NAT_MANIP_SRC) {
361 csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
362 iph->saddr = target->src.u3.ip;
363 } else {
364 csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
365 iph->daddr = target->dst.u3.ip;
366 }
367 return true;
368}
369
370static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
371 unsigned int iphdroff,
372 const struct nf_conntrack_tuple *target,
373 enum nf_nat_manip_type maniptype)
374{
375#if IS_ENABLED(CONFIG_IPV6)
376 struct ipv6hdr *ipv6h;
377 __be16 frag_off;
378 int hdroff;
379 u8 nexthdr;
380
86f04538 381 if (skb_ensure_writable(skb, iphdroff + sizeof(*ipv6h)))
3bf195ae
FW
382 return false;
383
384 ipv6h = (void *)skb->data + iphdroff;
385 nexthdr = ipv6h->nexthdr;
386 hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h),
387 &nexthdr, &frag_off);
388 if (hdroff < 0)
389 goto manip_addr;
390
391 if ((frag_off & htons(~0x7)) == 0 &&
03fe5efc 392 !l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype))
3bf195ae
FW
393 return false;
394
395 /* must reload, offset might have changed */
396 ipv6h = (void *)skb->data + iphdroff;
397
398manip_addr:
399 if (maniptype == NF_NAT_MANIP_SRC)
400 ipv6h->saddr = target->src.u3.in6;
401 else
402 ipv6h->daddr = target->dst.u3.in6;
403
404#endif
405 return true;
406}
407
2e666b22
FW
408unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct,
409 enum nf_nat_manip_type mtype,
410 enum ip_conntrack_dir dir)
411{
412 struct nf_conntrack_tuple target;
413
414 /* We are aiming to look like inverse of other direction. */
415 nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
416
417 switch (target.src.l3num) {
418 case NFPROTO_IPV6:
419 if (nf_nat_ipv6_manip_pkt(skb, 0, &target, mtype))
420 return NF_ACCEPT;
421 break;
422 case NFPROTO_IPV4:
423 if (nf_nat_ipv4_manip_pkt(skb, 0, &target, mtype))
424 return NF_ACCEPT;
425 break;
426 default:
427 WARN_ON_ONCE(1);
428 break;
429 }
430
431 return NF_DROP;
432}
433
3bf195ae
FW
434static void nf_nat_ipv4_csum_update(struct sk_buff *skb,
435 unsigned int iphdroff, __sum16 *check,
436 const struct nf_conntrack_tuple *t,
437 enum nf_nat_manip_type maniptype)
438{
439 struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
440 __be32 oldip, newip;
441
442 if (maniptype == NF_NAT_MANIP_SRC) {
443 oldip = iph->saddr;
444 newip = t->src.u3.ip;
445 } else {
446 oldip = iph->daddr;
447 newip = t->dst.u3.ip;
448 }
449 inet_proto_csum_replace4(check, skb, oldip, newip, true);
450}
451
452static void nf_nat_ipv6_csum_update(struct sk_buff *skb,
453 unsigned int iphdroff, __sum16 *check,
454 const struct nf_conntrack_tuple *t,
455 enum nf_nat_manip_type maniptype)
456{
457#if IS_ENABLED(CONFIG_IPV6)
458 const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff);
459 const struct in6_addr *oldip, *newip;
460
461 if (maniptype == NF_NAT_MANIP_SRC) {
462 oldip = &ipv6h->saddr;
463 newip = &t->src.u3.in6;
464 } else {
465 oldip = &ipv6h->daddr;
466 newip = &t->dst.u3.in6;
467 }
468 inet_proto_csum_replace16(check, skb, oldip->s6_addr32,
469 newip->s6_addr32, true);
470#endif
471}
472
03fe5efc
FW
473static void nf_csum_update(struct sk_buff *skb,
474 unsigned int iphdroff, __sum16 *check,
475 const struct nf_conntrack_tuple *t,
476 enum nf_nat_manip_type maniptype)
477{
478 switch (t->src.l3num) {
479 case NFPROTO_IPV4:
480 nf_nat_ipv4_csum_update(skb, iphdroff, check, t, maniptype);
481 return;
482 case NFPROTO_IPV6:
483 nf_nat_ipv6_csum_update(skb, iphdroff, check, t, maniptype);
484 return;
485 }
486}
487
3bf195ae
FW
488static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
489 u8 proto, void *data, __sum16 *check,
490 int datalen, int oldlen)
491{
492 if (skb->ip_summed != CHECKSUM_PARTIAL) {
493 const struct iphdr *iph = ip_hdr(skb);
494
495 skb->ip_summed = CHECKSUM_PARTIAL;
496 skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
497 ip_hdrlen(skb);
498 skb->csum_offset = (void *)check - data;
499 *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, datalen,
500 proto, 0);
501 } else {
502 inet_proto_csum_replace2(check, skb,
503 htons(oldlen), htons(datalen), true);
504 }
505}
506
507#if IS_ENABLED(CONFIG_IPV6)
508static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
509 u8 proto, void *data, __sum16 *check,
510 int datalen, int oldlen)
511{
512 if (skb->ip_summed != CHECKSUM_PARTIAL) {
513 const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
514
515 skb->ip_summed = CHECKSUM_PARTIAL;
516 skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
517 (data - (void *)skb->data);
518 skb->csum_offset = (void *)check - data;
519 *check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
520 datalen, proto, 0);
521 } else {
522 inet_proto_csum_replace2(check, skb,
523 htons(oldlen), htons(datalen), true);
524 }
525}
526#endif
527
dac3fe72
FW
528void nf_nat_csum_recalc(struct sk_buff *skb,
529 u8 nfproto, u8 proto, void *data, __sum16 *check,
530 int datalen, int oldlen)
531{
532 switch (nfproto) {
533 case NFPROTO_IPV4:
534 nf_nat_ipv4_csum_recalc(skb, proto, data, check,
535 datalen, oldlen);
536 return;
537#if IS_ENABLED(CONFIG_IPV6)
538 case NFPROTO_IPV6:
539 nf_nat_ipv6_csum_recalc(skb, proto, data, check,
540 datalen, oldlen);
541 return;
542#endif
543 }
544
545 WARN_ON_ONCE(1);
546}
547
3bf195ae
FW
548int nf_nat_icmp_reply_translation(struct sk_buff *skb,
549 struct nf_conn *ct,
550 enum ip_conntrack_info ctinfo,
551 unsigned int hooknum)
552{
553 struct {
554 struct icmphdr icmp;
555 struct iphdr ip;
556 } *inside;
557 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
558 enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
559 unsigned int hdrlen = ip_hdrlen(skb);
560 struct nf_conntrack_tuple target;
561 unsigned long statusbit;
562
563 WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
564
86f04538 565 if (skb_ensure_writable(skb, hdrlen + sizeof(*inside)))
3bf195ae 566 return 0;
5d154984 567 if (nf_ip_checksum(skb, hooknum, hdrlen, IPPROTO_ICMP))
3bf195ae
FW
568 return 0;
569
570 inside = (void *)skb->data + hdrlen;
571 if (inside->icmp.type == ICMP_REDIRECT) {
572 if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
573 return 0;
574 if (ct->status & IPS_NAT_MASK)
575 return 0;
576 }
577
578 if (manip == NF_NAT_MANIP_SRC)
579 statusbit = IPS_SRC_NAT;
580 else
581 statusbit = IPS_DST_NAT;
582
583 /* Invert if this is reply direction */
584 if (dir == IP_CT_DIR_REPLY)
585 statusbit ^= IPS_NAT_MASK;
586
587 if (!(ct->status & statusbit))
588 return 1;
589
590 if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp),
591 &ct->tuplehash[!dir].tuple, !manip))
592 return 0;
593
594 if (skb->ip_summed != CHECKSUM_PARTIAL) {
595 /* Reloading "inside" here since manip_pkt may reallocate */
596 inside = (void *)skb->data + hdrlen;
597 inside->icmp.checksum = 0;
598 inside->icmp.checksum =
599 csum_fold(skb_checksum(skb, hdrlen,
600 skb->len - hdrlen, 0));
601 }
602
603 /* Change outer to look like the reply to an incoming packet */
604 nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
605 target.dst.protonum = IPPROTO_ICMP;
606 if (!nf_nat_ipv4_manip_pkt(skb, 0, &target, manip))
607 return 0;
608
609 return 1;
610}
611EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
612
613static unsigned int
614nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
615 const struct nf_hook_state *state)
616{
617 struct nf_conn *ct;
618 enum ip_conntrack_info ctinfo;
619
620 ct = nf_ct_get(skb, &ctinfo);
621 if (!ct)
622 return NF_ACCEPT;
623
624 if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
625 if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
626 if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
627 state->hook))
628 return NF_DROP;
629 else
630 return NF_ACCEPT;
631 }
632 }
633
634 return nf_nat_inet_fn(priv, skb, state);
635}
636
637static unsigned int
638nf_nat_ipv4_in(void *priv, struct sk_buff *skb,
639 const struct nf_hook_state *state)
640{
641 unsigned int ret;
642 __be32 daddr = ip_hdr(skb)->daddr;
643
644 ret = nf_nat_ipv4_fn(priv, skb, state);
645 if (ret == NF_ACCEPT && daddr != ip_hdr(skb)->daddr)
646 skb_dst_drop(skb);
647
648 return ret;
649}
650
/* IPv4 NAT hook wrapper with an optional xfrm step.
 *
 * With CONFIG_XFRM, an accepted packet that has not been xfrm-transformed
 * yet is passed to nf_xfrm_me_harder() when the source address, or (for
 * non-ICMP) the source port/id, differs between the two tuples of the
 * connection.
 */
static unsigned int
nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
		const struct nf_hook_state *state)
{
#ifdef CONFIG_XFRM
	const struct nf_conntrack_tuple *here, *there;
	enum ip_conntrack_info ctinfo;
	const struct nf_conn *ct;
	enum ip_conntrack_dir dir;
	int err;
#endif
	unsigned int ret;

	ret = nf_nat_ipv4_fn(priv, skb, state);
#ifdef CONFIG_XFRM
	if (ret != NF_ACCEPT)
		return ret;

	if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED)
		return ret;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct)
		return ret;

	dir = CTINFO2DIR(ctinfo);
	here = &ct->tuplehash[dir].tuple;
	there = &ct->tuplehash[!dir].tuple;

	if (here->src.u3.ip != there->dst.u3.ip ||
	    (here->dst.protonum != IPPROTO_ICMP &&
	     here->src.u.all != there->dst.u.all)) {
		err = nf_xfrm_me_harder(state->net, skb, AF_INET);
		if (err < 0)
			ret = NF_DROP_ERR(err);
	}
#endif
	return ret;
}
687
688static unsigned int
689nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
690 const struct nf_hook_state *state)
691{
692 const struct nf_conn *ct;
693 enum ip_conntrack_info ctinfo;
694 unsigned int ret;
695 int err;
696
697 ret = nf_nat_ipv4_fn(priv, skb, state);
698 if (ret != NF_ACCEPT)
699 return ret;
700
701 ct = nf_ct_get(skb, &ctinfo);
702 if (ct) {
703 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
704
705 if (ct->tuplehash[dir].tuple.dst.u3.ip !=
706 ct->tuplehash[!dir].tuple.src.u3.ip) {
707 err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
708 if (err < 0)
709 ret = NF_DROP_ERR(err);
710 }
711#ifdef CONFIG_XFRM
712 else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
713 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
714 ct->tuplehash[dir].tuple.dst.u.all !=
715 ct->tuplehash[!dir].tuple.src.u.all) {
716 err = nf_xfrm_me_harder(state->net, skb, AF_INET);
717 if (err < 0)
718 ret = NF_DROP_ERR(err);
719 }
720#endif
721 }
722 return ret;
723}
724
0a30ba50 725static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
3bf195ae
FW
726 /* Before packet filtering, change destination */
727 {
728 .hook = nf_nat_ipv4_in,
729 .pf = NFPROTO_IPV4,
730 .hooknum = NF_INET_PRE_ROUTING,
731 .priority = NF_IP_PRI_NAT_DST,
732 },
733 /* After packet filtering, change source */
734 {
735 .hook = nf_nat_ipv4_out,
736 .pf = NFPROTO_IPV4,
737 .hooknum = NF_INET_POST_ROUTING,
738 .priority = NF_IP_PRI_NAT_SRC,
739 },
740 /* Before packet filtering, change destination */
741 {
742 .hook = nf_nat_ipv4_local_fn,
743 .pf = NFPROTO_IPV4,
744 .hooknum = NF_INET_LOCAL_OUT,
745 .priority = NF_IP_PRI_NAT_DST,
746 },
747 /* After packet filtering, change source */
748 {
749 .hook = nf_nat_ipv4_fn,
750 .pf = NFPROTO_IPV4,
751 .hooknum = NF_INET_LOCAL_IN,
752 .priority = NF_IP_PRI_NAT_SRC,
753 },
754};
755
d2c5c103 756int nf_nat_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops)
3bf195ae 757{
d164385e
FW
758 return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv4_ops,
759 ARRAY_SIZE(nf_nat_ipv4_ops));
3bf195ae 760}
d2c5c103 761EXPORT_SYMBOL_GPL(nf_nat_ipv4_register_fn);
3bf195ae 762
d2c5c103 763void nf_nat_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
3bf195ae 764{
d164385e 765 nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv4_ops));
3bf195ae 766}
d2c5c103 767EXPORT_SYMBOL_GPL(nf_nat_ipv4_unregister_fn);
3bf195ae 768
3bf195ae 769#if IS_ENABLED(CONFIG_IPV6)
3bf195ae
FW
770int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
771 struct nf_conn *ct,
772 enum ip_conntrack_info ctinfo,
773 unsigned int hooknum,
774 unsigned int hdrlen)
775{
776 struct {
777 struct icmp6hdr icmp6;
778 struct ipv6hdr ip6;
779 } *inside;
780 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
781 enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
782 struct nf_conntrack_tuple target;
783 unsigned long statusbit;
784
785 WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
786
86f04538 787 if (skb_ensure_writable(skb, hdrlen + sizeof(*inside)))
3bf195ae
FW
788 return 0;
789 if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6))
790 return 0;
791
792 inside = (void *)skb->data + hdrlen;
793 if (inside->icmp6.icmp6_type == NDISC_REDIRECT) {
794 if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
795 return 0;
796 if (ct->status & IPS_NAT_MASK)
797 return 0;
798 }
799
800 if (manip == NF_NAT_MANIP_SRC)
801 statusbit = IPS_SRC_NAT;
802 else
803 statusbit = IPS_DST_NAT;
804
805 /* Invert if this is reply direction */
806 if (dir == IP_CT_DIR_REPLY)
807 statusbit ^= IPS_NAT_MASK;
808
809 if (!(ct->status & statusbit))
810 return 1;
811
812 if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6),
813 &ct->tuplehash[!dir].tuple, !manip))
814 return 0;
815
816 if (skb->ip_summed != CHECKSUM_PARTIAL) {
817 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
818
819 inside = (void *)skb->data + hdrlen;
820 inside->icmp6.icmp6_cksum = 0;
821 inside->icmp6.icmp6_cksum =
822 csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
823 skb->len - hdrlen, IPPROTO_ICMPV6,
824 skb_checksum(skb, hdrlen,
825 skb->len - hdrlen, 0));
826 }
827
828 nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
829 target.dst.protonum = IPPROTO_ICMPV6;
830 if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip))
831 return 0;
832
833 return 1;
834}
835EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation);
836
837static unsigned int
838nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
839 const struct nf_hook_state *state)
840{
841 struct nf_conn *ct;
842 enum ip_conntrack_info ctinfo;
843 __be16 frag_off;
844 int hdrlen;
845 u8 nexthdr;
846
847 ct = nf_ct_get(skb, &ctinfo);
848 /* Can't track? It's not due to stress, or conntrack would
849 * have dropped it. Hence it's the user's responsibilty to
850 * packet filter it out, or implement conntrack/NAT for that
851 * protocol. 8) --RR
852 */
853 if (!ct)
854 return NF_ACCEPT;
855
856 if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
857 nexthdr = ipv6_hdr(skb)->nexthdr;
858 hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
859 &nexthdr, &frag_off);
860
861 if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
862 if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
863 state->hook,
864 hdrlen))
865 return NF_DROP;
866 else
867 return NF_ACCEPT;
868 }
869 }
870
871 return nf_nat_inet_fn(priv, skb, state);
872}
873
874static unsigned int
875nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
876 const struct nf_hook_state *state)
877{
878 unsigned int ret;
879 struct in6_addr daddr = ipv6_hdr(skb)->daddr;
880
881 ret = nf_nat_ipv6_fn(priv, skb, state);
882 if (ret != NF_DROP && ret != NF_STOLEN &&
883 ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
884 skb_dst_drop(skb);
885
886 return ret;
887}
888
/* IPv6 NAT hook wrapper with an optional xfrm step.
 *
 * With CONFIG_XFRM, an accepted packet that has not been xfrm-transformed
 * yet is passed to nf_xfrm_me_harder() when the source address, or (for
 * non-ICMPv6) the source port/id, differs between the two tuples of the
 * connection.
 */
static unsigned int
nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
		const struct nf_hook_state *state)
{
#ifdef CONFIG_XFRM
	const struct nf_conntrack_tuple *here, *there;
	enum ip_conntrack_info ctinfo;
	const struct nf_conn *ct;
	enum ip_conntrack_dir dir;
	int err;
#endif
	unsigned int ret;

	ret = nf_nat_ipv6_fn(priv, skb, state);
#ifdef CONFIG_XFRM
	if (ret != NF_ACCEPT)
		return ret;

	if (IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED)
		return ret;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct)
		return ret;

	dir = CTINFO2DIR(ctinfo);
	here = &ct->tuplehash[dir].tuple;
	there = &ct->tuplehash[!dir].tuple;

	if (!nf_inet_addr_cmp(&here->src.u3, &there->dst.u3) ||
	    (here->dst.protonum != IPPROTO_ICMPV6 &&
	     here->src.u.all != there->dst.u.all)) {
		err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
		if (err < 0)
			ret = NF_DROP_ERR(err);
	}
#endif

	return ret;
}
925
3bf195ae
FW
926static unsigned int
927nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
928 const struct nf_hook_state *state)
929{
930 const struct nf_conn *ct;
931 enum ip_conntrack_info ctinfo;
932 unsigned int ret;
933 int err;
934
935 ret = nf_nat_ipv6_fn(priv, skb, state);
936 if (ret != NF_ACCEPT)
937 return ret;
938
939 ct = nf_ct_get(skb, &ctinfo);
940 if (ct) {
941 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
942
943 if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
944 &ct->tuplehash[!dir].tuple.src.u3)) {
c1deb065 945 err = nf_ip6_route_me_harder(state->net, skb);
3bf195ae
FW
946 if (err < 0)
947 ret = NF_DROP_ERR(err);
948 }
949#ifdef CONFIG_XFRM
950 else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
951 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
952 ct->tuplehash[dir].tuple.dst.u.all !=
953 ct->tuplehash[!dir].tuple.src.u.all) {
954 err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
955 if (err < 0)
956 ret = NF_DROP_ERR(err);
957 }
958#endif
959 }
960
961 return ret;
962}
963
0a30ba50 964static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
3bf195ae
FW
965 /* Before packet filtering, change destination */
966 {
967 .hook = nf_nat_ipv6_in,
968 .pf = NFPROTO_IPV6,
969 .hooknum = NF_INET_PRE_ROUTING,
970 .priority = NF_IP6_PRI_NAT_DST,
971 },
972 /* After packet filtering, change source */
973 {
974 .hook = nf_nat_ipv6_out,
975 .pf = NFPROTO_IPV6,
976 .hooknum = NF_INET_POST_ROUTING,
977 .priority = NF_IP6_PRI_NAT_SRC,
978 },
979 /* Before packet filtering, change destination */
980 {
981 .hook = nf_nat_ipv6_local_fn,
982 .pf = NFPROTO_IPV6,
983 .hooknum = NF_INET_LOCAL_OUT,
984 .priority = NF_IP6_PRI_NAT_DST,
985 },
986 /* After packet filtering, change source */
987 {
988 .hook = nf_nat_ipv6_fn,
989 .pf = NFPROTO_IPV6,
990 .hooknum = NF_INET_LOCAL_IN,
991 .priority = NF_IP6_PRI_NAT_SRC,
992 },
993};
994
d2c5c103 995int nf_nat_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops)
3bf195ae 996{
d164385e 997 return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv6_ops,
3bf195ae
FW
998 ARRAY_SIZE(nf_nat_ipv6_ops));
999}
d2c5c103 1000EXPORT_SYMBOL_GPL(nf_nat_ipv6_register_fn);
3bf195ae 1001
d2c5c103 1002void nf_nat_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
3bf195ae 1003{
d164385e 1004 nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv6_ops));
3bf195ae 1005}
d2c5c103 1006EXPORT_SYMBOL_GPL(nf_nat_ipv6_unregister_fn);
3bf195ae 1007#endif /* CONFIG_IPV6 */
d164385e
FW
1008
1009#if defined(CONFIG_NF_TABLES_INET) && IS_ENABLED(CONFIG_NFT_NAT)
1010int nf_nat_inet_register_fn(struct net *net, const struct nf_hook_ops *ops)
1011{
1012 int ret;
1013
1014 if (WARN_ON_ONCE(ops->pf != NFPROTO_INET))
1015 return -EINVAL;
1016
1017 ret = nf_nat_register_fn(net, NFPROTO_IPV6, ops, nf_nat_ipv6_ops,
1018 ARRAY_SIZE(nf_nat_ipv6_ops));
1019 if (ret)
1020 return ret;
1021
1022 ret = nf_nat_register_fn(net, NFPROTO_IPV4, ops, nf_nat_ipv4_ops,
1023 ARRAY_SIZE(nf_nat_ipv4_ops));
1024 if (ret)
1025 nf_nat_ipv6_unregister_fn(net, ops);
1026
1027 return ret;
1028}
1029EXPORT_SYMBOL_GPL(nf_nat_inet_register_fn);
1030
1031void nf_nat_inet_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
1032{
1033 nf_nat_unregister_fn(net, NFPROTO_IPV4, ops, ARRAY_SIZE(nf_nat_ipv4_ops));
1034 nf_nat_unregister_fn(net, NFPROTO_IPV6, ops, ARRAY_SIZE(nf_nat_ipv6_ops));
1035}
1036EXPORT_SYMBOL_GPL(nf_nat_inet_unregister_fn);
1037#endif /* NFT INET NAT */