// SPDX-License-Identifier: GPL-2.0-only
/*
 * (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2011 Patrick McHardy <kaber@trash.net>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/types.h>
#include <linux/timer.h>
#include <linux/skbuff.h>
#include <linux/gfp.h>
#include <net/xfrm.h>
#include <linux/jhash.h>
#include <linux/rtnetlink.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <linux/netfilter/nf_nat.h>

#include "nf_internals.h"

static spinlock_t nf_nat_locks[CONNTRACK_LOCKS];

static DEFINE_MUTEX(nf_nat_proto_mutex);
static unsigned int nat_net_id __read_mostly;

static struct hlist_head *nf_nat_bysource __read_mostly;
static unsigned int nf_nat_htable_size __read_mostly;
static unsigned int nf_nat_hash_rnd __read_mostly;

struct nf_nat_lookup_hook_priv {
	struct nf_hook_entries __rcu *entries;

	struct rcu_head rcu_head;
};

struct nf_nat_hooks_net {
	struct nf_hook_ops *nat_hook_ops;
	unsigned int users;
};

struct nat_net {
	struct nf_nat_hooks_net nat_proto_net[NFPROTO_NUMPROTO];
};

#ifdef CONFIG_XFRM
static void nf_nat_ipv4_decode_session(struct sk_buff *skb,
				       const struct nf_conn *ct,
				       enum ip_conntrack_dir dir,
				       unsigned long statusbit,
				       struct flowi *fl)
{
	const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple;
	struct flowi4 *fl4 = &fl->u.ip4;

	if (ct->status & statusbit) {
		fl4->daddr = t->dst.u3.ip;
		if (t->dst.protonum == IPPROTO_TCP ||
		    t->dst.protonum == IPPROTO_UDP ||
		    t->dst.protonum == IPPROTO_UDPLITE ||
		    t->dst.protonum == IPPROTO_DCCP ||
		    t->dst.protonum == IPPROTO_SCTP)
			fl4->fl4_dport = t->dst.u.all;
	}

	statusbit ^= IPS_NAT_MASK;

	if (ct->status & statusbit) {
		fl4->saddr = t->src.u3.ip;
		if (t->dst.protonum == IPPROTO_TCP ||
		    t->dst.protonum == IPPROTO_UDP ||
		    t->dst.protonum == IPPROTO_UDPLITE ||
		    t->dst.protonum == IPPROTO_DCCP ||
		    t->dst.protonum == IPPROTO_SCTP)
			fl4->fl4_sport = t->src.u.all;
	}
}

static void nf_nat_ipv6_decode_session(struct sk_buff *skb,
				       const struct nf_conn *ct,
				       enum ip_conntrack_dir dir,
				       unsigned long statusbit,
				       struct flowi *fl)
{
#if IS_ENABLED(CONFIG_IPV6)
	const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple;
	struct flowi6 *fl6 = &fl->u.ip6;

	if (ct->status & statusbit) {
		fl6->daddr = t->dst.u3.in6;
		if (t->dst.protonum == IPPROTO_TCP ||
		    t->dst.protonum == IPPROTO_UDP ||
		    t->dst.protonum == IPPROTO_UDPLITE ||
		    t->dst.protonum == IPPROTO_DCCP ||
		    t->dst.protonum == IPPROTO_SCTP)
			fl6->fl6_dport = t->dst.u.all;
	}

	statusbit ^= IPS_NAT_MASK;

	if (ct->status & statusbit) {
		fl6->saddr = t->src.u3.in6;
		if (t->dst.protonum == IPPROTO_TCP ||
		    t->dst.protonum == IPPROTO_UDP ||
		    t->dst.protonum == IPPROTO_UDPLITE ||
		    t->dst.protonum == IPPROTO_DCCP ||
		    t->dst.protonum == IPPROTO_SCTP)
			fl6->fl6_sport = t->src.u.all;
	}
#endif
}

static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl)
{
	const struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	enum ip_conntrack_dir dir;
	unsigned long statusbit;
	u8 family;

	ct = nf_ct_get(skb, &ctinfo);
	if (ct == NULL)
		return;

	family = nf_ct_l3num(ct);
	dir = CTINFO2DIR(ctinfo);
	if (dir == IP_CT_DIR_ORIGINAL)
		statusbit = IPS_DST_NAT;
	else
		statusbit = IPS_SRC_NAT;

	switch (family) {
	case NFPROTO_IPV4:
		nf_nat_ipv4_decode_session(skb, ct, dir, statusbit, fl);
		return;
	case NFPROTO_IPV6:
		nf_nat_ipv6_decode_session(skb, ct, dir, statusbit, fl);
		return;
	}
}

int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
{
	struct flowi fl;
	unsigned int hh_len;
	struct dst_entry *dst;
	struct sock *sk = skb->sk;
	int err;

	err = xfrm_decode_session(skb, &fl, family);
	if (err < 0)
		return err;

	dst = skb_dst(skb);
	if (dst->xfrm)
		dst = ((struct xfrm_dst *)dst)->route;
	if (!dst_hold_safe(dst))
		return -EHOSTUNREACH;

	if (sk && !net_eq(net, sock_net(sk)))
		sk = NULL;

	dst = xfrm_lookup(net, dst, &fl, sk, 0);
	if (IS_ERR(dst))
		return PTR_ERR(dst);

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/* Change in oif may mean change in hh_len. */
	hh_len = skb_dst(skb)->dev->hard_header_len;
	if (skb_headroom(skb) < hh_len &&
	    pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
		return -ENOMEM;
	return 0;
}
EXPORT_SYMBOL(nf_xfrm_me_harder);
#endif /* CONFIG_XFRM */

/* We keep an extra hash for each conntrack, for fast searching. */
static unsigned int
hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
{
	unsigned int hash;

	get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd));

	/* Original src, to ensure we map it consistently if poss. */
	hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32),
		      tuple->dst.protonum ^ nf_nat_hash_rnd ^ net_hash_mix(n));

	return reciprocal_scale(hash, nf_nat_htable_size);
}
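
/* The bysource key is the original-direction source (address, port/id and
 * l3num) mixed with the l4 protocol number, so find_appropriate_src() below
 * can locate an existing conntrack from the same source and let a new
 * connection reuse its mapping.
 */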

/* Is this tuple already taken? (not by us) */
static int
nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
		  const struct nf_conn *ignored_conntrack)
{
	/* Conntrack tracking doesn't keep track of outgoing tuples; only
	 * incoming ones.  NAT means they don't have a fixed mapping,
	 * so we invert the tuple and look for the incoming reply.
	 *
	 * We could keep a separate hash if this proves too slow.
	 */
	struct nf_conntrack_tuple reply;

	nf_ct_invert_tuple(&reply, tuple);
	return nf_conntrack_tuple_taken(&reply, ignored_conntrack);
}

static bool nf_nat_inet_in_range(const struct nf_conntrack_tuple *t,
				 const struct nf_nat_range2 *range)
{
	if (t->src.l3num == NFPROTO_IPV4)
		return ntohl(t->src.u3.ip) >= ntohl(range->min_addr.ip) &&
		       ntohl(t->src.u3.ip) <= ntohl(range->max_addr.ip);

	return ipv6_addr_cmp(&t->src.u3.in6, &range->min_addr.in6) >= 0 &&
	       ipv6_addr_cmp(&t->src.u3.in6, &range->max_addr.in6) <= 0;
}

/* Is the manipable part of the tuple between min and max incl? */
static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
			     enum nf_nat_manip_type maniptype,
			     const union nf_conntrack_man_proto *min,
			     const union nf_conntrack_man_proto *max)
{
	__be16 port;

	switch (tuple->dst.protonum) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
		       ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
	case IPPROTO_GRE: /* all fall through */
	case IPPROTO_TCP:
	case IPPROTO_UDP:
	case IPPROTO_UDPLITE:
	case IPPROTO_DCCP:
	case IPPROTO_SCTP:
		if (maniptype == NF_NAT_MANIP_SRC)
			port = tuple->src.u.all;
		else
			port = tuple->dst.u.all;

		return ntohs(port) >= ntohs(min->all) &&
		       ntohs(port) <= ntohs(max->all);
	default:
		return true;
	}
}

/* If we source map this tuple so reply looks like reply_tuple, will
 * that meet the constraints of range.
 */
static int in_range(const struct nf_conntrack_tuple *tuple,
		    const struct nf_nat_range2 *range)
{
	/* If we are supposed to map IPs, then we must be in the
	 * range specified, otherwise let this drag us onto a new src IP.
	 */
	if (range->flags & NF_NAT_RANGE_MAP_IPS &&
	    !nf_nat_inet_in_range(tuple, range))
		return 0;

	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED))
		return 1;

	return l4proto_in_range(tuple, NF_NAT_MANIP_SRC,
				&range->min_proto, &range->max_proto);
}

static inline int
same_src(const struct nf_conn *ct,
	 const struct nf_conntrack_tuple *tuple)
{
	const struct nf_conntrack_tuple *t;

	t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
	return (t->dst.protonum == tuple->dst.protonum &&
		nf_inet_addr_cmp(&t->src.u3, &tuple->src.u3) &&
		t->src.u.all == tuple->src.u.all);
}

/* Only called for SRC manip */
static int
find_appropriate_src(struct net *net,
		     const struct nf_conntrack_zone *zone,
		     const struct nf_conntrack_tuple *tuple,
		     struct nf_conntrack_tuple *result,
		     const struct nf_nat_range2 *range)
{
	unsigned int h = hash_by_src(net, tuple);
	const struct nf_conn *ct;

	hlist_for_each_entry_rcu(ct, &nf_nat_bysource[h], nat_bysource) {
		if (same_src(ct, tuple) &&
		    net_eq(net, nf_ct_net(ct)) &&
		    nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) {
			/* Copy source part from reply tuple. */
			nf_ct_invert_tuple(result,
					   &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
			result->dst = tuple->dst;

			if (in_range(result, range))
				return 1;
		}
	}
	return 0;
}
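
/* Reusing a reply tuple this way keeps one internal source address/port
 * mapped to the same external source across connections to different
 * destinations, as long as that mapping still fits the requested range;
 * protocols that expect a stable external port benefit from this.
 */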

/* For [FUTURE] fragmentation handling, we want the least-used
 * src-ip/dst-ip/proto triple.  Fairness doesn't come into it.  Thus
 * if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
 * 1-65535, we don't do pro-rata allocation based on ports; we choose
 * the ip with the lowest src-ip/dst-ip/proto usage.
 */
static void
find_best_ips_proto(const struct nf_conntrack_zone *zone,
		    struct nf_conntrack_tuple *tuple,
		    const struct nf_nat_range2 *range,
		    const struct nf_conn *ct,
		    enum nf_nat_manip_type maniptype)
{
	union nf_inet_addr *var_ipp;
	unsigned int i, max;
	/* Host order */
	u32 minip, maxip, j, dist;
	bool full_range;

	/* No IP mapping?  Do nothing. */
	if (!(range->flags & NF_NAT_RANGE_MAP_IPS))
		return;

	if (maniptype == NF_NAT_MANIP_SRC)
		var_ipp = &tuple->src.u3;
	else
		var_ipp = &tuple->dst.u3;

	/* Fast path: only one choice. */
	if (nf_inet_addr_cmp(&range->min_addr, &range->max_addr)) {
		*var_ipp = range->min_addr;
		return;
	}

	if (nf_ct_l3num(ct) == NFPROTO_IPV4)
		max = sizeof(var_ipp->ip) / sizeof(u32) - 1;
	else
		max = sizeof(var_ipp->ip6) / sizeof(u32) - 1;

	/* Hashing source and destination IPs gives a fairly even
	 * spread in practice (if there are a small number of IPs
	 * involved, there usually aren't that many connections
	 * anyway).  The consistency means that servers see the same
	 * client coming from the same IP (some Internet Banking sites
	 * like this), even across reboots.
	 */
	j = jhash2((u32 *)&tuple->src.u3, sizeof(tuple->src.u3) / sizeof(u32),
		   range->flags & NF_NAT_RANGE_PERSISTENT ?
			0 : (__force u32)tuple->dst.u3.all[max] ^ zone->id);

	full_range = false;
	for (i = 0; i <= max; i++) {
		/* If first bytes of the address are at the maximum, use the
		 * distance. Otherwise use the full range.
		 */
		if (!full_range) {
			minip = ntohl((__force __be32)range->min_addr.all[i]);
			maxip = ntohl((__force __be32)range->max_addr.all[i]);
			dist  = maxip - minip + 1;
		} else {
			minip = 0;
			dist  = ~0;
		}

		var_ipp->all[i] = (__force __u32)
			htonl(minip + reciprocal_scale(j, dist));
		if (var_ipp->all[i] != range->max_addr.all[i])
			full_range = true;

		if (!(range->flags & NF_NAT_RANGE_PERSISTENT))
			j ^= (__force u32)tuple->dst.u3.all[i];
	}
}
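
/* For example, an IPv4 SNAT range of 10.0.0.1-10.0.0.8 yields dist = 8, and
 * reciprocal_scale(j, dist) picks an offset in [0, 7] from the jhash of the
 * tuple's addresses, so a given source (and, unless NF_NAT_RANGE_PERSISTENT
 * is set, destination) is always mapped onto the same address of the range.
 */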

/* Alter the per-proto part of the tuple (depending on maniptype), to
 * give a unique tuple in the given range if possible.
 *
 * Per-protocol part of tuple is initialized to the incoming packet.
 */
static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
					const struct nf_nat_range2 *range,
					enum nf_nat_manip_type maniptype,
					const struct nf_conn *ct)
{
	unsigned int range_size, min, max, i, attempts;
	__be16 *keyptr;
	u16 off;
	static const unsigned int max_attempts = 128;

	switch (tuple->dst.protonum) {
	case IPPROTO_ICMP: /* fallthrough */
	case IPPROTO_ICMPV6:
		/* id is same for either direction... */
		keyptr = &tuple->src.u.icmp.id;
		if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
			min = 0;
			range_size = 65536;
		} else {
			min = ntohs(range->min_proto.icmp.id);
			range_size = ntohs(range->max_proto.icmp.id) -
				     ntohs(range->min_proto.icmp.id) + 1;
		}
		goto find_free_id;
#if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE)
	case IPPROTO_GRE:
		/* If there is no master conntrack we are not PPTP,
		   do not change tuples */
		if (!ct->master)
			return;

		if (maniptype == NF_NAT_MANIP_SRC)
			keyptr = &tuple->src.u.gre.key;
		else
			keyptr = &tuple->dst.u.gre.key;

		if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
			min = 1;
			range_size = 65535;
		} else {
			min = ntohs(range->min_proto.gre.key);
			range_size = ntohs(range->max_proto.gre.key) - min + 1;
		}
		goto find_free_id;
#endif
	case IPPROTO_UDP: /* fallthrough */
	case IPPROTO_UDPLITE: /* fallthrough */
	case IPPROTO_TCP: /* fallthrough */
	case IPPROTO_SCTP: /* fallthrough */
	case IPPROTO_DCCP: /* fallthrough */
		if (maniptype == NF_NAT_MANIP_SRC)
			keyptr = &tuple->src.u.all;
		else
			keyptr = &tuple->dst.u.all;

		break;
	default:
		return;
	}

	/* If no range specified... */
	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
		/* If it's dst rewrite, can't change port */
		if (maniptype == NF_NAT_MANIP_DST)
			return;

		if (ntohs(*keyptr) < 1024) {
			/* Loose convention: >> 512 is credential passing */
			if (ntohs(*keyptr) < 512) {
				min = 1;
				range_size = 511 - min + 1;
			} else {
				min = 600;
				range_size = 1023 - min + 1;
			}
		} else {
			min = 1024;
			range_size = 65535 - 1024 + 1;
		}
	} else {
		min = ntohs(range->min_proto.all);
		max = ntohs(range->max_proto.all);
		if (unlikely(max < min))
			swap(max, min);
		range_size = max - min + 1;
	}

find_free_id:
	if (range->flags & NF_NAT_RANGE_PROTO_OFFSET)
		off = (ntohs(*keyptr) - ntohs(range->base_proto.all));
	else
		off = prandom_u32();

	attempts = range_size;
	if (attempts > max_attempts)
		attempts = max_attempts;

	/* We are in softirq; doing a search of the entire range risks
	 * soft lockup when all tuples are already used.
	 *
	 * If we can't find any free port from first offset, pick a new
	 * one and try again, with ever smaller search window.
	 */
another_round:
	for (i = 0; i < attempts; i++, off++) {
		*keyptr = htons(min + off % range_size);
		if (!nf_nat_used_tuple(tuple, ct))
			return;
	}

	if (attempts >= range_size || attempts < 16)
		return;
	attempts /= 2;
	off = prandom_u32();
	goto another_round;
}
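
/* For example, an unconstrained source manipulation of a flow whose source
 * port is already >= 1024 searches with min = 1024 and range_size = 64512:
 * a random offset is drawn and up to 128 candidates of the form
 * htons(1024 + off % 64512) are tested with nf_nat_used_tuple() before the
 * search window is halved and retried from a fresh random offset.
 */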

/* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING,
 * we change the source to map into the range. For NF_INET_PRE_ROUTING
 * and NF_INET_LOCAL_OUT, we change the destination to map into the
 * range. It might not be possible to get a unique tuple, but we try.
 * At worst (or if we race), we will end up with a final duplicate in
 * __ip_conntrack_confirm and drop the packet. */
static void
get_unique_tuple(struct nf_conntrack_tuple *tuple,
		 const struct nf_conntrack_tuple *orig_tuple,
		 const struct nf_nat_range2 *range,
		 struct nf_conn *ct,
		 enum nf_nat_manip_type maniptype)
{
	const struct nf_conntrack_zone *zone;
	struct net *net = nf_ct_net(ct);

	zone = nf_ct_zone(ct);

	/* 1) If this srcip/proto/src-proto-part is currently mapped,
	 * and that same mapping gives a unique tuple within the given
	 * range, use that.
	 *
	 * This is only required for source (ie. NAT/masq) mappings.
	 * So far, we don't do local source mappings, so multiple
	 * manips not an issue.
	 */
	if (maniptype == NF_NAT_MANIP_SRC &&
	    !(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
		/* try the original tuple first */
		if (in_range(orig_tuple, range)) {
			if (!nf_nat_used_tuple(orig_tuple, ct)) {
				*tuple = *orig_tuple;
				return;
			}
		} else if (find_appropriate_src(net, zone,
						orig_tuple, tuple, range)) {
			pr_debug("get_unique_tuple: Found current src map\n");
			if (!nf_nat_used_tuple(tuple, ct))
				return;
		}
	}

	/* 2) Select the least-used IP/proto combination in the given range */
	*tuple = *orig_tuple;
	find_best_ips_proto(zone, tuple, range, ct, maniptype);

	/* 3) The per-protocol part of the manip is made to map into
	 * the range to make a unique tuple.
	 */

	/* Only bother mapping if it's not already in range and unique */
	if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
			if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) &&
			    l4proto_in_range(tuple, maniptype,
					     &range->min_proto,
					     &range->max_proto) &&
			    (range->min_proto.all == range->max_proto.all ||
			     !nf_nat_used_tuple(tuple, ct)))
				return;
		} else if (!nf_nat_used_tuple(tuple, ct)) {
			return;
		}
	}

	/* Last chance: get protocol to try to obtain unique tuple. */
	nf_nat_l4proto_unique_tuple(tuple, range, maniptype, ct);
}

struct nf_conn_nat *nf_ct_nat_ext_add(struct nf_conn *ct)
{
	struct nf_conn_nat *nat = nfct_nat(ct);
	if (nat)
		return nat;

	if (!nf_ct_is_confirmed(ct))
		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);

	return nat;
}
EXPORT_SYMBOL_GPL(nf_ct_nat_ext_add);

unsigned int
nf_nat_setup_info(struct nf_conn *ct,
		  const struct nf_nat_range2 *range,
		  enum nf_nat_manip_type maniptype)
{
	struct net *net = nf_ct_net(ct);
	struct nf_conntrack_tuple curr_tuple, new_tuple;

	/* Can't setup nat info for confirmed ct. */
	if (nf_ct_is_confirmed(ct))
		return NF_ACCEPT;

	WARN_ON(maniptype != NF_NAT_MANIP_SRC &&
		maniptype != NF_NAT_MANIP_DST);

	if (WARN_ON(nf_nat_initialized(ct, maniptype)))
		return NF_DROP;

	/* What we've got will look like inverse of reply. Normally
	 * this is what is in the conntrack, except for prior
	 * manipulations (future optimization: if num_manips == 0,
	 * orig_tp = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)
	 */
	nf_ct_invert_tuple(&curr_tuple,
			   &ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype);

	if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) {
		struct nf_conntrack_tuple reply;

		/* Alter conntrack table so will recognize replies. */
		nf_ct_invert_tuple(&reply, &new_tuple);
		nf_conntrack_alter_reply(ct, &reply);

		/* Non-atomic: we own this at the moment. */
		if (maniptype == NF_NAT_MANIP_SRC)
			ct->status |= IPS_SRC_NAT;
		else
			ct->status |= IPS_DST_NAT;

		if (nfct_help(ct) && !nfct_seqadj(ct))
			if (!nfct_seqadj_ext_add(ct))
				return NF_DROP;
	}

	if (maniptype == NF_NAT_MANIP_SRC) {
		unsigned int srchash;
		spinlock_t *lock;

		srchash = hash_by_src(net,
				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
		lock = &nf_nat_locks[srchash % CONNTRACK_LOCKS];
		spin_lock_bh(lock);
		hlist_add_head_rcu(&ct->nat_bysource,
				   &nf_nat_bysource[srchash]);
		spin_unlock_bh(lock);
	}

	/* It's done. */
	if (maniptype == NF_NAT_MANIP_DST)
		ct->status |= IPS_DST_NAT_DONE;
	else
		ct->status |= IPS_SRC_NAT_DONE;

	return NF_ACCEPT;
}
EXPORT_SYMBOL(nf_nat_setup_info);

static unsigned int
__nf_nat_alloc_null_binding(struct nf_conn *ct, enum nf_nat_manip_type manip)
{
	/* Force range to this IP; let proto decide mapping for
	 * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
	 * Use reply in case it's already been mangled (eg local packet).
	 */
	union nf_inet_addr ip =
		(manip == NF_NAT_MANIP_SRC ?
		ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3 :
		ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3);
	struct nf_nat_range2 range = {
		.flags		= NF_NAT_RANGE_MAP_IPS,
		.min_addr	= ip,
		.max_addr	= ip,
	};
	return nf_nat_setup_info(ct, &range, manip);
}

unsigned int
nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
{
	return __nf_nat_alloc_null_binding(ct, HOOK2MANIP(hooknum));
}
EXPORT_SYMBOL_GPL(nf_nat_alloc_null_binding);

/* Do packet manipulations according to nf_nat_setup_info. */
unsigned int nf_nat_packet(struct nf_conn *ct,
			   enum ip_conntrack_info ctinfo,
			   unsigned int hooknum,
			   struct sk_buff *skb)
{
	enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum);
	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
	unsigned int verdict = NF_ACCEPT;
	unsigned long statusbit;

	if (mtype == NF_NAT_MANIP_SRC)
		statusbit = IPS_SRC_NAT;
	else
		statusbit = IPS_DST_NAT;

	/* Invert if this is reply dir. */
	if (dir == IP_CT_DIR_REPLY)
		statusbit ^= IPS_NAT_MASK;

	/* Non-atomic: these bits don't change. */
	if (ct->status & statusbit)
		verdict = nf_nat_manip_pkt(skb, ct, mtype, dir);

	return verdict;
}
EXPORT_SYMBOL_GPL(nf_nat_packet);
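
/* The status bit is inverted for reply-direction packets: a DNATed
 * connection (IPS_DST_NAT) has its destination rewritten for original-
 * direction packets at a DST-manip hook, while reply packets match the
 * inverted bit at a SRC-manip hook and get their source rewritten back.
 */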

unsigned int
nf_nat_inet_fn(void *priv, struct sk_buff *skb,
	       const struct nf_hook_state *state)
{
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	struct nf_conn_nat *nat;
	/* maniptype == SRC for postrouting. */
	enum nf_nat_manip_type maniptype = HOOK2MANIP(state->hook);

	ct = nf_ct_get(skb, &ctinfo);
	/* Can't track?  It's not due to stress, or conntrack would
	 * have dropped it.  Hence it's the user's responsibility to
	 * packet filter it out, or implement conntrack/NAT for that
	 * protocol. 8) --RR
	 */
	if (!ct)
		return NF_ACCEPT;

	nat = nfct_nat(ct);

	switch (ctinfo) {
	case IP_CT_RELATED:
	case IP_CT_RELATED_REPLY:
		/* Only ICMPs can be IP_CT_IS_REPLY.  Fallthrough */
	case IP_CT_NEW:
		/* Seen it before?  This can happen for loopback, retrans,
		 * or local packets.
		 */
		if (!nf_nat_initialized(ct, maniptype)) {
			struct nf_nat_lookup_hook_priv *lpriv = priv;
			struct nf_hook_entries *e = rcu_dereference(lpriv->entries);
			unsigned int ret;
			int i;

			if (!e)
				goto null_bind;

			for (i = 0; i < e->num_hook_entries; i++) {
				ret = e->hooks[i].hook(e->hooks[i].priv, skb,
						       state);
				if (ret != NF_ACCEPT)
					return ret;
				if (nf_nat_initialized(ct, maniptype))
					goto do_nat;
			}
null_bind:
			ret = nf_nat_alloc_null_binding(ct, state->hook);
			if (ret != NF_ACCEPT)
				return ret;
		} else {
			pr_debug("Already setup manip %s for ct %p (status bits 0x%lx)\n",
				 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
				 ct, ct->status);
			if (nf_nat_oif_changed(state->hook, ctinfo, nat,
					       state->out))
				goto oif_changed;
		}
		break;
	default:
		/* ESTABLISHED */
		WARN_ON(ctinfo != IP_CT_ESTABLISHED &&
			ctinfo != IP_CT_ESTABLISHED_REPLY);
		if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out))
			goto oif_changed;
	}
do_nat:
	return nf_nat_packet(ct, ctinfo, state->hook, skb);

oif_changed:
	nf_ct_kill_acct(ct, ctinfo, skb);
	return NF_DROP;
}
EXPORT_SYMBOL_GPL(nf_nat_inet_fn);

struct nf_nat_proto_clean {
	u8	l3proto;
	u8	l4proto;
};

/* kill conntracks with affected NAT section */
static int nf_nat_proto_remove(struct nf_conn *i, void *data)
{
	const struct nf_nat_proto_clean *clean = data;

	if ((clean->l3proto && nf_ct_l3num(i) != clean->l3proto) ||
	    (clean->l4proto && nf_ct_protonum(i) != clean->l4proto))
		return 0;

	return i->status & IPS_NAT_MASK ? 1 : 0;
}

static void __nf_nat_cleanup_conntrack(struct nf_conn *ct)
{
	unsigned int h;

	h = hash_by_src(nf_ct_net(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	spin_lock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]);
	hlist_del_rcu(&ct->nat_bysource);
	spin_unlock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]);
}
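
/* The bucket is recomputed from the original tuple, so removal takes the
 * same nf_nat_locks[] spinlock that nf_nat_setup_info() held when it added
 * the entry to nf_nat_bysource.
 */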

static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
{
	if (nf_nat_proto_remove(ct, data))
		return 1;

	/* This module is being removed and conntrack has nat null binding.
	 * Remove it from bysource hash, as the table will be freed soon.
	 *
	 * Else, when the conntrack is destroyed, nf_nat_cleanup_conntrack()
	 * will delete entry from already-freed table.
	 */
	if (test_and_clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status))
		__nf_nat_cleanup_conntrack(ct);

	/* don't delete conntrack.  Although that would make things a lot
	 * simpler, we'd end up flushing all conntracks on nat rmmod.
	 */
	return 0;
}

/* No one using conntrack by the time this is called. */
static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
{
	if (ct->status & IPS_SRC_NAT_DONE)
		__nf_nat_cleanup_conntrack(ct);
}

static struct nf_ct_ext_type nat_extend __read_mostly = {
	.len		= sizeof(struct nf_conn_nat),
	.align		= __alignof__(struct nf_conn_nat),
	.destroy	= nf_nat_cleanup_conntrack,
	.id		= NF_CT_EXT_NAT,
};

#if IS_ENABLED(CONFIG_NF_CT_NETLINK)

#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>

static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
	[CTA_PROTONAT_PORT_MIN]	= { .type = NLA_U16 },
	[CTA_PROTONAT_PORT_MAX]	= { .type = NLA_U16 },
};

static int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[],
					   struct nf_nat_range2 *range)
{
	if (tb[CTA_PROTONAT_PORT_MIN]) {
		range->min_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]);
		range->max_proto.all = range->min_proto.all;
		range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
	}
	if (tb[CTA_PROTONAT_PORT_MAX]) {
		range->max_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]);
		range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
	}
	return 0;
}

static int nfnetlink_parse_nat_proto(struct nlattr *attr,
				     const struct nf_conn *ct,
				     struct nf_nat_range2 *range)
{
	struct nlattr *tb[CTA_PROTONAT_MAX+1];
	int err;

	err = nla_parse_nested_deprecated(tb, CTA_PROTONAT_MAX, attr,
					  protonat_nla_policy, NULL);
	if (err < 0)
		return err;

	return nf_nat_l4proto_nlattr_to_range(tb, range);
}

static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
	[CTA_NAT_V4_MINIP]	= { .type = NLA_U32 },
	[CTA_NAT_V4_MAXIP]	= { .type = NLA_U32 },
	[CTA_NAT_V6_MINIP]	= { .len = sizeof(struct in6_addr) },
	[CTA_NAT_V6_MAXIP]	= { .len = sizeof(struct in6_addr) },
	[CTA_NAT_PROTO]		= { .type = NLA_NESTED },
};

static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[],
				       struct nf_nat_range2 *range)
{
	if (tb[CTA_NAT_V4_MINIP]) {
		range->min_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MINIP]);
		range->flags |= NF_NAT_RANGE_MAP_IPS;
	}

	if (tb[CTA_NAT_V4_MAXIP])
		range->max_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MAXIP]);
	else
		range->max_addr.ip = range->min_addr.ip;

	return 0;
}

static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[],
				       struct nf_nat_range2 *range)
{
	if (tb[CTA_NAT_V6_MINIP]) {
		nla_memcpy(&range->min_addr.ip6, tb[CTA_NAT_V6_MINIP],
			   sizeof(struct in6_addr));
		range->flags |= NF_NAT_RANGE_MAP_IPS;
	}

	if (tb[CTA_NAT_V6_MAXIP])
		nla_memcpy(&range->max_addr.ip6, tb[CTA_NAT_V6_MAXIP],
			   sizeof(struct in6_addr));
	else
		range->max_addr = range->min_addr;

	return 0;
}

static int
nfnetlink_parse_nat(const struct nlattr *nat,
		    const struct nf_conn *ct, struct nf_nat_range2 *range)
{
	struct nlattr *tb[CTA_NAT_MAX+1];
	int err;

	memset(range, 0, sizeof(*range));

	err = nla_parse_nested_deprecated(tb, CTA_NAT_MAX, nat,
					  nat_nla_policy, NULL);
	if (err < 0)
		return err;

	switch (nf_ct_l3num(ct)) {
	case NFPROTO_IPV4:
		err = nf_nat_ipv4_nlattr_to_range(tb, range);
		break;
	case NFPROTO_IPV6:
		err = nf_nat_ipv6_nlattr_to_range(tb, range);
		break;
	default:
		err = -EPROTONOSUPPORT;
		break;
	}

	if (err)
		return err;

	if (!tb[CTA_NAT_PROTO])
		return 0;

	return nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range);
}

/* This function is called under rcu_read_lock() */
static int
nfnetlink_parse_nat_setup(struct nf_conn *ct,
			  enum nf_nat_manip_type manip,
			  const struct nlattr *attr)
{
	struct nf_nat_range2 range;
	int err;

	/* Should not happen, restricted to creating new conntracks
	 * via ctnetlink.
	 */
	if (WARN_ON_ONCE(nf_nat_initialized(ct, manip)))
		return -EEXIST;

	/* No NAT information has been passed, allocate the null-binding */
	if (attr == NULL)
		return __nf_nat_alloc_null_binding(ct, manip) == NF_DROP ? -ENOMEM : 0;

	err = nfnetlink_parse_nat(attr, ct, &range);
	if (err < 0)
		return err;

	return nf_nat_setup_info(ct, &range, manip) == NF_DROP ? -ENOMEM : 0;
}
#else
static int
nfnetlink_parse_nat_setup(struct nf_conn *ct,
			  enum nf_nat_manip_type manip,
			  const struct nlattr *attr)
{
	return -EOPNOTSUPP;
}
#endif

static struct nf_ct_helper_expectfn follow_master_nat = {
	.name		= "nat-follow-master",
	.expectfn	= nf_nat_follow_master,
};

int nf_nat_register_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops,
		       const struct nf_hook_ops *orig_nat_ops, unsigned int ops_count)
{
	struct nat_net *nat_net = net_generic(net, nat_net_id);
	struct nf_nat_hooks_net *nat_proto_net;
	struct nf_nat_lookup_hook_priv *priv;
	unsigned int hooknum = ops->hooknum;
	struct nf_hook_ops *nat_ops;
	int i, ret;

	if (WARN_ON_ONCE(pf >= ARRAY_SIZE(nat_net->nat_proto_net)))
		return -EINVAL;

	nat_proto_net = &nat_net->nat_proto_net[pf];

	for (i = 0; i < ops_count; i++) {
		if (orig_nat_ops[i].hooknum == hooknum) {
			hooknum = i;
			break;
		}
	}

	if (WARN_ON_ONCE(i == ops_count))
		return -EINVAL;

	mutex_lock(&nf_nat_proto_mutex);
	if (!nat_proto_net->nat_hook_ops) {
		WARN_ON(nat_proto_net->users != 0);

		nat_ops = kmemdup(orig_nat_ops, sizeof(*orig_nat_ops) * ops_count, GFP_KERNEL);
		if (!nat_ops) {
			mutex_unlock(&nf_nat_proto_mutex);
			return -ENOMEM;
		}

		for (i = 0; i < ops_count; i++) {
			priv = kzalloc(sizeof(*priv), GFP_KERNEL);
			if (priv) {
				nat_ops[i].priv = priv;
				continue;
			}
			mutex_unlock(&nf_nat_proto_mutex);
			while (i)
				kfree(nat_ops[--i].priv);
			kfree(nat_ops);
			return -ENOMEM;
		}

		ret = nf_register_net_hooks(net, nat_ops, ops_count);
		if (ret < 0) {
			mutex_unlock(&nf_nat_proto_mutex);
			for (i = 0; i < ops_count; i++)
				kfree(nat_ops[i].priv);
			kfree(nat_ops);
			return ret;
		}

		nat_proto_net->nat_hook_ops = nat_ops;
	}

	nat_ops = nat_proto_net->nat_hook_ops;
	priv = nat_ops[hooknum].priv;
	if (WARN_ON_ONCE(!priv)) {
		mutex_unlock(&nf_nat_proto_mutex);
		return -EOPNOTSUPP;
	}

	ret = nf_hook_entries_insert_raw(&priv->entries, ops);
	if (ret == 0)
		nat_proto_net->users++;

	mutex_unlock(&nf_nat_proto_mutex);
	return ret;
}
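
/* NAT lookup functions (e.g. the iptables nat table or nft nat chains)
 * register through this function: the first caller for a protocol family
 * duplicates and installs the shared base hooks (orig_nat_ops), and every
 * caller is appended to the per-hook nf_nat_lookup_hook_priv entries list
 * that nf_nat_inet_fn() walks above.
 */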

void nf_nat_unregister_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops,
			  unsigned int ops_count)
{
	struct nat_net *nat_net = net_generic(net, nat_net_id);
	struct nf_nat_hooks_net *nat_proto_net;
	struct nf_nat_lookup_hook_priv *priv;
	struct nf_hook_ops *nat_ops;
	int hooknum = ops->hooknum;
	int i;

	if (pf >= ARRAY_SIZE(nat_net->nat_proto_net))
		return;

	nat_proto_net = &nat_net->nat_proto_net[pf];

	mutex_lock(&nf_nat_proto_mutex);
	if (WARN_ON(nat_proto_net->users == 0))
		goto unlock;

	nat_proto_net->users--;

	nat_ops = nat_proto_net->nat_hook_ops;
	for (i = 0; i < ops_count; i++) {
		if (nat_ops[i].hooknum == hooknum) {
			hooknum = i;
			break;
		}
	}
	if (WARN_ON_ONCE(i == ops_count))
		goto unlock;
	priv = nat_ops[hooknum].priv;
	nf_hook_entries_delete_raw(&priv->entries, ops);

	if (nat_proto_net->users == 0) {
		nf_unregister_net_hooks(net, nat_ops, ops_count);

		for (i = 0; i < ops_count; i++) {
			priv = nat_ops[i].priv;
			kfree_rcu(priv, rcu_head);
		}

		nat_proto_net->nat_hook_ops = NULL;
		kfree(nat_ops);
	}
unlock:
	mutex_unlock(&nf_nat_proto_mutex);
}

static struct pernet_operations nat_net_ops = {
	.id = &nat_net_id,
	.size = sizeof(struct nat_net),
};

static struct nf_nat_hook nat_hook = {
	.parse_nat_setup	= nfnetlink_parse_nat_setup,
#ifdef CONFIG_XFRM
	.decode_session		= __nf_nat_decode_session,
#endif
	.manip_pkt		= nf_nat_manip_pkt,
};

static int __init nf_nat_init(void)
{
	int ret, i;

	/* Leave them the same for the moment. */
	nf_nat_htable_size = nf_conntrack_htable_size;
	if (nf_nat_htable_size < CONNTRACK_LOCKS)
		nf_nat_htable_size = CONNTRACK_LOCKS;

	nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0);
	if (!nf_nat_bysource)
		return -ENOMEM;

	ret = nf_ct_extend_register(&nat_extend);
	if (ret < 0) {
		kvfree(nf_nat_bysource);
		pr_err("Unable to register extension\n");
		return ret;
	}

	for (i = 0; i < CONNTRACK_LOCKS; i++)
		spin_lock_init(&nf_nat_locks[i]);

	ret = register_pernet_subsys(&nat_net_ops);
	if (ret < 0) {
		nf_ct_extend_unregister(&nat_extend);
		return ret;
	}

	nf_ct_helper_expectfn_register(&follow_master_nat);

	WARN_ON(nf_nat_hook != NULL);
	RCU_INIT_POINTER(nf_nat_hook, &nat_hook);

	return 0;
}

static void __exit nf_nat_cleanup(void)
{
	struct nf_nat_proto_clean clean = {};

	nf_ct_iterate_destroy(nf_nat_proto_clean, &clean);

	nf_ct_extend_unregister(&nat_extend);
	nf_ct_helper_expectfn_unregister(&follow_master_nat);
	RCU_INIT_POINTER(nf_nat_hook, NULL);

	synchronize_net();
	kvfree(nf_nat_bysource);
	unregister_pernet_subsys(&nat_net_ops);
}

MODULE_LICENSE("GPL");

module_init(nf_nat_init);
module_exit(nf_nat_cleanup);