Commit | Line | Data |
---|---|---|
d1aca8ab | 1 | // SPDX-License-Identifier: GPL-2.0 |
8dd33cc9 AB |
2 | |
3 | #include <linux/types.h> | |
8dd33cc9 AB |
4 | #include <linux/atomic.h> |
5 | #include <linux/inetdevice.h> | |
8dd33cc9 | 6 | #include <linux/netfilter.h> |
8dd33cc9 | 7 | #include <linux/netfilter_ipv4.h> |
d1aca8ab FW |
8 | #include <linux/netfilter_ipv6.h> |
9 | ||
bf8981a2 | 10 | #include <net/netfilter/nf_nat_masquerade.h> |
d1aca8ab | 11 | |
30db4069 FW |
12 | struct masq_dev_work { |
13 | struct work_struct work; | |
14 | struct net *net; | |
fc0d026a | 15 | netns_tracker ns_tracker; |
30db4069 FW |
16 | union nf_inet_addr addr; |
17 | int ifindex; | |
18 | int (*iter)(struct nf_conn *i, void *data); | |
19 | }; | |
20 | ||
21 | #define MAX_MASQ_WORKER_COUNT 16 | |
22 | ||
d1aca8ab | 23 | static DEFINE_MUTEX(masq_mutex); |
610a4314 | 24 | static unsigned int masq_refcnt __read_mostly; |
30db4069 | 25 | static atomic_t masq_worker_count __read_mostly; |
8dd33cc9 AB |
26 | |
27 | unsigned int | |
28 | nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, | |
2eb0f624 | 29 | const struct nf_nat_range2 *range, |
8dd33cc9 AB |
30 | const struct net_device *out) |
31 | { | |
32 | struct nf_conn *ct; | |
33 | struct nf_conn_nat *nat; | |
34 | enum ip_conntrack_info ctinfo; | |
2eb0f624 | 35 | struct nf_nat_range2 newrange; |
8dd33cc9 AB |
36 | const struct rtable *rt; |
37 | __be32 newsrc, nh; | |
38 | ||
44d6e2f2 | 39 | WARN_ON(hooknum != NF_INET_POST_ROUTING); |
8dd33cc9 AB |
40 | |
41 | ct = nf_ct_get(skb, &ctinfo); | |
8dd33cc9 | 42 | |
44d6e2f2 VR |
43 | WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || |
44 | ctinfo == IP_CT_RELATED_REPLY))); | |
8dd33cc9 AB |
45 | |
46 | /* Source address is 0.0.0.0 - locally generated packet that is | |
47 | * probably not supposed to be masqueraded. | |
48 | */ | |
49 | if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0) | |
50 | return NF_ACCEPT; | |
51 | ||
52 | rt = skb_rtable(skb); | |
53 | nh = rt_nexthop(rt, ip_hdr(skb)->daddr); | |
54 | newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE); | |
55 | if (!newsrc) { | |
56 | pr_info("%s ate my IP address\n", out->name); | |
57 | return NF_DROP; | |
58 | } | |
59 | ||
ff459018 FW |
60 | nat = nf_ct_nat_ext_add(ct); |
61 | if (nat) | |
62 | nat->masq_index = out->ifindex; | |
8dd33cc9 AB |
63 | |
64 | /* Transfer from original range. */ | |
65 | memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); | |
66 | memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); | |
67 | newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; | |
68 | newrange.min_addr.ip = newsrc; | |
69 | newrange.max_addr.ip = newsrc; | |
70 | newrange.min_proto = range->min_proto; | |
71 | newrange.max_proto = range->max_proto; | |
72 | ||
73 | /* Hand modified range to generic setup. */ | |
74 | return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC); | |
75 | } | |
76 | EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4); | |
77 | ||
30db4069 FW |
78 | static void iterate_cleanup_work(struct work_struct *work) |
79 | { | |
8169ff58 | 80 | struct nf_ct_iter_data iter_data = {}; |
30db4069 FW |
81 | struct masq_dev_work *w; |
82 | ||
83 | w = container_of(work, struct masq_dev_work, work); | |
84 | ||
8169ff58 PNA |
85 | iter_data.net = w->net; |
86 | iter_data.data = (void *)w; | |
87 | nf_ct_iterate_cleanup_net(w->iter, &iter_data); | |
30db4069 | 88 | |
fc0d026a | 89 | put_net_track(w->net, &w->ns_tracker); |
30db4069 FW |
90 | kfree(w); |
91 | atomic_dec(&masq_worker_count); | |
92 | module_put(THIS_MODULE); | |
93 | } | |
94 | ||
95 | /* Iterate conntrack table in the background and remove conntrack entries | |
96 | * that use the device/address being removed. | |
97 | * | |
98 | * In case too many work items have been queued already or memory allocation | |
99 | * fails iteration is skipped, conntrack entries will time out eventually. | |
100 | */ | |
101 | static void nf_nat_masq_schedule(struct net *net, union nf_inet_addr *addr, | |
102 | int ifindex, | |
103 | int (*iter)(struct nf_conn *i, void *data), | |
104 | gfp_t gfp_flags) | |
105 | { | |
106 | struct masq_dev_work *w; | |
107 | ||
108 | if (atomic_read(&masq_worker_count) > MAX_MASQ_WORKER_COUNT) | |
109 | return; | |
110 | ||
111 | net = maybe_get_net(net); | |
112 | if (!net) | |
113 | return; | |
114 | ||
115 | if (!try_module_get(THIS_MODULE)) | |
116 | goto err_module; | |
117 | ||
118 | w = kzalloc(sizeof(*w), gfp_flags); | |
119 | if (w) { | |
120 | /* We can overshoot MAX_MASQ_WORKER_COUNT, no big deal */ | |
121 | atomic_inc(&masq_worker_count); | |
122 | ||
123 | INIT_WORK(&w->work, iterate_cleanup_work); | |
124 | w->ifindex = ifindex; | |
125 | w->net = net; | |
fc0d026a | 126 | netns_tracker_alloc(net, &w->ns_tracker, gfp_flags); |
30db4069 FW |
127 | w->iter = iter; |
128 | if (addr) | |
129 | w->addr = *addr; | |
130 | schedule_work(&w->work); | |
131 | return; | |
132 | } | |
133 | ||
134 | module_put(THIS_MODULE); | |
135 | err_module: | |
136 | put_net(net); | |
137 | } | |
138 | ||
7970a19b | 139 | static int device_cmp(struct nf_conn *i, void *arg) |
8dd33cc9 AB |
140 | { |
141 | const struct nf_conn_nat *nat = nfct_nat(i); | |
7970a19b | 142 | const struct masq_dev_work *w = arg; |
8dd33cc9 AB |
143 | |
144 | if (!nat) | |
145 | return 0; | |
7970a19b | 146 | return nat->masq_index == w->ifindex; |
8dd33cc9 AB |
147 | } |
148 | ||
149 | static int masq_device_event(struct notifier_block *this, | |
150 | unsigned long event, | |
151 | void *ptr) | |
152 | { | |
153 | const struct net_device *dev = netdev_notifier_info_to_dev(ptr); | |
154 | struct net *net = dev_net(dev); | |
155 | ||
156 | if (event == NETDEV_DOWN) { | |
157 | /* Device was downed. Search entire table for | |
158 | * conntracks which were associated with that device, | |
159 | * and forget them. | |
160 | */ | |
8dd33cc9 | 161 | |
7970a19b FW |
162 | nf_nat_masq_schedule(net, NULL, dev->ifindex, |
163 | device_cmp, GFP_KERNEL); | |
8dd33cc9 AB |
164 | } |
165 | ||
166 | return NOTIFY_DONE; | |
167 | } | |
168 | ||
097f95d3 TH |
169 | static int inet_cmp(struct nf_conn *ct, void *ptr) |
170 | { | |
097f95d3 | 171 | struct nf_conntrack_tuple *tuple; |
7970a19b | 172 | struct masq_dev_work *w = ptr; |
097f95d3 | 173 | |
7970a19b | 174 | if (!device_cmp(ct, ptr)) |
097f95d3 TH |
175 | return 0; |
176 | ||
177 | tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; | |
178 | ||
7970a19b | 179 | return nf_inet_addr_cmp(&w->addr, &tuple->dst.u3); |
097f95d3 TH |
180 | } |
181 | ||
8dd33cc9 AB |
182 | static int masq_inet_event(struct notifier_block *this, |
183 | unsigned long event, | |
184 | void *ptr) | |
185 | { | |
7970a19b FW |
186 | const struct in_ifaddr *ifa = ptr; |
187 | const struct in_device *idev; | |
188 | const struct net_device *dev; | |
189 | union nf_inet_addr addr; | |
190 | ||
191 | if (event != NETDEV_DOWN) | |
192 | return NOTIFY_DONE; | |
8dd33cc9 | 193 | |
fbd40ea0 DM |
194 | /* The masq_dev_notifier will catch the case of the device going |
195 | * down. So if the inetdev is dead and being destroyed we have | |
196 | * no work to do. Otherwise this is an individual address removal | |
197 | * and we have to perform the flush. | |
198 | */ | |
7970a19b | 199 | idev = ifa->ifa_dev; |
fbd40ea0 DM |
200 | if (idev->dead) |
201 | return NOTIFY_DONE; | |
202 | ||
7970a19b FW |
203 | memset(&addr, 0, sizeof(addr)); |
204 | ||
205 | addr.ip = ifa->ifa_address; | |
206 | ||
207 | dev = idev->dev; | |
208 | nf_nat_masq_schedule(dev_net(idev->dev), &addr, dev->ifindex, | |
209 | inet_cmp, GFP_KERNEL); | |
097f95d3 TH |
210 | |
211 | return NOTIFY_DONE; | |
8dd33cc9 AB |
212 | } |
213 | ||
214 | static struct notifier_block masq_dev_notifier = { | |
215 | .notifier_call = masq_device_event, | |
216 | }; | |
217 | ||
218 | static struct notifier_block masq_inet_notifier = { | |
219 | .notifier_call = masq_inet_event, | |
220 | }; | |
221 | ||
d1aca8ab | 222 | #if IS_ENABLED(CONFIG_IPV6) |
d1aca8ab FW |
/*
 * Select an IPv6 source address on @dev for destination @daddr and store it
 * in @saddr.
 *
 * With IPv6 built as a module the lookup goes through the ops table from
 * nf_get_ipv6_ops(); -EHOSTUNREACH is returned when that table is not
 * available.  Otherwise ipv6_dev_get_saddr() is called directly.
 */
static int
nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
		       const struct in6_addr *daddr, unsigned int srcprefs,
		       struct in6_addr *saddr)
{
#ifdef CONFIG_IPV6_MODULE
	const struct nf_ipv6_ops *ops = nf_get_ipv6_ops();

	return ops ? ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr)
		   : -EHOSTUNREACH;
#else
	return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#endif
}
239 | ||
240 | unsigned int | |
241 | nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, | |
242 | const struct net_device *out) | |
243 | { | |
244 | enum ip_conntrack_info ctinfo; | |
245 | struct nf_conn_nat *nat; | |
246 | struct in6_addr src; | |
247 | struct nf_conn *ct; | |
248 | struct nf_nat_range2 newrange; | |
249 | ||
250 | ct = nf_ct_get(skb, &ctinfo); | |
251 | WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || | |
252 | ctinfo == IP_CT_RELATED_REPLY))); | |
253 | ||
254 | if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out, | |
255 | &ipv6_hdr(skb)->daddr, 0, &src) < 0) | |
256 | return NF_DROP; | |
257 | ||
258 | nat = nf_ct_nat_ext_add(ct); | |
259 | if (nat) | |
260 | nat->masq_index = out->ifindex; | |
261 | ||
262 | newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; | |
263 | newrange.min_addr.in6 = src; | |
264 | newrange.max_addr.in6 = src; | |
265 | newrange.min_proto = range->min_proto; | |
266 | newrange.max_proto = range->max_proto; | |
267 | ||
268 | return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC); | |
269 | } | |
270 | EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6); | |
271 | ||
d1aca8ab FW |
272 | /* atomic notifier; can't call nf_ct_iterate_cleanup_net (it can sleep). |
273 | * | |
274 | * Defer it to the system workqueue. | |
275 | * | |
276 | * As we can have 'a lot' of inet_events (depending on amount of ipv6 | |
277 | * addresses being deleted), we also need to limit work item queue. | |
278 | */ | |
279 | static int masq_inet6_event(struct notifier_block *this, | |
280 | unsigned long event, void *ptr) | |
281 | { | |
282 | struct inet6_ifaddr *ifa = ptr; | |
283 | const struct net_device *dev; | |
30db4069 | 284 | union nf_inet_addr addr; |
d1aca8ab | 285 | |
30db4069 | 286 | if (event != NETDEV_DOWN) |
d1aca8ab FW |
287 | return NOTIFY_DONE; |
288 | ||
289 | dev = ifa->idev->dev; | |
d1aca8ab | 290 | |
30db4069 | 291 | memset(&addr, 0, sizeof(addr)); |
d1aca8ab | 292 | |
30db4069 | 293 | addr.in6 = ifa->addr; |
d1aca8ab | 294 | |
7970a19b | 295 | nf_nat_masq_schedule(dev_net(dev), &addr, dev->ifindex, inet_cmp, |
30db4069 | 296 | GFP_ATOMIC); |
d1aca8ab FW |
297 | return NOTIFY_DONE; |
298 | } | |
299 | ||
300 | static struct notifier_block masq_inet6_notifier = { | |
301 | .notifier_call = masq_inet6_event, | |
302 | }; | |
303 | ||
610a4314 FW |
304 | static int nf_nat_masquerade_ipv6_register_notifier(void) |
305 | { | |
306 | return register_inet6addr_notifier(&masq_inet6_notifier); | |
307 | } | |
308 | #else | |
/* Without IPv6 support there is nothing to register; report success. */
static inline int nf_nat_masquerade_ipv6_register_notifier(void)
{
	return 0;
}
310 | #endif | |
311 | ||
312 | int nf_nat_masquerade_inet_register_notifiers(void) | |
d1aca8ab FW |
313 | { |
314 | int ret = 0; | |
315 | ||
316 | mutex_lock(&masq_mutex); | |
610a4314 | 317 | if (WARN_ON_ONCE(masq_refcnt == UINT_MAX)) { |
46f7487e | 318 | ret = -EOVERFLOW; |
d1aca8ab | 319 | goto out_unlock; |
46f7487e | 320 | } |
d1aca8ab | 321 | |
610a4314 FW |
322 | /* check if the notifier was already set */ |
323 | if (++masq_refcnt > 1) | |
46f7487e | 324 | goto out_unlock; |
d1aca8ab | 325 | |
610a4314 FW |
326 | /* Register for device down reports */ |
327 | ret = register_netdevice_notifier(&masq_dev_notifier); | |
d1aca8ab | 328 | if (ret) |
46f7487e | 329 | goto err_dec; |
610a4314 FW |
330 | /* Register IP address change reports */ |
331 | ret = register_inetaddr_notifier(&masq_inet_notifier); | |
332 | if (ret) | |
333 | goto err_unregister; | |
334 | ||
335 | ret = nf_nat_masquerade_ipv6_register_notifier(); | |
336 | if (ret) | |
337 | goto err_unreg_inet; | |
d1aca8ab FW |
338 | |
339 | mutex_unlock(&masq_mutex); | |
340 | return ret; | |
610a4314 FW |
341 | err_unreg_inet: |
342 | unregister_inetaddr_notifier(&masq_inet_notifier); | |
343 | err_unregister: | |
344 | unregister_netdevice_notifier(&masq_dev_notifier); | |
d1aca8ab | 345 | err_dec: |
610a4314 | 346 | masq_refcnt--; |
d1aca8ab FW |
347 | out_unlock: |
348 | mutex_unlock(&masq_mutex); | |
349 | return ret; | |
350 | } | |
610a4314 | 351 | EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_register_notifiers); |
d1aca8ab | 352 | |
610a4314 | 353 | void nf_nat_masquerade_inet_unregister_notifiers(void) |
d1aca8ab FW |
354 | { |
355 | mutex_lock(&masq_mutex); | |
610a4314 FW |
356 | /* check if the notifiers still have clients */ |
357 | if (--masq_refcnt > 0) | |
d1aca8ab FW |
358 | goto out_unlock; |
359 | ||
610a4314 FW |
360 | unregister_netdevice_notifier(&masq_dev_notifier); |
361 | unregister_inetaddr_notifier(&masq_inet_notifier); | |
362 | #if IS_ENABLED(CONFIG_IPV6) | |
d1aca8ab | 363 | unregister_inet6addr_notifier(&masq_inet6_notifier); |
610a4314 | 364 | #endif |
d1aca8ab FW |
365 | out_unlock: |
366 | mutex_unlock(&masq_mutex); | |
367 | } | |
610a4314 | 368 | EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_unregister_notifiers); |