Commit | Line | Data |
---|---|---|
d1aca8ab | 1 | // SPDX-License-Identifier: GPL-2.0 |
8dd33cc9 AB |
2 | |
3 | #include <linux/types.h> | |
8dd33cc9 AB |
4 | #include <linux/atomic.h> |
5 | #include <linux/inetdevice.h> | |
8dd33cc9 | 6 | #include <linux/netfilter.h> |
8dd33cc9 | 7 | #include <linux/netfilter_ipv4.h> |
d1aca8ab FW |
8 | #include <linux/netfilter_ipv6.h> |
9 | ||
bf8981a2 | 10 | #include <net/netfilter/nf_nat_masquerade.h> |
d1aca8ab FW |
11 | |
/* Serializes notifier (un)registration against the refcount below. */
static DEFINE_MUTEX(masq_mutex);
/* Number of users of the masquerade notifiers; guarded by masq_mutex. */
static unsigned int masq_refcnt __read_mostly;
8dd33cc9 AB |
14 | |
/**
 * nf_nat_masquerade_ipv4 - source-NAT a packet to the outgoing device address
 * @skb: packet being NATed
 * @hooknum: netfilter hook; must be NF_INET_POST_ROUTING (WARNed otherwise)
 * @range: caller's NAT range; flags and proto min/max are carried over
 * @out: output device whose address becomes the new source
 *
 * Selects a source address on @out for the packet's nexthop and hands a
 * modified range to nf_nat_setup_info().  Returns NF_ACCEPT for packets
 * that should not be masqueraded, NF_DROP when no address is available,
 * otherwise the nf_nat_setup_info() verdict.
 */
unsigned int
nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
		       const struct nf_nat_range2 *range,
		       const struct net_device *out)
{
	struct nf_conn *ct;
	struct nf_conn_nat *nat;
	enum ip_conntrack_info ctinfo;
	struct nf_nat_range2 newrange;
	const struct rtable *rt;
	__be32 newsrc, nh;

	WARN_ON(hooknum != NF_INET_POST_ROUTING);

	ct = nf_ct_get(skb, &ctinfo);

	/* Masquerade only applies to new or related conntrack states. */
	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
			 ctinfo == IP_CT_RELATED_REPLY)));

	/* Source address is 0.0.0.0 - locally generated packet that is
	 * probably not supposed to be masqueraded.
	 */
	if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
		return NF_ACCEPT;

	rt = skb_rtable(skb);
	nh = rt_nexthop(rt, ip_hdr(skb)->daddr);
	newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE);
	if (!newsrc) {
		pr_info("%s ate my IP address\n", out->name);
		return NF_DROP;
	}

	/* Record the interface so NETDEV_DOWN can flush these conntracks. */
	nat = nf_ct_nat_ext_add(ct);
	if (nat)
		nat->masq_index = out->ifindex;

	/* Transfer from original range.  Only .ip is written below, so the
	 * address unions are zeroed first.
	 */
	memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
	memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
	newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS;
	newrange.min_addr.ip = newsrc;
	newrange.max_addr.ip = newsrc;
	newrange.min_proto = range->min_proto;
	newrange.max_proto = range->max_proto;

	/* Hand modified range to generic setup. */
	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);
65 | ||
66 | static int device_cmp(struct nf_conn *i, void *ifindex) | |
67 | { | |
68 | const struct nf_conn_nat *nat = nfct_nat(i); | |
69 | ||
70 | if (!nat) | |
71 | return 0; | |
8dd33cc9 AB |
72 | return nat->masq_index == (int)(long)ifindex; |
73 | } | |
74 | ||
75 | static int masq_device_event(struct notifier_block *this, | |
76 | unsigned long event, | |
77 | void *ptr) | |
78 | { | |
79 | const struct net_device *dev = netdev_notifier_info_to_dev(ptr); | |
80 | struct net *net = dev_net(dev); | |
81 | ||
82 | if (event == NETDEV_DOWN) { | |
83 | /* Device was downed. Search entire table for | |
84 | * conntracks which were associated with that device, | |
85 | * and forget them. | |
86 | */ | |
8dd33cc9 | 87 | |
9fd6452d FW |
88 | nf_ct_iterate_cleanup_net(net, device_cmp, |
89 | (void *)(long)dev->ifindex, 0, 0); | |
8dd33cc9 AB |
90 | } |
91 | ||
92 | return NOTIFY_DONE; | |
93 | } | |
94 | ||
097f95d3 TH |
95 | static int inet_cmp(struct nf_conn *ct, void *ptr) |
96 | { | |
97 | struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; | |
98 | struct net_device *dev = ifa->ifa_dev->dev; | |
99 | struct nf_conntrack_tuple *tuple; | |
100 | ||
101 | if (!device_cmp(ct, (void *)(long)dev->ifindex)) | |
102 | return 0; | |
103 | ||
104 | tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; | |
105 | ||
106 | return ifa->ifa_address == tuple->dst.u3.ip; | |
107 | } | |
108 | ||
8dd33cc9 AB |
109 | static int masq_inet_event(struct notifier_block *this, |
110 | unsigned long event, | |
111 | void *ptr) | |
112 | { | |
fbd40ea0 | 113 | struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev; |
097f95d3 | 114 | struct net *net = dev_net(idev->dev); |
8dd33cc9 | 115 | |
fbd40ea0 DM |
116 | /* The masq_dev_notifier will catch the case of the device going |
117 | * down. So if the inetdev is dead and being destroyed we have | |
118 | * no work to do. Otherwise this is an individual address removal | |
119 | * and we have to perform the flush. | |
120 | */ | |
121 | if (idev->dead) | |
122 | return NOTIFY_DONE; | |
123 | ||
097f95d3 TH |
124 | if (event == NETDEV_DOWN) |
125 | nf_ct_iterate_cleanup_net(net, inet_cmp, ptr, 0, 0); | |
126 | ||
127 | return NOTIFY_DONE; | |
8dd33cc9 AB |
128 | } |
129 | ||
/* Flush masqueraded conntracks when their output device goes down. */
static struct notifier_block masq_dev_notifier = {
	.notifier_call = masq_device_event,
};

/* Flush masqueraded conntracks when an ipv4 address is removed. */
static struct notifier_block masq_inet_notifier = {
	.notifier_call = masq_inet_event,
};
137 | ||
d1aca8ab FW |
138 | #if IS_ENABLED(CONFIG_IPV6) |
139 | static atomic_t v6_worker_count __read_mostly; | |
140 | ||
/* Pick a source address on @dev suitable for reaching @daddr.
 *
 * When ipv6 is built as a module, ipv6_dev_get_saddr() cannot be linked
 * directly; go through the nf_ipv6_ops indirection instead and fail with
 * -EHOSTUNREACH if ipv6 is not loaded.  Callers treat any negative return
 * as failure.
 */
static int
nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
		       const struct in6_addr *daddr, unsigned int srcprefs,
		       struct in6_addr *saddr)
{
#ifdef CONFIG_IPV6_MODULE
	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();

	if (!v6_ops)
		return -EHOSTUNREACH;

	return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#else
	return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#endif
}
157 | ||
/**
 * nf_nat_masquerade_ipv6 - source-NAT a packet to an address of @out
 * @skb: packet being NATed
 * @range: caller's NAT range; flags and proto min/max are carried over
 * @out: output device whose address becomes the new source
 *
 * IPv6 counterpart of nf_nat_masquerade_ipv4().  Returns NF_DROP when no
 * suitable source address can be found, otherwise the verdict from
 * nf_nat_setup_info().
 */
unsigned int
nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
		       const struct net_device *out)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn_nat *nat;
	struct in6_addr src;
	struct nf_conn *ct;
	struct nf_nat_range2 newrange;

	ct = nf_ct_get(skb, &ctinfo);
	/* Masquerade only applies to new or related conntrack states. */
	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
			 ctinfo == IP_CT_RELATED_REPLY)));

	if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out,
				   &ipv6_hdr(skb)->daddr, 0, &src) < 0)
		return NF_DROP;

	/* Record the interface so NETDEV_DOWN can flush these conntracks. */
	nat = nf_ct_nat_ext_add(ct);
	if (nat)
		nat->masq_index = out->ifindex;

	newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS;
	newrange.min_addr.in6 = src;
	newrange.max_addr.in6 = src;
	newrange.min_proto = range->min_proto;
	newrange.max_proto = range->max_proto;

	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);
189 | ||
/* Deferred conntrack flush for one ipv6 address-removal event. */
struct masq_dev_work {
	struct work_struct work;
	struct net *net;	/* reference held; released by the worker */
	struct in6_addr addr;	/* the address that was removed */
	int ifindex;		/* device the address lived on */
};
196 | ||
197 | static int inet6_cmp(struct nf_conn *ct, void *work) | |
198 | { | |
199 | struct masq_dev_work *w = (struct masq_dev_work *)work; | |
200 | struct nf_conntrack_tuple *tuple; | |
201 | ||
202 | if (!device_cmp(ct, (void *)(long)w->ifindex)) | |
203 | return 0; | |
204 | ||
205 | tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; | |
206 | ||
207 | return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6); | |
208 | } | |
209 | ||
/* Work handler: flush conntracks matching the removed address, then drop
 * the net, memory, counter and module references taken by
 * masq_inet6_event(), in that order.
 */
static void iterate_cleanup_work(struct work_struct *work)
{
	struct masq_dev_work *w;

	w = container_of(work, struct masq_dev_work, work);

	nf_ct_iterate_cleanup_net(w->net, inet6_cmp, (void *)w, 0, 0);

	put_net(w->net);
	kfree(w);
	atomic_dec(&v6_worker_count);
	/* Last: allow the module to unload once all work is done. */
	module_put(THIS_MODULE);
}
223 | ||
/* atomic notifier; can't call nf_ct_iterate_cleanup_net (it can sleep).
 *
 * Defer it to the system workqueue.
 *
 * As we can have 'a lot' of inet_events (depending on amount of ipv6
 * addresses being deleted), we also need to limit work item queue.
 */
static int masq_inet6_event(struct notifier_block *this,
			    unsigned long event, void *ptr)
{
	struct inet6_ifaddr *ifa = ptr;
	const struct net_device *dev;
	struct masq_dev_work *w;
	struct net *net;

	/* Cap outstanding work items at 16 to bound queue growth. */
	if (event != NETDEV_DOWN || atomic_read(&v6_worker_count) >= 16)
		return NOTIFY_DONE;

	dev = ifa->idev->dev;
	net = maybe_get_net(dev_net(dev));
	if (!net)
		return NOTIFY_DONE;

	/* Pin the module so the deferred work can still run safely. */
	if (!try_module_get(THIS_MODULE))
		goto err_module;

	/* GFP_ATOMIC: we are in atomic notifier context, cannot sleep. */
	w = kmalloc(sizeof(*w), GFP_ATOMIC);
	if (w) {
		atomic_inc(&v6_worker_count);

		INIT_WORK(&w->work, iterate_cleanup_work);
		w->ifindex = dev->ifindex;
		w->net = net;
		w->addr = ifa->addr;
		/* References are handed to the worker, released there. */
		schedule_work(&w->work);

		return NOTIFY_DONE;
	}

	module_put(THIS_MODULE);
err_module:
	put_net(net);
	return NOTIFY_DONE;
}
268 | ||
/* Flush masqueraded conntracks when an ipv6 address is removed. */
static struct notifier_block masq_inet6_notifier = {
	.notifier_call = masq_inet6_event,
};
272 | ||
610a4314 FW |
/* Register the ipv6 address notifier; stubbed out when ipv6 is disabled. */
static int nf_nat_masquerade_ipv6_register_notifier(void)
{
	return register_inet6addr_notifier(&masq_inet6_notifier);
}
#else
static inline int nf_nat_masquerade_ipv6_register_notifier(void) { return 0; }
#endif
280 | ||
/**
 * nf_nat_masquerade_inet_register_notifiers - register masquerade notifiers
 *
 * Reference-counted: only the first caller actually registers the
 * netdevice, inetaddr and (if ipv6 is enabled) inet6addr notifiers;
 * subsequent callers just bump the refcount.  Returns 0 on success or a
 * negative errno, in which case the refcount is restored and nothing
 * stays registered.
 */
int nf_nat_masquerade_inet_register_notifiers(void)
{
	int ret = 0;

	mutex_lock(&masq_mutex);
	if (WARN_ON_ONCE(masq_refcnt == UINT_MAX)) {
		ret = -EOVERFLOW;
		goto out_unlock;
	}

	/* check if the notifier was already set */
	if (++masq_refcnt > 1)
		goto out_unlock;

	/* Register for device down reports */
	ret = register_netdevice_notifier(&masq_dev_notifier);
	if (ret)
		goto err_dec;
	/* Register IP address change reports */
	ret = register_inetaddr_notifier(&masq_inet_notifier);
	if (ret)
		goto err_unregister;

	ret = nf_nat_masquerade_ipv6_register_notifier();
	if (ret)
		goto err_unreg_inet;

	mutex_unlock(&masq_mutex);
	return ret;
	/* Unwind in reverse order of registration on failure. */
err_unreg_inet:
	unregister_inetaddr_notifier(&masq_inet_notifier);
err_unregister:
	unregister_netdevice_notifier(&masq_dev_notifier);
err_dec:
	masq_refcnt--;
out_unlock:
	mutex_unlock(&masq_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_register_notifiers);
d1aca8ab | 321 | |
610a4314 | 322 | void nf_nat_masquerade_inet_unregister_notifiers(void) |
d1aca8ab FW |
323 | { |
324 | mutex_lock(&masq_mutex); | |
610a4314 FW |
325 | /* check if the notifiers still have clients */ |
326 | if (--masq_refcnt > 0) | |
d1aca8ab FW |
327 | goto out_unlock; |
328 | ||
610a4314 FW |
329 | unregister_netdevice_notifier(&masq_dev_notifier); |
330 | unregister_inetaddr_notifier(&masq_inet_notifier); | |
331 | #if IS_ENABLED(CONFIG_IPV6) | |
d1aca8ab | 332 | unregister_inet6addr_notifier(&masq_inet6_notifier); |
610a4314 | 333 | #endif |
d1aca8ab FW |
334 | out_unlock: |
335 | mutex_unlock(&masq_mutex); | |
336 | } | |
610a4314 | 337 | EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_unregister_notifiers); |