Commit | Line | Data |
---|---|---|
be6b635c AB |
1 | /* |
2 | * Copyright (c) 2011 Patrick McHardy <kaber@trash.net> | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of the GNU General Public License version 2 as | |
6 | * published by the Free Software Foundation. | |
7 | * | |
8 | * Based on Rusty Russell's IPv6 MASQUERADE target. Development of IPv6 | |
9 | * NAT funded by Astaro. | |
10 | */ | |
11 | ||
#include <linux/kernel.h>
#include <linux/atomic.h>
#include <linux/mutex.h>
#include <linux/netdevice.h>
#include <linux/ipv6.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
#include <net/netfilter/nf_nat.h>
#include <net/addrconf.h>
#include <net/ipv6.h>
#include <net/netfilter/ipv6/nf_nat_masquerade.h>
22 | ||
d93c6258 FW |
23 | #define MAX_WORK_COUNT 16 |
24 | ||
25 | static atomic_t v6_worker_count; | |
26 | ||
be6b635c | 27 | unsigned int |
2eb0f624 | 28 | nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, |
be6b635c AB |
29 | const struct net_device *out) |
30 | { | |
31 | enum ip_conntrack_info ctinfo; | |
ff459018 | 32 | struct nf_conn_nat *nat; |
be6b635c AB |
33 | struct in6_addr src; |
34 | struct nf_conn *ct; | |
2eb0f624 | 35 | struct nf_nat_range2 newrange; |
be6b635c AB |
36 | |
37 | ct = nf_ct_get(skb, &ctinfo); | |
44d6e2f2 VR |
38 | WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || |
39 | ctinfo == IP_CT_RELATED_REPLY))); | |
be6b635c | 40 | |
0a031ac5 | 41 | if (ipv6_dev_get_saddr(nf_ct_net(ct), out, |
be6b635c AB |
42 | &ipv6_hdr(skb)->daddr, 0, &src) < 0) |
43 | return NF_DROP; | |
44 | ||
ff459018 FW |
45 | nat = nf_ct_nat_ext_add(ct); |
46 | if (nat) | |
47 | nat->masq_index = out->ifindex; | |
be6b635c AB |
48 | |
49 | newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; | |
50 | newrange.min_addr.in6 = src; | |
51 | newrange.max_addr.in6 = src; | |
52 | newrange.min_proto = range->min_proto; | |
53 | newrange.max_proto = range->max_proto; | |
54 | ||
55 | return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC); | |
56 | } | |
57 | EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6); | |
58 | ||
59 | static int device_cmp(struct nf_conn *ct, void *ifindex) | |
60 | { | |
61 | const struct nf_conn_nat *nat = nfct_nat(ct); | |
62 | ||
63 | if (!nat) | |
64 | return 0; | |
65 | if (nf_ct_l3num(ct) != NFPROTO_IPV6) | |
66 | return 0; | |
67 | return nat->masq_index == (int)(long)ifindex; | |
68 | } | |
69 | ||
70 | static int masq_device_event(struct notifier_block *this, | |
71 | unsigned long event, void *ptr) | |
72 | { | |
73 | const struct net_device *dev = netdev_notifier_info_to_dev(ptr); | |
74 | struct net *net = dev_net(dev); | |
75 | ||
76 | if (event == NETDEV_DOWN) | |
9fd6452d FW |
77 | nf_ct_iterate_cleanup_net(net, device_cmp, |
78 | (void *)(long)dev->ifindex, 0, 0); | |
be6b635c AB |
79 | |
80 | return NOTIFY_DONE; | |
81 | } | |
82 | ||
83 | static struct notifier_block masq_dev_notifier = { | |
84 | .notifier_call = masq_device_event, | |
85 | }; | |
86 | ||
d93c6258 FW |
87 | struct masq_dev_work { |
88 | struct work_struct work; | |
89 | struct net *net; | |
097f95d3 | 90 | struct in6_addr addr; |
d93c6258 FW |
91 | int ifindex; |
92 | }; | |
93 | ||
097f95d3 TH |
94 | static int inet_cmp(struct nf_conn *ct, void *work) |
95 | { | |
96 | struct masq_dev_work *w = (struct masq_dev_work *)work; | |
97 | struct nf_conntrack_tuple *tuple; | |
98 | ||
99 | if (!device_cmp(ct, (void *)(long)w->ifindex)) | |
100 | return 0; | |
101 | ||
102 | tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; | |
103 | ||
104 | return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6); | |
105 | } | |
106 | ||
d93c6258 FW |
107 | static void iterate_cleanup_work(struct work_struct *work) |
108 | { | |
109 | struct masq_dev_work *w; | |
d93c6258 FW |
110 | |
111 | w = container_of(work, struct masq_dev_work, work); | |
112 | ||
097f95d3 | 113 | nf_ct_iterate_cleanup_net(w->net, inet_cmp, (void *)w, 0, 0); |
d93c6258 FW |
114 | |
115 | put_net(w->net); | |
116 | kfree(w); | |
117 | atomic_dec(&v6_worker_count); | |
118 | module_put(THIS_MODULE); | |
119 | } | |
120 | ||
121 | /* ipv6 inet notifier is an atomic notifier, i.e. we cannot | |
122 | * schedule. | |
123 | * | |
9fd6452d | 124 | * Unfortunately, nf_ct_iterate_cleanup_net can run for a long |
d93c6258 FW |
125 | * time if there are lots of conntracks and the system |
126 | * handles high softirq load, so it frequently calls cond_resched | |
127 | * while iterating the conntrack table. | |
128 | * | |
9fd6452d | 129 | * So we defer nf_ct_iterate_cleanup_net walk to the system workqueue. |
d93c6258 FW |
130 | * |
131 | * As we can have 'a lot' of inet_events (depending on amount | |
132 | * of ipv6 addresses being deleted), we also need to add an upper | |
133 | * limit to the number of queued work items. | |
134 | */ | |
584eab29 TY |
135 | static int masq_inet6_event(struct notifier_block *this, |
136 | unsigned long event, void *ptr) | |
be6b635c AB |
137 | { |
138 | struct inet6_ifaddr *ifa = ptr; | |
d93c6258 FW |
139 | const struct net_device *dev; |
140 | struct masq_dev_work *w; | |
141 | struct net *net; | |
142 | ||
143 | if (event != NETDEV_DOWN || | |
144 | atomic_read(&v6_worker_count) >= MAX_WORK_COUNT) | |
145 | return NOTIFY_DONE; | |
146 | ||
147 | dev = ifa->idev->dev; | |
148 | net = maybe_get_net(dev_net(dev)); | |
149 | if (!net) | |
150 | return NOTIFY_DONE; | |
be6b635c | 151 | |
d93c6258 FW |
152 | if (!try_module_get(THIS_MODULE)) |
153 | goto err_module; | |
154 | ||
155 | w = kmalloc(sizeof(*w), GFP_ATOMIC); | |
156 | if (w) { | |
157 | atomic_inc(&v6_worker_count); | |
158 | ||
159 | INIT_WORK(&w->work, iterate_cleanup_work); | |
160 | w->ifindex = dev->ifindex; | |
161 | w->net = net; | |
097f95d3 | 162 | w->addr = ifa->addr; |
d93c6258 FW |
163 | schedule_work(&w->work); |
164 | ||
165 | return NOTIFY_DONE; | |
166 | } | |
167 | ||
168 | module_put(THIS_MODULE); | |
169 | err_module: | |
170 | put_net(net); | |
171 | return NOTIFY_DONE; | |
be6b635c AB |
172 | } |
173 | ||
584eab29 TY |
174 | static struct notifier_block masq_inet6_notifier = { |
175 | .notifier_call = masq_inet6_event, | |
be6b635c AB |
176 | }; |
177 | ||
178 | static atomic_t masquerade_notifier_refcount = ATOMIC_INIT(0); | |
179 | ||
584eab29 | 180 | int nf_nat_masquerade_ipv6_register_notifier(void) |
be6b635c | 181 | { |
584eab29 TY |
182 | int ret; |
183 | ||
be6b635c AB |
184 | /* check if the notifier is already set */ |
185 | if (atomic_inc_return(&masquerade_notifier_refcount) > 1) | |
584eab29 | 186 | return 0; |
be6b635c | 187 | |
584eab29 TY |
188 | ret = register_netdevice_notifier(&masq_dev_notifier); |
189 | if (ret) | |
190 | goto err_dec; | |
191 | ||
192 | ret = register_inet6addr_notifier(&masq_inet6_notifier); | |
193 | if (ret) | |
194 | goto err_unregister; | |
195 | ||
196 | return ret; | |
197 | err_unregister: | |
198 | unregister_netdevice_notifier(&masq_dev_notifier); | |
199 | err_dec: | |
200 | atomic_dec(&masquerade_notifier_refcount); | |
201 | return ret; | |
be6b635c AB |
202 | } |
203 | EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_register_notifier); | |
204 | ||
205 | void nf_nat_masquerade_ipv6_unregister_notifier(void) | |
206 | { | |
207 | /* check if the notifier still has clients */ | |
208 | if (atomic_dec_return(&masquerade_notifier_refcount) > 0) | |
209 | return; | |
210 | ||
584eab29 | 211 | unregister_inet6addr_notifier(&masq_inet6_notifier); |
be6b635c AB |
212 | unregister_netdevice_notifier(&masq_dev_notifier); |
213 | } | |
214 | EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier); |