/* netfilter.c: look after the filters for various protocols.
 *
 * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
 *
 * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
 * way.
 *
 * Rusty Russell (C)2000 -- This code is GPL.
 * Patrick McHardy (c) 2006-2012
 */
10#include <linux/kernel.h>
11#include <linux/netfilter.h>
12#include <net/protocol.h>
13#include <linux/init.h>
14#include <linux/skbuff.h>
15#include <linux/wait.h>
16#include <linux/module.h>
17#include <linux/interrupt.h>
18#include <linux/if.h>
19#include <linux/netdevice.h>
56768644 20#include <linux/netfilter_ipv6.h>
f6ebe77f
HW
21#include <linux/inetdevice.h>
22#include <linux/proc_fs.h>
d486dd1f 23#include <linux/mutex.h>
5a0e3ad6 24#include <linux/slab.h>
457c4cbc 25#include <net/net_namespace.h>
f6ebe77f
HW
26#include <net/sock.h>
27
28#include "nf_internals.h"
29
d486dd1f 30static DEFINE_MUTEX(afinfo_mutex);
bce8032e 31
0906a372 32const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
bce8032e 33EXPORT_SYMBOL(nf_afinfo);
2a7851bf
FW
34const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly;
35EXPORT_SYMBOL_GPL(nf_ipv6_ops);
bce8032e 36
1e796fda 37int nf_register_afinfo(const struct nf_afinfo *afinfo)
bce8032e 38{
7926dbfa 39 mutex_lock(&afinfo_mutex);
a9b3cd7f 40 RCU_INIT_POINTER(nf_afinfo[afinfo->family], afinfo);
d486dd1f 41 mutex_unlock(&afinfo_mutex);
bce8032e
PM
42 return 0;
43}
44EXPORT_SYMBOL_GPL(nf_register_afinfo);
45
1e796fda 46void nf_unregister_afinfo(const struct nf_afinfo *afinfo)
bce8032e 47{
d486dd1f 48 mutex_lock(&afinfo_mutex);
a9b3cd7f 49 RCU_INIT_POINTER(nf_afinfo[afinfo->family], NULL);
d486dd1f 50 mutex_unlock(&afinfo_mutex);
bce8032e
PM
51 synchronize_rcu();
52}
53EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
54
7e9c6eeb 55struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly;
f6ebe77f 56EXPORT_SYMBOL(nf_hooks);
a2d7ec58 57
d1c85c2e 58#ifdef HAVE_JUMP_LABEL
c5905afb 59struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
a2d7ec58
ED
60EXPORT_SYMBOL(nf_hooks_needed);
61#endif
62
fd706d69 63static DEFINE_MUTEX(nf_hook_mutex);
f6ebe77f
HW
64
65int nf_register_hook(struct nf_hook_ops *reg)
66{
4c610979 67 struct nf_hook_ops *elem;
f6ebe77f 68
7926dbfa 69 mutex_lock(&nf_hook_mutex);
4c610979
LZ
70 list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
71 if (reg->priority < elem->priority)
f6ebe77f
HW
72 break;
73 }
4c610979 74 list_add_rcu(&reg->list, elem->list.prev);
fd706d69 75 mutex_unlock(&nf_hook_mutex);
d1c85c2e 76#ifdef HAVE_JUMP_LABEL
c5905afb 77 static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
a2d7ec58 78#endif
f6ebe77f
HW
79 return 0;
80}
81EXPORT_SYMBOL(nf_register_hook);
82
83void nf_unregister_hook(struct nf_hook_ops *reg)
84{
fd706d69 85 mutex_lock(&nf_hook_mutex);
f6ebe77f 86 list_del_rcu(&reg->list);
fd706d69 87 mutex_unlock(&nf_hook_mutex);
d1c85c2e 88#ifdef HAVE_JUMP_LABEL
c5905afb 89 static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
a2d7ec58 90#endif
f6ebe77f
HW
91 synchronize_net();
92}
93EXPORT_SYMBOL(nf_unregister_hook);
94
972d1cb1
PM
95int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n)
96{
97 unsigned int i;
98 int err = 0;
99
100 for (i = 0; i < n; i++) {
101 err = nf_register_hook(&reg[i]);
102 if (err)
103 goto err;
104 }
105 return err;
106
107err:
108 if (i > 0)
109 nf_unregister_hooks(reg, i);
110 return err;
111}
112EXPORT_SYMBOL(nf_register_hooks);
113
114void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
115{
f68c5301
CG
116 while (n-- > 0)
117 nf_unregister_hook(&reg[n]);
972d1cb1
PM
118}
119EXPORT_SYMBOL(nf_unregister_hooks);
120
f6ebe77f 121unsigned int nf_iterate(struct list_head *head,
3db05fea 122 struct sk_buff *skb,
cfdfab31
DM
123 struct nf_hook_state *state,
124 struct nf_hook_ops **elemp)
f6ebe77f
HW
125{
126 unsigned int verdict;
127
128 /*
129 * The caller must not block between calls to this
130 * function because of risk of continuing from deleted element.
131 */
2a6decfd 132 list_for_each_entry_continue_rcu((*elemp), head, list) {
cfdfab31 133 if (state->thresh > (*elemp)->priority)
f6ebe77f
HW
134 continue;
135
136 /* Optimization: we don't need to hold module
601e68e1 137 reference here, since function can't sleep. --RR */
de9963f0 138repeat:
238e54c9 139 verdict = (*elemp)->hook(*elemp, skb, state);
f6ebe77f
HW
140 if (verdict != NF_ACCEPT) {
141#ifdef CONFIG_NETFILTER_DEBUG
142 if (unlikely((verdict & NF_VERDICT_MASK)
143 > NF_MAX_VERDICT)) {
144 NFDEBUG("Evil return from %p(%u).\n",
cfdfab31 145 (*elemp)->hook, state->hook);
f6ebe77f
HW
146 continue;
147 }
148#endif
2a6decfd 149 if (verdict != NF_REPEAT)
f6ebe77f 150 return verdict;
de9963f0 151 goto repeat;
f6ebe77f
HW
152 }
153 }
154 return NF_ACCEPT;
155}
156
157
158/* Returns 1 if okfn() needs to be executed by the caller,
159 * -EPERM for NF_DROP, 0 otherwise. */
cfdfab31 160int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state)
f6ebe77f 161{
2a6decfd 162 struct nf_hook_ops *elem;
f6ebe77f
HW
163 unsigned int verdict;
164 int ret = 0;
165
166 /* We may already have this, but read-locks nest anyway */
167 rcu_read_lock();
168
cfdfab31
DM
169 elem = list_entry_rcu(&nf_hooks[state->pf][state->hook],
170 struct nf_hook_ops, list);
f6ebe77f 171next_hook:
cfdfab31
DM
172 verdict = nf_iterate(&nf_hooks[state->pf][state->hook], skb, state,
173 &elem);
f6ebe77f
HW
174 if (verdict == NF_ACCEPT || verdict == NF_STOP) {
175 ret = 1;
da683650 176 } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
3db05fea 177 kfree_skb(skb);
f615df76 178 ret = NF_DROP_GETERR(verdict);
da683650
EP
179 if (ret == 0)
180 ret = -EPERM;
f9c63990 181 } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
cfdfab31
DM
182 int err = nf_queue(skb, elem, state,
183 verdict >> NF_VERDICT_QBITS);
563e1232
FW
184 if (err < 0) {
185 if (err == -ECANCELED)
06cdb634 186 goto next_hook;
563e1232 187 if (err == -ESRCH &&
94b27cc3
FW
188 (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
189 goto next_hook;
06cdb634
FW
190 kfree_skb(skb);
191 }
f6ebe77f 192 }
f6ebe77f
HW
193 rcu_read_unlock();
194 return ret;
195}
196EXPORT_SYMBOL(nf_hook_slow);
197
198
37d41879 199int skb_make_writable(struct sk_buff *skb, unsigned int writable_len)
f6ebe77f 200{
37d41879 201 if (writable_len > skb->len)
f6ebe77f
HW
202 return 0;
203
204 /* Not exclusive use of packet? Must copy. */
37d41879
HX
205 if (!skb_cloned(skb)) {
206 if (writable_len <= skb_headlen(skb))
207 return 1;
208 } else if (skb_clone_writable(skb, writable_len))
209 return 1;
210
211 if (writable_len <= skb_headlen(skb))
212 writable_len = 0;
213 else
214 writable_len -= skb_headlen(skb);
215
216 return !!__pskb_pull_tail(skb, writable_len);
f6ebe77f
HW
217}
218EXPORT_SYMBOL(skb_make_writable);
219
c0cd1156 220#if IS_ENABLED(CONFIG_NF_CONNTRACK)
f6ebe77f
HW
221/* This does not belong here, but locally generated errors need it if connection
222 tracking in use: without this, connection may not be in hash table, and hence
223 manufactured ICMP or RST packets will not be associated with it. */
312a0c16
PM
224void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *)
225 __rcu __read_mostly;
f6ebe77f
HW
226EXPORT_SYMBOL(ip_ct_attach);
227
312a0c16 228void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb)
f6ebe77f 229{
312a0c16 230 void (*attach)(struct sk_buff *, const struct sk_buff *);
f6ebe77f 231
c3a47ab3
PM
232 if (skb->nfct) {
233 rcu_read_lock();
234 attach = rcu_dereference(ip_ct_attach);
235 if (attach)
236 attach(new, skb);
237 rcu_read_unlock();
f6ebe77f
HW
238 }
239}
240EXPORT_SYMBOL(nf_ct_attach);
de6e05c4 241
0e60ebe0 242void (*nf_ct_destroy)(struct nf_conntrack *) __rcu __read_mostly;
de6e05c4
YK
243EXPORT_SYMBOL(nf_ct_destroy);
244
245void nf_conntrack_destroy(struct nf_conntrack *nfct)
246{
247 void (*destroy)(struct nf_conntrack *);
248
249 rcu_read_lock();
250 destroy = rcu_dereference(nf_ct_destroy);
251 BUG_ON(destroy == NULL);
252 destroy(nfct);
253 rcu_read_unlock();
254}
255EXPORT_SYMBOL(nf_conntrack_destroy);
9cb01766 256
5a05fae5 257struct nfq_ct_hook __rcu *nfq_ct_hook __read_mostly;
9cb01766
PNA
258EXPORT_SYMBOL_GPL(nfq_ct_hook);
259
d584a61a
PNA
260struct nfq_ct_nat_hook __rcu *nfq_ct_nat_hook __read_mostly;
261EXPORT_SYMBOL_GPL(nfq_ct_nat_hook);
262
de6e05c4 263#endif /* CONFIG_NF_CONNTRACK */
f6ebe77f 264
#ifdef CONFIG_NF_NAT_NEEDED
/* Set by the NAT core; lets policy lookups see the pre-NAT flow. */
void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
EXPORT_SYMBOL(nf_nat_decode_session_hook);
#endif
f3c1a44a
G
270static int __net_init netfilter_net_init(struct net *net)
271{
272#ifdef CONFIG_PROC_FS
273 net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
274 net->proc_net);
12202fa7
PNA
275 if (!net->nf.proc_netfilter) {
276 if (!net_eq(net, &init_net))
277 pr_err("cannot create netfilter proc entry");
278
f3c1a44a
G
279 return -ENOMEM;
280 }
281#endif
282 return 0;
283}
284
285static void __net_exit netfilter_net_exit(struct net *net)
286{
287 remove_proc_entry("netfilter", net->proc_net);
288}
289
290static struct pernet_operations netfilter_net_ops = {
291 .init = netfilter_net_init,
292 .exit = netfilter_net_exit,
293};
294
6d11cfdb 295int __init netfilter_init(void)
f6ebe77f 296{
6d11cfdb
PNA
297 int i, h, ret;
298
7e9c6eeb 299 for (i = 0; i < ARRAY_SIZE(nf_hooks); i++) {
f6ebe77f
HW
300 for (h = 0; h < NF_MAX_HOOKS; h++)
301 INIT_LIST_HEAD(&nf_hooks[i][h]);
302 }
303
6d11cfdb
PNA
304 ret = register_pernet_subsys(&netfilter_net_ops);
305 if (ret < 0)
306 goto err;
307
308 ret = netfilter_log_init();
309 if (ret < 0)
310 goto err_pernet;
f6ebe77f 311
6d11cfdb
PNA
312 return 0;
313err_pernet:
314 unregister_pernet_subsys(&netfilter_net_ops);
315err:
316 return ret;
f6ebe77f 317}