xen-blkfront: don't add indirect pages to list when !feature_persistent
[linux-2.6-block.git] / net / netfilter / nf_conntrack_proto.c
CommitLineData
8f03dea5
MJ
1/* L3/L4 protocol support for nf_conntrack. */
2
3/* (C) 1999-2001 Paul `Rusty' Russell
4 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
5 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
f229f6ce 6 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
8f03dea5
MJ
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/types.h>
14#include <linux/netfilter.h>
15#include <linux/module.h>
5a0e3ad6 16#include <linux/slab.h>
d62f9ed4 17#include <linux/mutex.h>
8f03dea5
MJ
18#include <linux/vmalloc.h>
19#include <linux/stddef.h>
20#include <linux/err.h>
21#include <linux/percpu.h>
8f03dea5
MJ
22#include <linux/notifier.h>
23#include <linux/kernel.h>
24#include <linux/netdevice.h>
25
26#include <net/netfilter/nf_conntrack.h>
27#include <net/netfilter/nf_conntrack_l3proto.h>
605dcad6 28#include <net/netfilter/nf_conntrack_l4proto.h>
8f03dea5
MJ
29#include <net/netfilter/nf_conntrack_core.h>
30
0906a372
AB
31static struct nf_conntrack_l4proto __rcu **nf_ct_protos[PF_MAX] __read_mostly;
32struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX] __read_mostly;
13b18339 33EXPORT_SYMBOL_GPL(nf_ct_l3protos);
8f03dea5 34
b19caa0c 35static DEFINE_MUTEX(nf_ct_proto_mutex);
d62f9ed4 36
b19caa0c 37#ifdef CONFIG_SYSCTL
d62f9ed4 38static int
2c352f44
G
39nf_ct_register_sysctl(struct net *net,
40 struct ctl_table_header **header,
41 const char *path,
fa34fff5 42 struct ctl_table *table)
d62f9ed4
PM
43{
44 if (*header == NULL) {
2c352f44 45 *header = register_net_sysctl(net, path, table);
d62f9ed4
PM
46 if (*header == NULL)
47 return -ENOMEM;
48 }
2c352f44 49
d62f9ed4
PM
50 return 0;
51}
52
53static void
54nf_ct_unregister_sysctl(struct ctl_table_header **header,
2c352f44 55 struct ctl_table **table,
fa34fff5 56 unsigned int users)
d62f9ed4 57{
fa34fff5 58 if (users > 0)
d62f9ed4 59 return;
b3fd3ffe 60
5dd3df10 61 unregister_net_sysctl_table(*header);
2c352f44 62 kfree(*table);
d62f9ed4 63 *header = NULL;
2c352f44 64 *table = NULL;
d62f9ed4
PM
65}
66#endif
67
605dcad6
MJ
68struct nf_conntrack_l4proto *
69__nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto)
8f03dea5
MJ
70{
71 if (unlikely(l3proto >= AF_MAX || nf_ct_protos[l3proto] == NULL))
605dcad6 72 return &nf_conntrack_l4proto_generic;
8f03dea5 73
923f4902 74 return rcu_dereference(nf_ct_protos[l3proto][l4proto]);
8f03dea5 75}
13b18339 76EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find);
8f03dea5
MJ
77
/* This is guaranteed to always return a valid protocol helper, since
 * it falls back to the generic protocol. */
8f03dea5
MJ
80struct nf_conntrack_l3proto *
81nf_ct_l3proto_find_get(u_int16_t l3proto)
82{
83 struct nf_conntrack_l3proto *p;
84
923f4902 85 rcu_read_lock();
8f03dea5
MJ
86 p = __nf_ct_l3proto_find(l3proto);
87 if (!try_module_get(p->me))
605dcad6 88 p = &nf_conntrack_l3proto_generic;
923f4902 89 rcu_read_unlock();
8f03dea5
MJ
90
91 return p;
92}
13b18339 93EXPORT_SYMBOL_GPL(nf_ct_l3proto_find_get);
8f03dea5 94
8f03dea5
MJ
95int
96nf_ct_l3proto_try_module_get(unsigned short l3proto)
97{
98 int ret;
99 struct nf_conntrack_l3proto *p;
100
101retry: p = nf_ct_l3proto_find_get(l3proto);
605dcad6 102 if (p == &nf_conntrack_l3proto_generic) {
8f03dea5
MJ
103 ret = request_module("nf_conntrack-%d", l3proto);
104 if (!ret)
105 goto retry;
106
107 return -EPROTOTYPE;
108 }
109
110 return 0;
111}
13b18339 112EXPORT_SYMBOL_GPL(nf_ct_l3proto_try_module_get);
8f03dea5
MJ
113
114void nf_ct_l3proto_module_put(unsigned short l3proto)
115{
116 struct nf_conntrack_l3proto *p;
117
3b254c54
PM
118 /* rcu_read_lock not necessary since the caller holds a reference, but
119 * taken anyways to avoid lockdep warnings in __nf_ct_l3proto_find()
120 */
121 rcu_read_lock();
8f03dea5 122 p = __nf_ct_l3proto_find(l3proto);
8f03dea5 123 module_put(p->me);
3b254c54 124 rcu_read_unlock();
8f03dea5 125}
13b18339 126EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put);
8f03dea5 127
c1ebd7df
PNA
128struct nf_conntrack_l4proto *
129nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num)
130{
131 struct nf_conntrack_l4proto *p;
132
133 rcu_read_lock();
134 p = __nf_ct_l4proto_find(l3num, l4num);
135 if (!try_module_get(p->me))
136 p = &nf_conntrack_l4proto_generic;
137 rcu_read_unlock();
138
139 return p;
140}
141EXPORT_SYMBOL_GPL(nf_ct_l4proto_find_get);
142
143void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p)
144{
145 module_put(p->me);
146}
147EXPORT_SYMBOL_GPL(nf_ct_l4proto_put);
148
8f03dea5
MJ
149static int kill_l3proto(struct nf_conn *i, void *data)
150{
5e8fbe2a 151 return nf_ct_l3num(i) == ((struct nf_conntrack_l3proto *)data)->l3proto;
8f03dea5
MJ
152}
153
605dcad6 154static int kill_l4proto(struct nf_conn *i, void *data)
8f03dea5 155{
605dcad6
MJ
156 struct nf_conntrack_l4proto *l4proto;
157 l4proto = (struct nf_conntrack_l4proto *)data;
5e8fbe2a
PM
158 return nf_ct_protonum(i) == l4proto->l4proto &&
159 nf_ct_l3num(i) == l4proto->l3proto;
8f03dea5
MJ
160}
161
524a53e5
G
162static struct nf_ip_net *nf_ct_l3proto_net(struct net *net,
163 struct nf_conntrack_l3proto *l3proto)
d62f9ed4 164{
524a53e5
G
165 if (l3proto->l3proto == PF_INET)
166 return &net->ct.nf_ct_proto;
167 else
168 return NULL;
169}
d62f9ed4 170
524a53e5
G
/*
 * Register the l3 protocol's sysctl table for @net, if it has one.
 *
 * Returns 0 when there is nothing to register or on success; a negative
 * errno from nf_ct_register_sysctl() on failure, in which case the table
 * is freed and the pointer cleared.
 */
static int nf_ct_l3proto_register_sysctl(struct net *net,
					 struct nf_conntrack_l3proto *l3proto)
{
	struct nf_ip_net *in = nf_ct_l3proto_net(net, l3proto);
	int err = 0;

	/* nf_conntrack_l3proto_ipv6 doesn't support sysctl */
	if (!in)
		return 0;

#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
	if (!in->ctl_table)
		return 0;

	err = nf_ct_register_sysctl(net, &in->ctl_table_header,
				    l3proto->ctl_table_path, in->ctl_table);
	if (err < 0) {
		kfree(in->ctl_table);
		in->ctl_table = NULL;
	}
#endif
	return err;
}
194
524a53e5
G
/*
 * Undo nf_ct_l3proto_register_sysctl() for @net: unregister the header
 * and free the table if a header is currently registered.
 */
static void nf_ct_l3proto_unregister_sysctl(struct net *net,
					    struct nf_conntrack_l3proto *l3proto)
{
	struct nf_ip_net *in = nf_ct_l3proto_net(net, l3proto);

	if (!in)
		return;
#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
	if (!in->ctl_table_header)
		return;

	/* users == 0: always tear down. */
	nf_ct_unregister_sysctl(&in->ctl_table_header, &in->ctl_table, 0);
#endif
}
209
6330750d 210int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto)
8f03dea5
MJ
211{
212 int ret = 0;
0e60ebe0 213 struct nf_conntrack_l3proto *old;
8f03dea5 214
0661cca9
PM
215 if (proto->l3proto >= AF_MAX)
216 return -EBUSY;
ae5718fb 217
d0dba725
HE
218 if (proto->tuple_to_nlattr && !proto->nlattr_tuple_size)
219 return -EINVAL;
220
b19caa0c 221 mutex_lock(&nf_ct_proto_mutex);
0e60ebe0
ED
222 old = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
223 lockdep_is_held(&nf_ct_proto_mutex));
224 if (old != &nf_conntrack_l3proto_generic) {
8f03dea5 225 ret = -EBUSY;
ae5718fb 226 goto out_unlock;
8f03dea5 227 }
d62f9ed4 228
d0dba725
HE
229 if (proto->nlattr_tuple_size)
230 proto->nla_size = 3 * proto->nlattr_tuple_size();
231
0661cca9 232 rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto);
8f03dea5 233
ae5718fb 234out_unlock:
b19caa0c 235 mutex_unlock(&nf_ct_proto_mutex);
8f03dea5 236 return ret;
524a53e5 237
8f03dea5 238}
6330750d 239EXPORT_SYMBOL_GPL(nf_ct_l3proto_register);
8f03dea5 240
6330750d 241int nf_ct_l3proto_pernet_register(struct net *net,
524a53e5 242 struct nf_conntrack_l3proto *proto)
8f03dea5 243{
524a53e5
G
244 int ret = 0;
245
fa0f61f0
G
246 if (proto->init_net) {
247 ret = proto->init_net(net);
248 if (ret < 0)
249 return ret;
250 }
524a53e5 251
6330750d 252 return nf_ct_l3proto_register_sysctl(net, proto);
524a53e5 253}
6330750d 254EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register);
678d6675 255
6330750d 256void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto)
524a53e5 257{
fe3eb20c 258 BUG_ON(proto->l3proto >= AF_MAX);
ae5718fb 259
b19caa0c 260 mutex_lock(&nf_ct_proto_mutex);
0e60ebe0
ED
261 BUG_ON(rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
262 lockdep_is_held(&nf_ct_proto_mutex)
263 ) != proto);
923f4902
PM
264 rcu_assign_pointer(nf_ct_l3protos[proto->l3proto],
265 &nf_conntrack_l3proto_generic);
b19caa0c 266 mutex_unlock(&nf_ct_proto_mutex);
8f03dea5 267
0661cca9 268 synchronize_rcu();
524a53e5 269}
6330750d 270EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister);
524a53e5 271
6330750d 272void nf_ct_l3proto_pernet_unregister(struct net *net,
524a53e5
G
273 struct nf_conntrack_l3proto *proto)
274{
524a53e5 275 nf_ct_l3proto_unregister_sysctl(net, proto);
d62f9ed4 276
8f03dea5 277 /* Remove all contrack entries for this protocol */
c655bc68 278 nf_ct_iterate_cleanup(net, kill_l3proto, proto, 0, 0);
8f03dea5 279}
6330750d 280EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_unregister);
8f03dea5 281
2c352f44
G
282static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
283 struct nf_conntrack_l4proto *l4proto)
284{
08911475
PNA
285 if (l4proto->get_net_proto) {
286 /* statically built-in protocols use static per-net */
287 return l4proto->get_net_proto(net);
288 } else if (l4proto->net_id) {
289 /* ... and loadable protocols use dynamic per-net */
290 return net_generic(net, *l4proto->net_id);
15f585bd
G
291 }
292 return NULL;
2c352f44
G
293}
294
/*
 * Register the sysctl tables held in @pn for @net: the main table under
 * "net/netfilter" and, with CONFIG_NF_CONNTRACK_PROC_COMPAT and for
 * non-IPv6 protocols, the compat table under "net/ipv4/netfilter".
 *
 * Returns 0 on success or when there is nothing to register, otherwise
 * the negative errno from nf_ct_register_sysctl(). On failure the tables
 * involved are released again (the main table only when @pn->users is 0).
 */
static
int nf_ct_l4proto_register_sysctl(struct net *net,
				  struct nf_proto_net *pn,
				  struct nf_conntrack_l4proto *l4proto)
{
	int err = 0;

#ifdef CONFIG_SYSCTL
	if (pn->ctl_table) {
		err = nf_ct_register_sysctl(net, &pn->ctl_table_header,
					    "net/netfilter", pn->ctl_table);
		/* Free the table only if nobody else is using it. */
		if (err < 0 && !pn->users) {
			kfree(pn->ctl_table);
			pn->ctl_table = NULL;
		}
	}
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
	/* No compat table to deal with: report the main table's result. */
	if (l4proto->l3proto == AF_INET6 || !pn->ctl_compat_table)
		return err;

	/* Main registration failed: the compat table is of no use either. */
	if (err < 0) {
		nf_ct_kfree_compat_sysctl_table(pn);
		return err;
	}

	err = nf_ct_register_sysctl(net, &pn->ctl_compat_header,
				    "net/ipv4/netfilter",
				    pn->ctl_compat_table);
	if (err != 0) {
		/* Compat registration failed: roll back the main one too. */
		nf_ct_kfree_compat_sysctl_table(pn);
		nf_ct_unregister_sysctl(&pn->ctl_table_header,
					&pn->ctl_table,
					pn->users);
	}
#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif /* CONFIG_SYSCTL */
	return err;
}
338
2c352f44
G
/*
 * Undo nf_ct_l4proto_register_sysctl(): release the main table (subject
 * to @pn->users reaching zero) and, for non-IPv6 protocols with
 * CONFIG_NF_CONNTRACK_PROC_COMPAT, the compat table unconditionally.
 */
static
void nf_ct_l4proto_unregister_sysctl(struct net *net,
				     struct nf_proto_net *pn,
				     struct nf_conntrack_l4proto *l4proto)
{
#ifdef CONFIG_SYSCTL
	if (pn->ctl_table_header)
		nf_ct_unregister_sysctl(&pn->ctl_table_header,
					&pn->ctl_table,
					pn->users);

#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
	if (l4proto->l3proto == AF_INET6)
		return;

	if (pn->ctl_compat_header)
		nf_ct_unregister_sysctl(&pn->ctl_compat_header,
					&pn->ctl_compat_table,
					0);
#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif /* CONFIG_SYSCTL */
}
358
8f03dea5
MJ
359/* FIXME: Allow NULL functions and sub in pointers to generic for
360 them. --RR */
c296bb4d 361int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto)
8f03dea5
MJ
362{
363 int ret = 0;
364
0661cca9
PM
365 if (l4proto->l3proto >= PF_MAX)
366 return -EBUSY;
ae5718fb 367
d0dba725
HE
368 if ((l4proto->to_nlattr && !l4proto->nlattr_size)
369 || (l4proto->tuple_to_nlattr && !l4proto->nlattr_tuple_size))
370 return -EINVAL;
371
b19caa0c 372 mutex_lock(&nf_ct_proto_mutex);
c6a1e615 373 if (!nf_ct_protos[l4proto->l3proto]) {
8f03dea5 374 /* l3proto may be loaded latter. */
c5d277d2 375 struct nf_conntrack_l4proto __rcu **proto_array;
8f03dea5
MJ
376 int i;
377
c6a1e615
PM
378 proto_array = kmalloc(MAX_NF_CT_PROTO *
379 sizeof(struct nf_conntrack_l4proto *),
380 GFP_KERNEL);
8f03dea5
MJ
381 if (proto_array == NULL) {
382 ret = -ENOMEM;
b19caa0c 383 goto out_unlock;
8f03dea5 384 }
c6a1e615 385
8f03dea5 386 for (i = 0; i < MAX_NF_CT_PROTO; i++)
c5d277d2 387 RCU_INIT_POINTER(proto_array[i], &nf_conntrack_l4proto_generic);
d817d29d
ED
388
389 /* Before making proto_array visible to lockless readers,
390 * we must make sure its content is committed to memory.
391 */
392 smp_wmb();
393
c6a1e615 394 nf_ct_protos[l4proto->l3proto] = proto_array;
0e60ebe0
ED
395 } else if (rcu_dereference_protected(
396 nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
397 lockdep_is_held(&nf_ct_proto_mutex)
398 ) != &nf_conntrack_l4proto_generic) {
c6a1e615
PM
399 ret = -EBUSY;
400 goto out_unlock;
8f03dea5
MJ
401 }
402
d0dba725
HE
403 l4proto->nla_size = 0;
404 if (l4proto->nlattr_size)
405 l4proto->nla_size += l4proto->nlattr_size();
406 if (l4proto->nlattr_tuple_size)
407 l4proto->nla_size += 3 * l4proto->nlattr_tuple_size();
408
c6a1e615
PM
409 rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
410 l4proto);
8f03dea5 411out_unlock:
b19caa0c 412 mutex_unlock(&nf_ct_proto_mutex);
8f03dea5
MJ
413 return ret;
414}
c296bb4d 415EXPORT_SYMBOL_GPL(nf_ct_l4proto_register);
8f03dea5 416
c296bb4d 417int nf_ct_l4proto_pernet_register(struct net *net,
2c352f44 418 struct nf_conntrack_l4proto *l4proto)
8f03dea5 419{
2c352f44 420 int ret = 0;
fa34fff5 421 struct nf_proto_net *pn = NULL;
2c352f44 422
fa0f61f0 423 if (l4proto->init_net) {
f1caad27 424 ret = l4proto->init_net(net, l4proto->l3proto);
fa0f61f0 425 if (ret < 0)
fa34fff5 426 goto out;
fa0f61f0 427 }
678d6675 428
fa34fff5
G
429 pn = nf_ct_l4proto_net(net, l4proto);
430 if (pn == NULL)
431 goto out;
432
433 ret = nf_ct_l4proto_register_sysctl(net, pn, l4proto);
2c352f44 434 if (ret < 0)
fa34fff5 435 goto out;
2c352f44 436
fa34fff5
G
437 pn->users++;
438out:
fa0f61f0 439 return ret;
2c352f44 440}
c296bb4d 441EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register);
2c352f44 442
c296bb4d 443void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
2c352f44 444{
fe3eb20c 445 BUG_ON(l4proto->l3proto >= PF_MAX);
ae5718fb 446
b19caa0c 447 mutex_lock(&nf_ct_proto_mutex);
0e60ebe0
ED
448 BUG_ON(rcu_dereference_protected(
449 nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
450 lockdep_is_held(&nf_ct_proto_mutex)
451 ) != l4proto);
923f4902
PM
452 rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
453 &nf_conntrack_l4proto_generic);
b19caa0c 454 mutex_unlock(&nf_ct_proto_mutex);
8f03dea5 455
0661cca9 456 synchronize_rcu();
2c352f44 457}
c296bb4d 458EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister);
d62f9ed4 459
c296bb4d 460void nf_ct_l4proto_pernet_unregister(struct net *net,
2c352f44
G
461 struct nf_conntrack_l4proto *l4proto)
462{
fa34fff5
G
463 struct nf_proto_net *pn = NULL;
464
fa34fff5
G
465 pn = nf_ct_l4proto_net(net, l4proto);
466 if (pn == NULL)
467 return;
468
469 pn->users--;
470 nf_ct_l4proto_unregister_sysctl(net, pn, l4proto);
471
8f03dea5 472 /* Remove all contrack entries for this protocol */
c655bc68 473 nf_ct_iterate_cleanup(net, kill_l4proto, l4proto, 0, 0);
8f03dea5 474}
c296bb4d 475EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister);
ac5357eb 476
04d87001 477int nf_conntrack_proto_pernet_init(struct net *net)
ac5357eb 478{
ac5357eb 479 int err;
fa34fff5
G
480 struct nf_proto_net *pn = nf_ct_l4proto_net(net,
481 &nf_conntrack_l4proto_generic);
482
f1caad27
G
483 err = nf_conntrack_l4proto_generic.init_net(net,
484 nf_conntrack_l4proto_generic.l3proto);
15f585bd
G
485 if (err < 0)
486 return err;
487 err = nf_ct_l4proto_register_sysctl(net,
fa34fff5 488 pn,
15f585bd 489 &nf_conntrack_l4proto_generic);
ac5357eb
PM
490 if (err < 0)
491 return err;
492
fa34fff5 493 pn->users++;
ac5357eb
PM
494 return 0;
495}
496
04d87001 497void nf_conntrack_proto_pernet_fini(struct net *net)
ac5357eb 498{
fa34fff5
G
499 struct nf_proto_net *pn = nf_ct_l4proto_net(net,
500 &nf_conntrack_l4proto_generic);
501
502 pn->users--;
15f585bd 503 nf_ct_l4proto_unregister_sysctl(net,
fa34fff5 504 pn,
15f585bd 505 &nf_conntrack_l4proto_generic);
04d87001
G
506}
507
508int nf_conntrack_proto_init(void)
509{
510 unsigned int i;
511 for (i = 0; i < AF_MAX; i++)
512 rcu_assign_pointer(nf_ct_l3protos[i],
513 &nf_conntrack_l3proto_generic);
514 return 0;
515}
516
517void nf_conntrack_proto_fini(void)
518{
519 unsigned int i;
520 /* free l3proto protocol tables */
521 for (i = 0; i < PF_MAX; i++)
522 kfree(nf_ct_protos[i]);
ac5357eb 523}