netfilter: ipset: Consistent userspace testing with nomatch flag
[linux-block.git] / net / netfilter / ipset / ip_set_core.c
CommitLineData
a7b4f989
JK
1/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
2 * Patrick Schaaf <bof@bof.de>
075e64c0 3 * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
a7b4f989
JK
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10/* Kernel module for IP set management */
11
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/moduleparam.h>
15#include <linux/ip.h>
16#include <linux/skbuff.h>
17#include <linux/spinlock.h>
a7b4f989 18#include <linux/rculist.h>
a7b4f989
JK
19#include <net/netlink.h>
20
21#include <linux/netfilter.h>
b66554cf 22#include <linux/netfilter/x_tables.h>
a7b4f989
JK
23#include <linux/netfilter/nfnetlink.h>
24#include <linux/netfilter/ipset/ip_set.h>
25
/* Module-wide state: the registry of set types and the table of sets. */
26static LIST_HEAD(ip_set_type_list); /* all registered set types */
27static DEFINE_MUTEX(ip_set_type_mutex); /* protects ip_set_type_list */
2f9f28b2 28static DEFINE_RWLOCK(ip_set_ref_lock); /* protects the set refs */
a7b4f989 29
9076aea7 30static struct ip_set * __rcu *ip_set_list; /* all individual sets */
a7b4f989
JK
31static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */
32
/* Number of slots ip_set_create() adds when ip_set_list has no free slot */
9076aea7 33#define IP_SET_INC 64
a7b4f989
JK
/* Name equality, comparing at most IPSET_MAXNAMELEN characters */
34#define STREQ(a, b) (strncmp(a, b, IPSET_MAXNAMELEN) == 0)
35
/*
 * NOTE(review): max_sets is declared unsigned int but registered below
 * with module parameter type "int"; "uint" would match the variable's
 * type -- confirm before changing.
 */
36static unsigned int max_sets;
37
38module_param(max_sets, int, 0600);
39MODULE_PARM_DESC(max_sets, "maximal number of sets");
40MODULE_LICENSE("GPL");
41MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
42MODULE_DESCRIPTION("core IP set support");
43MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
44
9076aea7
JK
45/* When the nfnl mutex is held: */
/*
 * nfnl_dereference(): fetch an RCU-protected pointer on the update side.
 * The lockdep condition is hard-wired to 1, so nothing verifies that the
 * caller really holds the nfnl mutex.
 */
46#define nfnl_dereference(p) \
47 rcu_dereference_protected(p, 1)
/* nfnl_set(): the ip_set_list slot for index @id; usable as an lvalue. */
48#define nfnl_set(id) \
49 nfnl_dereference(ip_set_list)[id]
50
a7b4f989
JK
51/*
52 * The set types are implemented in modules and registered set types
53 * can be found in ip_set_type_list. Adding/deleting types is
54 * serialized by ip_set_type_mutex.
55 */
56
/* Take ip_set_type_mutex, which serializes changes to ip_set_type_list. */
57static inline void
58ip_set_type_lock(void)
59{
60 mutex_lock(&ip_set_type_mutex);
61}
62
/* Release ip_set_type_mutex taken by ip_set_type_lock(). */
63static inline void
64ip_set_type_unlock(void)
65{
66 mutex_unlock(&ip_set_type_mutex);
67}
68
69/* Register and deregister settype */
70
71static struct ip_set_type *
72find_set_type(const char *name, u8 family, u8 revision)
73{
74 struct ip_set_type *type;
75
76 list_for_each_entry_rcu(type, &ip_set_type_list, list)
77 if (STREQ(type->name, name) &&
3ace95c0
JK
78 (type->family == family ||
79 type->family == NFPROTO_UNSPEC) &&
f1e00b39
JK
80 revision >= type->revision_min &&
81 revision <= type->revision_max)
a7b4f989
JK
82 return type;
83 return NULL;
84}
85
86/* Unlock, try to load a set type module and lock again */
088067f4
JK
87static bool
88load_settype(const char *name)
a7b4f989 89{
c14b78e7 90 nfnl_unlock(NFNL_SUBSYS_IPSET);
a7b4f989
JK
91 pr_debug("try to load ip_set_%s\n", name);
92 if (request_module("ip_set_%s", name) < 0) {
93 pr_warning("Can't find ip_set type %s\n", name);
c14b78e7 94 nfnl_lock(NFNL_SUBSYS_IPSET);
088067f4 95 return false;
a7b4f989 96 }
c14b78e7 97 nfnl_lock(NFNL_SUBSYS_IPSET);
088067f4 98 return true;
a7b4f989
JK
99}
100
101/* Find a set type and reference it */
088067f4
JK
102#define find_set_type_get(name, family, revision, found) \
103 __find_set_type_get(name, family, revision, found, false)
104
a7b4f989 105static int
088067f4
JK
106__find_set_type_get(const char *name, u8 family, u8 revision,
107 struct ip_set_type **found, bool retry)
a7b4f989 108{
5c1aba46
JK
109 struct ip_set_type *type;
110 int err;
111
088067f4
JK
112 if (retry && !load_settype(name))
113 return -IPSET_ERR_FIND_TYPE;
114
a7b4f989
JK
115 rcu_read_lock();
116 *found = find_set_type(name, family, revision);
117 if (*found) {
5c1aba46
JK
118 err = !try_module_get((*found)->me) ? -EFAULT : 0;
119 goto unlock;
a7b4f989 120 }
088067f4
JK
121 /* Make sure the type is already loaded
122 * but we don't support the revision */
5c1aba46
JK
123 list_for_each_entry_rcu(type, &ip_set_type_list, list)
124 if (STREQ(type->name, name)) {
125 err = -IPSET_ERR_FIND_TYPE;
126 goto unlock;
127 }
a7b4f989
JK
128 rcu_read_unlock();
129
088067f4
JK
130 return retry ? -IPSET_ERR_FIND_TYPE :
131 __find_set_type_get(name, family, revision, found, true);
5c1aba46
JK
132
133unlock:
134 rcu_read_unlock();
135 return err;
a7b4f989
JK
136}
137
138/* Find a given set type by name and family.
139 * If we succeeded, the supported minimal and maximum revisions are
140 * filled out.
141 */
088067f4
JK
142#define find_set_type_minmax(name, family, min, max) \
143 __find_set_type_minmax(name, family, min, max, false)
144
a7b4f989 145static int
088067f4
JK
146__find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max,
147 bool retry)
a7b4f989
JK
148{
149 struct ip_set_type *type;
150 bool found = false;
151
088067f4
JK
152 if (retry && !load_settype(name))
153 return -IPSET_ERR_FIND_TYPE;
154
5c1aba46 155 *min = 255; *max = 0;
a7b4f989
JK
156 rcu_read_lock();
157 list_for_each_entry_rcu(type, &ip_set_type_list, list)
158 if (STREQ(type->name, name) &&
3ace95c0
JK
159 (type->family == family ||
160 type->family == NFPROTO_UNSPEC)) {
a7b4f989 161 found = true;
f1e00b39
JK
162 if (type->revision_min < *min)
163 *min = type->revision_min;
164 if (type->revision_max > *max)
165 *max = type->revision_max;
a7b4f989
JK
166 }
167 rcu_read_unlock();
168 if (found)
169 return 0;
170
088067f4
JK
171 return retry ? -IPSET_ERR_FIND_TYPE :
172 __find_set_type_minmax(name, family, min, max, true);
a7b4f989
JK
173}
174
c15f1c83
JE
/*
 * Printable name for a protocol family: "inet" for NFPROTO_IPV4,
 * "inet6" for NFPROTO_IPV6 and "any" for every other value.
 * Note that the argument is evaluated up to two times.
 */
175#define family_name(f) ((f) == NFPROTO_IPV4 ? "inet" : \
176 (f) == NFPROTO_IPV6 ? "inet6" : "any")
a7b4f989
JK
177
178/* Register a set type structure. The type is identified by
179 * the unique triple of name, family and revision.
180 */
181int
182ip_set_type_register(struct ip_set_type *type)
183{
184 int ret = 0;
185
186 if (type->protocol != IPSET_PROTOCOL) {
f1e00b39 187 pr_warning("ip_set type %s, family %s, revision %u:%u uses "
a7b4f989
JK
188 "wrong protocol version %u (want %u)\n",
189 type->name, family_name(type->family),
f1e00b39
JK
190 type->revision_min, type->revision_max,
191 type->protocol, IPSET_PROTOCOL);
a7b4f989
JK
192 return -EINVAL;
193 }
194
195 ip_set_type_lock();
f1e00b39 196 if (find_set_type(type->name, type->family, type->revision_min)) {
a7b4f989 197 /* Duplicate! */
f1e00b39 198 pr_warning("ip_set type %s, family %s with revision min %u "
a7b4f989 199 "already registered!\n", type->name,
f1e00b39 200 family_name(type->family), type->revision_min);
a7b4f989
JK
201 ret = -EINVAL;
202 goto unlock;
203 }
204 list_add_rcu(&type->list, &ip_set_type_list);
f1e00b39
JK
205 pr_debug("type %s, family %s, revision %u:%u registered.\n",
206 type->name, family_name(type->family),
207 type->revision_min, type->revision_max);
a7b4f989
JK
208unlock:
209 ip_set_type_unlock();
210 return ret;
211}
212EXPORT_SYMBOL_GPL(ip_set_type_register);
213
214/* Unregister a set type. There's a small race with ip_set_create */
215void
216ip_set_type_unregister(struct ip_set_type *type)
217{
218 ip_set_type_lock();
f1e00b39
JK
219 if (!find_set_type(type->name, type->family, type->revision_min)) {
220 pr_warning("ip_set type %s, family %s with revision min %u "
a7b4f989 221 "not registered\n", type->name,
f1e00b39 222 family_name(type->family), type->revision_min);
a7b4f989
JK
223 goto unlock;
224 }
225 list_del_rcu(&type->list);
f1e00b39
JK
226 pr_debug("type %s, family %s with revision min %u unregistered.\n",
227 type->name, family_name(type->family), type->revision_min);
a7b4f989
JK
228unlock:
229 ip_set_type_unlock();
230
231 synchronize_rcu();
232}
233EXPORT_SYMBOL_GPL(ip_set_type_unregister);
234
235/* Utility functions */
236void *
237ip_set_alloc(size_t size)
238{
239 void *members = NULL;
240
241 if (size < KMALLOC_MAX_SIZE)
242 members = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
243
244 if (members) {
245 pr_debug("%p: allocated with kmalloc\n", members);
246 return members;
247 }
248
249 members = vzalloc(size);
250 if (!members)
251 return NULL;
252 pr_debug("%p: allocated with vmalloc\n", members);
253
254 return members;
255}
256EXPORT_SYMBOL_GPL(ip_set_alloc);
257
258void
259ip_set_free(void *members)
260{
261 pr_debug("%p: free with %s\n", members,
262 is_vmalloc_addr(members) ? "vfree" : "kfree");
263 if (is_vmalloc_addr(members))
264 vfree(members);
265 else
266 kfree(members);
267}
268EXPORT_SYMBOL_GPL(ip_set_free);
269
/* True when the attribute's type field carries the NLA_F_NESTED flag. */
270static inline bool
271flag_nested(const struct nlattr *nla)
272{
273 return nla->nla_type & NLA_F_NESTED;
274}
275
/*
 * Validation policy for the attributes nested inside an IP address
 * attribute: IPv4 as a 32-bit value, IPv6 as binary data of at most
 * sizeof(struct in6_addr) bytes.
 */
276static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = {
277 [IPSET_ATTR_IPADDR_IPV4] = { .type = NLA_U32 },
278 [IPSET_ATTR_IPADDR_IPV6] = { .type = NLA_BINARY,
279 .len = sizeof(struct in6_addr) },
280};
281
282int
283ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr)
284{
285 struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
286
287 if (unlikely(!flag_nested(nla)))
288 return -IPSET_ERR_PROTOCOL;
8da560ce 289 if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy))
a7b4f989
JK
290 return -IPSET_ERR_PROTOCOL;
291 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4)))
292 return -IPSET_ERR_PROTOCOL;
293
294 *ipaddr = nla_get_be32(tb[IPSET_ATTR_IPADDR_IPV4]);
295 return 0;
296}
297EXPORT_SYMBOL_GPL(ip_set_get_ipaddr4);
298
299int
300ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
301{
302 struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
303
304 if (unlikely(!flag_nested(nla)))
305 return -IPSET_ERR_PROTOCOL;
306
8da560ce 307 if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy))
a7b4f989
JK
308 return -IPSET_ERR_PROTOCOL;
309 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6)))
310 return -IPSET_ERR_PROTOCOL;
311
312 memcpy(ipaddr, nla_data(tb[IPSET_ATTR_IPADDR_IPV6]),
313 sizeof(struct in6_addr));
314 return 0;
315}
316EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
317
075e64c0
JK
318int
319ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
320 struct ip_set_ext *ext)
321{
322 if (tb[IPSET_ATTR_TIMEOUT]) {
323 if (!(set->extensions & IPSET_EXT_TIMEOUT))
324 return -IPSET_ERR_TIMEOUT;
325 ext->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
326 }
34d666d4
JK
327 if (tb[IPSET_ATTR_BYTES] || tb[IPSET_ATTR_PACKETS]) {
328 if (!(set->extensions & IPSET_EXT_COUNTER))
329 return -IPSET_ERR_COUNTER;
330 if (tb[IPSET_ATTR_BYTES])
331 ext->bytes = be64_to_cpu(nla_get_be64(
332 tb[IPSET_ATTR_BYTES]));
333 if (tb[IPSET_ATTR_PACKETS])
334 ext->packets = be64_to_cpu(nla_get_be64(
335 tb[IPSET_ATTR_PACKETS]));
336 }
075e64c0
JK
337 return 0;
338}
339EXPORT_SYMBOL_GPL(ip_set_get_extensions);
340
a7b4f989
JK
341/*
342 * Creating/destroying/renaming/swapping affect the existence and
343 * the properties of a set. All of these can be executed from userspace
344 * only and serialized by the nfnl mutex indirectly from nfnetlink.
345 *
346 * Sets are identified by their index in ip_set_list and the index
347 * is used by the external references (set/SET netfilter modules).
348 *
349 * The set behind an index may change by swapping only, from userspace.
350 */
351
/*
 * Take one reference on @set.  The counter is incremented with
 * ip_set_ref_lock write-held and bottom halves disabled.
 */
352static inline void
9076aea7 353__ip_set_get(struct ip_set *set)
a7b4f989 354{
2f9f28b2 355 write_lock_bh(&ip_set_ref_lock);
9076aea7 356 set->ref++;
2f9f28b2 357 write_unlock_bh(&ip_set_ref_lock);
a7b4f989
JK
358}
359
/*
 * Drop one reference on @set under ip_set_ref_lock (write side, bottom
 * halves disabled).  Dropping a reference that was never taken is a bug:
 * BUG_ON() fires if the counter is already zero.
 */
360static inline void
9076aea7 361__ip_set_put(struct ip_set *set)
a7b4f989 362{
2f9f28b2 363 write_lock_bh(&ip_set_ref_lock);
9076aea7
JK
364 BUG_ON(set->ref == 0);
365 set->ref--;
2f9f28b2 366 write_unlock_bh(&ip_set_ref_lock);
a7b4f989
JK
367}
368
369/*
370 * Add, del and test set entries from kernel.
371 *
372 * The set behind the index must exist and must be referenced
373 * so it can't be destroyed (or changed) under our foot.
374 */
375
9076aea7
JK
/*
 * Read the set pointer stored at @index in ip_set_list.
 *
 * Only the array lookup happens inside the RCU read-side section; the
 * returned pointer is used by callers after rcu_read_unlock().  No
 * bounds check is performed on @index and the result may be NULL.
 */
376static inline struct ip_set *
377ip_set_rcu_get(ip_set_id_t index)
378{
379 struct ip_set *set;
380
381 rcu_read_lock();
382 /* ip_set_list itself needs to be protected */
383 set = rcu_dereference(ip_set_list)[index];
384 rcu_read_unlock();
385
386 return set;
387}
388
a7b4f989
JK
389int
390ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
075e64c0 391 const struct xt_action_param *par, struct ip_set_adt_opt *opt)
a7b4f989 392{
9076aea7 393 struct ip_set *set = ip_set_rcu_get(index);
a7b4f989
JK
394 int ret = 0;
395
2f9f28b2 396 BUG_ON(set == NULL);
a7b4f989
JK
397 pr_debug("set %s, index %u\n", set->name, index);
398
ac8cc925 399 if (opt->dim < set->type->dimension ||
c15f1c83 400 !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
a7b4f989
JK
401 return 0;
402
403 read_lock_bh(&set->lock);
b66554cf 404 ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt);
a7b4f989
JK
405 read_unlock_bh(&set->lock);
406
407 if (ret == -EAGAIN) {
408 /* Type requests element to be completed */
409 pr_debug("element must be competed, ADD is triggered\n");
410 write_lock_bh(&set->lock);
b66554cf 411 set->variant->kadt(set, skb, par, IPSET_ADD, opt);
a7b4f989
JK
412 write_unlock_bh(&set->lock);
413 ret = 1;
3e0304a5
JK
414 } else {
415 /* --return-nomatch: invert matched element */
6e01781d 416 if ((opt->cmdflags & IPSET_FLAG_RETURN_NOMATCH) &&
3e0304a5
JK
417 (set->type->features & IPSET_TYPE_NOMATCH) &&
418 (ret > 0 || ret == -ENOTEMPTY))
419 ret = -ret;
a7b4f989
JK
420 }
421
422 /* Convert error codes to nomatch */
423 return (ret < 0 ? 0 : ret);
424}
425EXPORT_SYMBOL_GPL(ip_set_test);
426
427int
428ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
075e64c0 429 const struct xt_action_param *par, struct ip_set_adt_opt *opt)
a7b4f989 430{
9076aea7 431 struct ip_set *set = ip_set_rcu_get(index);
a7b4f989
JK
432 int ret;
433
2f9f28b2 434 BUG_ON(set == NULL);
a7b4f989
JK
435 pr_debug("set %s, index %u\n", set->name, index);
436
ac8cc925 437 if (opt->dim < set->type->dimension ||
c15f1c83 438 !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
a7b4f989
JK
439 return 0;
440
441 write_lock_bh(&set->lock);
b66554cf 442 ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt);
a7b4f989
JK
443 write_unlock_bh(&set->lock);
444
445 return ret;
446}
447EXPORT_SYMBOL_GPL(ip_set_add);
448
449int
450ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
075e64c0 451 const struct xt_action_param *par, struct ip_set_adt_opt *opt)
a7b4f989 452{
9076aea7 453 struct ip_set *set = ip_set_rcu_get(index);
a7b4f989
JK
454 int ret = 0;
455
2f9f28b2 456 BUG_ON(set == NULL);
a7b4f989
JK
457 pr_debug("set %s, index %u\n", set->name, index);
458
ac8cc925 459 if (opt->dim < set->type->dimension ||
c15f1c83 460 !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
a7b4f989
JK
461 return 0;
462
463 write_lock_bh(&set->lock);
b66554cf 464 ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt);
a7b4f989
JK
465 write_unlock_bh(&set->lock);
466
467 return ret;
468}
469EXPORT_SYMBOL_GPL(ip_set_del);
470
471/*
472 * Find set by name, reference it once. The reference makes sure the
473 * thing pointed to, does not go away under our feet.
474 *
a7b4f989
JK
475 */
476ip_set_id_t
477ip_set_get_byname(const char *name, struct ip_set **set)
478{
479 ip_set_id_t i, index = IPSET_INVALID_ID;
480 struct ip_set *s;
481
9076aea7 482 rcu_read_lock();
a7b4f989 483 for (i = 0; i < ip_set_max; i++) {
9076aea7 484 s = rcu_dereference(ip_set_list)[i];
a7b4f989 485 if (s != NULL && STREQ(s->name, name)) {
9076aea7 486 __ip_set_get(s);
a7b4f989
JK
487 index = i;
488 *set = s;
9076aea7 489 break;
a7b4f989
JK
490 }
491 }
9076aea7 492 rcu_read_unlock();
a7b4f989
JK
493
494 return index;
495}
496EXPORT_SYMBOL_GPL(ip_set_get_byname);
497
498/*
499 * If the given set pointer points to a valid set, decrement
500 * reference count by 1. The caller shall not assume the index
501 * to be valid, after calling this function.
502 *
a7b4f989
JK
503 */
504void
505ip_set_put_byindex(ip_set_id_t index)
506{
9076aea7
JK
507 struct ip_set *set;
508
509 rcu_read_lock();
510 set = rcu_dereference(ip_set_list)[index];
511 if (set != NULL)
512 __ip_set_put(set);
513 rcu_read_unlock();
a7b4f989
JK
514}
515EXPORT_SYMBOL_GPL(ip_set_put_byindex);
516
517/*
518 * Get the name of a set behind a set index.
519 * We assume the set is referenced, so it does exist and
520 * can't be destroyed. The set cannot be renamed due to
521 * the referencing either.
522 *
a7b4f989
JK
523 */
524const char *
525ip_set_name_byindex(ip_set_id_t index)
526{
9076aea7 527 const struct ip_set *set = ip_set_rcu_get(index);
a7b4f989
JK
528
529 BUG_ON(set == NULL);
2f9f28b2 530 BUG_ON(set->ref == 0);
a7b4f989
JK
531
532 /* Referenced, so it's safe */
533 return set->name;
534}
535EXPORT_SYMBOL_GPL(ip_set_name_byindex);
536
537/*
538 * Routines to call by external subsystems, which do not
539 * call nfnl_lock for us.
540 */
541
542/*
543 * Find set by name, reference it once. The reference makes sure the
544 * thing pointed to, does not go away under our feet.
545 *
546 * The nfnl mutex is used in the function.
547 */
548ip_set_id_t
549ip_set_nfnl_get(const char *name)
550{
9076aea7 551 ip_set_id_t i, index = IPSET_INVALID_ID;
a7b4f989 552 struct ip_set *s;
a7b4f989 553
c14b78e7 554 nfnl_lock(NFNL_SUBSYS_IPSET);
9076aea7
JK
555 for (i = 0; i < ip_set_max; i++) {
556 s = nfnl_set(i);
557 if (s != NULL && STREQ(s->name, name)) {
558 __ip_set_get(s);
559 index = i;
560 break;
561 }
562 }
c14b78e7 563 nfnl_unlock(NFNL_SUBSYS_IPSET);
a7b4f989
JK
564
565 return index;
566}
567EXPORT_SYMBOL_GPL(ip_set_nfnl_get);
568
569/*
570 * Find set by index, reference it once. The reference makes sure the
571 * thing pointed to, does not go away under our feet.
572 *
573 * The nfnl mutex is used in the function.
574 */
575ip_set_id_t
576ip_set_nfnl_get_byindex(ip_set_id_t index)
577{
9076aea7
JK
578 struct ip_set *set;
579
a7b4f989
JK
580 if (index > ip_set_max)
581 return IPSET_INVALID_ID;
582
c14b78e7 583 nfnl_lock(NFNL_SUBSYS_IPSET);
9076aea7
JK
584 set = nfnl_set(index);
585 if (set)
586 __ip_set_get(set);
a7b4f989
JK
587 else
588 index = IPSET_INVALID_ID;
c14b78e7 589 nfnl_unlock(NFNL_SUBSYS_IPSET);
a7b4f989
JK
590
591 return index;
592}
593EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex);
594
595/*
596 * If the given set pointer points to a valid set, decrement
597 * reference count by 1. The caller shall not assume the index
598 * to be valid, after calling this function.
599 *
600 * The nfnl mutex is used in the function.
601 */
602void
603ip_set_nfnl_put(ip_set_id_t index)
604{
9076aea7 605 struct ip_set *set;
c14b78e7 606 nfnl_lock(NFNL_SUBSYS_IPSET);
9076aea7
JK
607 set = nfnl_set(index);
608 if (set != NULL)
609 __ip_set_put(set);
c14b78e7 610 nfnl_unlock(NFNL_SUBSYS_IPSET);
a7b4f989
JK
611}
612EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
613
614/*
615 * Communication protocol with userspace over netlink.
616 *
2f9f28b2 617 * The commands are serialized by the nfnl mutex.
a7b4f989
JK
618 */
619
/*
 * True when the message must be rejected: the protocol attribute is
 * missing or its value differs from IPSET_PROTOCOL.
 */
620static inline bool
621protocol_failed(const struct nlattr * const tb[])
622{
623 return !tb[IPSET_ATTR_PROTOCOL] ||
624 nla_get_u8(tb[IPSET_ATTR_PROTOCOL]) != IPSET_PROTOCOL;
625}
626
/*
 * Translate the netlink header flags into command flags: returns
 * IPSET_FLAG_EXIST unless the sender set NLM_F_EXCL, in which case 0.
 */
627static inline u32
628flag_exist(const struct nlmsghdr *nlh)
629{
630 return nlh->nlmsg_flags & NLM_F_EXCL ? 0 : IPSET_FLAG_EXIST;
631}
632
633static struct nlmsghdr *
15e47304 634start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags,
a7b4f989
JK
635 enum ipset_cmd cmd)
636{
637 struct nlmsghdr *nlh;
638 struct nfgenmsg *nfmsg;
639
15e47304 640 nlh = nlmsg_put(skb, portid, seq, cmd | (NFNL_SUBSYS_IPSET << 8),
a7b4f989
JK
641 sizeof(*nfmsg), flags);
642 if (nlh == NULL)
643 return NULL;
644
645 nfmsg = nlmsg_data(nlh);
c15f1c83 646 nfmsg->nfgen_family = NFPROTO_IPV4;
a7b4f989
JK
647 nfmsg->version = NFNETLINK_V0;
648 nfmsg->res_id = 0;
649
650 return nlh;
651}
652
653/* Create a set */
654
/*
 * Attribute policy for the create command.  Set and type names are
 * NUL-terminated strings of at most IPSET_MAXNAMELEN - 1 characters;
 * protocol, revision and family are 8-bit values; the type specific
 * data is a nested attribute.
 */
655static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = {
656 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
657 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
658 .len = IPSET_MAXNAMELEN - 1 },
659 [IPSET_ATTR_TYPENAME] = { .type = NLA_NUL_STRING,
660 .len = IPSET_MAXNAMELEN - 1},
661 [IPSET_ATTR_REVISION] = { .type = NLA_U8 },
662 [IPSET_ATTR_FAMILY] = { .type = NLA_U8 },
663 [IPSET_ATTR_DATA] = { .type = NLA_NESTED },
664};
665
9076aea7
JK
666static struct ip_set *
667find_set_and_id(const char *name, ip_set_id_t *id)
a7b4f989 668{
9076aea7
JK
669 struct ip_set *set = NULL;
670 ip_set_id_t i;
a7b4f989 671
9076aea7
JK
672 *id = IPSET_INVALID_ID;
673 for (i = 0; i < ip_set_max; i++) {
674 set = nfnl_set(i);
675 if (set != NULL && STREQ(set->name, name)) {
676 *id = i;
677 break;
678 }
a7b4f989 679 }
9076aea7 680 return (*id == IPSET_INVALID_ID ? NULL : set);
a7b4f989
JK
681}
682
/*
 * Find a set by name when the caller has no use for its index.
 * Returns the set or NULL, exactly as find_set_and_id() does.
 */
683static inline struct ip_set *
684find_set(const char *name)
685{
9076aea7 686 ip_set_id_t id;
a7b4f989 687
9076aea7 688 return find_set_and_id(name, &id);
a7b4f989
JK
689}
690
691static int
692find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set)
693{
9076aea7 694 struct ip_set *s;
a7b4f989
JK
695 ip_set_id_t i;
696
697 *index = IPSET_INVALID_ID;
698 for (i = 0; i < ip_set_max; i++) {
9076aea7
JK
699 s = nfnl_set(i);
700 if (s == NULL) {
a7b4f989
JK
701 if (*index == IPSET_INVALID_ID)
702 *index = i;
9076aea7 703 } else if (STREQ(name, s->name)) {
a7b4f989 704 /* Name clash */
9076aea7 705 *set = s;
a7b4f989
JK
706 return -EEXIST;
707 }
708 }
709 if (*index == IPSET_INVALID_ID)
710 /* No free slot remained */
711 return -IPSET_ERR_MAX_SETS;
712 return 0;
713}
714
d31f4d44
TB
/* Command handler that rejects the request: always returns -EOPNOTSUPP. */
715static int
716ip_set_none(struct sock *ctnl, struct sk_buff *skb,
717 const struct nlmsghdr *nlh,
718 const struct nlattr * const attr[])
719{
720 return -EOPNOTSUPP;
721}
722
a7b4f989
JK
/*
 * Netlink handler: create a new set.
 *
 * Steps: validate the mandatory attributes, allocate the base set
 * structure, look up (and reference) the set type, let the type build
 * its private part, then find a free slot in ip_set_list -- growing the
 * list by IP_SET_INC slots when it is full -- and publish the set there.
 *
 * Returns 0 on success, -IPSET_ERR_PROTOCOL on malformed input, -ENOMEM
 * when the base structure cannot be allocated, or the error reported by
 * the type lookup, the type's create() or find_free_id().
 *
 * Error unwinding is layered: "cleanup" destroys the type specific
 * part, "put_out" drops the type module reference, "out" frees the base
 * structure.
 */
723static int
724ip_set_create(struct sock *ctnl, struct sk_buff *skb,
725 const struct nlmsghdr *nlh,
726 const struct nlattr * const attr[])
727{
9846ada1 728 struct ip_set *set, *clash = NULL;
a7b4f989
JK
729 ip_set_id_t index = IPSET_INVALID_ID;
730 struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {};
731 const char *name, *typename;
732 u8 family, revision;
733 u32 flags = flag_exist(nlh);
734 int ret = 0;
735
736 if (unlikely(protocol_failed(attr) ||
737 attr[IPSET_ATTR_SETNAME] == NULL ||
738 attr[IPSET_ATTR_TYPENAME] == NULL ||
739 attr[IPSET_ATTR_REVISION] == NULL ||
740 attr[IPSET_ATTR_FAMILY] == NULL ||
741 (attr[IPSET_ATTR_DATA] != NULL &&
742 !flag_nested(attr[IPSET_ATTR_DATA]))))
743 return -IPSET_ERR_PROTOCOL;
744
745 name = nla_data(attr[IPSET_ATTR_SETNAME]);
746 typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
747 family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
748 revision = nla_get_u8(attr[IPSET_ATTR_REVISION]);
749 pr_debug("setname: %s, typename: %s, family: %s, revision: %u\n",
750 name, typename, family_name(family), revision);
751
752 /*
753 * First, and without any locks, allocate and initialize
754 * a normal base set structure.
755 */
756 set = kzalloc(sizeof(struct ip_set), GFP_KERNEL);
757 if (!set)
758 return -ENOMEM;
759 rwlock_init(&set->lock);
760 strlcpy(set->name, name, IPSET_MAXNAMELEN);
a7b4f989 761 set->family = family;
f1e00b39 762 set->revision = revision;
a7b4f989
JK
763
764 /*
765 * Next, check that we know the type, and take
766 * a reference on the type, to make sure it stays available
767 * while constructing our new set.
768 *
769 * After referencing the type, we try to create the type
770 * specific part of the set without holding any locks.
771 */
772 ret = find_set_type_get(typename, family, revision, &(set->type));
773 if (ret)
774 goto out;
775
776 /*
777 * Without holding any locks, create private part.
778 */
779 if (attr[IPSET_ATTR_DATA] &&
8da560ce
PM
780 nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA],
781 set->type->create_policy)) {
15b4d93f
JK
782 ret = -IPSET_ERR_PROTOCOL;
783 goto put_out;
a7b4f989
JK
784 }
785
786 ret = set->type->create(set, tb, flags);
787 if (ret != 0)
788 goto put_out;
789
790 /* BTW, ret==0 here. */
791
792 /*
793 * Here, we have a valid, constructed set and we are protected
2f9f28b2
JK
794 * by the nfnl mutex. Find the first free index in ip_set_list
795 * and check clashing.
a7b4f989 796 */
3ace95c0 797 ret = find_free_id(set->name, &index, &clash);
9076aea7 798 if (ret == -EEXIST) {
a7b4f989 799 /* If this is the same set and requested, ignore error */
/*
 * Either way the freshly built set is torn down through "cleanup";
 * only the return value differs (0 when the clash is tolerated).
 */
9076aea7 800 if ((flags & IPSET_FLAG_EXIST) &&
a7b4f989
JK
801 STREQ(set->type->name, clash->type->name) &&
802 set->type->family == clash->type->family &&
f1e00b39
JK
803 set->type->revision_min == clash->type->revision_min &&
804 set->type->revision_max == clash->type->revision_max &&
a7b4f989
JK
805 set->variant->same_set(set, clash))
806 ret = 0;
807 goto cleanup;
9076aea7
JK
808 } else if (ret == -IPSET_ERR_MAX_SETS) {
/* Table is full: replace it with one that has IP_SET_INC more slots */
809 struct ip_set **list, **tmp;
810 ip_set_id_t i = ip_set_max + IP_SET_INC;
811
812 if (i < ip_set_max || i == IPSET_INVALID_ID)
813 /* Wraparound */
814 goto cleanup;
815
816 list = kzalloc(sizeof(struct ip_set *) * i, GFP_KERNEL);
/*
 * NOTE(review): on allocation failure ret is still
 * -IPSET_ERR_MAX_SETS, not -ENOMEM -- confirm this is intended.
 */
817 if (!list)
818 goto cleanup;
819 /* nfnl mutex is held, both lists are valid */
820 tmp = nfnl_dereference(ip_set_list);
821 memcpy(list, tmp, sizeof(struct ip_set *) * ip_set_max);
822 rcu_assign_pointer(ip_set_list, list);
823 /* Make sure all current packets have passed through */
824 synchronize_net();
825 /* Use new list */
/* The first slot beyond the old table is the one handed to the new set */
826 index = ip_set_max;
827 ip_set_max = i;
828 kfree(tmp);
829 ret = 0;
830 } else if (ret)
831 goto cleanup;
a7b4f989
JK
832
833 /*
834 * Finally! Add our shiny new set to the list, and be done.
835 */
836 pr_debug("create: '%s' created with index %u!\n", set->name, index);
9076aea7 837 nfnl_set(index) = set;
a7b4f989
JK
838
839 return ret;
840
841cleanup:
842 set->variant->destroy(set);
843put_out:
844 module_put(set->type->me);
845out:
846 kfree(set);
847 return ret;
848}
849
850/* Destroy sets */
851
/*
 * Attribute policy for commands that take the protocol version and one
 * set name (a NUL-terminated string of at most IPSET_MAXNAMELEN - 1
 * characters).
 */
852static const struct nla_policy
853ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
854 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
855 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
856 .len = IPSET_MAXNAMELEN - 1 },
857};
858
/*
 * Destroy the set stored at @index.
 *
 * The slot is cleared first, then the type specific part is destroyed,
 * the type module reference is dropped and the set structure is freed.
 * The slot must not be empty: the set pointer is dereferenced without a
 * NULL check.
 */
859static void
860ip_set_destroy_set(ip_set_id_t index)
861{
9076aea7 862 struct ip_set *set = nfnl_set(index);
a7b4f989
JK
863
864 pr_debug("set: %s\n", set->name);
9076aea7 865 nfnl_set(index) = NULL;
a7b4f989
JK
866
867 /* Must call it without holding any lock */
868 set->variant->destroy(set);
869 module_put(set->type->me);
870 kfree(set);
871}
872
/*
 * Netlink handler: destroy one set, or all sets when no set name is
 * given.
 *
 * Reference counters are inspected with ip_set_ref_lock read-held; the
 * lock is dropped before any set is actually destroyed.  When all sets
 * are requested, a single referenced set makes the whole command fail
 * and nothing is destroyed.
 *
 * Returns 0 on success, -IPSET_ERR_PROTOCOL on protocol mismatch,
 * -ENOENT if the named set does not exist, -IPSET_ERR_BUSY if a set is
 * still referenced.
 */
873static int
874ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
875 const struct nlmsghdr *nlh,
876 const struct nlattr * const attr[])
877{
9076aea7 878 struct ip_set *s;
a7b4f989 879 ip_set_id_t i;
2f9f28b2 880 int ret = 0;
a7b4f989
JK
881
882 if (unlikely(protocol_failed(attr)))
883 return -IPSET_ERR_PROTOCOL;
884
2f9f28b2
JK
885 /* Commands are serialized and references are
886 * protected by the ip_set_ref_lock.
887 * External systems (i.e. xt_set) must call
888 * ip_set_put|get_nfnl_* functions, that way we
889 * can safely check references here.
890 *
891 * list:set timer can only decrement the reference
892 * counter, so if it's already zero, we can proceed
893 * without holding the lock.
894 */
895 read_lock_bh(&ip_set_ref_lock);
a7b4f989
JK
896 if (!attr[IPSET_ATTR_SETNAME]) {
/* First pass: refuse if any set is still referenced */
897 for (i = 0; i < ip_set_max; i++) {
9076aea7
JK
898 s = nfnl_set(i);
899 if (s != NULL && s->ref) {
9d883232 900 ret = -IPSET_ERR_BUSY;
2f9f28b2
JK
901 goto out;
902 }
a7b4f989 903 }
2f9f28b2 904 read_unlock_bh(&ip_set_ref_lock);
/* Second pass, lock already dropped: destroy every existing set */
a7b4f989 905 for (i = 0; i < ip_set_max; i++) {
9076aea7
JK
906 s = nfnl_set(i);
907 if (s != NULL)
a7b4f989
JK
908 ip_set_destroy_set(i);
909 }
910 } else {
9076aea7
JK
911 s = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME]), &i);
912 if (s == NULL) {
2f9f28b2
JK
913 ret = -ENOENT;
914 goto out;
9076aea7 915 } else if (s->ref) {
2f9f28b2
JK
916 ret = -IPSET_ERR_BUSY;
917 goto out;
918 }
919 read_unlock_bh(&ip_set_ref_lock);
a7b4f989
JK
920
921 ip_set_destroy_set(i);
922 }
923 return 0;
2f9f28b2
JK
/* Error exit: reached only while ip_set_ref_lock is still read-held */
924out:
925 read_unlock_bh(&ip_set_ref_lock);
926 return ret;
a7b4f989
JK
927}
928
929/* Flush sets */
930
/* Call the set variant's flush() on @set with the set's write lock held. */
931static void
932ip_set_flush_set(struct ip_set *set)
933{
934 pr_debug("set: %s\n", set->name);
935
936 write_lock_bh(&set->lock);
937 set->variant->flush(set);
938 write_unlock_bh(&set->lock);
939}
940
941static int
942ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
943 const struct nlmsghdr *nlh,
944 const struct nlattr * const attr[])
945{
9076aea7 946 struct ip_set *s;
a7b4f989
JK
947 ip_set_id_t i;
948
949 if (unlikely(protocol_failed(attr)))
9184a9cb 950 return -IPSET_ERR_PROTOCOL;
a7b4f989
JK
951
952 if (!attr[IPSET_ATTR_SETNAME]) {
9076aea7
JK
953 for (i = 0; i < ip_set_max; i++) {
954 s = nfnl_set(i);
955 if (s != NULL)
956 ip_set_flush_set(s);
957 }
a7b4f989 958 } else {
9076aea7
JK
959 s = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
960 if (s == NULL)
a7b4f989
JK
961 return -ENOENT;
962
9076aea7 963 ip_set_flush_set(s);
a7b4f989
JK
964 }
965
966 return 0;
967}
968
969/* Rename a set */
970
/*
 * Attribute policy for commands that take the protocol version and two
 * set names, each a NUL-terminated string of at most
 * IPSET_MAXNAMELEN - 1 characters.
 */
971static const struct nla_policy
972ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = {
973 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
974 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
975 .len = IPSET_MAXNAMELEN - 1 },
976 [IPSET_ATTR_SETNAME2] = { .type = NLA_NUL_STRING,
977 .len = IPSET_MAXNAMELEN - 1 },
978};
979
980static int
981ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
982 const struct nlmsghdr *nlh,
983 const struct nlattr * const attr[])
984{
9076aea7 985 struct ip_set *set, *s;
a7b4f989
JK
986 const char *name2;
987 ip_set_id_t i;
2f9f28b2 988 int ret = 0;
a7b4f989
JK
989
990 if (unlikely(protocol_failed(attr) ||
991 attr[IPSET_ATTR_SETNAME] == NULL ||
992 attr[IPSET_ATTR_SETNAME2] == NULL))
993 return -IPSET_ERR_PROTOCOL;
994
995 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
996 if (set == NULL)
997 return -ENOENT;
2f9f28b2
JK
998
999 read_lock_bh(&ip_set_ref_lock);
1000 if (set->ref != 0) {
1001 ret = -IPSET_ERR_REFERENCED;
1002 goto out;
1003 }
a7b4f989
JK
1004
1005 name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
1006 for (i = 0; i < ip_set_max; i++) {
9076aea7
JK
1007 s = nfnl_set(i);
1008 if (s != NULL && STREQ(s->name, name2)) {
2f9f28b2
JK
1009 ret = -IPSET_ERR_EXIST_SETNAME2;
1010 goto out;
1011 }
a7b4f989
JK
1012 }
1013 strncpy(set->name, name2, IPSET_MAXNAMELEN);
1014
2f9f28b2
JK
1015out:
1016 read_unlock_bh(&ip_set_ref_lock);
1017 return ret;
a7b4f989
JK
1018}
1019
1020/* Swap two sets so that name/index points to the other.
1021 * References and set names are also swapped.
1022 *
2f9f28b2
JK
1023 * The commands are serialized by the nfnl mutex and references are
1024 * protected by the ip_set_ref_lock. The kernel interfaces
a7b4f989
JK
1025 * do not hold the mutex but the pointer settings are atomic
1026 * so the ip_set_list always contains valid pointers to the sets.
1027 */
1028
1029static int
1030ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
1031 const struct nlmsghdr *nlh,
1032 const struct nlattr * const attr[])
1033{
1034 struct ip_set *from, *to;
1035 ip_set_id_t from_id, to_id;
1036 char from_name[IPSET_MAXNAMELEN];
a7b4f989
JK
1037
1038 if (unlikely(protocol_failed(attr) ||
1039 attr[IPSET_ATTR_SETNAME] == NULL ||
1040 attr[IPSET_ATTR_SETNAME2] == NULL))
1041 return -IPSET_ERR_PROTOCOL;
1042
9076aea7
JK
1043 from = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME]), &from_id);
1044 if (from == NULL)
a7b4f989
JK
1045 return -ENOENT;
1046
9076aea7
JK
1047 to = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME2]), &to_id);
1048 if (to == NULL)
a7b4f989
JK
1049 return -IPSET_ERR_EXIST_SETNAME2;
1050
a7b4f989 1051 /* Features must not change.
25985edc 1052 * Not an artificial restriction anymore, as we must prevent
a7b4f989
JK
1053 * possible loops created by swapping in setlist type of sets. */
1054 if (!(from->type->features == to->type->features &&
1055 from->type->family == to->type->family))
1056 return -IPSET_ERR_TYPE_MISMATCH;
1057
a7b4f989 1058 strncpy(from_name, from->name, IPSET_MAXNAMELEN);
a7b4f989 1059 strncpy(from->name, to->name, IPSET_MAXNAMELEN);
a7b4f989 1060 strncpy(to->name, from_name, IPSET_MAXNAMELEN);
a7b4f989 1061
2f9f28b2
JK
1062 write_lock_bh(&ip_set_ref_lock);
1063 swap(from->ref, to->ref);
9076aea7
JK
1064 nfnl_set(from_id) = to;
1065 nfnl_set(to_id) = from;
2f9f28b2 1066 write_unlock_bh(&ip_set_ref_lock);
a7b4f989
JK
1067
1068 return 0;
1069}
1070
/* List/save set data */

/* Dump type values, stored in the low 16 bits of cb->args[0] */
#define DUMP_INIT		0
#define DUMP_ALL		1
#define DUMP_ONE		2
#define DUMP_LAST		3

/* Split cb->args[0] into the dump type (low 16 bits) and the flags (rest) */
#define DUMP_TYPE(v)		((u32)(v) & 0x0000FFFFU)
#define DUMP_FLAGS(v)		((u32)(v) >> 16)
a7b4f989
JK
1080
1081static int
1082ip_set_dump_done(struct netlink_callback *cb)
1083{
1084 if (cb->args[2]) {
9076aea7 1085 pr_debug("release set %s\n", nfnl_set(cb->args[1])->name);
2f9f28b2 1086 ip_set_put_byindex((ip_set_id_t) cb->args[1]);
a7b4f989
JK
1087 }
1088 return 0;
1089}
1090
1091static inline void
1092dump_attrs(struct nlmsghdr *nlh)
1093{
1094 const struct nlattr *attr;
1095 int rem;
1096
1097 pr_debug("dump nlmsg\n");
1098 nlmsg_for_each_attr(attr, nlh, sizeof(struct nfgenmsg), rem) {
1099 pr_debug("type: %u, len %u\n", nla_type(attr), attr->nla_len);
1100 }
1101}
1102
1103static int
1104dump_init(struct netlink_callback *cb)
1105{
1106 struct nlmsghdr *nlh = nlmsg_hdr(cb->skb);
573ce260 1107 int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
a7b4f989
JK
1108 struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
1109 struct nlattr *attr = (void *)nlh + min_len;
c1e2e043 1110 u32 dump_type;
a7b4f989
JK
1111 ip_set_id_t index;
1112
1113 /* Second pass, so parser can't fail */
1114 nla_parse(cda, IPSET_ATTR_CMD_MAX,
1115 attr, nlh->nlmsg_len - min_len, ip_set_setname_policy);
1116
1117 /* cb->args[0] : dump single set/all sets
1118 * [1] : set index
1119 * [..]: type specific
1120 */
1121
c1e2e043 1122 if (cda[IPSET_ATTR_SETNAME]) {
9076aea7
JK
1123 struct ip_set *set;
1124
1125 set = find_set_and_id(nla_data(cda[IPSET_ATTR_SETNAME]),
1126 &index);
1127 if (set == NULL)
c1e2e043 1128 return -ENOENT;
a7b4f989 1129
c1e2e043
JK
1130 dump_type = DUMP_ONE;
1131 cb->args[1] = index;
1132 } else
1133 dump_type = DUMP_ALL;
1134
1135 if (cda[IPSET_ATTR_FLAGS]) {
1136 u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]);
1137 dump_type |= (f << 16);
1138 }
1139 cb->args[0] = dump_type;
a7b4f989 1140
a7b4f989
JK
1141 return 0;
1142}
1143
1144static int
1145ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
1146{
1147 ip_set_id_t index = IPSET_INVALID_ID, max;
1148 struct ip_set *set = NULL;
1149 struct nlmsghdr *nlh = NULL;
15e47304 1150 unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0;
c1e2e043 1151 u32 dump_type, dump_flags;
a7b4f989
JK
1152 int ret = 0;
1153
c1e2e043 1154 if (!cb->args[0]) {
a7b4f989
JK
1155 ret = dump_init(cb);
1156 if (ret < 0) {
1157 nlh = nlmsg_hdr(cb->skb);
1158 /* We have to create and send the error message
1159 * manually :-( */
1160 if (nlh->nlmsg_flags & NLM_F_ACK)
1161 netlink_ack(cb->skb, nlh, ret);
1162 return ret;
1163 }
1164 }
1165
1166 if (cb->args[1] >= ip_set_max)
1167 goto out;
1168
c1e2e043
JK
1169 dump_type = DUMP_TYPE(cb->args[0]);
1170 dump_flags = DUMP_FLAGS(cb->args[0]);
1171 max = dump_type == DUMP_ONE ? cb->args[1] + 1 : ip_set_max;
a8a8a093 1172dump_last:
c1e2e043
JK
1173 pr_debug("args[0]: %u %u args[1]: %ld\n",
1174 dump_type, dump_flags, cb->args[1]);
a7b4f989
JK
1175 for (; cb->args[1] < max; cb->args[1]++) {
1176 index = (ip_set_id_t) cb->args[1];
9076aea7 1177 set = nfnl_set(index);
a7b4f989 1178 if (set == NULL) {
c1e2e043 1179 if (dump_type == DUMP_ONE) {
a7b4f989
JK
1180 ret = -ENOENT;
1181 goto out;
1182 }
1183 continue;
1184 }
1185 /* When dumping all sets, we must dump "sorted"
1186 * so that lists (unions of sets) are dumped last.
1187 */
c1e2e043
JK
1188 if (dump_type != DUMP_ONE &&
1189 ((dump_type == DUMP_ALL) ==
a8a8a093 1190 !!(set->type->features & IPSET_DUMP_LAST)))
a7b4f989
JK
1191 continue;
1192 pr_debug("List set: %s\n", set->name);
1193 if (!cb->args[2]) {
1194 /* Start listing: make sure set won't be destroyed */
1195 pr_debug("reference set\n");
9076aea7 1196 __ip_set_get(set);
a7b4f989 1197 }
15e47304 1198 nlh = start_msg(skb, NETLINK_CB(cb->skb).portid,
a7b4f989
JK
1199 cb->nlh->nlmsg_seq, flags,
1200 IPSET_CMD_LIST);
1201 if (!nlh) {
1202 ret = -EMSGSIZE;
1203 goto release_refcount;
1204 }
7cf7899d
DM
1205 if (nla_put_u8(skb, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) ||
1206 nla_put_string(skb, IPSET_ATTR_SETNAME, set->name))
1207 goto nla_put_failure;
c1e2e043
JK
1208 if (dump_flags & IPSET_FLAG_LIST_SETNAME)
1209 goto next_set;
a7b4f989
JK
1210 switch (cb->args[2]) {
1211 case 0:
1212 /* Core header data */
7cf7899d
DM
1213 if (nla_put_string(skb, IPSET_ATTR_TYPENAME,
1214 set->type->name) ||
1215 nla_put_u8(skb, IPSET_ATTR_FAMILY,
1216 set->family) ||
1217 nla_put_u8(skb, IPSET_ATTR_REVISION,
1218 set->revision))
1219 goto nla_put_failure;
a7b4f989
JK
1220 ret = set->variant->head(set, skb);
1221 if (ret < 0)
1222 goto release_refcount;
c1e2e043
JK
1223 if (dump_flags & IPSET_FLAG_LIST_HEADER)
1224 goto next_set;
a7b4f989
JK
1225 /* Fall through and add elements */
1226 default:
1227 read_lock_bh(&set->lock);
1228 ret = set->variant->list(set, skb, cb);
1229 read_unlock_bh(&set->lock);
c1e2e043 1230 if (!cb->args[2])
a7b4f989 1231 /* Set is done, proceed with next one */
c1e2e043 1232 goto next_set;
a7b4f989
JK
1233 goto release_refcount;
1234 }
1235 }
a8a8a093 1236 /* If we dump all sets, continue with dumping last ones */
c1e2e043
JK
1237 if (dump_type == DUMP_ALL) {
1238 dump_type = DUMP_LAST;
1239 cb->args[0] = dump_type | (dump_flags << 16);
a8a8a093
JK
1240 cb->args[1] = 0;
1241 goto dump_last;
1242 }
a7b4f989
JK
1243 goto out;
1244
1245nla_put_failure:
1246 ret = -EFAULT;
c1e2e043
JK
1247next_set:
1248 if (dump_type == DUMP_ONE)
1249 cb->args[1] = IPSET_INVALID_ID;
1250 else
1251 cb->args[1]++;
a7b4f989
JK
1252release_refcount:
1253 /* If there was an error or set is done, release set */
1254 if (ret || !cb->args[2]) {
9076aea7 1255 pr_debug("release set %s\n", nfnl_set(index)->name);
2f9f28b2 1256 ip_set_put_byindex(index);
be94db9d 1257 cb->args[2] = 0;
a7b4f989 1258 }
a7b4f989
JK
1259out:
1260 if (nlh) {
1261 nlmsg_end(skb, nlh);
1262 pr_debug("nlmsg_len: %u\n", nlh->nlmsg_len);
1263 dump_attrs(nlh);
1264 }
1265
1266 return ret < 0 ? ret : skb->len;
1267}
1268
1269static int
1270ip_set_dump(struct sock *ctnl, struct sk_buff *skb,
1271 const struct nlmsghdr *nlh,
1272 const struct nlattr * const attr[])
1273{
1274 if (unlikely(protocol_failed(attr)))
1275 return -IPSET_ERR_PROTOCOL;
1276
80d326fa
PNA
1277 {
1278 struct netlink_dump_control c = {
1279 .dump = ip_set_dump_start,
1280 .done = ip_set_dump_done,
1281 };
1282 return netlink_dump_start(ctnl, skb, nlh, &c);
1283 }
a7b4f989
JK
1284}
1285
1286/* Add, del and test */
1287
1288static const struct nla_policy ip_set_adt_policy[IPSET_ATTR_CMD_MAX + 1] = {
1289 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
1290 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
1291 .len = IPSET_MAXNAMELEN - 1 },
1292 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
1293 [IPSET_ATTR_DATA] = { .type = NLA_NESTED },
1294 [IPSET_ATTR_ADT] = { .type = NLA_NESTED },
1295};
1296
1297static int
5f52bc3c 1298call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
a7b4f989
JK
1299 struct nlattr *tb[], enum ipset_adt adt,
1300 u32 flags, bool use_lineno)
1301{
3d14b171 1302 int ret;
a7b4f989 1303 u32 lineno = 0;
3d14b171 1304 bool eexist = flags & IPSET_FLAG_EXIST, retried = false;
a7b4f989
JK
1305
1306 do {
1307 write_lock_bh(&set->lock);
3d14b171 1308 ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
a7b4f989 1309 write_unlock_bh(&set->lock);
3d14b171 1310 retried = true;
a7b4f989
JK
1311 } while (ret == -EAGAIN &&
1312 set->variant->resize &&
3d14b171 1313 (ret = set->variant->resize(set, retried)) == 0);
a7b4f989
JK
1314
1315 if (!ret || (ret == -IPSET_ERR_EXIST && eexist))
1316 return 0;
1317 if (lineno && use_lineno) {
1318 /* Error in restore/batch mode: send back lineno */
5f52bc3c
JK
1319 struct nlmsghdr *rep, *nlh = nlmsg_hdr(skb);
1320 struct sk_buff *skb2;
1321 struct nlmsgerr *errmsg;
1322 size_t payload = sizeof(*errmsg) + nlmsg_len(nlh);
573ce260 1323 int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
a7b4f989 1324 struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
5f52bc3c 1325 struct nlattr *cmdattr;
a7b4f989
JK
1326 u32 *errline;
1327
5f52bc3c
JK
1328 skb2 = nlmsg_new(payload, GFP_KERNEL);
1329 if (skb2 == NULL)
1330 return -ENOMEM;
15e47304 1331 rep = __nlmsg_put(skb2, NETLINK_CB(skb).portid,
5f52bc3c
JK
1332 nlh->nlmsg_seq, NLMSG_ERROR, payload, 0);
1333 errmsg = nlmsg_data(rep);
1334 errmsg->error = ret;
1335 memcpy(&errmsg->msg, nlh, nlh->nlmsg_len);
1336 cmdattr = (void *)&errmsg->msg + min_len;
1337
a7b4f989
JK
1338 nla_parse(cda, IPSET_ATTR_CMD_MAX,
1339 cmdattr, nlh->nlmsg_len - min_len,
1340 ip_set_adt_policy);
1341
1342 errline = nla_data(cda[IPSET_ATTR_LINENO]);
1343
1344 *errline = lineno;
5f52bc3c 1345
15e47304 1346 netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
5f52bc3c
JK
1347 /* Signal netlink not to send its ACK/errmsg. */
1348 return -EINTR;
a7b4f989
JK
1349 }
1350
1351 return ret;
1352}
1353
1354static int
1355ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
1356 const struct nlmsghdr *nlh,
1357 const struct nlattr * const attr[])
1358{
1359 struct ip_set *set;
1360 struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1361 const struct nlattr *nla;
1362 u32 flags = flag_exist(nlh);
1363 bool use_lineno;
1364 int ret = 0;
1365
1366 if (unlikely(protocol_failed(attr) ||
1367 attr[IPSET_ATTR_SETNAME] == NULL ||
1368 !((attr[IPSET_ATTR_DATA] != NULL) ^
1369 (attr[IPSET_ATTR_ADT] != NULL)) ||
1370 (attr[IPSET_ATTR_DATA] != NULL &&
1371 !flag_nested(attr[IPSET_ATTR_DATA])) ||
1372 (attr[IPSET_ATTR_ADT] != NULL &&
1373 (!flag_nested(attr[IPSET_ATTR_ADT]) ||
1374 attr[IPSET_ATTR_LINENO] == NULL))))
1375 return -IPSET_ERR_PROTOCOL;
1376
1377 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1378 if (set == NULL)
1379 return -ENOENT;
1380
1381 use_lineno = !!attr[IPSET_ATTR_LINENO];
1382 if (attr[IPSET_ATTR_DATA]) {
8da560ce
PM
1383 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
1384 attr[IPSET_ATTR_DATA],
1385 set->type->adt_policy))
a7b4f989 1386 return -IPSET_ERR_PROTOCOL;
5f52bc3c
JK
1387 ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, flags,
1388 use_lineno);
a7b4f989
JK
1389 } else {
1390 int nla_rem;
1391
1392 nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
1393 memset(tb, 0, sizeof(tb));
1394 if (nla_type(nla) != IPSET_ATTR_DATA ||
1395 !flag_nested(nla) ||
8da560ce
PM
1396 nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
1397 set->type->adt_policy))
a7b4f989 1398 return -IPSET_ERR_PROTOCOL;
5f52bc3c 1399 ret = call_ad(ctnl, skb, set, tb, IPSET_ADD,
a7b4f989
JK
1400 flags, use_lineno);
1401 if (ret < 0)
1402 return ret;
1403 }
1404 }
1405 return ret;
1406}
1407
1408static int
1409ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
1410 const struct nlmsghdr *nlh,
1411 const struct nlattr * const attr[])
1412{
1413 struct ip_set *set;
1414 struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1415 const struct nlattr *nla;
1416 u32 flags = flag_exist(nlh);
1417 bool use_lineno;
1418 int ret = 0;
1419
1420 if (unlikely(protocol_failed(attr) ||
1421 attr[IPSET_ATTR_SETNAME] == NULL ||
1422 !((attr[IPSET_ATTR_DATA] != NULL) ^
1423 (attr[IPSET_ATTR_ADT] != NULL)) ||
1424 (attr[IPSET_ATTR_DATA] != NULL &&
1425 !flag_nested(attr[IPSET_ATTR_DATA])) ||
1426 (attr[IPSET_ATTR_ADT] != NULL &&
1427 (!flag_nested(attr[IPSET_ATTR_ADT]) ||
1428 attr[IPSET_ATTR_LINENO] == NULL))))
1429 return -IPSET_ERR_PROTOCOL;
1430
1431 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1432 if (set == NULL)
1433 return -ENOENT;
1434
1435 use_lineno = !!attr[IPSET_ATTR_LINENO];
1436 if (attr[IPSET_ATTR_DATA]) {
8da560ce
PM
1437 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
1438 attr[IPSET_ATTR_DATA],
1439 set->type->adt_policy))
a7b4f989 1440 return -IPSET_ERR_PROTOCOL;
5f52bc3c
JK
1441 ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, flags,
1442 use_lineno);
a7b4f989
JK
1443 } else {
1444 int nla_rem;
1445
1446 nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
1447 memset(tb, 0, sizeof(*tb));
1448 if (nla_type(nla) != IPSET_ATTR_DATA ||
1449 !flag_nested(nla) ||
8da560ce
PM
1450 nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
1451 set->type->adt_policy))
a7b4f989 1452 return -IPSET_ERR_PROTOCOL;
5f52bc3c 1453 ret = call_ad(ctnl, skb, set, tb, IPSET_DEL,
a7b4f989
JK
1454 flags, use_lineno);
1455 if (ret < 0)
1456 return ret;
1457 }
1458 }
1459 return ret;
1460}
1461
1462static int
1463ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
1464 const struct nlmsghdr *nlh,
1465 const struct nlattr * const attr[])
1466{
1467 struct ip_set *set;
1468 struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1469 int ret = 0;
1470
1471 if (unlikely(protocol_failed(attr) ||
1472 attr[IPSET_ATTR_SETNAME] == NULL ||
1473 attr[IPSET_ATTR_DATA] == NULL ||
1474 !flag_nested(attr[IPSET_ATTR_DATA])))
1475 return -IPSET_ERR_PROTOCOL;
1476
1477 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1478 if (set == NULL)
1479 return -ENOENT;
1480
8da560ce
PM
1481 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA],
1482 set->type->adt_policy))
a7b4f989
JK
1483 return -IPSET_ERR_PROTOCOL;
1484
1485 read_lock_bh(&set->lock);
3d14b171 1486 ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0, 0);
a7b4f989
JK
1487 read_unlock_bh(&set->lock);
1488 /* Userspace can't trigger element to be re-added */
1489 if (ret == -EAGAIN)
1490 ret = 1;
1491
0f1799ba 1492 return ret > 0 ? 0 : -IPSET_ERR_EXIST;
a7b4f989
JK
1493}
1494
1495/* Get headed data of a set */
1496
1497static int
1498ip_set_header(struct sock *ctnl, struct sk_buff *skb,
1499 const struct nlmsghdr *nlh,
1500 const struct nlattr * const attr[])
1501{
1502 const struct ip_set *set;
1503 struct sk_buff *skb2;
1504 struct nlmsghdr *nlh2;
a7b4f989
JK
1505 int ret = 0;
1506
1507 if (unlikely(protocol_failed(attr) ||
1508 attr[IPSET_ATTR_SETNAME] == NULL))
1509 return -IPSET_ERR_PROTOCOL;
1510
9076aea7
JK
1511 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1512 if (set == NULL)
a7b4f989 1513 return -ENOENT;
a7b4f989
JK
1514
1515 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1516 if (skb2 == NULL)
1517 return -ENOMEM;
1518
15e47304 1519 nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
a7b4f989
JK
1520 IPSET_CMD_HEADER);
1521 if (!nlh2)
1522 goto nlmsg_failure;
7cf7899d
DM
1523 if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) ||
1524 nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name) ||
1525 nla_put_string(skb2, IPSET_ATTR_TYPENAME, set->type->name) ||
1526 nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) ||
1527 nla_put_u8(skb2, IPSET_ATTR_REVISION, set->revision))
1528 goto nla_put_failure;
a7b4f989
JK
1529 nlmsg_end(skb2, nlh2);
1530
15e47304 1531 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
a7b4f989
JK
1532 if (ret < 0)
1533 return ret;
1534
1535 return 0;
1536
1537nla_put_failure:
1538 nlmsg_cancel(skb2, nlh2);
1539nlmsg_failure:
1540 kfree_skb(skb2);
1541 return -EMSGSIZE;
1542}
1543
1544/* Get type data */
1545
1546static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = {
1547 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
1548 [IPSET_ATTR_TYPENAME] = { .type = NLA_NUL_STRING,
1549 .len = IPSET_MAXNAMELEN - 1 },
1550 [IPSET_ATTR_FAMILY] = { .type = NLA_U8 },
1551};
1552
1553static int
1554ip_set_type(struct sock *ctnl, struct sk_buff *skb,
1555 const struct nlmsghdr *nlh,
1556 const struct nlattr * const attr[])
1557{
1558 struct sk_buff *skb2;
1559 struct nlmsghdr *nlh2;
1560 u8 family, min, max;
1561 const char *typename;
1562 int ret = 0;
1563
1564 if (unlikely(protocol_failed(attr) ||
1565 attr[IPSET_ATTR_TYPENAME] == NULL ||
1566 attr[IPSET_ATTR_FAMILY] == NULL))
1567 return -IPSET_ERR_PROTOCOL;
1568
1569 family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
1570 typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
1571 ret = find_set_type_minmax(typename, family, &min, &max);
1572 if (ret)
1573 return ret;
1574
1575 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1576 if (skb2 == NULL)
1577 return -ENOMEM;
1578
15e47304 1579 nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
a7b4f989
JK
1580 IPSET_CMD_TYPE);
1581 if (!nlh2)
1582 goto nlmsg_failure;
7cf7899d
DM
1583 if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) ||
1584 nla_put_string(skb2, IPSET_ATTR_TYPENAME, typename) ||
1585 nla_put_u8(skb2, IPSET_ATTR_FAMILY, family) ||
1586 nla_put_u8(skb2, IPSET_ATTR_REVISION, max) ||
1587 nla_put_u8(skb2, IPSET_ATTR_REVISION_MIN, min))
1588 goto nla_put_failure;
a7b4f989
JK
1589 nlmsg_end(skb2, nlh2);
1590
1591 pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len);
15e47304 1592 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
a7b4f989
JK
1593 if (ret < 0)
1594 return ret;
1595
1596 return 0;
1597
1598nla_put_failure:
1599 nlmsg_cancel(skb2, nlh2);
1600nlmsg_failure:
1601 kfree_skb(skb2);
1602 return -EMSGSIZE;
1603}
1604
1605/* Get protocol version */
1606
1607static const struct nla_policy
1608ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = {
1609 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
1610};
1611
1612static int
1613ip_set_protocol(struct sock *ctnl, struct sk_buff *skb,
1614 const struct nlmsghdr *nlh,
1615 const struct nlattr * const attr[])
1616{
1617 struct sk_buff *skb2;
1618 struct nlmsghdr *nlh2;
1619 int ret = 0;
1620
1621 if (unlikely(attr[IPSET_ATTR_PROTOCOL] == NULL))
1622 return -IPSET_ERR_PROTOCOL;
1623
1624 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1625 if (skb2 == NULL)
1626 return -ENOMEM;
1627
15e47304 1628 nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
a7b4f989
JK
1629 IPSET_CMD_PROTOCOL);
1630 if (!nlh2)
1631 goto nlmsg_failure;
7cf7899d
DM
1632 if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL))
1633 goto nla_put_failure;
a7b4f989
JK
1634 nlmsg_end(skb2, nlh2);
1635
15e47304 1636 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
a7b4f989
JK
1637 if (ret < 0)
1638 return ret;
1639
1640 return 0;
1641
1642nla_put_failure:
1643 nlmsg_cancel(skb2, nlh2);
1644nlmsg_failure:
1645 kfree_skb(skb2);
1646 return -EMSGSIZE;
1647}
1648
1649static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = {
d31f4d44
TB
1650 [IPSET_CMD_NONE] = {
1651 .call = ip_set_none,
1652 .attr_count = IPSET_ATTR_CMD_MAX,
1653 },
a7b4f989
JK
1654 [IPSET_CMD_CREATE] = {
1655 .call = ip_set_create,
1656 .attr_count = IPSET_ATTR_CMD_MAX,
1657 .policy = ip_set_create_policy,
1658 },
1659 [IPSET_CMD_DESTROY] = {
1660 .call = ip_set_destroy,
1661 .attr_count = IPSET_ATTR_CMD_MAX,
1662 .policy = ip_set_setname_policy,
1663 },
1664 [IPSET_CMD_FLUSH] = {
1665 .call = ip_set_flush,
1666 .attr_count = IPSET_ATTR_CMD_MAX,
1667 .policy = ip_set_setname_policy,
1668 },
1669 [IPSET_CMD_RENAME] = {
1670 .call = ip_set_rename,
1671 .attr_count = IPSET_ATTR_CMD_MAX,
1672 .policy = ip_set_setname2_policy,
1673 },
1674 [IPSET_CMD_SWAP] = {
1675 .call = ip_set_swap,
1676 .attr_count = IPSET_ATTR_CMD_MAX,
1677 .policy = ip_set_setname2_policy,
1678 },
1679 [IPSET_CMD_LIST] = {
1680 .call = ip_set_dump,
1681 .attr_count = IPSET_ATTR_CMD_MAX,
1682 .policy = ip_set_setname_policy,
1683 },
1684 [IPSET_CMD_SAVE] = {
1685 .call = ip_set_dump,
1686 .attr_count = IPSET_ATTR_CMD_MAX,
1687 .policy = ip_set_setname_policy,
1688 },
1689 [IPSET_CMD_ADD] = {
1690 .call = ip_set_uadd,
1691 .attr_count = IPSET_ATTR_CMD_MAX,
1692 .policy = ip_set_adt_policy,
1693 },
1694 [IPSET_CMD_DEL] = {
1695 .call = ip_set_udel,
1696 .attr_count = IPSET_ATTR_CMD_MAX,
1697 .policy = ip_set_adt_policy,
1698 },
1699 [IPSET_CMD_TEST] = {
1700 .call = ip_set_utest,
1701 .attr_count = IPSET_ATTR_CMD_MAX,
1702 .policy = ip_set_adt_policy,
1703 },
1704 [IPSET_CMD_HEADER] = {
1705 .call = ip_set_header,
1706 .attr_count = IPSET_ATTR_CMD_MAX,
1707 .policy = ip_set_setname_policy,
1708 },
1709 [IPSET_CMD_TYPE] = {
1710 .call = ip_set_type,
1711 .attr_count = IPSET_ATTR_CMD_MAX,
1712 .policy = ip_set_type_policy,
1713 },
1714 [IPSET_CMD_PROTOCOL] = {
1715 .call = ip_set_protocol,
1716 .attr_count = IPSET_ATTR_CMD_MAX,
1717 .policy = ip_set_protocol_policy,
1718 },
1719};
1720
1721static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = {
1722 .name = "ip_set",
1723 .subsys_id = NFNL_SUBSYS_IPSET,
1724 .cb_count = IPSET_MSG_MAX,
1725 .cb = ip_set_netlink_subsys_cb,
1726};
1727
1728/* Interface to iptables/ip6tables */
1729
1730static int
1731ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
1732{
95c96174 1733 unsigned int *op;
a7b4f989
JK
1734 void *data;
1735 int copylen = *len, ret = 0;
1736
df008c91 1737 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
a7b4f989
JK
1738 return -EPERM;
1739 if (optval != SO_IP_SET)
1740 return -EBADF;
95c96174 1741 if (*len < sizeof(unsigned int))
a7b4f989
JK
1742 return -EINVAL;
1743
1744 data = vmalloc(*len);
1745 if (!data)
1746 return -ENOMEM;
1747 if (copy_from_user(data, user, *len) != 0) {
1748 ret = -EFAULT;
1749 goto done;
1750 }
95c96174 1751 op = (unsigned int *) data;
a7b4f989
JK
1752
1753 if (*op < IP_SET_OP_VERSION) {
1754 /* Check the version at the beginning of operations */
1755 struct ip_set_req_version *req_version = data;
1756 if (req_version->version != IPSET_PROTOCOL) {
1757 ret = -EPROTO;
1758 goto done;
1759 }
1760 }
1761
1762 switch (*op) {
1763 case IP_SET_OP_VERSION: {
1764 struct ip_set_req_version *req_version = data;
1765
1766 if (*len != sizeof(struct ip_set_req_version)) {
1767 ret = -EINVAL;
1768 goto done;
1769 }
1770
1771 req_version->version = IPSET_PROTOCOL;
1772 ret = copy_to_user(user, req_version,
1773 sizeof(struct ip_set_req_version));
1774 goto done;
1775 }
1776 case IP_SET_OP_GET_BYNAME: {
1777 struct ip_set_req_get_set *req_get = data;
9076aea7 1778 ip_set_id_t id;
a7b4f989
JK
1779
1780 if (*len != sizeof(struct ip_set_req_get_set)) {
1781 ret = -EINVAL;
1782 goto done;
1783 }
1784 req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
c14b78e7 1785 nfnl_lock(NFNL_SUBSYS_IPSET);
9076aea7
JK
1786 find_set_and_id(req_get->set.name, &id);
1787 req_get->set.index = id;
c14b78e7 1788 nfnl_unlock(NFNL_SUBSYS_IPSET);
a7b4f989
JK
1789 goto copy;
1790 }
1791 case IP_SET_OP_GET_BYINDEX: {
1792 struct ip_set_req_get_set *req_get = data;
9076aea7 1793 struct ip_set *set;
a7b4f989
JK
1794
1795 if (*len != sizeof(struct ip_set_req_get_set) ||
1796 req_get->set.index >= ip_set_max) {
1797 ret = -EINVAL;
1798 goto done;
1799 }
c14b78e7 1800 nfnl_lock(NFNL_SUBSYS_IPSET);
9076aea7
JK
1801 set = nfnl_set(req_get->set.index);
1802 strncpy(req_get->set.name, set ? set->name : "",
a7b4f989 1803 IPSET_MAXNAMELEN);
c14b78e7 1804 nfnl_unlock(NFNL_SUBSYS_IPSET);
a7b4f989
JK
1805 goto copy;
1806 }
1807 default:
1808 ret = -EBADMSG;
1809 goto done;
1810 } /* end of switch(op) */
1811
1812copy:
1813 ret = copy_to_user(user, data, copylen);
1814
1815done:
1816 vfree(data);
1817 if (ret > 0)
1818 ret = 0;
1819 return ret;
1820}
1821
1822static struct nf_sockopt_ops so_set __read_mostly = {
1823 .pf = PF_INET,
1824 .get_optmin = SO_IP_SET,
1825 .get_optmax = SO_IP_SET + 1,
1826 .get = &ip_set_sockfn_get,
1827 .owner = THIS_MODULE,
1828};
1829
1830static int __init
1831ip_set_init(void)
1832{
9076aea7 1833 struct ip_set **list;
a7b4f989
JK
1834 int ret;
1835
1836 if (max_sets)
1837 ip_set_max = max_sets;
1838 if (ip_set_max >= IPSET_INVALID_ID)
1839 ip_set_max = IPSET_INVALID_ID - 1;
1840
9076aea7
JK
1841 list = kzalloc(sizeof(struct ip_set *) * ip_set_max, GFP_KERNEL);
1842 if (!list)
a7b4f989 1843 return -ENOMEM;
a7b4f989 1844
9076aea7 1845 rcu_assign_pointer(ip_set_list, list);
a7b4f989
JK
1846 ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
1847 if (ret != 0) {
1848 pr_err("ip_set: cannot register with nfnetlink.\n");
9076aea7 1849 kfree(list);
a7b4f989
JK
1850 return ret;
1851 }
1852 ret = nf_register_sockopt(&so_set);
1853 if (ret != 0) {
1854 pr_err("SO_SET registry failed: %d\n", ret);
1855 nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
9076aea7 1856 kfree(list);
a7b4f989
JK
1857 return ret;
1858 }
1859
1860 pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL);
1861 return 0;
1862}
1863
1864static void __exit
1865ip_set_fini(void)
1866{
9076aea7
JK
1867 struct ip_set **list = rcu_dereference_protected(ip_set_list, 1);
1868
a7b4f989
JK
1869 /* There can't be any existing set */
1870 nf_unregister_sockopt(&so_set);
1871 nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
9076aea7 1872 kfree(list);
a7b4f989
JK
1873 pr_debug("these are the famous last words\n");
1874}
1875
1876module_init(ip_set_init);
1877module_exit(ip_set_fini);