netfilter: ipset: Add hash:net,net module to kernel.
[linux-block.git] / net / netfilter / ipset / ip_set_core.c
CommitLineData
a7b4f989
JK
1/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
2 * Patrick Schaaf <bof@bof.de>
075e64c0 3 * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
a7b4f989
JK
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10/* Kernel module for IP set management */
11
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/moduleparam.h>
15#include <linux/ip.h>
16#include <linux/skbuff.h>
17#include <linux/spinlock.h>
a7b4f989 18#include <linux/rculist.h>
a7b4f989
JK
19#include <net/netlink.h>
20
21#include <linux/netfilter.h>
b66554cf 22#include <linux/netfilter/x_tables.h>
a7b4f989
JK
23#include <linux/netfilter/nfnetlink.h>
24#include <linux/netfilter/ipset/ip_set.h>
25
26static LIST_HEAD(ip_set_type_list); /* all registered set types */
27static DEFINE_MUTEX(ip_set_type_mutex); /* protects ip_set_type_list */
2f9f28b2 28static DEFINE_RWLOCK(ip_set_ref_lock); /* protects the set refs */
a7b4f989 29
9076aea7 30static struct ip_set * __rcu *ip_set_list; /* all individual sets */
a7b4f989
JK
31static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */
32
9076aea7 33#define IP_SET_INC 64
a7b4f989
JK
34#define STREQ(a, b) (strncmp(a, b, IPSET_MAXNAMELEN) == 0)
35
36static unsigned int max_sets;
37
38module_param(max_sets, int, 0600);
39MODULE_PARM_DESC(max_sets, "maximal number of sets");
40MODULE_LICENSE("GPL");
41MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
42MODULE_DESCRIPTION("core IP set support");
43MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
44
9076aea7
JK
45/* When the nfnl mutex is held: */
46#define nfnl_dereference(p) \
47 rcu_dereference_protected(p, 1)
48#define nfnl_set(id) \
49 nfnl_dereference(ip_set_list)[id]
50
a7b4f989
JK
51/*
52 * The set types are implemented in modules and registered set types
53 * can be found in ip_set_type_list. Adding/deleting types is
54 * serialized by ip_set_type_mutex.
55 */
56
57static inline void
58ip_set_type_lock(void)
59{
60 mutex_lock(&ip_set_type_mutex);
61}
62
63static inline void
64ip_set_type_unlock(void)
65{
66 mutex_unlock(&ip_set_type_mutex);
67}
68
69/* Register and deregister settype */
70
71static struct ip_set_type *
72find_set_type(const char *name, u8 family, u8 revision)
73{
74 struct ip_set_type *type;
75
76 list_for_each_entry_rcu(type, &ip_set_type_list, list)
77 if (STREQ(type->name, name) &&
3ace95c0
JK
78 (type->family == family ||
79 type->family == NFPROTO_UNSPEC) &&
f1e00b39
JK
80 revision >= type->revision_min &&
81 revision <= type->revision_max)
a7b4f989
JK
82 return type;
83 return NULL;
84}
85
86/* Unlock, try to load a set type module and lock again */
088067f4
JK
87static bool
88load_settype(const char *name)
a7b4f989 89{
c14b78e7 90 nfnl_unlock(NFNL_SUBSYS_IPSET);
a7b4f989
JK
91 pr_debug("try to load ip_set_%s\n", name);
92 if (request_module("ip_set_%s", name) < 0) {
93 pr_warning("Can't find ip_set type %s\n", name);
c14b78e7 94 nfnl_lock(NFNL_SUBSYS_IPSET);
088067f4 95 return false;
a7b4f989 96 }
c14b78e7 97 nfnl_lock(NFNL_SUBSYS_IPSET);
088067f4 98 return true;
a7b4f989
JK
99}
100
101/* Find a set type and reference it */
088067f4
JK
102#define find_set_type_get(name, family, revision, found) \
103 __find_set_type_get(name, family, revision, found, false)
104
a7b4f989 105static int
088067f4
JK
106__find_set_type_get(const char *name, u8 family, u8 revision,
107 struct ip_set_type **found, bool retry)
a7b4f989 108{
5c1aba46
JK
109 struct ip_set_type *type;
110 int err;
111
088067f4
JK
112 if (retry && !load_settype(name))
113 return -IPSET_ERR_FIND_TYPE;
114
a7b4f989
JK
115 rcu_read_lock();
116 *found = find_set_type(name, family, revision);
117 if (*found) {
5c1aba46
JK
118 err = !try_module_get((*found)->me) ? -EFAULT : 0;
119 goto unlock;
a7b4f989 120 }
088067f4
JK
121 /* Make sure the type is already loaded
122 * but we don't support the revision */
5c1aba46
JK
123 list_for_each_entry_rcu(type, &ip_set_type_list, list)
124 if (STREQ(type->name, name)) {
125 err = -IPSET_ERR_FIND_TYPE;
126 goto unlock;
127 }
a7b4f989
JK
128 rcu_read_unlock();
129
088067f4
JK
130 return retry ? -IPSET_ERR_FIND_TYPE :
131 __find_set_type_get(name, family, revision, found, true);
5c1aba46
JK
132
133unlock:
134 rcu_read_unlock();
135 return err;
a7b4f989
JK
136}
137
138/* Find a given set type by name and family.
139 * If we succeeded, the supported minimal and maximum revisions are
140 * filled out.
141 */
088067f4
JK
142#define find_set_type_minmax(name, family, min, max) \
143 __find_set_type_minmax(name, family, min, max, false)
144
a7b4f989 145static int
088067f4
JK
146__find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max,
147 bool retry)
a7b4f989
JK
148{
149 struct ip_set_type *type;
150 bool found = false;
151
088067f4
JK
152 if (retry && !load_settype(name))
153 return -IPSET_ERR_FIND_TYPE;
154
5c1aba46 155 *min = 255; *max = 0;
a7b4f989
JK
156 rcu_read_lock();
157 list_for_each_entry_rcu(type, &ip_set_type_list, list)
158 if (STREQ(type->name, name) &&
3ace95c0
JK
159 (type->family == family ||
160 type->family == NFPROTO_UNSPEC)) {
a7b4f989 161 found = true;
f1e00b39
JK
162 if (type->revision_min < *min)
163 *min = type->revision_min;
164 if (type->revision_max > *max)
165 *max = type->revision_max;
a7b4f989
JK
166 }
167 rcu_read_unlock();
168 if (found)
169 return 0;
170
088067f4
JK
171 return retry ? -IPSET_ERR_FIND_TYPE :
172 __find_set_type_minmax(name, family, min, max, true);
a7b4f989
JK
173}
174
c15f1c83
JE
175#define family_name(f) ((f) == NFPROTO_IPV4 ? "inet" : \
176 (f) == NFPROTO_IPV6 ? "inet6" : "any")
a7b4f989
JK
177
178/* Register a set type structure. The type is identified by
179 * the unique triple of name, family and revision.
180 */
181int
182ip_set_type_register(struct ip_set_type *type)
183{
184 int ret = 0;
185
186 if (type->protocol != IPSET_PROTOCOL) {
f1e00b39 187 pr_warning("ip_set type %s, family %s, revision %u:%u uses "
a7b4f989
JK
188 "wrong protocol version %u (want %u)\n",
189 type->name, family_name(type->family),
f1e00b39
JK
190 type->revision_min, type->revision_max,
191 type->protocol, IPSET_PROTOCOL);
a7b4f989
JK
192 return -EINVAL;
193 }
194
195 ip_set_type_lock();
f1e00b39 196 if (find_set_type(type->name, type->family, type->revision_min)) {
a7b4f989 197 /* Duplicate! */
f1e00b39 198 pr_warning("ip_set type %s, family %s with revision min %u "
a7b4f989 199 "already registered!\n", type->name,
f1e00b39 200 family_name(type->family), type->revision_min);
a7b4f989
JK
201 ret = -EINVAL;
202 goto unlock;
203 }
204 list_add_rcu(&type->list, &ip_set_type_list);
f1e00b39
JK
205 pr_debug("type %s, family %s, revision %u:%u registered.\n",
206 type->name, family_name(type->family),
207 type->revision_min, type->revision_max);
a7b4f989
JK
208unlock:
209 ip_set_type_unlock();
210 return ret;
211}
212EXPORT_SYMBOL_GPL(ip_set_type_register);
213
214/* Unregister a set type. There's a small race with ip_set_create */
215void
216ip_set_type_unregister(struct ip_set_type *type)
217{
218 ip_set_type_lock();
f1e00b39
JK
219 if (!find_set_type(type->name, type->family, type->revision_min)) {
220 pr_warning("ip_set type %s, family %s with revision min %u "
a7b4f989 221 "not registered\n", type->name,
f1e00b39 222 family_name(type->family), type->revision_min);
a7b4f989
JK
223 goto unlock;
224 }
225 list_del_rcu(&type->list);
f1e00b39
JK
226 pr_debug("type %s, family %s with revision min %u unregistered.\n",
227 type->name, family_name(type->family), type->revision_min);
a7b4f989
JK
228unlock:
229 ip_set_type_unlock();
230
231 synchronize_rcu();
232}
233EXPORT_SYMBOL_GPL(ip_set_type_unregister);
234
235/* Utility functions */
236void *
237ip_set_alloc(size_t size)
238{
239 void *members = NULL;
240
241 if (size < KMALLOC_MAX_SIZE)
242 members = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
243
244 if (members) {
245 pr_debug("%p: allocated with kmalloc\n", members);
246 return members;
247 }
248
249 members = vzalloc(size);
250 if (!members)
251 return NULL;
252 pr_debug("%p: allocated with vmalloc\n", members);
253
254 return members;
255}
256EXPORT_SYMBOL_GPL(ip_set_alloc);
257
258void
259ip_set_free(void *members)
260{
261 pr_debug("%p: free with %s\n", members,
262 is_vmalloc_addr(members) ? "vfree" : "kfree");
263 if (is_vmalloc_addr(members))
264 vfree(members);
265 else
266 kfree(members);
267}
268EXPORT_SYMBOL_GPL(ip_set_free);
269
270static inline bool
271flag_nested(const struct nlattr *nla)
272{
273 return nla->nla_type & NLA_F_NESTED;
274}
275
276static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = {
277 [IPSET_ATTR_IPADDR_IPV4] = { .type = NLA_U32 },
278 [IPSET_ATTR_IPADDR_IPV6] = { .type = NLA_BINARY,
279 .len = sizeof(struct in6_addr) },
280};
281
282int
283ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr)
284{
285 struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
286
287 if (unlikely(!flag_nested(nla)))
288 return -IPSET_ERR_PROTOCOL;
8da560ce 289 if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy))
a7b4f989
JK
290 return -IPSET_ERR_PROTOCOL;
291 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4)))
292 return -IPSET_ERR_PROTOCOL;
293
294 *ipaddr = nla_get_be32(tb[IPSET_ATTR_IPADDR_IPV4]);
295 return 0;
296}
297EXPORT_SYMBOL_GPL(ip_set_get_ipaddr4);
298
299int
300ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
301{
302 struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
303
304 if (unlikely(!flag_nested(nla)))
305 return -IPSET_ERR_PROTOCOL;
306
8da560ce 307 if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy))
a7b4f989
JK
308 return -IPSET_ERR_PROTOCOL;
309 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6)))
310 return -IPSET_ERR_PROTOCOL;
311
312 memcpy(ipaddr, nla_data(tb[IPSET_ATTR_IPADDR_IPV6]),
313 sizeof(struct in6_addr));
314 return 0;
315}
316EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
317
03c8b234
JK
318/* ipset data extension types, in size order */
319
320const struct ip_set_ext_type ip_set_extensions[] = {
321 [IPSET_EXT_ID_COUNTER] = {
322 .type = IPSET_EXT_COUNTER,
323 .flag = IPSET_FLAG_WITH_COUNTERS,
324 .len = sizeof(struct ip_set_counter),
325 .align = __alignof__(struct ip_set_counter),
326 },
327 [IPSET_EXT_ID_TIMEOUT] = {
328 .type = IPSET_EXT_TIMEOUT,
329 .len = sizeof(unsigned long),
330 .align = __alignof__(unsigned long),
331 },
332};
333EXPORT_SYMBOL_GPL(ip_set_extensions);
334
335static inline bool
336add_extension(enum ip_set_ext_id id, u32 flags, struct nlattr *tb[])
337{
338 return ip_set_extensions[id].flag ?
339 (flags & ip_set_extensions[id].flag) :
340 !!tb[IPSET_ATTR_TIMEOUT];
341}
342
343size_t
344ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len)
345{
346 enum ip_set_ext_id id;
347 size_t offset = 0;
348 u32 cadt_flags = 0;
349
350 if (tb[IPSET_ATTR_CADT_FLAGS])
351 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
352 for (id = 0; id < IPSET_EXT_ID_MAX; id++) {
353 if (!add_extension(id, cadt_flags, tb))
354 continue;
355 offset += ALIGN(len + offset, ip_set_extensions[id].align);
356 set->offset[id] = offset;
357 set->extensions |= ip_set_extensions[id].type;
358 offset += ip_set_extensions[id].len;
359 }
360 return len + offset;
361}
362EXPORT_SYMBOL_GPL(ip_set_elem_len);
363
075e64c0
JK
364int
365ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
366 struct ip_set_ext *ext)
367{
368 if (tb[IPSET_ATTR_TIMEOUT]) {
369 if (!(set->extensions & IPSET_EXT_TIMEOUT))
370 return -IPSET_ERR_TIMEOUT;
371 ext->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
372 }
34d666d4
JK
373 if (tb[IPSET_ATTR_BYTES] || tb[IPSET_ATTR_PACKETS]) {
374 if (!(set->extensions & IPSET_EXT_COUNTER))
375 return -IPSET_ERR_COUNTER;
376 if (tb[IPSET_ATTR_BYTES])
377 ext->bytes = be64_to_cpu(nla_get_be64(
378 tb[IPSET_ATTR_BYTES]));
379 if (tb[IPSET_ATTR_PACKETS])
380 ext->packets = be64_to_cpu(nla_get_be64(
381 tb[IPSET_ATTR_PACKETS]));
382 }
075e64c0
JK
383 return 0;
384}
385EXPORT_SYMBOL_GPL(ip_set_get_extensions);
386
a7b4f989
JK
387/*
388 * Creating/destroying/renaming/swapping affect the existence and
389 * the properties of a set. All of these can be executed from userspace
390 * only and serialized by the nfnl mutex indirectly from nfnetlink.
391 *
392 * Sets are identified by their index in ip_set_list and the index
393 * is used by the external references (set/SET netfilter modules).
394 *
395 * The set behind an index may change by swapping only, from userspace.
396 */
397
398static inline void
9076aea7 399__ip_set_get(struct ip_set *set)
a7b4f989 400{
2f9f28b2 401 write_lock_bh(&ip_set_ref_lock);
9076aea7 402 set->ref++;
2f9f28b2 403 write_unlock_bh(&ip_set_ref_lock);
a7b4f989
JK
404}
405
406static inline void
9076aea7 407__ip_set_put(struct ip_set *set)
a7b4f989 408{
2f9f28b2 409 write_lock_bh(&ip_set_ref_lock);
9076aea7
JK
410 BUG_ON(set->ref == 0);
411 set->ref--;
2f9f28b2 412 write_unlock_bh(&ip_set_ref_lock);
a7b4f989
JK
413}
414
415/*
416 * Add, del and test set entries from kernel.
417 *
418 * The set behind the index must exist and must be referenced
419 * so it can't be destroyed (or changed) under our foot.
420 */
421
9076aea7
JK
422static inline struct ip_set *
423ip_set_rcu_get(ip_set_id_t index)
424{
425 struct ip_set *set;
426
427 rcu_read_lock();
428 /* ip_set_list itself needs to be protected */
429 set = rcu_dereference(ip_set_list)[index];
430 rcu_read_unlock();
431
432 return set;
433}
434
a7b4f989
JK
435int
436ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
075e64c0 437 const struct xt_action_param *par, struct ip_set_adt_opt *opt)
a7b4f989 438{
9076aea7 439 struct ip_set *set = ip_set_rcu_get(index);
a7b4f989
JK
440 int ret = 0;
441
2f9f28b2 442 BUG_ON(set == NULL);
a7b4f989
JK
443 pr_debug("set %s, index %u\n", set->name, index);
444
ac8cc925 445 if (opt->dim < set->type->dimension ||
c15f1c83 446 !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
a7b4f989
JK
447 return 0;
448
449 read_lock_bh(&set->lock);
b66554cf 450 ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt);
a7b4f989
JK
451 read_unlock_bh(&set->lock);
452
453 if (ret == -EAGAIN) {
454 /* Type requests element to be completed */
455 pr_debug("element must be competed, ADD is triggered\n");
456 write_lock_bh(&set->lock);
b66554cf 457 set->variant->kadt(set, skb, par, IPSET_ADD, opt);
a7b4f989
JK
458 write_unlock_bh(&set->lock);
459 ret = 1;
3e0304a5
JK
460 } else {
461 /* --return-nomatch: invert matched element */
6e01781d 462 if ((opt->cmdflags & IPSET_FLAG_RETURN_NOMATCH) &&
3e0304a5
JK
463 (set->type->features & IPSET_TYPE_NOMATCH) &&
464 (ret > 0 || ret == -ENOTEMPTY))
465 ret = -ret;
a7b4f989
JK
466 }
467
468 /* Convert error codes to nomatch */
469 return (ret < 0 ? 0 : ret);
470}
471EXPORT_SYMBOL_GPL(ip_set_test);
472
473int
474ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
075e64c0 475 const struct xt_action_param *par, struct ip_set_adt_opt *opt)
a7b4f989 476{
9076aea7 477 struct ip_set *set = ip_set_rcu_get(index);
a7b4f989
JK
478 int ret;
479
2f9f28b2 480 BUG_ON(set == NULL);
a7b4f989
JK
481 pr_debug("set %s, index %u\n", set->name, index);
482
ac8cc925 483 if (opt->dim < set->type->dimension ||
c15f1c83 484 !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
a7b4f989
JK
485 return 0;
486
487 write_lock_bh(&set->lock);
b66554cf 488 ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt);
a7b4f989
JK
489 write_unlock_bh(&set->lock);
490
491 return ret;
492}
493EXPORT_SYMBOL_GPL(ip_set_add);
494
495int
496ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
075e64c0 497 const struct xt_action_param *par, struct ip_set_adt_opt *opt)
a7b4f989 498{
9076aea7 499 struct ip_set *set = ip_set_rcu_get(index);
a7b4f989
JK
500 int ret = 0;
501
2f9f28b2 502 BUG_ON(set == NULL);
a7b4f989
JK
503 pr_debug("set %s, index %u\n", set->name, index);
504
ac8cc925 505 if (opt->dim < set->type->dimension ||
c15f1c83 506 !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
a7b4f989
JK
507 return 0;
508
509 write_lock_bh(&set->lock);
b66554cf 510 ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt);
a7b4f989
JK
511 write_unlock_bh(&set->lock);
512
513 return ret;
514}
515EXPORT_SYMBOL_GPL(ip_set_del);
516
517/*
518 * Find set by name, reference it once. The reference makes sure the
519 * thing pointed to, does not go away under our feet.
520 *
a7b4f989
JK
521 */
522ip_set_id_t
523ip_set_get_byname(const char *name, struct ip_set **set)
524{
525 ip_set_id_t i, index = IPSET_INVALID_ID;
526 struct ip_set *s;
527
9076aea7 528 rcu_read_lock();
a7b4f989 529 for (i = 0; i < ip_set_max; i++) {
9076aea7 530 s = rcu_dereference(ip_set_list)[i];
a7b4f989 531 if (s != NULL && STREQ(s->name, name)) {
9076aea7 532 __ip_set_get(s);
a7b4f989
JK
533 index = i;
534 *set = s;
9076aea7 535 break;
a7b4f989
JK
536 }
537 }
9076aea7 538 rcu_read_unlock();
a7b4f989
JK
539
540 return index;
541}
542EXPORT_SYMBOL_GPL(ip_set_get_byname);
543
544/*
545 * If the given set pointer points to a valid set, decrement
546 * reference count by 1. The caller shall not assume the index
547 * to be valid, after calling this function.
548 *
a7b4f989
JK
549 */
550void
551ip_set_put_byindex(ip_set_id_t index)
552{
9076aea7
JK
553 struct ip_set *set;
554
555 rcu_read_lock();
556 set = rcu_dereference(ip_set_list)[index];
557 if (set != NULL)
558 __ip_set_put(set);
559 rcu_read_unlock();
a7b4f989
JK
560}
561EXPORT_SYMBOL_GPL(ip_set_put_byindex);
562
563/*
564 * Get the name of a set behind a set index.
565 * We assume the set is referenced, so it does exist and
566 * can't be destroyed. The set cannot be renamed due to
567 * the referencing either.
568 *
a7b4f989
JK
569 */
570const char *
571ip_set_name_byindex(ip_set_id_t index)
572{
9076aea7 573 const struct ip_set *set = ip_set_rcu_get(index);
a7b4f989
JK
574
575 BUG_ON(set == NULL);
2f9f28b2 576 BUG_ON(set->ref == 0);
a7b4f989
JK
577
578 /* Referenced, so it's safe */
579 return set->name;
580}
581EXPORT_SYMBOL_GPL(ip_set_name_byindex);
582
583/*
584 * Routines to call by external subsystems, which do not
585 * call nfnl_lock for us.
586 */
587
588/*
589 * Find set by name, reference it once. The reference makes sure the
590 * thing pointed to, does not go away under our feet.
591 *
592 * The nfnl mutex is used in the function.
593 */
594ip_set_id_t
595ip_set_nfnl_get(const char *name)
596{
9076aea7 597 ip_set_id_t i, index = IPSET_INVALID_ID;
a7b4f989 598 struct ip_set *s;
a7b4f989 599
c14b78e7 600 nfnl_lock(NFNL_SUBSYS_IPSET);
9076aea7
JK
601 for (i = 0; i < ip_set_max; i++) {
602 s = nfnl_set(i);
603 if (s != NULL && STREQ(s->name, name)) {
604 __ip_set_get(s);
605 index = i;
606 break;
607 }
608 }
c14b78e7 609 nfnl_unlock(NFNL_SUBSYS_IPSET);
a7b4f989
JK
610
611 return index;
612}
613EXPORT_SYMBOL_GPL(ip_set_nfnl_get);
614
615/*
616 * Find set by index, reference it once. The reference makes sure the
617 * thing pointed to, does not go away under our feet.
618 *
619 * The nfnl mutex is used in the function.
620 */
621ip_set_id_t
622ip_set_nfnl_get_byindex(ip_set_id_t index)
623{
9076aea7
JK
624 struct ip_set *set;
625
a7b4f989
JK
626 if (index > ip_set_max)
627 return IPSET_INVALID_ID;
628
c14b78e7 629 nfnl_lock(NFNL_SUBSYS_IPSET);
9076aea7
JK
630 set = nfnl_set(index);
631 if (set)
632 __ip_set_get(set);
a7b4f989
JK
633 else
634 index = IPSET_INVALID_ID;
c14b78e7 635 nfnl_unlock(NFNL_SUBSYS_IPSET);
a7b4f989
JK
636
637 return index;
638}
639EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex);
640
641/*
642 * If the given set pointer points to a valid set, decrement
643 * reference count by 1. The caller shall not assume the index
644 * to be valid, after calling this function.
645 *
646 * The nfnl mutex is used in the function.
647 */
648void
649ip_set_nfnl_put(ip_set_id_t index)
650{
9076aea7 651 struct ip_set *set;
c14b78e7 652 nfnl_lock(NFNL_SUBSYS_IPSET);
9076aea7
JK
653 set = nfnl_set(index);
654 if (set != NULL)
655 __ip_set_put(set);
c14b78e7 656 nfnl_unlock(NFNL_SUBSYS_IPSET);
a7b4f989
JK
657}
658EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
659
660/*
661 * Communication protocol with userspace over netlink.
662 *
2f9f28b2 663 * The commands are serialized by the nfnl mutex.
a7b4f989
JK
664 */
665
666static inline bool
667protocol_failed(const struct nlattr * const tb[])
668{
669 return !tb[IPSET_ATTR_PROTOCOL] ||
670 nla_get_u8(tb[IPSET_ATTR_PROTOCOL]) != IPSET_PROTOCOL;
671}
672
673static inline u32
674flag_exist(const struct nlmsghdr *nlh)
675{
676 return nlh->nlmsg_flags & NLM_F_EXCL ? 0 : IPSET_FLAG_EXIST;
677}
678
679static struct nlmsghdr *
15e47304 680start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags,
a7b4f989
JK
681 enum ipset_cmd cmd)
682{
683 struct nlmsghdr *nlh;
684 struct nfgenmsg *nfmsg;
685
15e47304 686 nlh = nlmsg_put(skb, portid, seq, cmd | (NFNL_SUBSYS_IPSET << 8),
a7b4f989
JK
687 sizeof(*nfmsg), flags);
688 if (nlh == NULL)
689 return NULL;
690
691 nfmsg = nlmsg_data(nlh);
c15f1c83 692 nfmsg->nfgen_family = NFPROTO_IPV4;
a7b4f989
JK
693 nfmsg->version = NFNETLINK_V0;
694 nfmsg->res_id = 0;
695
696 return nlh;
697}
698
699/* Create a set */
700
701static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = {
702 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
703 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
704 .len = IPSET_MAXNAMELEN - 1 },
705 [IPSET_ATTR_TYPENAME] = { .type = NLA_NUL_STRING,
706 .len = IPSET_MAXNAMELEN - 1},
707 [IPSET_ATTR_REVISION] = { .type = NLA_U8 },
708 [IPSET_ATTR_FAMILY] = { .type = NLA_U8 },
709 [IPSET_ATTR_DATA] = { .type = NLA_NESTED },
710};
711
9076aea7
JK
712static struct ip_set *
713find_set_and_id(const char *name, ip_set_id_t *id)
a7b4f989 714{
9076aea7
JK
715 struct ip_set *set = NULL;
716 ip_set_id_t i;
a7b4f989 717
9076aea7
JK
718 *id = IPSET_INVALID_ID;
719 for (i = 0; i < ip_set_max; i++) {
720 set = nfnl_set(i);
721 if (set != NULL && STREQ(set->name, name)) {
722 *id = i;
723 break;
724 }
a7b4f989 725 }
9076aea7 726 return (*id == IPSET_INVALID_ID ? NULL : set);
a7b4f989
JK
727}
728
729static inline struct ip_set *
730find_set(const char *name)
731{
9076aea7 732 ip_set_id_t id;
a7b4f989 733
9076aea7 734 return find_set_and_id(name, &id);
a7b4f989
JK
735}
736
737static int
738find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set)
739{
9076aea7 740 struct ip_set *s;
a7b4f989
JK
741 ip_set_id_t i;
742
743 *index = IPSET_INVALID_ID;
744 for (i = 0; i < ip_set_max; i++) {
9076aea7
JK
745 s = nfnl_set(i);
746 if (s == NULL) {
a7b4f989
JK
747 if (*index == IPSET_INVALID_ID)
748 *index = i;
9076aea7 749 } else if (STREQ(name, s->name)) {
a7b4f989 750 /* Name clash */
9076aea7 751 *set = s;
a7b4f989
JK
752 return -EEXIST;
753 }
754 }
755 if (*index == IPSET_INVALID_ID)
756 /* No free slot remained */
757 return -IPSET_ERR_MAX_SETS;
758 return 0;
759}
760
d31f4d44
TB
761static int
762ip_set_none(struct sock *ctnl, struct sk_buff *skb,
763 const struct nlmsghdr *nlh,
764 const struct nlattr * const attr[])
765{
766 return -EOPNOTSUPP;
767}
768
a7b4f989
JK
769static int
770ip_set_create(struct sock *ctnl, struct sk_buff *skb,
771 const struct nlmsghdr *nlh,
772 const struct nlattr * const attr[])
773{
9846ada1 774 struct ip_set *set, *clash = NULL;
a7b4f989
JK
775 ip_set_id_t index = IPSET_INVALID_ID;
776 struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {};
777 const char *name, *typename;
778 u8 family, revision;
779 u32 flags = flag_exist(nlh);
780 int ret = 0;
781
782 if (unlikely(protocol_failed(attr) ||
783 attr[IPSET_ATTR_SETNAME] == NULL ||
784 attr[IPSET_ATTR_TYPENAME] == NULL ||
785 attr[IPSET_ATTR_REVISION] == NULL ||
786 attr[IPSET_ATTR_FAMILY] == NULL ||
787 (attr[IPSET_ATTR_DATA] != NULL &&
788 !flag_nested(attr[IPSET_ATTR_DATA]))))
789 return -IPSET_ERR_PROTOCOL;
790
791 name = nla_data(attr[IPSET_ATTR_SETNAME]);
792 typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
793 family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
794 revision = nla_get_u8(attr[IPSET_ATTR_REVISION]);
795 pr_debug("setname: %s, typename: %s, family: %s, revision: %u\n",
796 name, typename, family_name(family), revision);
797
798 /*
799 * First, and without any locks, allocate and initialize
800 * a normal base set structure.
801 */
802 set = kzalloc(sizeof(struct ip_set), GFP_KERNEL);
803 if (!set)
804 return -ENOMEM;
805 rwlock_init(&set->lock);
806 strlcpy(set->name, name, IPSET_MAXNAMELEN);
a7b4f989 807 set->family = family;
f1e00b39 808 set->revision = revision;
a7b4f989
JK
809
810 /*
811 * Next, check that we know the type, and take
812 * a reference on the type, to make sure it stays available
813 * while constructing our new set.
814 *
815 * After referencing the type, we try to create the type
816 * specific part of the set without holding any locks.
817 */
818 ret = find_set_type_get(typename, family, revision, &(set->type));
819 if (ret)
820 goto out;
821
822 /*
823 * Without holding any locks, create private part.
824 */
825 if (attr[IPSET_ATTR_DATA] &&
8da560ce
PM
826 nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA],
827 set->type->create_policy)) {
15b4d93f
JK
828 ret = -IPSET_ERR_PROTOCOL;
829 goto put_out;
a7b4f989
JK
830 }
831
832 ret = set->type->create(set, tb, flags);
833 if (ret != 0)
834 goto put_out;
835
836 /* BTW, ret==0 here. */
837
838 /*
839 * Here, we have a valid, constructed set and we are protected
2f9f28b2
JK
840 * by the nfnl mutex. Find the first free index in ip_set_list
841 * and check clashing.
a7b4f989 842 */
3ace95c0 843 ret = find_free_id(set->name, &index, &clash);
9076aea7 844 if (ret == -EEXIST) {
a7b4f989 845 /* If this is the same set and requested, ignore error */
9076aea7 846 if ((flags & IPSET_FLAG_EXIST) &&
a7b4f989
JK
847 STREQ(set->type->name, clash->type->name) &&
848 set->type->family == clash->type->family &&
f1e00b39
JK
849 set->type->revision_min == clash->type->revision_min &&
850 set->type->revision_max == clash->type->revision_max &&
a7b4f989
JK
851 set->variant->same_set(set, clash))
852 ret = 0;
853 goto cleanup;
9076aea7
JK
854 } else if (ret == -IPSET_ERR_MAX_SETS) {
855 struct ip_set **list, **tmp;
856 ip_set_id_t i = ip_set_max + IP_SET_INC;
857
858 if (i < ip_set_max || i == IPSET_INVALID_ID)
859 /* Wraparound */
860 goto cleanup;
861
862 list = kzalloc(sizeof(struct ip_set *) * i, GFP_KERNEL);
863 if (!list)
864 goto cleanup;
865 /* nfnl mutex is held, both lists are valid */
866 tmp = nfnl_dereference(ip_set_list);
867 memcpy(list, tmp, sizeof(struct ip_set *) * ip_set_max);
868 rcu_assign_pointer(ip_set_list, list);
869 /* Make sure all current packets have passed through */
870 synchronize_net();
871 /* Use new list */
872 index = ip_set_max;
873 ip_set_max = i;
874 kfree(tmp);
875 ret = 0;
876 } else if (ret)
877 goto cleanup;
a7b4f989
JK
878
879 /*
880 * Finally! Add our shiny new set to the list, and be done.
881 */
882 pr_debug("create: '%s' created with index %u!\n", set->name, index);
9076aea7 883 nfnl_set(index) = set;
a7b4f989
JK
884
885 return ret;
886
887cleanup:
888 set->variant->destroy(set);
889put_out:
890 module_put(set->type->me);
891out:
892 kfree(set);
893 return ret;
894}
895
896/* Destroy sets */
897
898static const struct nla_policy
899ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
900 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
901 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
902 .len = IPSET_MAXNAMELEN - 1 },
903};
904
905static void
906ip_set_destroy_set(ip_set_id_t index)
907{
9076aea7 908 struct ip_set *set = nfnl_set(index);
a7b4f989
JK
909
910 pr_debug("set: %s\n", set->name);
9076aea7 911 nfnl_set(index) = NULL;
a7b4f989
JK
912
913 /* Must call it without holding any lock */
914 set->variant->destroy(set);
915 module_put(set->type->me);
916 kfree(set);
917}
918
919static int
920ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
921 const struct nlmsghdr *nlh,
922 const struct nlattr * const attr[])
923{
9076aea7 924 struct ip_set *s;
a7b4f989 925 ip_set_id_t i;
2f9f28b2 926 int ret = 0;
a7b4f989
JK
927
928 if (unlikely(protocol_failed(attr)))
929 return -IPSET_ERR_PROTOCOL;
930
2f9f28b2
JK
931 /* Commands are serialized and references are
932 * protected by the ip_set_ref_lock.
933 * External systems (i.e. xt_set) must call
934 * ip_set_put|get_nfnl_* functions, that way we
935 * can safely check references here.
936 *
937 * list:set timer can only decrement the reference
938 * counter, so if it's already zero, we can proceed
939 * without holding the lock.
940 */
941 read_lock_bh(&ip_set_ref_lock);
a7b4f989
JK
942 if (!attr[IPSET_ATTR_SETNAME]) {
943 for (i = 0; i < ip_set_max; i++) {
9076aea7
JK
944 s = nfnl_set(i);
945 if (s != NULL && s->ref) {
9d883232 946 ret = -IPSET_ERR_BUSY;
2f9f28b2
JK
947 goto out;
948 }
a7b4f989 949 }
2f9f28b2 950 read_unlock_bh(&ip_set_ref_lock);
a7b4f989 951 for (i = 0; i < ip_set_max; i++) {
9076aea7
JK
952 s = nfnl_set(i);
953 if (s != NULL)
a7b4f989
JK
954 ip_set_destroy_set(i);
955 }
956 } else {
9076aea7
JK
957 s = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME]), &i);
958 if (s == NULL) {
2f9f28b2
JK
959 ret = -ENOENT;
960 goto out;
9076aea7 961 } else if (s->ref) {
2f9f28b2
JK
962 ret = -IPSET_ERR_BUSY;
963 goto out;
964 }
965 read_unlock_bh(&ip_set_ref_lock);
a7b4f989
JK
966
967 ip_set_destroy_set(i);
968 }
969 return 0;
2f9f28b2
JK
970out:
971 read_unlock_bh(&ip_set_ref_lock);
972 return ret;
a7b4f989
JK
973}
974
975/* Flush sets */
976
977static void
978ip_set_flush_set(struct ip_set *set)
979{
980 pr_debug("set: %s\n", set->name);
981
982 write_lock_bh(&set->lock);
983 set->variant->flush(set);
984 write_unlock_bh(&set->lock);
985}
986
987static int
988ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
989 const struct nlmsghdr *nlh,
990 const struct nlattr * const attr[])
991{
9076aea7 992 struct ip_set *s;
a7b4f989
JK
993 ip_set_id_t i;
994
995 if (unlikely(protocol_failed(attr)))
9184a9cb 996 return -IPSET_ERR_PROTOCOL;
a7b4f989
JK
997
998 if (!attr[IPSET_ATTR_SETNAME]) {
9076aea7
JK
999 for (i = 0; i < ip_set_max; i++) {
1000 s = nfnl_set(i);
1001 if (s != NULL)
1002 ip_set_flush_set(s);
1003 }
a7b4f989 1004 } else {
9076aea7
JK
1005 s = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1006 if (s == NULL)
a7b4f989
JK
1007 return -ENOENT;
1008
9076aea7 1009 ip_set_flush_set(s);
a7b4f989
JK
1010 }
1011
1012 return 0;
1013}
1014
1015/* Rename a set */
1016
1017static const struct nla_policy
1018ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = {
1019 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
1020 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
1021 .len = IPSET_MAXNAMELEN - 1 },
1022 [IPSET_ATTR_SETNAME2] = { .type = NLA_NUL_STRING,
1023 .len = IPSET_MAXNAMELEN - 1 },
1024};
1025
1026static int
1027ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
1028 const struct nlmsghdr *nlh,
1029 const struct nlattr * const attr[])
1030{
9076aea7 1031 struct ip_set *set, *s;
a7b4f989
JK
1032 const char *name2;
1033 ip_set_id_t i;
2f9f28b2 1034 int ret = 0;
a7b4f989
JK
1035
1036 if (unlikely(protocol_failed(attr) ||
1037 attr[IPSET_ATTR_SETNAME] == NULL ||
1038 attr[IPSET_ATTR_SETNAME2] == NULL))
1039 return -IPSET_ERR_PROTOCOL;
1040
1041 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1042 if (set == NULL)
1043 return -ENOENT;
2f9f28b2
JK
1044
1045 read_lock_bh(&ip_set_ref_lock);
1046 if (set->ref != 0) {
1047 ret = -IPSET_ERR_REFERENCED;
1048 goto out;
1049 }
a7b4f989
JK
1050
1051 name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
1052 for (i = 0; i < ip_set_max; i++) {
9076aea7
JK
1053 s = nfnl_set(i);
1054 if (s != NULL && STREQ(s->name, name2)) {
2f9f28b2
JK
1055 ret = -IPSET_ERR_EXIST_SETNAME2;
1056 goto out;
1057 }
a7b4f989
JK
1058 }
1059 strncpy(set->name, name2, IPSET_MAXNAMELEN);
1060
2f9f28b2
JK
1061out:
1062 read_unlock_bh(&ip_set_ref_lock);
1063 return ret;
a7b4f989
JK
1064}
1065
1066/* Swap two sets so that name/index points to the other.
1067 * References and set names are also swapped.
1068 *
2f9f28b2
JK
1069 * The commands are serialized by the nfnl mutex and references are
1070 * protected by the ip_set_ref_lock. The kernel interfaces
a7b4f989
JK
1071 * do not hold the mutex but the pointer settings are atomic
1072 * so the ip_set_list always contains valid pointers to the sets.
1073 */
1074
1075static int
1076ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
1077 const struct nlmsghdr *nlh,
1078 const struct nlattr * const attr[])
1079{
1080 struct ip_set *from, *to;
1081 ip_set_id_t from_id, to_id;
1082 char from_name[IPSET_MAXNAMELEN];
a7b4f989
JK
1083
1084 if (unlikely(protocol_failed(attr) ||
1085 attr[IPSET_ATTR_SETNAME] == NULL ||
1086 attr[IPSET_ATTR_SETNAME2] == NULL))
1087 return -IPSET_ERR_PROTOCOL;
1088
9076aea7
JK
1089 from = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME]), &from_id);
1090 if (from == NULL)
a7b4f989
JK
1091 return -ENOENT;
1092
9076aea7
JK
1093 to = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME2]), &to_id);
1094 if (to == NULL)
a7b4f989
JK
1095 return -IPSET_ERR_EXIST_SETNAME2;
1096
a7b4f989 1097 /* Features must not change.
25985edc 1098 * Not an artificial restriction anymore, as we must prevent
a7b4f989
JK
1099 * possible loops created by swapping in setlist type of sets. */
1100 if (!(from->type->features == to->type->features &&
169faa2e 1101 from->family == to->family))
a7b4f989
JK
1102 return -IPSET_ERR_TYPE_MISMATCH;
1103
a7b4f989 1104 strncpy(from_name, from->name, IPSET_MAXNAMELEN);
a7b4f989 1105 strncpy(from->name, to->name, IPSET_MAXNAMELEN);
a7b4f989 1106 strncpy(to->name, from_name, IPSET_MAXNAMELEN);
a7b4f989 1107
2f9f28b2
JK
1108 write_lock_bh(&ip_set_ref_lock);
1109 swap(from->ref, to->ref);
9076aea7
JK
1110 nfnl_set(from_id) = to;
1111 nfnl_set(to_id) = from;
2f9f28b2 1112 write_unlock_bh(&ip_set_ref_lock);
a7b4f989
JK
1113
1114 return 0;
1115}
1116
1117/* List/save set data */
1118
c1e2e043
JK
/* Dump states kept in the low 16 bits of cb->args[0] */
#define DUMP_INIT		0
#define DUMP_ALL		1
#define DUMP_ONE		2
#define DUMP_LAST		3

/* cb->args[0] packs the dump state (low half) and the dump flags (high half) */
#define DUMP_TYPE(arg)		((u32)(arg) & 0x0000FFFF)
#define DUMP_FLAGS(arg)		((u32)(arg) >> 16)
a7b4f989
JK
1126
1127static int
1128ip_set_dump_done(struct netlink_callback *cb)
1129{
1130 if (cb->args[2]) {
9076aea7 1131 pr_debug("release set %s\n", nfnl_set(cb->args[1])->name);
2f9f28b2 1132 ip_set_put_byindex((ip_set_id_t) cb->args[1]);
a7b4f989
JK
1133 }
1134 return 0;
1135}
1136
1137static inline void
1138dump_attrs(struct nlmsghdr *nlh)
1139{
1140 const struct nlattr *attr;
1141 int rem;
1142
1143 pr_debug("dump nlmsg\n");
1144 nlmsg_for_each_attr(attr, nlh, sizeof(struct nfgenmsg), rem) {
1145 pr_debug("type: %u, len %u\n", nla_type(attr), attr->nla_len);
1146 }
1147}
1148
1149static int
1150dump_init(struct netlink_callback *cb)
1151{
1152 struct nlmsghdr *nlh = nlmsg_hdr(cb->skb);
573ce260 1153 int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
a7b4f989
JK
1154 struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
1155 struct nlattr *attr = (void *)nlh + min_len;
c1e2e043 1156 u32 dump_type;
a7b4f989
JK
1157 ip_set_id_t index;
1158
1159 /* Second pass, so parser can't fail */
1160 nla_parse(cda, IPSET_ATTR_CMD_MAX,
1161 attr, nlh->nlmsg_len - min_len, ip_set_setname_policy);
1162
1163 /* cb->args[0] : dump single set/all sets
1164 * [1] : set index
1165 * [..]: type specific
1166 */
1167
c1e2e043 1168 if (cda[IPSET_ATTR_SETNAME]) {
9076aea7
JK
1169 struct ip_set *set;
1170
1171 set = find_set_and_id(nla_data(cda[IPSET_ATTR_SETNAME]),
1172 &index);
1173 if (set == NULL)
c1e2e043 1174 return -ENOENT;
a7b4f989 1175
c1e2e043
JK
1176 dump_type = DUMP_ONE;
1177 cb->args[1] = index;
1178 } else
1179 dump_type = DUMP_ALL;
1180
1181 if (cda[IPSET_ATTR_FLAGS]) {
1182 u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]);
1183 dump_type |= (f << 16);
1184 }
1185 cb->args[0] = dump_type;
a7b4f989 1186
a7b4f989
JK
1187 return 0;
1188}
1189
1190static int
1191ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
1192{
1193 ip_set_id_t index = IPSET_INVALID_ID, max;
1194 struct ip_set *set = NULL;
1195 struct nlmsghdr *nlh = NULL;
15e47304 1196 unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0;
c1e2e043 1197 u32 dump_type, dump_flags;
a7b4f989
JK
1198 int ret = 0;
1199
c1e2e043 1200 if (!cb->args[0]) {
a7b4f989
JK
1201 ret = dump_init(cb);
1202 if (ret < 0) {
1203 nlh = nlmsg_hdr(cb->skb);
1204 /* We have to create and send the error message
1205 * manually :-( */
1206 if (nlh->nlmsg_flags & NLM_F_ACK)
1207 netlink_ack(cb->skb, nlh, ret);
1208 return ret;
1209 }
1210 }
1211
1212 if (cb->args[1] >= ip_set_max)
1213 goto out;
1214
c1e2e043
JK
1215 dump_type = DUMP_TYPE(cb->args[0]);
1216 dump_flags = DUMP_FLAGS(cb->args[0]);
1217 max = dump_type == DUMP_ONE ? cb->args[1] + 1 : ip_set_max;
a8a8a093 1218dump_last:
c1e2e043
JK
1219 pr_debug("args[0]: %u %u args[1]: %ld\n",
1220 dump_type, dump_flags, cb->args[1]);
a7b4f989
JK
1221 for (; cb->args[1] < max; cb->args[1]++) {
1222 index = (ip_set_id_t) cb->args[1];
9076aea7 1223 set = nfnl_set(index);
a7b4f989 1224 if (set == NULL) {
c1e2e043 1225 if (dump_type == DUMP_ONE) {
a7b4f989
JK
1226 ret = -ENOENT;
1227 goto out;
1228 }
1229 continue;
1230 }
1231 /* When dumping all sets, we must dump "sorted"
1232 * so that lists (unions of sets) are dumped last.
1233 */
c1e2e043
JK
1234 if (dump_type != DUMP_ONE &&
1235 ((dump_type == DUMP_ALL) ==
a8a8a093 1236 !!(set->type->features & IPSET_DUMP_LAST)))
a7b4f989
JK
1237 continue;
1238 pr_debug("List set: %s\n", set->name);
1239 if (!cb->args[2]) {
1240 /* Start listing: make sure set won't be destroyed */
1241 pr_debug("reference set\n");
9076aea7 1242 __ip_set_get(set);
a7b4f989 1243 }
15e47304 1244 nlh = start_msg(skb, NETLINK_CB(cb->skb).portid,
a7b4f989
JK
1245 cb->nlh->nlmsg_seq, flags,
1246 IPSET_CMD_LIST);
1247 if (!nlh) {
1248 ret = -EMSGSIZE;
1249 goto release_refcount;
1250 }
7cf7899d
DM
1251 if (nla_put_u8(skb, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) ||
1252 nla_put_string(skb, IPSET_ATTR_SETNAME, set->name))
1253 goto nla_put_failure;
c1e2e043
JK
1254 if (dump_flags & IPSET_FLAG_LIST_SETNAME)
1255 goto next_set;
a7b4f989
JK
1256 switch (cb->args[2]) {
1257 case 0:
1258 /* Core header data */
7cf7899d
DM
1259 if (nla_put_string(skb, IPSET_ATTR_TYPENAME,
1260 set->type->name) ||
1261 nla_put_u8(skb, IPSET_ATTR_FAMILY,
1262 set->family) ||
1263 nla_put_u8(skb, IPSET_ATTR_REVISION,
1264 set->revision))
1265 goto nla_put_failure;
a7b4f989
JK
1266 ret = set->variant->head(set, skb);
1267 if (ret < 0)
1268 goto release_refcount;
c1e2e043
JK
1269 if (dump_flags & IPSET_FLAG_LIST_HEADER)
1270 goto next_set;
a7b4f989
JK
1271 /* Fall through and add elements */
1272 default:
1273 read_lock_bh(&set->lock);
1274 ret = set->variant->list(set, skb, cb);
1275 read_unlock_bh(&set->lock);
c1e2e043 1276 if (!cb->args[2])
a7b4f989 1277 /* Set is done, proceed with next one */
c1e2e043 1278 goto next_set;
a7b4f989
JK
1279 goto release_refcount;
1280 }
1281 }
a8a8a093 1282 /* If we dump all sets, continue with dumping last ones */
c1e2e043
JK
1283 if (dump_type == DUMP_ALL) {
1284 dump_type = DUMP_LAST;
1285 cb->args[0] = dump_type | (dump_flags << 16);
a8a8a093
JK
1286 cb->args[1] = 0;
1287 goto dump_last;
1288 }
a7b4f989
JK
1289 goto out;
1290
1291nla_put_failure:
1292 ret = -EFAULT;
c1e2e043
JK
1293next_set:
1294 if (dump_type == DUMP_ONE)
1295 cb->args[1] = IPSET_INVALID_ID;
1296 else
1297 cb->args[1]++;
a7b4f989
JK
1298release_refcount:
1299 /* If there was an error or set is done, release set */
1300 if (ret || !cb->args[2]) {
9076aea7 1301 pr_debug("release set %s\n", nfnl_set(index)->name);
2f9f28b2 1302 ip_set_put_byindex(index);
be94db9d 1303 cb->args[2] = 0;
a7b4f989 1304 }
a7b4f989
JK
1305out:
1306 if (nlh) {
1307 nlmsg_end(skb, nlh);
1308 pr_debug("nlmsg_len: %u\n", nlh->nlmsg_len);
1309 dump_attrs(nlh);
1310 }
1311
1312 return ret < 0 ? ret : skb->len;
1313}
1314
1315static int
1316ip_set_dump(struct sock *ctnl, struct sk_buff *skb,
1317 const struct nlmsghdr *nlh,
1318 const struct nlattr * const attr[])
1319{
1320 if (unlikely(protocol_failed(attr)))
1321 return -IPSET_ERR_PROTOCOL;
1322
80d326fa
PNA
1323 {
1324 struct netlink_dump_control c = {
1325 .dump = ip_set_dump_start,
1326 .done = ip_set_dump_done,
1327 };
1328 return netlink_dump_start(ctnl, skb, nlh, &c);
1329 }
a7b4f989
JK
1330}
1331
1332/* Add, del and test */
1333
1334static const struct nla_policy ip_set_adt_policy[IPSET_ATTR_CMD_MAX + 1] = {
1335 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
1336 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
1337 .len = IPSET_MAXNAMELEN - 1 },
1338 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
1339 [IPSET_ATTR_DATA] = { .type = NLA_NESTED },
1340 [IPSET_ATTR_ADT] = { .type = NLA_NESTED },
1341};
1342
1343static int
5f52bc3c 1344call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
a7b4f989
JK
1345 struct nlattr *tb[], enum ipset_adt adt,
1346 u32 flags, bool use_lineno)
1347{
3d14b171 1348 int ret;
a7b4f989 1349 u32 lineno = 0;
3d14b171 1350 bool eexist = flags & IPSET_FLAG_EXIST, retried = false;
a7b4f989
JK
1351
1352 do {
1353 write_lock_bh(&set->lock);
3d14b171 1354 ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
a7b4f989 1355 write_unlock_bh(&set->lock);
3d14b171 1356 retried = true;
a7b4f989
JK
1357 } while (ret == -EAGAIN &&
1358 set->variant->resize &&
3d14b171 1359 (ret = set->variant->resize(set, retried)) == 0);
a7b4f989
JK
1360
1361 if (!ret || (ret == -IPSET_ERR_EXIST && eexist))
1362 return 0;
1363 if (lineno && use_lineno) {
1364 /* Error in restore/batch mode: send back lineno */
5f52bc3c
JK
1365 struct nlmsghdr *rep, *nlh = nlmsg_hdr(skb);
1366 struct sk_buff *skb2;
1367 struct nlmsgerr *errmsg;
1368 size_t payload = sizeof(*errmsg) + nlmsg_len(nlh);
573ce260 1369 int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
a7b4f989 1370 struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
5f52bc3c 1371 struct nlattr *cmdattr;
a7b4f989
JK
1372 u32 *errline;
1373
5f52bc3c
JK
1374 skb2 = nlmsg_new(payload, GFP_KERNEL);
1375 if (skb2 == NULL)
1376 return -ENOMEM;
15e47304 1377 rep = __nlmsg_put(skb2, NETLINK_CB(skb).portid,
5f52bc3c
JK
1378 nlh->nlmsg_seq, NLMSG_ERROR, payload, 0);
1379 errmsg = nlmsg_data(rep);
1380 errmsg->error = ret;
1381 memcpy(&errmsg->msg, nlh, nlh->nlmsg_len);
1382 cmdattr = (void *)&errmsg->msg + min_len;
1383
a7b4f989
JK
1384 nla_parse(cda, IPSET_ATTR_CMD_MAX,
1385 cmdattr, nlh->nlmsg_len - min_len,
1386 ip_set_adt_policy);
1387
1388 errline = nla_data(cda[IPSET_ATTR_LINENO]);
1389
1390 *errline = lineno;
5f52bc3c 1391
15e47304 1392 netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
5f52bc3c
JK
1393 /* Signal netlink not to send its ACK/errmsg. */
1394 return -EINTR;
a7b4f989
JK
1395 }
1396
1397 return ret;
1398}
1399
1400static int
1401ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
1402 const struct nlmsghdr *nlh,
1403 const struct nlattr * const attr[])
1404{
1405 struct ip_set *set;
1406 struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1407 const struct nlattr *nla;
1408 u32 flags = flag_exist(nlh);
1409 bool use_lineno;
1410 int ret = 0;
1411
1412 if (unlikely(protocol_failed(attr) ||
1413 attr[IPSET_ATTR_SETNAME] == NULL ||
1414 !((attr[IPSET_ATTR_DATA] != NULL) ^
1415 (attr[IPSET_ATTR_ADT] != NULL)) ||
1416 (attr[IPSET_ATTR_DATA] != NULL &&
1417 !flag_nested(attr[IPSET_ATTR_DATA])) ||
1418 (attr[IPSET_ATTR_ADT] != NULL &&
1419 (!flag_nested(attr[IPSET_ATTR_ADT]) ||
1420 attr[IPSET_ATTR_LINENO] == NULL))))
1421 return -IPSET_ERR_PROTOCOL;
1422
1423 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1424 if (set == NULL)
1425 return -ENOENT;
1426
1427 use_lineno = !!attr[IPSET_ATTR_LINENO];
1428 if (attr[IPSET_ATTR_DATA]) {
8da560ce
PM
1429 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
1430 attr[IPSET_ATTR_DATA],
1431 set->type->adt_policy))
a7b4f989 1432 return -IPSET_ERR_PROTOCOL;
5f52bc3c
JK
1433 ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, flags,
1434 use_lineno);
a7b4f989
JK
1435 } else {
1436 int nla_rem;
1437
1438 nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
1439 memset(tb, 0, sizeof(tb));
1440 if (nla_type(nla) != IPSET_ATTR_DATA ||
1441 !flag_nested(nla) ||
8da560ce
PM
1442 nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
1443 set->type->adt_policy))
a7b4f989 1444 return -IPSET_ERR_PROTOCOL;
5f52bc3c 1445 ret = call_ad(ctnl, skb, set, tb, IPSET_ADD,
a7b4f989
JK
1446 flags, use_lineno);
1447 if (ret < 0)
1448 return ret;
1449 }
1450 }
1451 return ret;
1452}
1453
1454static int
1455ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
1456 const struct nlmsghdr *nlh,
1457 const struct nlattr * const attr[])
1458{
1459 struct ip_set *set;
1460 struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1461 const struct nlattr *nla;
1462 u32 flags = flag_exist(nlh);
1463 bool use_lineno;
1464 int ret = 0;
1465
1466 if (unlikely(protocol_failed(attr) ||
1467 attr[IPSET_ATTR_SETNAME] == NULL ||
1468 !((attr[IPSET_ATTR_DATA] != NULL) ^
1469 (attr[IPSET_ATTR_ADT] != NULL)) ||
1470 (attr[IPSET_ATTR_DATA] != NULL &&
1471 !flag_nested(attr[IPSET_ATTR_DATA])) ||
1472 (attr[IPSET_ATTR_ADT] != NULL &&
1473 (!flag_nested(attr[IPSET_ATTR_ADT]) ||
1474 attr[IPSET_ATTR_LINENO] == NULL))))
1475 return -IPSET_ERR_PROTOCOL;
1476
1477 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1478 if (set == NULL)
1479 return -ENOENT;
1480
1481 use_lineno = !!attr[IPSET_ATTR_LINENO];
1482 if (attr[IPSET_ATTR_DATA]) {
8da560ce
PM
1483 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
1484 attr[IPSET_ATTR_DATA],
1485 set->type->adt_policy))
a7b4f989 1486 return -IPSET_ERR_PROTOCOL;
5f52bc3c
JK
1487 ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, flags,
1488 use_lineno);
a7b4f989
JK
1489 } else {
1490 int nla_rem;
1491
1492 nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
1493 memset(tb, 0, sizeof(*tb));
1494 if (nla_type(nla) != IPSET_ATTR_DATA ||
1495 !flag_nested(nla) ||
8da560ce
PM
1496 nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
1497 set->type->adt_policy))
a7b4f989 1498 return -IPSET_ERR_PROTOCOL;
5f52bc3c 1499 ret = call_ad(ctnl, skb, set, tb, IPSET_DEL,
a7b4f989
JK
1500 flags, use_lineno);
1501 if (ret < 0)
1502 return ret;
1503 }
1504 }
1505 return ret;
1506}
1507
1508static int
1509ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
1510 const struct nlmsghdr *nlh,
1511 const struct nlattr * const attr[])
1512{
1513 struct ip_set *set;
1514 struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1515 int ret = 0;
1516
1517 if (unlikely(protocol_failed(attr) ||
1518 attr[IPSET_ATTR_SETNAME] == NULL ||
1519 attr[IPSET_ATTR_DATA] == NULL ||
1520 !flag_nested(attr[IPSET_ATTR_DATA])))
1521 return -IPSET_ERR_PROTOCOL;
1522
1523 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1524 if (set == NULL)
1525 return -ENOENT;
1526
8da560ce
PM
1527 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA],
1528 set->type->adt_policy))
a7b4f989
JK
1529 return -IPSET_ERR_PROTOCOL;
1530
1531 read_lock_bh(&set->lock);
3d14b171 1532 ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0, 0);
a7b4f989
JK
1533 read_unlock_bh(&set->lock);
1534 /* Userspace can't trigger element to be re-added */
1535 if (ret == -EAGAIN)
1536 ret = 1;
1537
0f1799ba 1538 return ret > 0 ? 0 : -IPSET_ERR_EXIST;
a7b4f989
JK
1539}
1540
/* Get header data of a set */
1542
1543static int
1544ip_set_header(struct sock *ctnl, struct sk_buff *skb,
1545 const struct nlmsghdr *nlh,
1546 const struct nlattr * const attr[])
1547{
1548 const struct ip_set *set;
1549 struct sk_buff *skb2;
1550 struct nlmsghdr *nlh2;
a7b4f989
JK
1551 int ret = 0;
1552
1553 if (unlikely(protocol_failed(attr) ||
1554 attr[IPSET_ATTR_SETNAME] == NULL))
1555 return -IPSET_ERR_PROTOCOL;
1556
9076aea7
JK
1557 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1558 if (set == NULL)
a7b4f989 1559 return -ENOENT;
a7b4f989
JK
1560
1561 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1562 if (skb2 == NULL)
1563 return -ENOMEM;
1564
15e47304 1565 nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
a7b4f989
JK
1566 IPSET_CMD_HEADER);
1567 if (!nlh2)
1568 goto nlmsg_failure;
7cf7899d
DM
1569 if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) ||
1570 nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name) ||
1571 nla_put_string(skb2, IPSET_ATTR_TYPENAME, set->type->name) ||
1572 nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) ||
1573 nla_put_u8(skb2, IPSET_ATTR_REVISION, set->revision))
1574 goto nla_put_failure;
a7b4f989
JK
1575 nlmsg_end(skb2, nlh2);
1576
15e47304 1577 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
a7b4f989
JK
1578 if (ret < 0)
1579 return ret;
1580
1581 return 0;
1582
1583nla_put_failure:
1584 nlmsg_cancel(skb2, nlh2);
1585nlmsg_failure:
1586 kfree_skb(skb2);
1587 return -EMSGSIZE;
1588}
1589
1590/* Get type data */
1591
1592static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = {
1593 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
1594 [IPSET_ATTR_TYPENAME] = { .type = NLA_NUL_STRING,
1595 .len = IPSET_MAXNAMELEN - 1 },
1596 [IPSET_ATTR_FAMILY] = { .type = NLA_U8 },
1597};
1598
1599static int
1600ip_set_type(struct sock *ctnl, struct sk_buff *skb,
1601 const struct nlmsghdr *nlh,
1602 const struct nlattr * const attr[])
1603{
1604 struct sk_buff *skb2;
1605 struct nlmsghdr *nlh2;
1606 u8 family, min, max;
1607 const char *typename;
1608 int ret = 0;
1609
1610 if (unlikely(protocol_failed(attr) ||
1611 attr[IPSET_ATTR_TYPENAME] == NULL ||
1612 attr[IPSET_ATTR_FAMILY] == NULL))
1613 return -IPSET_ERR_PROTOCOL;
1614
1615 family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
1616 typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
1617 ret = find_set_type_minmax(typename, family, &min, &max);
1618 if (ret)
1619 return ret;
1620
1621 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1622 if (skb2 == NULL)
1623 return -ENOMEM;
1624
15e47304 1625 nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
a7b4f989
JK
1626 IPSET_CMD_TYPE);
1627 if (!nlh2)
1628 goto nlmsg_failure;
7cf7899d
DM
1629 if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) ||
1630 nla_put_string(skb2, IPSET_ATTR_TYPENAME, typename) ||
1631 nla_put_u8(skb2, IPSET_ATTR_FAMILY, family) ||
1632 nla_put_u8(skb2, IPSET_ATTR_REVISION, max) ||
1633 nla_put_u8(skb2, IPSET_ATTR_REVISION_MIN, min))
1634 goto nla_put_failure;
a7b4f989
JK
1635 nlmsg_end(skb2, nlh2);
1636
1637 pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len);
15e47304 1638 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
a7b4f989
JK
1639 if (ret < 0)
1640 return ret;
1641
1642 return 0;
1643
1644nla_put_failure:
1645 nlmsg_cancel(skb2, nlh2);
1646nlmsg_failure:
1647 kfree_skb(skb2);
1648 return -EMSGSIZE;
1649}
1650
1651/* Get protocol version */
1652
1653static const struct nla_policy
1654ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = {
1655 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
1656};
1657
1658static int
1659ip_set_protocol(struct sock *ctnl, struct sk_buff *skb,
1660 const struct nlmsghdr *nlh,
1661 const struct nlattr * const attr[])
1662{
1663 struct sk_buff *skb2;
1664 struct nlmsghdr *nlh2;
1665 int ret = 0;
1666
1667 if (unlikely(attr[IPSET_ATTR_PROTOCOL] == NULL))
1668 return -IPSET_ERR_PROTOCOL;
1669
1670 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1671 if (skb2 == NULL)
1672 return -ENOMEM;
1673
15e47304 1674 nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
a7b4f989
JK
1675 IPSET_CMD_PROTOCOL);
1676 if (!nlh2)
1677 goto nlmsg_failure;
7cf7899d
DM
1678 if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL))
1679 goto nla_put_failure;
a7b4f989
JK
1680 nlmsg_end(skb2, nlh2);
1681
15e47304 1682 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
a7b4f989
JK
1683 if (ret < 0)
1684 return ret;
1685
1686 return 0;
1687
1688nla_put_failure:
1689 nlmsg_cancel(skb2, nlh2);
1690nlmsg_failure:
1691 kfree_skb(skb2);
1692 return -EMSGSIZE;
1693}
1694
1695static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = {
d31f4d44
TB
1696 [IPSET_CMD_NONE] = {
1697 .call = ip_set_none,
1698 .attr_count = IPSET_ATTR_CMD_MAX,
1699 },
a7b4f989
JK
1700 [IPSET_CMD_CREATE] = {
1701 .call = ip_set_create,
1702 .attr_count = IPSET_ATTR_CMD_MAX,
1703 .policy = ip_set_create_policy,
1704 },
1705 [IPSET_CMD_DESTROY] = {
1706 .call = ip_set_destroy,
1707 .attr_count = IPSET_ATTR_CMD_MAX,
1708 .policy = ip_set_setname_policy,
1709 },
1710 [IPSET_CMD_FLUSH] = {
1711 .call = ip_set_flush,
1712 .attr_count = IPSET_ATTR_CMD_MAX,
1713 .policy = ip_set_setname_policy,
1714 },
1715 [IPSET_CMD_RENAME] = {
1716 .call = ip_set_rename,
1717 .attr_count = IPSET_ATTR_CMD_MAX,
1718 .policy = ip_set_setname2_policy,
1719 },
1720 [IPSET_CMD_SWAP] = {
1721 .call = ip_set_swap,
1722 .attr_count = IPSET_ATTR_CMD_MAX,
1723 .policy = ip_set_setname2_policy,
1724 },
1725 [IPSET_CMD_LIST] = {
1726 .call = ip_set_dump,
1727 .attr_count = IPSET_ATTR_CMD_MAX,
1728 .policy = ip_set_setname_policy,
1729 },
1730 [IPSET_CMD_SAVE] = {
1731 .call = ip_set_dump,
1732 .attr_count = IPSET_ATTR_CMD_MAX,
1733 .policy = ip_set_setname_policy,
1734 },
1735 [IPSET_CMD_ADD] = {
1736 .call = ip_set_uadd,
1737 .attr_count = IPSET_ATTR_CMD_MAX,
1738 .policy = ip_set_adt_policy,
1739 },
1740 [IPSET_CMD_DEL] = {
1741 .call = ip_set_udel,
1742 .attr_count = IPSET_ATTR_CMD_MAX,
1743 .policy = ip_set_adt_policy,
1744 },
1745 [IPSET_CMD_TEST] = {
1746 .call = ip_set_utest,
1747 .attr_count = IPSET_ATTR_CMD_MAX,
1748 .policy = ip_set_adt_policy,
1749 },
1750 [IPSET_CMD_HEADER] = {
1751 .call = ip_set_header,
1752 .attr_count = IPSET_ATTR_CMD_MAX,
1753 .policy = ip_set_setname_policy,
1754 },
1755 [IPSET_CMD_TYPE] = {
1756 .call = ip_set_type,
1757 .attr_count = IPSET_ATTR_CMD_MAX,
1758 .policy = ip_set_type_policy,
1759 },
1760 [IPSET_CMD_PROTOCOL] = {
1761 .call = ip_set_protocol,
1762 .attr_count = IPSET_ATTR_CMD_MAX,
1763 .policy = ip_set_protocol_policy,
1764 },
1765};
1766
1767static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = {
1768 .name = "ip_set",
1769 .subsys_id = NFNL_SUBSYS_IPSET,
1770 .cb_count = IPSET_MSG_MAX,
1771 .cb = ip_set_netlink_subsys_cb,
1772};
1773
1774/* Interface to iptables/ip6tables */
1775
1776static int
1777ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
1778{
95c96174 1779 unsigned int *op;
a7b4f989
JK
1780 void *data;
1781 int copylen = *len, ret = 0;
1782
df008c91 1783 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
a7b4f989
JK
1784 return -EPERM;
1785 if (optval != SO_IP_SET)
1786 return -EBADF;
95c96174 1787 if (*len < sizeof(unsigned int))
a7b4f989
JK
1788 return -EINVAL;
1789
1790 data = vmalloc(*len);
1791 if (!data)
1792 return -ENOMEM;
1793 if (copy_from_user(data, user, *len) != 0) {
1794 ret = -EFAULT;
1795 goto done;
1796 }
95c96174 1797 op = (unsigned int *) data;
a7b4f989
JK
1798
1799 if (*op < IP_SET_OP_VERSION) {
1800 /* Check the version at the beginning of operations */
1801 struct ip_set_req_version *req_version = data;
1802 if (req_version->version != IPSET_PROTOCOL) {
1803 ret = -EPROTO;
1804 goto done;
1805 }
1806 }
1807
1808 switch (*op) {
1809 case IP_SET_OP_VERSION: {
1810 struct ip_set_req_version *req_version = data;
1811
1812 if (*len != sizeof(struct ip_set_req_version)) {
1813 ret = -EINVAL;
1814 goto done;
1815 }
1816
1817 req_version->version = IPSET_PROTOCOL;
1818 ret = copy_to_user(user, req_version,
1819 sizeof(struct ip_set_req_version));
1820 goto done;
1821 }
1822 case IP_SET_OP_GET_BYNAME: {
1823 struct ip_set_req_get_set *req_get = data;
9076aea7 1824 ip_set_id_t id;
a7b4f989
JK
1825
1826 if (*len != sizeof(struct ip_set_req_get_set)) {
1827 ret = -EINVAL;
1828 goto done;
1829 }
1830 req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
c14b78e7 1831 nfnl_lock(NFNL_SUBSYS_IPSET);
9076aea7
JK
1832 find_set_and_id(req_get->set.name, &id);
1833 req_get->set.index = id;
c14b78e7 1834 nfnl_unlock(NFNL_SUBSYS_IPSET);
a7b4f989
JK
1835 goto copy;
1836 }
5e04c0c3
JK
1837 case IP_SET_OP_GET_FNAME: {
1838 struct ip_set_req_get_set_family *req_get = data;
1839 ip_set_id_t id;
1840
1841 if (*len != sizeof(struct ip_set_req_get_set_family)) {
1842 ret = -EINVAL;
1843 goto done;
1844 }
1845 req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
1846 nfnl_lock(NFNL_SUBSYS_IPSET);
1847 find_set_and_id(req_get->set.name, &id);
1848 req_get->set.index = id;
1849 if (id != IPSET_INVALID_ID)
1850 req_get->family = nfnl_set(id)->family;
1851 nfnl_unlock(NFNL_SUBSYS_IPSET);
1852 goto copy;
1853 }
a7b4f989
JK
1854 case IP_SET_OP_GET_BYINDEX: {
1855 struct ip_set_req_get_set *req_get = data;
9076aea7 1856 struct ip_set *set;
a7b4f989
JK
1857
1858 if (*len != sizeof(struct ip_set_req_get_set) ||
1859 req_get->set.index >= ip_set_max) {
1860 ret = -EINVAL;
1861 goto done;
1862 }
c14b78e7 1863 nfnl_lock(NFNL_SUBSYS_IPSET);
9076aea7
JK
1864 set = nfnl_set(req_get->set.index);
1865 strncpy(req_get->set.name, set ? set->name : "",
a7b4f989 1866 IPSET_MAXNAMELEN);
c14b78e7 1867 nfnl_unlock(NFNL_SUBSYS_IPSET);
a7b4f989
JK
1868 goto copy;
1869 }
1870 default:
1871 ret = -EBADMSG;
1872 goto done;
1873 } /* end of switch(op) */
1874
1875copy:
1876 ret = copy_to_user(user, data, copylen);
1877
1878done:
1879 vfree(data);
1880 if (ret > 0)
1881 ret = 0;
1882 return ret;
1883}
1884
1885static struct nf_sockopt_ops so_set __read_mostly = {
1886 .pf = PF_INET,
1887 .get_optmin = SO_IP_SET,
1888 .get_optmax = SO_IP_SET + 1,
1889 .get = &ip_set_sockfn_get,
1890 .owner = THIS_MODULE,
1891};
1892
1893static int __init
1894ip_set_init(void)
1895{
9076aea7 1896 struct ip_set **list;
a7b4f989
JK
1897 int ret;
1898
1899 if (max_sets)
1900 ip_set_max = max_sets;
1901 if (ip_set_max >= IPSET_INVALID_ID)
1902 ip_set_max = IPSET_INVALID_ID - 1;
1903
9076aea7
JK
1904 list = kzalloc(sizeof(struct ip_set *) * ip_set_max, GFP_KERNEL);
1905 if (!list)
a7b4f989 1906 return -ENOMEM;
a7b4f989 1907
9076aea7 1908 rcu_assign_pointer(ip_set_list, list);
a7b4f989
JK
1909 ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
1910 if (ret != 0) {
1911 pr_err("ip_set: cannot register with nfnetlink.\n");
9076aea7 1912 kfree(list);
a7b4f989
JK
1913 return ret;
1914 }
1915 ret = nf_register_sockopt(&so_set);
1916 if (ret != 0) {
1917 pr_err("SO_SET registry failed: %d\n", ret);
1918 nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
9076aea7 1919 kfree(list);
a7b4f989
JK
1920 return ret;
1921 }
1922
1923 pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL);
1924 return 0;
1925}
1926
1927static void __exit
1928ip_set_fini(void)
1929{
9076aea7
JK
1930 struct ip_set **list = rcu_dereference_protected(ip_set_list, 1);
1931
a7b4f989
JK
1932 /* There can't be any existing set */
1933 nf_unregister_sockopt(&so_set);
1934 nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
9076aea7 1935 kfree(list);
a7b4f989
JK
1936 pr_debug("these are the famous last words\n");
1937}
1938
1939module_init(ip_set_init);
1940module_exit(ip_set_fini);