Merge branch 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[linux-2.6-block.git] / net / netfilter / ipset / ip_set_core.c
CommitLineData
a7b4f989
JK
1/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
2 * Patrick Schaaf <bof@bof.de>
3 * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10/* Kernel module for IP set management */
11
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/moduleparam.h>
15#include <linux/ip.h>
16#include <linux/skbuff.h>
17#include <linux/spinlock.h>
18#include <linux/netlink.h>
19#include <linux/rculist.h>
20#include <linux/version.h>
21#include <net/netlink.h>
22
23#include <linux/netfilter.h>
24#include <linux/netfilter/nfnetlink.h>
25#include <linux/netfilter/ipset/ip_set.h>
26
27static LIST_HEAD(ip_set_type_list); /* all registered set types */
28static DEFINE_MUTEX(ip_set_type_mutex); /* protects ip_set_type_list */
29
30static struct ip_set **ip_set_list; /* all individual sets */
31static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */
32
33#define STREQ(a, b) (strncmp(a, b, IPSET_MAXNAMELEN) == 0)
34
35static unsigned int max_sets;
36
37module_param(max_sets, int, 0600);
38MODULE_PARM_DESC(max_sets, "maximal number of sets");
39MODULE_LICENSE("GPL");
40MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
41MODULE_DESCRIPTION("core IP set support");
42MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
43
44/*
45 * The set types are implemented in modules and registered set types
46 * can be found in ip_set_type_list. Adding/deleting types is
47 * serialized by ip_set_type_mutex.
48 */
49
50static inline void
51ip_set_type_lock(void)
52{
53 mutex_lock(&ip_set_type_mutex);
54}
55
56static inline void
57ip_set_type_unlock(void)
58{
59 mutex_unlock(&ip_set_type_mutex);
60}
61
62/* Register and deregister settype */
63
64static struct ip_set_type *
65find_set_type(const char *name, u8 family, u8 revision)
66{
67 struct ip_set_type *type;
68
69 list_for_each_entry_rcu(type, &ip_set_type_list, list)
70 if (STREQ(type->name, name) &&
71 (type->family == family || type->family == AF_UNSPEC) &&
72 type->revision == revision)
73 return type;
74 return NULL;
75}
76
77/* Unlock, try to load a set type module and lock again */
78static int
79try_to_load_type(const char *name)
80{
81 nfnl_unlock();
82 pr_debug("try to load ip_set_%s\n", name);
83 if (request_module("ip_set_%s", name) < 0) {
84 pr_warning("Can't find ip_set type %s\n", name);
85 nfnl_lock();
86 return -IPSET_ERR_FIND_TYPE;
87 }
88 nfnl_lock();
89 return -EAGAIN;
90}
91
92/* Find a set type and reference it */
93static int
94find_set_type_get(const char *name, u8 family, u8 revision,
95 struct ip_set_type **found)
96{
97 rcu_read_lock();
98 *found = find_set_type(name, family, revision);
99 if (*found) {
100 int err = !try_module_get((*found)->me);
101 rcu_read_unlock();
102 return err ? -EFAULT : 0;
103 }
104 rcu_read_unlock();
105
106 return try_to_load_type(name);
107}
108
109/* Find a given set type by name and family.
110 * If we succeeded, the supported minimal and maximum revisions are
111 * filled out.
112 */
113static int
114find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max)
115{
116 struct ip_set_type *type;
117 bool found = false;
118
119 *min = *max = 0;
120 rcu_read_lock();
121 list_for_each_entry_rcu(type, &ip_set_type_list, list)
122 if (STREQ(type->name, name) &&
123 (type->family == family || type->family == AF_UNSPEC)) {
124 found = true;
125 if (type->revision < *min)
126 *min = type->revision;
127 else if (type->revision > *max)
128 *max = type->revision;
129 }
130 rcu_read_unlock();
131 if (found)
132 return 0;
133
134 return try_to_load_type(name);
135}
136
/* Printable family name: "inet", "inet6", or "any" for everything else */
#define family_name(f)	((f) == AF_INET6 ? "inet6" :	\
			 (f) == AF_INET ? "inet" : "any")
139
140/* Register a set type structure. The type is identified by
141 * the unique triple of name, family and revision.
142 */
143int
144ip_set_type_register(struct ip_set_type *type)
145{
146 int ret = 0;
147
148 if (type->protocol != IPSET_PROTOCOL) {
149 pr_warning("ip_set type %s, family %s, revision %u uses "
150 "wrong protocol version %u (want %u)\n",
151 type->name, family_name(type->family),
152 type->revision, type->protocol, IPSET_PROTOCOL);
153 return -EINVAL;
154 }
155
156 ip_set_type_lock();
157 if (find_set_type(type->name, type->family, type->revision)) {
158 /* Duplicate! */
159 pr_warning("ip_set type %s, family %s, revision %u "
160 "already registered!\n", type->name,
161 family_name(type->family), type->revision);
162 ret = -EINVAL;
163 goto unlock;
164 }
165 list_add_rcu(&type->list, &ip_set_type_list);
166 pr_debug("type %s, family %s, revision %u registered.\n",
167 type->name, family_name(type->family), type->revision);
168unlock:
169 ip_set_type_unlock();
170 return ret;
171}
172EXPORT_SYMBOL_GPL(ip_set_type_register);
173
174/* Unregister a set type. There's a small race with ip_set_create */
175void
176ip_set_type_unregister(struct ip_set_type *type)
177{
178 ip_set_type_lock();
179 if (!find_set_type(type->name, type->family, type->revision)) {
180 pr_warning("ip_set type %s, family %s, revision %u "
181 "not registered\n", type->name,
182 family_name(type->family), type->revision);
183 goto unlock;
184 }
185 list_del_rcu(&type->list);
186 pr_debug("type %s, family %s, revision %u unregistered.\n",
187 type->name, family_name(type->family), type->revision);
188unlock:
189 ip_set_type_unlock();
190
191 synchronize_rcu();
192}
193EXPORT_SYMBOL_GPL(ip_set_type_unregister);
194
195/* Utility functions */
196void *
197ip_set_alloc(size_t size)
198{
199 void *members = NULL;
200
201 if (size < KMALLOC_MAX_SIZE)
202 members = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
203
204 if (members) {
205 pr_debug("%p: allocated with kmalloc\n", members);
206 return members;
207 }
208
209 members = vzalloc(size);
210 if (!members)
211 return NULL;
212 pr_debug("%p: allocated with vmalloc\n", members);
213
214 return members;
215}
216EXPORT_SYMBOL_GPL(ip_set_alloc);
217
/*
 * Release memory obtained from ip_set_alloc(): vfree() for vmalloc
 * addresses, kfree() for everything else.
 */
void
ip_set_free(void *members)
{
	int vmalloced = is_vmalloc_addr(members);

	pr_debug("%p: free with %s\n", members,
		 vmalloced ? "vfree" : "kfree");
	if (vmalloced)
		vfree(members);
	else
		kfree(members);
}
EXPORT_SYMBOL_GPL(ip_set_free);
229
230static inline bool
231flag_nested(const struct nlattr *nla)
232{
233 return nla->nla_type & NLA_F_NESTED;
234}
235
236static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = {
237 [IPSET_ATTR_IPADDR_IPV4] = { .type = NLA_U32 },
238 [IPSET_ATTR_IPADDR_IPV6] = { .type = NLA_BINARY,
239 .len = sizeof(struct in6_addr) },
240};
241
242int
243ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr)
244{
245 struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
246
247 if (unlikely(!flag_nested(nla)))
248 return -IPSET_ERR_PROTOCOL;
8da560ce 249 if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy))
a7b4f989
JK
250 return -IPSET_ERR_PROTOCOL;
251 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4)))
252 return -IPSET_ERR_PROTOCOL;
253
254 *ipaddr = nla_get_be32(tb[IPSET_ATTR_IPADDR_IPV4]);
255 return 0;
256}
257EXPORT_SYMBOL_GPL(ip_set_get_ipaddr4);
258
259int
260ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
261{
262 struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
263
264 if (unlikely(!flag_nested(nla)))
265 return -IPSET_ERR_PROTOCOL;
266
8da560ce 267 if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy))
a7b4f989
JK
268 return -IPSET_ERR_PROTOCOL;
269 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6)))
270 return -IPSET_ERR_PROTOCOL;
271
272 memcpy(ipaddr, nla_data(tb[IPSET_ATTR_IPADDR_IPV6]),
273 sizeof(struct in6_addr));
274 return 0;
275}
276EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
277
278/*
279 * Creating/destroying/renaming/swapping affect the existence and
280 * the properties of a set. All of these can be executed from userspace
281 * only and serialized by the nfnl mutex indirectly from nfnetlink.
282 *
283 * Sets are identified by their index in ip_set_list and the index
284 * is used by the external references (set/SET netfilter modules).
285 *
286 * The set behind an index may change by swapping only, from userspace.
287 */
288
289static inline void
290__ip_set_get(ip_set_id_t index)
291{
292 atomic_inc(&ip_set_list[index]->ref);
293}
294
295static inline void
296__ip_set_put(ip_set_id_t index)
297{
298 atomic_dec(&ip_set_list[index]->ref);
299}
300
301/*
302 * Add, del and test set entries from kernel.
303 *
304 * The set behind the index must exist and must be referenced
305 * so it can't be destroyed (or changed) under our foot.
306 */
307
308int
309ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
310 u8 family, u8 dim, u8 flags)
311{
312 struct ip_set *set = ip_set_list[index];
313 int ret = 0;
314
315 BUG_ON(set == NULL || atomic_read(&set->ref) == 0);
316 pr_debug("set %s, index %u\n", set->name, index);
317
318 if (dim < set->type->dimension ||
319 !(family == set->family || set->family == AF_UNSPEC))
320 return 0;
321
322 read_lock_bh(&set->lock);
323 ret = set->variant->kadt(set, skb, IPSET_TEST, family, dim, flags);
324 read_unlock_bh(&set->lock);
325
326 if (ret == -EAGAIN) {
327 /* Type requests element to be completed */
328 pr_debug("element must be competed, ADD is triggered\n");
329 write_lock_bh(&set->lock);
330 set->variant->kadt(set, skb, IPSET_ADD, family, dim, flags);
331 write_unlock_bh(&set->lock);
332 ret = 1;
333 }
334
335 /* Convert error codes to nomatch */
336 return (ret < 0 ? 0 : ret);
337}
338EXPORT_SYMBOL_GPL(ip_set_test);
339
340int
341ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
342 u8 family, u8 dim, u8 flags)
343{
344 struct ip_set *set = ip_set_list[index];
345 int ret;
346
347 BUG_ON(set == NULL || atomic_read(&set->ref) == 0);
348 pr_debug("set %s, index %u\n", set->name, index);
349
350 if (dim < set->type->dimension ||
351 !(family == set->family || set->family == AF_UNSPEC))
352 return 0;
353
354 write_lock_bh(&set->lock);
355 ret = set->variant->kadt(set, skb, IPSET_ADD, family, dim, flags);
356 write_unlock_bh(&set->lock);
357
358 return ret;
359}
360EXPORT_SYMBOL_GPL(ip_set_add);
361
362int
363ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
364 u8 family, u8 dim, u8 flags)
365{
366 struct ip_set *set = ip_set_list[index];
367 int ret = 0;
368
369 BUG_ON(set == NULL || atomic_read(&set->ref) == 0);
370 pr_debug("set %s, index %u\n", set->name, index);
371
372 if (dim < set->type->dimension ||
373 !(family == set->family || set->family == AF_UNSPEC))
374 return 0;
375
376 write_lock_bh(&set->lock);
377 ret = set->variant->kadt(set, skb, IPSET_DEL, family, dim, flags);
378 write_unlock_bh(&set->lock);
379
380 return ret;
381}
382EXPORT_SYMBOL_GPL(ip_set_del);
383
384/*
385 * Find set by name, reference it once. The reference makes sure the
386 * thing pointed to, does not go away under our feet.
387 *
388 * The nfnl mutex must already be activated.
389 */
390ip_set_id_t
391ip_set_get_byname(const char *name, struct ip_set **set)
392{
393 ip_set_id_t i, index = IPSET_INVALID_ID;
394 struct ip_set *s;
395
396 for (i = 0; i < ip_set_max; i++) {
397 s = ip_set_list[i];
398 if (s != NULL && STREQ(s->name, name)) {
399 __ip_set_get(i);
400 index = i;
401 *set = s;
402 }
403 }
404
405 return index;
406}
407EXPORT_SYMBOL_GPL(ip_set_get_byname);
408
409/*
410 * If the given set pointer points to a valid set, decrement
411 * reference count by 1. The caller shall not assume the index
412 * to be valid, after calling this function.
413 *
414 * The nfnl mutex must already be activated.
415 */
416void
417ip_set_put_byindex(ip_set_id_t index)
418{
419 if (ip_set_list[index] != NULL) {
420 BUG_ON(atomic_read(&ip_set_list[index]->ref) == 0);
421 __ip_set_put(index);
422 }
423}
424EXPORT_SYMBOL_GPL(ip_set_put_byindex);
425
426/*
427 * Get the name of a set behind a set index.
428 * We assume the set is referenced, so it does exist and
429 * can't be destroyed. The set cannot be renamed due to
430 * the referencing either.
431 *
432 * The nfnl mutex must already be activated.
433 */
434const char *
435ip_set_name_byindex(ip_set_id_t index)
436{
437 const struct ip_set *set = ip_set_list[index];
438
439 BUG_ON(set == NULL);
440 BUG_ON(atomic_read(&set->ref) == 0);
441
442 /* Referenced, so it's safe */
443 return set->name;
444}
445EXPORT_SYMBOL_GPL(ip_set_name_byindex);
446
447/*
448 * Routines to call by external subsystems, which do not
449 * call nfnl_lock for us.
450 */
451
452/*
453 * Find set by name, reference it once. The reference makes sure the
454 * thing pointed to, does not go away under our feet.
455 *
456 * The nfnl mutex is used in the function.
457 */
458ip_set_id_t
459ip_set_nfnl_get(const char *name)
460{
461 struct ip_set *s;
462 ip_set_id_t index;
463
464 nfnl_lock();
465 index = ip_set_get_byname(name, &s);
466 nfnl_unlock();
467
468 return index;
469}
470EXPORT_SYMBOL_GPL(ip_set_nfnl_get);
471
472/*
473 * Find set by index, reference it once. The reference makes sure the
474 * thing pointed to, does not go away under our feet.
475 *
476 * The nfnl mutex is used in the function.
477 */
478ip_set_id_t
479ip_set_nfnl_get_byindex(ip_set_id_t index)
480{
481 if (index > ip_set_max)
482 return IPSET_INVALID_ID;
483
484 nfnl_lock();
485 if (ip_set_list[index])
486 __ip_set_get(index);
487 else
488 index = IPSET_INVALID_ID;
489 nfnl_unlock();
490
491 return index;
492}
493EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex);
494
495/*
496 * If the given set pointer points to a valid set, decrement
497 * reference count by 1. The caller shall not assume the index
498 * to be valid, after calling this function.
499 *
500 * The nfnl mutex is used in the function.
501 */
502void
503ip_set_nfnl_put(ip_set_id_t index)
504{
505 nfnl_lock();
506 if (ip_set_list[index] != NULL) {
507 BUG_ON(atomic_read(&ip_set_list[index]->ref) == 0);
508 __ip_set_put(index);
509 }
510 nfnl_unlock();
511}
512EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
513
514/*
515 * Communication protocol with userspace over netlink.
516 *
517 * We already locked by nfnl_lock.
518 */
519
520static inline bool
521protocol_failed(const struct nlattr * const tb[])
522{
523 return !tb[IPSET_ATTR_PROTOCOL] ||
524 nla_get_u8(tb[IPSET_ATTR_PROTOCOL]) != IPSET_PROTOCOL;
525}
526
527static inline u32
528flag_exist(const struct nlmsghdr *nlh)
529{
530 return nlh->nlmsg_flags & NLM_F_EXCL ? 0 : IPSET_FLAG_EXIST;
531}
532
533static struct nlmsghdr *
534start_msg(struct sk_buff *skb, u32 pid, u32 seq, unsigned int flags,
535 enum ipset_cmd cmd)
536{
537 struct nlmsghdr *nlh;
538 struct nfgenmsg *nfmsg;
539
540 nlh = nlmsg_put(skb, pid, seq, cmd | (NFNL_SUBSYS_IPSET << 8),
541 sizeof(*nfmsg), flags);
542 if (nlh == NULL)
543 return NULL;
544
545 nfmsg = nlmsg_data(nlh);
546 nfmsg->nfgen_family = AF_INET;
547 nfmsg->version = NFNETLINK_V0;
548 nfmsg->res_id = 0;
549
550 return nlh;
551}
552
553/* Create a set */
554
555static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = {
556 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
557 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
558 .len = IPSET_MAXNAMELEN - 1 },
559 [IPSET_ATTR_TYPENAME] = { .type = NLA_NUL_STRING,
560 .len = IPSET_MAXNAMELEN - 1},
561 [IPSET_ATTR_REVISION] = { .type = NLA_U8 },
562 [IPSET_ATTR_FAMILY] = { .type = NLA_U8 },
563 [IPSET_ATTR_DATA] = { .type = NLA_NESTED },
564};
565
566static ip_set_id_t
567find_set_id(const char *name)
568{
569 ip_set_id_t i, index = IPSET_INVALID_ID;
570 const struct ip_set *set;
571
572 for (i = 0; index == IPSET_INVALID_ID && i < ip_set_max; i++) {
573 set = ip_set_list[i];
574 if (set != NULL && STREQ(set->name, name))
575 index = i;
576 }
577 return index;
578}
579
580static inline struct ip_set *
581find_set(const char *name)
582{
583 ip_set_id_t index = find_set_id(name);
584
585 return index == IPSET_INVALID_ID ? NULL : ip_set_list[index];
586}
587
588static int
589find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set)
590{
591 ip_set_id_t i;
592
593 *index = IPSET_INVALID_ID;
594 for (i = 0; i < ip_set_max; i++) {
595 if (ip_set_list[i] == NULL) {
596 if (*index == IPSET_INVALID_ID)
597 *index = i;
598 } else if (STREQ(name, ip_set_list[i]->name)) {
599 /* Name clash */
600 *set = ip_set_list[i];
601 return -EEXIST;
602 }
603 }
604 if (*index == IPSET_INVALID_ID)
605 /* No free slot remained */
606 return -IPSET_ERR_MAX_SETS;
607 return 0;
608}
609
610static int
611ip_set_create(struct sock *ctnl, struct sk_buff *skb,
612 const struct nlmsghdr *nlh,
613 const struct nlattr * const attr[])
614{
9846ada1 615 struct ip_set *set, *clash = NULL;
a7b4f989
JK
616 ip_set_id_t index = IPSET_INVALID_ID;
617 struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {};
618 const char *name, *typename;
619 u8 family, revision;
620 u32 flags = flag_exist(nlh);
621 int ret = 0;
622
623 if (unlikely(protocol_failed(attr) ||
624 attr[IPSET_ATTR_SETNAME] == NULL ||
625 attr[IPSET_ATTR_TYPENAME] == NULL ||
626 attr[IPSET_ATTR_REVISION] == NULL ||
627 attr[IPSET_ATTR_FAMILY] == NULL ||
628 (attr[IPSET_ATTR_DATA] != NULL &&
629 !flag_nested(attr[IPSET_ATTR_DATA]))))
630 return -IPSET_ERR_PROTOCOL;
631
632 name = nla_data(attr[IPSET_ATTR_SETNAME]);
633 typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
634 family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
635 revision = nla_get_u8(attr[IPSET_ATTR_REVISION]);
636 pr_debug("setname: %s, typename: %s, family: %s, revision: %u\n",
637 name, typename, family_name(family), revision);
638
639 /*
640 * First, and without any locks, allocate and initialize
641 * a normal base set structure.
642 */
643 set = kzalloc(sizeof(struct ip_set), GFP_KERNEL);
644 if (!set)
645 return -ENOMEM;
646 rwlock_init(&set->lock);
647 strlcpy(set->name, name, IPSET_MAXNAMELEN);
648 atomic_set(&set->ref, 0);
649 set->family = family;
650
651 /*
652 * Next, check that we know the type, and take
653 * a reference on the type, to make sure it stays available
654 * while constructing our new set.
655 *
656 * After referencing the type, we try to create the type
657 * specific part of the set without holding any locks.
658 */
659 ret = find_set_type_get(typename, family, revision, &(set->type));
660 if (ret)
661 goto out;
662
663 /*
664 * Without holding any locks, create private part.
665 */
666 if (attr[IPSET_ATTR_DATA] &&
8da560ce
PM
667 nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA],
668 set->type->create_policy)) {
a7b4f989
JK
669 ret = -IPSET_ERR_PROTOCOL;
670 goto put_out;
671 }
672
673 ret = set->type->create(set, tb, flags);
674 if (ret != 0)
675 goto put_out;
676
677 /* BTW, ret==0 here. */
678
679 /*
680 * Here, we have a valid, constructed set and we are protected
681 * by nfnl_lock. Find the first free index in ip_set_list and
682 * check clashing.
683 */
684 if ((ret = find_free_id(set->name, &index, &clash)) != 0) {
685 /* If this is the same set and requested, ignore error */
686 if (ret == -EEXIST &&
687 (flags & IPSET_FLAG_EXIST) &&
688 STREQ(set->type->name, clash->type->name) &&
689 set->type->family == clash->type->family &&
690 set->type->revision == clash->type->revision &&
691 set->variant->same_set(set, clash))
692 ret = 0;
693 goto cleanup;
694 }
695
696 /*
697 * Finally! Add our shiny new set to the list, and be done.
698 */
699 pr_debug("create: '%s' created with index %u!\n", set->name, index);
700 ip_set_list[index] = set;
701
702 return ret;
703
704cleanup:
705 set->variant->destroy(set);
706put_out:
707 module_put(set->type->me);
708out:
709 kfree(set);
710 return ret;
711}
712
713/* Destroy sets */
714
715static const struct nla_policy
716ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
717 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
718 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
719 .len = IPSET_MAXNAMELEN - 1 },
720};
721
722static void
723ip_set_destroy_set(ip_set_id_t index)
724{
725 struct ip_set *set = ip_set_list[index];
726
727 pr_debug("set: %s\n", set->name);
728 ip_set_list[index] = NULL;
729
730 /* Must call it without holding any lock */
731 set->variant->destroy(set);
732 module_put(set->type->me);
733 kfree(set);
734}
735
736static int
737ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
738 const struct nlmsghdr *nlh,
739 const struct nlattr * const attr[])
740{
741 ip_set_id_t i;
742
743 if (unlikely(protocol_failed(attr)))
744 return -IPSET_ERR_PROTOCOL;
745
746 /* References are protected by the nfnl mutex */
747 if (!attr[IPSET_ATTR_SETNAME]) {
748 for (i = 0; i < ip_set_max; i++) {
749 if (ip_set_list[i] != NULL &&
750 (atomic_read(&ip_set_list[i]->ref)))
751 return -IPSET_ERR_BUSY;
752 }
753 for (i = 0; i < ip_set_max; i++) {
754 if (ip_set_list[i] != NULL)
755 ip_set_destroy_set(i);
756 }
757 } else {
758 i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
759 if (i == IPSET_INVALID_ID)
760 return -ENOENT;
761 else if (atomic_read(&ip_set_list[i]->ref))
762 return -IPSET_ERR_BUSY;
763
764 ip_set_destroy_set(i);
765 }
766 return 0;
767}
768
769/* Flush sets */
770
771static void
772ip_set_flush_set(struct ip_set *set)
773{
774 pr_debug("set: %s\n", set->name);
775
776 write_lock_bh(&set->lock);
777 set->variant->flush(set);
778 write_unlock_bh(&set->lock);
779}
780
781static int
782ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
783 const struct nlmsghdr *nlh,
784 const struct nlattr * const attr[])
785{
786 ip_set_id_t i;
787
788 if (unlikely(protocol_failed(attr)))
789 return -EPROTO;
790
791 if (!attr[IPSET_ATTR_SETNAME]) {
792 for (i = 0; i < ip_set_max; i++)
793 if (ip_set_list[i] != NULL)
794 ip_set_flush_set(ip_set_list[i]);
795 } else {
796 i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
797 if (i == IPSET_INVALID_ID)
798 return -ENOENT;
799
800 ip_set_flush_set(ip_set_list[i]);
801 }
802
803 return 0;
804}
805
806/* Rename a set */
807
808static const struct nla_policy
809ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = {
810 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
811 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
812 .len = IPSET_MAXNAMELEN - 1 },
813 [IPSET_ATTR_SETNAME2] = { .type = NLA_NUL_STRING,
814 .len = IPSET_MAXNAMELEN - 1 },
815};
816
817static int
818ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
819 const struct nlmsghdr *nlh,
820 const struct nlattr * const attr[])
821{
822 struct ip_set *set;
823 const char *name2;
824 ip_set_id_t i;
825
826 if (unlikely(protocol_failed(attr) ||
827 attr[IPSET_ATTR_SETNAME] == NULL ||
828 attr[IPSET_ATTR_SETNAME2] == NULL))
829 return -IPSET_ERR_PROTOCOL;
830
831 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
832 if (set == NULL)
833 return -ENOENT;
834 if (atomic_read(&set->ref) != 0)
835 return -IPSET_ERR_REFERENCED;
836
837 name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
838 for (i = 0; i < ip_set_max; i++) {
839 if (ip_set_list[i] != NULL &&
840 STREQ(ip_set_list[i]->name, name2))
841 return -IPSET_ERR_EXIST_SETNAME2;
842 }
843 strncpy(set->name, name2, IPSET_MAXNAMELEN);
844
845 return 0;
846}
847
848/* Swap two sets so that name/index points to the other.
849 * References and set names are also swapped.
850 *
851 * We are protected by the nfnl mutex and references are
852 * manipulated only by holding the mutex. The kernel interfaces
853 * do not hold the mutex but the pointer settings are atomic
854 * so the ip_set_list always contains valid pointers to the sets.
855 */
856
857static int
858ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
859 const struct nlmsghdr *nlh,
860 const struct nlattr * const attr[])
861{
862 struct ip_set *from, *to;
863 ip_set_id_t from_id, to_id;
864 char from_name[IPSET_MAXNAMELEN];
865 u32 from_ref;
866
867 if (unlikely(protocol_failed(attr) ||
868 attr[IPSET_ATTR_SETNAME] == NULL ||
869 attr[IPSET_ATTR_SETNAME2] == NULL))
870 return -IPSET_ERR_PROTOCOL;
871
872 from_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
873 if (from_id == IPSET_INVALID_ID)
874 return -ENOENT;
875
876 to_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME2]));
877 if (to_id == IPSET_INVALID_ID)
878 return -IPSET_ERR_EXIST_SETNAME2;
879
880 from = ip_set_list[from_id];
881 to = ip_set_list[to_id];
882
883 /* Features must not change.
884 * Not an artifical restriction anymore, as we must prevent
885 * possible loops created by swapping in setlist type of sets. */
886 if (!(from->type->features == to->type->features &&
887 from->type->family == to->type->family))
888 return -IPSET_ERR_TYPE_MISMATCH;
889
890 /* No magic here: ref munging protected by the nfnl_lock */
891 strncpy(from_name, from->name, IPSET_MAXNAMELEN);
892 from_ref = atomic_read(&from->ref);
893
894 strncpy(from->name, to->name, IPSET_MAXNAMELEN);
895 atomic_set(&from->ref, atomic_read(&to->ref));
896 strncpy(to->name, from_name, IPSET_MAXNAMELEN);
897 atomic_set(&to->ref, from_ref);
898
899 ip_set_list[from_id] = to;
900 ip_set_list[to_id] = from;
901
902 return 0;
903}
904
905/* List/save set data */
906
/* Dump modes kept in cb->args[0] */
#define DUMP_INIT	0L	/* request not parsed yet */
#define DUMP_ALL	1L	/* no set name given: dump every set */
#define DUMP_ONE	2L	/* dump the single named set */
#define DUMP_LAST	3L	/* pass for the sets to be dumped last */
911
912static int
913ip_set_dump_done(struct netlink_callback *cb)
914{
915 if (cb->args[2]) {
916 pr_debug("release set %s\n", ip_set_list[cb->args[1]]->name);
917 __ip_set_put((ip_set_id_t) cb->args[1]);
918 }
919 return 0;
920}
921
922static inline void
923dump_attrs(struct nlmsghdr *nlh)
924{
925 const struct nlattr *attr;
926 int rem;
927
928 pr_debug("dump nlmsg\n");
929 nlmsg_for_each_attr(attr, nlh, sizeof(struct nfgenmsg), rem) {
930 pr_debug("type: %u, len %u\n", nla_type(attr), attr->nla_len);
931 }
932}
933
934static int
935dump_init(struct netlink_callback *cb)
936{
937 struct nlmsghdr *nlh = nlmsg_hdr(cb->skb);
938 int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
939 struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
940 struct nlattr *attr = (void *)nlh + min_len;
941 ip_set_id_t index;
942
943 /* Second pass, so parser can't fail */
944 nla_parse(cda, IPSET_ATTR_CMD_MAX,
945 attr, nlh->nlmsg_len - min_len, ip_set_setname_policy);
946
947 /* cb->args[0] : dump single set/all sets
948 * [1] : set index
949 * [..]: type specific
950 */
951
952 if (!cda[IPSET_ATTR_SETNAME]) {
953 cb->args[0] = DUMP_ALL;
954 return 0;
955 }
956
957 index = find_set_id(nla_data(cda[IPSET_ATTR_SETNAME]));
958 if (index == IPSET_INVALID_ID)
959 return -ENOENT;
960
961 cb->args[0] = DUMP_ONE;
962 cb->args[1] = index;
963 return 0;
964}
965
966static int
967ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
968{
969 ip_set_id_t index = IPSET_INVALID_ID, max;
970 struct ip_set *set = NULL;
971 struct nlmsghdr *nlh = NULL;
972 unsigned int flags = NETLINK_CB(cb->skb).pid ? NLM_F_MULTI : 0;
973 int ret = 0;
974
975 if (cb->args[0] == DUMP_INIT) {
976 ret = dump_init(cb);
977 if (ret < 0) {
978 nlh = nlmsg_hdr(cb->skb);
979 /* We have to create and send the error message
980 * manually :-( */
981 if (nlh->nlmsg_flags & NLM_F_ACK)
982 netlink_ack(cb->skb, nlh, ret);
983 return ret;
984 }
985 }
986
987 if (cb->args[1] >= ip_set_max)
988 goto out;
989
990 pr_debug("args[0]: %ld args[1]: %ld\n", cb->args[0], cb->args[1]);
991 max = cb->args[0] == DUMP_ONE ? cb->args[1] + 1 : ip_set_max;
992 for (; cb->args[1] < max; cb->args[1]++) {
993 index = (ip_set_id_t) cb->args[1];
994 set = ip_set_list[index];
995 if (set == NULL) {
996 if (cb->args[0] == DUMP_ONE) {
997 ret = -ENOENT;
998 goto out;
999 }
1000 continue;
1001 }
1002 /* When dumping all sets, we must dump "sorted"
1003 * so that lists (unions of sets) are dumped last.
1004 */
1005 if (cb->args[0] != DUMP_ONE &&
1006 !((cb->args[0] == DUMP_ALL) ^
1007 (set->type->features & IPSET_DUMP_LAST)))
1008 continue;
1009 pr_debug("List set: %s\n", set->name);
1010 if (!cb->args[2]) {
1011 /* Start listing: make sure set won't be destroyed */
1012 pr_debug("reference set\n");
1013 __ip_set_get(index);
1014 }
1015 nlh = start_msg(skb, NETLINK_CB(cb->skb).pid,
1016 cb->nlh->nlmsg_seq, flags,
1017 IPSET_CMD_LIST);
1018 if (!nlh) {
1019 ret = -EMSGSIZE;
1020 goto release_refcount;
1021 }
1022 NLA_PUT_U8(skb, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL);
1023 NLA_PUT_STRING(skb, IPSET_ATTR_SETNAME, set->name);
1024 switch (cb->args[2]) {
1025 case 0:
1026 /* Core header data */
1027 NLA_PUT_STRING(skb, IPSET_ATTR_TYPENAME,
1028 set->type->name);
1029 NLA_PUT_U8(skb, IPSET_ATTR_FAMILY,
1030 set->family);
1031 NLA_PUT_U8(skb, IPSET_ATTR_REVISION,
1032 set->type->revision);
1033 ret = set->variant->head(set, skb);
1034 if (ret < 0)
1035 goto release_refcount;
1036 /* Fall through and add elements */
1037 default:
1038 read_lock_bh(&set->lock);
1039 ret = set->variant->list(set, skb, cb);
1040 read_unlock_bh(&set->lock);
1041 if (!cb->args[2]) {
1042 /* Set is done, proceed with next one */
1043 if (cb->args[0] == DUMP_ONE)
1044 cb->args[1] = IPSET_INVALID_ID;
1045 else
1046 cb->args[1]++;
1047 }
1048 goto release_refcount;
1049 }
1050 }
1051 goto out;
1052
1053nla_put_failure:
1054 ret = -EFAULT;
1055release_refcount:
1056 /* If there was an error or set is done, release set */
1057 if (ret || !cb->args[2]) {
1058 pr_debug("release set %s\n", ip_set_list[index]->name);
1059 __ip_set_put(index);
1060 }
1061
1062 /* If we dump all sets, continue with dumping last ones */
1063 if (cb->args[0] == DUMP_ALL && cb->args[1] >= max && !cb->args[2])
1064 cb->args[0] = DUMP_LAST;
1065
1066out:
1067 if (nlh) {
1068 nlmsg_end(skb, nlh);
1069 pr_debug("nlmsg_len: %u\n", nlh->nlmsg_len);
1070 dump_attrs(nlh);
1071 }
1072
1073 return ret < 0 ? ret : skb->len;
1074}
1075
1076static int
1077ip_set_dump(struct sock *ctnl, struct sk_buff *skb,
1078 const struct nlmsghdr *nlh,
1079 const struct nlattr * const attr[])
1080{
1081 if (unlikely(protocol_failed(attr)))
1082 return -IPSET_ERR_PROTOCOL;
1083
1084 return netlink_dump_start(ctnl, skb, nlh,
1085 ip_set_dump_start,
1086 ip_set_dump_done);
1087}
1088
1089/* Add, del and test */
1090
1091static const struct nla_policy ip_set_adt_policy[IPSET_ATTR_CMD_MAX + 1] = {
1092 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
1093 [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
1094 .len = IPSET_MAXNAMELEN - 1 },
1095 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
1096 [IPSET_ATTR_DATA] = { .type = NLA_NESTED },
1097 [IPSET_ATTR_ADT] = { .type = NLA_NESTED },
1098};
1099
1100static int
5f52bc3c 1101call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
a7b4f989
JK
1102 struct nlattr *tb[], enum ipset_adt adt,
1103 u32 flags, bool use_lineno)
1104{
1105 int ret, retried = 0;
1106 u32 lineno = 0;
1107 bool eexist = flags & IPSET_FLAG_EXIST;
1108
1109 do {
1110 write_lock_bh(&set->lock);
1111 ret = set->variant->uadt(set, tb, adt, &lineno, flags);
1112 write_unlock_bh(&set->lock);
1113 } while (ret == -EAGAIN &&
1114 set->variant->resize &&
1115 (ret = set->variant->resize(set, retried++)) == 0);
1116
1117 if (!ret || (ret == -IPSET_ERR_EXIST && eexist))
1118 return 0;
1119 if (lineno && use_lineno) {
1120 /* Error in restore/batch mode: send back lineno */
5f52bc3c
JK
1121 struct nlmsghdr *rep, *nlh = nlmsg_hdr(skb);
1122 struct sk_buff *skb2;
1123 struct nlmsgerr *errmsg;
1124 size_t payload = sizeof(*errmsg) + nlmsg_len(nlh);
a7b4f989
JK
1125 int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
1126 struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
5f52bc3c 1127 struct nlattr *cmdattr;
a7b4f989
JK
1128 u32 *errline;
1129
5f52bc3c
JK
1130 skb2 = nlmsg_new(payload, GFP_KERNEL);
1131 if (skb2 == NULL)
1132 return -ENOMEM;
1133 rep = __nlmsg_put(skb2, NETLINK_CB(skb).pid,
1134 nlh->nlmsg_seq, NLMSG_ERROR, payload, 0);
1135 errmsg = nlmsg_data(rep);
1136 errmsg->error = ret;
1137 memcpy(&errmsg->msg, nlh, nlh->nlmsg_len);
1138 cmdattr = (void *)&errmsg->msg + min_len;
1139
a7b4f989
JK
1140 nla_parse(cda, IPSET_ATTR_CMD_MAX,
1141 cmdattr, nlh->nlmsg_len - min_len,
1142 ip_set_adt_policy);
1143
1144 errline = nla_data(cda[IPSET_ATTR_LINENO]);
1145
1146 *errline = lineno;
5f52bc3c
JK
1147
1148 netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
1149 /* Signal netlink not to send its ACK/errmsg. */
1150 return -EINTR;
a7b4f989
JK
1151 }
1152
1153 return ret;
1154}
1155
1156static int
1157ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
1158 const struct nlmsghdr *nlh,
1159 const struct nlattr * const attr[])
1160{
1161 struct ip_set *set;
1162 struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1163 const struct nlattr *nla;
1164 u32 flags = flag_exist(nlh);
1165 bool use_lineno;
1166 int ret = 0;
1167
1168 if (unlikely(protocol_failed(attr) ||
1169 attr[IPSET_ATTR_SETNAME] == NULL ||
1170 !((attr[IPSET_ATTR_DATA] != NULL) ^
1171 (attr[IPSET_ATTR_ADT] != NULL)) ||
1172 (attr[IPSET_ATTR_DATA] != NULL &&
1173 !flag_nested(attr[IPSET_ATTR_DATA])) ||
1174 (attr[IPSET_ATTR_ADT] != NULL &&
1175 (!flag_nested(attr[IPSET_ATTR_ADT]) ||
1176 attr[IPSET_ATTR_LINENO] == NULL))))
1177 return -IPSET_ERR_PROTOCOL;
1178
1179 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1180 if (set == NULL)
1181 return -ENOENT;
1182
1183 use_lineno = !!attr[IPSET_ATTR_LINENO];
1184 if (attr[IPSET_ATTR_DATA]) {
8da560ce
PM
1185 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
1186 attr[IPSET_ATTR_DATA],
1187 set->type->adt_policy))
a7b4f989 1188 return -IPSET_ERR_PROTOCOL;
5f52bc3c
JK
1189 ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, flags,
1190 use_lineno);
a7b4f989
JK
1191 } else {
1192 int nla_rem;
1193
1194 nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
1195 memset(tb, 0, sizeof(tb));
1196 if (nla_type(nla) != IPSET_ATTR_DATA ||
1197 !flag_nested(nla) ||
8da560ce
PM
1198 nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
1199 set->type->adt_policy))
a7b4f989 1200 return -IPSET_ERR_PROTOCOL;
5f52bc3c 1201 ret = call_ad(ctnl, skb, set, tb, IPSET_ADD,
a7b4f989
JK
1202 flags, use_lineno);
1203 if (ret < 0)
1204 return ret;
1205 }
1206 }
1207 return ret;
1208}
1209
1210static int
1211ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
1212 const struct nlmsghdr *nlh,
1213 const struct nlattr * const attr[])
1214{
1215 struct ip_set *set;
1216 struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1217 const struct nlattr *nla;
1218 u32 flags = flag_exist(nlh);
1219 bool use_lineno;
1220 int ret = 0;
1221
1222 if (unlikely(protocol_failed(attr) ||
1223 attr[IPSET_ATTR_SETNAME] == NULL ||
1224 !((attr[IPSET_ATTR_DATA] != NULL) ^
1225 (attr[IPSET_ATTR_ADT] != NULL)) ||
1226 (attr[IPSET_ATTR_DATA] != NULL &&
1227 !flag_nested(attr[IPSET_ATTR_DATA])) ||
1228 (attr[IPSET_ATTR_ADT] != NULL &&
1229 (!flag_nested(attr[IPSET_ATTR_ADT]) ||
1230 attr[IPSET_ATTR_LINENO] == NULL))))
1231 return -IPSET_ERR_PROTOCOL;
1232
1233 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1234 if (set == NULL)
1235 return -ENOENT;
1236
1237 use_lineno = !!attr[IPSET_ATTR_LINENO];
1238 if (attr[IPSET_ATTR_DATA]) {
8da560ce
PM
1239 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
1240 attr[IPSET_ATTR_DATA],
1241 set->type->adt_policy))
a7b4f989 1242 return -IPSET_ERR_PROTOCOL;
5f52bc3c
JK
1243 ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, flags,
1244 use_lineno);
a7b4f989
JK
1245 } else {
1246 int nla_rem;
1247
1248 nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
1249 memset(tb, 0, sizeof(*tb));
1250 if (nla_type(nla) != IPSET_ATTR_DATA ||
1251 !flag_nested(nla) ||
8da560ce
PM
1252 nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
1253 set->type->adt_policy))
a7b4f989 1254 return -IPSET_ERR_PROTOCOL;
5f52bc3c 1255 ret = call_ad(ctnl, skb, set, tb, IPSET_DEL,
a7b4f989
JK
1256 flags, use_lineno);
1257 if (ret < 0)
1258 return ret;
1259 }
1260 }
1261 return ret;
1262}
1263
1264static int
1265ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
1266 const struct nlmsghdr *nlh,
1267 const struct nlattr * const attr[])
1268{
1269 struct ip_set *set;
1270 struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
1271 int ret = 0;
1272
1273 if (unlikely(protocol_failed(attr) ||
1274 attr[IPSET_ATTR_SETNAME] == NULL ||
1275 attr[IPSET_ATTR_DATA] == NULL ||
1276 !flag_nested(attr[IPSET_ATTR_DATA])))
1277 return -IPSET_ERR_PROTOCOL;
1278
1279 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1280 if (set == NULL)
1281 return -ENOENT;
1282
8da560ce
PM
1283 if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA],
1284 set->type->adt_policy))
a7b4f989
JK
1285 return -IPSET_ERR_PROTOCOL;
1286
1287 read_lock_bh(&set->lock);
1288 ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0);
1289 read_unlock_bh(&set->lock);
1290 /* Userspace can't trigger element to be re-added */
1291 if (ret == -EAGAIN)
1292 ret = 1;
1293
1294 return ret < 0 ? ret : ret > 0 ? 0 : -IPSET_ERR_EXIST;
1295}
1296
/* Get header data of a set */
1298
1299static int
1300ip_set_header(struct sock *ctnl, struct sk_buff *skb,
1301 const struct nlmsghdr *nlh,
1302 const struct nlattr * const attr[])
1303{
1304 const struct ip_set *set;
1305 struct sk_buff *skb2;
1306 struct nlmsghdr *nlh2;
1307 ip_set_id_t index;
1308 int ret = 0;
1309
1310 if (unlikely(protocol_failed(attr) ||
1311 attr[IPSET_ATTR_SETNAME] == NULL))
1312 return -IPSET_ERR_PROTOCOL;
1313
1314 index = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
1315 if (index == IPSET_INVALID_ID)
1316 return -ENOENT;
1317 set = ip_set_list[index];
1318
1319 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1320 if (skb2 == NULL)
1321 return -ENOMEM;
1322
1323 nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0,
1324 IPSET_CMD_HEADER);
1325 if (!nlh2)
1326 goto nlmsg_failure;
1327 NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL);
1328 NLA_PUT_STRING(skb2, IPSET_ATTR_SETNAME, set->name);
1329 NLA_PUT_STRING(skb2, IPSET_ATTR_TYPENAME, set->type->name);
1330 NLA_PUT_U8(skb2, IPSET_ATTR_FAMILY, set->family);
1331 NLA_PUT_U8(skb2, IPSET_ATTR_REVISION, set->type->revision);
1332 nlmsg_end(skb2, nlh2);
1333
1334 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
1335 if (ret < 0)
1336 return ret;
1337
1338 return 0;
1339
1340nla_put_failure:
1341 nlmsg_cancel(skb2, nlh2);
1342nlmsg_failure:
1343 kfree_skb(skb2);
1344 return -EMSGSIZE;
1345}
1346
1347/* Get type data */
1348
1349static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = {
1350 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
1351 [IPSET_ATTR_TYPENAME] = { .type = NLA_NUL_STRING,
1352 .len = IPSET_MAXNAMELEN - 1 },
1353 [IPSET_ATTR_FAMILY] = { .type = NLA_U8 },
1354};
1355
1356static int
1357ip_set_type(struct sock *ctnl, struct sk_buff *skb,
1358 const struct nlmsghdr *nlh,
1359 const struct nlattr * const attr[])
1360{
1361 struct sk_buff *skb2;
1362 struct nlmsghdr *nlh2;
1363 u8 family, min, max;
1364 const char *typename;
1365 int ret = 0;
1366
1367 if (unlikely(protocol_failed(attr) ||
1368 attr[IPSET_ATTR_TYPENAME] == NULL ||
1369 attr[IPSET_ATTR_FAMILY] == NULL))
1370 return -IPSET_ERR_PROTOCOL;
1371
1372 family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
1373 typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
1374 ret = find_set_type_minmax(typename, family, &min, &max);
1375 if (ret)
1376 return ret;
1377
1378 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1379 if (skb2 == NULL)
1380 return -ENOMEM;
1381
1382 nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0,
1383 IPSET_CMD_TYPE);
1384 if (!nlh2)
1385 goto nlmsg_failure;
1386 NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL);
1387 NLA_PUT_STRING(skb2, IPSET_ATTR_TYPENAME, typename);
1388 NLA_PUT_U8(skb2, IPSET_ATTR_FAMILY, family);
1389 NLA_PUT_U8(skb2, IPSET_ATTR_REVISION, max);
1390 NLA_PUT_U8(skb2, IPSET_ATTR_REVISION_MIN, min);
1391 nlmsg_end(skb2, nlh2);
1392
1393 pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len);
1394 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
1395 if (ret < 0)
1396 return ret;
1397
1398 return 0;
1399
1400nla_put_failure:
1401 nlmsg_cancel(skb2, nlh2);
1402nlmsg_failure:
1403 kfree_skb(skb2);
1404 return -EMSGSIZE;
1405}
1406
1407/* Get protocol version */
1408
1409static const struct nla_policy
1410ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = {
1411 [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
1412};
1413
1414static int
1415ip_set_protocol(struct sock *ctnl, struct sk_buff *skb,
1416 const struct nlmsghdr *nlh,
1417 const struct nlattr * const attr[])
1418{
1419 struct sk_buff *skb2;
1420 struct nlmsghdr *nlh2;
1421 int ret = 0;
1422
1423 if (unlikely(attr[IPSET_ATTR_PROTOCOL] == NULL))
1424 return -IPSET_ERR_PROTOCOL;
1425
1426 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1427 if (skb2 == NULL)
1428 return -ENOMEM;
1429
1430 nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0,
1431 IPSET_CMD_PROTOCOL);
1432 if (!nlh2)
1433 goto nlmsg_failure;
1434 NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL);
1435 nlmsg_end(skb2, nlh2);
1436
1437 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
1438 if (ret < 0)
1439 return ret;
1440
1441 return 0;
1442
1443nla_put_failure:
1444 nlmsg_cancel(skb2, nlh2);
1445nlmsg_failure:
1446 kfree_skb(skb2);
1447 return -EMSGSIZE;
1448}
1449
1450static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = {
1451 [IPSET_CMD_CREATE] = {
1452 .call = ip_set_create,
1453 .attr_count = IPSET_ATTR_CMD_MAX,
1454 .policy = ip_set_create_policy,
1455 },
1456 [IPSET_CMD_DESTROY] = {
1457 .call = ip_set_destroy,
1458 .attr_count = IPSET_ATTR_CMD_MAX,
1459 .policy = ip_set_setname_policy,
1460 },
1461 [IPSET_CMD_FLUSH] = {
1462 .call = ip_set_flush,
1463 .attr_count = IPSET_ATTR_CMD_MAX,
1464 .policy = ip_set_setname_policy,
1465 },
1466 [IPSET_CMD_RENAME] = {
1467 .call = ip_set_rename,
1468 .attr_count = IPSET_ATTR_CMD_MAX,
1469 .policy = ip_set_setname2_policy,
1470 },
1471 [IPSET_CMD_SWAP] = {
1472 .call = ip_set_swap,
1473 .attr_count = IPSET_ATTR_CMD_MAX,
1474 .policy = ip_set_setname2_policy,
1475 },
1476 [IPSET_CMD_LIST] = {
1477 .call = ip_set_dump,
1478 .attr_count = IPSET_ATTR_CMD_MAX,
1479 .policy = ip_set_setname_policy,
1480 },
1481 [IPSET_CMD_SAVE] = {
1482 .call = ip_set_dump,
1483 .attr_count = IPSET_ATTR_CMD_MAX,
1484 .policy = ip_set_setname_policy,
1485 },
1486 [IPSET_CMD_ADD] = {
1487 .call = ip_set_uadd,
1488 .attr_count = IPSET_ATTR_CMD_MAX,
1489 .policy = ip_set_adt_policy,
1490 },
1491 [IPSET_CMD_DEL] = {
1492 .call = ip_set_udel,
1493 .attr_count = IPSET_ATTR_CMD_MAX,
1494 .policy = ip_set_adt_policy,
1495 },
1496 [IPSET_CMD_TEST] = {
1497 .call = ip_set_utest,
1498 .attr_count = IPSET_ATTR_CMD_MAX,
1499 .policy = ip_set_adt_policy,
1500 },
1501 [IPSET_CMD_HEADER] = {
1502 .call = ip_set_header,
1503 .attr_count = IPSET_ATTR_CMD_MAX,
1504 .policy = ip_set_setname_policy,
1505 },
1506 [IPSET_CMD_TYPE] = {
1507 .call = ip_set_type,
1508 .attr_count = IPSET_ATTR_CMD_MAX,
1509 .policy = ip_set_type_policy,
1510 },
1511 [IPSET_CMD_PROTOCOL] = {
1512 .call = ip_set_protocol,
1513 .attr_count = IPSET_ATTR_CMD_MAX,
1514 .policy = ip_set_protocol_policy,
1515 },
1516};
1517
1518static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = {
1519 .name = "ip_set",
1520 .subsys_id = NFNL_SUBSYS_IPSET,
1521 .cb_count = IPSET_MSG_MAX,
1522 .cb = ip_set_netlink_subsys_cb,
1523};
1524
1525/* Interface to iptables/ip6tables */
1526
1527static int
1528ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
1529{
1530 unsigned *op;
1531 void *data;
1532 int copylen = *len, ret = 0;
1533
1534 if (!capable(CAP_NET_ADMIN))
1535 return -EPERM;
1536 if (optval != SO_IP_SET)
1537 return -EBADF;
1538 if (*len < sizeof(unsigned))
1539 return -EINVAL;
1540
1541 data = vmalloc(*len);
1542 if (!data)
1543 return -ENOMEM;
1544 if (copy_from_user(data, user, *len) != 0) {
1545 ret = -EFAULT;
1546 goto done;
1547 }
1548 op = (unsigned *) data;
1549
1550 if (*op < IP_SET_OP_VERSION) {
1551 /* Check the version at the beginning of operations */
1552 struct ip_set_req_version *req_version = data;
1553 if (req_version->version != IPSET_PROTOCOL) {
1554 ret = -EPROTO;
1555 goto done;
1556 }
1557 }
1558
1559 switch (*op) {
1560 case IP_SET_OP_VERSION: {
1561 struct ip_set_req_version *req_version = data;
1562
1563 if (*len != sizeof(struct ip_set_req_version)) {
1564 ret = -EINVAL;
1565 goto done;
1566 }
1567
1568 req_version->version = IPSET_PROTOCOL;
1569 ret = copy_to_user(user, req_version,
1570 sizeof(struct ip_set_req_version));
1571 goto done;
1572 }
1573 case IP_SET_OP_GET_BYNAME: {
1574 struct ip_set_req_get_set *req_get = data;
1575
1576 if (*len != sizeof(struct ip_set_req_get_set)) {
1577 ret = -EINVAL;
1578 goto done;
1579 }
1580 req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
1581 nfnl_lock();
1582 req_get->set.index = find_set_id(req_get->set.name);
1583 nfnl_unlock();
1584 goto copy;
1585 }
1586 case IP_SET_OP_GET_BYINDEX: {
1587 struct ip_set_req_get_set *req_get = data;
1588
1589 if (*len != sizeof(struct ip_set_req_get_set) ||
1590 req_get->set.index >= ip_set_max) {
1591 ret = -EINVAL;
1592 goto done;
1593 }
1594 nfnl_lock();
1595 strncpy(req_get->set.name,
1596 ip_set_list[req_get->set.index]
1597 ? ip_set_list[req_get->set.index]->name : "",
1598 IPSET_MAXNAMELEN);
1599 nfnl_unlock();
1600 goto copy;
1601 }
1602 default:
1603 ret = -EBADMSG;
1604 goto done;
1605 } /* end of switch(op) */
1606
1607copy:
1608 ret = copy_to_user(user, data, copylen);
1609
1610done:
1611 vfree(data);
1612 if (ret > 0)
1613 ret = 0;
1614 return ret;
1615}
1616
1617static struct nf_sockopt_ops so_set __read_mostly = {
1618 .pf = PF_INET,
1619 .get_optmin = SO_IP_SET,
1620 .get_optmax = SO_IP_SET + 1,
1621 .get = &ip_set_sockfn_get,
1622 .owner = THIS_MODULE,
1623};
1624
1625static int __init
1626ip_set_init(void)
1627{
1628 int ret;
1629
1630 if (max_sets)
1631 ip_set_max = max_sets;
1632 if (ip_set_max >= IPSET_INVALID_ID)
1633 ip_set_max = IPSET_INVALID_ID - 1;
1634
1635 ip_set_list = kzalloc(sizeof(struct ip_set *) * ip_set_max,
1636 GFP_KERNEL);
1637 if (!ip_set_list) {
1638 pr_err("ip_set: Unable to create ip_set_list\n");
1639 return -ENOMEM;
1640 }
1641
1642 ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
1643 if (ret != 0) {
1644 pr_err("ip_set: cannot register with nfnetlink.\n");
1645 kfree(ip_set_list);
1646 return ret;
1647 }
1648 ret = nf_register_sockopt(&so_set);
1649 if (ret != 0) {
1650 pr_err("SO_SET registry failed: %d\n", ret);
1651 nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
1652 kfree(ip_set_list);
1653 return ret;
1654 }
1655
1656 pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL);
1657 return 0;
1658}
1659
1660static void __exit
1661ip_set_fini(void)
1662{
1663 /* There can't be any existing set */
1664 nf_unregister_sockopt(&so_set);
1665 nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
1666 kfree(ip_set_list);
1667 pr_debug("these are the famous last words\n");
1668}
1669
/* Module entry and exit points */
module_init(ip_set_init);
module_exit(ip_set_fini);