IPVS: netns, connection hash got net as param.
[linux-2.6-block.git] / net / netfilter / ipvs / ip_vs_ctl.c
CommitLineData
1da177e4
LT
1/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
1da177e4
LT
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
9aada7ac
HE
21#define KMSG_COMPONENT "IPVS"
22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
1da177e4
LT
24#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/types.h>
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/fs.h>
29#include <linux/sysctl.h>
30#include <linux/proc_fs.h>
31#include <linux/workqueue.h>
32#include <linux/swap.h>
1da177e4 33#include <linux/seq_file.h>
5a0e3ad6 34#include <linux/slab.h>
1da177e4
LT
35
36#include <linux/netfilter.h>
37#include <linux/netfilter_ipv4.h>
14cc3e2b 38#include <linux/mutex.h>
1da177e4 39
457c4cbc 40#include <net/net_namespace.h>
9330419d 41#include <linux/nsproxy.h>
1da177e4 42#include <net/ip.h>
09571c7a
VB
43#ifdef CONFIG_IP_VS_IPV6
44#include <net/ipv6.h>
45#include <net/ip6_route.h>
46#endif
14c85021 47#include <net/route.h>
1da177e4 48#include <net/sock.h>
9a812198 49#include <net/genetlink.h>
1da177e4
LT
50
51#include <asm/uaccess.h>
52
53#include <net/ip_vs.h>
54
55/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
14cc3e2b 56static DEFINE_MUTEX(__ip_vs_mutex);
1da177e4
LT
57
58/* lock for service table */
59static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
61/* lock for table with the real services */
62static DEFINE_RWLOCK(__ip_vs_rs_lock);
63
64/* lock for state and timeout tables */
4f72816e 65static DEFINE_SPINLOCK(ip_vs_securetcp_lock);
1da177e4
LT
66
67/* lock for drop entry handling */
68static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
69
70/* lock for drop packet handling */
71static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
72
73/* 1/rate drop and drop-entry variables */
74int ip_vs_drop_rate = 0;
75int ip_vs_drop_counter = 0;
76static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
77
78/* number of virtual services */
79static int ip_vs_num_services = 0;
80
81/* sysctl variables */
82static int sysctl_ip_vs_drop_entry = 0;
83static int sysctl_ip_vs_drop_packet = 0;
84static int sysctl_ip_vs_secure_tcp = 0;
85static int sysctl_ip_vs_amemthresh = 1024;
86static int sysctl_ip_vs_am_droprate = 10;
87int sysctl_ip_vs_cache_bypass = 0;
88int sysctl_ip_vs_expire_nodest_conn = 0;
89int sysctl_ip_vs_expire_quiescent_template = 0;
90int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
91int sysctl_ip_vs_nat_icmp_send = 0;
f4bc17cd
JA
92#ifdef CONFIG_IP_VS_NFCT
93int sysctl_ip_vs_conntrack;
94#endif
8a803040 95int sysctl_ip_vs_snat_reroute = 1;
b880c1f0 96int sysctl_ip_vs_sync_ver = 1; /* Default version of sync proto */
1da177e4
LT
97
98#ifdef CONFIG_IP_VS_DEBUG
99static int sysctl_ip_vs_debug_level = 0;
100
101int ip_vs_get_debug_level(void)
102{
103 return sysctl_ip_vs_debug_level;
104}
105#endif
106
09571c7a
VB
107#ifdef CONFIG_IP_VS_IPV6
108/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
109static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
110{
111 struct rt6_info *rt;
112 struct flowi fl = {
113 .oif = 0,
5811662b
CG
114 .fl6_dst = *addr,
115 .fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
09571c7a
VB
116 };
117
118 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
119 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
120 return 1;
121
122 return 0;
123}
124#endif
1da177e4 125/*
af9debd4
JA
126 * update_defense_level is called from keventd and from sysctl,
127 * so it needs to protect itself from softirqs
1da177e4 128 */
9330419d 129static void update_defense_level(struct netns_ipvs *ipvs)
1da177e4
LT
130{
131 struct sysinfo i;
132 static int old_secure_tcp = 0;
133 int availmem;
134 int nomem;
135 int to_change = -1;
136
137 /* we only count free and buffered memory (in pages) */
138 si_meminfo(&i);
139 availmem = i.freeram + i.bufferram;
140 /* however in linux 2.5 the i.bufferram is total page cache size,
141 we need adjust it */
142 /* si_swapinfo(&i); */
143 /* availmem = availmem - (i.totalswap - i.freeswap); */
144
145 nomem = (availmem < sysctl_ip_vs_amemthresh);
146
af9debd4
JA
147 local_bh_disable();
148
1da177e4
LT
149 /* drop_entry */
150 spin_lock(&__ip_vs_dropentry_lock);
151 switch (sysctl_ip_vs_drop_entry) {
152 case 0:
153 atomic_set(&ip_vs_dropentry, 0);
154 break;
155 case 1:
156 if (nomem) {
157 atomic_set(&ip_vs_dropentry, 1);
158 sysctl_ip_vs_drop_entry = 2;
159 } else {
160 atomic_set(&ip_vs_dropentry, 0);
161 }
162 break;
163 case 2:
164 if (nomem) {
165 atomic_set(&ip_vs_dropentry, 1);
166 } else {
167 atomic_set(&ip_vs_dropentry, 0);
168 sysctl_ip_vs_drop_entry = 1;
169 };
170 break;
171 case 3:
172 atomic_set(&ip_vs_dropentry, 1);
173 break;
174 }
175 spin_unlock(&__ip_vs_dropentry_lock);
176
177 /* drop_packet */
178 spin_lock(&__ip_vs_droppacket_lock);
179 switch (sysctl_ip_vs_drop_packet) {
180 case 0:
181 ip_vs_drop_rate = 0;
182 break;
183 case 1:
184 if (nomem) {
185 ip_vs_drop_rate = ip_vs_drop_counter
186 = sysctl_ip_vs_amemthresh /
187 (sysctl_ip_vs_amemthresh-availmem);
188 sysctl_ip_vs_drop_packet = 2;
189 } else {
190 ip_vs_drop_rate = 0;
191 }
192 break;
193 case 2:
194 if (nomem) {
195 ip_vs_drop_rate = ip_vs_drop_counter
196 = sysctl_ip_vs_amemthresh /
197 (sysctl_ip_vs_amemthresh-availmem);
198 } else {
199 ip_vs_drop_rate = 0;
200 sysctl_ip_vs_drop_packet = 1;
201 }
202 break;
203 case 3:
204 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
205 break;
206 }
207 spin_unlock(&__ip_vs_droppacket_lock);
208
209 /* secure_tcp */
4f72816e 210 spin_lock(&ip_vs_securetcp_lock);
1da177e4
LT
211 switch (sysctl_ip_vs_secure_tcp) {
212 case 0:
213 if (old_secure_tcp >= 2)
214 to_change = 0;
215 break;
216 case 1:
217 if (nomem) {
218 if (old_secure_tcp < 2)
219 to_change = 1;
220 sysctl_ip_vs_secure_tcp = 2;
221 } else {
222 if (old_secure_tcp >= 2)
223 to_change = 0;
224 }
225 break;
226 case 2:
227 if (nomem) {
228 if (old_secure_tcp < 2)
229 to_change = 1;
230 } else {
231 if (old_secure_tcp >= 2)
232 to_change = 0;
233 sysctl_ip_vs_secure_tcp = 1;
234 }
235 break;
236 case 3:
237 if (old_secure_tcp < 2)
238 to_change = 1;
239 break;
240 }
241 old_secure_tcp = sysctl_ip_vs_secure_tcp;
242 if (to_change >= 0)
9330419d
HS
243 ip_vs_protocol_timeout_change(ipvs,
244 sysctl_ip_vs_secure_tcp > 1);
4f72816e 245 spin_unlock(&ip_vs_securetcp_lock);
af9debd4
JA
246
247 local_bh_enable();
1da177e4
LT
248}
249
250
251/*
252 * Timer for checking the defense
253 */
254#define DEFENSE_TIMER_PERIOD 1*HZ
c4028958
DH
255static void defense_work_handler(struct work_struct *work);
256static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
1da177e4 257
c4028958 258static void defense_work_handler(struct work_struct *work)
1da177e4 259{
b17fc996 260 struct netns_ipvs *ipvs = net_ipvs(&init_net);
9330419d
HS
261
262 update_defense_level(ipvs);
1da177e4
LT
263 if (atomic_read(&ip_vs_dropentry))
264 ip_vs_random_dropentry();
265
266 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
267}
268
269int
270ip_vs_use_count_inc(void)
271{
272 return try_module_get(THIS_MODULE);
273}
274
275void
276ip_vs_use_count_dec(void)
277{
278 module_put(THIS_MODULE);
279}
280
281
282/*
283 * Hash table: for virtual service lookups
284 */
285#define IP_VS_SVC_TAB_BITS 8
286#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
287#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
288
289/* the service table hashed by <protocol, addr, port> */
290static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
291/* the service table hashed by fwmark */
292static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
293
1da177e4
LT
294/*
295 * Trash for destinations
296 */
297static LIST_HEAD(ip_vs_dest_trash);
298
299/*
300 * FTP & NULL virtual service counters
301 */
302static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
303static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
304
305
306/*
307 * Returns hash value for virtual service
308 */
fc723250
HS
309static inline unsigned
310ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
311 const union nf_inet_addr *addr, __be16 port)
1da177e4
LT
312{
313 register unsigned porth = ntohs(port);
b18610de 314 __be32 addr_fold = addr->ip;
1da177e4 315
b18610de
JV
316#ifdef CONFIG_IP_VS_IPV6
317 if (af == AF_INET6)
318 addr_fold = addr->ip6[0]^addr->ip6[1]^
319 addr->ip6[2]^addr->ip6[3];
320#endif
fc723250 321 addr_fold ^= ((size_t)net>>8);
b18610de
JV
322
323 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
1da177e4
LT
324 & IP_VS_SVC_TAB_MASK;
325}
326
327/*
328 * Returns hash value of fwmark for virtual service lookup
329 */
fc723250 330static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
1da177e4 331{
fc723250 332 return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
1da177e4
LT
333}
334
335/*
fc723250 336 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
1da177e4
LT
337 * or in the ip_vs_svc_fwm_table by fwmark.
338 * Should be called with locked tables.
339 */
340static int ip_vs_svc_hash(struct ip_vs_service *svc)
341{
342 unsigned hash;
343
344 if (svc->flags & IP_VS_SVC_F_HASHED) {
1e3e238e
HE
345 pr_err("%s(): request for already hashed, called from %pF\n",
346 __func__, __builtin_return_address(0));
1da177e4
LT
347 return 0;
348 }
349
350 if (svc->fwmark == 0) {
351 /*
fc723250 352 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
1da177e4 353 */
fc723250
HS
354 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
355 &svc->addr, svc->port);
1da177e4
LT
356 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
357 } else {
358 /*
fc723250 359 * Hash it by fwmark in svc_fwm_table
1da177e4 360 */
fc723250 361 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
1da177e4
LT
362 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
363 }
364
365 svc->flags |= IP_VS_SVC_F_HASHED;
366 /* increase its refcnt because it is referenced by the svc table */
367 atomic_inc(&svc->refcnt);
368 return 1;
369}
370
371
372/*
fc723250 373 * Unhashes a service from svc_table / svc_fwm_table.
1da177e4
LT
374 * Should be called with locked tables.
375 */
376static int ip_vs_svc_unhash(struct ip_vs_service *svc)
377{
378 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
1e3e238e
HE
379 pr_err("%s(): request for unhash flagged, called from %pF\n",
380 __func__, __builtin_return_address(0));
1da177e4
LT
381 return 0;
382 }
383
384 if (svc->fwmark == 0) {
fc723250 385 /* Remove it from the svc_table table */
1da177e4
LT
386 list_del(&svc->s_list);
387 } else {
fc723250 388 /* Remove it from the svc_fwm_table table */
1da177e4
LT
389 list_del(&svc->f_list);
390 }
391
392 svc->flags &= ~IP_VS_SVC_F_HASHED;
393 atomic_dec(&svc->refcnt);
394 return 1;
395}
396
397
398/*
fc723250 399 * Get service by {netns, proto,addr,port} in the service table.
1da177e4 400 */
b18610de 401static inline struct ip_vs_service *
fc723250
HS
402__ip_vs_service_find(struct net *net, int af, __u16 protocol,
403 const union nf_inet_addr *vaddr, __be16 vport)
1da177e4
LT
404{
405 unsigned hash;
406 struct ip_vs_service *svc;
407
408 /* Check for "full" addressed entries */
fc723250 409 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
1da177e4
LT
410
411 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
b18610de
JV
412 if ((svc->af == af)
413 && ip_vs_addr_equal(af, &svc->addr, vaddr)
1da177e4 414 && (svc->port == vport)
fc723250
HS
415 && (svc->protocol == protocol)
416 && net_eq(svc->net, net)) {
1da177e4 417 /* HIT */
1da177e4
LT
418 return svc;
419 }
420 }
421
422 return NULL;
423}
424
425
426/*
427 * Get service by {fwmark} in the service table.
428 */
b18610de 429static inline struct ip_vs_service *
fc723250 430__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
1da177e4
LT
431{
432 unsigned hash;
433 struct ip_vs_service *svc;
434
435 /* Check for fwmark addressed entries */
fc723250 436 hash = ip_vs_svc_fwm_hashkey(net, fwmark);
1da177e4
LT
437
438 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
fc723250
HS
439 if (svc->fwmark == fwmark && svc->af == af
440 && net_eq(svc->net, net)) {
1da177e4 441 /* HIT */
1da177e4
LT
442 return svc;
443 }
444 }
445
446 return NULL;
447}
448
449struct ip_vs_service *
fc723250 450ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
3c2e0505 451 const union nf_inet_addr *vaddr, __be16 vport)
1da177e4
LT
452{
453 struct ip_vs_service *svc;
3c2e0505 454
1da177e4
LT
455 read_lock(&__ip_vs_svc_lock);
456
457 /*
458 * Check the table hashed by fwmark first
459 */
fc723250
HS
460 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
461 if (fwmark && svc)
1da177e4
LT
462 goto out;
463
464 /*
465 * Check the table hashed by <protocol,addr,port>
466 * for "full" addressed entries
467 */
fc723250 468 svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
1da177e4
LT
469
470 if (svc == NULL
471 && protocol == IPPROTO_TCP
472 && atomic_read(&ip_vs_ftpsvc_counter)
473 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
474 /*
475 * Check if ftp service entry exists, the packet
476 * might belong to FTP data connections.
477 */
fc723250 478 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
1da177e4
LT
479 }
480
481 if (svc == NULL
482 && atomic_read(&ip_vs_nullsvc_counter)) {
483 /*
484 * Check if the catch-all port (port zero) exists
485 */
fc723250 486 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
1da177e4
LT
487 }
488
489 out:
26c15cfd
JA
490 if (svc)
491 atomic_inc(&svc->usecnt);
1da177e4
LT
492 read_unlock(&__ip_vs_svc_lock);
493
3c2e0505
JV
494 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
495 fwmark, ip_vs_proto_name(protocol),
496 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
497 svc ? "hit" : "not hit");
1da177e4
LT
498
499 return svc;
500}
501
502
503static inline void
504__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
505{
506 atomic_inc(&svc->refcnt);
507 dest->svc = svc;
508}
509
26c15cfd 510static void
1da177e4
LT
511__ip_vs_unbind_svc(struct ip_vs_dest *dest)
512{
513 struct ip_vs_service *svc = dest->svc;
514
515 dest->svc = NULL;
26c15cfd
JA
516 if (atomic_dec_and_test(&svc->refcnt)) {
517 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
518 svc->fwmark,
519 IP_VS_DBG_ADDR(svc->af, &svc->addr),
520 ntohs(svc->port), atomic_read(&svc->usecnt));
b17fc996 521 free_percpu(svc->stats.cpustats);
1da177e4 522 kfree(svc);
26c15cfd 523 }
1da177e4
LT
524}
525
526
527/*
528 * Returns hash value for real service
529 */
7937df15
JV
530static inline unsigned ip_vs_rs_hashkey(int af,
531 const union nf_inet_addr *addr,
532 __be16 port)
1da177e4
LT
533{
534 register unsigned porth = ntohs(port);
7937df15
JV
535 __be32 addr_fold = addr->ip;
536
537#ifdef CONFIG_IP_VS_IPV6
538 if (af == AF_INET6)
539 addr_fold = addr->ip6[0]^addr->ip6[1]^
540 addr->ip6[2]^addr->ip6[3];
541#endif
1da177e4 542
7937df15 543 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
1da177e4
LT
544 & IP_VS_RTAB_MASK;
545}
546
547/*
fc723250 548 * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
1da177e4
LT
549 * should be called with locked tables.
550 */
fc723250 551static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
1da177e4
LT
552{
553 unsigned hash;
554
555 if (!list_empty(&dest->d_list)) {
556 return 0;
557 }
558
559 /*
560 * Hash by proto,addr,port,
561 * which are the parameters of the real service.
562 */
7937df15
JV
563 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
564
fc723250 565 list_add(&dest->d_list, &ipvs->rs_table[hash]);
1da177e4
LT
566
567 return 1;
568}
569
570/*
fc723250 571 * UNhashes ip_vs_dest from rs_table.
1da177e4
LT
572 * should be called with locked tables.
573 */
574static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
575{
576 /*
fc723250 577 * Remove it from the rs_table table.
1da177e4
LT
578 */
579 if (!list_empty(&dest->d_list)) {
580 list_del(&dest->d_list);
581 INIT_LIST_HEAD(&dest->d_list);
582 }
583
584 return 1;
585}
586
587/*
588 * Lookup real service by <proto,addr,port> in the real service table.
589 */
590struct ip_vs_dest *
fc723250 591ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
7937df15
JV
592 const union nf_inet_addr *daddr,
593 __be16 dport)
1da177e4 594{
fc723250 595 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4
LT
596 unsigned hash;
597 struct ip_vs_dest *dest;
598
599 /*
600 * Check for "full" addressed entries
601 * Return the first found entry
602 */
7937df15 603 hash = ip_vs_rs_hashkey(af, daddr, dport);
1da177e4
LT
604
605 read_lock(&__ip_vs_rs_lock);
fc723250 606 list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
7937df15
JV
607 if ((dest->af == af)
608 && ip_vs_addr_equal(af, &dest->addr, daddr)
1da177e4
LT
609 && (dest->port == dport)
610 && ((dest->protocol == protocol) ||
611 dest->vfwmark)) {
612 /* HIT */
613 read_unlock(&__ip_vs_rs_lock);
614 return dest;
615 }
616 }
617 read_unlock(&__ip_vs_rs_lock);
618
619 return NULL;
620}
621
622/*
623 * Lookup destination by {addr,port} in the given service
624 */
625static struct ip_vs_dest *
7937df15
JV
626ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
627 __be16 dport)
1da177e4
LT
628{
629 struct ip_vs_dest *dest;
630
631 /*
632 * Find the destination for the given service
633 */
634 list_for_each_entry(dest, &svc->destinations, n_list) {
7937df15
JV
635 if ((dest->af == svc->af)
636 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
637 && (dest->port == dport)) {
1da177e4
LT
638 /* HIT */
639 return dest;
640 }
641 }
642
643 return NULL;
644}
645
1e356f9c
RB
646/*
647 * Find destination by {daddr,dport,vaddr,protocol}
648 * Created to be used in ip_vs_process_message() in
649 * the backup synchronization daemon. It finds the
650 * destination to be bound to the received connection
651 * on the backup.
652 *
653 * ip_vs_lookup_real_service() looked promising, but
654 * does not seem to work as expected.
655 */
fc723250
HS
656struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
657 const union nf_inet_addr *daddr,
7937df15
JV
658 __be16 dport,
659 const union nf_inet_addr *vaddr,
0e051e68 660 __be16 vport, __u16 protocol, __u32 fwmark)
1e356f9c
RB
661{
662 struct ip_vs_dest *dest;
663 struct ip_vs_service *svc;
664
fc723250 665 svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
1e356f9c
RB
666 if (!svc)
667 return NULL;
668 dest = ip_vs_lookup_dest(svc, daddr, dport);
669 if (dest)
670 atomic_inc(&dest->refcnt);
671 ip_vs_service_put(svc);
672 return dest;
673}
1da177e4
LT
674
675/*
676 * Lookup dest by {svc,addr,port} in the destination trash.
677 * The destination trash is used to hold the destinations that are removed
678 * from the service table but are still referenced by some conn entries.
679 * The reason to add the destination trash is when the dest is temporary
680 * down (either by administrator or by monitor program), the dest can be
681 * picked back from the trash, the remaining connections to the dest can
682 * continue, and the counting information of the dest is also useful for
683 * scheduling.
684 */
685static struct ip_vs_dest *
7937df15
JV
686ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
687 __be16 dport)
1da177e4
LT
688{
689 struct ip_vs_dest *dest, *nxt;
690
691 /*
692 * Find the destination in trash
693 */
694 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
7937df15
JV
695 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
696 "dest->refcnt=%d\n",
697 dest->vfwmark,
698 IP_VS_DBG_ADDR(svc->af, &dest->addr),
699 ntohs(dest->port),
700 atomic_read(&dest->refcnt));
701 if (dest->af == svc->af &&
702 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
1da177e4
LT
703 dest->port == dport &&
704 dest->vfwmark == svc->fwmark &&
705 dest->protocol == svc->protocol &&
706 (svc->fwmark ||
7937df15 707 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
1da177e4
LT
708 dest->vport == svc->port))) {
709 /* HIT */
710 return dest;
711 }
712
713 /*
714 * Try to purge the destination from trash if not referenced
715 */
716 if (atomic_read(&dest->refcnt) == 1) {
7937df15
JV
717 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
718 "from trash\n",
719 dest->vfwmark,
720 IP_VS_DBG_ADDR(svc->af, &dest->addr),
721 ntohs(dest->port));
1da177e4
LT
722 list_del(&dest->n_list);
723 ip_vs_dst_reset(dest);
724 __ip_vs_unbind_svc(dest);
b17fc996 725 free_percpu(dest->stats.cpustats);
1da177e4
LT
726 kfree(dest);
727 }
728 }
729
730 return NULL;
731}
732
733
734/*
735 * Clean up all the destinations in the trash
736 * Called by the ip_vs_control_cleanup()
737 *
738 * When ip_vs_control_cleanup() is activated by ipvs module exit,
739 * the service tables must have been flushed and all the connections
740 * are expired, and the refcnt of each destination in the trash must
741 * be 1, so we simply release them here.
742 */
743static void ip_vs_trash_cleanup(void)
744{
745 struct ip_vs_dest *dest, *nxt;
746
747 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
748 list_del(&dest->n_list);
749 ip_vs_dst_reset(dest);
750 __ip_vs_unbind_svc(dest);
b17fc996 751 free_percpu(dest->stats.cpustats);
1da177e4
LT
752 kfree(dest);
753 }
754}
755
756
757static void
758ip_vs_zero_stats(struct ip_vs_stats *stats)
759{
760 spin_lock_bh(&stats->lock);
e93615d0 761
e9c0ce23 762 memset(&stats->ustats, 0, sizeof(stats->ustats));
1da177e4 763 ip_vs_zero_estimator(stats);
e93615d0 764
3a14a313 765 spin_unlock_bh(&stats->lock);
1da177e4
LT
766}
767
768/*
769 * Update a destination in the given service
770 */
771static void
26c15cfd
JA
772__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
773 struct ip_vs_dest_user_kern *udest, int add)
1da177e4 774{
fc723250 775 struct netns_ipvs *ipvs = net_ipvs(svc->net);
1da177e4
LT
776 int conn_flags;
777
778 /* set the weight and the flags */
779 atomic_set(&dest->weight, udest->weight);
3575792e
JA
780 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
781 conn_flags |= IP_VS_CONN_F_INACTIVE;
1da177e4 782
1da177e4 783 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
3575792e 784 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
1da177e4
LT
785 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
786 } else {
787 /*
fc723250 788 * Put the real service in rs_table if not present.
1da177e4
LT
789 * For now only for NAT!
790 */
791 write_lock_bh(&__ip_vs_rs_lock);
fc723250 792 ip_vs_rs_hash(ipvs, dest);
1da177e4
LT
793 write_unlock_bh(&__ip_vs_rs_lock);
794 }
795 atomic_set(&dest->conn_flags, conn_flags);
796
797 /* bind the service */
798 if (!dest->svc) {
799 __ip_vs_bind_svc(dest, svc);
800 } else {
801 if (dest->svc != svc) {
802 __ip_vs_unbind_svc(dest);
803 ip_vs_zero_stats(&dest->stats);
804 __ip_vs_bind_svc(dest, svc);
805 }
806 }
807
808 /* set the dest status flags */
809 dest->flags |= IP_VS_DEST_F_AVAILABLE;
810
811 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
812 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
813 dest->u_threshold = udest->u_threshold;
814 dest->l_threshold = udest->l_threshold;
26c15cfd 815
fc604767
JA
816 spin_lock(&dest->dst_lock);
817 ip_vs_dst_reset(dest);
818 spin_unlock(&dest->dst_lock);
819
26c15cfd 820 if (add)
29c2026f 821 ip_vs_new_estimator(svc->net, &dest->stats);
26c15cfd
JA
822
823 write_lock_bh(&__ip_vs_svc_lock);
824
825 /* Wait until all other svc users go away */
826 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
827
828 if (add) {
829 list_add(&dest->n_list, &svc->destinations);
830 svc->num_dests++;
831 }
832
833 /* call the update_service, because server weight may be changed */
834 if (svc->scheduler->update_service)
835 svc->scheduler->update_service(svc);
836
837 write_unlock_bh(&__ip_vs_svc_lock);
1da177e4
LT
838}
839
840
841/*
842 * Create a destination for the given service
843 */
844static int
c860c6b1 845ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
1da177e4
LT
846 struct ip_vs_dest **dest_p)
847{
848 struct ip_vs_dest *dest;
849 unsigned atype;
850
851 EnterFunction(2);
852
09571c7a
VB
853#ifdef CONFIG_IP_VS_IPV6
854 if (svc->af == AF_INET6) {
855 atype = ipv6_addr_type(&udest->addr.in6);
3bfb92f4
SW
856 if ((!(atype & IPV6_ADDR_UNICAST) ||
857 atype & IPV6_ADDR_LINKLOCAL) &&
09571c7a
VB
858 !__ip_vs_addr_is_local_v6(&udest->addr.in6))
859 return -EINVAL;
860 } else
861#endif
862 {
863 atype = inet_addr_type(&init_net, udest->addr.ip);
864 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
865 return -EINVAL;
866 }
1da177e4 867
dee06e47 868 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
1da177e4 869 if (dest == NULL) {
1e3e238e 870 pr_err("%s(): no memory.\n", __func__);
1da177e4
LT
871 return -ENOMEM;
872 }
b17fc996
HS
873 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
874 if (!dest->stats.cpustats) {
875 pr_err("%s() alloc_percpu failed\n", __func__);
876 goto err_alloc;
877 }
1da177e4 878
c860c6b1 879 dest->af = svc->af;
1da177e4 880 dest->protocol = svc->protocol;
c860c6b1 881 dest->vaddr = svc->addr;
1da177e4
LT
882 dest->vport = svc->port;
883 dest->vfwmark = svc->fwmark;
c860c6b1 884 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
1da177e4
LT
885 dest->port = udest->port;
886
887 atomic_set(&dest->activeconns, 0);
888 atomic_set(&dest->inactconns, 0);
889 atomic_set(&dest->persistconns, 0);
26c15cfd 890 atomic_set(&dest->refcnt, 1);
1da177e4
LT
891
892 INIT_LIST_HEAD(&dest->d_list);
893 spin_lock_init(&dest->dst_lock);
894 spin_lock_init(&dest->stats.lock);
26c15cfd 895 __ip_vs_update_dest(svc, dest, udest, 1);
1da177e4
LT
896
897 *dest_p = dest;
898
899 LeaveFunction(2);
900 return 0;
b17fc996
HS
901
902err_alloc:
903 kfree(dest);
904 return -ENOMEM;
1da177e4
LT
905}
906
907
908/*
909 * Add a destination into an existing service
910 */
911static int
c860c6b1 912ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
913{
914 struct ip_vs_dest *dest;
c860c6b1 915 union nf_inet_addr daddr;
014d730d 916 __be16 dport = udest->port;
1da177e4
LT
917 int ret;
918
919 EnterFunction(2);
920
921 if (udest->weight < 0) {
1e3e238e 922 pr_err("%s(): server weight less than zero\n", __func__);
1da177e4
LT
923 return -ERANGE;
924 }
925
926 if (udest->l_threshold > udest->u_threshold) {
1e3e238e
HE
927 pr_err("%s(): lower threshold is higher than upper threshold\n",
928 __func__);
1da177e4
LT
929 return -ERANGE;
930 }
931
c860c6b1
JV
932 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
933
1da177e4
LT
934 /*
935 * Check if the dest already exists in the list
936 */
7937df15
JV
937 dest = ip_vs_lookup_dest(svc, &daddr, dport);
938
1da177e4 939 if (dest != NULL) {
1e3e238e 940 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
1da177e4
LT
941 return -EEXIST;
942 }
943
944 /*
945 * Check if the dest already exists in the trash and
946 * is from the same service
947 */
7937df15
JV
948 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
949
1da177e4 950 if (dest != NULL) {
cfc78c5a
JV
951 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
952 "dest->refcnt=%d, service %u/%s:%u\n",
953 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
954 atomic_read(&dest->refcnt),
955 dest->vfwmark,
956 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
957 ntohs(dest->vport));
958
1da177e4
LT
959 /*
960 * Get the destination from the trash
961 */
962 list_del(&dest->n_list);
963
26c15cfd
JA
964 __ip_vs_update_dest(svc, dest, udest, 1);
965 ret = 0;
966 } else {
1da177e4 967 /*
26c15cfd 968 * Allocate and initialize the dest structure
1da177e4 969 */
26c15cfd 970 ret = ip_vs_new_dest(svc, udest, &dest);
1da177e4 971 }
1da177e4
LT
972 LeaveFunction(2);
973
26c15cfd 974 return ret;
1da177e4
LT
975}
976
977
978/*
979 * Edit a destination in the given service
980 */
981static int
c860c6b1 982ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
983{
984 struct ip_vs_dest *dest;
c860c6b1 985 union nf_inet_addr daddr;
014d730d 986 __be16 dport = udest->port;
1da177e4
LT
987
988 EnterFunction(2);
989
990 if (udest->weight < 0) {
1e3e238e 991 pr_err("%s(): server weight less than zero\n", __func__);
1da177e4
LT
992 return -ERANGE;
993 }
994
995 if (udest->l_threshold > udest->u_threshold) {
1e3e238e
HE
996 pr_err("%s(): lower threshold is higher than upper threshold\n",
997 __func__);
1da177e4
LT
998 return -ERANGE;
999 }
1000
c860c6b1
JV
1001 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
1002
1da177e4
LT
1003 /*
1004 * Lookup the destination list
1005 */
7937df15
JV
1006 dest = ip_vs_lookup_dest(svc, &daddr, dport);
1007
1da177e4 1008 if (dest == NULL) {
1e3e238e 1009 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1da177e4
LT
1010 return -ENOENT;
1011 }
1012
26c15cfd 1013 __ip_vs_update_dest(svc, dest, udest, 0);
1da177e4
LT
1014 LeaveFunction(2);
1015
1016 return 0;
1017}
1018
1019
1020/*
1021 * Delete a destination (must be already unlinked from the service)
1022 */
29c2026f 1023static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
1da177e4 1024{
29c2026f 1025 ip_vs_kill_estimator(net, &dest->stats);
1da177e4
LT
1026
1027 /*
1028 * Remove it from the d-linked list with the real services.
1029 */
1030 write_lock_bh(&__ip_vs_rs_lock);
1031 ip_vs_rs_unhash(dest);
1032 write_unlock_bh(&__ip_vs_rs_lock);
1033
1034 /*
1035 * Decrease the refcnt of the dest, and free the dest
1036 * if nobody refers to it (refcnt=0). Otherwise, throw
1037 * the destination into the trash.
1038 */
1039 if (atomic_dec_and_test(&dest->refcnt)) {
26c15cfd
JA
1040 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1041 dest->vfwmark,
1042 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1043 ntohs(dest->port));
1da177e4
LT
1044 ip_vs_dst_reset(dest);
1045 /* simply decrease svc->refcnt here, let the caller check
1046 and release the service if nobody refers to it.
1047 Only user context can release destination and service,
1048 and only one user context can update virtual service at a
1049 time, so the operation here is OK */
1050 atomic_dec(&dest->svc->refcnt);
b17fc996 1051 free_percpu(dest->stats.cpustats);
1da177e4
LT
1052 kfree(dest);
1053 } else {
cfc78c5a
JV
1054 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1055 "dest->refcnt=%d\n",
1056 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1057 ntohs(dest->port),
1058 atomic_read(&dest->refcnt));
1da177e4
LT
1059 list_add(&dest->n_list, &ip_vs_dest_trash);
1060 atomic_inc(&dest->refcnt);
1061 }
1062}
1063
1064
1065/*
1066 * Unlink a destination from the given service
1067 */
1068static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1069 struct ip_vs_dest *dest,
1070 int svcupd)
1071{
1072 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1073
1074 /*
1075 * Remove it from the d-linked destination list.
1076 */
1077 list_del(&dest->n_list);
1078 svc->num_dests--;
82dfb6f3
SW
1079
1080 /*
1081 * Call the update_service function of its scheduler
1082 */
1083 if (svcupd && svc->scheduler->update_service)
1084 svc->scheduler->update_service(svc);
1da177e4
LT
1085}
1086
1087
1088/*
1089 * Delete a destination server in the given service
1090 */
1091static int
c860c6b1 1092ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
1093{
1094 struct ip_vs_dest *dest;
29c2026f 1095 struct net *net = svc->net;
014d730d 1096 __be16 dport = udest->port;
1da177e4
LT
1097
1098 EnterFunction(2);
1099
7937df15 1100 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
c860c6b1 1101
1da177e4 1102 if (dest == NULL) {
1e3e238e 1103 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1da177e4
LT
1104 return -ENOENT;
1105 }
1106
1107 write_lock_bh(&__ip_vs_svc_lock);
1108
1109 /*
1110 * Wait until all other svc users go away.
1111 */
26c15cfd 1112 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1da177e4
LT
1113
1114 /*
1115 * Unlink dest from the service
1116 */
1117 __ip_vs_unlink_dest(svc, dest, 1);
1118
1119 write_unlock_bh(&__ip_vs_svc_lock);
1120
1121 /*
1122 * Delete the destination
1123 */
29c2026f 1124 __ip_vs_del_dest(net, dest);
1da177e4
LT
1125
1126 LeaveFunction(2);
1127
1128 return 0;
1129}
1130
1131
1132/*
1133 * Add a service into the service hash table
1134 */
1135static int
fc723250 1136ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
c860c6b1 1137 struct ip_vs_service **svc_p)
1da177e4
LT
1138{
1139 int ret = 0;
1140 struct ip_vs_scheduler *sched = NULL;
0d1e71b0 1141 struct ip_vs_pe *pe = NULL;
1da177e4
LT
1142 struct ip_vs_service *svc = NULL;
1143
1144 /* increase the module use count */
1145 ip_vs_use_count_inc();
1146
1147 /* Lookup the scheduler by 'u->sched_name' */
1148 sched = ip_vs_scheduler_get(u->sched_name);
1149 if (sched == NULL) {
1e3e238e 1150 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1da177e4 1151 ret = -ENOENT;
6e08bfb8 1152 goto out_err;
1da177e4
LT
1153 }
1154
0d1e71b0 1155 if (u->pe_name && *u->pe_name) {
e9e5eee8 1156 pe = ip_vs_pe_getbyname(u->pe_name);
0d1e71b0
SH
1157 if (pe == NULL) {
1158 pr_info("persistence engine module ip_vs_pe_%s "
1159 "not found\n", u->pe_name);
1160 ret = -ENOENT;
1161 goto out_err;
1162 }
1163 }
1164
f94fd041 1165#ifdef CONFIG_IP_VS_IPV6
48148938
JV
1166 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1167 ret = -EINVAL;
1168 goto out_err;
f94fd041
JV
1169 }
1170#endif
1171
dee06e47 1172 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1da177e4 1173 if (svc == NULL) {
1e3e238e 1174 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1da177e4
LT
1175 ret = -ENOMEM;
1176 goto out_err;
1177 }
b17fc996
HS
1178 svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1179 if (!svc->stats.cpustats) {
1180 pr_err("%s() alloc_percpu failed\n", __func__);
1181 goto out_err;
1182 }
1da177e4
LT
1183
1184 /* I'm the first user of the service */
26c15cfd 1185 atomic_set(&svc->usecnt, 0);
1da177e4
LT
1186 atomic_set(&svc->refcnt, 0);
1187
c860c6b1 1188 svc->af = u->af;
1da177e4 1189 svc->protocol = u->protocol;
c860c6b1 1190 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1da177e4
LT
1191 svc->port = u->port;
1192 svc->fwmark = u->fwmark;
1193 svc->flags = u->flags;
1194 svc->timeout = u->timeout * HZ;
1195 svc->netmask = u->netmask;
fc723250 1196 svc->net = net;
1da177e4
LT
1197
1198 INIT_LIST_HEAD(&svc->destinations);
1199 rwlock_init(&svc->sched_lock);
1200 spin_lock_init(&svc->stats.lock);
1201
1202 /* Bind the scheduler */
1203 ret = ip_vs_bind_scheduler(svc, sched);
1204 if (ret)
1205 goto out_err;
1206 sched = NULL;
1207
0d1e71b0
SH
1208 /* Bind the ct retriever */
1209 ip_vs_bind_pe(svc, pe);
1210 pe = NULL;
1211
1da177e4
LT
1212 /* Update the virtual service counters */
1213 if (svc->port == FTPPORT)
1214 atomic_inc(&ip_vs_ftpsvc_counter);
1215 else if (svc->port == 0)
1216 atomic_inc(&ip_vs_nullsvc_counter);
1217
29c2026f 1218 ip_vs_new_estimator(net, &svc->stats);
f94fd041
JV
1219
1220 /* Count only IPv4 services for old get/setsockopt interface */
1221 if (svc->af == AF_INET)
1222 ip_vs_num_services++;
1da177e4
LT
1223
1224 /* Hash the service into the service table */
1225 write_lock_bh(&__ip_vs_svc_lock);
1226 ip_vs_svc_hash(svc);
1227 write_unlock_bh(&__ip_vs_svc_lock);
1228
1229 *svc_p = svc;
1230 return 0;
1231
b17fc996 1232
6e08bfb8 1233 out_err:
1da177e4 1234 if (svc != NULL) {
2fabf35b 1235 ip_vs_unbind_scheduler(svc);
1da177e4
LT
1236 if (svc->inc) {
1237 local_bh_disable();
1238 ip_vs_app_inc_put(svc->inc);
1239 local_bh_enable();
1240 }
b17fc996
HS
1241 if (svc->stats.cpustats)
1242 free_percpu(svc->stats.cpustats);
1da177e4
LT
1243 kfree(svc);
1244 }
1245 ip_vs_scheduler_put(sched);
0d1e71b0 1246 ip_vs_pe_put(pe);
1da177e4 1247
1da177e4
LT
1248 /* decrease the module use count */
1249 ip_vs_use_count_dec();
1250
1251 return ret;
1252}
1253
1254
/*
 *	Edit a service and bind it with a new scheduler
 *
 *	Looks up the scheduler (and, when named, the persistence engine)
 *	given in 'u', validates the IPv6 netmask, then updates flags,
 *	timeout, netmask, scheduler and persistence engine of 'svc' while
 *	holding __ip_vs_svc_lock for writing.
 *
 *	Returns 0 on success, -ENOENT when the scheduler or persistence
 *	engine is not found, -EINVAL for an out-of-range IPv6 netmask, or
 *	the error returned by the scheduler unbind/bind calls.
 */
static int
ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
{
	struct ip_vs_scheduler *sched, *old_sched;
	struct ip_vs_pe *pe = NULL, *old_pe = NULL;
	int ret = 0;

	/*
	 * Lookup the scheduler, by 'u->sched_name'
	 */
	sched = ip_vs_scheduler_get(u->sched_name);
	if (sched == NULL) {
		pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
		return -ENOENT;
	}
	/*
	 * old_sched (and old_pe below) name whatever gets put at 'out'.
	 * Until the swap has succeeded that is the object just looked up.
	 */
	old_sched = sched;

	if (u->pe_name && *u->pe_name) {
		pe = ip_vs_pe_getbyname(u->pe_name);
		if (pe == NULL) {
			pr_info("persistence engine module ip_vs_pe_%s "
				"not found\n", u->pe_name);
			ret = -ENOENT;
			goto out;
		}
		old_pe = pe;
	}

#ifdef CONFIG_IP_VS_IPV6
	if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
		ret = -EINVAL;
		goto out;
	}
#endif

	write_lock_bh(&__ip_vs_svc_lock);

	/*
	 * Wait until all other svc users go away.
	 */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);

	/*
	 * Set the flags and timeout value
	 * (these are written before the scheduler swap below is attempted)
	 */
	svc->flags = u->flags | IP_VS_SVC_F_HASHED;
	svc->timeout = u->timeout * HZ;
	svc->netmask = u->netmask;

	old_sched = svc->scheduler;
	if (sched != old_sched) {
		/*
		 * Unbind the old scheduler
		 */
		if ((ret = ip_vs_unbind_scheduler(svc))) {
			/* swap failed: the new scheduler is the one to put */
			old_sched = sched;
			goto out_unlock;
		}

		/*
		 * Bind the new scheduler
		 */
		if ((ret = ip_vs_bind_scheduler(svc, sched))) {
			/*
			 * If ip_vs_bind_scheduler fails, restore the old
			 * scheduler.
			 * The main reason of failure is out of memory.
			 *
			 * The question is if the old scheduler can be
			 * restored all the time. TODO: if it cannot be
			 * restored some time, we must delete the service,
			 * otherwise the system may crash.
			 */
			ip_vs_bind_scheduler(svc, old_sched);
			old_sched = sched;
			goto out_unlock;
		}
	}

	/* Swap the persistence engine only when it actually changes */
	old_pe = svc->pe;
	if (pe != old_pe) {
		ip_vs_unbind_pe(svc);
		ip_vs_bind_pe(svc, pe);
	}

  out_unlock:
	write_unlock_bh(&__ip_vs_svc_lock);
  out:
	/* Put whichever scheduler / engine old_sched and old_pe name now */
	ip_vs_scheduler_put(old_sched);
	ip_vs_pe_put(old_pe);
	return ret;
}
1350
1351
/*
 *	Delete a service from the service list
 *	- The service must be unlinked, unlocked and not referenced!
 *	- We are called under _bh lock
 *
 *	Tears down the estimator, scheduler, persistence engine and app
 *	binding of the service, unlinks and deletes every destination, and
 *	frees the service itself when its refcnt is zero.  The module use
 *	count is dropped in all cases.
 */
static void __ip_vs_del_service(struct ip_vs_service *svc)
{
	struct ip_vs_dest *dest, *nxt;
	struct ip_vs_scheduler *old_sched;
	struct ip_vs_pe *old_pe;

	/*
	 * NOTE(review): unconditional info-level trace on every service
	 * deletion; looks like leftover debugging - confirm it is wanted.
	 */
	pr_info("%s: enter\n", __func__);

	/* Count only IPv4 services for old get/setsockopt interface */
	if (svc->af == AF_INET)
		ip_vs_num_services--;

	ip_vs_kill_estimator(svc->net, &svc->stats);

	/* Unbind scheduler */
	old_sched = svc->scheduler;
	ip_vs_unbind_scheduler(svc);
	ip_vs_scheduler_put(old_sched);

	/* Unbind persistence engine */
	old_pe = svc->pe;
	ip_vs_unbind_pe(svc);
	ip_vs_pe_put(old_pe);

	/* Unbind app inc */
	if (svc->inc) {
		ip_vs_app_inc_put(svc->inc);
		svc->inc = NULL;
	}

	/*
	 *    Unlink the whole destination list
	 *    (svcupd is 0: the scheduler was already unbound above)
	 */
	list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
		__ip_vs_unlink_dest(svc, dest, 0);
		__ip_vs_del_dest(svc->net, dest);
	}

	/*
	 *    Update the virtual service counters
	 */
	if (svc->port == FTPPORT)
		atomic_dec(&ip_vs_ftpsvc_counter);
	else if (svc->port == 0)
		atomic_dec(&ip_vs_nullsvc_counter);

	/*
	 *    Free the service if nobody refers to it
	 */
	if (atomic_read(&svc->refcnt) == 0) {
		IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
			      svc->fwmark,
			      IP_VS_DBG_ADDR(svc->af, &svc->addr),
			      ntohs(svc->port), atomic_read(&svc->usecnt));
		free_percpu(svc->stats.cpustats);
		kfree(svc);
	}

	/* decrease the module use count */
	ip_vs_use_count_dec();
}
1418
/*
 * Unlink a service from list and try to delete it if its refcnt reached 0
 *
 * Everything happens with __ip_vs_svc_lock held for writing: the service
 * is unhashed first, then the function waits for usecnt to drop to zero
 * before handing the service to __ip_vs_del_service().
 */
static void ip_vs_unlink_service(struct ip_vs_service *svc)
{
	/*
	 * Unhash it from the service table
	 */
	write_lock_bh(&__ip_vs_svc_lock);

	ip_vs_svc_unhash(svc);

	/*
	 * Wait until all the svc users go away.
	 */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);

	__ip_vs_del_service(svc);

	write_unlock_bh(&__ip_vs_svc_lock);
}
1440
1441/*
1442 * Delete a service from the service list
1443 */
1444static int ip_vs_del_service(struct ip_vs_service *svc)
1445{
1446 if (svc == NULL)
1447 return -EEXIST;
1448 ip_vs_unlink_service(svc);
1da177e4
LT
1449
1450 return 0;
1451}
1452
1453
1454/*
1455 * Flush all the virtual services
1456 */
fc723250 1457static int ip_vs_flush(struct net *net)
1da177e4
LT
1458{
1459 int idx;
1460 struct ip_vs_service *svc, *nxt;
1461
1462 /*
fc723250 1463 * Flush the service table hashed by <netns,protocol,addr,port>
1da177e4
LT
1464 */
1465 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
fc723250
HS
1466 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1467 s_list) {
1468 if (net_eq(svc->net, net))
1469 ip_vs_unlink_service(svc);
1da177e4
LT
1470 }
1471 }
1472
1473 /*
1474 * Flush the service table hashed by fwmark
1475 */
1476 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1477 list_for_each_entry_safe(svc, nxt,
1478 &ip_vs_svc_fwm_table[idx], f_list) {
fc723250
HS
1479 if (net_eq(svc->net, net))
1480 ip_vs_unlink_service(svc);
1da177e4
LT
1481 }
1482 }
1483
1484 return 0;
1485}
1486
1487
1488/*
1489 * Zero counters in a service or all services
1490 */
1491static int ip_vs_zero_service(struct ip_vs_service *svc)
1492{
1493 struct ip_vs_dest *dest;
1494
1495 write_lock_bh(&__ip_vs_svc_lock);
1496 list_for_each_entry(dest, &svc->destinations, n_list) {
1497 ip_vs_zero_stats(&dest->stats);
1498 }
1499 ip_vs_zero_stats(&svc->stats);
1500 write_unlock_bh(&__ip_vs_svc_lock);
1501 return 0;
1502}
1503
fc723250 1504static int ip_vs_zero_all(struct net *net)
1da177e4
LT
1505{
1506 int idx;
1507 struct ip_vs_service *svc;
1508
1509 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1510 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
fc723250
HS
1511 if (net_eq(svc->net, net))
1512 ip_vs_zero_service(svc);
1da177e4
LT
1513 }
1514 }
1515
1516 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1517 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
fc723250
HS
1518 if (net_eq(svc->net, net))
1519 ip_vs_zero_service(svc);
1da177e4
LT
1520 }
1521 }
1522
b17fc996 1523 ip_vs_zero_stats(net_ipvs(net)->tot_stats);
1da177e4
LT
1524 return 0;
1525}
1526
1527
1528static int
8d65af78 1529proc_do_defense_mode(ctl_table *table, int write,
1da177e4
LT
1530 void __user *buffer, size_t *lenp, loff_t *ppos)
1531{
9330419d 1532 struct net *net = current->nsproxy->net_ns;
1da177e4
LT
1533 int *valp = table->data;
1534 int val = *valp;
1535 int rc;
1536
8d65af78 1537 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1da177e4
LT
1538 if (write && (*valp != val)) {
1539 if ((*valp < 0) || (*valp > 3)) {
1540 /* Restore the correct value */
1541 *valp = val;
1542 } else {
9330419d 1543 update_defense_level(net_ipvs(net));
1da177e4
LT
1544 }
1545 }
1546 return rc;
1547}
1548
1549
1550static int
8d65af78 1551proc_do_sync_threshold(ctl_table *table, int write,
1da177e4
LT
1552 void __user *buffer, size_t *lenp, loff_t *ppos)
1553{
1554 int *valp = table->data;
1555 int val[2];
1556 int rc;
1557
1558 /* backup the value first */
1559 memcpy(val, valp, sizeof(val));
1560
8d65af78 1561 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1da177e4
LT
1562 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1563 /* Restore the correct value */
1564 memcpy(valp, val, sizeof(val));
1565 }
1566 return rc;
1567}
1568
b880c1f0
HS
1569static int
1570proc_do_sync_mode(ctl_table *table, int write,
1571 void __user *buffer, size_t *lenp, loff_t *ppos)
1572{
1573 int *valp = table->data;
1574 int val = *valp;
1575 int rc;
1576
1577 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1578 if (write && (*valp != val)) {
1579 if ((*valp < 0) || (*valp > 1)) {
1580 /* Restore the correct value */
1581 *valp = val;
1582 } else {
f131315f
HS
1583 struct net *net = current->nsproxy->net_ns;
1584 ip_vs_sync_switch_mode(net, val);
b880c1f0
HS
1585 }
1586 }
1587 return rc;
1588}
1da177e4
LT
1589
1590/*
1591 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1592 */
1593
1594static struct ctl_table vs_vars[] = {
1595 {
1da177e4
LT
1596 .procname = "amemthresh",
1597 .data = &sysctl_ip_vs_amemthresh,
1598 .maxlen = sizeof(int),
1599 .mode = 0644,
6d9f239a 1600 .proc_handler = proc_dointvec,
1da177e4
LT
1601 },
1602#ifdef CONFIG_IP_VS_DEBUG
1603 {
1da177e4
LT
1604 .procname = "debug_level",
1605 .data = &sysctl_ip_vs_debug_level,
1606 .maxlen = sizeof(int),
1607 .mode = 0644,
6d9f239a 1608 .proc_handler = proc_dointvec,
1da177e4
LT
1609 },
1610#endif
1611 {
1da177e4
LT
1612 .procname = "am_droprate",
1613 .data = &sysctl_ip_vs_am_droprate,
1614 .maxlen = sizeof(int),
1615 .mode = 0644,
6d9f239a 1616 .proc_handler = proc_dointvec,
1da177e4
LT
1617 },
1618 {
1da177e4
LT
1619 .procname = "drop_entry",
1620 .data = &sysctl_ip_vs_drop_entry,
1621 .maxlen = sizeof(int),
1622 .mode = 0644,
6d9f239a 1623 .proc_handler = proc_do_defense_mode,
1da177e4
LT
1624 },
1625 {
1da177e4
LT
1626 .procname = "drop_packet",
1627 .data = &sysctl_ip_vs_drop_packet,
1628 .maxlen = sizeof(int),
1629 .mode = 0644,
6d9f239a 1630 .proc_handler = proc_do_defense_mode,
1da177e4 1631 },
f4bc17cd
JA
1632#ifdef CONFIG_IP_VS_NFCT
1633 {
1634 .procname = "conntrack",
1635 .data = &sysctl_ip_vs_conntrack,
1636 .maxlen = sizeof(int),
1637 .mode = 0644,
1638 .proc_handler = &proc_dointvec,
1639 },
1640#endif
1da177e4 1641 {
1da177e4
LT
1642 .procname = "secure_tcp",
1643 .data = &sysctl_ip_vs_secure_tcp,
1644 .maxlen = sizeof(int),
1645 .mode = 0644,
6d9f239a 1646 .proc_handler = proc_do_defense_mode,
1da177e4 1647 },
8a803040
JA
1648 {
1649 .procname = "snat_reroute",
1650 .data = &sysctl_ip_vs_snat_reroute,
1651 .maxlen = sizeof(int),
1652 .mode = 0644,
1653 .proc_handler = &proc_dointvec,
1654 },
b880c1f0
HS
1655 {
1656 .procname = "sync_version",
1657 .data = &sysctl_ip_vs_sync_ver,
1658 .maxlen = sizeof(int),
1659 .mode = 0644,
1660 .proc_handler = &proc_do_sync_mode,
1661 },
1da177e4
LT
1662#if 0
1663 {
1da177e4
LT
1664 .procname = "timeout_established",
1665 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1666 .maxlen = sizeof(int),
1667 .mode = 0644,
6d9f239a 1668 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1669 },
1670 {
1da177e4
LT
1671 .procname = "timeout_synsent",
1672 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1673 .maxlen = sizeof(int),
1674 .mode = 0644,
6d9f239a 1675 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1676 },
1677 {
1da177e4
LT
1678 .procname = "timeout_synrecv",
1679 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1680 .maxlen = sizeof(int),
1681 .mode = 0644,
6d9f239a 1682 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1683 },
1684 {
1da177e4
LT
1685 .procname = "timeout_finwait",
1686 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1687 .maxlen = sizeof(int),
1688 .mode = 0644,
6d9f239a 1689 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1690 },
1691 {
1da177e4
LT
1692 .procname = "timeout_timewait",
1693 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1694 .maxlen = sizeof(int),
1695 .mode = 0644,
6d9f239a 1696 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1697 },
1698 {
1da177e4
LT
1699 .procname = "timeout_close",
1700 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1701 .maxlen = sizeof(int),
1702 .mode = 0644,
6d9f239a 1703 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1704 },
1705 {
1da177e4
LT
1706 .procname = "timeout_closewait",
1707 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1708 .maxlen = sizeof(int),
1709 .mode = 0644,
6d9f239a 1710 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1711 },
1712 {
1da177e4
LT
1713 .procname = "timeout_lastack",
1714 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1715 .maxlen = sizeof(int),
1716 .mode = 0644,
6d9f239a 1717 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1718 },
1719 {
1da177e4
LT
1720 .procname = "timeout_listen",
1721 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1722 .maxlen = sizeof(int),
1723 .mode = 0644,
6d9f239a 1724 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1725 },
1726 {
1da177e4
LT
1727 .procname = "timeout_synack",
1728 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1729 .maxlen = sizeof(int),
1730 .mode = 0644,
6d9f239a 1731 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1732 },
1733 {
1da177e4
LT
1734 .procname = "timeout_udp",
1735 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1736 .maxlen = sizeof(int),
1737 .mode = 0644,
6d9f239a 1738 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1739 },
1740 {
1da177e4
LT
1741 .procname = "timeout_icmp",
1742 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1743 .maxlen = sizeof(int),
1744 .mode = 0644,
6d9f239a 1745 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1746 },
1747#endif
1748 {
1da177e4
LT
1749 .procname = "cache_bypass",
1750 .data = &sysctl_ip_vs_cache_bypass,
1751 .maxlen = sizeof(int),
1752 .mode = 0644,
6d9f239a 1753 .proc_handler = proc_dointvec,
1da177e4
LT
1754 },
1755 {
1da177e4
LT
1756 .procname = "expire_nodest_conn",
1757 .data = &sysctl_ip_vs_expire_nodest_conn,
1758 .maxlen = sizeof(int),
1759 .mode = 0644,
6d9f239a 1760 .proc_handler = proc_dointvec,
1da177e4
LT
1761 },
1762 {
1da177e4
LT
1763 .procname = "expire_quiescent_template",
1764 .data = &sysctl_ip_vs_expire_quiescent_template,
1765 .maxlen = sizeof(int),
1766 .mode = 0644,
6d9f239a 1767 .proc_handler = proc_dointvec,
1da177e4
LT
1768 },
1769 {
1da177e4
LT
1770 .procname = "sync_threshold",
1771 .data = &sysctl_ip_vs_sync_threshold,
1772 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1773 .mode = 0644,
6d9f239a 1774 .proc_handler = proc_do_sync_threshold,
1da177e4
LT
1775 },
1776 {
1da177e4
LT
1777 .procname = "nat_icmp_send",
1778 .data = &sysctl_ip_vs_nat_icmp_send,
1779 .maxlen = sizeof(int),
1780 .mode = 0644,
6d9f239a 1781 .proc_handler = proc_dointvec,
1da177e4 1782 },
f8572d8f 1783 { }
1da177e4
LT
1784};
1785
/*
 * sysctl path components "net", "ipv4", "vs", terminated by an empty
 * entry.  Exported (GPL-only) so it can be used outside this file.
 */
const struct ctl_path net_vs_ctl_path[] = {
	{ .procname = "net", },
	{ .procname = "ipv4", },
	{ .procname = "vs", },
	{ }
};
EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1da177e4
LT
1793
1794static struct ctl_table_header * sysctl_header;
1795
1796#ifdef CONFIG_PROC_FS
1797
/*
 * Iteration state kept in seq->private while the service tables are
 * being listed.
 */
struct ip_vs_iter {
	struct seq_net_private p;  /* Do not move this, netns depends upon it*/
	struct list_head *table;	/* ip_vs_svc_table or ip_vs_svc_fwm_table */
	int bucket;			/* index of the bucket being walked in 'table' */
};
1803
1804/*
1805 * Write the contents of the VS rule table to a PROCfs file.
1806 * (It is kept just for backward compatibility)
1807 */
1808static inline const char *ip_vs_fwd_name(unsigned flags)
1809{
1810 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1811 case IP_VS_CONN_F_LOCALNODE:
1812 return "Local";
1813 case IP_VS_CONN_F_TUNNEL:
1814 return "Tunnel";
1815 case IP_VS_CONN_F_DROUTE:
1816 return "Route";
1817 default:
1818 return "Masq";
1819 }
1820}
1821
1822
1823/* Get the Nth entry in the two lists */
1824static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1825{
fc723250 1826 struct net *net = seq_file_net(seq);
1da177e4
LT
1827 struct ip_vs_iter *iter = seq->private;
1828 int idx;
1829 struct ip_vs_service *svc;
1830
1831 /* look in hash by protocol */
1832 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1833 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
fc723250 1834 if (net_eq(svc->net, net) && pos-- == 0) {
1da177e4
LT
1835 iter->table = ip_vs_svc_table;
1836 iter->bucket = idx;
1837 return svc;
1838 }
1839 }
1840 }
1841
1842 /* keep looking in fwmark */
1843 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1844 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
fc723250 1845 if (net_eq(svc->net, net) && pos-- == 0) {
1da177e4
LT
1846 iter->table = ip_vs_svc_fwm_table;
1847 iter->bucket = idx;
1848 return svc;
1849 }
1850 }
1851 }
1852
1853 return NULL;
1854}
1855
1856static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
563e94f0 1857__acquires(__ip_vs_svc_lock)
1da177e4
LT
1858{
1859
1860 read_lock_bh(&__ip_vs_svc_lock);
1861 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1862}
1863
1864
1865static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1866{
1867 struct list_head *e;
1868 struct ip_vs_iter *iter;
1869 struct ip_vs_service *svc;
1870
1871 ++*pos;
1872 if (v == SEQ_START_TOKEN)
1873 return ip_vs_info_array(seq,0);
1874
1875 svc = v;
1876 iter = seq->private;
1877
1878 if (iter->table == ip_vs_svc_table) {
1879 /* next service in table hashed by protocol */
1880 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1881 return list_entry(e, struct ip_vs_service, s_list);
1882
1883
1884 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1885 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1886 s_list) {
1887 return svc;
1888 }
1889 }
1890
1891 iter->table = ip_vs_svc_fwm_table;
1892 iter->bucket = -1;
1893 goto scan_fwmark;
1894 }
1895
1896 /* next service in hashed by fwmark */
1897 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1898 return list_entry(e, struct ip_vs_service, f_list);
1899
1900 scan_fwmark:
1901 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1902 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1903 f_list)
1904 return svc;
1905 }
1906
1907 return NULL;
1908}
1909
/*
 * seq_file stop callback: drops the read lock on __ip_vs_svc_lock that
 * ip_vs_info_seq_start() took.
 */
static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
__releases(__ip_vs_svc_lock)
{
	read_unlock_bh(&__ip_vs_svc_lock);
}
1915
1916
/*
 * seq_file show callback: prints the table header for the start token,
 * otherwise one line for the service followed by one line per destination.
 * Always returns 0.
 *
 * NOTE(review): there is no netns check here; whatever service the
 * iterator hands in is printed.
 */
static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		seq_printf(seq,
			"IP Virtual Server version %d.%d.%d (size=%d)\n",
			NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
		seq_puts(seq,
			 "Prot LocalAddress:Port Scheduler Flags\n");
		seq_puts(seq,
			 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
	} else {
		const struct ip_vs_service *svc = v;
		const struct ip_vs_iter *iter = seq->private;
		const struct ip_vs_dest *dest;

		/* iter->table tells which hash the service came from */
		if (iter->table == ip_vs_svc_table) {
#ifdef CONFIG_IP_VS_IPV6
			if (svc->af == AF_INET6)
				seq_printf(seq, "%s [%pI6]:%04X %s ",
					   ip_vs_proto_name(svc->protocol),
					   &svc->addr.in6,
					   ntohs(svc->port),
					   svc->scheduler->name);
			else
#endif
				seq_printf(seq, "%s %08X:%04X %s %s ",
					   ip_vs_proto_name(svc->protocol),
					   ntohl(svc->addr.ip),
					   ntohs(svc->port),
					   svc->scheduler->name,
					   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
		} else {
			/* fwmark-hashed service: no protocol/address/port */
			seq_printf(seq, "FWM %08X %s %s",
				   svc->fwmark, svc->scheduler->name,
				   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
		}

		/* persistence details, or just terminate the service line */
		if (svc->flags & IP_VS_SVC_F_PERSISTENT)
			seq_printf(seq, "persistent %d %08X\n",
				svc->timeout,
				ntohl(svc->netmask));
		else
			seq_putc(seq, '\n');

		list_for_each_entry(dest, &svc->destinations, n_list) {
#ifdef CONFIG_IP_VS_IPV6
			if (dest->af == AF_INET6)
				seq_printf(seq,
					   " -> [%pI6]:%04X"
					   " %-7s %-6d %-10d %-10d\n",
					   &dest->addr.in6,
					   ntohs(dest->port),
					   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
					   atomic_read(&dest->weight),
					   atomic_read(&dest->activeconns),
					   atomic_read(&dest->inactconns));
			else
#endif
				seq_printf(seq,
					   " -> %08X:%04X "
					   "%-7s %-6d %-10d %-10d\n",
					   ntohl(dest->addr.ip),
					   ntohs(dest->port),
					   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
					   atomic_read(&dest->weight),
					   atomic_read(&dest->activeconns),
					   atomic_read(&dest->inactconns));

		}
	}
	return 0;
}
1989
/* seq_file callbacks used to list the service tables */
static const struct seq_operations ip_vs_info_seq_ops = {
	.start = ip_vs_info_seq_start,
	.next  = ip_vs_info_seq_next,
	.stop  = ip_vs_info_seq_stop,
	.show  = ip_vs_info_seq_show,
};
1996
1997static int ip_vs_info_open(struct inode *inode, struct file *file)
1998{
fc723250 1999 return seq_open_net(inode, file, &ip_vs_info_seq_ops,
cf7732e4 2000 sizeof(struct ip_vs_iter));
1da177e4
LT
2001}
2002
9a32144e 2003static const struct file_operations ip_vs_info_fops = {
1da177e4
LT
2004 .owner = THIS_MODULE,
2005 .open = ip_vs_info_open,
2006 .read = seq_read,
2007 .llseek = seq_lseek,
2008 .release = seq_release_private,
2009};
2010
2011#endif
2012
1da177e4
LT
2013#ifdef CONFIG_PROC_FS
/*
 * single-open show callback: prints the namespace-wide totals and rates
 * from net_ipvs(net)->tot_stats.  The values are read while holding the
 * stats spinlock.  Always returns 0.
 */
static int ip_vs_stats_show(struct seq_file *seq, void *v)
{
	struct net *net = seq_file_single_net(seq);
	struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;

/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
	seq_puts(seq,
		 " Total Incoming Outgoing Incoming Outgoing\n");
	seq_printf(seq,
		   " Conns Packets Packets Bytes Bytes\n");

	/* counters */
	spin_lock_bh(&tot_stats->lock);
	seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns,
		   tot_stats->ustats.inpkts, tot_stats->ustats.outpkts,
		   (unsigned long long) tot_stats->ustats.inbytes,
		   (unsigned long long) tot_stats->ustats.outbytes);

/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
	seq_puts(seq,
		 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
	/* rates */
	seq_printf(seq,"%8X %8X %8X %16X %16X\n",
			tot_stats->ustats.cps,
			tot_stats->ustats.inpps,
			tot_stats->ustats.outpps,
			tot_stats->ustats.inbps,
			tot_stats->ustats.outbps);
	spin_unlock_bh(&tot_stats->lock);

	return 0;
}
2044
2045static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2046{
fc723250 2047 return single_open_net(inode, file, ip_vs_stats_show);
1da177e4
LT
2048}
2049
9a32144e 2050static const struct file_operations ip_vs_stats_fops = {
1da177e4
LT
2051 .owner = THIS_MODULE,
2052 .open = ip_vs_stats_seq_open,
2053 .read = seq_read,
2054 .llseek = seq_lseek,
2055 .release = single_release,
2056};
2057
b17fc996
HS
/*
 * single-open show callback: prints one line of counters per possible CPU
 * from net->ipvs->cpustats, then the namespace-wide totals and rates from
 * net_ipvs(net)->tot_stats.  The per-CPU values are read without a lock;
 * the totals are read under the stats spinlock.  Always returns 0.
 */
static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
{
	struct net *net = seq_file_single_net(seq);
	struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
	int i;

/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
	seq_puts(seq,
		 " Total Incoming Outgoing Incoming Outgoing\n");
	seq_printf(seq,
		   "CPU Conns Packets Packets Bytes Bytes\n");

	/* one line per possible CPU */
	for_each_possible_cpu(i) {
		struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i);
		seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
			    i, u->ustats.conns, u->ustats.inpkts,
			    u->ustats.outpkts, (__u64)u->ustats.inbytes,
			    (__u64)u->ustats.outbytes);
	}

	/* totals line, marked with '~' in the CPU column */
	spin_lock_bh(&tot_stats->lock);
	seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
		   tot_stats->ustats.conns, tot_stats->ustats.inpkts,
		   tot_stats->ustats.outpkts,
		   (unsigned long long) tot_stats->ustats.inbytes,
		   (unsigned long long) tot_stats->ustats.outbytes);

/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
	seq_puts(seq,
		 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
	seq_printf(seq, " %8X %8X %8X %16X %16X\n",
			tot_stats->ustats.cps,
			tot_stats->ustats.inpps,
			tot_stats->ustats.outpps,
			tot_stats->ustats.inbps,
			tot_stats->ustats.outbps);
	spin_unlock_bh(&tot_stats->lock);

	return 0;
}
2098
2099static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2100{
2101 return single_open_net(inode, file, ip_vs_stats_percpu_show);
2102}
2103
2104static const struct file_operations ip_vs_stats_percpu_fops = {
2105 .owner = THIS_MODULE,
2106 .open = ip_vs_stats_percpu_seq_open,
2107 .read = seq_read,
2108 .llseek = seq_lseek,
2109 .release = single_release,
2110};
1da177e4
LT
2111#endif
2112
2113/*
2114 * Set timeout values for tcp tcpfin udp in the timeout_table.
2115 */
9330419d 2116static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
1da177e4 2117{
9330419d
HS
2118 struct ip_vs_proto_data *pd;
2119
1da177e4
LT
2120 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2121 u->tcp_timeout,
2122 u->tcp_fin_timeout,
2123 u->udp_timeout);
2124
2125#ifdef CONFIG_IP_VS_PROTO_TCP
2126 if (u->tcp_timeout) {
9330419d
HS
2127 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2128 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
1da177e4
LT
2129 = u->tcp_timeout * HZ;
2130 }
2131
2132 if (u->tcp_fin_timeout) {
9330419d
HS
2133 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2134 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
1da177e4
LT
2135 = u->tcp_fin_timeout * HZ;
2136 }
2137#endif
2138
2139#ifdef CONFIG_IP_VS_PROTO_UDP
2140 if (u->udp_timeout) {
9330419d
HS
2141 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2142 pd->timeout_table[IP_VS_UDP_S_NORMAL]
1da177e4
LT
2143 = u->udp_timeout * HZ;
2144 }
2145#endif
2146 return 0;
2147}
2148
2149
/*
 * SET_CMDID() turns a set-sockopt command number into an index relative to
 * IP_VS_BASE_CTL.  The argument is parenthesized so that an expression
 * argument (for example a conditional) is evaluated as a whole before the
 * subtraction.
 *
 * The *_ARG_LEN macros are the user-space argument sizes of the individual
 * set commands; MAX_ARG_LEN is the largest of them.
 */
#define SET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
#define MAX_ARG_LEN		SVCDEST_ARG_LEN
2157
/*
 * Argument length for each set command, indexed by SET_CMDID(cmd).
 * do_ip_vs_set_ctl() rejects a request whose length differs from the
 * entry for its command.
 */
static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
	[SET_CMDID(IP_VS_SO_SET_ADD)]		= SERVICE_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_EDIT)]		= SERVICE_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_DEL)]		= SERVICE_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_FLUSH)]		= 0,
	[SET_CMDID(IP_VS_SO_SET_ADDDEST)]	= SVCDEST_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_DELDEST)]	= SVCDEST_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_EDITDEST)]	= SVCDEST_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_TIMEOUT)]	= TIMEOUT_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]	= DAEMON_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]	= DAEMON_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_ZERO)]		= SERVICE_ARG_LEN,
};
2171
c860c6b1
JV
2172static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2173 struct ip_vs_service_user *usvc_compat)
2174{
0d1e71b0
SH
2175 memset(usvc, 0, sizeof(*usvc));
2176
c860c6b1
JV
2177 usvc->af = AF_INET;
2178 usvc->protocol = usvc_compat->protocol;
2179 usvc->addr.ip = usvc_compat->addr;
2180 usvc->port = usvc_compat->port;
2181 usvc->fwmark = usvc_compat->fwmark;
2182
2183 /* Deep copy of sched_name is not needed here */
2184 usvc->sched_name = usvc_compat->sched_name;
2185
2186 usvc->flags = usvc_compat->flags;
2187 usvc->timeout = usvc_compat->timeout;
2188 usvc->netmask = usvc_compat->netmask;
2189}
2190
2191static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2192 struct ip_vs_dest_user *udest_compat)
2193{
0d1e71b0
SH
2194 memset(udest, 0, sizeof(*udest));
2195
c860c6b1
JV
2196 udest->addr.ip = udest_compat->addr;
2197 udest->port = udest_compat->port;
2198 udest->conn_flags = udest_compat->conn_flags;
2199 udest->weight = udest_compat->weight;
2200 udest->u_threshold = udest_compat->u_threshold;
2201 udest->l_threshold = udest_compat->l_threshold;
2202}
2203
1da177e4
LT
2204static int
2205do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2206{
fc723250 2207 struct net *net = sock_net(sk);
1da177e4
LT
2208 int ret;
2209 unsigned char arg[MAX_ARG_LEN];
c860c6b1
JV
2210 struct ip_vs_service_user *usvc_compat;
2211 struct ip_vs_service_user_kern usvc;
1da177e4 2212 struct ip_vs_service *svc;
c860c6b1
JV
2213 struct ip_vs_dest_user *udest_compat;
2214 struct ip_vs_dest_user_kern udest;
1da177e4
LT
2215
2216 if (!capable(CAP_NET_ADMIN))
2217 return -EPERM;
2218
04bcef2a
AV
2219 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2220 return -EINVAL;
2221 if (len < 0 || len > MAX_ARG_LEN)
2222 return -EINVAL;
1da177e4 2223 if (len != set_arglen[SET_CMDID(cmd)]) {
1e3e238e
HE
2224 pr_err("set_ctl: len %u != %u\n",
2225 len, set_arglen[SET_CMDID(cmd)]);
1da177e4
LT
2226 return -EINVAL;
2227 }
2228
2229 if (copy_from_user(arg, user, len) != 0)
2230 return -EFAULT;
2231
2232 /* increase the module use count */
2233 ip_vs_use_count_inc();
2234
14cc3e2b 2235 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
1da177e4
LT
2236 ret = -ERESTARTSYS;
2237 goto out_dec;
2238 }
2239
2240 if (cmd == IP_VS_SO_SET_FLUSH) {
2241 /* Flush the virtual service */
fc723250 2242 ret = ip_vs_flush(net);
1da177e4
LT
2243 goto out_unlock;
2244 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2245 /* Set timeout values for (tcp tcpfin udp) */
9330419d 2246 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
1da177e4
LT
2247 goto out_unlock;
2248 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2249 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
f131315f
HS
2250 ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2251 dm->syncid);
1da177e4
LT
2252 goto out_unlock;
2253 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2254 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
f131315f 2255 ret = stop_sync_thread(net, dm->state);
1da177e4
LT
2256 goto out_unlock;
2257 }
2258
c860c6b1
JV
2259 usvc_compat = (struct ip_vs_service_user *)arg;
2260 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2261
2262 /* We only use the new structs internally, so copy userspace compat
2263 * structs to extended internal versions */
2264 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2265 ip_vs_copy_udest_compat(&udest, udest_compat);
1da177e4
LT
2266
2267 if (cmd == IP_VS_SO_SET_ZERO) {
2268 /* if no service address is set, zero counters in all */
c860c6b1 2269 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
fc723250 2270 ret = ip_vs_zero_all(net);
1da177e4
LT
2271 goto out_unlock;
2272 }
2273 }
2274
2906f66a
VMR
2275 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2276 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2277 usvc.protocol != IPPROTO_SCTP) {
1e3e238e
HE
2278 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2279 usvc.protocol, &usvc.addr.ip,
2280 ntohs(usvc.port), usvc.sched_name);
1da177e4
LT
2281 ret = -EFAULT;
2282 goto out_unlock;
2283 }
2284
2285 /* Lookup the exact service by <protocol, addr, port> or fwmark */
c860c6b1 2286 if (usvc.fwmark == 0)
fc723250 2287 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
26c15cfd 2288 &usvc.addr, usvc.port);
1da177e4 2289 else
fc723250 2290 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
1da177e4
LT
2291
2292 if (cmd != IP_VS_SO_SET_ADD
c860c6b1 2293 && (svc == NULL || svc->protocol != usvc.protocol)) {
1da177e4 2294 ret = -ESRCH;
26c15cfd 2295 goto out_unlock;
1da177e4
LT
2296 }
2297
2298 switch (cmd) {
2299 case IP_VS_SO_SET_ADD:
2300 if (svc != NULL)
2301 ret = -EEXIST;
2302 else
fc723250 2303 ret = ip_vs_add_service(net, &usvc, &svc);
1da177e4
LT
2304 break;
2305 case IP_VS_SO_SET_EDIT:
c860c6b1 2306 ret = ip_vs_edit_service(svc, &usvc);
1da177e4
LT
2307 break;
2308 case IP_VS_SO_SET_DEL:
2309 ret = ip_vs_del_service(svc);
2310 if (!ret)
2311 goto out_unlock;
2312 break;
2313 case IP_VS_SO_SET_ZERO:
2314 ret = ip_vs_zero_service(svc);
2315 break;
2316 case IP_VS_SO_SET_ADDDEST:
c860c6b1 2317 ret = ip_vs_add_dest(svc, &udest);
1da177e4
LT
2318 break;
2319 case IP_VS_SO_SET_EDITDEST:
c860c6b1 2320 ret = ip_vs_edit_dest(svc, &udest);
1da177e4
LT
2321 break;
2322 case IP_VS_SO_SET_DELDEST:
c860c6b1 2323 ret = ip_vs_del_dest(svc, &udest);
1da177e4
LT
2324 break;
2325 default:
2326 ret = -EINVAL;
2327 }
2328
1da177e4 2329 out_unlock:
14cc3e2b 2330 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2331 out_dec:
2332 /* decrease the module use count */
2333 ip_vs_use_count_dec();
2334
2335 return ret;
2336}
2337
2338
2339static void
2340ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2341{
2342 spin_lock_bh(&src->lock);
e9c0ce23 2343 memcpy(dst, &src->ustats, sizeof(*dst));
1da177e4
LT
2344 spin_unlock_bh(&src->lock);
2345}
2346
2347static void
2348ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2349{
2350 dst->protocol = src->protocol;
e7ade46a 2351 dst->addr = src->addr.ip;
1da177e4
LT
2352 dst->port = src->port;
2353 dst->fwmark = src->fwmark;
4da62fc7 2354 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
1da177e4
LT
2355 dst->flags = src->flags;
2356 dst->timeout = src->timeout / HZ;
2357 dst->netmask = src->netmask;
2358 dst->num_dests = src->num_dests;
2359 ip_vs_copy_stats(&dst->stats, &src->stats);
2360}
2361
2362static inline int
fc723250
HS
2363__ip_vs_get_service_entries(struct net *net,
2364 const struct ip_vs_get_services *get,
1da177e4
LT
2365 struct ip_vs_get_services __user *uptr)
2366{
2367 int idx, count=0;
2368 struct ip_vs_service *svc;
2369 struct ip_vs_service_entry entry;
2370 int ret = 0;
2371
2372 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2373 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
f94fd041 2374 /* Only expose IPv4 entries to old interface */
fc723250 2375 if (svc->af != AF_INET || !net_eq(svc->net, net))
f94fd041
JV
2376 continue;
2377
1da177e4
LT
2378 if (count >= get->num_services)
2379 goto out;
4da62fc7 2380 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2381 ip_vs_copy_service(&entry, svc);
2382 if (copy_to_user(&uptr->entrytable[count],
2383 &entry, sizeof(entry))) {
2384 ret = -EFAULT;
2385 goto out;
2386 }
2387 count++;
2388 }
2389 }
2390
2391 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2392 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
f94fd041 2393 /* Only expose IPv4 entries to old interface */
fc723250 2394 if (svc->af != AF_INET || !net_eq(svc->net, net))
f94fd041
JV
2395 continue;
2396
1da177e4
LT
2397 if (count >= get->num_services)
2398 goto out;
4da62fc7 2399 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2400 ip_vs_copy_service(&entry, svc);
2401 if (copy_to_user(&uptr->entrytable[count],
2402 &entry, sizeof(entry))) {
2403 ret = -EFAULT;
2404 goto out;
2405 }
2406 count++;
2407 }
2408 }
2409 out:
2410 return ret;
2411}
2412
2413static inline int
fc723250 2414__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
1da177e4
LT
2415 struct ip_vs_get_dests __user *uptr)
2416{
2417 struct ip_vs_service *svc;
b18610de 2418 union nf_inet_addr addr = { .ip = get->addr };
1da177e4
LT
2419 int ret = 0;
2420
2421 if (get->fwmark)
fc723250 2422 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
1da177e4 2423 else
fc723250 2424 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
26c15cfd 2425 get->port);
b18610de 2426
1da177e4
LT
2427 if (svc) {
2428 int count = 0;
2429 struct ip_vs_dest *dest;
2430 struct ip_vs_dest_entry entry;
2431
2432 list_for_each_entry(dest, &svc->destinations, n_list) {
2433 if (count >= get->num_dests)
2434 break;
2435
e7ade46a 2436 entry.addr = dest->addr.ip;
1da177e4
LT
2437 entry.port = dest->port;
2438 entry.conn_flags = atomic_read(&dest->conn_flags);
2439 entry.weight = atomic_read(&dest->weight);
2440 entry.u_threshold = dest->u_threshold;
2441 entry.l_threshold = dest->l_threshold;
2442 entry.activeconns = atomic_read(&dest->activeconns);
2443 entry.inactconns = atomic_read(&dest->inactconns);
2444 entry.persistconns = atomic_read(&dest->persistconns);
2445 ip_vs_copy_stats(&entry.stats, &dest->stats);
2446 if (copy_to_user(&uptr->entrytable[count],
2447 &entry, sizeof(entry))) {
2448 ret = -EFAULT;
2449 break;
2450 }
2451 count++;
2452 }
1da177e4
LT
2453 } else
2454 ret = -ESRCH;
2455 return ret;
2456}
2457
2458static inline void
9330419d 2459__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
1da177e4 2460{
9330419d
HS
2461 struct ip_vs_proto_data *pd;
2462
1da177e4 2463#ifdef CONFIG_IP_VS_PROTO_TCP
9330419d
HS
2464 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2465 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2466 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
1da177e4
LT
2467#endif
2468#ifdef CONFIG_IP_VS_PROTO_UDP
9330419d 2469 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
1da177e4 2470 u->udp_timeout =
9330419d 2471 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
1da177e4
LT
2472#endif
2473}
2474
2475
2476#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2477#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2478#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2479#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2480#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2481#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2482#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2483
9b5b5cff 2484static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
1da177e4
LT
2485 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2486 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2487 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2488 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2489 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2490 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2491 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2492};
2493
2494static int
2495do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2496{
2497 unsigned char arg[128];
2498 int ret = 0;
04bcef2a 2499 unsigned int copylen;
fc723250 2500 struct net *net = sock_net(sk);
f131315f 2501 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4 2502
fc723250 2503 BUG_ON(!net);
1da177e4
LT
2504 if (!capable(CAP_NET_ADMIN))
2505 return -EPERM;
2506
04bcef2a
AV
2507 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2508 return -EINVAL;
2509
1da177e4 2510 if (*len < get_arglen[GET_CMDID(cmd)]) {
1e3e238e
HE
2511 pr_err("get_ctl: len %u < %u\n",
2512 *len, get_arglen[GET_CMDID(cmd)]);
1da177e4
LT
2513 return -EINVAL;
2514 }
2515
04bcef2a
AV
2516 copylen = get_arglen[GET_CMDID(cmd)];
2517 if (copylen > 128)
2518 return -EINVAL;
2519
2520 if (copy_from_user(arg, user, copylen) != 0)
1da177e4
LT
2521 return -EFAULT;
2522
14cc3e2b 2523 if (mutex_lock_interruptible(&__ip_vs_mutex))
1da177e4
LT
2524 return -ERESTARTSYS;
2525
2526 switch (cmd) {
2527 case IP_VS_SO_GET_VERSION:
2528 {
2529 char buf[64];
2530
2531 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
6f7edb48 2532 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1da177e4
LT
2533 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2534 ret = -EFAULT;
2535 goto out;
2536 }
2537 *len = strlen(buf)+1;
2538 }
2539 break;
2540
2541 case IP_VS_SO_GET_INFO:
2542 {
2543 struct ip_vs_getinfo info;
2544 info.version = IP_VS_VERSION_CODE;
6f7edb48 2545 info.size = ip_vs_conn_tab_size;
1da177e4
LT
2546 info.num_services = ip_vs_num_services;
2547 if (copy_to_user(user, &info, sizeof(info)) != 0)
2548 ret = -EFAULT;
2549 }
2550 break;
2551
2552 case IP_VS_SO_GET_SERVICES:
2553 {
2554 struct ip_vs_get_services *get;
2555 int size;
2556
2557 get = (struct ip_vs_get_services *)arg;
2558 size = sizeof(*get) +
2559 sizeof(struct ip_vs_service_entry) * get->num_services;
2560 if (*len != size) {
1e3e238e 2561 pr_err("length: %u != %u\n", *len, size);
1da177e4
LT
2562 ret = -EINVAL;
2563 goto out;
2564 }
fc723250 2565 ret = __ip_vs_get_service_entries(net, get, user);
1da177e4
LT
2566 }
2567 break;
2568
2569 case IP_VS_SO_GET_SERVICE:
2570 {
2571 struct ip_vs_service_entry *entry;
2572 struct ip_vs_service *svc;
b18610de 2573 union nf_inet_addr addr;
1da177e4
LT
2574
2575 entry = (struct ip_vs_service_entry *)arg;
b18610de 2576 addr.ip = entry->addr;
1da177e4 2577 if (entry->fwmark)
fc723250 2578 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
1da177e4 2579 else
fc723250
HS
2580 svc = __ip_vs_service_find(net, AF_INET,
2581 entry->protocol, &addr,
2582 entry->port);
1da177e4
LT
2583 if (svc) {
2584 ip_vs_copy_service(entry, svc);
2585 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2586 ret = -EFAULT;
1da177e4
LT
2587 } else
2588 ret = -ESRCH;
2589 }
2590 break;
2591
2592 case IP_VS_SO_GET_DESTS:
2593 {
2594 struct ip_vs_get_dests *get;
2595 int size;
2596
2597 get = (struct ip_vs_get_dests *)arg;
2598 size = sizeof(*get) +
2599 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2600 if (*len != size) {
1e3e238e 2601 pr_err("length: %u != %u\n", *len, size);
1da177e4
LT
2602 ret = -EINVAL;
2603 goto out;
2604 }
fc723250 2605 ret = __ip_vs_get_dest_entries(net, get, user);
1da177e4
LT
2606 }
2607 break;
2608
2609 case IP_VS_SO_GET_TIMEOUT:
2610 {
2611 struct ip_vs_timeout_user t;
2612
9330419d 2613 __ip_vs_get_timeouts(net, &t);
1da177e4
LT
2614 if (copy_to_user(user, &t, sizeof(t)) != 0)
2615 ret = -EFAULT;
2616 }
2617 break;
2618
2619 case IP_VS_SO_GET_DAEMON:
2620 {
2621 struct ip_vs_daemon_user d[2];
2622
2623 memset(&d, 0, sizeof(d));
f131315f 2624 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
1da177e4 2625 d[0].state = IP_VS_STATE_MASTER;
f131315f
HS
2626 strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2627 sizeof(d[0].mcast_ifn));
2628 d[0].syncid = ipvs->master_syncid;
1da177e4 2629 }
f131315f 2630 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
1da177e4 2631 d[1].state = IP_VS_STATE_BACKUP;
f131315f
HS
2632 strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2633 sizeof(d[1].mcast_ifn));
2634 d[1].syncid = ipvs->backup_syncid;
1da177e4
LT
2635 }
2636 if (copy_to_user(user, &d, sizeof(d)) != 0)
2637 ret = -EFAULT;
2638 }
2639 break;
2640
2641 default:
2642 ret = -EINVAL;
2643 }
2644
2645 out:
14cc3e2b 2646 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2647 return ret;
2648}
2649
2650
2651static struct nf_sockopt_ops ip_vs_sockopts = {
2652 .pf = PF_INET,
2653 .set_optmin = IP_VS_BASE_CTL,
2654 .set_optmax = IP_VS_SO_SET_MAX+1,
2655 .set = do_ip_vs_set_ctl,
2656 .get_optmin = IP_VS_BASE_CTL,
2657 .get_optmax = IP_VS_SO_GET_MAX+1,
2658 .get = do_ip_vs_get_ctl,
16fcec35 2659 .owner = THIS_MODULE,
1da177e4
LT
2660};
2661
9a812198
JV
2662/*
2663 * Generic Netlink interface
2664 */
2665
2666/* IPVS genetlink family */
2667static struct genl_family ip_vs_genl_family = {
2668 .id = GENL_ID_GENERATE,
2669 .hdrsize = 0,
2670 .name = IPVS_GENL_NAME,
2671 .version = IPVS_GENL_VERSION,
2672 .maxattr = IPVS_CMD_MAX,
2673};
2674
2675/* Policy used for first-level command attributes */
2676static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2677 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2678 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2679 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2680 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2681 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2682 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2683};
2684
2685/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2686static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2687 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2688 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2689 .len = IP_VS_IFNAME_MAXLEN },
2690 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2691};
2692
2693/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2694static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2695 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2696 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2697 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2698 .len = sizeof(union nf_inet_addr) },
2699 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2700 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2701 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2702 .len = IP_VS_SCHEDNAME_MAXLEN },
0d1e71b0
SH
2703 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2704 .len = IP_VS_PENAME_MAXLEN },
9a812198
JV
2705 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2706 .len = sizeof(struct ip_vs_flags) },
2707 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2708 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2709 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2710};
2711
2712/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2713static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2714 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2715 .len = sizeof(union nf_inet_addr) },
2716 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2717 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2718 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2719 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2720 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2721 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2722 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2723 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2724 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2725};
2726
2727static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2728 struct ip_vs_stats *stats)
2729{
2730 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2731 if (!nl_stats)
2732 return -EMSGSIZE;
2733
2734 spin_lock_bh(&stats->lock);
2735
e9c0ce23
SW
2736 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2737 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2738 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2739 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2740 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2741 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2742 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2743 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2744 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2745 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
9a812198
JV
2746
2747 spin_unlock_bh(&stats->lock);
2748
2749 nla_nest_end(skb, nl_stats);
2750
2751 return 0;
2752
2753nla_put_failure:
2754 spin_unlock_bh(&stats->lock);
2755 nla_nest_cancel(skb, nl_stats);
2756 return -EMSGSIZE;
2757}
2758
2759static int ip_vs_genl_fill_service(struct sk_buff *skb,
2760 struct ip_vs_service *svc)
2761{
2762 struct nlattr *nl_service;
2763 struct ip_vs_flags flags = { .flags = svc->flags,
2764 .mask = ~0 };
2765
2766 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2767 if (!nl_service)
2768 return -EMSGSIZE;
2769
f94fd041 2770 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
9a812198
JV
2771
2772 if (svc->fwmark) {
2773 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2774 } else {
2775 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2776 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2777 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2778 }
2779
2780 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
0d1e71b0
SH
2781 if (svc->pe)
2782 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
9a812198
JV
2783 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2784 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2785 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2786
2787 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2788 goto nla_put_failure;
2789
2790 nla_nest_end(skb, nl_service);
2791
2792 return 0;
2793
2794nla_put_failure:
2795 nla_nest_cancel(skb, nl_service);
2796 return -EMSGSIZE;
2797}
2798
2799static int ip_vs_genl_dump_service(struct sk_buff *skb,
2800 struct ip_vs_service *svc,
2801 struct netlink_callback *cb)
2802{
2803 void *hdr;
2804
2805 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2806 &ip_vs_genl_family, NLM_F_MULTI,
2807 IPVS_CMD_NEW_SERVICE);
2808 if (!hdr)
2809 return -EMSGSIZE;
2810
2811 if (ip_vs_genl_fill_service(skb, svc) < 0)
2812 goto nla_put_failure;
2813
2814 return genlmsg_end(skb, hdr);
2815
2816nla_put_failure:
2817 genlmsg_cancel(skb, hdr);
2818 return -EMSGSIZE;
2819}
2820
2821static int ip_vs_genl_dump_services(struct sk_buff *skb,
2822 struct netlink_callback *cb)
2823{
2824 int idx = 0, i;
2825 int start = cb->args[0];
2826 struct ip_vs_service *svc;
fc723250 2827 struct net *net = skb_sknet(skb);
9a812198
JV
2828
2829 mutex_lock(&__ip_vs_mutex);
2830 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2831 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
fc723250 2832 if (++idx <= start || !net_eq(svc->net, net))
9a812198
JV
2833 continue;
2834 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2835 idx--;
2836 goto nla_put_failure;
2837 }
2838 }
2839 }
2840
2841 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2842 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
fc723250 2843 if (++idx <= start || !net_eq(svc->net, net))
9a812198
JV
2844 continue;
2845 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2846 idx--;
2847 goto nla_put_failure;
2848 }
2849 }
2850 }
2851
2852nla_put_failure:
2853 mutex_unlock(&__ip_vs_mutex);
2854 cb->args[0] = idx;
2855
2856 return skb->len;
2857}
2858
fc723250
HS
2859static int ip_vs_genl_parse_service(struct net *net,
2860 struct ip_vs_service_user_kern *usvc,
26c15cfd
JA
2861 struct nlattr *nla, int full_entry,
2862 struct ip_vs_service **ret_svc)
9a812198
JV
2863{
2864 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2865 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
26c15cfd 2866 struct ip_vs_service *svc;
9a812198
JV
2867
2868 /* Parse mandatory identifying service fields first */
2869 if (nla == NULL ||
2870 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2871 return -EINVAL;
2872
2873 nla_af = attrs[IPVS_SVC_ATTR_AF];
2874 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2875 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2876 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2877 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2878
2879 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2880 return -EINVAL;
2881
258c8893
SH
2882 memset(usvc, 0, sizeof(*usvc));
2883
c860c6b1 2884 usvc->af = nla_get_u16(nla_af);
f94fd041
JV
2885#ifdef CONFIG_IP_VS_IPV6
2886 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2887#else
2888 if (usvc->af != AF_INET)
2889#endif
9a812198
JV
2890 return -EAFNOSUPPORT;
2891
2892 if (nla_fwmark) {
2893 usvc->protocol = IPPROTO_TCP;
2894 usvc->fwmark = nla_get_u32(nla_fwmark);
2895 } else {
2896 usvc->protocol = nla_get_u16(nla_protocol);
2897 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2898 usvc->port = nla_get_u16(nla_port);
2899 usvc->fwmark = 0;
2900 }
2901
26c15cfd 2902 if (usvc->fwmark)
fc723250 2903 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
26c15cfd 2904 else
fc723250 2905 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
26c15cfd
JA
2906 &usvc->addr, usvc->port);
2907 *ret_svc = svc;
2908
9a812198
JV
2909 /* If a full entry was requested, check for the additional fields */
2910 if (full_entry) {
0d1e71b0 2911 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
9a812198
JV
2912 *nla_netmask;
2913 struct ip_vs_flags flags;
9a812198
JV
2914
2915 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
0d1e71b0 2916 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
9a812198
JV
2917 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2918 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2919 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2920
2921 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2922 return -EINVAL;
2923
2924 nla_memcpy(&flags, nla_flags, sizeof(flags));
2925
2926 /* prefill flags from service if it already exists */
26c15cfd 2927 if (svc)
9a812198 2928 usvc->flags = svc->flags;
9a812198
JV
2929
2930 /* set new flags from userland */
2931 usvc->flags = (usvc->flags & ~flags.mask) |
2932 (flags.flags & flags.mask);
c860c6b1 2933 usvc->sched_name = nla_data(nla_sched);
0d1e71b0 2934 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
9a812198
JV
2935 usvc->timeout = nla_get_u32(nla_timeout);
2936 usvc->netmask = nla_get_u32(nla_netmask);
2937 }
2938
2939 return 0;
2940}
2941
fc723250
HS
2942static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
2943 struct nlattr *nla)
9a812198 2944{
c860c6b1 2945 struct ip_vs_service_user_kern usvc;
26c15cfd 2946 struct ip_vs_service *svc;
9a812198
JV
2947 int ret;
2948
fc723250 2949 ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
26c15cfd 2950 return ret ? ERR_PTR(ret) : svc;
9a812198
JV
2951}
2952
2953static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2954{
2955 struct nlattr *nl_dest;
2956
2957 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2958 if (!nl_dest)
2959 return -EMSGSIZE;
2960
2961 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2962 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2963
2964 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2965 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2966 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2967 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2968 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2969 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2970 atomic_read(&dest->activeconns));
2971 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2972 atomic_read(&dest->inactconns));
2973 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2974 atomic_read(&dest->persistconns));
2975
2976 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2977 goto nla_put_failure;
2978
2979 nla_nest_end(skb, nl_dest);
2980
2981 return 0;
2982
2983nla_put_failure:
2984 nla_nest_cancel(skb, nl_dest);
2985 return -EMSGSIZE;
2986}
2987
2988static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2989 struct netlink_callback *cb)
2990{
2991 void *hdr;
2992
2993 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2994 &ip_vs_genl_family, NLM_F_MULTI,
2995 IPVS_CMD_NEW_DEST);
2996 if (!hdr)
2997 return -EMSGSIZE;
2998
2999 if (ip_vs_genl_fill_dest(skb, dest) < 0)
3000 goto nla_put_failure;
3001
3002 return genlmsg_end(skb, hdr);
3003
3004nla_put_failure:
3005 genlmsg_cancel(skb, hdr);
3006 return -EMSGSIZE;
3007}
3008
3009static int ip_vs_genl_dump_dests(struct sk_buff *skb,
3010 struct netlink_callback *cb)
3011{
3012 int idx = 0;
3013 int start = cb->args[0];
3014 struct ip_vs_service *svc;
3015 struct ip_vs_dest *dest;
3016 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
fc723250 3017 struct net *net;
9a812198
JV
3018
3019 mutex_lock(&__ip_vs_mutex);
3020
3021 /* Try to find the service for which to dump destinations */
3022 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
3023 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
3024 goto out_err;
3025
fc723250
HS
3026 net = skb_sknet(skb);
3027 svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
9a812198
JV
3028 if (IS_ERR(svc) || svc == NULL)
3029 goto out_err;
3030
3031 /* Dump the destinations */
3032 list_for_each_entry(dest, &svc->destinations, n_list) {
3033 if (++idx <= start)
3034 continue;
3035 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
3036 idx--;
3037 goto nla_put_failure;
3038 }
3039 }
3040
3041nla_put_failure:
3042 cb->args[0] = idx;
9a812198
JV
3043
3044out_err:
3045 mutex_unlock(&__ip_vs_mutex);
3046
3047 return skb->len;
3048}
3049
c860c6b1 3050static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
9a812198
JV
3051 struct nlattr *nla, int full_entry)
3052{
3053 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3054 struct nlattr *nla_addr, *nla_port;
3055
3056 /* Parse mandatory identifying destination fields first */
3057 if (nla == NULL ||
3058 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3059 return -EINVAL;
3060
3061 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
3062 nla_port = attrs[IPVS_DEST_ATTR_PORT];
3063
3064 if (!(nla_addr && nla_port))
3065 return -EINVAL;
3066
258c8893
SH
3067 memset(udest, 0, sizeof(*udest));
3068
9a812198
JV
3069 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3070 udest->port = nla_get_u16(nla_port);
3071
3072 /* If a full entry was requested, check for the additional fields */
3073 if (full_entry) {
3074 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3075 *nla_l_thresh;
3076
3077 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3078 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
3079 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
3080 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
3081
3082 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3083 return -EINVAL;
3084
3085 udest->conn_flags = nla_get_u32(nla_fwd)
3086 & IP_VS_CONN_F_FWD_MASK;
3087 udest->weight = nla_get_u32(nla_weight);
3088 udest->u_threshold = nla_get_u32(nla_u_thresh);
3089 udest->l_threshold = nla_get_u32(nla_l_thresh);
3090 }
3091
3092 return 0;
3093}
3094
3095static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3096 const char *mcast_ifn, __be32 syncid)
3097{
3098 struct nlattr *nl_daemon;
3099
3100 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3101 if (!nl_daemon)
3102 return -EMSGSIZE;
3103
3104 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
3105 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
3106 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3107
3108 nla_nest_end(skb, nl_daemon);
3109
3110 return 0;
3111
3112nla_put_failure:
3113 nla_nest_cancel(skb, nl_daemon);
3114 return -EMSGSIZE;
3115}
3116
3117static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3118 const char *mcast_ifn, __be32 syncid,
3119 struct netlink_callback *cb)
3120{
3121 void *hdr;
3122 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3123 &ip_vs_genl_family, NLM_F_MULTI,
3124 IPVS_CMD_NEW_DAEMON);
3125 if (!hdr)
3126 return -EMSGSIZE;
3127
3128 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3129 goto nla_put_failure;
3130
3131 return genlmsg_end(skb, hdr);
3132
3133nla_put_failure:
3134 genlmsg_cancel(skb, hdr);
3135 return -EMSGSIZE;
3136}
3137
3138static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3139 struct netlink_callback *cb)
3140{
f131315f
HS
3141 struct net *net = skb_net(skb);
3142 struct netns_ipvs *ipvs = net_ipvs(net);
3143
9a812198 3144 mutex_lock(&__ip_vs_mutex);
f131315f 3145 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
9a812198 3146 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
f131315f
HS
3147 ipvs->master_mcast_ifn,
3148 ipvs->master_syncid, cb) < 0)
9a812198
JV
3149 goto nla_put_failure;
3150
3151 cb->args[0] = 1;
3152 }
3153
f131315f 3154 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
9a812198 3155 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
f131315f
HS
3156 ipvs->backup_mcast_ifn,
3157 ipvs->backup_syncid, cb) < 0)
9a812198
JV
3158 goto nla_put_failure;
3159
3160 cb->args[1] = 1;
3161 }
3162
3163nla_put_failure:
3164 mutex_unlock(&__ip_vs_mutex);
3165
3166 return skb->len;
3167}
3168
f131315f 3169static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
9a812198
JV
3170{
3171 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3172 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3173 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3174 return -EINVAL;
3175
f131315f
HS
3176 return start_sync_thread(net,
3177 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
9a812198
JV
3178 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3179 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3180}
3181
f131315f 3182static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
9a812198
JV
3183{
3184 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3185 return -EINVAL;
3186
f131315f
HS
3187 return stop_sync_thread(net,
3188 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
9a812198
JV
3189}
3190
9330419d 3191static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
9a812198
JV
3192{
3193 struct ip_vs_timeout_user t;
3194
9330419d 3195 __ip_vs_get_timeouts(net, &t);
9a812198
JV
3196
3197 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3198 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3199
3200 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3201 t.tcp_fin_timeout =
3202 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3203
3204 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3205 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3206
9330419d 3207 return ip_vs_set_timeout(net, &t);
9a812198
JV
3208}
3209
3210static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3211{
3212 struct ip_vs_service *svc = NULL;
c860c6b1
JV
3213 struct ip_vs_service_user_kern usvc;
3214 struct ip_vs_dest_user_kern udest;
9a812198
JV
3215 int ret = 0, cmd;
3216 int need_full_svc = 0, need_full_dest = 0;
fc723250 3217 struct net *net;
9a812198 3218
fc723250 3219 net = skb_sknet(skb);
9a812198
JV
3220 cmd = info->genlhdr->cmd;
3221
3222 mutex_lock(&__ip_vs_mutex);
3223
3224 if (cmd == IPVS_CMD_FLUSH) {
fc723250 3225 ret = ip_vs_flush(net);
9a812198
JV
3226 goto out;
3227 } else if (cmd == IPVS_CMD_SET_CONFIG) {
9330419d 3228 ret = ip_vs_genl_set_config(net, info->attrs);
9a812198
JV
3229 goto out;
3230 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3231 cmd == IPVS_CMD_DEL_DAEMON) {
3232
3233 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3234
3235 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3236 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3237 info->attrs[IPVS_CMD_ATTR_DAEMON],
3238 ip_vs_daemon_policy)) {
3239 ret = -EINVAL;
3240 goto out;
3241 }
3242
3243 if (cmd == IPVS_CMD_NEW_DAEMON)
f131315f 3244 ret = ip_vs_genl_new_daemon(net, daemon_attrs);
9a812198 3245 else
f131315f 3246 ret = ip_vs_genl_del_daemon(net, daemon_attrs);
9a812198
JV
3247 goto out;
3248 } else if (cmd == IPVS_CMD_ZERO &&
3249 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
fc723250 3250 ret = ip_vs_zero_all(net);
9a812198
JV
3251 goto out;
3252 }
3253
3254 /* All following commands require a service argument, so check if we
3255 * received a valid one. We need a full service specification when
3256 * adding / editing a service. Only identifying members otherwise. */
3257 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3258 need_full_svc = 1;
3259
fc723250 3260 ret = ip_vs_genl_parse_service(net, &usvc,
9a812198 3261 info->attrs[IPVS_CMD_ATTR_SERVICE],
26c15cfd 3262 need_full_svc, &svc);
9a812198
JV
3263 if (ret)
3264 goto out;
3265
9a812198
JV
3266 /* Unless we're adding a new service, the service must already exist */
3267 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3268 ret = -ESRCH;
3269 goto out;
3270 }
3271
3272 /* Destination commands require a valid destination argument. For
3273 * adding / editing a destination, we need a full destination
3274 * specification. */
3275 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3276 cmd == IPVS_CMD_DEL_DEST) {
3277 if (cmd != IPVS_CMD_DEL_DEST)
3278 need_full_dest = 1;
3279
3280 ret = ip_vs_genl_parse_dest(&udest,
3281 info->attrs[IPVS_CMD_ATTR_DEST],
3282 need_full_dest);
3283 if (ret)
3284 goto out;
3285 }
3286
3287 switch (cmd) {
3288 case IPVS_CMD_NEW_SERVICE:
3289 if (svc == NULL)
fc723250 3290 ret = ip_vs_add_service(net, &usvc, &svc);
9a812198
JV
3291 else
3292 ret = -EEXIST;
3293 break;
3294 case IPVS_CMD_SET_SERVICE:
3295 ret = ip_vs_edit_service(svc, &usvc);
3296 break;
3297 case IPVS_CMD_DEL_SERVICE:
3298 ret = ip_vs_del_service(svc);
26c15cfd 3299 /* do not use svc, it can be freed */
9a812198
JV
3300 break;
3301 case IPVS_CMD_NEW_DEST:
3302 ret = ip_vs_add_dest(svc, &udest);
3303 break;
3304 case IPVS_CMD_SET_DEST:
3305 ret = ip_vs_edit_dest(svc, &udest);
3306 break;
3307 case IPVS_CMD_DEL_DEST:
3308 ret = ip_vs_del_dest(svc, &udest);
3309 break;
3310 case IPVS_CMD_ZERO:
3311 ret = ip_vs_zero_service(svc);
3312 break;
3313 default:
3314 ret = -EINVAL;
3315 }
3316
3317out:
9a812198
JV
3318 mutex_unlock(&__ip_vs_mutex);
3319
3320 return ret;
3321}
3322
3323static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3324{
3325 struct sk_buff *msg;
3326 void *reply;
3327 int ret, cmd, reply_cmd;
fc723250 3328 struct net *net;
9a812198 3329
fc723250 3330 net = skb_sknet(skb);
9a812198
JV
3331 cmd = info->genlhdr->cmd;
3332
3333 if (cmd == IPVS_CMD_GET_SERVICE)
3334 reply_cmd = IPVS_CMD_NEW_SERVICE;
3335 else if (cmd == IPVS_CMD_GET_INFO)
3336 reply_cmd = IPVS_CMD_SET_INFO;
3337 else if (cmd == IPVS_CMD_GET_CONFIG)
3338 reply_cmd = IPVS_CMD_SET_CONFIG;
3339 else {
1e3e238e 3340 pr_err("unknown Generic Netlink command\n");
9a812198
JV
3341 return -EINVAL;
3342 }
3343
3344 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3345 if (!msg)
3346 return -ENOMEM;
3347
3348 mutex_lock(&__ip_vs_mutex);
3349
3350 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3351 if (reply == NULL)
3352 goto nla_put_failure;
3353
3354 switch (cmd) {
3355 case IPVS_CMD_GET_SERVICE:
3356 {
3357 struct ip_vs_service *svc;
3358
fc723250
HS
3359 svc = ip_vs_genl_find_service(net,
3360 info->attrs[IPVS_CMD_ATTR_SERVICE]);
9a812198
JV
3361 if (IS_ERR(svc)) {
3362 ret = PTR_ERR(svc);
3363 goto out_err;
3364 } else if (svc) {
3365 ret = ip_vs_genl_fill_service(msg, svc);
9a812198
JV
3366 if (ret)
3367 goto nla_put_failure;
3368 } else {
3369 ret = -ESRCH;
3370 goto out_err;
3371 }
3372
3373 break;
3374 }
3375
3376 case IPVS_CMD_GET_CONFIG:
3377 {
3378 struct ip_vs_timeout_user t;
3379
9330419d 3380 __ip_vs_get_timeouts(net, &t);
9a812198
JV
3381#ifdef CONFIG_IP_VS_PROTO_TCP
3382 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3383 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3384 t.tcp_fin_timeout);
3385#endif
3386#ifdef CONFIG_IP_VS_PROTO_UDP
3387 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3388#endif
3389
3390 break;
3391 }
3392
3393 case IPVS_CMD_GET_INFO:
3394 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3395 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
6f7edb48 3396 ip_vs_conn_tab_size);
9a812198
JV
3397 break;
3398 }
3399
3400 genlmsg_end(msg, reply);
134e6375 3401 ret = genlmsg_reply(msg, info);
9a812198
JV
3402 goto out;
3403
3404nla_put_failure:
1e3e238e 3405 pr_err("not enough space in Netlink message\n");
9a812198
JV
3406 ret = -EMSGSIZE;
3407
3408out_err:
3409 nlmsg_free(msg);
3410out:
3411 mutex_unlock(&__ip_vs_mutex);
3412
3413 return ret;
3414}
3415
3416
3417static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3418 {
3419 .cmd = IPVS_CMD_NEW_SERVICE,
3420 .flags = GENL_ADMIN_PERM,
3421 .policy = ip_vs_cmd_policy,
3422 .doit = ip_vs_genl_set_cmd,
3423 },
3424 {
3425 .cmd = IPVS_CMD_SET_SERVICE,
3426 .flags = GENL_ADMIN_PERM,
3427 .policy = ip_vs_cmd_policy,
3428 .doit = ip_vs_genl_set_cmd,
3429 },
3430 {
3431 .cmd = IPVS_CMD_DEL_SERVICE,
3432 .flags = GENL_ADMIN_PERM,
3433 .policy = ip_vs_cmd_policy,
3434 .doit = ip_vs_genl_set_cmd,
3435 },
3436 {
3437 .cmd = IPVS_CMD_GET_SERVICE,
3438 .flags = GENL_ADMIN_PERM,
3439 .doit = ip_vs_genl_get_cmd,
3440 .dumpit = ip_vs_genl_dump_services,
3441 .policy = ip_vs_cmd_policy,
3442 },
3443 {
3444 .cmd = IPVS_CMD_NEW_DEST,
3445 .flags = GENL_ADMIN_PERM,
3446 .policy = ip_vs_cmd_policy,
3447 .doit = ip_vs_genl_set_cmd,
3448 },
3449 {
3450 .cmd = IPVS_CMD_SET_DEST,
3451 .flags = GENL_ADMIN_PERM,
3452 .policy = ip_vs_cmd_policy,
3453 .doit = ip_vs_genl_set_cmd,
3454 },
3455 {
3456 .cmd = IPVS_CMD_DEL_DEST,
3457 .flags = GENL_ADMIN_PERM,
3458 .policy = ip_vs_cmd_policy,
3459 .doit = ip_vs_genl_set_cmd,
3460 },
3461 {
3462 .cmd = IPVS_CMD_GET_DEST,
3463 .flags = GENL_ADMIN_PERM,
3464 .policy = ip_vs_cmd_policy,
3465 .dumpit = ip_vs_genl_dump_dests,
3466 },
3467 {
3468 .cmd = IPVS_CMD_NEW_DAEMON,
3469 .flags = GENL_ADMIN_PERM,
3470 .policy = ip_vs_cmd_policy,
3471 .doit = ip_vs_genl_set_cmd,
3472 },
3473 {
3474 .cmd = IPVS_CMD_DEL_DAEMON,
3475 .flags = GENL_ADMIN_PERM,
3476 .policy = ip_vs_cmd_policy,
3477 .doit = ip_vs_genl_set_cmd,
3478 },
3479 {
3480 .cmd = IPVS_CMD_GET_DAEMON,
3481 .flags = GENL_ADMIN_PERM,
3482 .dumpit = ip_vs_genl_dump_daemons,
3483 },
3484 {
3485 .cmd = IPVS_CMD_SET_CONFIG,
3486 .flags = GENL_ADMIN_PERM,
3487 .policy = ip_vs_cmd_policy,
3488 .doit = ip_vs_genl_set_cmd,
3489 },
3490 {
3491 .cmd = IPVS_CMD_GET_CONFIG,
3492 .flags = GENL_ADMIN_PERM,
3493 .doit = ip_vs_genl_get_cmd,
3494 },
3495 {
3496 .cmd = IPVS_CMD_GET_INFO,
3497 .flags = GENL_ADMIN_PERM,
3498 .doit = ip_vs_genl_get_cmd,
3499 },
3500 {
3501 .cmd = IPVS_CMD_ZERO,
3502 .flags = GENL_ADMIN_PERM,
3503 .policy = ip_vs_cmd_policy,
3504 .doit = ip_vs_genl_set_cmd,
3505 },
3506 {
3507 .cmd = IPVS_CMD_FLUSH,
3508 .flags = GENL_ADMIN_PERM,
3509 .doit = ip_vs_genl_set_cmd,
3510 },
3511};
3512
3513static int __init ip_vs_genl_register(void)
3514{
8f698d54
MM
3515 return genl_register_family_with_ops(&ip_vs_genl_family,
3516 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
9a812198
JV
3517}
3518
3519static void ip_vs_genl_unregister(void)
3520{
3521 genl_unregister_family(&ip_vs_genl_family);
3522}
3523
3524/* End of Generic Netlink interface definitions */
3525
61b1ab45
HS
3526/*
3527 * Per-netns init/exit functions.
3528 */
3529int __net_init __ip_vs_control_init(struct net *net)
3530{
fc723250
HS
3531 int idx;
3532 struct netns_ipvs *ipvs = net_ipvs(net);
3533
61b1ab45
HS
3534 if (!net_eq(net, &init_net)) /* netns not enabled yet */
3535 return -EPERM;
b17fc996
HS
3536 /* procfs stats */
3537 ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL);
3538 if (ipvs->tot_stats == NULL) {
3539 pr_err("%s(): no memory.\n", __func__);
3540 return -ENOMEM;
3541 }
3542 ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3543 if (!ipvs->cpustats) {
3544 pr_err("%s() alloc_percpu failed\n", __func__);
3545 goto err_alloc;
3546 }
3547 spin_lock_init(&ipvs->tot_stats->lock);
61b1ab45 3548
fc723250
HS
3549 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3550 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3551
61b1ab45
HS
3552 proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3553 proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
b17fc996
HS
3554 proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3555 &ip_vs_stats_percpu_fops);
61b1ab45
HS
3556 sysctl_header = register_net_sysctl_table(net, net_vs_ctl_path,
3557 vs_vars);
3558 if (sysctl_header == NULL)
3559 goto err_reg;
b17fc996 3560 ip_vs_new_estimator(net, ipvs->tot_stats);
61b1ab45
HS
3561 return 0;
3562
3563err_reg:
b17fc996
HS
3564 free_percpu(ipvs->cpustats);
3565err_alloc:
3566 kfree(ipvs->tot_stats);
61b1ab45
HS
3567 return -ENOMEM;
3568}
3569
3570static void __net_exit __ip_vs_control_cleanup(struct net *net)
3571{
b17fc996
HS
3572 struct netns_ipvs *ipvs = net_ipvs(net);
3573
61b1ab45
HS
3574 if (!net_eq(net, &init_net)) /* netns not enabled yet */
3575 return;
3576
b17fc996 3577 ip_vs_kill_estimator(net, ipvs->tot_stats);
61b1ab45 3578 unregister_net_sysctl_table(sysctl_header);
b17fc996 3579 proc_net_remove(net, "ip_vs_stats_percpu");
61b1ab45
HS
3580 proc_net_remove(net, "ip_vs_stats");
3581 proc_net_remove(net, "ip_vs");
b17fc996
HS
3582 free_percpu(ipvs->cpustats);
3583 kfree(ipvs->tot_stats);
61b1ab45
HS
3584}
3585
3586static struct pernet_operations ipvs_control_ops = {
3587 .init = __ip_vs_control_init,
3588 .exit = __ip_vs_control_cleanup,
3589};
1da177e4 3590
048cf48b 3591int __init ip_vs_control_init(void)
1da177e4 3592{
1da177e4 3593 int idx;
fc723250 3594 int ret;
1da177e4
LT
3595
3596 EnterFunction(2);
3597
fc723250 3598 /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
d86bef73
EB
3599 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3600 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3601 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3602 }
fc723250
HS
3603
3604 ret = register_pernet_subsys(&ipvs_control_ops);
3605 if (ret) {
3606 pr_err("cannot register namespace.\n");
3607 goto err;
d86bef73 3608 }
fc723250
HS
3609
3610 smp_wmb(); /* Do we really need it now ? */
d86bef73 3611
1da177e4
LT
3612 ret = nf_register_sockopt(&ip_vs_sockopts);
3613 if (ret) {
1e3e238e 3614 pr_err("cannot register sockopt.\n");
fc723250 3615 goto err_net;
1da177e4
LT
3616 }
3617
9a812198
JV
3618 ret = ip_vs_genl_register();
3619 if (ret) {
1e3e238e 3620 pr_err("cannot register Generic Netlink interface.\n");
9a812198 3621 nf_unregister_sockopt(&ip_vs_sockopts);
fc723250 3622 goto err_net;
9a812198
JV
3623 }
3624
1da177e4
LT
3625 /* Hook the defense timer */
3626 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3627
3628 LeaveFunction(2);
3629 return 0;
fc723250
HS
3630
3631err_net:
3632 unregister_pernet_subsys(&ipvs_control_ops);
3633err:
3634 return ret;
1da177e4
LT
3635}
3636
3637
3638void ip_vs_control_cleanup(void)
3639{
3640 EnterFunction(2);
3641 ip_vs_trash_cleanup();
afe2c511 3642 cancel_delayed_work_sync(&defense_work);
28e53bdd 3643 cancel_work_sync(&defense_work.work);
61b1ab45 3644 unregister_pernet_subsys(&ipvs_control_ops);
9a812198 3645 ip_vs_genl_unregister();
1da177e4
LT
3646 nf_unregister_sockopt(&ip_vs_sockopts);
3647 LeaveFunction(2);
3648}