Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/davem/net...
[linux-2.6.git] / net / netfilter / ipvs / ip_vs_ctl.c
1 /*
2  * IPVS         An implementation of the IP virtual server support for the
3  *              LINUX operating system.  IPVS is now implemented as a module
4  *              over the NetFilter framework. IPVS can be used to build a
5  *              high-performance and highly available server based on a
6  *              cluster of servers.
7  *
8  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
9  *              Peter Kese <peter.kese@ijs.si>
10  *              Julian Anastasov <ja@ssi.bg>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  *
17  * Changes:
18  *
19  */
20
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
28 #include <linux/fs.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34 #include <linux/slab.h>
35
36 #include <linux/netfilter.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/mutex.h>
39
40 #include <net/net_namespace.h>
41 #include <net/ip.h>
42 #ifdef CONFIG_IP_VS_IPV6
43 #include <net/ipv6.h>
44 #include <net/ip6_route.h>
45 #endif
46 #include <net/route.h>
47 #include <net/sock.h>
48 #include <net/genetlink.h>
49
50 #include <asm/uaccess.h>
51
52 #include <net/ip_vs.h>
53
54 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
55 static DEFINE_MUTEX(__ip_vs_mutex);
56
57 /* lock for service table */
58 static DEFINE_RWLOCK(__ip_vs_svc_lock);
59
60 /* lock for table with the real services */
61 static DEFINE_RWLOCK(__ip_vs_rs_lock);
62
63 /* lock for state and timeout tables */
64 static DEFINE_SPINLOCK(ip_vs_securetcp_lock);
65
66 /* lock for drop entry handling */
67 static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
68
69 /* lock for drop packet handling */
70 static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
71
72 /* 1/rate drop and drop-entry variables */
73 int ip_vs_drop_rate = 0;
74 int ip_vs_drop_counter = 0;
75 static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
76
77 /* number of virtual services */
78 static int ip_vs_num_services = 0;
79
80 /* sysctl variables */
81 static int sysctl_ip_vs_drop_entry = 0;
82 static int sysctl_ip_vs_drop_packet = 0;
83 static int sysctl_ip_vs_secure_tcp = 0;
84 static int sysctl_ip_vs_amemthresh = 1024;
85 static int sysctl_ip_vs_am_droprate = 10;
86 int sysctl_ip_vs_cache_bypass = 0;
87 int sysctl_ip_vs_expire_nodest_conn = 0;
88 int sysctl_ip_vs_expire_quiescent_template = 0;
89 int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
90 int sysctl_ip_vs_nat_icmp_send = 0;
91 #ifdef CONFIG_IP_VS_NFCT
92 int sysctl_ip_vs_conntrack;
93 #endif
94 int sysctl_ip_vs_snat_reroute = 1;
95 int sysctl_ip_vs_sync_ver = 1;          /* Default version of sync proto */
96
#ifdef CONFIG_IP_VS_DEBUG
/* Debug level; only compiled in when CONFIG_IP_VS_DEBUG is set. */
static int sysctl_ip_vs_debug_level;

/*
 *      Returns the current value of sysctl_ip_vs_debug_level.
 */
int ip_vs_get_debug_level(void)
{
	const int level = sysctl_ip_vs_debug_level;

	return level;
}
#endif
105
#ifdef CONFIG_IP_VS_IPV6
/*
 *      Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way?
 *
 *      Looks up the output route for @addr in init_net (any output
 *      interface, unspecified source) and returns 1 when the route's
 *      device carries IFF_LOOPBACK, 0 otherwise.
 */
static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	int is_local;
	struct flowi fl = {
		.oif = 0,
		.fl6_dst = *addr,
		.fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
	};

	dst = ip6_route_output(&init_net, NULL, &fl);
	rt = (struct rt6_info *)dst;
	is_local = rt && rt->rt6i_dev &&
		   (rt->rt6i_dev->flags & IFF_LOOPBACK);

	/*
	 * The lookup hands back a referenced route entry; drop that
	 * reference now that the device flags have been inspected,
	 * otherwise every call leaks one dst reference.
	 */
	dst_release(dst);

	return is_local;
}
#endif
124 /*
125  *      update_defense_level is called from keventd and from sysctl,
126  *      so it needs to protect itself from softirqs
127  */
128 static void update_defense_level(void)
129 {
130         struct sysinfo i;
131         static int old_secure_tcp = 0;
132         int availmem;
133         int nomem;
134         int to_change = -1;
135
136         /* we only count free and buffered memory (in pages) */
137         si_meminfo(&i);
138         availmem = i.freeram + i.bufferram;
139         /* however in linux 2.5 the i.bufferram is total page cache size,
140            we need adjust it */
141         /* si_swapinfo(&i); */
142         /* availmem = availmem - (i.totalswap - i.freeswap); */
143
144         nomem = (availmem < sysctl_ip_vs_amemthresh);
145
146         local_bh_disable();
147
148         /* drop_entry */
149         spin_lock(&__ip_vs_dropentry_lock);
150         switch (sysctl_ip_vs_drop_entry) {
151         case 0:
152                 atomic_set(&ip_vs_dropentry, 0);
153                 break;
154         case 1:
155                 if (nomem) {
156                         atomic_set(&ip_vs_dropentry, 1);
157                         sysctl_ip_vs_drop_entry = 2;
158                 } else {
159                         atomic_set(&ip_vs_dropentry, 0);
160                 }
161                 break;
162         case 2:
163                 if (nomem) {
164                         atomic_set(&ip_vs_dropentry, 1);
165                 } else {
166                         atomic_set(&ip_vs_dropentry, 0);
167                         sysctl_ip_vs_drop_entry = 1;
168                 };
169                 break;
170         case 3:
171                 atomic_set(&ip_vs_dropentry, 1);
172                 break;
173         }
174         spin_unlock(&__ip_vs_dropentry_lock);
175
176         /* drop_packet */
177         spin_lock(&__ip_vs_droppacket_lock);
178         switch (sysctl_ip_vs_drop_packet) {
179         case 0:
180                 ip_vs_drop_rate = 0;
181                 break;
182         case 1:
183                 if (nomem) {
184                         ip_vs_drop_rate = ip_vs_drop_counter
185                                 = sysctl_ip_vs_amemthresh /
186                                 (sysctl_ip_vs_amemthresh-availmem);
187                         sysctl_ip_vs_drop_packet = 2;
188                 } else {
189                         ip_vs_drop_rate = 0;
190                 }
191                 break;
192         case 2:
193                 if (nomem) {
194                         ip_vs_drop_rate = ip_vs_drop_counter
195                                 = sysctl_ip_vs_amemthresh /
196                                 (sysctl_ip_vs_amemthresh-availmem);
197                 } else {
198                         ip_vs_drop_rate = 0;
199                         sysctl_ip_vs_drop_packet = 1;
200                 }
201                 break;
202         case 3:
203                 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
204                 break;
205         }
206         spin_unlock(&__ip_vs_droppacket_lock);
207
208         /* secure_tcp */
209         spin_lock(&ip_vs_securetcp_lock);
210         switch (sysctl_ip_vs_secure_tcp) {
211         case 0:
212                 if (old_secure_tcp >= 2)
213                         to_change = 0;
214                 break;
215         case 1:
216                 if (nomem) {
217                         if (old_secure_tcp < 2)
218                                 to_change = 1;
219                         sysctl_ip_vs_secure_tcp = 2;
220                 } else {
221                         if (old_secure_tcp >= 2)
222                                 to_change = 0;
223                 }
224                 break;
225         case 2:
226                 if (nomem) {
227                         if (old_secure_tcp < 2)
228                                 to_change = 1;
229                 } else {
230                         if (old_secure_tcp >= 2)
231                                 to_change = 0;
232                         sysctl_ip_vs_secure_tcp = 1;
233                 }
234                 break;
235         case 3:
236                 if (old_secure_tcp < 2)
237                         to_change = 1;
238                 break;
239         }
240         old_secure_tcp = sysctl_ip_vs_secure_tcp;
241         if (to_change >= 0)
242                 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
243         spin_unlock(&ip_vs_securetcp_lock);
244
245         local_bh_enable();
246 }
247
248
249 /*
250  *      Timer for checking the defense
251  */
252 #define DEFENSE_TIMER_PERIOD    1*HZ
253 static void defense_work_handler(struct work_struct *work);
254 static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
255
256 static void defense_work_handler(struct work_struct *work)
257 {
258         update_defense_level();
259         if (atomic_read(&ip_vs_dropentry))
260                 ip_vs_random_dropentry();
261
262         schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
263 }
264
265 int
266 ip_vs_use_count_inc(void)
267 {
268         return try_module_get(THIS_MODULE);
269 }
270
271 void
272 ip_vs_use_count_dec(void)
273 {
274         module_put(THIS_MODULE);
275 }
276
277
278 /*
279  *      Hash table: for virtual service lookups
280  */
281 #define IP_VS_SVC_TAB_BITS 8
282 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
283 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
284
285 /* the service table hashed by <protocol, addr, port> */
286 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
287 /* the service table hashed by fwmark */
288 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
289
290 /*
291  *      Hash table: for real service lookups
292  */
293 #define IP_VS_RTAB_BITS 4
294 #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
295 #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
296
297 static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
298
299 /*
300  *      Trash for destinations
301  */
302 static LIST_HEAD(ip_vs_dest_trash);
303
304 /*
305  *      FTP & NULL virtual service counters
306  */
307 static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
308 static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
309
310
311 /*
312  *      Returns hash value for virtual service
313  */
314 static __inline__ unsigned
315 ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
316                   __be16 port)
317 {
318         register unsigned porth = ntohs(port);
319         __be32 addr_fold = addr->ip;
320
321 #ifdef CONFIG_IP_VS_IPV6
322         if (af == AF_INET6)
323                 addr_fold = addr->ip6[0]^addr->ip6[1]^
324                             addr->ip6[2]^addr->ip6[3];
325 #endif
326
327         return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
328                 & IP_VS_SVC_TAB_MASK;
329 }
330
331 /*
332  *      Returns hash value of fwmark for virtual service lookup
333  */
334 static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
335 {
336         return fwmark & IP_VS_SVC_TAB_MASK;
337 }
338
339 /*
340  *      Hashes a service in the ip_vs_svc_table by <proto,addr,port>
341  *      or in the ip_vs_svc_fwm_table by fwmark.
342  *      Should be called with locked tables.
343  */
344 static int ip_vs_svc_hash(struct ip_vs_service *svc)
345 {
346         unsigned hash;
347
348         if (svc->flags & IP_VS_SVC_F_HASHED) {
349                 pr_err("%s(): request for already hashed, called from %pF\n",
350                        __func__, __builtin_return_address(0));
351                 return 0;
352         }
353
354         if (svc->fwmark == 0) {
355                 /*
356                  *  Hash it by <protocol,addr,port> in ip_vs_svc_table
357                  */
358                 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
359                                          svc->port);
360                 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
361         } else {
362                 /*
363                  *  Hash it by fwmark in ip_vs_svc_fwm_table
364                  */
365                 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
366                 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
367         }
368
369         svc->flags |= IP_VS_SVC_F_HASHED;
370         /* increase its refcnt because it is referenced by the svc table */
371         atomic_inc(&svc->refcnt);
372         return 1;
373 }
374
375
376 /*
377  *      Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
378  *      Should be called with locked tables.
379  */
380 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
381 {
382         if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
383                 pr_err("%s(): request for unhash flagged, called from %pF\n",
384                        __func__, __builtin_return_address(0));
385                 return 0;
386         }
387
388         if (svc->fwmark == 0) {
389                 /* Remove it from the ip_vs_svc_table table */
390                 list_del(&svc->s_list);
391         } else {
392                 /* Remove it from the ip_vs_svc_fwm_table table */
393                 list_del(&svc->f_list);
394         }
395
396         svc->flags &= ~IP_VS_SVC_F_HASHED;
397         atomic_dec(&svc->refcnt);
398         return 1;
399 }
400
401
402 /*
403  *      Get service by {proto,addr,port} in the service table.
404  */
405 static inline struct ip_vs_service *
406 __ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr,
407                     __be16 vport)
408 {
409         unsigned hash;
410         struct ip_vs_service *svc;
411
412         /* Check for "full" addressed entries */
413         hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
414
415         list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
416                 if ((svc->af == af)
417                     && ip_vs_addr_equal(af, &svc->addr, vaddr)
418                     && (svc->port == vport)
419                     && (svc->protocol == protocol)) {
420                         /* HIT */
421                         return svc;
422                 }
423         }
424
425         return NULL;
426 }
427
428
429 /*
430  *      Get service by {fwmark} in the service table.
431  */
432 static inline struct ip_vs_service *
433 __ip_vs_svc_fwm_find(int af, __u32 fwmark)
434 {
435         unsigned hash;
436         struct ip_vs_service *svc;
437
438         /* Check for fwmark addressed entries */
439         hash = ip_vs_svc_fwm_hashkey(fwmark);
440
441         list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
442                 if (svc->fwmark == fwmark && svc->af == af) {
443                         /* HIT */
444                         return svc;
445                 }
446         }
447
448         return NULL;
449 }
450
451 struct ip_vs_service *
452 ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
453                   const union nf_inet_addr *vaddr, __be16 vport)
454 {
455         struct ip_vs_service *svc;
456
457         read_lock(&__ip_vs_svc_lock);
458
459         /*
460          *      Check the table hashed by fwmark first
461          */
462         if (fwmark && (svc = __ip_vs_svc_fwm_find(af, fwmark)))
463                 goto out;
464
465         /*
466          *      Check the table hashed by <protocol,addr,port>
467          *      for "full" addressed entries
468          */
469         svc = __ip_vs_service_find(af, protocol, vaddr, vport);
470
471         if (svc == NULL
472             && protocol == IPPROTO_TCP
473             && atomic_read(&ip_vs_ftpsvc_counter)
474             && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
475                 /*
476                  * Check if ftp service entry exists, the packet
477                  * might belong to FTP data connections.
478                  */
479                 svc = __ip_vs_service_find(af, protocol, vaddr, FTPPORT);
480         }
481
482         if (svc == NULL
483             && atomic_read(&ip_vs_nullsvc_counter)) {
484                 /*
485                  * Check if the catch-all port (port zero) exists
486                  */
487                 svc = __ip_vs_service_find(af, protocol, vaddr, 0);
488         }
489
490   out:
491         if (svc)
492                 atomic_inc(&svc->usecnt);
493         read_unlock(&__ip_vs_svc_lock);
494
495         IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
496                       fwmark, ip_vs_proto_name(protocol),
497                       IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
498                       svc ? "hit" : "not hit");
499
500         return svc;
501 }
502
503
504 static inline void
505 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
506 {
507         atomic_inc(&svc->refcnt);
508         dest->svc = svc;
509 }
510
511 static void
512 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
513 {
514         struct ip_vs_service *svc = dest->svc;
515
516         dest->svc = NULL;
517         if (atomic_dec_and_test(&svc->refcnt)) {
518                 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
519                               svc->fwmark,
520                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
521                               ntohs(svc->port), atomic_read(&svc->usecnt));
522                 kfree(svc);
523         }
524 }
525
526
527 /*
528  *      Returns hash value for real service
529  */
530 static inline unsigned ip_vs_rs_hashkey(int af,
531                                             const union nf_inet_addr *addr,
532                                             __be16 port)
533 {
534         register unsigned porth = ntohs(port);
535         __be32 addr_fold = addr->ip;
536
537 #ifdef CONFIG_IP_VS_IPV6
538         if (af == AF_INET6)
539                 addr_fold = addr->ip6[0]^addr->ip6[1]^
540                             addr->ip6[2]^addr->ip6[3];
541 #endif
542
543         return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
544                 & IP_VS_RTAB_MASK;
545 }
546
547 /*
548  *      Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
549  *      should be called with locked tables.
550  */
551 static int ip_vs_rs_hash(struct ip_vs_dest *dest)
552 {
553         unsigned hash;
554
555         if (!list_empty(&dest->d_list)) {
556                 return 0;
557         }
558
559         /*
560          *      Hash by proto,addr,port,
561          *      which are the parameters of the real service.
562          */
563         hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
564
565         list_add(&dest->d_list, &ip_vs_rtable[hash]);
566
567         return 1;
568 }
569
570 /*
571  *      UNhashes ip_vs_dest from ip_vs_rtable.
572  *      should be called with locked tables.
573  */
574 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
575 {
576         /*
577          * Remove it from the ip_vs_rtable table.
578          */
579         if (!list_empty(&dest->d_list)) {
580                 list_del(&dest->d_list);
581                 INIT_LIST_HEAD(&dest->d_list);
582         }
583
584         return 1;
585 }
586
587 /*
588  *      Lookup real service by <proto,addr,port> in the real service table.
589  */
590 struct ip_vs_dest *
591 ip_vs_lookup_real_service(int af, __u16 protocol,
592                           const union nf_inet_addr *daddr,
593                           __be16 dport)
594 {
595         unsigned hash;
596         struct ip_vs_dest *dest;
597
598         /*
599          *      Check for "full" addressed entries
600          *      Return the first found entry
601          */
602         hash = ip_vs_rs_hashkey(af, daddr, dport);
603
604         read_lock(&__ip_vs_rs_lock);
605         list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
606                 if ((dest->af == af)
607                     && ip_vs_addr_equal(af, &dest->addr, daddr)
608                     && (dest->port == dport)
609                     && ((dest->protocol == protocol) ||
610                         dest->vfwmark)) {
611                         /* HIT */
612                         read_unlock(&__ip_vs_rs_lock);
613                         return dest;
614                 }
615         }
616         read_unlock(&__ip_vs_rs_lock);
617
618         return NULL;
619 }
620
621 /*
622  *      Lookup destination by {addr,port} in the given service
623  */
624 static struct ip_vs_dest *
625 ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
626                   __be16 dport)
627 {
628         struct ip_vs_dest *dest;
629
630         /*
631          * Find the destination for the given service
632          */
633         list_for_each_entry(dest, &svc->destinations, n_list) {
634                 if ((dest->af == svc->af)
635                     && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
636                     && (dest->port == dport)) {
637                         /* HIT */
638                         return dest;
639                 }
640         }
641
642         return NULL;
643 }
644
645 /*
646  * Find destination by {daddr,dport,vaddr,protocol}
647  * Cretaed to be used in ip_vs_process_message() in
648  * the backup synchronization daemon. It finds the
649  * destination to be bound to the received connection
650  * on the backup.
651  *
652  * ip_vs_lookup_real_service() looked promissing, but
653  * seems not working as expected.
654  */
655 struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
656                                    __be16 dport,
657                                    const union nf_inet_addr *vaddr,
658                                    __be16 vport, __u16 protocol, __u32 fwmark)
659 {
660         struct ip_vs_dest *dest;
661         struct ip_vs_service *svc;
662
663         svc = ip_vs_service_get(af, fwmark, protocol, vaddr, vport);
664         if (!svc)
665                 return NULL;
666         dest = ip_vs_lookup_dest(svc, daddr, dport);
667         if (dest)
668                 atomic_inc(&dest->refcnt);
669         ip_vs_service_put(svc);
670         return dest;
671 }
672
673 /*
674  *  Lookup dest by {svc,addr,port} in the destination trash.
675  *  The destination trash is used to hold the destinations that are removed
676  *  from the service table but are still referenced by some conn entries.
677  *  The reason to add the destination trash is when the dest is temporary
678  *  down (either by administrator or by monitor program), the dest can be
679  *  picked back from the trash, the remaining connections to the dest can
680  *  continue, and the counting information of the dest is also useful for
681  *  scheduling.
682  */
683 static struct ip_vs_dest *
684 ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
685                      __be16 dport)
686 {
687         struct ip_vs_dest *dest, *nxt;
688
689         /*
690          * Find the destination in trash
691          */
692         list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
693                 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
694                               "dest->refcnt=%d\n",
695                               dest->vfwmark,
696                               IP_VS_DBG_ADDR(svc->af, &dest->addr),
697                               ntohs(dest->port),
698                               atomic_read(&dest->refcnt));
699                 if (dest->af == svc->af &&
700                     ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
701                     dest->port == dport &&
702                     dest->vfwmark == svc->fwmark &&
703                     dest->protocol == svc->protocol &&
704                     (svc->fwmark ||
705                      (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
706                       dest->vport == svc->port))) {
707                         /* HIT */
708                         return dest;
709                 }
710
711                 /*
712                  * Try to purge the destination from trash if not referenced
713                  */
714                 if (atomic_read(&dest->refcnt) == 1) {
715                         IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
716                                       "from trash\n",
717                                       dest->vfwmark,
718                                       IP_VS_DBG_ADDR(svc->af, &dest->addr),
719                                       ntohs(dest->port));
720                         list_del(&dest->n_list);
721                         ip_vs_dst_reset(dest);
722                         __ip_vs_unbind_svc(dest);
723                         kfree(dest);
724                 }
725         }
726
727         return NULL;
728 }
729
730
731 /*
732  *  Clean up all the destinations in the trash
733  *  Called by the ip_vs_control_cleanup()
734  *
735  *  When the ip_vs_control_clearup is activated by ipvs module exit,
736  *  the service tables must have been flushed and all the connections
737  *  are expired, and the refcnt of each destination in the trash must
738  *  be 1, so we simply release them here.
739  */
740 static void ip_vs_trash_cleanup(void)
741 {
742         struct ip_vs_dest *dest, *nxt;
743
744         list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
745                 list_del(&dest->n_list);
746                 ip_vs_dst_reset(dest);
747                 __ip_vs_unbind_svc(dest);
748                 kfree(dest);
749         }
750 }
751
752
753 static void
754 ip_vs_zero_stats(struct ip_vs_stats *stats)
755 {
756         spin_lock_bh(&stats->lock);
757
758         memset(&stats->ustats, 0, sizeof(stats->ustats));
759         ip_vs_zero_estimator(stats);
760
761         spin_unlock_bh(&stats->lock);
762 }
763
764 /*
765  *      Update a destination in the given service
766  */
767 static void
768 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
769                     struct ip_vs_dest_user_kern *udest, int add)
770 {
771         int conn_flags;
772
773         /* set the weight and the flags */
774         atomic_set(&dest->weight, udest->weight);
775         conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
776         conn_flags |= IP_VS_CONN_F_INACTIVE;
777
778         /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
779         if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
780                 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
781         } else {
782                 /*
783                  *    Put the real service in ip_vs_rtable if not present.
784                  *    For now only for NAT!
785                  */
786                 write_lock_bh(&__ip_vs_rs_lock);
787                 ip_vs_rs_hash(dest);
788                 write_unlock_bh(&__ip_vs_rs_lock);
789         }
790         atomic_set(&dest->conn_flags, conn_flags);
791
792         /* bind the service */
793         if (!dest->svc) {
794                 __ip_vs_bind_svc(dest, svc);
795         } else {
796                 if (dest->svc != svc) {
797                         __ip_vs_unbind_svc(dest);
798                         ip_vs_zero_stats(&dest->stats);
799                         __ip_vs_bind_svc(dest, svc);
800                 }
801         }
802
803         /* set the dest status flags */
804         dest->flags |= IP_VS_DEST_F_AVAILABLE;
805
806         if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
807                 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
808         dest->u_threshold = udest->u_threshold;
809         dest->l_threshold = udest->l_threshold;
810
811         spin_lock(&dest->dst_lock);
812         ip_vs_dst_reset(dest);
813         spin_unlock(&dest->dst_lock);
814
815         if (add)
816                 ip_vs_new_estimator(&dest->stats);
817
818         write_lock_bh(&__ip_vs_svc_lock);
819
820         /* Wait until all other svc users go away */
821         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
822
823         if (add) {
824                 list_add(&dest->n_list, &svc->destinations);
825                 svc->num_dests++;
826         }
827
828         /* call the update_service, because server weight may be changed */
829         if (svc->scheduler->update_service)
830                 svc->scheduler->update_service(svc);
831
832         write_unlock_bh(&__ip_vs_svc_lock);
833 }
834
835
836 /*
837  *      Create a destination for the given service
838  */
839 static int
840 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
841                struct ip_vs_dest **dest_p)
842 {
843         struct ip_vs_dest *dest;
844         unsigned atype;
845
846         EnterFunction(2);
847
848 #ifdef CONFIG_IP_VS_IPV6
849         if (svc->af == AF_INET6) {
850                 atype = ipv6_addr_type(&udest->addr.in6);
851                 if ((!(atype & IPV6_ADDR_UNICAST) ||
852                         atype & IPV6_ADDR_LINKLOCAL) &&
853                         !__ip_vs_addr_is_local_v6(&udest->addr.in6))
854                         return -EINVAL;
855         } else
856 #endif
857         {
858                 atype = inet_addr_type(&init_net, udest->addr.ip);
859                 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
860                         return -EINVAL;
861         }
862
863         dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
864         if (dest == NULL) {
865                 pr_err("%s(): no memory.\n", __func__);
866                 return -ENOMEM;
867         }
868
869         dest->af = svc->af;
870         dest->protocol = svc->protocol;
871         dest->vaddr = svc->addr;
872         dest->vport = svc->port;
873         dest->vfwmark = svc->fwmark;
874         ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
875         dest->port = udest->port;
876
877         atomic_set(&dest->activeconns, 0);
878         atomic_set(&dest->inactconns, 0);
879         atomic_set(&dest->persistconns, 0);
880         atomic_set(&dest->refcnt, 1);
881
882         INIT_LIST_HEAD(&dest->d_list);
883         spin_lock_init(&dest->dst_lock);
884         spin_lock_init(&dest->stats.lock);
885         __ip_vs_update_dest(svc, dest, udest, 1);
886
887         *dest_p = dest;
888
889         LeaveFunction(2);
890         return 0;
891 }
892
893
894 /*
895  *      Add a destination into an existing service
896  */
897 static int
898 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
899 {
900         struct ip_vs_dest *dest;
901         union nf_inet_addr daddr;
902         __be16 dport = udest->port;
903         int ret;
904
905         EnterFunction(2);
906
907         if (udest->weight < 0) {
908                 pr_err("%s(): server weight less than zero\n", __func__);
909                 return -ERANGE;
910         }
911
912         if (udest->l_threshold > udest->u_threshold) {
913                 pr_err("%s(): lower threshold is higher than upper threshold\n",
914                         __func__);
915                 return -ERANGE;
916         }
917
918         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
919
920         /*
921          * Check if the dest already exists in the list
922          */
923         dest = ip_vs_lookup_dest(svc, &daddr, dport);
924
925         if (dest != NULL) {
926                 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
927                 return -EEXIST;
928         }
929
930         /*
931          * Check if the dest already exists in the trash and
932          * is from the same service
933          */
934         dest = ip_vs_trash_get_dest(svc, &daddr, dport);
935
936         if (dest != NULL) {
937                 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
938                               "dest->refcnt=%d, service %u/%s:%u\n",
939                               IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
940                               atomic_read(&dest->refcnt),
941                               dest->vfwmark,
942                               IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
943                               ntohs(dest->vport));
944
945                 /*
946                  * Get the destination from the trash
947                  */
948                 list_del(&dest->n_list);
949
950                 __ip_vs_update_dest(svc, dest, udest, 1);
951                 ret = 0;
952         } else {
953                 /*
954                  * Allocate and initialize the dest structure
955                  */
956                 ret = ip_vs_new_dest(svc, udest, &dest);
957         }
958         LeaveFunction(2);
959
960         return ret;
961 }
962
963
964 /*
965  *      Edit a destination in the given service
966  */
967 static int
968 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
969 {
970         struct ip_vs_dest *dest;
971         union nf_inet_addr daddr;
972         __be16 dport = udest->port;
973
974         EnterFunction(2);
975
976         if (udest->weight < 0) {
977                 pr_err("%s(): server weight less than zero\n", __func__);
978                 return -ERANGE;
979         }
980
981         if (udest->l_threshold > udest->u_threshold) {
982                 pr_err("%s(): lower threshold is higher than upper threshold\n",
983                         __func__);
984                 return -ERANGE;
985         }
986
987         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
988
989         /*
990          *  Lookup the destination list
991          */
992         dest = ip_vs_lookup_dest(svc, &daddr, dport);
993
994         if (dest == NULL) {
995                 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
996                 return -ENOENT;
997         }
998
999         __ip_vs_update_dest(svc, dest, udest, 0);
1000         LeaveFunction(2);
1001
1002         return 0;
1003 }
1004
1005
1006 /*
1007  *      Delete a destination (must be already unlinked from the service)
1008  */
1009 static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1010 {
1011         ip_vs_kill_estimator(&dest->stats);
1012
1013         /*
1014          *  Remove it from the d-linked list with the real services.
1015          */
1016         write_lock_bh(&__ip_vs_rs_lock);
1017         ip_vs_rs_unhash(dest);
1018         write_unlock_bh(&__ip_vs_rs_lock);
1019
1020         /*
1021          *  Decrease the refcnt of the dest, and free the dest
1022          *  if nobody refers to it (refcnt=0). Otherwise, throw
1023          *  the destination into the trash.
1024          */
1025         if (atomic_dec_and_test(&dest->refcnt)) {
1026                 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1027                               dest->vfwmark,
1028                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1029                               ntohs(dest->port));
1030                 ip_vs_dst_reset(dest);
1031                 /* simply decrease svc->refcnt here, let the caller check
1032                    and release the service if nobody refers to it.
1033                    Only user context can release destination and service,
1034                    and only one user context can update virtual service at a
1035                    time, so the operation here is OK */
1036                 atomic_dec(&dest->svc->refcnt);
1037                 kfree(dest);
1038         } else {
1039                 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1040                               "dest->refcnt=%d\n",
1041                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1042                               ntohs(dest->port),
1043                               atomic_read(&dest->refcnt));
1044                 list_add(&dest->n_list, &ip_vs_dest_trash);
1045                 atomic_inc(&dest->refcnt);
1046         }
1047 }
1048
1049
1050 /*
1051  *      Unlink a destination from the given service
1052  */
1053 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1054                                 struct ip_vs_dest *dest,
1055                                 int svcupd)
1056 {
1057         dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1058
1059         /*
1060          *  Remove it from the d-linked destination list.
1061          */
1062         list_del(&dest->n_list);
1063         svc->num_dests--;
1064
1065         /*
1066          *  Call the update_service function of its scheduler
1067          */
1068         if (svcupd && svc->scheduler->update_service)
1069                         svc->scheduler->update_service(svc);
1070 }
1071
1072
1073 /*
1074  *      Delete a destination server in the given service
1075  */
1076 static int
1077 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1078 {
1079         struct ip_vs_dest *dest;
1080         __be16 dport = udest->port;
1081
1082         EnterFunction(2);
1083
1084         dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1085
1086         if (dest == NULL) {
1087                 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1088                 return -ENOENT;
1089         }
1090
1091         write_lock_bh(&__ip_vs_svc_lock);
1092
1093         /*
1094          *      Wait until all other svc users go away.
1095          */
1096         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1097
1098         /*
1099          *      Unlink dest from the service
1100          */
1101         __ip_vs_unlink_dest(svc, dest, 1);
1102
1103         write_unlock_bh(&__ip_vs_svc_lock);
1104
1105         /*
1106          *      Delete the destination
1107          */
1108         __ip_vs_del_dest(dest);
1109
1110         LeaveFunction(2);
1111
1112         return 0;
1113 }
1114
1115
1116 /*
1117  *      Add a service into the service hash table
1118  */
1119 static int
1120 ip_vs_add_service(struct ip_vs_service_user_kern *u,
1121                   struct ip_vs_service **svc_p)
1122 {
1123         int ret = 0;
1124         struct ip_vs_scheduler *sched = NULL;
1125         struct ip_vs_pe *pe = NULL;
1126         struct ip_vs_service *svc = NULL;
1127
1128         /* increase the module use count */
1129         ip_vs_use_count_inc();
1130
1131         /* Lookup the scheduler by 'u->sched_name' */
1132         sched = ip_vs_scheduler_get(u->sched_name);
1133         if (sched == NULL) {
1134                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1135                 ret = -ENOENT;
1136                 goto out_err;
1137         }
1138
1139         if (u->pe_name && *u->pe_name) {
1140                 pe = ip_vs_pe_getbyname(u->pe_name);
1141                 if (pe == NULL) {
1142                         pr_info("persistence engine module ip_vs_pe_%s "
1143                                 "not found\n", u->pe_name);
1144                         ret = -ENOENT;
1145                         goto out_err;
1146                 }
1147         }
1148
1149 #ifdef CONFIG_IP_VS_IPV6
1150         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1151                 ret = -EINVAL;
1152                 goto out_err;
1153         }
1154 #endif
1155
1156         svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1157         if (svc == NULL) {
1158                 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1159                 ret = -ENOMEM;
1160                 goto out_err;
1161         }
1162
1163         /* I'm the first user of the service */
1164         atomic_set(&svc->usecnt, 0);
1165         atomic_set(&svc->refcnt, 0);
1166
1167         svc->af = u->af;
1168         svc->protocol = u->protocol;
1169         ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1170         svc->port = u->port;
1171         svc->fwmark = u->fwmark;
1172         svc->flags = u->flags;
1173         svc->timeout = u->timeout * HZ;
1174         svc->netmask = u->netmask;
1175
1176         INIT_LIST_HEAD(&svc->destinations);
1177         rwlock_init(&svc->sched_lock);
1178         spin_lock_init(&svc->stats.lock);
1179
1180         /* Bind the scheduler */
1181         ret = ip_vs_bind_scheduler(svc, sched);
1182         if (ret)
1183                 goto out_err;
1184         sched = NULL;
1185
1186         /* Bind the ct retriever */
1187         ip_vs_bind_pe(svc, pe);
1188         pe = NULL;
1189
1190         /* Update the virtual service counters */
1191         if (svc->port == FTPPORT)
1192                 atomic_inc(&ip_vs_ftpsvc_counter);
1193         else if (svc->port == 0)
1194                 atomic_inc(&ip_vs_nullsvc_counter);
1195
1196         ip_vs_new_estimator(&svc->stats);
1197
1198         /* Count only IPv4 services for old get/setsockopt interface */
1199         if (svc->af == AF_INET)
1200                 ip_vs_num_services++;
1201
1202         /* Hash the service into the service table */
1203         write_lock_bh(&__ip_vs_svc_lock);
1204         ip_vs_svc_hash(svc);
1205         write_unlock_bh(&__ip_vs_svc_lock);
1206
1207         *svc_p = svc;
1208         return 0;
1209
1210  out_err:
1211         if (svc != NULL) {
1212                 ip_vs_unbind_scheduler(svc);
1213                 if (svc->inc) {
1214                         local_bh_disable();
1215                         ip_vs_app_inc_put(svc->inc);
1216                         local_bh_enable();
1217                 }
1218                 kfree(svc);
1219         }
1220         ip_vs_scheduler_put(sched);
1221         ip_vs_pe_put(pe);
1222
1223         /* decrease the module use count */
1224         ip_vs_use_count_dec();
1225
1226         return ret;
1227 }
1228
1229
1230 /*
1231  *      Edit a service and bind it with a new scheduler
1232  */
1233 static int
1234 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1235 {
1236         struct ip_vs_scheduler *sched, *old_sched;
1237         struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1238         int ret = 0;
1239
1240         /*
1241          * Lookup the scheduler, by 'u->sched_name'
1242          */
1243         sched = ip_vs_scheduler_get(u->sched_name);
1244         if (sched == NULL) {
1245                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1246                 return -ENOENT;
1247         }
1248         old_sched = sched;
1249
1250         if (u->pe_name && *u->pe_name) {
1251                 pe = ip_vs_pe_getbyname(u->pe_name);
1252                 if (pe == NULL) {
1253                         pr_info("persistence engine module ip_vs_pe_%s "
1254                                 "not found\n", u->pe_name);
1255                         ret = -ENOENT;
1256                         goto out;
1257                 }
1258                 old_pe = pe;
1259         }
1260
1261 #ifdef CONFIG_IP_VS_IPV6
1262         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1263                 ret = -EINVAL;
1264                 goto out;
1265         }
1266 #endif
1267
1268         write_lock_bh(&__ip_vs_svc_lock);
1269
1270         /*
1271          * Wait until all other svc users go away.
1272          */
1273         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1274
1275         /*
1276          * Set the flags and timeout value
1277          */
1278         svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1279         svc->timeout = u->timeout * HZ;
1280         svc->netmask = u->netmask;
1281
1282         old_sched = svc->scheduler;
1283         if (sched != old_sched) {
1284                 /*
1285                  * Unbind the old scheduler
1286                  */
1287                 if ((ret = ip_vs_unbind_scheduler(svc))) {
1288                         old_sched = sched;
1289                         goto out_unlock;
1290                 }
1291
1292                 /*
1293                  * Bind the new scheduler
1294                  */
1295                 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1296                         /*
1297                          * If ip_vs_bind_scheduler fails, restore the old
1298                          * scheduler.
1299                          * The main reason of failure is out of memory.
1300                          *
1301                          * The question is if the old scheduler can be
1302                          * restored all the time. TODO: if it cannot be
1303                          * restored some time, we must delete the service,
1304                          * otherwise the system may crash.
1305                          */
1306                         ip_vs_bind_scheduler(svc, old_sched);
1307                         old_sched = sched;
1308                         goto out_unlock;
1309                 }
1310         }
1311
1312         old_pe = svc->pe;
1313         if (pe != old_pe) {
1314                 ip_vs_unbind_pe(svc);
1315                 ip_vs_bind_pe(svc, pe);
1316         }
1317
1318   out_unlock:
1319         write_unlock_bh(&__ip_vs_svc_lock);
1320   out:
1321         ip_vs_scheduler_put(old_sched);
1322         ip_vs_pe_put(old_pe);
1323         return ret;
1324 }
1325
1326
1327 /*
1328  *      Delete a service from the service list
1329  *      - The service must be unlinked, unlocked and not referenced!
1330  *      - We are called under _bh lock
1331  */
1332 static void __ip_vs_del_service(struct ip_vs_service *svc)
1333 {
1334         struct ip_vs_dest *dest, *nxt;
1335         struct ip_vs_scheduler *old_sched;
1336         struct ip_vs_pe *old_pe;
1337
1338         pr_info("%s: enter\n", __func__);
1339
1340         /* Count only IPv4 services for old get/setsockopt interface */
1341         if (svc->af == AF_INET)
1342                 ip_vs_num_services--;
1343
1344         ip_vs_kill_estimator(&svc->stats);
1345
1346         /* Unbind scheduler */
1347         old_sched = svc->scheduler;
1348         ip_vs_unbind_scheduler(svc);
1349         ip_vs_scheduler_put(old_sched);
1350
1351         /* Unbind persistence engine */
1352         old_pe = svc->pe;
1353         ip_vs_unbind_pe(svc);
1354         ip_vs_pe_put(old_pe);
1355
1356         /* Unbind app inc */
1357         if (svc->inc) {
1358                 ip_vs_app_inc_put(svc->inc);
1359                 svc->inc = NULL;
1360         }
1361
1362         /*
1363          *    Unlink the whole destination list
1364          */
1365         list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1366                 __ip_vs_unlink_dest(svc, dest, 0);
1367                 __ip_vs_del_dest(dest);
1368         }
1369
1370         /*
1371          *    Update the virtual service counters
1372          */
1373         if (svc->port == FTPPORT)
1374                 atomic_dec(&ip_vs_ftpsvc_counter);
1375         else if (svc->port == 0)
1376                 atomic_dec(&ip_vs_nullsvc_counter);
1377
1378         /*
1379          *    Free the service if nobody refers to it
1380          */
1381         if (atomic_read(&svc->refcnt) == 0) {
1382                 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1383                               svc->fwmark,
1384                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
1385                               ntohs(svc->port), atomic_read(&svc->usecnt));
1386                 kfree(svc);
1387         }
1388
1389         /* decrease the module use count */
1390         ip_vs_use_count_dec();
1391 }
1392
1393 /*
1394  * Unlink a service from list and try to delete it if its refcnt reached 0
1395  */
1396 static void ip_vs_unlink_service(struct ip_vs_service *svc)
1397 {
1398         /*
1399          * Unhash it from the service table
1400          */
1401         write_lock_bh(&__ip_vs_svc_lock);
1402
1403         ip_vs_svc_unhash(svc);
1404
1405         /*
1406          * Wait until all the svc users go away.
1407          */
1408         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1409
1410         __ip_vs_del_service(svc);
1411
1412         write_unlock_bh(&__ip_vs_svc_lock);
1413 }
1414
1415 /*
1416  *      Delete a service from the service list
1417  */
1418 static int ip_vs_del_service(struct ip_vs_service *svc)
1419 {
1420         if (svc == NULL)
1421                 return -EEXIST;
1422         ip_vs_unlink_service(svc);
1423
1424         return 0;
1425 }
1426
1427
1428 /*
1429  *      Flush all the virtual services
1430  */
1431 static int ip_vs_flush(void)
1432 {
1433         int idx;
1434         struct ip_vs_service *svc, *nxt;
1435
1436         /*
1437          * Flush the service table hashed by <protocol,addr,port>
1438          */
1439         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1440                 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1441                         ip_vs_unlink_service(svc);
1442                 }
1443         }
1444
1445         /*
1446          * Flush the service table hashed by fwmark
1447          */
1448         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1449                 list_for_each_entry_safe(svc, nxt,
1450                                          &ip_vs_svc_fwm_table[idx], f_list) {
1451                         ip_vs_unlink_service(svc);
1452                 }
1453         }
1454
1455         return 0;
1456 }
1457
1458
1459 /*
1460  *      Zero counters in a service or all services
1461  */
1462 static int ip_vs_zero_service(struct ip_vs_service *svc)
1463 {
1464         struct ip_vs_dest *dest;
1465
1466         write_lock_bh(&__ip_vs_svc_lock);
1467         list_for_each_entry(dest, &svc->destinations, n_list) {
1468                 ip_vs_zero_stats(&dest->stats);
1469         }
1470         ip_vs_zero_stats(&svc->stats);
1471         write_unlock_bh(&__ip_vs_svc_lock);
1472         return 0;
1473 }
1474
1475 static int ip_vs_zero_all(void)
1476 {
1477         int idx;
1478         struct ip_vs_service *svc;
1479
1480         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1481                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1482                         ip_vs_zero_service(svc);
1483                 }
1484         }
1485
1486         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1487                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1488                         ip_vs_zero_service(svc);
1489                 }
1490         }
1491
1492         ip_vs_zero_stats(&ip_vs_stats);
1493         return 0;
1494 }
1495
1496
1497 static int
1498 proc_do_defense_mode(ctl_table *table, int write,
1499                      void __user *buffer, size_t *lenp, loff_t *ppos)
1500 {
1501         int *valp = table->data;
1502         int val = *valp;
1503         int rc;
1504
1505         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1506         if (write && (*valp != val)) {
1507                 if ((*valp < 0) || (*valp > 3)) {
1508                         /* Restore the correct value */
1509                         *valp = val;
1510                 } else {
1511                         update_defense_level();
1512                 }
1513         }
1514         return rc;
1515 }
1516
1517
1518 static int
1519 proc_do_sync_threshold(ctl_table *table, int write,
1520                        void __user *buffer, size_t *lenp, loff_t *ppos)
1521 {
1522         int *valp = table->data;
1523         int val[2];
1524         int rc;
1525
1526         /* backup the value first */
1527         memcpy(val, valp, sizeof(val));
1528
1529         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1530         if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1531                 /* Restore the correct value */
1532                 memcpy(valp, val, sizeof(val));
1533         }
1534         return rc;
1535 }
1536
1537 static int
1538 proc_do_sync_mode(ctl_table *table, int write,
1539                      void __user *buffer, size_t *lenp, loff_t *ppos)
1540 {
1541         int *valp = table->data;
1542         int val = *valp;
1543         int rc;
1544
1545         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1546         if (write && (*valp != val)) {
1547                 if ((*valp < 0) || (*valp > 1)) {
1548                         /* Restore the correct value */
1549                         *valp = val;
1550                 } else {
1551                         ip_vs_sync_switch_mode(val);
1552                 }
1553         }
1554         return rc;
1555 }
1556
1557 /*
1558  *      IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1559  */
1560
1561 static struct ctl_table vs_vars[] = {
1562         {
1563                 .procname       = "amemthresh",
1564                 .data           = &sysctl_ip_vs_amemthresh,
1565                 .maxlen         = sizeof(int),
1566                 .mode           = 0644,
1567                 .proc_handler   = proc_dointvec,
1568         },
1569 #ifdef CONFIG_IP_VS_DEBUG
1570         {
1571                 .procname       = "debug_level",
1572                 .data           = &sysctl_ip_vs_debug_level,
1573                 .maxlen         = sizeof(int),
1574                 .mode           = 0644,
1575                 .proc_handler   = proc_dointvec,
1576         },
1577 #endif
1578         {
1579                 .procname       = "am_droprate",
1580                 .data           = &sysctl_ip_vs_am_droprate,
1581                 .maxlen         = sizeof(int),
1582                 .mode           = 0644,
1583                 .proc_handler   = proc_dointvec,
1584         },
1585         {
1586                 .procname       = "drop_entry",
1587                 .data           = &sysctl_ip_vs_drop_entry,
1588                 .maxlen         = sizeof(int),
1589                 .mode           = 0644,
1590                 .proc_handler   = proc_do_defense_mode,
1591         },
1592         {
1593                 .procname       = "drop_packet",
1594                 .data           = &sysctl_ip_vs_drop_packet,
1595                 .maxlen         = sizeof(int),
1596                 .mode           = 0644,
1597                 .proc_handler   = proc_do_defense_mode,
1598         },
1599 #ifdef CONFIG_IP_VS_NFCT
1600         {
1601                 .procname       = "conntrack",
1602                 .data           = &sysctl_ip_vs_conntrack,
1603                 .maxlen         = sizeof(int),
1604                 .mode           = 0644,
1605                 .proc_handler   = &proc_dointvec,
1606         },
1607 #endif
1608         {
1609                 .procname       = "secure_tcp",
1610                 .data           = &sysctl_ip_vs_secure_tcp,
1611                 .maxlen         = sizeof(int),
1612                 .mode           = 0644,
1613                 .proc_handler   = proc_do_defense_mode,
1614         },
1615         {
1616                 .procname       = "snat_reroute",
1617                 .data           = &sysctl_ip_vs_snat_reroute,
1618                 .maxlen         = sizeof(int),
1619                 .mode           = 0644,
1620                 .proc_handler   = &proc_dointvec,
1621         },
1622         {
1623                 .procname       = "sync_version",
1624                 .data           = &sysctl_ip_vs_sync_ver,
1625                 .maxlen         = sizeof(int),
1626                 .mode           = 0644,
1627                 .proc_handler   = &proc_do_sync_mode,
1628         },
1629 #if 0
1630         {
1631                 .procname       = "timeout_established",
1632                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1633                 .maxlen         = sizeof(int),
1634                 .mode           = 0644,
1635                 .proc_handler   = proc_dointvec_jiffies,
1636         },
1637         {
1638                 .procname       = "timeout_synsent",
1639                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1640                 .maxlen         = sizeof(int),
1641                 .mode           = 0644,
1642                 .proc_handler   = proc_dointvec_jiffies,
1643         },
1644         {
1645                 .procname       = "timeout_synrecv",
1646                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1647                 .maxlen         = sizeof(int),
1648                 .mode           = 0644,
1649                 .proc_handler   = proc_dointvec_jiffies,
1650         },
1651         {
1652                 .procname       = "timeout_finwait",
1653                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1654                 .maxlen         = sizeof(int),
1655                 .mode           = 0644,
1656                 .proc_handler   = proc_dointvec_jiffies,
1657         },
1658         {
1659                 .procname       = "timeout_timewait",
1660                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1661                 .maxlen         = sizeof(int),
1662                 .mode           = 0644,
1663                 .proc_handler   = proc_dointvec_jiffies,
1664         },
1665         {
1666                 .procname       = "timeout_close",
1667                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1668                 .maxlen         = sizeof(int),
1669                 .mode           = 0644,
1670                 .proc_handler   = proc_dointvec_jiffies,
1671         },
1672         {
1673                 .procname       = "timeout_closewait",
1674                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1675                 .maxlen         = sizeof(int),
1676                 .mode           = 0644,
1677                 .proc_handler   = proc_dointvec_jiffies,
1678         },
1679         {
1680                 .procname       = "timeout_lastack",
1681                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1682                 .maxlen         = sizeof(int),
1683                 .mode           = 0644,
1684                 .proc_handler   = proc_dointvec_jiffies,
1685         },
1686         {
1687                 .procname       = "timeout_listen",
1688                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1689                 .maxlen         = sizeof(int),
1690                 .mode           = 0644,
1691                 .proc_handler   = proc_dointvec_jiffies,
1692         },
1693         {
1694                 .procname       = "timeout_synack",
1695                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1696                 .maxlen         = sizeof(int),
1697                 .mode           = 0644,
1698                 .proc_handler   = proc_dointvec_jiffies,
1699         },
1700         {
1701                 .procname       = "timeout_udp",
1702                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1703                 .maxlen         = sizeof(int),
1704                 .mode           = 0644,
1705                 .proc_handler   = proc_dointvec_jiffies,
1706         },
1707         {
1708                 .procname       = "timeout_icmp",
1709                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1710                 .maxlen         = sizeof(int),
1711                 .mode           = 0644,
1712                 .proc_handler   = proc_dointvec_jiffies,
1713         },
1714 #endif
1715         {
1716                 .procname       = "cache_bypass",
1717                 .data           = &sysctl_ip_vs_cache_bypass,
1718                 .maxlen         = sizeof(int),
1719                 .mode           = 0644,
1720                 .proc_handler   = proc_dointvec,
1721         },
1722         {
1723                 .procname       = "expire_nodest_conn",
1724                 .data           = &sysctl_ip_vs_expire_nodest_conn,
1725                 .maxlen         = sizeof(int),
1726                 .mode           = 0644,
1727                 .proc_handler   = proc_dointvec,
1728         },
1729         {
1730                 .procname       = "expire_quiescent_template",
1731                 .data           = &sysctl_ip_vs_expire_quiescent_template,
1732                 .maxlen         = sizeof(int),
1733                 .mode           = 0644,
1734                 .proc_handler   = proc_dointvec,
1735         },
1736         {
1737                 .procname       = "sync_threshold",
1738                 .data           = &sysctl_ip_vs_sync_threshold,
1739                 .maxlen         = sizeof(sysctl_ip_vs_sync_threshold),
1740                 .mode           = 0644,
1741                 .proc_handler   = proc_do_sync_threshold,
1742         },
1743         {
1744                 .procname       = "nat_icmp_send",
1745                 .data           = &sysctl_ip_vs_nat_icmp_send,
1746                 .maxlen         = sizeof(int),
1747                 .mode           = 0644,
1748                 .proc_handler   = proc_dointvec,
1749         },
1750         { }
1751 };
1752
1753 const struct ctl_path net_vs_ctl_path[] = {
1754         { .procname = "net", },
1755         { .procname = "ipv4", },
1756         { .procname = "vs", },
1757         { }
1758 };
1759 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1760
1761 static struct ctl_table_header * sysctl_header;
1762
1763 #ifdef CONFIG_PROC_FS
1764
/*
 *      Cursor of the service seq_file iterator, stored in seq->private.
 */
struct ip_vs_iter {
        /* Table being walked: ip_vs_svc_table or ip_vs_svc_fwm_table */
        struct list_head *table;
        /* Index of the hash bucket holding the current service */
        int bucket;
};
1769
1770 /*
1771  *      Write the contents of the VS rule table to a PROCfs file.
1772  *      (It is kept just for backward compatibility)
1773  */
1774 static inline const char *ip_vs_fwd_name(unsigned flags)
1775 {
1776         switch (flags & IP_VS_CONN_F_FWD_MASK) {
1777         case IP_VS_CONN_F_LOCALNODE:
1778                 return "Local";
1779         case IP_VS_CONN_F_TUNNEL:
1780                 return "Tunnel";
1781         case IP_VS_CONN_F_DROUTE:
1782                 return "Route";
1783         default:
1784                 return "Masq";
1785         }
1786 }
1787
1788
1789 /* Get the Nth entry in the two lists */
1790 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1791 {
1792         struct ip_vs_iter *iter = seq->private;
1793         int idx;
1794         struct ip_vs_service *svc;
1795
1796         /* look in hash by protocol */
1797         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1798                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1799                         if (pos-- == 0){
1800                                 iter->table = ip_vs_svc_table;
1801                                 iter->bucket = idx;
1802                                 return svc;
1803                         }
1804                 }
1805         }
1806
1807         /* keep looking in fwmark */
1808         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1809                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1810                         if (pos-- == 0) {
1811                                 iter->table = ip_vs_svc_fwm_table;
1812                                 iter->bucket = idx;
1813                                 return svc;
1814                         }
1815                 }
1816         }
1817
1818         return NULL;
1819 }
1820
1821 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1822 __acquires(__ip_vs_svc_lock)
1823 {
1824
1825         read_lock_bh(&__ip_vs_svc_lock);
1826         return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1827 }
1828
1829
1830 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1831 {
1832         struct list_head *e;
1833         struct ip_vs_iter *iter;
1834         struct ip_vs_service *svc;
1835
1836         ++*pos;
1837         if (v == SEQ_START_TOKEN)
1838                 return ip_vs_info_array(seq,0);
1839
1840         svc = v;
1841         iter = seq->private;
1842
1843         if (iter->table == ip_vs_svc_table) {
1844                 /* next service in table hashed by protocol */
1845                 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1846                         return list_entry(e, struct ip_vs_service, s_list);
1847
1848
1849                 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1850                         list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1851                                             s_list) {
1852                                 return svc;
1853                         }
1854                 }
1855
1856                 iter->table = ip_vs_svc_fwm_table;
1857                 iter->bucket = -1;
1858                 goto scan_fwmark;
1859         }
1860
1861         /* next service in hashed by fwmark */
1862         if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1863                 return list_entry(e, struct ip_vs_service, f_list);
1864
1865  scan_fwmark:
1866         while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1867                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1868                                     f_list)
1869                         return svc;
1870         }
1871
1872         return NULL;
1873 }
1874
1875 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1876 __releases(__ip_vs_svc_lock)
1877 {
1878         read_unlock_bh(&__ip_vs_svc_lock);
1879 }
1880
1881
1882 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1883 {
1884         if (v == SEQ_START_TOKEN) {
1885                 seq_printf(seq,
1886                         "IP Virtual Server version %d.%d.%d (size=%d)\n",
1887                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1888                 seq_puts(seq,
1889                          "Prot LocalAddress:Port Scheduler Flags\n");
1890                 seq_puts(seq,
1891                          "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1892         } else {
1893                 const struct ip_vs_service *svc = v;
1894                 const struct ip_vs_iter *iter = seq->private;
1895                 const struct ip_vs_dest *dest;
1896
1897                 if (iter->table == ip_vs_svc_table) {
1898 #ifdef CONFIG_IP_VS_IPV6
1899                         if (svc->af == AF_INET6)
1900                                 seq_printf(seq, "%s  [%pI6]:%04X %s ",
1901                                            ip_vs_proto_name(svc->protocol),
1902                                            &svc->addr.in6,
1903                                            ntohs(svc->port),
1904                                            svc->scheduler->name);
1905                         else
1906 #endif
1907                                 seq_printf(seq, "%s  %08X:%04X %s %s ",
1908                                            ip_vs_proto_name(svc->protocol),
1909                                            ntohl(svc->addr.ip),
1910                                            ntohs(svc->port),
1911                                            svc->scheduler->name,
1912                                            (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1913                 } else {
1914                         seq_printf(seq, "FWM  %08X %s %s",
1915                                    svc->fwmark, svc->scheduler->name,
1916                                    (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1917                 }
1918
1919                 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1920                         seq_printf(seq, "persistent %d %08X\n",
1921                                 svc->timeout,
1922                                 ntohl(svc->netmask));
1923                 else
1924                         seq_putc(seq, '\n');
1925
1926                 list_for_each_entry(dest, &svc->destinations, n_list) {
1927 #ifdef CONFIG_IP_VS_IPV6
1928                         if (dest->af == AF_INET6)
1929                                 seq_printf(seq,
1930                                            "  -> [%pI6]:%04X"
1931                                            "      %-7s %-6d %-10d %-10d\n",
1932                                            &dest->addr.in6,
1933                                            ntohs(dest->port),
1934                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1935                                            atomic_read(&dest->weight),
1936                                            atomic_read(&dest->activeconns),
1937                                            atomic_read(&dest->inactconns));
1938                         else
1939 #endif
1940                                 seq_printf(seq,
1941                                            "  -> %08X:%04X      "
1942                                            "%-7s %-6d %-10d %-10d\n",
1943                                            ntohl(dest->addr.ip),
1944                                            ntohs(dest->port),
1945                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1946                                            atomic_read(&dest->weight),
1947                                            atomic_read(&dest->activeconns),
1948                                            atomic_read(&dest->inactconns));
1949
1950                 }
1951         }
1952         return 0;
1953 }
1954
1955 static const struct seq_operations ip_vs_info_seq_ops = {
1956         .start = ip_vs_info_seq_start,
1957         .next  = ip_vs_info_seq_next,
1958         .stop  = ip_vs_info_seq_stop,
1959         .show  = ip_vs_info_seq_show,
1960 };
1961
1962 static int ip_vs_info_open(struct inode *inode, struct file *file)
1963 {
1964         return seq_open_private(file, &ip_vs_info_seq_ops,
1965                         sizeof(struct ip_vs_iter));
1966 }
1967
1968 static const struct file_operations ip_vs_info_fops = {
1969         .owner   = THIS_MODULE,
1970         .open    = ip_vs_info_open,
1971         .read    = seq_read,
1972         .llseek  = seq_lseek,
1973         .release = seq_release_private,
1974 };
1975
1976 #endif
1977
1978 struct ip_vs_stats ip_vs_stats = {
1979         .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1980 };
1981
1982 #ifdef CONFIG_PROC_FS
1983 static int ip_vs_stats_show(struct seq_file *seq, void *v)
1984 {
1985
1986 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
1987         seq_puts(seq,
1988                  "   Total Incoming Outgoing         Incoming         Outgoing\n");
1989         seq_printf(seq,
1990                    "   Conns  Packets  Packets            Bytes            Bytes\n");
1991
1992         spin_lock_bh(&ip_vs_stats.lock);
1993         seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
1994                    ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
1995                    (unsigned long long) ip_vs_stats.ustats.inbytes,
1996                    (unsigned long long) ip_vs_stats.ustats.outbytes);
1997
1998 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1999         seq_puts(seq,
2000                    " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2001         seq_printf(seq,"%8X %8X %8X %16X %16X\n",
2002                         ip_vs_stats.ustats.cps,
2003                         ip_vs_stats.ustats.inpps,
2004                         ip_vs_stats.ustats.outpps,
2005                         ip_vs_stats.ustats.inbps,
2006                         ip_vs_stats.ustats.outbps);
2007         spin_unlock_bh(&ip_vs_stats.lock);
2008
2009         return 0;
2010 }
2011
2012 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2013 {
2014         return single_open(file, ip_vs_stats_show, NULL);
2015 }
2016
2017 static const struct file_operations ip_vs_stats_fops = {
2018         .owner = THIS_MODULE,
2019         .open = ip_vs_stats_seq_open,
2020         .read = seq_read,
2021         .llseek = seq_lseek,
2022         .release = single_release,
2023 };
2024
2025 #endif
2026
2027 /*
2028  *      Set timeout values for tcp tcpfin udp in the timeout_table.
2029  */
2030 static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
2031 {
2032         IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2033                   u->tcp_timeout,
2034                   u->tcp_fin_timeout,
2035                   u->udp_timeout);
2036
2037 #ifdef CONFIG_IP_VS_PROTO_TCP
2038         if (u->tcp_timeout) {
2039                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
2040                         = u->tcp_timeout * HZ;
2041         }
2042
2043         if (u->tcp_fin_timeout) {
2044                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
2045                         = u->tcp_fin_timeout * HZ;
2046         }
2047 #endif
2048
2049 #ifdef CONFIG_IP_VS_PROTO_UDP
2050         if (u->udp_timeout) {
2051                 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
2052                         = u->udp_timeout * HZ;
2053         }
2054 #endif
2055         return 0;
2056 }
2057
2058
2059 #define SET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
2060 #define SERVICE_ARG_LEN         (sizeof(struct ip_vs_service_user))
2061 #define SVCDEST_ARG_LEN         (sizeof(struct ip_vs_service_user) +    \
2062                                  sizeof(struct ip_vs_dest_user))
2063 #define TIMEOUT_ARG_LEN         (sizeof(struct ip_vs_timeout_user))
2064 #define DAEMON_ARG_LEN          (sizeof(struct ip_vs_daemon_user))
2065 #define MAX_ARG_LEN             SVCDEST_ARG_LEN
2066
2067 static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2068         [SET_CMDID(IP_VS_SO_SET_ADD)]           = SERVICE_ARG_LEN,
2069         [SET_CMDID(IP_VS_SO_SET_EDIT)]          = SERVICE_ARG_LEN,
2070         [SET_CMDID(IP_VS_SO_SET_DEL)]           = SERVICE_ARG_LEN,
2071         [SET_CMDID(IP_VS_SO_SET_FLUSH)]         = 0,
2072         [SET_CMDID(IP_VS_SO_SET_ADDDEST)]       = SVCDEST_ARG_LEN,
2073         [SET_CMDID(IP_VS_SO_SET_DELDEST)]       = SVCDEST_ARG_LEN,
2074         [SET_CMDID(IP_VS_SO_SET_EDITDEST)]      = SVCDEST_ARG_LEN,
2075         [SET_CMDID(IP_VS_SO_SET_TIMEOUT)]       = TIMEOUT_ARG_LEN,
2076         [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]   = DAEMON_ARG_LEN,
2077         [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]    = DAEMON_ARG_LEN,
2078         [SET_CMDID(IP_VS_SO_SET_ZERO)]          = SERVICE_ARG_LEN,
2079 };
2080
2081 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2082                                   struct ip_vs_service_user *usvc_compat)
2083 {
2084         memset(usvc, 0, sizeof(*usvc));
2085
2086         usvc->af                = AF_INET;
2087         usvc->protocol          = usvc_compat->protocol;
2088         usvc->addr.ip           = usvc_compat->addr;
2089         usvc->port              = usvc_compat->port;
2090         usvc->fwmark            = usvc_compat->fwmark;
2091
2092         /* Deep copy of sched_name is not needed here */
2093         usvc->sched_name        = usvc_compat->sched_name;
2094
2095         usvc->flags             = usvc_compat->flags;
2096         usvc->timeout           = usvc_compat->timeout;
2097         usvc->netmask           = usvc_compat->netmask;
2098 }
2099
2100 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2101                                    struct ip_vs_dest_user *udest_compat)
2102 {
2103         memset(udest, 0, sizeof(*udest));
2104
2105         udest->addr.ip          = udest_compat->addr;
2106         udest->port             = udest_compat->port;
2107         udest->conn_flags       = udest_compat->conn_flags;
2108         udest->weight           = udest_compat->weight;
2109         udest->u_threshold      = udest_compat->u_threshold;
2110         udest->l_threshold      = udest_compat->l_threshold;
2111 }
2112
2113 static int
2114 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2115 {
2116         int ret;
2117         unsigned char arg[MAX_ARG_LEN];
2118         struct ip_vs_service_user *usvc_compat;
2119         struct ip_vs_service_user_kern usvc;
2120         struct ip_vs_service *svc;
2121         struct ip_vs_dest_user *udest_compat;
2122         struct ip_vs_dest_user_kern udest;
2123
2124         if (!capable(CAP_NET_ADMIN))
2125                 return -EPERM;
2126
2127         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2128                 return -EINVAL;
2129         if (len < 0 || len >  MAX_ARG_LEN)
2130                 return -EINVAL;
2131         if (len != set_arglen[SET_CMDID(cmd)]) {
2132                 pr_err("set_ctl: len %u != %u\n",
2133                        len, set_arglen[SET_CMDID(cmd)]);
2134                 return -EINVAL;
2135         }
2136
2137         if (copy_from_user(arg, user, len) != 0)
2138                 return -EFAULT;
2139
2140         /* increase the module use count */
2141         ip_vs_use_count_inc();
2142
2143         if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2144                 ret = -ERESTARTSYS;
2145                 goto out_dec;
2146         }
2147
2148         if (cmd == IP_VS_SO_SET_FLUSH) {
2149                 /* Flush the virtual service */
2150                 ret = ip_vs_flush();
2151                 goto out_unlock;
2152         } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2153                 /* Set timeout values for (tcp tcpfin udp) */
2154                 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
2155                 goto out_unlock;
2156         } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2157                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2158                 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
2159                 goto out_unlock;
2160         } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2161                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2162                 ret = stop_sync_thread(dm->state);
2163                 goto out_unlock;
2164         }
2165
2166         usvc_compat = (struct ip_vs_service_user *)arg;
2167         udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2168
2169         /* We only use the new structs internally, so copy userspace compat
2170          * structs to extended internal versions */
2171         ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2172         ip_vs_copy_udest_compat(&udest, udest_compat);
2173
2174         if (cmd == IP_VS_SO_SET_ZERO) {
2175                 /* if no service address is set, zero counters in all */
2176                 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2177                         ret = ip_vs_zero_all();
2178                         goto out_unlock;
2179                 }
2180         }
2181
2182         /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2183         if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2184             usvc.protocol != IPPROTO_SCTP) {
2185                 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2186                        usvc.protocol, &usvc.addr.ip,
2187                        ntohs(usvc.port), usvc.sched_name);
2188                 ret = -EFAULT;
2189                 goto out_unlock;
2190         }
2191
2192         /* Lookup the exact service by <protocol, addr, port> or fwmark */
2193         if (usvc.fwmark == 0)
2194                 svc = __ip_vs_service_find(usvc.af, usvc.protocol,
2195                                            &usvc.addr, usvc.port);
2196         else
2197                 svc = __ip_vs_svc_fwm_find(usvc.af, usvc.fwmark);
2198
2199         if (cmd != IP_VS_SO_SET_ADD
2200             && (svc == NULL || svc->protocol != usvc.protocol)) {
2201                 ret = -ESRCH;
2202                 goto out_unlock;
2203         }
2204
2205         switch (cmd) {
2206         case IP_VS_SO_SET_ADD:
2207                 if (svc != NULL)
2208                         ret = -EEXIST;
2209                 else
2210                         ret = ip_vs_add_service(&usvc, &svc);
2211                 break;
2212         case IP_VS_SO_SET_EDIT:
2213                 ret = ip_vs_edit_service(svc, &usvc);
2214                 break;
2215         case IP_VS_SO_SET_DEL:
2216                 ret = ip_vs_del_service(svc);
2217                 if (!ret)
2218                         goto out_unlock;
2219                 break;
2220         case IP_VS_SO_SET_ZERO:
2221                 ret = ip_vs_zero_service(svc);
2222                 break;
2223         case IP_VS_SO_SET_ADDDEST:
2224                 ret = ip_vs_add_dest(svc, &udest);
2225                 break;
2226         case IP_VS_SO_SET_EDITDEST:
2227                 ret = ip_vs_edit_dest(svc, &udest);
2228                 break;
2229         case IP_VS_SO_SET_DELDEST:
2230                 ret = ip_vs_del_dest(svc, &udest);
2231                 break;
2232         default:
2233                 ret = -EINVAL;
2234         }
2235
2236   out_unlock:
2237         mutex_unlock(&__ip_vs_mutex);
2238   out_dec:
2239         /* decrease the module use count */
2240         ip_vs_use_count_dec();
2241
2242         return ret;
2243 }
2244
2245
2246 static void
2247 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2248 {
2249         spin_lock_bh(&src->lock);
2250         memcpy(dst, &src->ustats, sizeof(*dst));
2251         spin_unlock_bh(&src->lock);
2252 }
2253
2254 static void
2255 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2256 {
2257         dst->protocol = src->protocol;
2258         dst->addr = src->addr.ip;
2259         dst->port = src->port;
2260         dst->fwmark = src->fwmark;
2261         strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2262         dst->flags = src->flags;
2263         dst->timeout = src->timeout / HZ;
2264         dst->netmask = src->netmask;
2265         dst->num_dests = src->num_dests;
2266         ip_vs_copy_stats(&dst->stats, &src->stats);
2267 }
2268
2269 static inline int
2270 __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2271                             struct ip_vs_get_services __user *uptr)
2272 {
2273         int idx, count=0;
2274         struct ip_vs_service *svc;
2275         struct ip_vs_service_entry entry;
2276         int ret = 0;
2277
2278         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2279                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2280                         /* Only expose IPv4 entries to old interface */
2281                         if (svc->af != AF_INET)
2282                                 continue;
2283
2284                         if (count >= get->num_services)
2285                                 goto out;
2286                         memset(&entry, 0, sizeof(entry));
2287                         ip_vs_copy_service(&entry, svc);
2288                         if (copy_to_user(&uptr->entrytable[count],
2289                                          &entry, sizeof(entry))) {
2290                                 ret = -EFAULT;
2291                                 goto out;
2292                         }
2293                         count++;
2294                 }
2295         }
2296
2297         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2298                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2299                         /* Only expose IPv4 entries to old interface */
2300                         if (svc->af != AF_INET)
2301                                 continue;
2302
2303                         if (count >= get->num_services)
2304                                 goto out;
2305                         memset(&entry, 0, sizeof(entry));
2306                         ip_vs_copy_service(&entry, svc);
2307                         if (copy_to_user(&uptr->entrytable[count],
2308                                          &entry, sizeof(entry))) {
2309                                 ret = -EFAULT;
2310                                 goto out;
2311                         }
2312                         count++;
2313                 }
2314         }
2315   out:
2316         return ret;
2317 }
2318
2319 static inline int
2320 __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2321                          struct ip_vs_get_dests __user *uptr)
2322 {
2323         struct ip_vs_service *svc;
2324         union nf_inet_addr addr = { .ip = get->addr };
2325         int ret = 0;
2326
2327         if (get->fwmark)
2328                 svc = __ip_vs_svc_fwm_find(AF_INET, get->fwmark);
2329         else
2330                 svc = __ip_vs_service_find(AF_INET, get->protocol, &addr,
2331                                            get->port);
2332
2333         if (svc) {
2334                 int count = 0;
2335                 struct ip_vs_dest *dest;
2336                 struct ip_vs_dest_entry entry;
2337
2338                 list_for_each_entry(dest, &svc->destinations, n_list) {
2339                         if (count >= get->num_dests)
2340                                 break;
2341
2342                         entry.addr = dest->addr.ip;
2343                         entry.port = dest->port;
2344                         entry.conn_flags = atomic_read(&dest->conn_flags);
2345                         entry.weight = atomic_read(&dest->weight);
2346                         entry.u_threshold = dest->u_threshold;
2347                         entry.l_threshold = dest->l_threshold;
2348                         entry.activeconns = atomic_read(&dest->activeconns);
2349                         entry.inactconns = atomic_read(&dest->inactconns);
2350                         entry.persistconns = atomic_read(&dest->persistconns);
2351                         ip_vs_copy_stats(&entry.stats, &dest->stats);
2352                         if (copy_to_user(&uptr->entrytable[count],
2353                                          &entry, sizeof(entry))) {
2354                                 ret = -EFAULT;
2355                                 break;
2356                         }
2357                         count++;
2358                 }
2359         } else
2360                 ret = -ESRCH;
2361         return ret;
2362 }
2363
2364 static inline void
2365 __ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2366 {
2367 #ifdef CONFIG_IP_VS_PROTO_TCP
2368         u->tcp_timeout =
2369                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2370         u->tcp_fin_timeout =
2371                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2372 #endif
2373 #ifdef CONFIG_IP_VS_PROTO_UDP
2374         u->udp_timeout =
2375                 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2376 #endif
2377 }
2378
2379
2380 #define GET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
2381 #define GET_INFO_ARG_LEN        (sizeof(struct ip_vs_getinfo))
2382 #define GET_SERVICES_ARG_LEN    (sizeof(struct ip_vs_get_services))
2383 #define GET_SERVICE_ARG_LEN     (sizeof(struct ip_vs_service_entry))
2384 #define GET_DESTS_ARG_LEN       (sizeof(struct ip_vs_get_dests))
2385 #define GET_TIMEOUT_ARG_LEN     (sizeof(struct ip_vs_timeout_user))
2386 #define GET_DAEMON_ARG_LEN      (sizeof(struct ip_vs_daemon_user) * 2)
2387
2388 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2389         [GET_CMDID(IP_VS_SO_GET_VERSION)]       = 64,
2390         [GET_CMDID(IP_VS_SO_GET_INFO)]          = GET_INFO_ARG_LEN,
2391         [GET_CMDID(IP_VS_SO_GET_SERVICES)]      = GET_SERVICES_ARG_LEN,
2392         [GET_CMDID(IP_VS_SO_GET_SERVICE)]       = GET_SERVICE_ARG_LEN,
2393         [GET_CMDID(IP_VS_SO_GET_DESTS)]         = GET_DESTS_ARG_LEN,
2394         [GET_CMDID(IP_VS_SO_GET_TIMEOUT)]       = GET_TIMEOUT_ARG_LEN,
2395         [GET_CMDID(IP_VS_SO_GET_DAEMON)]        = GET_DAEMON_ARG_LEN,
2396 };
2397
2398 static int
2399 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2400 {
2401         unsigned char arg[128];
2402         int ret = 0;
2403         unsigned int copylen;
2404
2405         if (!capable(CAP_NET_ADMIN))
2406                 return -EPERM;
2407
2408         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2409                 return -EINVAL;
2410
2411         if (*len < get_arglen[GET_CMDID(cmd)]) {
2412                 pr_err("get_ctl: len %u < %u\n",
2413                        *len, get_arglen[GET_CMDID(cmd)]);
2414                 return -EINVAL;
2415         }
2416
2417         copylen = get_arglen[GET_CMDID(cmd)];
2418         if (copylen > 128)
2419                 return -EINVAL;
2420
2421         if (copy_from_user(arg, user, copylen) != 0)
2422                 return -EFAULT;
2423
2424         if (mutex_lock_interruptible(&__ip_vs_mutex))
2425                 return -ERESTARTSYS;
2426
2427         switch (cmd) {
2428         case IP_VS_SO_GET_VERSION:
2429         {
2430                 char buf[64];
2431
2432                 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2433                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2434                 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2435                         ret = -EFAULT;
2436                         goto out;
2437                 }
2438                 *len = strlen(buf)+1;
2439         }
2440         break;
2441
2442         case IP_VS_SO_GET_INFO:
2443         {
2444                 struct ip_vs_getinfo info;
2445                 info.version = IP_VS_VERSION_CODE;
2446                 info.size = ip_vs_conn_tab_size;
2447                 info.num_services = ip_vs_num_services;
2448                 if (copy_to_user(user, &info, sizeof(info)) != 0)
2449                         ret = -EFAULT;
2450         }
2451         break;
2452
2453         case IP_VS_SO_GET_SERVICES:
2454         {
2455                 struct ip_vs_get_services *get;
2456                 int size;
2457
2458                 get = (struct ip_vs_get_services *)arg;
2459                 size = sizeof(*get) +
2460                         sizeof(struct ip_vs_service_entry) * get->num_services;
2461                 if (*len != size) {
2462                         pr_err("length: %u != %u\n", *len, size);
2463                         ret = -EINVAL;
2464                         goto out;
2465                 }
2466                 ret = __ip_vs_get_service_entries(get, user);
2467         }
2468         break;
2469
2470         case IP_VS_SO_GET_SERVICE:
2471         {
2472                 struct ip_vs_service_entry *entry;
2473                 struct ip_vs_service *svc;
2474                 union nf_inet_addr addr;
2475
2476                 entry = (struct ip_vs_service_entry *)arg;
2477                 addr.ip = entry->addr;
2478                 if (entry->fwmark)
2479                         svc = __ip_vs_svc_fwm_find(AF_INET, entry->fwmark);
2480                 else
2481                         svc = __ip_vs_service_find(AF_INET, entry->protocol,
2482                                                    &addr, entry->port);
2483                 if (svc) {
2484                         ip_vs_copy_service(entry, svc);
2485                         if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2486                                 ret = -EFAULT;
2487                 } else
2488                         ret = -ESRCH;
2489         }
2490         break;
2491
2492         case IP_VS_SO_GET_DESTS:
2493         {
2494                 struct ip_vs_get_dests *get;
2495                 int size;
2496
2497                 get = (struct ip_vs_get_dests *)arg;
2498                 size = sizeof(*get) +
2499                         sizeof(struct ip_vs_dest_entry) * get->num_dests;
2500                 if (*len != size) {
2501                         pr_err("length: %u != %u\n", *len, size);
2502                         ret = -EINVAL;
2503                         goto out;
2504                 }
2505                 ret = __ip_vs_get_dest_entries(get, user);
2506         }
2507         break;
2508
2509         case IP_VS_SO_GET_TIMEOUT:
2510         {
2511                 struct ip_vs_timeout_user t;
2512
2513                 __ip_vs_get_timeouts(&t);
2514                 if (copy_to_user(user, &t, sizeof(t)) != 0)
2515                         ret = -EFAULT;
2516         }
2517         break;
2518
2519         case IP_VS_SO_GET_DAEMON:
2520         {
2521                 struct ip_vs_daemon_user d[2];
2522
2523                 memset(&d, 0, sizeof(d));
2524                 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2525                         d[0].state = IP_VS_STATE_MASTER;
2526                         strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
2527                         d[0].syncid = ip_vs_master_syncid;
2528                 }
2529                 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2530                         d[1].state = IP_VS_STATE_BACKUP;
2531                         strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
2532                         d[1].syncid = ip_vs_backup_syncid;
2533                 }
2534                 if (copy_to_user(user, &d, sizeof(d)) != 0)
2535                         ret = -EFAULT;
2536         }
2537         break;
2538
2539         default:
2540                 ret = -EINVAL;
2541         }
2542
2543   out:
2544         mutex_unlock(&__ip_vs_mutex);
2545         return ret;
2546 }
2547
2548
2549 static struct nf_sockopt_ops ip_vs_sockopts = {
2550         .pf             = PF_INET,
2551         .set_optmin     = IP_VS_BASE_CTL,
2552         .set_optmax     = IP_VS_SO_SET_MAX+1,
2553         .set            = do_ip_vs_set_ctl,
2554         .get_optmin     = IP_VS_BASE_CTL,
2555         .get_optmax     = IP_VS_SO_GET_MAX+1,
2556         .get            = do_ip_vs_get_ctl,
2557         .owner          = THIS_MODULE,
2558 };
2559
2560 /*
2561  * Generic Netlink interface
2562  */
2563
2564 /* IPVS genetlink family */
2565 static struct genl_family ip_vs_genl_family = {
2566         .id             = GENL_ID_GENERATE,
2567         .hdrsize        = 0,
2568         .name           = IPVS_GENL_NAME,
2569         .version        = IPVS_GENL_VERSION,
2570         .maxattr        = IPVS_CMD_MAX,
2571 };
2572
2573 /* Policy used for first-level command attributes */
2574 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2575         [IPVS_CMD_ATTR_SERVICE]         = { .type = NLA_NESTED },
2576         [IPVS_CMD_ATTR_DEST]            = { .type = NLA_NESTED },
2577         [IPVS_CMD_ATTR_DAEMON]          = { .type = NLA_NESTED },
2578         [IPVS_CMD_ATTR_TIMEOUT_TCP]     = { .type = NLA_U32 },
2579         [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2580         [IPVS_CMD_ATTR_TIMEOUT_UDP]     = { .type = NLA_U32 },
2581 };
2582
2583 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2584 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2585         [IPVS_DAEMON_ATTR_STATE]        = { .type = NLA_U32 },
2586         [IPVS_DAEMON_ATTR_MCAST_IFN]    = { .type = NLA_NUL_STRING,
2587                                             .len = IP_VS_IFNAME_MAXLEN },
2588         [IPVS_DAEMON_ATTR_SYNC_ID]      = { .type = NLA_U32 },
2589 };
2590
2591 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2592 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2593         [IPVS_SVC_ATTR_AF]              = { .type = NLA_U16 },
2594         [IPVS_SVC_ATTR_PROTOCOL]        = { .type = NLA_U16 },
2595         [IPVS_SVC_ATTR_ADDR]            = { .type = NLA_BINARY,
2596                                             .len = sizeof(union nf_inet_addr) },
2597         [IPVS_SVC_ATTR_PORT]            = { .type = NLA_U16 },
2598         [IPVS_SVC_ATTR_FWMARK]          = { .type = NLA_U32 },
2599         [IPVS_SVC_ATTR_SCHED_NAME]      = { .type = NLA_NUL_STRING,
2600                                             .len = IP_VS_SCHEDNAME_MAXLEN },
2601         [IPVS_SVC_ATTR_PE_NAME]         = { .type = NLA_NUL_STRING,
2602                                             .len = IP_VS_PENAME_MAXLEN },
2603         [IPVS_SVC_ATTR_FLAGS]           = { .type = NLA_BINARY,
2604                                             .len = sizeof(struct ip_vs_flags) },
2605         [IPVS_SVC_ATTR_TIMEOUT]         = { .type = NLA_U32 },
2606         [IPVS_SVC_ATTR_NETMASK]         = { .type = NLA_U32 },
2607         [IPVS_SVC_ATTR_STATS]           = { .type = NLA_NESTED },
2608 };
2609
2610 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2611 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2612         [IPVS_DEST_ATTR_ADDR]           = { .type = NLA_BINARY,
2613                                             .len = sizeof(union nf_inet_addr) },
2614         [IPVS_DEST_ATTR_PORT]           = { .type = NLA_U16 },
2615         [IPVS_DEST_ATTR_FWD_METHOD]     = { .type = NLA_U32 },
2616         [IPVS_DEST_ATTR_WEIGHT]         = { .type = NLA_U32 },
2617         [IPVS_DEST_ATTR_U_THRESH]       = { .type = NLA_U32 },
2618         [IPVS_DEST_ATTR_L_THRESH]       = { .type = NLA_U32 },
2619         [IPVS_DEST_ATTR_ACTIVE_CONNS]   = { .type = NLA_U32 },
2620         [IPVS_DEST_ATTR_INACT_CONNS]    = { .type = NLA_U32 },
2621         [IPVS_DEST_ATTR_PERSIST_CONNS]  = { .type = NLA_U32 },
2622         [IPVS_DEST_ATTR_STATS]          = { .type = NLA_NESTED },
2623 };
2624
2625 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2626                                  struct ip_vs_stats *stats)
2627 {
2628         struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2629         if (!nl_stats)
2630                 return -EMSGSIZE;
2631
2632         spin_lock_bh(&stats->lock);
2633
2634         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2635         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2636         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2637         NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2638         NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2639         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2640         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2641         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2642         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2643         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
2644
2645         spin_unlock_bh(&stats->lock);
2646
2647         nla_nest_end(skb, nl_stats);
2648
2649         return 0;
2650
2651 nla_put_failure:
2652         spin_unlock_bh(&stats->lock);
2653         nla_nest_cancel(skb, nl_stats);
2654         return -EMSGSIZE;
2655 }
2656
2657 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2658                                    struct ip_vs_service *svc)
2659 {
2660         struct nlattr *nl_service;
2661         struct ip_vs_flags flags = { .flags = svc->flags,
2662                                      .mask = ~0 };
2663
2664         nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2665         if (!nl_service)
2666                 return -EMSGSIZE;
2667
2668         NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
2669
2670         if (svc->fwmark) {
2671                 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2672         } else {
2673                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2674                 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2675                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2676         }
2677
2678         NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2679         if (svc->pe)
2680                 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
2681         NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2682         NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2683         NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2684
2685         if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2686                 goto nla_put_failure;
2687
2688         nla_nest_end(skb, nl_service);
2689
2690         return 0;
2691
2692 nla_put_failure:
2693         nla_nest_cancel(skb, nl_service);
2694         return -EMSGSIZE;
2695 }
2696
2697 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2698                                    struct ip_vs_service *svc,
2699                                    struct netlink_callback *cb)
2700 {
2701         void *hdr;
2702
2703         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2704                           &ip_vs_genl_family, NLM_F_MULTI,
2705                           IPVS_CMD_NEW_SERVICE);
2706         if (!hdr)
2707                 return -EMSGSIZE;
2708
2709         if (ip_vs_genl_fill_service(skb, svc) < 0)
2710                 goto nla_put_failure;
2711
2712         return genlmsg_end(skb, hdr);
2713
2714 nla_put_failure:
2715         genlmsg_cancel(skb, hdr);
2716         return -EMSGSIZE;
2717 }
2718
2719 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2720                                     struct netlink_callback *cb)
2721 {
2722         int idx = 0, i;
2723         int start = cb->args[0];
2724         struct ip_vs_service *svc;
2725
2726         mutex_lock(&__ip_vs_mutex);
2727         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2728                 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2729                         if (++idx <= start)
2730                                 continue;
2731                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2732                                 idx--;
2733                                 goto nla_put_failure;
2734                         }
2735                 }
2736         }
2737
2738         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2739                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2740                         if (++idx <= start)
2741                                 continue;
2742                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2743                                 idx--;
2744                                 goto nla_put_failure;
2745                         }
2746                 }
2747         }
2748
2749 nla_put_failure:
2750         mutex_unlock(&__ip_vs_mutex);
2751         cb->args[0] = idx;
2752
2753         return skb->len;
2754 }
2755
2756 static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2757                                     struct nlattr *nla, int full_entry,
2758                                     struct ip_vs_service **ret_svc)
2759 {
2760         struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2761         struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2762         struct ip_vs_service *svc;
2763
2764         /* Parse mandatory identifying service fields first */
2765         if (nla == NULL ||
2766             nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2767                 return -EINVAL;
2768
2769         nla_af          = attrs[IPVS_SVC_ATTR_AF];
2770         nla_protocol    = attrs[IPVS_SVC_ATTR_PROTOCOL];
2771         nla_addr        = attrs[IPVS_SVC_ATTR_ADDR];
2772         nla_port        = attrs[IPVS_SVC_ATTR_PORT];
2773         nla_fwmark      = attrs[IPVS_SVC_ATTR_FWMARK];
2774
2775         if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2776                 return -EINVAL;
2777
2778         memset(usvc, 0, sizeof(*usvc));
2779
2780         usvc->af = nla_get_u16(nla_af);
2781 #ifdef CONFIG_IP_VS_IPV6
2782         if (usvc->af != AF_INET && usvc->af != AF_INET6)
2783 #else
2784         if (usvc->af != AF_INET)
2785 #endif
2786                 return -EAFNOSUPPORT;
2787
2788         if (nla_fwmark) {
2789                 usvc->protocol = IPPROTO_TCP;
2790                 usvc->fwmark = nla_get_u32(nla_fwmark);
2791         } else {
2792                 usvc->protocol = nla_get_u16(nla_protocol);
2793                 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2794                 usvc->port = nla_get_u16(nla_port);
2795                 usvc->fwmark = 0;
2796         }
2797
2798         if (usvc->fwmark)
2799                 svc = __ip_vs_svc_fwm_find(usvc->af, usvc->fwmark);
2800         else
2801                 svc = __ip_vs_service_find(usvc->af, usvc->protocol,
2802                                            &usvc->addr, usvc->port);
2803         *ret_svc = svc;
2804
2805         /* If a full entry was requested, check for the additional fields */
2806         if (full_entry) {
2807                 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
2808                               *nla_netmask;
2809                 struct ip_vs_flags flags;
2810
2811                 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2812                 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
2813                 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2814                 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2815                 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2816
2817                 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2818                         return -EINVAL;
2819
2820                 nla_memcpy(&flags, nla_flags, sizeof(flags));
2821
2822                 /* prefill flags from service if it already exists */
2823                 if (svc)
2824                         usvc->flags = svc->flags;
2825
2826                 /* set new flags from userland */
2827                 usvc->flags = (usvc->flags & ~flags.mask) |
2828                               (flags.flags & flags.mask);
2829                 usvc->sched_name = nla_data(nla_sched);
2830                 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
2831                 usvc->timeout = nla_get_u32(nla_timeout);
2832                 usvc->netmask = nla_get_u32(nla_netmask);
2833         }
2834
2835         return 0;
2836 }
2837
2838 static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2839 {
2840         struct ip_vs_service_user_kern usvc;
2841         struct ip_vs_service *svc;
2842         int ret;
2843
2844         ret = ip_vs_genl_parse_service(&usvc, nla, 0, &svc);
2845         return ret ? ERR_PTR(ret) : svc;
2846 }
2847
2848 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2849 {
2850         struct nlattr *nl_dest;
2851
2852         nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2853         if (!nl_dest)
2854                 return -EMSGSIZE;
2855
2856         NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2857         NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2858
2859         NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2860                     atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2861         NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2862         NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2863         NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2864         NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2865                     atomic_read(&dest->activeconns));
2866         NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2867                     atomic_read(&dest->inactconns));
2868         NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2869                     atomic_read(&dest->persistconns));
2870
2871         if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2872                 goto nla_put_failure;
2873
2874         nla_nest_end(skb, nl_dest);
2875
2876         return 0;
2877
2878 nla_put_failure:
2879         nla_nest_cancel(skb, nl_dest);
2880         return -EMSGSIZE;
2881 }
2882
2883 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2884                                 struct netlink_callback *cb)
2885 {
2886         void *hdr;
2887
2888         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2889                           &ip_vs_genl_family, NLM_F_MULTI,
2890                           IPVS_CMD_NEW_DEST);
2891         if (!hdr)
2892                 return -EMSGSIZE;
2893
2894         if (ip_vs_genl_fill_dest(skb, dest) < 0)
2895                 goto nla_put_failure;
2896
2897         return genlmsg_end(skb, hdr);
2898
2899 nla_put_failure:
2900         genlmsg_cancel(skb, hdr);
2901         return -EMSGSIZE;
2902 }
2903
2904 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2905                                  struct netlink_callback *cb)
2906 {
2907         int idx = 0;
2908         int start = cb->args[0];
2909         struct ip_vs_service *svc;
2910         struct ip_vs_dest *dest;
2911         struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2912
2913         mutex_lock(&__ip_vs_mutex);
2914
2915         /* Try to find the service for which to dump destinations */
2916         if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2917                         IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2918                 goto out_err;
2919
2920         svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2921         if (IS_ERR(svc) || svc == NULL)
2922                 goto out_err;
2923
2924         /* Dump the destinations */
2925         list_for_each_entry(dest, &svc->destinations, n_list) {
2926                 if (++idx <= start)
2927                         continue;
2928                 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2929                         idx--;
2930                         goto nla_put_failure;
2931                 }
2932         }
2933
2934 nla_put_failure:
2935         cb->args[0] = idx;
2936
2937 out_err:
2938         mutex_unlock(&__ip_vs_mutex);
2939
2940         return skb->len;
2941 }
2942
2943 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
2944                                  struct nlattr *nla, int full_entry)
2945 {
2946         struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2947         struct nlattr *nla_addr, *nla_port;
2948
2949         /* Parse mandatory identifying destination fields first */
2950         if (nla == NULL ||
2951             nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2952                 return -EINVAL;
2953
2954         nla_addr        = attrs[IPVS_DEST_ATTR_ADDR];
2955         nla_port        = attrs[IPVS_DEST_ATTR_PORT];
2956
2957         if (!(nla_addr && nla_port))
2958                 return -EINVAL;
2959
2960         memset(udest, 0, sizeof(*udest));
2961
2962         nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2963         udest->port = nla_get_u16(nla_port);
2964
2965         /* If a full entry was requested, check for the additional fields */
2966         if (full_entry) {
2967                 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2968                               *nla_l_thresh;
2969
2970                 nla_fwd         = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2971                 nla_weight      = attrs[IPVS_DEST_ATTR_WEIGHT];
2972                 nla_u_thresh    = attrs[IPVS_DEST_ATTR_U_THRESH];
2973                 nla_l_thresh    = attrs[IPVS_DEST_ATTR_L_THRESH];
2974
2975                 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2976                         return -EINVAL;
2977
2978                 udest->conn_flags = nla_get_u32(nla_fwd)
2979                                     & IP_VS_CONN_F_FWD_MASK;
2980                 udest->weight = nla_get_u32(nla_weight);
2981                 udest->u_threshold = nla_get_u32(nla_u_thresh);
2982                 udest->l_threshold = nla_get_u32(nla_l_thresh);
2983         }
2984
2985         return 0;
2986 }
2987
2988 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2989                                   const char *mcast_ifn, __be32 syncid)
2990 {
2991         struct nlattr *nl_daemon;
2992
2993         nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2994         if (!nl_daemon)
2995                 return -EMSGSIZE;
2996
2997         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2998         NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2999         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3000
3001         nla_nest_end(skb, nl_daemon);
3002
3003         return 0;
3004
3005 nla_put_failure:
3006         nla_nest_cancel(skb, nl_daemon);
3007         return -EMSGSIZE;
3008 }
3009
3010 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3011                                   const char *mcast_ifn, __be32 syncid,
3012                                   struct netlink_callback *cb)
3013 {
3014         void *hdr;
3015         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3016                           &ip_vs_genl_family, NLM_F_MULTI,
3017                           IPVS_CMD_NEW_DAEMON);
3018         if (!hdr)
3019                 return -EMSGSIZE;
3020
3021         if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3022                 goto nla_put_failure;
3023
3024         return genlmsg_end(skb, hdr);
3025
3026 nla_put_failure:
3027         genlmsg_cancel(skb, hdr);
3028         return -EMSGSIZE;
3029 }
3030
3031 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3032                                    struct netlink_callback *cb)
3033 {
3034         mutex_lock(&__ip_vs_mutex);
3035         if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3036                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3037                                            ip_vs_master_mcast_ifn,
3038                                            ip_vs_master_syncid, cb) < 0)
3039                         goto nla_put_failure;
3040
3041                 cb->args[0] = 1;
3042         }
3043
3044         if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3045                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3046                                            ip_vs_backup_mcast_ifn,
3047                                            ip_vs_backup_syncid, cb) < 0)
3048                         goto nla_put_failure;
3049
3050                 cb->args[1] = 1;
3051         }
3052
3053 nla_put_failure:
3054         mutex_unlock(&__ip_vs_mutex);
3055
3056         return skb->len;
3057 }
3058
3059 static int ip_vs_genl_new_daemon(struct nlattr **attrs)
3060 {
3061         if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3062               attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3063               attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3064                 return -EINVAL;
3065
3066         return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3067                                  nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3068                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3069 }
3070
3071 static int ip_vs_genl_del_daemon(struct nlattr **attrs)
3072 {
3073         if (!attrs[IPVS_DAEMON_ATTR_STATE])
3074                 return -EINVAL;
3075
3076         return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3077 }
3078
3079 static int ip_vs_genl_set_config(struct nlattr **attrs)
3080 {
3081         struct ip_vs_timeout_user t;
3082
3083         __ip_vs_get_timeouts(&t);
3084
3085         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3086                 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3087
3088         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3089                 t.tcp_fin_timeout =
3090                         nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3091
3092         if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3093                 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3094
3095         return ip_vs_set_timeout(&t);
3096 }
3097
3098 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3099 {
3100         struct ip_vs_service *svc = NULL;
3101         struct ip_vs_service_user_kern usvc;
3102         struct ip_vs_dest_user_kern udest;
3103         int ret = 0, cmd;
3104         int need_full_svc = 0, need_full_dest = 0;
3105
3106         cmd = info->genlhdr->cmd;
3107
3108         mutex_lock(&__ip_vs_mutex);
3109
3110         if (cmd == IPVS_CMD_FLUSH) {
3111                 ret = ip_vs_flush();
3112                 goto out;
3113         } else if (cmd == IPVS_CMD_SET_CONFIG) {
3114                 ret = ip_vs_genl_set_config(info->attrs);
3115                 goto out;
3116         } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3117                    cmd == IPVS_CMD_DEL_DAEMON) {
3118
3119                 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3120
3121                 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3122                     nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3123                                      info->attrs[IPVS_CMD_ATTR_DAEMON],
3124                                      ip_vs_daemon_policy)) {
3125                         ret = -EINVAL;
3126                         goto out;
3127                 }
3128
3129                 if (cmd == IPVS_CMD_NEW_DAEMON)
3130                         ret = ip_vs_genl_new_daemon(daemon_attrs);
3131                 else
3132                         ret = ip_vs_genl_del_daemon(daemon_attrs);
3133                 goto out;
3134         } else if (cmd == IPVS_CMD_ZERO &&
3135                    !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3136                 ret = ip_vs_zero_all();
3137                 goto out;
3138         }
3139
3140         /* All following commands require a service argument, so check if we
3141          * received a valid one. We need a full service specification when
3142          * adding / editing a service. Only identifying members otherwise. */
3143         if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3144                 need_full_svc = 1;
3145
3146         ret = ip_vs_genl_parse_service(&usvc,
3147                                        info->attrs[IPVS_CMD_ATTR_SERVICE],
3148                                        need_full_svc, &svc);
3149         if (ret)
3150                 goto out;
3151
3152         /* Unless we're adding a new service, the service must already exist */
3153         if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3154                 ret = -ESRCH;
3155                 goto out;
3156         }
3157
3158         /* Destination commands require a valid destination argument. For
3159          * adding / editing a destination, we need a full destination
3160          * specification. */
3161         if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3162             cmd == IPVS_CMD_DEL_DEST) {
3163                 if (cmd != IPVS_CMD_DEL_DEST)
3164                         need_full_dest = 1;
3165
3166                 ret = ip_vs_genl_parse_dest(&udest,
3167                                             info->attrs[IPVS_CMD_ATTR_DEST],
3168                                             need_full_dest);
3169                 if (ret)
3170                         goto out;
3171         }
3172
3173         switch (cmd) {
3174         case IPVS_CMD_NEW_SERVICE:
3175                 if (svc == NULL)
3176                         ret = ip_vs_add_service(&usvc, &svc);
3177                 else
3178                         ret = -EEXIST;
3179                 break;
3180         case IPVS_CMD_SET_SERVICE:
3181                 ret = ip_vs_edit_service(svc, &usvc);
3182                 break;
3183         case IPVS_CMD_DEL_SERVICE:
3184                 ret = ip_vs_del_service(svc);
3185                 /* do not use svc, it can be freed */
3186                 break;
3187         case IPVS_CMD_NEW_DEST:
3188                 ret = ip_vs_add_dest(svc, &udest);
3189                 break;
3190         case IPVS_CMD_SET_DEST:
3191                 ret = ip_vs_edit_dest(svc, &udest);
3192                 break;
3193         case IPVS_CMD_DEL_DEST:
3194                 ret = ip_vs_del_dest(svc, &udest);
3195                 break;
3196         case IPVS_CMD_ZERO:
3197                 ret = ip_vs_zero_service(svc);
3198                 break;
3199         default:
3200                 ret = -EINVAL;
3201         }
3202
3203 out:
3204         mutex_unlock(&__ip_vs_mutex);
3205
3206         return ret;
3207 }
3208
3209 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3210 {
3211         struct sk_buff *msg;
3212         void *reply;
3213         int ret, cmd, reply_cmd;
3214
3215         cmd = info->genlhdr->cmd;
3216
3217         if (cmd == IPVS_CMD_GET_SERVICE)
3218                 reply_cmd = IPVS_CMD_NEW_SERVICE;
3219         else if (cmd == IPVS_CMD_GET_INFO)
3220                 reply_cmd = IPVS_CMD_SET_INFO;
3221         else if (cmd == IPVS_CMD_GET_CONFIG)
3222                 reply_cmd = IPVS_CMD_SET_CONFIG;
3223         else {
3224                 pr_err("unknown Generic Netlink command\n");
3225                 return -EINVAL;
3226         }
3227
3228         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3229         if (!msg)
3230                 return -ENOMEM;
3231
3232         mutex_lock(&__ip_vs_mutex);
3233
3234         reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3235         if (reply == NULL)
3236                 goto nla_put_failure;
3237
3238         switch (cmd) {
3239         case IPVS_CMD_GET_SERVICE:
3240         {
3241                 struct ip_vs_service *svc;
3242
3243                 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3244                 if (IS_ERR(svc)) {
3245                         ret = PTR_ERR(svc);
3246                         goto out_err;
3247                 } else if (svc) {
3248                         ret = ip_vs_genl_fill_service(msg, svc);
3249                         if (ret)
3250                                 goto nla_put_failure;
3251                 } else {
3252                         ret = -ESRCH;
3253                         goto out_err;
3254                 }
3255
3256                 break;
3257         }
3258
3259         case IPVS_CMD_GET_CONFIG:
3260         {
3261                 struct ip_vs_timeout_user t;
3262
3263                 __ip_vs_get_timeouts(&t);
3264 #ifdef CONFIG_IP_VS_PROTO_TCP
3265                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3266                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3267                             t.tcp_fin_timeout);
3268 #endif
3269 #ifdef CONFIG_IP_VS_PROTO_UDP
3270                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3271 #endif
3272
3273                 break;
3274         }
3275
3276         case IPVS_CMD_GET_INFO:
3277                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3278                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3279                             ip_vs_conn_tab_size);
3280                 break;
3281         }
3282
3283         genlmsg_end(msg, reply);
3284         ret = genlmsg_reply(msg, info);
3285         goto out;
3286
3287 nla_put_failure:
3288         pr_err("not enough space in Netlink message\n");
3289         ret = -EMSGSIZE;
3290
3291 out_err:
3292         nlmsg_free(msg);
3293 out:
3294         mutex_unlock(&__ip_vs_mutex);
3295
3296         return ret;
3297 }
3298
3299
3300 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3301         {
3302                 .cmd    = IPVS_CMD_NEW_SERVICE,
3303                 .flags  = GENL_ADMIN_PERM,
3304                 .policy = ip_vs_cmd_policy,
3305                 .doit   = ip_vs_genl_set_cmd,
3306         },
3307         {
3308                 .cmd    = IPVS_CMD_SET_SERVICE,
3309                 .flags  = GENL_ADMIN_PERM,
3310                 .policy = ip_vs_cmd_policy,
3311                 .doit   = ip_vs_genl_set_cmd,
3312         },
3313         {
3314                 .cmd    = IPVS_CMD_DEL_SERVICE,
3315                 .flags  = GENL_ADMIN_PERM,
3316                 .policy = ip_vs_cmd_policy,
3317                 .doit   = ip_vs_genl_set_cmd,
3318         },
3319         {
3320                 .cmd    = IPVS_CMD_GET_SERVICE,
3321                 .flags  = GENL_ADMIN_PERM,
3322                 .doit   = ip_vs_genl_get_cmd,
3323                 .dumpit = ip_vs_genl_dump_services,
3324                 .policy = ip_vs_cmd_policy,
3325         },
3326         {
3327                 .cmd    = IPVS_CMD_NEW_DEST,
3328                 .flags  = GENL_ADMIN_PERM,
3329                 .policy = ip_vs_cmd_policy,
3330                 .doit   = ip_vs_genl_set_cmd,
3331         },
3332         {
3333                 .cmd    = IPVS_CMD_SET_DEST,
3334                 .flags  = GENL_ADMIN_PERM,
3335                 .policy = ip_vs_cmd_policy,
3336                 .doit   = ip_vs_genl_set_cmd,
3337         },
3338         {
3339                 .cmd    = IPVS_CMD_DEL_DEST,
3340                 .flags  = GENL_ADMIN_PERM,
3341                 .policy = ip_vs_cmd_policy,
3342                 .doit   = ip_vs_genl_set_cmd,
3343         },
3344         {
3345                 .cmd    = IPVS_CMD_GET_DEST,
3346                 .flags  = GENL_ADMIN_PERM,
3347                 .policy = ip_vs_cmd_policy,
3348                 .dumpit = ip_vs_genl_dump_dests,
3349         },
3350         {
3351                 .cmd    = IPVS_CMD_NEW_DAEMON,
3352                 .flags  = GENL_ADMIN_PERM,
3353                 .policy = ip_vs_cmd_policy,
3354                 .doit   = ip_vs_genl_set_cmd,
3355         },
3356         {
3357                 .cmd    = IPVS_CMD_DEL_DAEMON,
3358                 .flags  = GENL_ADMIN_PERM,
3359                 .policy = ip_vs_cmd_policy,
3360                 .doit   = ip_vs_genl_set_cmd,
3361         },
3362         {
3363                 .cmd    = IPVS_CMD_GET_DAEMON,
3364                 .flags  = GENL_ADMIN_PERM,
3365                 .dumpit = ip_vs_genl_dump_daemons,
3366         },
3367         {
3368                 .cmd    = IPVS_CMD_SET_CONFIG,
3369                 .flags  = GENL_ADMIN_PERM,
3370                 .policy = ip_vs_cmd_policy,
3371                 .doit   = ip_vs_genl_set_cmd,
3372         },
3373         {
3374                 .cmd    = IPVS_CMD_GET_CONFIG,
3375                 .flags  = GENL_ADMIN_PERM,
3376                 .doit   = ip_vs_genl_get_cmd,
3377         },
3378         {
3379                 .cmd    = IPVS_CMD_GET_INFO,
3380                 .flags  = GENL_ADMIN_PERM,
3381                 .doit   = ip_vs_genl_get_cmd,
3382         },
3383         {
3384                 .cmd    = IPVS_CMD_ZERO,
3385                 .flags  = GENL_ADMIN_PERM,
3386                 .policy = ip_vs_cmd_policy,
3387                 .doit   = ip_vs_genl_set_cmd,
3388         },
3389         {
3390                 .cmd    = IPVS_CMD_FLUSH,
3391                 .flags  = GENL_ADMIN_PERM,
3392                 .doit   = ip_vs_genl_set_cmd,
3393         },
3394 };
3395
3396 static int __init ip_vs_genl_register(void)
3397 {
3398         return genl_register_family_with_ops(&ip_vs_genl_family,
3399                 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
3400 }
3401
3402 static void ip_vs_genl_unregister(void)
3403 {
3404         genl_unregister_family(&ip_vs_genl_family);
3405 }
3406
3407 /* End of Generic Netlink interface definitions */
3408
3409
3410 int __init ip_vs_control_init(void)
3411 {
3412         int ret;
3413         int idx;
3414
3415         EnterFunction(2);
3416
3417         /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3418         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
3419                 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3420                 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3421         }
3422         for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
3423                 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3424         }
3425         smp_wmb();
3426
3427         ret = nf_register_sockopt(&ip_vs_sockopts);
3428         if (ret) {
3429                 pr_err("cannot register sockopt.\n");
3430                 return ret;
3431         }
3432
3433         ret = ip_vs_genl_register();
3434         if (ret) {
3435                 pr_err("cannot register Generic Netlink interface.\n");
3436                 nf_unregister_sockopt(&ip_vs_sockopts);
3437                 return ret;
3438         }
3439
3440         proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3441         proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
3442
3443         sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
3444
3445         ip_vs_new_estimator(&ip_vs_stats);
3446
3447         /* Hook the defense timer */
3448         schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3449
3450         LeaveFunction(2);
3451         return 0;
3452 }
3453
3454
3455 void ip_vs_control_cleanup(void)
3456 {
3457         EnterFunction(2);
3458         ip_vs_trash_cleanup();
3459         cancel_delayed_work_sync(&defense_work);
3460         cancel_work_sync(&defense_work.work);
3461         ip_vs_kill_estimator(&ip_vs_stats);
3462         unregister_sysctl_table(sysctl_header);
3463         proc_net_remove(&init_net, "ip_vs_stats");
3464         proc_net_remove(&init_net, "ip_vs");
3465         ip_vs_genl_unregister();
3466         nf_unregister_sockopt(&ip_vs_sockopts);
3467         LeaveFunction(2);
3468 }