IPVS: Backup, Prepare for transferring firewall marks (fwmark) to the backup daemon.
[linux-2.6.git] / net / netfilter / ipvs / ip_vs_ctl.c
1 /*
2  * IPVS         An implementation of the IP virtual server support for the
3  *              LINUX operating system.  IPVS is now implemented as a module
4  *              over the NetFilter framework. IPVS can be used to build a
5  *              high-performance and highly available server based on a
6  *              cluster of servers.
7  *
8  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
9  *              Peter Kese <peter.kese@ijs.si>
10  *              Julian Anastasov <ja@ssi.bg>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  *
17  * Changes:
18  *
19  */
20
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
28 #include <linux/fs.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34 #include <linux/slab.h>
35
36 #include <linux/netfilter.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/mutex.h>
39
40 #include <net/net_namespace.h>
41 #include <net/ip.h>
42 #ifdef CONFIG_IP_VS_IPV6
43 #include <net/ipv6.h>
44 #include <net/ip6_route.h>
45 #endif
46 #include <net/route.h>
47 #include <net/sock.h>
48 #include <net/genetlink.h>
49
50 #include <asm/uaccess.h>
51
52 #include <net/ip_vs.h>
53
54 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
55 static DEFINE_MUTEX(__ip_vs_mutex);
56
57 /* lock for service table */
58 static DEFINE_RWLOCK(__ip_vs_svc_lock);
59
60 /* lock for table with the real services */
61 static DEFINE_RWLOCK(__ip_vs_rs_lock);
62
63 /* lock for state and timeout tables */
64 static DEFINE_SPINLOCK(ip_vs_securetcp_lock);
65
66 /* lock for drop entry handling */
67 static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
68
69 /* lock for drop packet handling */
70 static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
71
72 /* 1/rate drop and drop-entry variables */
73 int ip_vs_drop_rate = 0;
74 int ip_vs_drop_counter = 0;
75 static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
76
77 /* number of virtual services */
78 static int ip_vs_num_services = 0;
79
80 /* sysctl variables */
81 static int sysctl_ip_vs_drop_entry = 0;
82 static int sysctl_ip_vs_drop_packet = 0;
83 static int sysctl_ip_vs_secure_tcp = 0;
84 static int sysctl_ip_vs_amemthresh = 1024;
85 static int sysctl_ip_vs_am_droprate = 10;
86 int sysctl_ip_vs_cache_bypass = 0;
87 int sysctl_ip_vs_expire_nodest_conn = 0;
88 int sysctl_ip_vs_expire_quiescent_template = 0;
89 int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
90 int sysctl_ip_vs_nat_icmp_send = 0;
91 #ifdef CONFIG_IP_VS_NFCT
92 int sysctl_ip_vs_conntrack;
93 #endif
94 int sysctl_ip_vs_snat_reroute = 1;
95
96
97 #ifdef CONFIG_IP_VS_DEBUG
98 static int sysctl_ip_vs_debug_level = 0;
99
100 int ip_vs_get_debug_level(void)
101 {
102         return sysctl_ip_vs_debug_level;
103 }
104 #endif
105
106 #ifdef CONFIG_IP_VS_IPV6
107 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
108 static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
109 {
110         struct rt6_info *rt;
111         struct flowi fl = {
112                 .oif = 0,
113                 .nl_u = {
114                         .ip6_u = {
115                                 .daddr = *addr,
116                                 .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
117         };
118
119         rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
120         if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
121                         return 1;
122
123         return 0;
124 }
125 #endif
126 /*
127  *      update_defense_level is called from keventd and from sysctl,
128  *      so it needs to protect itself from softirqs
129  */
130 static void update_defense_level(void)
131 {
132         struct sysinfo i;
133         static int old_secure_tcp = 0;
134         int availmem;
135         int nomem;
136         int to_change = -1;
137
138         /* we only count free and buffered memory (in pages) */
139         si_meminfo(&i);
140         availmem = i.freeram + i.bufferram;
141         /* however in linux 2.5 the i.bufferram is total page cache size,
142            we need adjust it */
143         /* si_swapinfo(&i); */
144         /* availmem = availmem - (i.totalswap - i.freeswap); */
145
146         nomem = (availmem < sysctl_ip_vs_amemthresh);
147
148         local_bh_disable();
149
150         /* drop_entry */
151         spin_lock(&__ip_vs_dropentry_lock);
152         switch (sysctl_ip_vs_drop_entry) {
153         case 0:
154                 atomic_set(&ip_vs_dropentry, 0);
155                 break;
156         case 1:
157                 if (nomem) {
158                         atomic_set(&ip_vs_dropentry, 1);
159                         sysctl_ip_vs_drop_entry = 2;
160                 } else {
161                         atomic_set(&ip_vs_dropentry, 0);
162                 }
163                 break;
164         case 2:
165                 if (nomem) {
166                         atomic_set(&ip_vs_dropentry, 1);
167                 } else {
168                         atomic_set(&ip_vs_dropentry, 0);
169                         sysctl_ip_vs_drop_entry = 1;
170                 };
171                 break;
172         case 3:
173                 atomic_set(&ip_vs_dropentry, 1);
174                 break;
175         }
176         spin_unlock(&__ip_vs_dropentry_lock);
177
178         /* drop_packet */
179         spin_lock(&__ip_vs_droppacket_lock);
180         switch (sysctl_ip_vs_drop_packet) {
181         case 0:
182                 ip_vs_drop_rate = 0;
183                 break;
184         case 1:
185                 if (nomem) {
186                         ip_vs_drop_rate = ip_vs_drop_counter
187                                 = sysctl_ip_vs_amemthresh /
188                                 (sysctl_ip_vs_amemthresh-availmem);
189                         sysctl_ip_vs_drop_packet = 2;
190                 } else {
191                         ip_vs_drop_rate = 0;
192                 }
193                 break;
194         case 2:
195                 if (nomem) {
196                         ip_vs_drop_rate = ip_vs_drop_counter
197                                 = sysctl_ip_vs_amemthresh /
198                                 (sysctl_ip_vs_amemthresh-availmem);
199                 } else {
200                         ip_vs_drop_rate = 0;
201                         sysctl_ip_vs_drop_packet = 1;
202                 }
203                 break;
204         case 3:
205                 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
206                 break;
207         }
208         spin_unlock(&__ip_vs_droppacket_lock);
209
210         /* secure_tcp */
211         spin_lock(&ip_vs_securetcp_lock);
212         switch (sysctl_ip_vs_secure_tcp) {
213         case 0:
214                 if (old_secure_tcp >= 2)
215                         to_change = 0;
216                 break;
217         case 1:
218                 if (nomem) {
219                         if (old_secure_tcp < 2)
220                                 to_change = 1;
221                         sysctl_ip_vs_secure_tcp = 2;
222                 } else {
223                         if (old_secure_tcp >= 2)
224                                 to_change = 0;
225                 }
226                 break;
227         case 2:
228                 if (nomem) {
229                         if (old_secure_tcp < 2)
230                                 to_change = 1;
231                 } else {
232                         if (old_secure_tcp >= 2)
233                                 to_change = 0;
234                         sysctl_ip_vs_secure_tcp = 1;
235                 }
236                 break;
237         case 3:
238                 if (old_secure_tcp < 2)
239                         to_change = 1;
240                 break;
241         }
242         old_secure_tcp = sysctl_ip_vs_secure_tcp;
243         if (to_change >= 0)
244                 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
245         spin_unlock(&ip_vs_securetcp_lock);
246
247         local_bh_enable();
248 }
249
250
251 /*
252  *      Timer for checking the defense
253  */
254 #define DEFENSE_TIMER_PERIOD    1*HZ
255 static void defense_work_handler(struct work_struct *work);
256 static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
257
258 static void defense_work_handler(struct work_struct *work)
259 {
260         update_defense_level();
261         if (atomic_read(&ip_vs_dropentry))
262                 ip_vs_random_dropentry();
263
264         schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
265 }
266
267 int
268 ip_vs_use_count_inc(void)
269 {
270         return try_module_get(THIS_MODULE);
271 }
272
273 void
274 ip_vs_use_count_dec(void)
275 {
276         module_put(THIS_MODULE);
277 }
278
279
280 /*
281  *      Hash table: for virtual service lookups
282  */
283 #define IP_VS_SVC_TAB_BITS 8
284 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
285 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
286
287 /* the service table hashed by <protocol, addr, port> */
288 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
289 /* the service table hashed by fwmark */
290 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
291
292 /*
293  *      Hash table: for real service lookups
294  */
295 #define IP_VS_RTAB_BITS 4
296 #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
297 #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
298
299 static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
300
301 /*
302  *      Trash for destinations
303  */
304 static LIST_HEAD(ip_vs_dest_trash);
305
306 /*
307  *      FTP & NULL virtual service counters
308  */
309 static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
310 static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
311
312
313 /*
314  *      Returns hash value for virtual service
315  */
316 static __inline__ unsigned
317 ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
318                   __be16 port)
319 {
320         register unsigned porth = ntohs(port);
321         __be32 addr_fold = addr->ip;
322
323 #ifdef CONFIG_IP_VS_IPV6
324         if (af == AF_INET6)
325                 addr_fold = addr->ip6[0]^addr->ip6[1]^
326                             addr->ip6[2]^addr->ip6[3];
327 #endif
328
329         return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
330                 & IP_VS_SVC_TAB_MASK;
331 }
332
333 /*
334  *      Returns hash value of fwmark for virtual service lookup
335  */
336 static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
337 {
338         return fwmark & IP_VS_SVC_TAB_MASK;
339 }
340
341 /*
342  *      Hashes a service in the ip_vs_svc_table by <proto,addr,port>
343  *      or in the ip_vs_svc_fwm_table by fwmark.
344  *      Should be called with locked tables.
345  */
346 static int ip_vs_svc_hash(struct ip_vs_service *svc)
347 {
348         unsigned hash;
349
350         if (svc->flags & IP_VS_SVC_F_HASHED) {
351                 pr_err("%s(): request for already hashed, called from %pF\n",
352                        __func__, __builtin_return_address(0));
353                 return 0;
354         }
355
356         if (svc->fwmark == 0) {
357                 /*
358                  *  Hash it by <protocol,addr,port> in ip_vs_svc_table
359                  */
360                 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
361                                          svc->port);
362                 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
363         } else {
364                 /*
365                  *  Hash it by fwmark in ip_vs_svc_fwm_table
366                  */
367                 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
368                 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
369         }
370
371         svc->flags |= IP_VS_SVC_F_HASHED;
372         /* increase its refcnt because it is referenced by the svc table */
373         atomic_inc(&svc->refcnt);
374         return 1;
375 }
376
377
378 /*
379  *      Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
380  *      Should be called with locked tables.
381  */
382 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
383 {
384         if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
385                 pr_err("%s(): request for unhash flagged, called from %pF\n",
386                        __func__, __builtin_return_address(0));
387                 return 0;
388         }
389
390         if (svc->fwmark == 0) {
391                 /* Remove it from the ip_vs_svc_table table */
392                 list_del(&svc->s_list);
393         } else {
394                 /* Remove it from the ip_vs_svc_fwm_table table */
395                 list_del(&svc->f_list);
396         }
397
398         svc->flags &= ~IP_VS_SVC_F_HASHED;
399         atomic_dec(&svc->refcnt);
400         return 1;
401 }
402
403
404 /*
405  *      Get service by {proto,addr,port} in the service table.
406  */
407 static inline struct ip_vs_service *
408 __ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr,
409                     __be16 vport)
410 {
411         unsigned hash;
412         struct ip_vs_service *svc;
413
414         /* Check for "full" addressed entries */
415         hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
416
417         list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
418                 if ((svc->af == af)
419                     && ip_vs_addr_equal(af, &svc->addr, vaddr)
420                     && (svc->port == vport)
421                     && (svc->protocol == protocol)) {
422                         /* HIT */
423                         return svc;
424                 }
425         }
426
427         return NULL;
428 }
429
430
431 /*
432  *      Get service by {fwmark} in the service table.
433  */
434 static inline struct ip_vs_service *
435 __ip_vs_svc_fwm_find(int af, __u32 fwmark)
436 {
437         unsigned hash;
438         struct ip_vs_service *svc;
439
440         /* Check for fwmark addressed entries */
441         hash = ip_vs_svc_fwm_hashkey(fwmark);
442
443         list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
444                 if (svc->fwmark == fwmark && svc->af == af) {
445                         /* HIT */
446                         return svc;
447                 }
448         }
449
450         return NULL;
451 }
452
453 struct ip_vs_service *
454 ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
455                   const union nf_inet_addr *vaddr, __be16 vport)
456 {
457         struct ip_vs_service *svc;
458
459         read_lock(&__ip_vs_svc_lock);
460
461         /*
462          *      Check the table hashed by fwmark first
463          */
464         if (fwmark && (svc = __ip_vs_svc_fwm_find(af, fwmark)))
465                 goto out;
466
467         /*
468          *      Check the table hashed by <protocol,addr,port>
469          *      for "full" addressed entries
470          */
471         svc = __ip_vs_service_find(af, protocol, vaddr, vport);
472
473         if (svc == NULL
474             && protocol == IPPROTO_TCP
475             && atomic_read(&ip_vs_ftpsvc_counter)
476             && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
477                 /*
478                  * Check if ftp service entry exists, the packet
479                  * might belong to FTP data connections.
480                  */
481                 svc = __ip_vs_service_find(af, protocol, vaddr, FTPPORT);
482         }
483
484         if (svc == NULL
485             && atomic_read(&ip_vs_nullsvc_counter)) {
486                 /*
487                  * Check if the catch-all port (port zero) exists
488                  */
489                 svc = __ip_vs_service_find(af, protocol, vaddr, 0);
490         }
491
492   out:
493         if (svc)
494                 atomic_inc(&svc->usecnt);
495         read_unlock(&__ip_vs_svc_lock);
496
497         IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
498                       fwmark, ip_vs_proto_name(protocol),
499                       IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
500                       svc ? "hit" : "not hit");
501
502         return svc;
503 }
504
505
506 static inline void
507 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
508 {
509         atomic_inc(&svc->refcnt);
510         dest->svc = svc;
511 }
512
513 static void
514 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
515 {
516         struct ip_vs_service *svc = dest->svc;
517
518         dest->svc = NULL;
519         if (atomic_dec_and_test(&svc->refcnt)) {
520                 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
521                               svc->fwmark,
522                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
523                               ntohs(svc->port), atomic_read(&svc->usecnt));
524                 kfree(svc);
525         }
526 }
527
528
529 /*
530  *      Returns hash value for real service
531  */
532 static inline unsigned ip_vs_rs_hashkey(int af,
533                                             const union nf_inet_addr *addr,
534                                             __be16 port)
535 {
536         register unsigned porth = ntohs(port);
537         __be32 addr_fold = addr->ip;
538
539 #ifdef CONFIG_IP_VS_IPV6
540         if (af == AF_INET6)
541                 addr_fold = addr->ip6[0]^addr->ip6[1]^
542                             addr->ip6[2]^addr->ip6[3];
543 #endif
544
545         return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
546                 & IP_VS_RTAB_MASK;
547 }
548
549 /*
550  *      Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
551  *      should be called with locked tables.
552  */
553 static int ip_vs_rs_hash(struct ip_vs_dest *dest)
554 {
555         unsigned hash;
556
557         if (!list_empty(&dest->d_list)) {
558                 return 0;
559         }
560
561         /*
562          *      Hash by proto,addr,port,
563          *      which are the parameters of the real service.
564          */
565         hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
566
567         list_add(&dest->d_list, &ip_vs_rtable[hash]);
568
569         return 1;
570 }
571
572 /*
573  *      UNhashes ip_vs_dest from ip_vs_rtable.
574  *      should be called with locked tables.
575  */
576 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
577 {
578         /*
579          * Remove it from the ip_vs_rtable table.
580          */
581         if (!list_empty(&dest->d_list)) {
582                 list_del(&dest->d_list);
583                 INIT_LIST_HEAD(&dest->d_list);
584         }
585
586         return 1;
587 }
588
589 /*
590  *      Lookup real service by <proto,addr,port> in the real service table.
591  */
592 struct ip_vs_dest *
593 ip_vs_lookup_real_service(int af, __u16 protocol,
594                           const union nf_inet_addr *daddr,
595                           __be16 dport)
596 {
597         unsigned hash;
598         struct ip_vs_dest *dest;
599
600         /*
601          *      Check for "full" addressed entries
602          *      Return the first found entry
603          */
604         hash = ip_vs_rs_hashkey(af, daddr, dport);
605
606         read_lock(&__ip_vs_rs_lock);
607         list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
608                 if ((dest->af == af)
609                     && ip_vs_addr_equal(af, &dest->addr, daddr)
610                     && (dest->port == dport)
611                     && ((dest->protocol == protocol) ||
612                         dest->vfwmark)) {
613                         /* HIT */
614                         read_unlock(&__ip_vs_rs_lock);
615                         return dest;
616                 }
617         }
618         read_unlock(&__ip_vs_rs_lock);
619
620         return NULL;
621 }
622
623 /*
624  *      Lookup destination by {addr,port} in the given service
625  */
626 static struct ip_vs_dest *
627 ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
628                   __be16 dport)
629 {
630         struct ip_vs_dest *dest;
631
632         /*
633          * Find the destination for the given service
634          */
635         list_for_each_entry(dest, &svc->destinations, n_list) {
636                 if ((dest->af == svc->af)
637                     && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
638                     && (dest->port == dport)) {
639                         /* HIT */
640                         return dest;
641                 }
642         }
643
644         return NULL;
645 }
646
647 /*
648  * Find destination by {daddr,dport,vaddr,protocol}
649  * Cretaed to be used in ip_vs_process_message() in
650  * the backup synchronization daemon. It finds the
651  * destination to be bound to the received connection
652  * on the backup.
653  *
654  * ip_vs_lookup_real_service() looked promissing, but
655  * seems not working as expected.
656  */
657 struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
658                                    __be16 dport,
659                                    const union nf_inet_addr *vaddr,
660                                    __be16 vport, __u16 protocol, __u32 fwmark)
661 {
662         struct ip_vs_dest *dest;
663         struct ip_vs_service *svc;
664
665         svc = ip_vs_service_get(af, fwmark, protocol, vaddr, vport);
666         if (!svc)
667                 return NULL;
668         dest = ip_vs_lookup_dest(svc, daddr, dport);
669         if (dest)
670                 atomic_inc(&dest->refcnt);
671         ip_vs_service_put(svc);
672         return dest;
673 }
674
675 /*
676  *  Lookup dest by {svc,addr,port} in the destination trash.
677  *  The destination trash is used to hold the destinations that are removed
678  *  from the service table but are still referenced by some conn entries.
679  *  The reason to add the destination trash is when the dest is temporary
680  *  down (either by administrator or by monitor program), the dest can be
681  *  picked back from the trash, the remaining connections to the dest can
682  *  continue, and the counting information of the dest is also useful for
683  *  scheduling.
684  */
685 static struct ip_vs_dest *
686 ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
687                      __be16 dport)
688 {
689         struct ip_vs_dest *dest, *nxt;
690
691         /*
692          * Find the destination in trash
693          */
694         list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
695                 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
696                               "dest->refcnt=%d\n",
697                               dest->vfwmark,
698                               IP_VS_DBG_ADDR(svc->af, &dest->addr),
699                               ntohs(dest->port),
700                               atomic_read(&dest->refcnt));
701                 if (dest->af == svc->af &&
702                     ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
703                     dest->port == dport &&
704                     dest->vfwmark == svc->fwmark &&
705                     dest->protocol == svc->protocol &&
706                     (svc->fwmark ||
707                      (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
708                       dest->vport == svc->port))) {
709                         /* HIT */
710                         return dest;
711                 }
712
713                 /*
714                  * Try to purge the destination from trash if not referenced
715                  */
716                 if (atomic_read(&dest->refcnt) == 1) {
717                         IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
718                                       "from trash\n",
719                                       dest->vfwmark,
720                                       IP_VS_DBG_ADDR(svc->af, &dest->addr),
721                                       ntohs(dest->port));
722                         list_del(&dest->n_list);
723                         ip_vs_dst_reset(dest);
724                         __ip_vs_unbind_svc(dest);
725                         kfree(dest);
726                 }
727         }
728
729         return NULL;
730 }
731
732
733 /*
734  *  Clean up all the destinations in the trash
735  *  Called by the ip_vs_control_cleanup()
736  *
737  *  When the ip_vs_control_clearup is activated by ipvs module exit,
738  *  the service tables must have been flushed and all the connections
739  *  are expired, and the refcnt of each destination in the trash must
740  *  be 1, so we simply release them here.
741  */
742 static void ip_vs_trash_cleanup(void)
743 {
744         struct ip_vs_dest *dest, *nxt;
745
746         list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
747                 list_del(&dest->n_list);
748                 ip_vs_dst_reset(dest);
749                 __ip_vs_unbind_svc(dest);
750                 kfree(dest);
751         }
752 }
753
754
755 static void
756 ip_vs_zero_stats(struct ip_vs_stats *stats)
757 {
758         spin_lock_bh(&stats->lock);
759
760         memset(&stats->ustats, 0, sizeof(stats->ustats));
761         ip_vs_zero_estimator(stats);
762
763         spin_unlock_bh(&stats->lock);
764 }
765
766 /*
767  *      Update a destination in the given service
768  */
769 static void
770 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
771                     struct ip_vs_dest_user_kern *udest, int add)
772 {
773         int conn_flags;
774
775         /* set the weight and the flags */
776         atomic_set(&dest->weight, udest->weight);
777         conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
778         conn_flags |= IP_VS_CONN_F_INACTIVE;
779
780         /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
781         if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
782                 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
783         } else {
784                 /*
785                  *    Put the real service in ip_vs_rtable if not present.
786                  *    For now only for NAT!
787                  */
788                 write_lock_bh(&__ip_vs_rs_lock);
789                 ip_vs_rs_hash(dest);
790                 write_unlock_bh(&__ip_vs_rs_lock);
791         }
792         atomic_set(&dest->conn_flags, conn_flags);
793
794         /* bind the service */
795         if (!dest->svc) {
796                 __ip_vs_bind_svc(dest, svc);
797         } else {
798                 if (dest->svc != svc) {
799                         __ip_vs_unbind_svc(dest);
800                         ip_vs_zero_stats(&dest->stats);
801                         __ip_vs_bind_svc(dest, svc);
802                 }
803         }
804
805         /* set the dest status flags */
806         dest->flags |= IP_VS_DEST_F_AVAILABLE;
807
808         if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
809                 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
810         dest->u_threshold = udest->u_threshold;
811         dest->l_threshold = udest->l_threshold;
812
813         spin_lock(&dest->dst_lock);
814         ip_vs_dst_reset(dest);
815         spin_unlock(&dest->dst_lock);
816
817         if (add)
818                 ip_vs_new_estimator(&dest->stats);
819
820         write_lock_bh(&__ip_vs_svc_lock);
821
822         /* Wait until all other svc users go away */
823         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
824
825         if (add) {
826                 list_add(&dest->n_list, &svc->destinations);
827                 svc->num_dests++;
828         }
829
830         /* call the update_service, because server weight may be changed */
831         if (svc->scheduler->update_service)
832                 svc->scheduler->update_service(svc);
833
834         write_unlock_bh(&__ip_vs_svc_lock);
835 }
836
837
838 /*
839  *      Create a destination for the given service
840  */
841 static int
842 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
843                struct ip_vs_dest **dest_p)
844 {
845         struct ip_vs_dest *dest;
846         unsigned atype;
847
848         EnterFunction(2);
849
850 #ifdef CONFIG_IP_VS_IPV6
851         if (svc->af == AF_INET6) {
852                 atype = ipv6_addr_type(&udest->addr.in6);
853                 if ((!(atype & IPV6_ADDR_UNICAST) ||
854                         atype & IPV6_ADDR_LINKLOCAL) &&
855                         !__ip_vs_addr_is_local_v6(&udest->addr.in6))
856                         return -EINVAL;
857         } else
858 #endif
859         {
860                 atype = inet_addr_type(&init_net, udest->addr.ip);
861                 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
862                         return -EINVAL;
863         }
864
865         dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
866         if (dest == NULL) {
867                 pr_err("%s(): no memory.\n", __func__);
868                 return -ENOMEM;
869         }
870
871         dest->af = svc->af;
872         dest->protocol = svc->protocol;
873         dest->vaddr = svc->addr;
874         dest->vport = svc->port;
875         dest->vfwmark = svc->fwmark;
876         ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
877         dest->port = udest->port;
878
879         atomic_set(&dest->activeconns, 0);
880         atomic_set(&dest->inactconns, 0);
881         atomic_set(&dest->persistconns, 0);
882         atomic_set(&dest->refcnt, 1);
883
884         INIT_LIST_HEAD(&dest->d_list);
885         spin_lock_init(&dest->dst_lock);
886         spin_lock_init(&dest->stats.lock);
887         __ip_vs_update_dest(svc, dest, udest, 1);
888
889         *dest_p = dest;
890
891         LeaveFunction(2);
892         return 0;
893 }
894
895
896 /*
897  *      Add a destination into an existing service
898  */
899 static int
900 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
901 {
902         struct ip_vs_dest *dest;
903         union nf_inet_addr daddr;
904         __be16 dport = udest->port;
905         int ret;
906
907         EnterFunction(2);
908
909         if (udest->weight < 0) {
910                 pr_err("%s(): server weight less than zero\n", __func__);
911                 return -ERANGE;
912         }
913
914         if (udest->l_threshold > udest->u_threshold) {
915                 pr_err("%s(): lower threshold is higher than upper threshold\n",
916                         __func__);
917                 return -ERANGE;
918         }
919
920         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
921
922         /*
923          * Check if the dest already exists in the list
924          */
925         dest = ip_vs_lookup_dest(svc, &daddr, dport);
926
927         if (dest != NULL) {
928                 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
929                 return -EEXIST;
930         }
931
932         /*
933          * Check if the dest already exists in the trash and
934          * is from the same service
935          */
936         dest = ip_vs_trash_get_dest(svc, &daddr, dport);
937
938         if (dest != NULL) {
939                 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
940                               "dest->refcnt=%d, service %u/%s:%u\n",
941                               IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
942                               atomic_read(&dest->refcnt),
943                               dest->vfwmark,
944                               IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
945                               ntohs(dest->vport));
946
947                 /*
948                  * Get the destination from the trash
949                  */
950                 list_del(&dest->n_list);
951
952                 __ip_vs_update_dest(svc, dest, udest, 1);
953                 ret = 0;
954         } else {
955                 /*
956                  * Allocate and initialize the dest structure
957                  */
958                 ret = ip_vs_new_dest(svc, udest, &dest);
959         }
960         LeaveFunction(2);
961
962         return ret;
963 }
964
965
966 /*
967  *      Edit a destination in the given service
968  */
969 static int
970 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
971 {
972         struct ip_vs_dest *dest;
973         union nf_inet_addr daddr;
974         __be16 dport = udest->port;
975
976         EnterFunction(2);
977
978         if (udest->weight < 0) {
979                 pr_err("%s(): server weight less than zero\n", __func__);
980                 return -ERANGE;
981         }
982
983         if (udest->l_threshold > udest->u_threshold) {
984                 pr_err("%s(): lower threshold is higher than upper threshold\n",
985                         __func__);
986                 return -ERANGE;
987         }
988
989         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
990
991         /*
992          *  Lookup the destination list
993          */
994         dest = ip_vs_lookup_dest(svc, &daddr, dport);
995
996         if (dest == NULL) {
997                 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
998                 return -ENOENT;
999         }
1000
1001         __ip_vs_update_dest(svc, dest, udest, 0);
1002         LeaveFunction(2);
1003
1004         return 0;
1005 }
1006
1007
1008 /*
1009  *      Delete a destination (must be already unlinked from the service)
1010  */
1011 static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1012 {
1013         ip_vs_kill_estimator(&dest->stats);
1014
1015         /*
1016          *  Remove it from the d-linked list with the real services.
1017          */
1018         write_lock_bh(&__ip_vs_rs_lock);
1019         ip_vs_rs_unhash(dest);
1020         write_unlock_bh(&__ip_vs_rs_lock);
1021
1022         /*
1023          *  Decrease the refcnt of the dest, and free the dest
1024          *  if nobody refers to it (refcnt=0). Otherwise, throw
1025          *  the destination into the trash.
1026          */
1027         if (atomic_dec_and_test(&dest->refcnt)) {
1028                 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1029                               dest->vfwmark,
1030                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1031                               ntohs(dest->port));
1032                 ip_vs_dst_reset(dest);
1033                 /* simply decrease svc->refcnt here, let the caller check
1034                    and release the service if nobody refers to it.
1035                    Only user context can release destination and service,
1036                    and only one user context can update virtual service at a
1037                    time, so the operation here is OK */
1038                 atomic_dec(&dest->svc->refcnt);
1039                 kfree(dest);
1040         } else {
1041                 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1042                               "dest->refcnt=%d\n",
1043                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1044                               ntohs(dest->port),
1045                               atomic_read(&dest->refcnt));
1046                 list_add(&dest->n_list, &ip_vs_dest_trash);
1047                 atomic_inc(&dest->refcnt);
1048         }
1049 }
1050
1051
1052 /*
1053  *      Unlink a destination from the given service
1054  */
1055 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1056                                 struct ip_vs_dest *dest,
1057                                 int svcupd)
1058 {
1059         dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1060
1061         /*
1062          *  Remove it from the d-linked destination list.
1063          */
1064         list_del(&dest->n_list);
1065         svc->num_dests--;
1066
1067         /*
1068          *  Call the update_service function of its scheduler
1069          */
1070         if (svcupd && svc->scheduler->update_service)
1071                         svc->scheduler->update_service(svc);
1072 }
1073
1074
1075 /*
1076  *      Delete a destination server in the given service
1077  */
1078 static int
1079 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1080 {
1081         struct ip_vs_dest *dest;
1082         __be16 dport = udest->port;
1083
1084         EnterFunction(2);
1085
1086         dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1087
1088         if (dest == NULL) {
1089                 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1090                 return -ENOENT;
1091         }
1092
1093         write_lock_bh(&__ip_vs_svc_lock);
1094
1095         /*
1096          *      Wait until all other svc users go away.
1097          */
1098         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1099
1100         /*
1101          *      Unlink dest from the service
1102          */
1103         __ip_vs_unlink_dest(svc, dest, 1);
1104
1105         write_unlock_bh(&__ip_vs_svc_lock);
1106
1107         /*
1108          *      Delete the destination
1109          */
1110         __ip_vs_del_dest(dest);
1111
1112         LeaveFunction(2);
1113
1114         return 0;
1115 }
1116
1117
1118 /*
1119  *      Add a service into the service hash table
1120  */
1121 static int
1122 ip_vs_add_service(struct ip_vs_service_user_kern *u,
1123                   struct ip_vs_service **svc_p)
1124 {
1125         int ret = 0;
1126         struct ip_vs_scheduler *sched = NULL;
1127         struct ip_vs_pe *pe = NULL;
1128         struct ip_vs_service *svc = NULL;
1129
1130         /* increase the module use count */
1131         ip_vs_use_count_inc();
1132
1133         /* Lookup the scheduler by 'u->sched_name' */
1134         sched = ip_vs_scheduler_get(u->sched_name);
1135         if (sched == NULL) {
1136                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1137                 ret = -ENOENT;
1138                 goto out_err;
1139         }
1140
1141         if (u->pe_name && *u->pe_name) {
1142                 pe = ip_vs_pe_getbyname(u->pe_name);
1143                 if (pe == NULL) {
1144                         pr_info("persistence engine module ip_vs_pe_%s "
1145                                 "not found\n", u->pe_name);
1146                         ret = -ENOENT;
1147                         goto out_err;
1148                 }
1149         }
1150
1151 #ifdef CONFIG_IP_VS_IPV6
1152         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1153                 ret = -EINVAL;
1154                 goto out_err;
1155         }
1156 #endif
1157
1158         svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1159         if (svc == NULL) {
1160                 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1161                 ret = -ENOMEM;
1162                 goto out_err;
1163         }
1164
1165         /* I'm the first user of the service */
1166         atomic_set(&svc->usecnt, 0);
1167         atomic_set(&svc->refcnt, 0);
1168
1169         svc->af = u->af;
1170         svc->protocol = u->protocol;
1171         ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1172         svc->port = u->port;
1173         svc->fwmark = u->fwmark;
1174         svc->flags = u->flags;
1175         svc->timeout = u->timeout * HZ;
1176         svc->netmask = u->netmask;
1177
1178         INIT_LIST_HEAD(&svc->destinations);
1179         rwlock_init(&svc->sched_lock);
1180         spin_lock_init(&svc->stats.lock);
1181
1182         /* Bind the scheduler */
1183         ret = ip_vs_bind_scheduler(svc, sched);
1184         if (ret)
1185                 goto out_err;
1186         sched = NULL;
1187
1188         /* Bind the ct retriever */
1189         ip_vs_bind_pe(svc, pe);
1190         pe = NULL;
1191
1192         /* Update the virtual service counters */
1193         if (svc->port == FTPPORT)
1194                 atomic_inc(&ip_vs_ftpsvc_counter);
1195         else if (svc->port == 0)
1196                 atomic_inc(&ip_vs_nullsvc_counter);
1197
1198         ip_vs_new_estimator(&svc->stats);
1199
1200         /* Count only IPv4 services for old get/setsockopt interface */
1201         if (svc->af == AF_INET)
1202                 ip_vs_num_services++;
1203
1204         /* Hash the service into the service table */
1205         write_lock_bh(&__ip_vs_svc_lock);
1206         ip_vs_svc_hash(svc);
1207         write_unlock_bh(&__ip_vs_svc_lock);
1208
1209         *svc_p = svc;
1210         return 0;
1211
1212  out_err:
1213         if (svc != NULL) {
1214                 ip_vs_unbind_scheduler(svc);
1215                 if (svc->inc) {
1216                         local_bh_disable();
1217                         ip_vs_app_inc_put(svc->inc);
1218                         local_bh_enable();
1219                 }
1220                 kfree(svc);
1221         }
1222         ip_vs_scheduler_put(sched);
1223         ip_vs_pe_put(pe);
1224
1225         /* decrease the module use count */
1226         ip_vs_use_count_dec();
1227
1228         return ret;
1229 }
1230
1231
1232 /*
1233  *      Edit a service and bind it with a new scheduler
1234  */
1235 static int
1236 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1237 {
1238         struct ip_vs_scheduler *sched, *old_sched;
1239         struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1240         int ret = 0;
1241
1242         /*
1243          * Lookup the scheduler, by 'u->sched_name'
1244          */
1245         sched = ip_vs_scheduler_get(u->sched_name);
1246         if (sched == NULL) {
1247                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1248                 return -ENOENT;
1249         }
1250         old_sched = sched;
1251
1252         if (u->pe_name && *u->pe_name) {
1253                 pe = ip_vs_pe_getbyname(u->pe_name);
1254                 if (pe == NULL) {
1255                         pr_info("persistence engine module ip_vs_pe_%s "
1256                                 "not found\n", u->pe_name);
1257                         ret = -ENOENT;
1258                         goto out;
1259                 }
1260                 old_pe = pe;
1261         }
1262
1263 #ifdef CONFIG_IP_VS_IPV6
1264         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1265                 ret = -EINVAL;
1266                 goto out;
1267         }
1268 #endif
1269
1270         write_lock_bh(&__ip_vs_svc_lock);
1271
1272         /*
1273          * Wait until all other svc users go away.
1274          */
1275         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1276
1277         /*
1278          * Set the flags and timeout value
1279          */
1280         svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1281         svc->timeout = u->timeout * HZ;
1282         svc->netmask = u->netmask;
1283
1284         old_sched = svc->scheduler;
1285         if (sched != old_sched) {
1286                 /*
1287                  * Unbind the old scheduler
1288                  */
1289                 if ((ret = ip_vs_unbind_scheduler(svc))) {
1290                         old_sched = sched;
1291                         goto out_unlock;
1292                 }
1293
1294                 /*
1295                  * Bind the new scheduler
1296                  */
1297                 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1298                         /*
1299                          * If ip_vs_bind_scheduler fails, restore the old
1300                          * scheduler.
1301                          * The main reason of failure is out of memory.
1302                          *
1303                          * The question is if the old scheduler can be
1304                          * restored all the time. TODO: if it cannot be
1305                          * restored some time, we must delete the service,
1306                          * otherwise the system may crash.
1307                          */
1308                         ip_vs_bind_scheduler(svc, old_sched);
1309                         old_sched = sched;
1310                         goto out_unlock;
1311                 }
1312         }
1313
1314         old_pe = svc->pe;
1315         if (pe != old_pe) {
1316                 ip_vs_unbind_pe(svc);
1317                 ip_vs_bind_pe(svc, pe);
1318         }
1319
1320   out_unlock:
1321         write_unlock_bh(&__ip_vs_svc_lock);
1322   out:
1323         ip_vs_scheduler_put(old_sched);
1324         ip_vs_pe_put(old_pe);
1325         return ret;
1326 }
1327
1328
1329 /*
1330  *      Delete a service from the service list
1331  *      - The service must be unlinked, unlocked and not referenced!
1332  *      - We are called under _bh lock
1333  */
1334 static void __ip_vs_del_service(struct ip_vs_service *svc)
1335 {
1336         struct ip_vs_dest *dest, *nxt;
1337         struct ip_vs_scheduler *old_sched;
1338         struct ip_vs_pe *old_pe;
1339
1340         pr_info("%s: enter\n", __func__);
1341
1342         /* Count only IPv4 services for old get/setsockopt interface */
1343         if (svc->af == AF_INET)
1344                 ip_vs_num_services--;
1345
1346         ip_vs_kill_estimator(&svc->stats);
1347
1348         /* Unbind scheduler */
1349         old_sched = svc->scheduler;
1350         ip_vs_unbind_scheduler(svc);
1351         ip_vs_scheduler_put(old_sched);
1352
1353         /* Unbind persistence engine */
1354         old_pe = svc->pe;
1355         ip_vs_unbind_pe(svc);
1356         ip_vs_pe_put(old_pe);
1357
1358         /* Unbind app inc */
1359         if (svc->inc) {
1360                 ip_vs_app_inc_put(svc->inc);
1361                 svc->inc = NULL;
1362         }
1363
1364         /*
1365          *    Unlink the whole destination list
1366          */
1367         list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1368                 __ip_vs_unlink_dest(svc, dest, 0);
1369                 __ip_vs_del_dest(dest);
1370         }
1371
1372         /*
1373          *    Update the virtual service counters
1374          */
1375         if (svc->port == FTPPORT)
1376                 atomic_dec(&ip_vs_ftpsvc_counter);
1377         else if (svc->port == 0)
1378                 atomic_dec(&ip_vs_nullsvc_counter);
1379
1380         /*
1381          *    Free the service if nobody refers to it
1382          */
1383         if (atomic_read(&svc->refcnt) == 0) {
1384                 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1385                               svc->fwmark,
1386                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
1387                               ntohs(svc->port), atomic_read(&svc->usecnt));
1388                 kfree(svc);
1389         }
1390
1391         /* decrease the module use count */
1392         ip_vs_use_count_dec();
1393 }
1394
1395 /*
1396  * Unlink a service from list and try to delete it if its refcnt reached 0
1397  */
1398 static void ip_vs_unlink_service(struct ip_vs_service *svc)
1399 {
1400         /*
1401          * Unhash it from the service table
1402          */
1403         write_lock_bh(&__ip_vs_svc_lock);
1404
1405         ip_vs_svc_unhash(svc);
1406
1407         /*
1408          * Wait until all the svc users go away.
1409          */
1410         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1411
1412         __ip_vs_del_service(svc);
1413
1414         write_unlock_bh(&__ip_vs_svc_lock);
1415 }
1416
1417 /*
1418  *      Delete a service from the service list
1419  */
1420 static int ip_vs_del_service(struct ip_vs_service *svc)
1421 {
1422         if (svc == NULL)
1423                 return -EEXIST;
1424         ip_vs_unlink_service(svc);
1425
1426         return 0;
1427 }
1428
1429
1430 /*
1431  *      Flush all the virtual services
1432  */
1433 static int ip_vs_flush(void)
1434 {
1435         int idx;
1436         struct ip_vs_service *svc, *nxt;
1437
1438         /*
1439          * Flush the service table hashed by <protocol,addr,port>
1440          */
1441         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1442                 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1443                         ip_vs_unlink_service(svc);
1444                 }
1445         }
1446
1447         /*
1448          * Flush the service table hashed by fwmark
1449          */
1450         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1451                 list_for_each_entry_safe(svc, nxt,
1452                                          &ip_vs_svc_fwm_table[idx], f_list) {
1453                         ip_vs_unlink_service(svc);
1454                 }
1455         }
1456
1457         return 0;
1458 }
1459
1460
1461 /*
1462  *      Zero counters in a service or all services
1463  */
1464 static int ip_vs_zero_service(struct ip_vs_service *svc)
1465 {
1466         struct ip_vs_dest *dest;
1467
1468         write_lock_bh(&__ip_vs_svc_lock);
1469         list_for_each_entry(dest, &svc->destinations, n_list) {
1470                 ip_vs_zero_stats(&dest->stats);
1471         }
1472         ip_vs_zero_stats(&svc->stats);
1473         write_unlock_bh(&__ip_vs_svc_lock);
1474         return 0;
1475 }
1476
1477 static int ip_vs_zero_all(void)
1478 {
1479         int idx;
1480         struct ip_vs_service *svc;
1481
1482         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1483                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1484                         ip_vs_zero_service(svc);
1485                 }
1486         }
1487
1488         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1489                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1490                         ip_vs_zero_service(svc);
1491                 }
1492         }
1493
1494         ip_vs_zero_stats(&ip_vs_stats);
1495         return 0;
1496 }
1497
1498
1499 static int
1500 proc_do_defense_mode(ctl_table *table, int write,
1501                      void __user *buffer, size_t *lenp, loff_t *ppos)
1502 {
1503         int *valp = table->data;
1504         int val = *valp;
1505         int rc;
1506
1507         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1508         if (write && (*valp != val)) {
1509                 if ((*valp < 0) || (*valp > 3)) {
1510                         /* Restore the correct value */
1511                         *valp = val;
1512                 } else {
1513                         update_defense_level();
1514                 }
1515         }
1516         return rc;
1517 }
1518
1519
1520 static int
1521 proc_do_sync_threshold(ctl_table *table, int write,
1522                        void __user *buffer, size_t *lenp, loff_t *ppos)
1523 {
1524         int *valp = table->data;
1525         int val[2];
1526         int rc;
1527
1528         /* backup the value first */
1529         memcpy(val, valp, sizeof(val));
1530
1531         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1532         if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1533                 /* Restore the correct value */
1534                 memcpy(valp, val, sizeof(val));
1535         }
1536         return rc;
1537 }
1538
1539
1540 /*
1541  *      IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1542  */
1543
1544 static struct ctl_table vs_vars[] = {
1545         {
1546                 .procname       = "amemthresh",
1547                 .data           = &sysctl_ip_vs_amemthresh,
1548                 .maxlen         = sizeof(int),
1549                 .mode           = 0644,
1550                 .proc_handler   = proc_dointvec,
1551         },
1552 #ifdef CONFIG_IP_VS_DEBUG
1553         {
1554                 .procname       = "debug_level",
1555                 .data           = &sysctl_ip_vs_debug_level,
1556                 .maxlen         = sizeof(int),
1557                 .mode           = 0644,
1558                 .proc_handler   = proc_dointvec,
1559         },
1560 #endif
1561         {
1562                 .procname       = "am_droprate",
1563                 .data           = &sysctl_ip_vs_am_droprate,
1564                 .maxlen         = sizeof(int),
1565                 .mode           = 0644,
1566                 .proc_handler   = proc_dointvec,
1567         },
1568         {
1569                 .procname       = "drop_entry",
1570                 .data           = &sysctl_ip_vs_drop_entry,
1571                 .maxlen         = sizeof(int),
1572                 .mode           = 0644,
1573                 .proc_handler   = proc_do_defense_mode,
1574         },
1575         {
1576                 .procname       = "drop_packet",
1577                 .data           = &sysctl_ip_vs_drop_packet,
1578                 .maxlen         = sizeof(int),
1579                 .mode           = 0644,
1580                 .proc_handler   = proc_do_defense_mode,
1581         },
1582 #ifdef CONFIG_IP_VS_NFCT
1583         {
1584                 .procname       = "conntrack",
1585                 .data           = &sysctl_ip_vs_conntrack,
1586                 .maxlen         = sizeof(int),
1587                 .mode           = 0644,
1588                 .proc_handler   = &proc_dointvec,
1589         },
1590 #endif
1591         {
1592                 .procname       = "secure_tcp",
1593                 .data           = &sysctl_ip_vs_secure_tcp,
1594                 .maxlen         = sizeof(int),
1595                 .mode           = 0644,
1596                 .proc_handler   = proc_do_defense_mode,
1597         },
1598         {
1599                 .procname       = "snat_reroute",
1600                 .data           = &sysctl_ip_vs_snat_reroute,
1601                 .maxlen         = sizeof(int),
1602                 .mode           = 0644,
1603                 .proc_handler   = &proc_dointvec,
1604         },
1605 #if 0
1606         {
1607                 .procname       = "timeout_established",
1608                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1609                 .maxlen         = sizeof(int),
1610                 .mode           = 0644,
1611                 .proc_handler   = proc_dointvec_jiffies,
1612         },
1613         {
1614                 .procname       = "timeout_synsent",
1615                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1616                 .maxlen         = sizeof(int),
1617                 .mode           = 0644,
1618                 .proc_handler   = proc_dointvec_jiffies,
1619         },
1620         {
1621                 .procname       = "timeout_synrecv",
1622                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1623                 .maxlen         = sizeof(int),
1624                 .mode           = 0644,
1625                 .proc_handler   = proc_dointvec_jiffies,
1626         },
1627         {
1628                 .procname       = "timeout_finwait",
1629                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1630                 .maxlen         = sizeof(int),
1631                 .mode           = 0644,
1632                 .proc_handler   = proc_dointvec_jiffies,
1633         },
1634         {
1635                 .procname       = "timeout_timewait",
1636                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1637                 .maxlen         = sizeof(int),
1638                 .mode           = 0644,
1639                 .proc_handler   = proc_dointvec_jiffies,
1640         },
1641         {
1642                 .procname       = "timeout_close",
1643                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1644                 .maxlen         = sizeof(int),
1645                 .mode           = 0644,
1646                 .proc_handler   = proc_dointvec_jiffies,
1647         },
1648         {
1649                 .procname       = "timeout_closewait",
1650                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1651                 .maxlen         = sizeof(int),
1652                 .mode           = 0644,
1653                 .proc_handler   = proc_dointvec_jiffies,
1654         },
1655         {
1656                 .procname       = "timeout_lastack",
1657                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1658                 .maxlen         = sizeof(int),
1659                 .mode           = 0644,
1660                 .proc_handler   = proc_dointvec_jiffies,
1661         },
1662         {
1663                 .procname       = "timeout_listen",
1664                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1665                 .maxlen         = sizeof(int),
1666                 .mode           = 0644,
1667                 .proc_handler   = proc_dointvec_jiffies,
1668         },
1669         {
1670                 .procname       = "timeout_synack",
1671                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1672                 .maxlen         = sizeof(int),
1673                 .mode           = 0644,
1674                 .proc_handler   = proc_dointvec_jiffies,
1675         },
1676         {
1677                 .procname       = "timeout_udp",
1678                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1679                 .maxlen         = sizeof(int),
1680                 .mode           = 0644,
1681                 .proc_handler   = proc_dointvec_jiffies,
1682         },
1683         {
1684                 .procname       = "timeout_icmp",
1685                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1686                 .maxlen         = sizeof(int),
1687                 .mode           = 0644,
1688                 .proc_handler   = proc_dointvec_jiffies,
1689         },
1690 #endif
1691         {
1692                 .procname       = "cache_bypass",
1693                 .data           = &sysctl_ip_vs_cache_bypass,
1694                 .maxlen         = sizeof(int),
1695                 .mode           = 0644,
1696                 .proc_handler   = proc_dointvec,
1697         },
1698         {
1699                 .procname       = "expire_nodest_conn",
1700                 .data           = &sysctl_ip_vs_expire_nodest_conn,
1701                 .maxlen         = sizeof(int),
1702                 .mode           = 0644,
1703                 .proc_handler   = proc_dointvec,
1704         },
1705         {
1706                 .procname       = "expire_quiescent_template",
1707                 .data           = &sysctl_ip_vs_expire_quiescent_template,
1708                 .maxlen         = sizeof(int),
1709                 .mode           = 0644,
1710                 .proc_handler   = proc_dointvec,
1711         },
1712         {
1713                 .procname       = "sync_threshold",
1714                 .data           = &sysctl_ip_vs_sync_threshold,
1715                 .maxlen         = sizeof(sysctl_ip_vs_sync_threshold),
1716                 .mode           = 0644,
1717                 .proc_handler   = proc_do_sync_threshold,
1718         },
1719         {
1720                 .procname       = "nat_icmp_send",
1721                 .data           = &sysctl_ip_vs_nat_icmp_send,
1722                 .maxlen         = sizeof(int),
1723                 .mode           = 0644,
1724                 .proc_handler   = proc_dointvec,
1725         },
1726         { }
1727 };
1728
1729 const struct ctl_path net_vs_ctl_path[] = {
1730         { .procname = "net", },
1731         { .procname = "ipv4", },
1732         { .procname = "vs", },
1733         { }
1734 };
1735 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1736
/*
 * sysctl table header kept at file scope.
 * NOTE(review): presumably the handle returned when vs_vars is registered;
 * confirm at the registration site.
 */
static struct ctl_table_header *sysctl_header;
1738
1739 #ifdef CONFIG_PROC_FS
1740
/* Cursor of the seq_file walk over the two service hash tables */
struct ip_vs_iter {
        /* table being walked: ip_vs_svc_table or ip_vs_svc_fwm_table */
        struct list_head *table;
        /* index of the current bucket within that table */
        int bucket;
};
1745
1746 /*
1747  *      Write the contents of the VS rule table to a PROCfs file.
1748  *      (It is kept just for backward compatibility)
1749  */
1750 static inline const char *ip_vs_fwd_name(unsigned flags)
1751 {
1752         switch (flags & IP_VS_CONN_F_FWD_MASK) {
1753         case IP_VS_CONN_F_LOCALNODE:
1754                 return "Local";
1755         case IP_VS_CONN_F_TUNNEL:
1756                 return "Tunnel";
1757         case IP_VS_CONN_F_DROUTE:
1758                 return "Route";
1759         default:
1760                 return "Masq";
1761         }
1762 }
1763
1764
1765 /* Get the Nth entry in the two lists */
1766 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1767 {
1768         struct ip_vs_iter *iter = seq->private;
1769         int idx;
1770         struct ip_vs_service *svc;
1771
1772         /* look in hash by protocol */
1773         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1774                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1775                         if (pos-- == 0){
1776                                 iter->table = ip_vs_svc_table;
1777                                 iter->bucket = idx;
1778                                 return svc;
1779                         }
1780                 }
1781         }
1782
1783         /* keep looking in fwmark */
1784         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1785                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1786                         if (pos-- == 0) {
1787                                 iter->table = ip_vs_svc_fwm_table;
1788                                 iter->bucket = idx;
1789                                 return svc;
1790                         }
1791                 }
1792         }
1793
1794         return NULL;
1795 }
1796
1797 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1798 __acquires(__ip_vs_svc_lock)
1799 {
1800
1801         read_lock_bh(&__ip_vs_svc_lock);
1802         return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1803 }
1804
1805
1806 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1807 {
1808         struct list_head *e;
1809         struct ip_vs_iter *iter;
1810         struct ip_vs_service *svc;
1811
1812         ++*pos;
1813         if (v == SEQ_START_TOKEN)
1814                 return ip_vs_info_array(seq,0);
1815
1816         svc = v;
1817         iter = seq->private;
1818
1819         if (iter->table == ip_vs_svc_table) {
1820                 /* next service in table hashed by protocol */
1821                 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1822                         return list_entry(e, struct ip_vs_service, s_list);
1823
1824
1825                 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1826                         list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1827                                             s_list) {
1828                                 return svc;
1829                         }
1830                 }
1831
1832                 iter->table = ip_vs_svc_fwm_table;
1833                 iter->bucket = -1;
1834                 goto scan_fwmark;
1835         }
1836
1837         /* next service in hashed by fwmark */
1838         if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1839                 return list_entry(e, struct ip_vs_service, f_list);
1840
1841  scan_fwmark:
1842         while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1843                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1844                                     f_list)
1845                         return svc;
1846         }
1847
1848         return NULL;
1849 }
1850
1851 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1852 __releases(__ip_vs_svc_lock)
1853 {
1854         read_unlock_bh(&__ip_vs_svc_lock);
1855 }
1856
1857
1858 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1859 {
1860         if (v == SEQ_START_TOKEN) {
1861                 seq_printf(seq,
1862                         "IP Virtual Server version %d.%d.%d (size=%d)\n",
1863                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1864                 seq_puts(seq,
1865                          "Prot LocalAddress:Port Scheduler Flags\n");
1866                 seq_puts(seq,
1867                          "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1868         } else {
1869                 const struct ip_vs_service *svc = v;
1870                 const struct ip_vs_iter *iter = seq->private;
1871                 const struct ip_vs_dest *dest;
1872
1873                 if (iter->table == ip_vs_svc_table) {
1874 #ifdef CONFIG_IP_VS_IPV6
1875                         if (svc->af == AF_INET6)
1876                                 seq_printf(seq, "%s  [%pI6]:%04X %s ",
1877                                            ip_vs_proto_name(svc->protocol),
1878                                            &svc->addr.in6,
1879                                            ntohs(svc->port),
1880                                            svc->scheduler->name);
1881                         else
1882 #endif
1883                                 seq_printf(seq, "%s  %08X:%04X %s %s ",
1884                                            ip_vs_proto_name(svc->protocol),
1885                                            ntohl(svc->addr.ip),
1886                                            ntohs(svc->port),
1887                                            svc->scheduler->name,
1888                                            (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1889                 } else {
1890                         seq_printf(seq, "FWM  %08X %s %s",
1891                                    svc->fwmark, svc->scheduler->name,
1892                                    (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1893                 }
1894
1895                 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1896                         seq_printf(seq, "persistent %d %08X\n",
1897                                 svc->timeout,
1898                                 ntohl(svc->netmask));
1899                 else
1900                         seq_putc(seq, '\n');
1901
1902                 list_for_each_entry(dest, &svc->destinations, n_list) {
1903 #ifdef CONFIG_IP_VS_IPV6
1904                         if (dest->af == AF_INET6)
1905                                 seq_printf(seq,
1906                                            "  -> [%pI6]:%04X"
1907                                            "      %-7s %-6d %-10d %-10d\n",
1908                                            &dest->addr.in6,
1909                                            ntohs(dest->port),
1910                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1911                                            atomic_read(&dest->weight),
1912                                            atomic_read(&dest->activeconns),
1913                                            atomic_read(&dest->inactconns));
1914                         else
1915 #endif
1916                                 seq_printf(seq,
1917                                            "  -> %08X:%04X      "
1918                                            "%-7s %-6d %-10d %-10d\n",
1919                                            ntohl(dest->addr.ip),
1920                                            ntohs(dest->port),
1921                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1922                                            atomic_read(&dest->weight),
1923                                            atomic_read(&dest->activeconns),
1924                                            atomic_read(&dest->inactconns));
1925
1926                 }
1927         }
1928         return 0;
1929 }
1930
1931 static const struct seq_operations ip_vs_info_seq_ops = {
1932         .start = ip_vs_info_seq_start,
1933         .next  = ip_vs_info_seq_next,
1934         .stop  = ip_vs_info_seq_stop,
1935         .show  = ip_vs_info_seq_show,
1936 };
1937
1938 static int ip_vs_info_open(struct inode *inode, struct file *file)
1939 {
1940         return seq_open_private(file, &ip_vs_info_seq_ops,
1941                         sizeof(struct ip_vs_iter));
1942 }
1943
1944 static const struct file_operations ip_vs_info_fops = {
1945         .owner   = THIS_MODULE,
1946         .open    = ip_vs_info_open,
1947         .read    = seq_read,
1948         .llseek  = seq_lseek,
1949         .release = seq_release_private,
1950 };
1951
1952 #endif
1953
1954 struct ip_vs_stats ip_vs_stats = {
1955         .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1956 };
1957
1958 #ifdef CONFIG_PROC_FS
1959 static int ip_vs_stats_show(struct seq_file *seq, void *v)
1960 {
1961
1962 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
1963         seq_puts(seq,
1964                  "   Total Incoming Outgoing         Incoming         Outgoing\n");
1965         seq_printf(seq,
1966                    "   Conns  Packets  Packets            Bytes            Bytes\n");
1967
1968         spin_lock_bh(&ip_vs_stats.lock);
1969         seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
1970                    ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
1971                    (unsigned long long) ip_vs_stats.ustats.inbytes,
1972                    (unsigned long long) ip_vs_stats.ustats.outbytes);
1973
1974 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1975         seq_puts(seq,
1976                    " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
1977         seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1978                         ip_vs_stats.ustats.cps,
1979                         ip_vs_stats.ustats.inpps,
1980                         ip_vs_stats.ustats.outpps,
1981                         ip_vs_stats.ustats.inbps,
1982                         ip_vs_stats.ustats.outbps);
1983         spin_unlock_bh(&ip_vs_stats.lock);
1984
1985         return 0;
1986 }
1987
1988 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1989 {
1990         return single_open(file, ip_vs_stats_show, NULL);
1991 }
1992
1993 static const struct file_operations ip_vs_stats_fops = {
1994         .owner = THIS_MODULE,
1995         .open = ip_vs_stats_seq_open,
1996         .read = seq_read,
1997         .llseek = seq_lseek,
1998         .release = single_release,
1999 };
2000
2001 #endif
2002
2003 /*
2004  *      Set timeout values for tcp tcpfin udp in the timeout_table.
2005  */
2006 static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
2007 {
2008         IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2009                   u->tcp_timeout,
2010                   u->tcp_fin_timeout,
2011                   u->udp_timeout);
2012
2013 #ifdef CONFIG_IP_VS_PROTO_TCP
2014         if (u->tcp_timeout) {
2015                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
2016                         = u->tcp_timeout * HZ;
2017         }
2018
2019         if (u->tcp_fin_timeout) {
2020                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
2021                         = u->tcp_fin_timeout * HZ;
2022         }
2023 #endif
2024
2025 #ifdef CONFIG_IP_VS_PROTO_UDP
2026         if (u->udp_timeout) {
2027                 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
2028                         = u->udp_timeout * HZ;
2029         }
2030 #endif
2031         return 0;
2032 }
2033
2034
/*
 * Helpers for the setsockopt interface: SET_CMDID maps a command number to
 * its index in set_arglen[], the *_ARG_LEN macros give the exact argument
 * size expected for each class of command.  The macro argument is
 * parenthesised so that compound expressions expand correctly.
 */
#define SET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
#define MAX_ARG_LEN		SVCDEST_ARG_LEN
2042
2043 static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2044         [SET_CMDID(IP_VS_SO_SET_ADD)]           = SERVICE_ARG_LEN,
2045         [SET_CMDID(IP_VS_SO_SET_EDIT)]          = SERVICE_ARG_LEN,
2046         [SET_CMDID(IP_VS_SO_SET_DEL)]           = SERVICE_ARG_LEN,
2047         [SET_CMDID(IP_VS_SO_SET_FLUSH)]         = 0,
2048         [SET_CMDID(IP_VS_SO_SET_ADDDEST)]       = SVCDEST_ARG_LEN,
2049         [SET_CMDID(IP_VS_SO_SET_DELDEST)]       = SVCDEST_ARG_LEN,
2050         [SET_CMDID(IP_VS_SO_SET_EDITDEST)]      = SVCDEST_ARG_LEN,
2051         [SET_CMDID(IP_VS_SO_SET_TIMEOUT)]       = TIMEOUT_ARG_LEN,
2052         [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]   = DAEMON_ARG_LEN,
2053         [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]    = DAEMON_ARG_LEN,
2054         [SET_CMDID(IP_VS_SO_SET_ZERO)]          = SERVICE_ARG_LEN,
2055 };
2056
2057 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2058                                   struct ip_vs_service_user *usvc_compat)
2059 {
2060         memset(usvc, 0, sizeof(*usvc));
2061
2062         usvc->af                = AF_INET;
2063         usvc->protocol          = usvc_compat->protocol;
2064         usvc->addr.ip           = usvc_compat->addr;
2065         usvc->port              = usvc_compat->port;
2066         usvc->fwmark            = usvc_compat->fwmark;
2067
2068         /* Deep copy of sched_name is not needed here */
2069         usvc->sched_name        = usvc_compat->sched_name;
2070
2071         usvc->flags             = usvc_compat->flags;
2072         usvc->timeout           = usvc_compat->timeout;
2073         usvc->netmask           = usvc_compat->netmask;
2074 }
2075
2076 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2077                                    struct ip_vs_dest_user *udest_compat)
2078 {
2079         memset(udest, 0, sizeof(*udest));
2080
2081         udest->addr.ip          = udest_compat->addr;
2082         udest->port             = udest_compat->port;
2083         udest->conn_flags       = udest_compat->conn_flags;
2084         udest->weight           = udest_compat->weight;
2085         udest->u_threshold      = udest_compat->u_threshold;
2086         udest->l_threshold      = udest_compat->l_threshold;
2087 }
2088
2089 static int
2090 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2091 {
2092         int ret;
2093         unsigned char arg[MAX_ARG_LEN];
2094         struct ip_vs_service_user *usvc_compat;
2095         struct ip_vs_service_user_kern usvc;
2096         struct ip_vs_service *svc;
2097         struct ip_vs_dest_user *udest_compat;
2098         struct ip_vs_dest_user_kern udest;
2099
2100         if (!capable(CAP_NET_ADMIN))
2101                 return -EPERM;
2102
2103         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2104                 return -EINVAL;
2105         if (len < 0 || len >  MAX_ARG_LEN)
2106                 return -EINVAL;
2107         if (len != set_arglen[SET_CMDID(cmd)]) {
2108                 pr_err("set_ctl: len %u != %u\n",
2109                        len, set_arglen[SET_CMDID(cmd)]);
2110                 return -EINVAL;
2111         }
2112
2113         if (copy_from_user(arg, user, len) != 0)
2114                 return -EFAULT;
2115
2116         /* increase the module use count */
2117         ip_vs_use_count_inc();
2118
2119         if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2120                 ret = -ERESTARTSYS;
2121                 goto out_dec;
2122         }
2123
2124         if (cmd == IP_VS_SO_SET_FLUSH) {
2125                 /* Flush the virtual service */
2126                 ret = ip_vs_flush();
2127                 goto out_unlock;
2128         } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2129                 /* Set timeout values for (tcp tcpfin udp) */
2130                 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
2131                 goto out_unlock;
2132         } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2133                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2134                 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
2135                 goto out_unlock;
2136         } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2137                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2138                 ret = stop_sync_thread(dm->state);
2139                 goto out_unlock;
2140         }
2141
2142         usvc_compat = (struct ip_vs_service_user *)arg;
2143         udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2144
2145         /* We only use the new structs internally, so copy userspace compat
2146          * structs to extended internal versions */
2147         ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2148         ip_vs_copy_udest_compat(&udest, udest_compat);
2149
2150         if (cmd == IP_VS_SO_SET_ZERO) {
2151                 /* if no service address is set, zero counters in all */
2152                 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2153                         ret = ip_vs_zero_all();
2154                         goto out_unlock;
2155                 }
2156         }
2157
2158         /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2159         if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2160             usvc.protocol != IPPROTO_SCTP) {
2161                 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2162                        usvc.protocol, &usvc.addr.ip,
2163                        ntohs(usvc.port), usvc.sched_name);
2164                 ret = -EFAULT;
2165                 goto out_unlock;
2166         }
2167
2168         /* Lookup the exact service by <protocol, addr, port> or fwmark */
2169         if (usvc.fwmark == 0)
2170                 svc = __ip_vs_service_find(usvc.af, usvc.protocol,
2171                                            &usvc.addr, usvc.port);
2172         else
2173                 svc = __ip_vs_svc_fwm_find(usvc.af, usvc.fwmark);
2174
2175         if (cmd != IP_VS_SO_SET_ADD
2176             && (svc == NULL || svc->protocol != usvc.protocol)) {
2177                 ret = -ESRCH;
2178                 goto out_unlock;
2179         }
2180
2181         switch (cmd) {
2182         case IP_VS_SO_SET_ADD:
2183                 if (svc != NULL)
2184                         ret = -EEXIST;
2185                 else
2186                         ret = ip_vs_add_service(&usvc, &svc);
2187                 break;
2188         case IP_VS_SO_SET_EDIT:
2189                 ret = ip_vs_edit_service(svc, &usvc);
2190                 break;
2191         case IP_VS_SO_SET_DEL:
2192                 ret = ip_vs_del_service(svc);
2193                 if (!ret)
2194                         goto out_unlock;
2195                 break;
2196         case IP_VS_SO_SET_ZERO:
2197                 ret = ip_vs_zero_service(svc);
2198                 break;
2199         case IP_VS_SO_SET_ADDDEST:
2200                 ret = ip_vs_add_dest(svc, &udest);
2201                 break;
2202         case IP_VS_SO_SET_EDITDEST:
2203                 ret = ip_vs_edit_dest(svc, &udest);
2204                 break;
2205         case IP_VS_SO_SET_DELDEST:
2206                 ret = ip_vs_del_dest(svc, &udest);
2207                 break;
2208         default:
2209                 ret = -EINVAL;
2210         }
2211
2212   out_unlock:
2213         mutex_unlock(&__ip_vs_mutex);
2214   out_dec:
2215         /* decrease the module use count */
2216         ip_vs_use_count_dec();
2217
2218         return ret;
2219 }
2220
2221
2222 static void
2223 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2224 {
2225         spin_lock_bh(&src->lock);
2226         memcpy(dst, &src->ustats, sizeof(*dst));
2227         spin_unlock_bh(&src->lock);
2228 }
2229
2230 static void
2231 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2232 {
2233         dst->protocol = src->protocol;
2234         dst->addr = src->addr.ip;
2235         dst->port = src->port;
2236         dst->fwmark = src->fwmark;
2237         strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2238         dst->flags = src->flags;
2239         dst->timeout = src->timeout / HZ;
2240         dst->netmask = src->netmask;
2241         dst->num_dests = src->num_dests;
2242         ip_vs_copy_stats(&dst->stats, &src->stats);
2243 }
2244
2245 static inline int
2246 __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2247                             struct ip_vs_get_services __user *uptr)
2248 {
2249         int idx, count=0;
2250         struct ip_vs_service *svc;
2251         struct ip_vs_service_entry entry;
2252         int ret = 0;
2253
2254         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2255                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2256                         /* Only expose IPv4 entries to old interface */
2257                         if (svc->af != AF_INET)
2258                                 continue;
2259
2260                         if (count >= get->num_services)
2261                                 goto out;
2262                         memset(&entry, 0, sizeof(entry));
2263                         ip_vs_copy_service(&entry, svc);
2264                         if (copy_to_user(&uptr->entrytable[count],
2265                                          &entry, sizeof(entry))) {
2266                                 ret = -EFAULT;
2267                                 goto out;
2268                         }
2269                         count++;
2270                 }
2271         }
2272
2273         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2274                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2275                         /* Only expose IPv4 entries to old interface */
2276                         if (svc->af != AF_INET)
2277                                 continue;
2278
2279                         if (count >= get->num_services)
2280                                 goto out;
2281                         memset(&entry, 0, sizeof(entry));
2282                         ip_vs_copy_service(&entry, svc);
2283                         if (copy_to_user(&uptr->entrytable[count],
2284                                          &entry, sizeof(entry))) {
2285                                 ret = -EFAULT;
2286                                 goto out;
2287                         }
2288                         count++;
2289                 }
2290         }
2291   out:
2292         return ret;
2293 }
2294
2295 static inline int
2296 __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2297                          struct ip_vs_get_dests __user *uptr)
2298 {
2299         struct ip_vs_service *svc;
2300         union nf_inet_addr addr = { .ip = get->addr };
2301         int ret = 0;
2302
2303         if (get->fwmark)
2304                 svc = __ip_vs_svc_fwm_find(AF_INET, get->fwmark);
2305         else
2306                 svc = __ip_vs_service_find(AF_INET, get->protocol, &addr,
2307                                            get->port);
2308
2309         if (svc) {
2310                 int count = 0;
2311                 struct ip_vs_dest *dest;
2312                 struct ip_vs_dest_entry entry;
2313
2314                 list_for_each_entry(dest, &svc->destinations, n_list) {
2315                         if (count >= get->num_dests)
2316                                 break;
2317
2318                         entry.addr = dest->addr.ip;
2319                         entry.port = dest->port;
2320                         entry.conn_flags = atomic_read(&dest->conn_flags);
2321                         entry.weight = atomic_read(&dest->weight);
2322                         entry.u_threshold = dest->u_threshold;
2323                         entry.l_threshold = dest->l_threshold;
2324                         entry.activeconns = atomic_read(&dest->activeconns);
2325                         entry.inactconns = atomic_read(&dest->inactconns);
2326                         entry.persistconns = atomic_read(&dest->persistconns);
2327                         ip_vs_copy_stats(&entry.stats, &dest->stats);
2328                         if (copy_to_user(&uptr->entrytable[count],
2329                                          &entry, sizeof(entry))) {
2330                                 ret = -EFAULT;
2331                                 break;
2332                         }
2333                         count++;
2334                 }
2335         } else
2336                 ret = -ESRCH;
2337         return ret;
2338 }
2339
2340 static inline void
2341 __ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2342 {
2343 #ifdef CONFIG_IP_VS_PROTO_TCP
2344         u->tcp_timeout =
2345                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2346         u->tcp_fin_timeout =
2347                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2348 #endif
2349 #ifdef CONFIG_IP_VS_PROTO_UDP
2350         u->udp_timeout =
2351                 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2352 #endif
2353 }
2354
2355
/*
 * Helpers for the getsockopt interface: GET_CMDID maps a command number to
 * its index in get_arglen[], the GET_*_ARG_LEN macros give the minimum
 * argument size for each command.  The macro argument is parenthesised so
 * that compound expressions expand correctly.
 */
#define GET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
#define GET_INFO_ARG_LEN	(sizeof(struct ip_vs_getinfo))
#define GET_SERVICES_ARG_LEN	(sizeof(struct ip_vs_get_services))
#define GET_SERVICE_ARG_LEN	(sizeof(struct ip_vs_service_entry))
#define GET_DESTS_ARG_LEN	(sizeof(struct ip_vs_get_dests))
#define GET_TIMEOUT_ARG_LEN	(sizeof(struct ip_vs_timeout_user))
#define GET_DAEMON_ARG_LEN	(sizeof(struct ip_vs_daemon_user) * 2)
2363
2364 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2365         [GET_CMDID(IP_VS_SO_GET_VERSION)]       = 64,
2366         [GET_CMDID(IP_VS_SO_GET_INFO)]          = GET_INFO_ARG_LEN,
2367         [GET_CMDID(IP_VS_SO_GET_SERVICES)]      = GET_SERVICES_ARG_LEN,
2368         [GET_CMDID(IP_VS_SO_GET_SERVICE)]       = GET_SERVICE_ARG_LEN,
2369         [GET_CMDID(IP_VS_SO_GET_DESTS)]         = GET_DESTS_ARG_LEN,
2370         [GET_CMDID(IP_VS_SO_GET_TIMEOUT)]       = GET_TIMEOUT_ARG_LEN,
2371         [GET_CMDID(IP_VS_SO_GET_DAEMON)]        = GET_DAEMON_ARG_LEN,
2372 };
2373
2374 static int
2375 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2376 {
2377         unsigned char arg[128];
2378         int ret = 0;
2379         unsigned int copylen;
2380
2381         if (!capable(CAP_NET_ADMIN))
2382                 return -EPERM;
2383
2384         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2385                 return -EINVAL;
2386
2387         if (*len < get_arglen[GET_CMDID(cmd)]) {
2388                 pr_err("get_ctl: len %u < %u\n",
2389                        *len, get_arglen[GET_CMDID(cmd)]);
2390                 return -EINVAL;
2391         }
2392
2393         copylen = get_arglen[GET_CMDID(cmd)];
2394         if (copylen > 128)
2395                 return -EINVAL;
2396
2397         if (copy_from_user(arg, user, copylen) != 0)
2398                 return -EFAULT;
2399
2400         if (mutex_lock_interruptible(&__ip_vs_mutex))
2401                 return -ERESTARTSYS;
2402
2403         switch (cmd) {
2404         case IP_VS_SO_GET_VERSION:
2405         {
2406                 char buf[64];
2407
2408                 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2409                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2410                 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2411                         ret = -EFAULT;
2412                         goto out;
2413                 }
2414                 *len = strlen(buf)+1;
2415         }
2416         break;
2417
2418         case IP_VS_SO_GET_INFO:
2419         {
2420                 struct ip_vs_getinfo info;
2421                 info.version = IP_VS_VERSION_CODE;
2422                 info.size = ip_vs_conn_tab_size;
2423                 info.num_services = ip_vs_num_services;
2424                 if (copy_to_user(user, &info, sizeof(info)) != 0)
2425                         ret = -EFAULT;
2426         }
2427         break;
2428
2429         case IP_VS_SO_GET_SERVICES:
2430         {
2431                 struct ip_vs_get_services *get;
2432                 int size;
2433
2434                 get = (struct ip_vs_get_services *)arg;
2435                 size = sizeof(*get) +
2436                         sizeof(struct ip_vs_service_entry) * get->num_services;
2437                 if (*len != size) {
2438                         pr_err("length: %u != %u\n", *len, size);
2439                         ret = -EINVAL;
2440                         goto out;
2441                 }
2442                 ret = __ip_vs_get_service_entries(get, user);
2443         }
2444         break;
2445
2446         case IP_VS_SO_GET_SERVICE:
2447         {
2448                 struct ip_vs_service_entry *entry;
2449                 struct ip_vs_service *svc;
2450                 union nf_inet_addr addr;
2451
2452                 entry = (struct ip_vs_service_entry *)arg;
2453                 addr.ip = entry->addr;
2454                 if (entry->fwmark)
2455                         svc = __ip_vs_svc_fwm_find(AF_INET, entry->fwmark);
2456                 else
2457                         svc = __ip_vs_service_find(AF_INET, entry->protocol,
2458                                                    &addr, entry->port);
2459                 if (svc) {
2460                         ip_vs_copy_service(entry, svc);
2461                         if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2462                                 ret = -EFAULT;
2463                 } else
2464                         ret = -ESRCH;
2465         }
2466         break;
2467
2468         case IP_VS_SO_GET_DESTS:
2469         {
2470                 struct ip_vs_get_dests *get;
2471                 int size;
2472
2473                 get = (struct ip_vs_get_dests *)arg;
2474                 size = sizeof(*get) +
2475                         sizeof(struct ip_vs_dest_entry) * get->num_dests;
2476                 if (*len != size) {
2477                         pr_err("length: %u != %u\n", *len, size);
2478                         ret = -EINVAL;
2479                         goto out;
2480                 }
2481                 ret = __ip_vs_get_dest_entries(get, user);
2482         }
2483         break;
2484
2485         case IP_VS_SO_GET_TIMEOUT:
2486         {
2487                 struct ip_vs_timeout_user t;
2488
2489                 __ip_vs_get_timeouts(&t);
2490                 if (copy_to_user(user, &t, sizeof(t)) != 0)
2491                         ret = -EFAULT;
2492         }
2493         break;
2494
2495         case IP_VS_SO_GET_DAEMON:
2496         {
2497                 struct ip_vs_daemon_user d[2];
2498
2499                 memset(&d, 0, sizeof(d));
2500                 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2501                         d[0].state = IP_VS_STATE_MASTER;
2502                         strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
2503                         d[0].syncid = ip_vs_master_syncid;
2504                 }
2505                 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2506                         d[1].state = IP_VS_STATE_BACKUP;
2507                         strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
2508                         d[1].syncid = ip_vs_backup_syncid;
2509                 }
2510                 if (copy_to_user(user, &d, sizeof(d)) != 0)
2511                         ret = -EFAULT;
2512         }
2513         break;
2514
2515         default:
2516                 ret = -EINVAL;
2517         }
2518
2519   out:
2520         mutex_unlock(&__ip_vs_mutex);
2521         return ret;
2522 }
2523
2524
2525 static struct nf_sockopt_ops ip_vs_sockopts = {
2526         .pf             = PF_INET,
2527         .set_optmin     = IP_VS_BASE_CTL,
2528         .set_optmax     = IP_VS_SO_SET_MAX+1,
2529         .set            = do_ip_vs_set_ctl,
2530         .get_optmin     = IP_VS_BASE_CTL,
2531         .get_optmax     = IP_VS_SO_GET_MAX+1,
2532         .get            = do_ip_vs_get_ctl,
2533         .owner          = THIS_MODULE,
2534 };
2535
2536 /*
2537  * Generic Netlink interface
2538  */
2539
2540 /* IPVS genetlink family */
2541 static struct genl_family ip_vs_genl_family = {
2542         .id             = GENL_ID_GENERATE,
2543         .hdrsize        = 0,
2544         .name           = IPVS_GENL_NAME,
2545         .version        = IPVS_GENL_VERSION,
2546         .maxattr        = IPVS_CMD_MAX,
2547 };
2548
2549 /* Policy used for first-level command attributes */
2550 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2551         [IPVS_CMD_ATTR_SERVICE]         = { .type = NLA_NESTED },
2552         [IPVS_CMD_ATTR_DEST]            = { .type = NLA_NESTED },
2553         [IPVS_CMD_ATTR_DAEMON]          = { .type = NLA_NESTED },
2554         [IPVS_CMD_ATTR_TIMEOUT_TCP]     = { .type = NLA_U32 },
2555         [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2556         [IPVS_CMD_ATTR_TIMEOUT_UDP]     = { .type = NLA_U32 },
2557 };
2558
2559 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2560 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2561         [IPVS_DAEMON_ATTR_STATE]        = { .type = NLA_U32 },
2562         [IPVS_DAEMON_ATTR_MCAST_IFN]    = { .type = NLA_NUL_STRING,
2563                                             .len = IP_VS_IFNAME_MAXLEN },
2564         [IPVS_DAEMON_ATTR_SYNC_ID]      = { .type = NLA_U32 },
2565 };
2566
2567 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2568 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2569         [IPVS_SVC_ATTR_AF]              = { .type = NLA_U16 },
2570         [IPVS_SVC_ATTR_PROTOCOL]        = { .type = NLA_U16 },
2571         [IPVS_SVC_ATTR_ADDR]            = { .type = NLA_BINARY,
2572                                             .len = sizeof(union nf_inet_addr) },
2573         [IPVS_SVC_ATTR_PORT]            = { .type = NLA_U16 },
2574         [IPVS_SVC_ATTR_FWMARK]          = { .type = NLA_U32 },
2575         [IPVS_SVC_ATTR_SCHED_NAME]      = { .type = NLA_NUL_STRING,
2576                                             .len = IP_VS_SCHEDNAME_MAXLEN },
2577         [IPVS_SVC_ATTR_PE_NAME]         = { .type = NLA_NUL_STRING,
2578                                             .len = IP_VS_PENAME_MAXLEN },
2579         [IPVS_SVC_ATTR_FLAGS]           = { .type = NLA_BINARY,
2580                                             .len = sizeof(struct ip_vs_flags) },
2581         [IPVS_SVC_ATTR_TIMEOUT]         = { .type = NLA_U32 },
2582         [IPVS_SVC_ATTR_NETMASK]         = { .type = NLA_U32 },
2583         [IPVS_SVC_ATTR_STATS]           = { .type = NLA_NESTED },
2584 };
2585
2586 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2587 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2588         [IPVS_DEST_ATTR_ADDR]           = { .type = NLA_BINARY,
2589                                             .len = sizeof(union nf_inet_addr) },
2590         [IPVS_DEST_ATTR_PORT]           = { .type = NLA_U16 },
2591         [IPVS_DEST_ATTR_FWD_METHOD]     = { .type = NLA_U32 },
2592         [IPVS_DEST_ATTR_WEIGHT]         = { .type = NLA_U32 },
2593         [IPVS_DEST_ATTR_U_THRESH]       = { .type = NLA_U32 },
2594         [IPVS_DEST_ATTR_L_THRESH]       = { .type = NLA_U32 },
2595         [IPVS_DEST_ATTR_ACTIVE_CONNS]   = { .type = NLA_U32 },
2596         [IPVS_DEST_ATTR_INACT_CONNS]    = { .type = NLA_U32 },
2597         [IPVS_DEST_ATTR_PERSIST_CONNS]  = { .type = NLA_U32 },
2598         [IPVS_DEST_ATTR_STATS]          = { .type = NLA_NESTED },
2599 };
2600
2601 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2602                                  struct ip_vs_stats *stats)
2603 {
2604         struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2605         if (!nl_stats)
2606                 return -EMSGSIZE;
2607
2608         spin_lock_bh(&stats->lock);
2609
2610         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2611         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2612         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2613         NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2614         NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2615         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2616         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2617         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2618         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2619         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
2620
2621         spin_unlock_bh(&stats->lock);
2622
2623         nla_nest_end(skb, nl_stats);
2624
2625         return 0;
2626
2627 nla_put_failure:
2628         spin_unlock_bh(&stats->lock);
2629         nla_nest_cancel(skb, nl_stats);
2630         return -EMSGSIZE;
2631 }
2632
2633 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2634                                    struct ip_vs_service *svc)
2635 {
2636         struct nlattr *nl_service;
2637         struct ip_vs_flags flags = { .flags = svc->flags,
2638                                      .mask = ~0 };
2639
2640         nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2641         if (!nl_service)
2642                 return -EMSGSIZE;
2643
2644         NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
2645
2646         if (svc->fwmark) {
2647                 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2648         } else {
2649                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2650                 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2651                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2652         }
2653
2654         NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2655         if (svc->pe)
2656                 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
2657         NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2658         NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2659         NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2660
2661         if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2662                 goto nla_put_failure;
2663
2664         nla_nest_end(skb, nl_service);
2665
2666         return 0;
2667
2668 nla_put_failure:
2669         nla_nest_cancel(skb, nl_service);
2670         return -EMSGSIZE;
2671 }
2672
2673 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2674                                    struct ip_vs_service *svc,
2675                                    struct netlink_callback *cb)
2676 {
2677         void *hdr;
2678
2679         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2680                           &ip_vs_genl_family, NLM_F_MULTI,
2681                           IPVS_CMD_NEW_SERVICE);
2682         if (!hdr)
2683                 return -EMSGSIZE;
2684
2685         if (ip_vs_genl_fill_service(skb, svc) < 0)
2686                 goto nla_put_failure;
2687
2688         return genlmsg_end(skb, hdr);
2689
2690 nla_put_failure:
2691         genlmsg_cancel(skb, hdr);
2692         return -EMSGSIZE;
2693 }
2694
2695 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2696                                     struct netlink_callback *cb)
2697 {
2698         int idx = 0, i;
2699         int start = cb->args[0];
2700         struct ip_vs_service *svc;
2701
2702         mutex_lock(&__ip_vs_mutex);
2703         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2704                 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2705                         if (++idx <= start)
2706                                 continue;
2707                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2708                                 idx--;
2709                                 goto nla_put_failure;
2710                         }
2711                 }
2712         }
2713
2714         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2715                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2716                         if (++idx <= start)
2717                                 continue;
2718                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2719                                 idx--;
2720                                 goto nla_put_failure;
2721                         }
2722                 }
2723         }
2724
2725 nla_put_failure:
2726         mutex_unlock(&__ip_vs_mutex);
2727         cb->args[0] = idx;
2728
2729         return skb->len;
2730 }
2731
2732 static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2733                                     struct nlattr *nla, int full_entry,
2734                                     struct ip_vs_service **ret_svc)
2735 {
2736         struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2737         struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2738         struct ip_vs_service *svc;
2739
2740         /* Parse mandatory identifying service fields first */
2741         if (nla == NULL ||
2742             nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2743                 return -EINVAL;
2744
2745         nla_af          = attrs[IPVS_SVC_ATTR_AF];
2746         nla_protocol    = attrs[IPVS_SVC_ATTR_PROTOCOL];
2747         nla_addr        = attrs[IPVS_SVC_ATTR_ADDR];
2748         nla_port        = attrs[IPVS_SVC_ATTR_PORT];
2749         nla_fwmark      = attrs[IPVS_SVC_ATTR_FWMARK];
2750
2751         if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2752                 return -EINVAL;
2753
2754         memset(usvc, 0, sizeof(*usvc));
2755
2756         usvc->af = nla_get_u16(nla_af);
2757 #ifdef CONFIG_IP_VS_IPV6
2758         if (usvc->af != AF_INET && usvc->af != AF_INET6)
2759 #else
2760         if (usvc->af != AF_INET)
2761 #endif
2762                 return -EAFNOSUPPORT;
2763
2764         if (nla_fwmark) {
2765                 usvc->protocol = IPPROTO_TCP;
2766                 usvc->fwmark = nla_get_u32(nla_fwmark);
2767         } else {
2768                 usvc->protocol = nla_get_u16(nla_protocol);
2769                 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2770                 usvc->port = nla_get_u16(nla_port);
2771                 usvc->fwmark = 0;
2772         }
2773
2774         if (usvc->fwmark)
2775                 svc = __ip_vs_svc_fwm_find(usvc->af, usvc->fwmark);
2776         else
2777                 svc = __ip_vs_service_find(usvc->af, usvc->protocol,
2778                                            &usvc->addr, usvc->port);
2779         *ret_svc = svc;
2780
2781         /* If a full entry was requested, check for the additional fields */
2782         if (full_entry) {
2783                 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
2784                               *nla_netmask;
2785                 struct ip_vs_flags flags;
2786
2787                 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2788                 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
2789                 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2790                 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2791                 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2792
2793                 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2794                         return -EINVAL;
2795
2796                 nla_memcpy(&flags, nla_flags, sizeof(flags));
2797
2798                 /* prefill flags from service if it already exists */
2799                 if (svc)
2800                         usvc->flags = svc->flags;
2801
2802                 /* set new flags from userland */
2803                 usvc->flags = (usvc->flags & ~flags.mask) |
2804                               (flags.flags & flags.mask);
2805                 usvc->sched_name = nla_data(nla_sched);
2806                 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
2807                 usvc->timeout = nla_get_u32(nla_timeout);
2808                 usvc->netmask = nla_get_u32(nla_netmask);
2809         }
2810
2811         return 0;
2812 }
2813
2814 static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2815 {
2816         struct ip_vs_service_user_kern usvc;
2817         struct ip_vs_service *svc;
2818         int ret;
2819
2820         ret = ip_vs_genl_parse_service(&usvc, nla, 0, &svc);
2821         return ret ? ERR_PTR(ret) : svc;
2822 }
2823
2824 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2825 {
2826         struct nlattr *nl_dest;
2827
2828         nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2829         if (!nl_dest)
2830                 return -EMSGSIZE;
2831
2832         NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2833         NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2834
2835         NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2836                     atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2837         NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2838         NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2839         NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2840         NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2841                     atomic_read(&dest->activeconns));
2842         NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2843                     atomic_read(&dest->inactconns));
2844         NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2845                     atomic_read(&dest->persistconns));
2846
2847         if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2848                 goto nla_put_failure;
2849
2850         nla_nest_end(skb, nl_dest);
2851
2852         return 0;
2853
2854 nla_put_failure:
2855         nla_nest_cancel(skb, nl_dest);
2856         return -EMSGSIZE;
2857 }
2858
2859 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2860                                 struct netlink_callback *cb)
2861 {
2862         void *hdr;
2863
2864         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2865                           &ip_vs_genl_family, NLM_F_MULTI,
2866                           IPVS_CMD_NEW_DEST);
2867         if (!hdr)
2868                 return -EMSGSIZE;
2869
2870         if (ip_vs_genl_fill_dest(skb, dest) < 0)
2871                 goto nla_put_failure;
2872
2873         return genlmsg_end(skb, hdr);
2874
2875 nla_put_failure:
2876         genlmsg_cancel(skb, hdr);
2877         return -EMSGSIZE;
2878 }
2879
2880 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2881                                  struct netlink_callback *cb)
2882 {
2883         int idx = 0;
2884         int start = cb->args[0];
2885         struct ip_vs_service *svc;
2886         struct ip_vs_dest *dest;
2887         struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2888
2889         mutex_lock(&__ip_vs_mutex);
2890
2891         /* Try to find the service for which to dump destinations */
2892         if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2893                         IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2894                 goto out_err;
2895
2896         svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2897         if (IS_ERR(svc) || svc == NULL)
2898                 goto out_err;
2899
2900         /* Dump the destinations */
2901         list_for_each_entry(dest, &svc->destinations, n_list) {
2902                 if (++idx <= start)
2903                         continue;
2904                 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2905                         idx--;
2906                         goto nla_put_failure;
2907                 }
2908         }
2909
2910 nla_put_failure:
2911         cb->args[0] = idx;
2912
2913 out_err:
2914         mutex_unlock(&__ip_vs_mutex);
2915
2916         return skb->len;
2917 }
2918
2919 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
2920                                  struct nlattr *nla, int full_entry)
2921 {
2922         struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2923         struct nlattr *nla_addr, *nla_port;
2924
2925         /* Parse mandatory identifying destination fields first */
2926         if (nla == NULL ||
2927             nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2928                 return -EINVAL;
2929
2930         nla_addr        = attrs[IPVS_DEST_ATTR_ADDR];
2931         nla_port        = attrs[IPVS_DEST_ATTR_PORT];
2932
2933         if (!(nla_addr && nla_port))
2934                 return -EINVAL;
2935
2936         memset(udest, 0, sizeof(*udest));
2937
2938         nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2939         udest->port = nla_get_u16(nla_port);
2940
2941         /* If a full entry was requested, check for the additional fields */
2942         if (full_entry) {
2943                 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2944                               *nla_l_thresh;
2945
2946                 nla_fwd         = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2947                 nla_weight      = attrs[IPVS_DEST_ATTR_WEIGHT];
2948                 nla_u_thresh    = attrs[IPVS_DEST_ATTR_U_THRESH];
2949                 nla_l_thresh    = attrs[IPVS_DEST_ATTR_L_THRESH];
2950
2951                 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2952                         return -EINVAL;
2953
2954                 udest->conn_flags = nla_get_u32(nla_fwd)
2955                                     & IP_VS_CONN_F_FWD_MASK;
2956                 udest->weight = nla_get_u32(nla_weight);
2957                 udest->u_threshold = nla_get_u32(nla_u_thresh);
2958                 udest->l_threshold = nla_get_u32(nla_l_thresh);
2959         }
2960
2961         return 0;
2962 }
2963
2964 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2965                                   const char *mcast_ifn, __be32 syncid)
2966 {
2967         struct nlattr *nl_daemon;
2968
2969         nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2970         if (!nl_daemon)
2971                 return -EMSGSIZE;
2972
2973         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2974         NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2975         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2976
2977         nla_nest_end(skb, nl_daemon);
2978
2979         return 0;
2980
2981 nla_put_failure:
2982         nla_nest_cancel(skb, nl_daemon);
2983         return -EMSGSIZE;
2984 }
2985
2986 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2987                                   const char *mcast_ifn, __be32 syncid,
2988                                   struct netlink_callback *cb)
2989 {
2990         void *hdr;
2991         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2992                           &ip_vs_genl_family, NLM_F_MULTI,
2993                           IPVS_CMD_NEW_DAEMON);
2994         if (!hdr)
2995                 return -EMSGSIZE;
2996
2997         if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2998                 goto nla_put_failure;
2999
3000         return genlmsg_end(skb, hdr);
3001
3002 nla_put_failure:
3003         genlmsg_cancel(skb, hdr);
3004         return -EMSGSIZE;
3005 }
3006
3007 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3008                                    struct netlink_callback *cb)
3009 {
3010         mutex_lock(&__ip_vs_mutex);
3011         if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3012                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3013                                            ip_vs_master_mcast_ifn,
3014                                            ip_vs_master_syncid, cb) < 0)
3015                         goto nla_put_failure;
3016
3017                 cb->args[0] = 1;
3018         }
3019
3020         if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3021                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3022                                            ip_vs_backup_mcast_ifn,
3023                                            ip_vs_backup_syncid, cb) < 0)
3024                         goto nla_put_failure;
3025
3026                 cb->args[1] = 1;
3027         }
3028
3029 nla_put_failure:
3030         mutex_unlock(&__ip_vs_mutex);
3031
3032         return skb->len;
3033 }
3034
3035 static int ip_vs_genl_new_daemon(struct nlattr **attrs)
3036 {
3037         if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3038               attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3039               attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3040                 return -EINVAL;
3041
3042         return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3043                                  nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3044                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3045 }
3046
3047 static int ip_vs_genl_del_daemon(struct nlattr **attrs)
3048 {
3049         if (!attrs[IPVS_DAEMON_ATTR_STATE])
3050                 return -EINVAL;
3051
3052         return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3053 }
3054
3055 static int ip_vs_genl_set_config(struct nlattr **attrs)
3056 {
3057         struct ip_vs_timeout_user t;
3058
3059         __ip_vs_get_timeouts(&t);
3060
3061         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3062                 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3063
3064         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3065                 t.tcp_fin_timeout =
3066                         nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3067
3068         if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3069                 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3070
3071         return ip_vs_set_timeout(&t);
3072 }
3073
3074 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3075 {
3076         struct ip_vs_service *svc = NULL;
3077         struct ip_vs_service_user_kern usvc;
3078         struct ip_vs_dest_user_kern udest;
3079         int ret = 0, cmd;
3080         int need_full_svc = 0, need_full_dest = 0;
3081
3082         cmd = info->genlhdr->cmd;
3083
3084         mutex_lock(&__ip_vs_mutex);
3085
3086         if (cmd == IPVS_CMD_FLUSH) {
3087                 ret = ip_vs_flush();
3088                 goto out;
3089         } else if (cmd == IPVS_CMD_SET_CONFIG) {
3090                 ret = ip_vs_genl_set_config(info->attrs);
3091                 goto out;
3092         } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3093                    cmd == IPVS_CMD_DEL_DAEMON) {
3094
3095                 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3096
3097                 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3098                     nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3099                                      info->attrs[IPVS_CMD_ATTR_DAEMON],
3100                                      ip_vs_daemon_policy)) {
3101                         ret = -EINVAL;
3102                         goto out;
3103                 }
3104
3105                 if (cmd == IPVS_CMD_NEW_DAEMON)
3106                         ret = ip_vs_genl_new_daemon(daemon_attrs);
3107                 else
3108                         ret = ip_vs_genl_del_daemon(daemon_attrs);
3109                 goto out;
3110         } else if (cmd == IPVS_CMD_ZERO &&
3111                    !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3112                 ret = ip_vs_zero_all();
3113                 goto out;
3114         }
3115
3116         /* All following commands require a service argument, so check if we
3117          * received a valid one. We need a full service specification when
3118          * adding / editing a service. Only identifying members otherwise. */
3119         if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3120                 need_full_svc = 1;
3121
3122         ret = ip_vs_genl_parse_service(&usvc,
3123                                        info->attrs[IPVS_CMD_ATTR_SERVICE],
3124                                        need_full_svc, &svc);
3125         if (ret)
3126                 goto out;
3127
3128         /* Unless we're adding a new service, the service must already exist */
3129         if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3130                 ret = -ESRCH;
3131                 goto out;
3132         }
3133
3134         /* Destination commands require a valid destination argument. For
3135          * adding / editing a destination, we need a full destination
3136          * specification. */
3137         if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3138             cmd == IPVS_CMD_DEL_DEST) {
3139                 if (cmd != IPVS_CMD_DEL_DEST)
3140                         need_full_dest = 1;
3141
3142                 ret = ip_vs_genl_parse_dest(&udest,
3143                                             info->attrs[IPVS_CMD_ATTR_DEST],
3144                                             need_full_dest);
3145                 if (ret)
3146                         goto out;
3147         }
3148
3149         switch (cmd) {
3150         case IPVS_CMD_NEW_SERVICE:
3151                 if (svc == NULL)
3152                         ret = ip_vs_add_service(&usvc, &svc);
3153                 else
3154                         ret = -EEXIST;
3155                 break;
3156         case IPVS_CMD_SET_SERVICE:
3157                 ret = ip_vs_edit_service(svc, &usvc);
3158                 break;
3159         case IPVS_CMD_DEL_SERVICE:
3160                 ret = ip_vs_del_service(svc);
3161                 /* do not use svc, it can be freed */
3162                 break;
3163         case IPVS_CMD_NEW_DEST:
3164                 ret = ip_vs_add_dest(svc, &udest);
3165                 break;
3166         case IPVS_CMD_SET_DEST:
3167                 ret = ip_vs_edit_dest(svc, &udest);
3168                 break;
3169         case IPVS_CMD_DEL_DEST:
3170                 ret = ip_vs_del_dest(svc, &udest);
3171                 break;
3172         case IPVS_CMD_ZERO:
3173                 ret = ip_vs_zero_service(svc);
3174                 break;
3175         default:
3176                 ret = -EINVAL;
3177         }
3178
3179 out:
3180         mutex_unlock(&__ip_vs_mutex);
3181
3182         return ret;
3183 }
3184
3185 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3186 {
3187         struct sk_buff *msg;
3188         void *reply;
3189         int ret, cmd, reply_cmd;
3190
3191         cmd = info->genlhdr->cmd;
3192
3193         if (cmd == IPVS_CMD_GET_SERVICE)
3194                 reply_cmd = IPVS_CMD_NEW_SERVICE;
3195         else if (cmd == IPVS_CMD_GET_INFO)
3196                 reply_cmd = IPVS_CMD_SET_INFO;
3197         else if (cmd == IPVS_CMD_GET_CONFIG)
3198                 reply_cmd = IPVS_CMD_SET_CONFIG;
3199         else {
3200                 pr_err("unknown Generic Netlink command\n");
3201                 return -EINVAL;
3202         }
3203
3204         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3205         if (!msg)
3206                 return -ENOMEM;
3207
3208         mutex_lock(&__ip_vs_mutex);
3209
3210         reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3211         if (reply == NULL)
3212                 goto nla_put_failure;
3213
3214         switch (cmd) {
3215         case IPVS_CMD_GET_SERVICE:
3216         {
3217                 struct ip_vs_service *svc;
3218
3219                 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3220                 if (IS_ERR(svc)) {
3221                         ret = PTR_ERR(svc);
3222                         goto out_err;
3223                 } else if (svc) {
3224                         ret = ip_vs_genl_fill_service(msg, svc);
3225                         if (ret)
3226                                 goto nla_put_failure;
3227                 } else {
3228                         ret = -ESRCH;
3229                         goto out_err;
3230                 }
3231
3232                 break;
3233         }
3234
3235         case IPVS_CMD_GET_CONFIG:
3236         {
3237                 struct ip_vs_timeout_user t;
3238
3239                 __ip_vs_get_timeouts(&t);
3240 #ifdef CONFIG_IP_VS_PROTO_TCP
3241                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3242                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3243                             t.tcp_fin_timeout);
3244 #endif
3245 #ifdef CONFIG_IP_VS_PROTO_UDP
3246                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3247 #endif
3248
3249                 break;
3250         }
3251
3252         case IPVS_CMD_GET_INFO:
3253                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3254                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3255                             ip_vs_conn_tab_size);
3256                 break;
3257         }
3258
3259         genlmsg_end(msg, reply);
3260         ret = genlmsg_reply(msg, info);
3261         goto out;
3262
3263 nla_put_failure:
3264         pr_err("not enough space in Netlink message\n");
3265         ret = -EMSGSIZE;
3266
3267 out_err:
3268         nlmsg_free(msg);
3269 out:
3270         mutex_unlock(&__ip_vs_mutex);
3271
3272         return ret;
3273 }
3274
3275
3276 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3277         {
3278                 .cmd    = IPVS_CMD_NEW_SERVICE,
3279                 .flags  = GENL_ADMIN_PERM,
3280                 .policy = ip_vs_cmd_policy,
3281                 .doit   = ip_vs_genl_set_cmd,
3282         },
3283         {
3284                 .cmd    = IPVS_CMD_SET_SERVICE,
3285                 .flags  = GENL_ADMIN_PERM,
3286                 .policy = ip_vs_cmd_policy,
3287                 .doit   = ip_vs_genl_set_cmd,
3288         },
3289         {
3290                 .cmd    = IPVS_CMD_DEL_SERVICE,
3291                 .flags  = GENL_ADMIN_PERM,
3292                 .policy = ip_vs_cmd_policy,
3293                 .doit   = ip_vs_genl_set_cmd,
3294         },
3295         {
3296                 .cmd    = IPVS_CMD_GET_SERVICE,
3297                 .flags  = GENL_ADMIN_PERM,
3298                 .doit   = ip_vs_genl_get_cmd,
3299                 .dumpit = ip_vs_genl_dump_services,
3300                 .policy = ip_vs_cmd_policy,
3301         },
3302         {
3303                 .cmd    = IPVS_CMD_NEW_DEST,
3304                 .flags  = GENL_ADMIN_PERM,
3305                 .policy = ip_vs_cmd_policy,
3306                 .doit   = ip_vs_genl_set_cmd,
3307         },
3308         {
3309                 .cmd    = IPVS_CMD_SET_DEST,
3310                 .flags  = GENL_ADMIN_PERM,
3311                 .policy = ip_vs_cmd_policy,
3312                 .doit   = ip_vs_genl_set_cmd,
3313         },
3314         {
3315                 .cmd    = IPVS_CMD_DEL_DEST,
3316                 .flags  = GENL_ADMIN_PERM,
3317                 .policy = ip_vs_cmd_policy,
3318                 .doit   = ip_vs_genl_set_cmd,
3319         },
3320         {
3321                 .cmd    = IPVS_CMD_GET_DEST,
3322                 .flags  = GENL_ADMIN_PERM,
3323                 .policy = ip_vs_cmd_policy,
3324                 .dumpit = ip_vs_genl_dump_dests,
3325         },
3326         {
3327                 .cmd    = IPVS_CMD_NEW_DAEMON,
3328                 .flags  = GENL_ADMIN_PERM,
3329                 .policy = ip_vs_cmd_policy,
3330                 .doit   = ip_vs_genl_set_cmd,
3331         },
3332         {
3333                 .cmd    = IPVS_CMD_DEL_DAEMON,
3334                 .flags  = GENL_ADMIN_PERM,
3335                 .policy = ip_vs_cmd_policy,
3336                 .doit   = ip_vs_genl_set_cmd,
3337         },
3338         {
3339                 .cmd    = IPVS_CMD_GET_DAEMON,
3340                 .flags  = GENL_ADMIN_PERM,
3341                 .dumpit = ip_vs_genl_dump_daemons,
3342         },
3343         {
3344                 .cmd    = IPVS_CMD_SET_CONFIG,
3345                 .flags  = GENL_ADMIN_PERM,
3346                 .policy = ip_vs_cmd_policy,
3347                 .doit   = ip_vs_genl_set_cmd,
3348         },
3349         {
3350                 .cmd    = IPVS_CMD_GET_CONFIG,
3351                 .flags  = GENL_ADMIN_PERM,
3352                 .doit   = ip_vs_genl_get_cmd,
3353         },
3354         {
3355                 .cmd    = IPVS_CMD_GET_INFO,
3356                 .flags  = GENL_ADMIN_PERM,
3357                 .doit   = ip_vs_genl_get_cmd,
3358         },
3359         {
3360                 .cmd    = IPVS_CMD_ZERO,
3361                 .flags  = GENL_ADMIN_PERM,
3362                 .policy = ip_vs_cmd_policy,
3363                 .doit   = ip_vs_genl_set_cmd,
3364         },
3365         {
3366                 .cmd    = IPVS_CMD_FLUSH,
3367                 .flags  = GENL_ADMIN_PERM,
3368                 .doit   = ip_vs_genl_set_cmd,
3369         },
3370 };
3371
3372 static int __init ip_vs_genl_register(void)
3373 {
3374         return genl_register_family_with_ops(&ip_vs_genl_family,
3375                 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
3376 }
3377
3378 static void ip_vs_genl_unregister(void)
3379 {
3380         genl_unregister_family(&ip_vs_genl_family);
3381 }
3382
3383 /* End of Generic Netlink interface definitions */
3384
3385
3386 int __init ip_vs_control_init(void)
3387 {
3388         int ret;
3389         int idx;
3390
3391         EnterFunction(2);
3392
3393         /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3394         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
3395                 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3396                 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3397         }
3398         for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
3399                 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3400         }
3401         smp_wmb();
3402
3403         ret = nf_register_sockopt(&ip_vs_sockopts);
3404         if (ret) {
3405                 pr_err("cannot register sockopt.\n");
3406                 return ret;
3407         }
3408
3409         ret = ip_vs_genl_register();
3410         if (ret) {
3411                 pr_err("cannot register Generic Netlink interface.\n");
3412                 nf_unregister_sockopt(&ip_vs_sockopts);
3413                 return ret;
3414         }
3415
3416         proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3417         proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
3418
3419         sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
3420
3421         ip_vs_new_estimator(&ip_vs_stats);
3422
3423         /* Hook the defense timer */
3424         schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3425
3426         LeaveFunction(2);
3427         return 0;
3428 }
3429
3430
3431 void ip_vs_control_cleanup(void)
3432 {
3433         EnterFunction(2);
3434         ip_vs_trash_cleanup();
3435         cancel_rearming_delayed_work(&defense_work);
3436         cancel_work_sync(&defense_work.work);
3437         ip_vs_kill_estimator(&ip_vs_stats);
3438         unregister_sysctl_table(sysctl_header);
3439         proc_net_remove(&init_net, "ip_vs_stats");
3440         proc_net_remove(&init_net, "ip_vs");
3441         ip_vs_genl_unregister();
3442         nf_unregister_sockopt(&ip_vs_sockopts);
3443         LeaveFunction(2);
3444 }