IPVS: netns, use ip_vs_proto_data as param.
[linux-2.6.git] / net / netfilter / ipvs / ip_vs_ctl.c
1 /*
2  * IPVS         An implementation of the IP virtual server support for the
3  *              LINUX operating system.  IPVS is now implemented as a module
4  *              over the NetFilter framework. IPVS can be used to build a
5  *              high-performance and highly available server based on a
6  *              cluster of servers.
7  *
8  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
9  *              Peter Kese <peter.kese@ijs.si>
10  *              Julian Anastasov <ja@ssi.bg>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  *
17  * Changes:
18  *
19  */
20
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
28 #include <linux/fs.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34 #include <linux/slab.h>
35
36 #include <linux/netfilter.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/mutex.h>
39
40 #include <net/net_namespace.h>
41 #include <linux/nsproxy.h>
42 #include <net/ip.h>
43 #ifdef CONFIG_IP_VS_IPV6
44 #include <net/ipv6.h>
45 #include <net/ip6_route.h>
46 #endif
47 #include <net/route.h>
48 #include <net/sock.h>
49 #include <net/genetlink.h>
50
51 #include <asm/uaccess.h>
52
53 #include <net/ip_vs.h>
54
55 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
56 static DEFINE_MUTEX(__ip_vs_mutex);
57
58 /* lock for service table */
59 static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
61 /* lock for table with the real services */
62 static DEFINE_RWLOCK(__ip_vs_rs_lock);
63
64 /* lock for state and timeout tables */
65 static DEFINE_SPINLOCK(ip_vs_securetcp_lock);
66
67 /* lock for drop entry handling */
68 static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
69
70 /* lock for drop packet handling */
71 static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
72
73 /* 1/rate drop and drop-entry variables */
74 int ip_vs_drop_rate = 0;
75 int ip_vs_drop_counter = 0;
76 static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
77
78 /* number of virtual services */
79 static int ip_vs_num_services = 0;
80
81 /* sysctl variables */
82 static int sysctl_ip_vs_drop_entry = 0;
83 static int sysctl_ip_vs_drop_packet = 0;
84 static int sysctl_ip_vs_secure_tcp = 0;
85 static int sysctl_ip_vs_amemthresh = 1024;
86 static int sysctl_ip_vs_am_droprate = 10;
87 int sysctl_ip_vs_cache_bypass = 0;
88 int sysctl_ip_vs_expire_nodest_conn = 0;
89 int sysctl_ip_vs_expire_quiescent_template = 0;
90 int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
91 int sysctl_ip_vs_nat_icmp_send = 0;
92 #ifdef CONFIG_IP_VS_NFCT
93 int sysctl_ip_vs_conntrack;
94 #endif
95 int sysctl_ip_vs_snat_reroute = 1;
96 int sysctl_ip_vs_sync_ver = 1;          /* Default version of sync proto */
97
#ifdef CONFIG_IP_VS_DEBUG
/* Debug verbosity; only built when CONFIG_IP_VS_DEBUG is set. */
static int sysctl_ip_vs_debug_level;

/*
 *	Accessor for the file-local debug level, so that code outside
 *	this file can read it.
 */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif
106
#ifdef CONFIG_IP_VS_IPV6
/*
 *	Check whether an IPv6 address is local to this host.
 *	Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way?
 *
 *	The address is looked up in the routing table of init_net and is
 *	considered local when the resulting route is valid and its output
 *	device is a loopback device.
 *
 *	Returns 1 if the address is local, 0 otherwise.
 */
static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
{
	struct dst_entry *dst;
	struct flowi fl = {
		.oif = 0,
		.fl6_dst = *addr,
		.fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
	};
	int is_local;

	dst = ip6_route_output(&init_net, NULL, &fl);
	if (!dst)
		return 0;

	/*
	 * A failed lookup is reported through dst->error rather than by a
	 * NULL return, so the error must be checked before trusting the
	 * device of the returned entry.
	 */
	is_local = !dst->error && dst->dev &&
		   (dst->dev->flags & IFF_LOOPBACK);

	/* The lookup handed us a reference; drop it or the route leaks. */
	dst_release(dst);

	return is_local;
}
#endif
125 /*
126  *      update_defense_level is called from keventd and from sysctl,
127  *      so it needs to protect itself from softirqs
128  */
129 static void update_defense_level(struct netns_ipvs *ipvs)
130 {
131         struct sysinfo i;
132         static int old_secure_tcp = 0;
133         int availmem;
134         int nomem;
135         int to_change = -1;
136
137         /* we only count free and buffered memory (in pages) */
138         si_meminfo(&i);
139         availmem = i.freeram + i.bufferram;
140         /* however in linux 2.5 the i.bufferram is total page cache size,
141            we need adjust it */
142         /* si_swapinfo(&i); */
143         /* availmem = availmem - (i.totalswap - i.freeswap); */
144
145         nomem = (availmem < sysctl_ip_vs_amemthresh);
146
147         local_bh_disable();
148
149         /* drop_entry */
150         spin_lock(&__ip_vs_dropentry_lock);
151         switch (sysctl_ip_vs_drop_entry) {
152         case 0:
153                 atomic_set(&ip_vs_dropentry, 0);
154                 break;
155         case 1:
156                 if (nomem) {
157                         atomic_set(&ip_vs_dropentry, 1);
158                         sysctl_ip_vs_drop_entry = 2;
159                 } else {
160                         atomic_set(&ip_vs_dropentry, 0);
161                 }
162                 break;
163         case 2:
164                 if (nomem) {
165                         atomic_set(&ip_vs_dropentry, 1);
166                 } else {
167                         atomic_set(&ip_vs_dropentry, 0);
168                         sysctl_ip_vs_drop_entry = 1;
169                 };
170                 break;
171         case 3:
172                 atomic_set(&ip_vs_dropentry, 1);
173                 break;
174         }
175         spin_unlock(&__ip_vs_dropentry_lock);
176
177         /* drop_packet */
178         spin_lock(&__ip_vs_droppacket_lock);
179         switch (sysctl_ip_vs_drop_packet) {
180         case 0:
181                 ip_vs_drop_rate = 0;
182                 break;
183         case 1:
184                 if (nomem) {
185                         ip_vs_drop_rate = ip_vs_drop_counter
186                                 = sysctl_ip_vs_amemthresh /
187                                 (sysctl_ip_vs_amemthresh-availmem);
188                         sysctl_ip_vs_drop_packet = 2;
189                 } else {
190                         ip_vs_drop_rate = 0;
191                 }
192                 break;
193         case 2:
194                 if (nomem) {
195                         ip_vs_drop_rate = ip_vs_drop_counter
196                                 = sysctl_ip_vs_amemthresh /
197                                 (sysctl_ip_vs_amemthresh-availmem);
198                 } else {
199                         ip_vs_drop_rate = 0;
200                         sysctl_ip_vs_drop_packet = 1;
201                 }
202                 break;
203         case 3:
204                 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
205                 break;
206         }
207         spin_unlock(&__ip_vs_droppacket_lock);
208
209         /* secure_tcp */
210         spin_lock(&ip_vs_securetcp_lock);
211         switch (sysctl_ip_vs_secure_tcp) {
212         case 0:
213                 if (old_secure_tcp >= 2)
214                         to_change = 0;
215                 break;
216         case 1:
217                 if (nomem) {
218                         if (old_secure_tcp < 2)
219                                 to_change = 1;
220                         sysctl_ip_vs_secure_tcp = 2;
221                 } else {
222                         if (old_secure_tcp >= 2)
223                                 to_change = 0;
224                 }
225                 break;
226         case 2:
227                 if (nomem) {
228                         if (old_secure_tcp < 2)
229                                 to_change = 1;
230                 } else {
231                         if (old_secure_tcp >= 2)
232                                 to_change = 0;
233                         sysctl_ip_vs_secure_tcp = 1;
234                 }
235                 break;
236         case 3:
237                 if (old_secure_tcp < 2)
238                         to_change = 1;
239                 break;
240         }
241         old_secure_tcp = sysctl_ip_vs_secure_tcp;
242         if (to_change >= 0)
243                 ip_vs_protocol_timeout_change(ipvs,
244                                              sysctl_ip_vs_secure_tcp > 1);
245         spin_unlock(&ip_vs_securetcp_lock);
246
247         local_bh_enable();
248 }
249
250
251 /*
252  *      Timer for checking the defense
253  */
254 #define DEFENSE_TIMER_PERIOD    1*HZ
255 static void defense_work_handler(struct work_struct *work);
256 static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
257
258 static void defense_work_handler(struct work_struct *work)
259 {
260         struct net *net = &init_net;
261         struct netns_ipvs *ipvs = net_ipvs(net);
262
263         update_defense_level(ipvs);
264         if (atomic_read(&ip_vs_dropentry))
265                 ip_vs_random_dropentry();
266
267         schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
268 }
269
270 int
271 ip_vs_use_count_inc(void)
272 {
273         return try_module_get(THIS_MODULE);
274 }
275
276 void
277 ip_vs_use_count_dec(void)
278 {
279         module_put(THIS_MODULE);
280 }
281
282
283 /*
284  *      Hash table: for virtual service lookups
285  */
286 #define IP_VS_SVC_TAB_BITS 8
287 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
288 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
289
290 /* the service table hashed by <protocol, addr, port> */
291 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
292 /* the service table hashed by fwmark */
293 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
294
295 /*
296  *      Trash for destinations
297  */
298 static LIST_HEAD(ip_vs_dest_trash);
299
300 /*
301  *      FTP & NULL virtual service counters
302  */
303 static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
304 static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
305
306
307 /*
308  *      Returns hash value for virtual service
309  */
310 static inline unsigned
311 ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
312                   const union nf_inet_addr *addr, __be16 port)
313 {
314         register unsigned porth = ntohs(port);
315         __be32 addr_fold = addr->ip;
316
317 #ifdef CONFIG_IP_VS_IPV6
318         if (af == AF_INET6)
319                 addr_fold = addr->ip6[0]^addr->ip6[1]^
320                             addr->ip6[2]^addr->ip6[3];
321 #endif
322         addr_fold ^= ((size_t)net>>8);
323
324         return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
325                 & IP_VS_SVC_TAB_MASK;
326 }
327
328 /*
329  *      Returns hash value of fwmark for virtual service lookup
330  */
331 static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
332 {
333         return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
334 }
335
336 /*
337  *      Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
338  *      or in the ip_vs_svc_fwm_table by fwmark.
339  *      Should be called with locked tables.
340  */
341 static int ip_vs_svc_hash(struct ip_vs_service *svc)
342 {
343         unsigned hash;
344
345         if (svc->flags & IP_VS_SVC_F_HASHED) {
346                 pr_err("%s(): request for already hashed, called from %pF\n",
347                        __func__, __builtin_return_address(0));
348                 return 0;
349         }
350
351         if (svc->fwmark == 0) {
352                 /*
353                  *  Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
354                  */
355                 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
356                                          &svc->addr, svc->port);
357                 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
358         } else {
359                 /*
360                  *  Hash it by fwmark in svc_fwm_table
361                  */
362                 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
363                 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
364         }
365
366         svc->flags |= IP_VS_SVC_F_HASHED;
367         /* increase its refcnt because it is referenced by the svc table */
368         atomic_inc(&svc->refcnt);
369         return 1;
370 }
371
372
373 /*
374  *      Unhashes a service from svc_table / svc_fwm_table.
375  *      Should be called with locked tables.
376  */
377 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
378 {
379         if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
380                 pr_err("%s(): request for unhash flagged, called from %pF\n",
381                        __func__, __builtin_return_address(0));
382                 return 0;
383         }
384
385         if (svc->fwmark == 0) {
386                 /* Remove it from the svc_table table */
387                 list_del(&svc->s_list);
388         } else {
389                 /* Remove it from the svc_fwm_table table */
390                 list_del(&svc->f_list);
391         }
392
393         svc->flags &= ~IP_VS_SVC_F_HASHED;
394         atomic_dec(&svc->refcnt);
395         return 1;
396 }
397
398
399 /*
400  *      Get service by {netns, proto,addr,port} in the service table.
401  */
402 static inline struct ip_vs_service *
403 __ip_vs_service_find(struct net *net, int af, __u16 protocol,
404                      const union nf_inet_addr *vaddr, __be16 vport)
405 {
406         unsigned hash;
407         struct ip_vs_service *svc;
408
409         /* Check for "full" addressed entries */
410         hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
411
412         list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
413                 if ((svc->af == af)
414                     && ip_vs_addr_equal(af, &svc->addr, vaddr)
415                     && (svc->port == vport)
416                     && (svc->protocol == protocol)
417                     && net_eq(svc->net, net)) {
418                         /* HIT */
419                         return svc;
420                 }
421         }
422
423         return NULL;
424 }
425
426
427 /*
428  *      Get service by {fwmark} in the service table.
429  */
430 static inline struct ip_vs_service *
431 __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
432 {
433         unsigned hash;
434         struct ip_vs_service *svc;
435
436         /* Check for fwmark addressed entries */
437         hash = ip_vs_svc_fwm_hashkey(net, fwmark);
438
439         list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
440                 if (svc->fwmark == fwmark && svc->af == af
441                     && net_eq(svc->net, net)) {
442                         /* HIT */
443                         return svc;
444                 }
445         }
446
447         return NULL;
448 }
449
450 struct ip_vs_service *
451 ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
452                   const union nf_inet_addr *vaddr, __be16 vport)
453 {
454         struct ip_vs_service *svc;
455
456         read_lock(&__ip_vs_svc_lock);
457
458         /*
459          *      Check the table hashed by fwmark first
460          */
461         svc = __ip_vs_svc_fwm_find(net, af, fwmark);
462         if (fwmark && svc)
463                 goto out;
464
465         /*
466          *      Check the table hashed by <protocol,addr,port>
467          *      for "full" addressed entries
468          */
469         svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
470
471         if (svc == NULL
472             && protocol == IPPROTO_TCP
473             && atomic_read(&ip_vs_ftpsvc_counter)
474             && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
475                 /*
476                  * Check if ftp service entry exists, the packet
477                  * might belong to FTP data connections.
478                  */
479                 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
480         }
481
482         if (svc == NULL
483             && atomic_read(&ip_vs_nullsvc_counter)) {
484                 /*
485                  * Check if the catch-all port (port zero) exists
486                  */
487                 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
488         }
489
490   out:
491         if (svc)
492                 atomic_inc(&svc->usecnt);
493         read_unlock(&__ip_vs_svc_lock);
494
495         IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
496                       fwmark, ip_vs_proto_name(protocol),
497                       IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
498                       svc ? "hit" : "not hit");
499
500         return svc;
501 }
502
503
504 static inline void
505 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
506 {
507         atomic_inc(&svc->refcnt);
508         dest->svc = svc;
509 }
510
511 static void
512 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
513 {
514         struct ip_vs_service *svc = dest->svc;
515
516         dest->svc = NULL;
517         if (atomic_dec_and_test(&svc->refcnt)) {
518                 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
519                               svc->fwmark,
520                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
521                               ntohs(svc->port), atomic_read(&svc->usecnt));
522                 kfree(svc);
523         }
524 }
525
526
527 /*
528  *      Returns hash value for real service
529  */
530 static inline unsigned ip_vs_rs_hashkey(int af,
531                                             const union nf_inet_addr *addr,
532                                             __be16 port)
533 {
534         register unsigned porth = ntohs(port);
535         __be32 addr_fold = addr->ip;
536
537 #ifdef CONFIG_IP_VS_IPV6
538         if (af == AF_INET6)
539                 addr_fold = addr->ip6[0]^addr->ip6[1]^
540                             addr->ip6[2]^addr->ip6[3];
541 #endif
542
543         return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
544                 & IP_VS_RTAB_MASK;
545 }
546
547 /*
548  *      Hashes ip_vs_dest in rs_table by <proto,addr,port>.
549  *      should be called with locked tables.
550  */
551 static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
552 {
553         unsigned hash;
554
555         if (!list_empty(&dest->d_list)) {
556                 return 0;
557         }
558
559         /*
560          *      Hash by proto,addr,port,
561          *      which are the parameters of the real service.
562          */
563         hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
564
565         list_add(&dest->d_list, &ipvs->rs_table[hash]);
566
567         return 1;
568 }
569
570 /*
571  *      UNhashes ip_vs_dest from rs_table.
572  *      should be called with locked tables.
573  */
574 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
575 {
576         /*
577          * Remove it from the rs_table table.
578          */
579         if (!list_empty(&dest->d_list)) {
580                 list_del(&dest->d_list);
581                 INIT_LIST_HEAD(&dest->d_list);
582         }
583
584         return 1;
585 }
586
587 /*
588  *      Lookup real service by <proto,addr,port> in the real service table.
589  */
590 struct ip_vs_dest *
591 ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
592                           const union nf_inet_addr *daddr,
593                           __be16 dport)
594 {
595         struct netns_ipvs *ipvs = net_ipvs(net);
596         unsigned hash;
597         struct ip_vs_dest *dest;
598
599         /*
600          *      Check for "full" addressed entries
601          *      Return the first found entry
602          */
603         hash = ip_vs_rs_hashkey(af, daddr, dport);
604
605         read_lock(&__ip_vs_rs_lock);
606         list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
607                 if ((dest->af == af)
608                     && ip_vs_addr_equal(af, &dest->addr, daddr)
609                     && (dest->port == dport)
610                     && ((dest->protocol == protocol) ||
611                         dest->vfwmark)) {
612                         /* HIT */
613                         read_unlock(&__ip_vs_rs_lock);
614                         return dest;
615                 }
616         }
617         read_unlock(&__ip_vs_rs_lock);
618
619         return NULL;
620 }
621
622 /*
623  *      Lookup destination by {addr,port} in the given service
624  */
625 static struct ip_vs_dest *
626 ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
627                   __be16 dport)
628 {
629         struct ip_vs_dest *dest;
630
631         /*
632          * Find the destination for the given service
633          */
634         list_for_each_entry(dest, &svc->destinations, n_list) {
635                 if ((dest->af == svc->af)
636                     && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
637                     && (dest->port == dport)) {
638                         /* HIT */
639                         return dest;
640                 }
641         }
642
643         return NULL;
644 }
645
646 /*
647  * Find destination by {daddr,dport,vaddr,protocol}
648  * Cretaed to be used in ip_vs_process_message() in
649  * the backup synchronization daemon. It finds the
650  * destination to be bound to the received connection
651  * on the backup.
652  *
653  * ip_vs_lookup_real_service() looked promissing, but
654  * seems not working as expected.
655  */
656 struct ip_vs_dest *ip_vs_find_dest(struct net  *net, int af,
657                                    const union nf_inet_addr *daddr,
658                                    __be16 dport,
659                                    const union nf_inet_addr *vaddr,
660                                    __be16 vport, __u16 protocol, __u32 fwmark)
661 {
662         struct ip_vs_dest *dest;
663         struct ip_vs_service *svc;
664
665         svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
666         if (!svc)
667                 return NULL;
668         dest = ip_vs_lookup_dest(svc, daddr, dport);
669         if (dest)
670                 atomic_inc(&dest->refcnt);
671         ip_vs_service_put(svc);
672         return dest;
673 }
674
675 /*
676  *  Lookup dest by {svc,addr,port} in the destination trash.
677  *  The destination trash is used to hold the destinations that are removed
678  *  from the service table but are still referenced by some conn entries.
679  *  The reason to add the destination trash is when the dest is temporary
680  *  down (either by administrator or by monitor program), the dest can be
681  *  picked back from the trash, the remaining connections to the dest can
682  *  continue, and the counting information of the dest is also useful for
683  *  scheduling.
684  */
685 static struct ip_vs_dest *
686 ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
687                      __be16 dport)
688 {
689         struct ip_vs_dest *dest, *nxt;
690
691         /*
692          * Find the destination in trash
693          */
694         list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
695                 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
696                               "dest->refcnt=%d\n",
697                               dest->vfwmark,
698                               IP_VS_DBG_ADDR(svc->af, &dest->addr),
699                               ntohs(dest->port),
700                               atomic_read(&dest->refcnt));
701                 if (dest->af == svc->af &&
702                     ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
703                     dest->port == dport &&
704                     dest->vfwmark == svc->fwmark &&
705                     dest->protocol == svc->protocol &&
706                     (svc->fwmark ||
707                      (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
708                       dest->vport == svc->port))) {
709                         /* HIT */
710                         return dest;
711                 }
712
713                 /*
714                  * Try to purge the destination from trash if not referenced
715                  */
716                 if (atomic_read(&dest->refcnt) == 1) {
717                         IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
718                                       "from trash\n",
719                                       dest->vfwmark,
720                                       IP_VS_DBG_ADDR(svc->af, &dest->addr),
721                                       ntohs(dest->port));
722                         list_del(&dest->n_list);
723                         ip_vs_dst_reset(dest);
724                         __ip_vs_unbind_svc(dest);
725                         kfree(dest);
726                 }
727         }
728
729         return NULL;
730 }
731
732
733 /*
734  *  Clean up all the destinations in the trash
735  *  Called by the ip_vs_control_cleanup()
736  *
737  *  When the ip_vs_control_clearup is activated by ipvs module exit,
738  *  the service tables must have been flushed and all the connections
739  *  are expired, and the refcnt of each destination in the trash must
740  *  be 1, so we simply release them here.
741  */
742 static void ip_vs_trash_cleanup(void)
743 {
744         struct ip_vs_dest *dest, *nxt;
745
746         list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
747                 list_del(&dest->n_list);
748                 ip_vs_dst_reset(dest);
749                 __ip_vs_unbind_svc(dest);
750                 kfree(dest);
751         }
752 }
753
754
755 static void
756 ip_vs_zero_stats(struct ip_vs_stats *stats)
757 {
758         spin_lock_bh(&stats->lock);
759
760         memset(&stats->ustats, 0, sizeof(stats->ustats));
761         ip_vs_zero_estimator(stats);
762
763         spin_unlock_bh(&stats->lock);
764 }
765
766 /*
767  *      Update a destination in the given service
768  */
769 static void
770 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
771                     struct ip_vs_dest_user_kern *udest, int add)
772 {
773         struct netns_ipvs *ipvs = net_ipvs(svc->net);
774         int conn_flags;
775
776         /* set the weight and the flags */
777         atomic_set(&dest->weight, udest->weight);
778         conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
779         conn_flags |= IP_VS_CONN_F_INACTIVE;
780
781         /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
782         if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
783                 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
784         } else {
785                 /*
786                  *    Put the real service in rs_table if not present.
787                  *    For now only for NAT!
788                  */
789                 write_lock_bh(&__ip_vs_rs_lock);
790                 ip_vs_rs_hash(ipvs, dest);
791                 write_unlock_bh(&__ip_vs_rs_lock);
792         }
793         atomic_set(&dest->conn_flags, conn_flags);
794
795         /* bind the service */
796         if (!dest->svc) {
797                 __ip_vs_bind_svc(dest, svc);
798         } else {
799                 if (dest->svc != svc) {
800                         __ip_vs_unbind_svc(dest);
801                         ip_vs_zero_stats(&dest->stats);
802                         __ip_vs_bind_svc(dest, svc);
803                 }
804         }
805
806         /* set the dest status flags */
807         dest->flags |= IP_VS_DEST_F_AVAILABLE;
808
809         if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
810                 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
811         dest->u_threshold = udest->u_threshold;
812         dest->l_threshold = udest->l_threshold;
813
814         spin_lock(&dest->dst_lock);
815         ip_vs_dst_reset(dest);
816         spin_unlock(&dest->dst_lock);
817
818         if (add)
819                 ip_vs_new_estimator(&dest->stats);
820
821         write_lock_bh(&__ip_vs_svc_lock);
822
823         /* Wait until all other svc users go away */
824         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
825
826         if (add) {
827                 list_add(&dest->n_list, &svc->destinations);
828                 svc->num_dests++;
829         }
830
831         /* call the update_service, because server weight may be changed */
832         if (svc->scheduler->update_service)
833                 svc->scheduler->update_service(svc);
834
835         write_unlock_bh(&__ip_vs_svc_lock);
836 }
837
838
839 /*
840  *      Create a destination for the given service
841  */
842 static int
843 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
844                struct ip_vs_dest **dest_p)
845 {
846         struct ip_vs_dest *dest;
847         unsigned atype;
848
849         EnterFunction(2);
850
851 #ifdef CONFIG_IP_VS_IPV6
852         if (svc->af == AF_INET6) {
853                 atype = ipv6_addr_type(&udest->addr.in6);
854                 if ((!(atype & IPV6_ADDR_UNICAST) ||
855                         atype & IPV6_ADDR_LINKLOCAL) &&
856                         !__ip_vs_addr_is_local_v6(&udest->addr.in6))
857                         return -EINVAL;
858         } else
859 #endif
860         {
861                 atype = inet_addr_type(&init_net, udest->addr.ip);
862                 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
863                         return -EINVAL;
864         }
865
866         dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
867         if (dest == NULL) {
868                 pr_err("%s(): no memory.\n", __func__);
869                 return -ENOMEM;
870         }
871
872         dest->af = svc->af;
873         dest->protocol = svc->protocol;
874         dest->vaddr = svc->addr;
875         dest->vport = svc->port;
876         dest->vfwmark = svc->fwmark;
877         ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
878         dest->port = udest->port;
879
880         atomic_set(&dest->activeconns, 0);
881         atomic_set(&dest->inactconns, 0);
882         atomic_set(&dest->persistconns, 0);
883         atomic_set(&dest->refcnt, 1);
884
885         INIT_LIST_HEAD(&dest->d_list);
886         spin_lock_init(&dest->dst_lock);
887         spin_lock_init(&dest->stats.lock);
888         __ip_vs_update_dest(svc, dest, udest, 1);
889
890         *dest_p = dest;
891
892         LeaveFunction(2);
893         return 0;
894 }
895
896
897 /*
898  *      Add a destination into an existing service
899  */
900 static int
901 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
902 {
903         struct ip_vs_dest *dest;
904         union nf_inet_addr daddr;
905         __be16 dport = udest->port;
906         int ret;
907
908         EnterFunction(2);
909
910         if (udest->weight < 0) {
911                 pr_err("%s(): server weight less than zero\n", __func__);
912                 return -ERANGE;
913         }
914
915         if (udest->l_threshold > udest->u_threshold) {
916                 pr_err("%s(): lower threshold is higher than upper threshold\n",
917                         __func__);
918                 return -ERANGE;
919         }
920
921         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
922
923         /*
924          * Check if the dest already exists in the list
925          */
926         dest = ip_vs_lookup_dest(svc, &daddr, dport);
927
928         if (dest != NULL) {
929                 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
930                 return -EEXIST;
931         }
932
933         /*
934          * Check if the dest already exists in the trash and
935          * is from the same service
936          */
937         dest = ip_vs_trash_get_dest(svc, &daddr, dport);
938
939         if (dest != NULL) {
940                 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
941                               "dest->refcnt=%d, service %u/%s:%u\n",
942                               IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
943                               atomic_read(&dest->refcnt),
944                               dest->vfwmark,
945                               IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
946                               ntohs(dest->vport));
947
948                 /*
949                  * Get the destination from the trash
950                  */
951                 list_del(&dest->n_list);
952
953                 __ip_vs_update_dest(svc, dest, udest, 1);
954                 ret = 0;
955         } else {
956                 /*
957                  * Allocate and initialize the dest structure
958                  */
959                 ret = ip_vs_new_dest(svc, udest, &dest);
960         }
961         LeaveFunction(2);
962
963         return ret;
964 }
965
966
967 /*
968  *      Edit a destination in the given service
969  */
970 static int
971 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
972 {
973         struct ip_vs_dest *dest;
974         union nf_inet_addr daddr;
975         __be16 dport = udest->port;
976
977         EnterFunction(2);
978
979         if (udest->weight < 0) {
980                 pr_err("%s(): server weight less than zero\n", __func__);
981                 return -ERANGE;
982         }
983
984         if (udest->l_threshold > udest->u_threshold) {
985                 pr_err("%s(): lower threshold is higher than upper threshold\n",
986                         __func__);
987                 return -ERANGE;
988         }
989
990         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
991
992         /*
993          *  Lookup the destination list
994          */
995         dest = ip_vs_lookup_dest(svc, &daddr, dport);
996
997         if (dest == NULL) {
998                 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
999                 return -ENOENT;
1000         }
1001
1002         __ip_vs_update_dest(svc, dest, udest, 0);
1003         LeaveFunction(2);
1004
1005         return 0;
1006 }
1007
1008
1009 /*
1010  *      Delete a destination (must be already unlinked from the service)
1011  */
1012 static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1013 {
1014         ip_vs_kill_estimator(&dest->stats);
1015
1016         /*
1017          *  Remove it from the d-linked list with the real services.
1018          */
1019         write_lock_bh(&__ip_vs_rs_lock);
1020         ip_vs_rs_unhash(dest);
1021         write_unlock_bh(&__ip_vs_rs_lock);
1022
1023         /*
1024          *  Decrease the refcnt of the dest, and free the dest
1025          *  if nobody refers to it (refcnt=0). Otherwise, throw
1026          *  the destination into the trash.
1027          */
1028         if (atomic_dec_and_test(&dest->refcnt)) {
1029                 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1030                               dest->vfwmark,
1031                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1032                               ntohs(dest->port));
1033                 ip_vs_dst_reset(dest);
1034                 /* simply decrease svc->refcnt here, let the caller check
1035                    and release the service if nobody refers to it.
1036                    Only user context can release destination and service,
1037                    and only one user context can update virtual service at a
1038                    time, so the operation here is OK */
1039                 atomic_dec(&dest->svc->refcnt);
1040                 kfree(dest);
1041         } else {
1042                 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1043                               "dest->refcnt=%d\n",
1044                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1045                               ntohs(dest->port),
1046                               atomic_read(&dest->refcnt));
1047                 list_add(&dest->n_list, &ip_vs_dest_trash);
1048                 atomic_inc(&dest->refcnt);
1049         }
1050 }
1051
1052
1053 /*
1054  *      Unlink a destination from the given service
1055  */
1056 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1057                                 struct ip_vs_dest *dest,
1058                                 int svcupd)
1059 {
1060         dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1061
1062         /*
1063          *  Remove it from the d-linked destination list.
1064          */
1065         list_del(&dest->n_list);
1066         svc->num_dests--;
1067
1068         /*
1069          *  Call the update_service function of its scheduler
1070          */
1071         if (svcupd && svc->scheduler->update_service)
1072                         svc->scheduler->update_service(svc);
1073 }
1074
1075
1076 /*
1077  *      Delete a destination server in the given service
1078  */
1079 static int
1080 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1081 {
1082         struct ip_vs_dest *dest;
1083         __be16 dport = udest->port;
1084
1085         EnterFunction(2);
1086
1087         dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1088
1089         if (dest == NULL) {
1090                 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1091                 return -ENOENT;
1092         }
1093
1094         write_lock_bh(&__ip_vs_svc_lock);
1095
1096         /*
1097          *      Wait until all other svc users go away.
1098          */
1099         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1100
1101         /*
1102          *      Unlink dest from the service
1103          */
1104         __ip_vs_unlink_dest(svc, dest, 1);
1105
1106         write_unlock_bh(&__ip_vs_svc_lock);
1107
1108         /*
1109          *      Delete the destination
1110          */
1111         __ip_vs_del_dest(dest);
1112
1113         LeaveFunction(2);
1114
1115         return 0;
1116 }
1117
1118
1119 /*
1120  *      Add a service into the service hash table
1121  */
1122 static int
1123 ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1124                   struct ip_vs_service **svc_p)
1125 {
1126         int ret = 0;
1127         struct ip_vs_scheduler *sched = NULL;
1128         struct ip_vs_pe *pe = NULL;
1129         struct ip_vs_service *svc = NULL;
1130
1131         /* increase the module use count */
1132         ip_vs_use_count_inc();
1133
1134         /* Lookup the scheduler by 'u->sched_name' */
1135         sched = ip_vs_scheduler_get(u->sched_name);
1136         if (sched == NULL) {
1137                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1138                 ret = -ENOENT;
1139                 goto out_err;
1140         }
1141
1142         if (u->pe_name && *u->pe_name) {
1143                 pe = ip_vs_pe_getbyname(u->pe_name);
1144                 if (pe == NULL) {
1145                         pr_info("persistence engine module ip_vs_pe_%s "
1146                                 "not found\n", u->pe_name);
1147                         ret = -ENOENT;
1148                         goto out_err;
1149                 }
1150         }
1151
1152 #ifdef CONFIG_IP_VS_IPV6
1153         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1154                 ret = -EINVAL;
1155                 goto out_err;
1156         }
1157 #endif
1158
1159         svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1160         if (svc == NULL) {
1161                 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1162                 ret = -ENOMEM;
1163                 goto out_err;
1164         }
1165
1166         /* I'm the first user of the service */
1167         atomic_set(&svc->usecnt, 0);
1168         atomic_set(&svc->refcnt, 0);
1169
1170         svc->af = u->af;
1171         svc->protocol = u->protocol;
1172         ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1173         svc->port = u->port;
1174         svc->fwmark = u->fwmark;
1175         svc->flags = u->flags;
1176         svc->timeout = u->timeout * HZ;
1177         svc->netmask = u->netmask;
1178         svc->net = net;
1179
1180         INIT_LIST_HEAD(&svc->destinations);
1181         rwlock_init(&svc->sched_lock);
1182         spin_lock_init(&svc->stats.lock);
1183
1184         /* Bind the scheduler */
1185         ret = ip_vs_bind_scheduler(svc, sched);
1186         if (ret)
1187                 goto out_err;
1188         sched = NULL;
1189
1190         /* Bind the ct retriever */
1191         ip_vs_bind_pe(svc, pe);
1192         pe = NULL;
1193
1194         /* Update the virtual service counters */
1195         if (svc->port == FTPPORT)
1196                 atomic_inc(&ip_vs_ftpsvc_counter);
1197         else if (svc->port == 0)
1198                 atomic_inc(&ip_vs_nullsvc_counter);
1199
1200         ip_vs_new_estimator(&svc->stats);
1201
1202         /* Count only IPv4 services for old get/setsockopt interface */
1203         if (svc->af == AF_INET)
1204                 ip_vs_num_services++;
1205
1206         /* Hash the service into the service table */
1207         write_lock_bh(&__ip_vs_svc_lock);
1208         ip_vs_svc_hash(svc);
1209         write_unlock_bh(&__ip_vs_svc_lock);
1210
1211         *svc_p = svc;
1212         return 0;
1213
1214  out_err:
1215         if (svc != NULL) {
1216                 ip_vs_unbind_scheduler(svc);
1217                 if (svc->inc) {
1218                         local_bh_disable();
1219                         ip_vs_app_inc_put(svc->inc);
1220                         local_bh_enable();
1221                 }
1222                 kfree(svc);
1223         }
1224         ip_vs_scheduler_put(sched);
1225         ip_vs_pe_put(pe);
1226
1227         /* decrease the module use count */
1228         ip_vs_use_count_dec();
1229
1230         return ret;
1231 }
1232
1233
1234 /*
1235  *      Edit a service and bind it with a new scheduler
1236  */
1237 static int
1238 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1239 {
1240         struct ip_vs_scheduler *sched, *old_sched;
1241         struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1242         int ret = 0;
1243
1244         /*
1245          * Lookup the scheduler, by 'u->sched_name'
1246          */
1247         sched = ip_vs_scheduler_get(u->sched_name);
1248         if (sched == NULL) {
1249                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1250                 return -ENOENT;
1251         }
1252         old_sched = sched;
1253
1254         if (u->pe_name && *u->pe_name) {
1255                 pe = ip_vs_pe_getbyname(u->pe_name);
1256                 if (pe == NULL) {
1257                         pr_info("persistence engine module ip_vs_pe_%s "
1258                                 "not found\n", u->pe_name);
1259                         ret = -ENOENT;
1260                         goto out;
1261                 }
1262                 old_pe = pe;
1263         }
1264
1265 #ifdef CONFIG_IP_VS_IPV6
1266         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1267                 ret = -EINVAL;
1268                 goto out;
1269         }
1270 #endif
1271
1272         write_lock_bh(&__ip_vs_svc_lock);
1273
1274         /*
1275          * Wait until all other svc users go away.
1276          */
1277         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1278
1279         /*
1280          * Set the flags and timeout value
1281          */
1282         svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1283         svc->timeout = u->timeout * HZ;
1284         svc->netmask = u->netmask;
1285
1286         old_sched = svc->scheduler;
1287         if (sched != old_sched) {
1288                 /*
1289                  * Unbind the old scheduler
1290                  */
1291                 if ((ret = ip_vs_unbind_scheduler(svc))) {
1292                         old_sched = sched;
1293                         goto out_unlock;
1294                 }
1295
1296                 /*
1297                  * Bind the new scheduler
1298                  */
1299                 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1300                         /*
1301                          * If ip_vs_bind_scheduler fails, restore the old
1302                          * scheduler.
1303                          * The main reason of failure is out of memory.
1304                          *
1305                          * The question is if the old scheduler can be
1306                          * restored all the time. TODO: if it cannot be
1307                          * restored some time, we must delete the service,
1308                          * otherwise the system may crash.
1309                          */
1310                         ip_vs_bind_scheduler(svc, old_sched);
1311                         old_sched = sched;
1312                         goto out_unlock;
1313                 }
1314         }
1315
1316         old_pe = svc->pe;
1317         if (pe != old_pe) {
1318                 ip_vs_unbind_pe(svc);
1319                 ip_vs_bind_pe(svc, pe);
1320         }
1321
1322   out_unlock:
1323         write_unlock_bh(&__ip_vs_svc_lock);
1324   out:
1325         ip_vs_scheduler_put(old_sched);
1326         ip_vs_pe_put(old_pe);
1327         return ret;
1328 }
1329
1330
1331 /*
1332  *      Delete a service from the service list
1333  *      - The service must be unlinked, unlocked and not referenced!
1334  *      - We are called under _bh lock
1335  */
1336 static void __ip_vs_del_service(struct ip_vs_service *svc)
1337 {
1338         struct ip_vs_dest *dest, *nxt;
1339         struct ip_vs_scheduler *old_sched;
1340         struct ip_vs_pe *old_pe;
1341
1342         pr_info("%s: enter\n", __func__);
1343
1344         /* Count only IPv4 services for old get/setsockopt interface */
1345         if (svc->af == AF_INET)
1346                 ip_vs_num_services--;
1347
1348         ip_vs_kill_estimator(&svc->stats);
1349
1350         /* Unbind scheduler */
1351         old_sched = svc->scheduler;
1352         ip_vs_unbind_scheduler(svc);
1353         ip_vs_scheduler_put(old_sched);
1354
1355         /* Unbind persistence engine */
1356         old_pe = svc->pe;
1357         ip_vs_unbind_pe(svc);
1358         ip_vs_pe_put(old_pe);
1359
1360         /* Unbind app inc */
1361         if (svc->inc) {
1362                 ip_vs_app_inc_put(svc->inc);
1363                 svc->inc = NULL;
1364         }
1365
1366         /*
1367          *    Unlink the whole destination list
1368          */
1369         list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1370                 __ip_vs_unlink_dest(svc, dest, 0);
1371                 __ip_vs_del_dest(dest);
1372         }
1373
1374         /*
1375          *    Update the virtual service counters
1376          */
1377         if (svc->port == FTPPORT)
1378                 atomic_dec(&ip_vs_ftpsvc_counter);
1379         else if (svc->port == 0)
1380                 atomic_dec(&ip_vs_nullsvc_counter);
1381
1382         /*
1383          *    Free the service if nobody refers to it
1384          */
1385         if (atomic_read(&svc->refcnt) == 0) {
1386                 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1387                               svc->fwmark,
1388                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
1389                               ntohs(svc->port), atomic_read(&svc->usecnt));
1390                 kfree(svc);
1391         }
1392
1393         /* decrease the module use count */
1394         ip_vs_use_count_dec();
1395 }
1396
1397 /*
1398  * Unlink a service from list and try to delete it if its refcnt reached 0
1399  */
1400 static void ip_vs_unlink_service(struct ip_vs_service *svc)
1401 {
1402         /*
1403          * Unhash it from the service table
1404          */
1405         write_lock_bh(&__ip_vs_svc_lock);
1406
1407         ip_vs_svc_unhash(svc);
1408
1409         /*
1410          * Wait until all the svc users go away.
1411          */
1412         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1413
1414         __ip_vs_del_service(svc);
1415
1416         write_unlock_bh(&__ip_vs_svc_lock);
1417 }
1418
1419 /*
1420  *      Delete a service from the service list
1421  */
1422 static int ip_vs_del_service(struct ip_vs_service *svc)
1423 {
1424         if (svc == NULL)
1425                 return -EEXIST;
1426         ip_vs_unlink_service(svc);
1427
1428         return 0;
1429 }
1430
1431
1432 /*
1433  *      Flush all the virtual services
1434  */
1435 static int ip_vs_flush(struct net *net)
1436 {
1437         int idx;
1438         struct ip_vs_service *svc, *nxt;
1439
1440         /*
1441          * Flush the service table hashed by <netns,protocol,addr,port>
1442          */
1443         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1444                 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1445                                          s_list) {
1446                         if (net_eq(svc->net, net))
1447                                 ip_vs_unlink_service(svc);
1448                 }
1449         }
1450
1451         /*
1452          * Flush the service table hashed by fwmark
1453          */
1454         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1455                 list_for_each_entry_safe(svc, nxt,
1456                                          &ip_vs_svc_fwm_table[idx], f_list) {
1457                         if (net_eq(svc->net, net))
1458                                 ip_vs_unlink_service(svc);
1459                 }
1460         }
1461
1462         return 0;
1463 }
1464
1465
1466 /*
1467  *      Zero counters in a service or all services
1468  */
1469 static int ip_vs_zero_service(struct ip_vs_service *svc)
1470 {
1471         struct ip_vs_dest *dest;
1472
1473         write_lock_bh(&__ip_vs_svc_lock);
1474         list_for_each_entry(dest, &svc->destinations, n_list) {
1475                 ip_vs_zero_stats(&dest->stats);
1476         }
1477         ip_vs_zero_stats(&svc->stats);
1478         write_unlock_bh(&__ip_vs_svc_lock);
1479         return 0;
1480 }
1481
1482 static int ip_vs_zero_all(struct net *net)
1483 {
1484         int idx;
1485         struct ip_vs_service *svc;
1486
1487         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1488                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1489                         if (net_eq(svc->net, net))
1490                                 ip_vs_zero_service(svc);
1491                 }
1492         }
1493
1494         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1495                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1496                         if (net_eq(svc->net, net))
1497                                 ip_vs_zero_service(svc);
1498                 }
1499         }
1500
1501         ip_vs_zero_stats(&ip_vs_stats);
1502         return 0;
1503 }
1504
1505
1506 static int
1507 proc_do_defense_mode(ctl_table *table, int write,
1508                      void __user *buffer, size_t *lenp, loff_t *ppos)
1509 {
1510         struct net *net = current->nsproxy->net_ns;
1511         int *valp = table->data;
1512         int val = *valp;
1513         int rc;
1514
1515         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1516         if (write && (*valp != val)) {
1517                 if ((*valp < 0) || (*valp > 3)) {
1518                         /* Restore the correct value */
1519                         *valp = val;
1520                 } else {
1521                         update_defense_level(net_ipvs(net));
1522                 }
1523         }
1524         return rc;
1525 }
1526
1527
1528 static int
1529 proc_do_sync_threshold(ctl_table *table, int write,
1530                        void __user *buffer, size_t *lenp, loff_t *ppos)
1531 {
1532         int *valp = table->data;
1533         int val[2];
1534         int rc;
1535
1536         /* backup the value first */
1537         memcpy(val, valp, sizeof(val));
1538
1539         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1540         if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1541                 /* Restore the correct value */
1542                 memcpy(valp, val, sizeof(val));
1543         }
1544         return rc;
1545 }
1546
1547 static int
1548 proc_do_sync_mode(ctl_table *table, int write,
1549                      void __user *buffer, size_t *lenp, loff_t *ppos)
1550 {
1551         int *valp = table->data;
1552         int val = *valp;
1553         int rc;
1554
1555         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1556         if (write && (*valp != val)) {
1557                 if ((*valp < 0) || (*valp > 1)) {
1558                         /* Restore the correct value */
1559                         *valp = val;
1560                 } else {
1561                         ip_vs_sync_switch_mode(val);
1562                 }
1563         }
1564         return rc;
1565 }
1566
1567 /*
1568  *      IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1569  */
1570
1571 static struct ctl_table vs_vars[] = {
1572         {
1573                 .procname       = "amemthresh",
1574                 .data           = &sysctl_ip_vs_amemthresh,
1575                 .maxlen         = sizeof(int),
1576                 .mode           = 0644,
1577                 .proc_handler   = proc_dointvec,
1578         },
1579 #ifdef CONFIG_IP_VS_DEBUG
1580         {
1581                 .procname       = "debug_level",
1582                 .data           = &sysctl_ip_vs_debug_level,
1583                 .maxlen         = sizeof(int),
1584                 .mode           = 0644,
1585                 .proc_handler   = proc_dointvec,
1586         },
1587 #endif
1588         {
1589                 .procname       = "am_droprate",
1590                 .data           = &sysctl_ip_vs_am_droprate,
1591                 .maxlen         = sizeof(int),
1592                 .mode           = 0644,
1593                 .proc_handler   = proc_dointvec,
1594         },
1595         {
1596                 .procname       = "drop_entry",
1597                 .data           = &sysctl_ip_vs_drop_entry,
1598                 .maxlen         = sizeof(int),
1599                 .mode           = 0644,
1600                 .proc_handler   = proc_do_defense_mode,
1601         },
1602         {
1603                 .procname       = "drop_packet",
1604                 .data           = &sysctl_ip_vs_drop_packet,
1605                 .maxlen         = sizeof(int),
1606                 .mode           = 0644,
1607                 .proc_handler   = proc_do_defense_mode,
1608         },
1609 #ifdef CONFIG_IP_VS_NFCT
1610         {
1611                 .procname       = "conntrack",
1612                 .data           = &sysctl_ip_vs_conntrack,
1613                 .maxlen         = sizeof(int),
1614                 .mode           = 0644,
1615                 .proc_handler   = &proc_dointvec,
1616         },
1617 #endif
1618         {
1619                 .procname       = "secure_tcp",
1620                 .data           = &sysctl_ip_vs_secure_tcp,
1621                 .maxlen         = sizeof(int),
1622                 .mode           = 0644,
1623                 .proc_handler   = proc_do_defense_mode,
1624         },
1625         {
1626                 .procname       = "snat_reroute",
1627                 .data           = &sysctl_ip_vs_snat_reroute,
1628                 .maxlen         = sizeof(int),
1629                 .mode           = 0644,
1630                 .proc_handler   = &proc_dointvec,
1631         },
1632         {
1633                 .procname       = "sync_version",
1634                 .data           = &sysctl_ip_vs_sync_ver,
1635                 .maxlen         = sizeof(int),
1636                 .mode           = 0644,
1637                 .proc_handler   = &proc_do_sync_mode,
1638         },
1639 #if 0
1640         {
1641                 .procname       = "timeout_established",
1642                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1643                 .maxlen         = sizeof(int),
1644                 .mode           = 0644,
1645                 .proc_handler   = proc_dointvec_jiffies,
1646         },
1647         {
1648                 .procname       = "timeout_synsent",
1649                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1650                 .maxlen         = sizeof(int),
1651                 .mode           = 0644,
1652                 .proc_handler   = proc_dointvec_jiffies,
1653         },
1654         {
1655                 .procname       = "timeout_synrecv",
1656                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1657                 .maxlen         = sizeof(int),
1658                 .mode           = 0644,
1659                 .proc_handler   = proc_dointvec_jiffies,
1660         },
1661         {
1662                 .procname       = "timeout_finwait",
1663                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1664                 .maxlen         = sizeof(int),
1665                 .mode           = 0644,
1666                 .proc_handler   = proc_dointvec_jiffies,
1667         },
1668         {
1669                 .procname       = "timeout_timewait",
1670                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1671                 .maxlen         = sizeof(int),
1672                 .mode           = 0644,
1673                 .proc_handler   = proc_dointvec_jiffies,
1674         },
1675         {
1676                 .procname       = "timeout_close",
1677                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1678                 .maxlen         = sizeof(int),
1679                 .mode           = 0644,
1680                 .proc_handler   = proc_dointvec_jiffies,
1681         },
1682         {
1683                 .procname       = "timeout_closewait",
1684                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1685                 .maxlen         = sizeof(int),
1686                 .mode           = 0644,
1687                 .proc_handler   = proc_dointvec_jiffies,
1688         },
1689         {
1690                 .procname       = "timeout_lastack",
1691                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1692                 .maxlen         = sizeof(int),
1693                 .mode           = 0644,
1694                 .proc_handler   = proc_dointvec_jiffies,
1695         },
1696         {
1697                 .procname       = "timeout_listen",
1698                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1699                 .maxlen         = sizeof(int),
1700                 .mode           = 0644,
1701                 .proc_handler   = proc_dointvec_jiffies,
1702         },
1703         {
1704                 .procname       = "timeout_synack",
1705                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1706                 .maxlen         = sizeof(int),
1707                 .mode           = 0644,
1708                 .proc_handler   = proc_dointvec_jiffies,
1709         },
1710         {
1711                 .procname       = "timeout_udp",
1712                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1713                 .maxlen         = sizeof(int),
1714                 .mode           = 0644,
1715                 .proc_handler   = proc_dointvec_jiffies,
1716         },
1717         {
1718                 .procname       = "timeout_icmp",
1719                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1720                 .maxlen         = sizeof(int),
1721                 .mode           = 0644,
1722                 .proc_handler   = proc_dointvec_jiffies,
1723         },
1724 #endif
1725         {
1726                 .procname       = "cache_bypass",
1727                 .data           = &sysctl_ip_vs_cache_bypass,
1728                 .maxlen         = sizeof(int),
1729                 .mode           = 0644,
1730                 .proc_handler   = proc_dointvec,
1731         },
1732         {
1733                 .procname       = "expire_nodest_conn",
1734                 .data           = &sysctl_ip_vs_expire_nodest_conn,
1735                 .maxlen         = sizeof(int),
1736                 .mode           = 0644,
1737                 .proc_handler   = proc_dointvec,
1738         },
1739         {
1740                 .procname       = "expire_quiescent_template",
1741                 .data           = &sysctl_ip_vs_expire_quiescent_template,
1742                 .maxlen         = sizeof(int),
1743                 .mode           = 0644,
1744                 .proc_handler   = proc_dointvec,
1745         },
1746         {
1747                 .procname       = "sync_threshold",
1748                 .data           = &sysctl_ip_vs_sync_threshold,
1749                 .maxlen         = sizeof(sysctl_ip_vs_sync_threshold),
1750                 .mode           = 0644,
1751                 .proc_handler   = proc_do_sync_threshold,
1752         },
1753         {
1754                 .procname       = "nat_icmp_send",
1755                 .data           = &sysctl_ip_vs_nat_icmp_send,
1756                 .maxlen         = sizeof(int),
1757                 .mode           = 0644,
1758                 .proc_handler   = proc_dointvec,
1759         },
1760         { }
1761 };
1762
1763 const struct ctl_path net_vs_ctl_path[] = {
1764         { .procname = "net", },
1765         { .procname = "ipv4", },
1766         { .procname = "vs", },
1767         { }
1768 };
1769 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1770
/* Header pointer for the IPVS sysctl table (set elsewhere in this file) */
static struct ctl_table_header *sysctl_header;
1772
1773 #ifdef CONFIG_PROC_FS
1774
1775 struct ip_vs_iter {
1776         struct seq_net_private p;  /* Do not move this, netns depends upon it*/
1777         struct list_head *table;
1778         int bucket;
1779 };
1780
1781 /*
1782  *      Write the contents of the VS rule table to a PROCfs file.
1783  *      (It is kept just for backward compatibility)
1784  */
1785 static inline const char *ip_vs_fwd_name(unsigned flags)
1786 {
1787         switch (flags & IP_VS_CONN_F_FWD_MASK) {
1788         case IP_VS_CONN_F_LOCALNODE:
1789                 return "Local";
1790         case IP_VS_CONN_F_TUNNEL:
1791                 return "Tunnel";
1792         case IP_VS_CONN_F_DROUTE:
1793                 return "Route";
1794         default:
1795                 return "Masq";
1796         }
1797 }
1798
1799
1800 /* Get the Nth entry in the two lists */
1801 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1802 {
1803         struct net *net = seq_file_net(seq);
1804         struct ip_vs_iter *iter = seq->private;
1805         int idx;
1806         struct ip_vs_service *svc;
1807
1808         /* look in hash by protocol */
1809         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1810                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1811                         if (net_eq(svc->net, net) && pos-- == 0) {
1812                                 iter->table = ip_vs_svc_table;
1813                                 iter->bucket = idx;
1814                                 return svc;
1815                         }
1816                 }
1817         }
1818
1819         /* keep looking in fwmark */
1820         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1821                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1822                         if (net_eq(svc->net, net) && pos-- == 0) {
1823                                 iter->table = ip_vs_svc_fwm_table;
1824                                 iter->bucket = idx;
1825                                 return svc;
1826                         }
1827                 }
1828         }
1829
1830         return NULL;
1831 }
1832
1833 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1834 __acquires(__ip_vs_svc_lock)
1835 {
1836
1837         read_lock_bh(&__ip_vs_svc_lock);
1838         return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1839 }
1840
1841
1842 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1843 {
1844         struct list_head *e;
1845         struct ip_vs_iter *iter;
1846         struct ip_vs_service *svc;
1847
1848         ++*pos;
1849         if (v == SEQ_START_TOKEN)
1850                 return ip_vs_info_array(seq,0);
1851
1852         svc = v;
1853         iter = seq->private;
1854
1855         if (iter->table == ip_vs_svc_table) {
1856                 /* next service in table hashed by protocol */
1857                 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1858                         return list_entry(e, struct ip_vs_service, s_list);
1859
1860
1861                 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1862                         list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1863                                             s_list) {
1864                                 return svc;
1865                         }
1866                 }
1867
1868                 iter->table = ip_vs_svc_fwm_table;
1869                 iter->bucket = -1;
1870                 goto scan_fwmark;
1871         }
1872
1873         /* next service in hashed by fwmark */
1874         if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1875                 return list_entry(e, struct ip_vs_service, f_list);
1876
1877  scan_fwmark:
1878         while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1879                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1880                                     f_list)
1881                         return svc;
1882         }
1883
1884         return NULL;
1885 }
1886
1887 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1888 __releases(__ip_vs_svc_lock)
1889 {
1890         read_unlock_bh(&__ip_vs_svc_lock);
1891 }
1892
1893
1894 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1895 {
1896         if (v == SEQ_START_TOKEN) {
1897                 seq_printf(seq,
1898                         "IP Virtual Server version %d.%d.%d (size=%d)\n",
1899                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1900                 seq_puts(seq,
1901                          "Prot LocalAddress:Port Scheduler Flags\n");
1902                 seq_puts(seq,
1903                          "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1904         } else {
1905                 const struct ip_vs_service *svc = v;
1906                 const struct ip_vs_iter *iter = seq->private;
1907                 const struct ip_vs_dest *dest;
1908
1909                 if (iter->table == ip_vs_svc_table) {
1910 #ifdef CONFIG_IP_VS_IPV6
1911                         if (svc->af == AF_INET6)
1912                                 seq_printf(seq, "%s  [%pI6]:%04X %s ",
1913                                            ip_vs_proto_name(svc->protocol),
1914                                            &svc->addr.in6,
1915                                            ntohs(svc->port),
1916                                            svc->scheduler->name);
1917                         else
1918 #endif
1919                                 seq_printf(seq, "%s  %08X:%04X %s %s ",
1920                                            ip_vs_proto_name(svc->protocol),
1921                                            ntohl(svc->addr.ip),
1922                                            ntohs(svc->port),
1923                                            svc->scheduler->name,
1924                                            (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1925                 } else {
1926                         seq_printf(seq, "FWM  %08X %s %s",
1927                                    svc->fwmark, svc->scheduler->name,
1928                                    (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1929                 }
1930
1931                 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1932                         seq_printf(seq, "persistent %d %08X\n",
1933                                 svc->timeout,
1934                                 ntohl(svc->netmask));
1935                 else
1936                         seq_putc(seq, '\n');
1937
1938                 list_for_each_entry(dest, &svc->destinations, n_list) {
1939 #ifdef CONFIG_IP_VS_IPV6
1940                         if (dest->af == AF_INET6)
1941                                 seq_printf(seq,
1942                                            "  -> [%pI6]:%04X"
1943                                            "      %-7s %-6d %-10d %-10d\n",
1944                                            &dest->addr.in6,
1945                                            ntohs(dest->port),
1946                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1947                                            atomic_read(&dest->weight),
1948                                            atomic_read(&dest->activeconns),
1949                                            atomic_read(&dest->inactconns));
1950                         else
1951 #endif
1952                                 seq_printf(seq,
1953                                            "  -> %08X:%04X      "
1954                                            "%-7s %-6d %-10d %-10d\n",
1955                                            ntohl(dest->addr.ip),
1956                                            ntohs(dest->port),
1957                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1958                                            atomic_read(&dest->weight),
1959                                            atomic_read(&dest->activeconns),
1960                                            atomic_read(&dest->inactconns));
1961
1962                 }
1963         }
1964         return 0;
1965 }
1966
1967 static const struct seq_operations ip_vs_info_seq_ops = {
1968         .start = ip_vs_info_seq_start,
1969         .next  = ip_vs_info_seq_next,
1970         .stop  = ip_vs_info_seq_stop,
1971         .show  = ip_vs_info_seq_show,
1972 };
1973
1974 static int ip_vs_info_open(struct inode *inode, struct file *file)
1975 {
1976         return seq_open_net(inode, file, &ip_vs_info_seq_ops,
1977                         sizeof(struct ip_vs_iter));
1978 }
1979
1980 static const struct file_operations ip_vs_info_fops = {
1981         .owner   = THIS_MODULE,
1982         .open    = ip_vs_info_open,
1983         .read    = seq_read,
1984         .llseek  = seq_lseek,
1985         .release = seq_release_private,
1986 };
1987
1988 #endif
1989
1990 struct ip_vs_stats ip_vs_stats = {
1991         .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1992 };
1993
1994 #ifdef CONFIG_PROC_FS
1995 static int ip_vs_stats_show(struct seq_file *seq, void *v)
1996 {
1997
1998 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
1999         seq_puts(seq,
2000                  "   Total Incoming Outgoing         Incoming         Outgoing\n");
2001         seq_printf(seq,
2002                    "   Conns  Packets  Packets            Bytes            Bytes\n");
2003
2004         spin_lock_bh(&ip_vs_stats.lock);
2005         seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
2006                    ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
2007                    (unsigned long long) ip_vs_stats.ustats.inbytes,
2008                    (unsigned long long) ip_vs_stats.ustats.outbytes);
2009
2010 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2011         seq_puts(seq,
2012                    " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2013         seq_printf(seq,"%8X %8X %8X %16X %16X\n",
2014                         ip_vs_stats.ustats.cps,
2015                         ip_vs_stats.ustats.inpps,
2016                         ip_vs_stats.ustats.outpps,
2017                         ip_vs_stats.ustats.inbps,
2018                         ip_vs_stats.ustats.outbps);
2019         spin_unlock_bh(&ip_vs_stats.lock);
2020
2021         return 0;
2022 }
2023
2024 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2025 {
2026         return single_open_net(inode, file, ip_vs_stats_show);
2027 }
2028
2029 static const struct file_operations ip_vs_stats_fops = {
2030         .owner = THIS_MODULE,
2031         .open = ip_vs_stats_seq_open,
2032         .read = seq_read,
2033         .llseek = seq_lseek,
2034         .release = single_release,
2035 };
2036
2037 #endif
2038
2039 /*
2040  *      Set timeout values for tcp tcpfin udp in the timeout_table.
2041  */
2042 static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
2043 {
2044         struct ip_vs_proto_data *pd;
2045
2046         IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2047                   u->tcp_timeout,
2048                   u->tcp_fin_timeout,
2049                   u->udp_timeout);
2050
2051 #ifdef CONFIG_IP_VS_PROTO_TCP
2052         if (u->tcp_timeout) {
2053                 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2054                 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
2055                         = u->tcp_timeout * HZ;
2056         }
2057
2058         if (u->tcp_fin_timeout) {
2059                 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2060                 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
2061                         = u->tcp_fin_timeout * HZ;
2062         }
2063 #endif
2064
2065 #ifdef CONFIG_IP_VS_PROTO_UDP
2066         if (u->udp_timeout) {
2067                 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2068                 pd->timeout_table[IP_VS_UDP_S_NORMAL]
2069                         = u->udp_timeout * HZ;
2070         }
2071 #endif
2072         return 0;
2073 }
2074
2075
/*
 *      Helpers for the setsockopt argument table: SET_CMDID() maps a
 *      command number to its index in set_arglen[], the *_ARG_LEN macros
 *      give the exact argument size expected for each kind of command.
 *      The macro argument is parenthesized so expressions can be passed.
 */
#define SET_CMDID(cmd)          ((cmd) - IP_VS_BASE_CTL)
#define SERVICE_ARG_LEN         (sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN         (sizeof(struct ip_vs_service_user) +    \
                                 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN         (sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN          (sizeof(struct ip_vs_daemon_user))
#define MAX_ARG_LEN             SVCDEST_ARG_LEN
2083
2084 static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2085         [SET_CMDID(IP_VS_SO_SET_ADD)]           = SERVICE_ARG_LEN,
2086         [SET_CMDID(IP_VS_SO_SET_EDIT)]          = SERVICE_ARG_LEN,
2087         [SET_CMDID(IP_VS_SO_SET_DEL)]           = SERVICE_ARG_LEN,
2088         [SET_CMDID(IP_VS_SO_SET_FLUSH)]         = 0,
2089         [SET_CMDID(IP_VS_SO_SET_ADDDEST)]       = SVCDEST_ARG_LEN,
2090         [SET_CMDID(IP_VS_SO_SET_DELDEST)]       = SVCDEST_ARG_LEN,
2091         [SET_CMDID(IP_VS_SO_SET_EDITDEST)]      = SVCDEST_ARG_LEN,
2092         [SET_CMDID(IP_VS_SO_SET_TIMEOUT)]       = TIMEOUT_ARG_LEN,
2093         [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]   = DAEMON_ARG_LEN,
2094         [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]    = DAEMON_ARG_LEN,
2095         [SET_CMDID(IP_VS_SO_SET_ZERO)]          = SERVICE_ARG_LEN,
2096 };
2097
2098 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2099                                   struct ip_vs_service_user *usvc_compat)
2100 {
2101         memset(usvc, 0, sizeof(*usvc));
2102
2103         usvc->af                = AF_INET;
2104         usvc->protocol          = usvc_compat->protocol;
2105         usvc->addr.ip           = usvc_compat->addr;
2106         usvc->port              = usvc_compat->port;
2107         usvc->fwmark            = usvc_compat->fwmark;
2108
2109         /* Deep copy of sched_name is not needed here */
2110         usvc->sched_name        = usvc_compat->sched_name;
2111
2112         usvc->flags             = usvc_compat->flags;
2113         usvc->timeout           = usvc_compat->timeout;
2114         usvc->netmask           = usvc_compat->netmask;
2115 }
2116
2117 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2118                                    struct ip_vs_dest_user *udest_compat)
2119 {
2120         memset(udest, 0, sizeof(*udest));
2121
2122         udest->addr.ip          = udest_compat->addr;
2123         udest->port             = udest_compat->port;
2124         udest->conn_flags       = udest_compat->conn_flags;
2125         udest->weight           = udest_compat->weight;
2126         udest->u_threshold      = udest_compat->u_threshold;
2127         udest->l_threshold      = udest_compat->l_threshold;
2128 }
2129
2130 static int
2131 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2132 {
2133         struct net *net = sock_net(sk);
2134         int ret;
2135         unsigned char arg[MAX_ARG_LEN];
2136         struct ip_vs_service_user *usvc_compat;
2137         struct ip_vs_service_user_kern usvc;
2138         struct ip_vs_service *svc;
2139         struct ip_vs_dest_user *udest_compat;
2140         struct ip_vs_dest_user_kern udest;
2141
2142         if (!capable(CAP_NET_ADMIN))
2143                 return -EPERM;
2144
2145         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2146                 return -EINVAL;
2147         if (len < 0 || len >  MAX_ARG_LEN)
2148                 return -EINVAL;
2149         if (len != set_arglen[SET_CMDID(cmd)]) {
2150                 pr_err("set_ctl: len %u != %u\n",
2151                        len, set_arglen[SET_CMDID(cmd)]);
2152                 return -EINVAL;
2153         }
2154
2155         if (copy_from_user(arg, user, len) != 0)
2156                 return -EFAULT;
2157
2158         /* increase the module use count */
2159         ip_vs_use_count_inc();
2160
2161         if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2162                 ret = -ERESTARTSYS;
2163                 goto out_dec;
2164         }
2165
2166         if (cmd == IP_VS_SO_SET_FLUSH) {
2167                 /* Flush the virtual service */
2168                 ret = ip_vs_flush(net);
2169                 goto out_unlock;
2170         } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2171                 /* Set timeout values for (tcp tcpfin udp) */
2172                 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
2173                 goto out_unlock;
2174         } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2175                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2176                 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
2177                 goto out_unlock;
2178         } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2179                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2180                 ret = stop_sync_thread(dm->state);
2181                 goto out_unlock;
2182         }
2183
2184         usvc_compat = (struct ip_vs_service_user *)arg;
2185         udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2186
2187         /* We only use the new structs internally, so copy userspace compat
2188          * structs to extended internal versions */
2189         ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2190         ip_vs_copy_udest_compat(&udest, udest_compat);
2191
2192         if (cmd == IP_VS_SO_SET_ZERO) {
2193                 /* if no service address is set, zero counters in all */
2194                 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2195                         ret = ip_vs_zero_all(net);
2196                         goto out_unlock;
2197                 }
2198         }
2199
2200         /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2201         if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2202             usvc.protocol != IPPROTO_SCTP) {
2203                 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2204                        usvc.protocol, &usvc.addr.ip,
2205                        ntohs(usvc.port), usvc.sched_name);
2206                 ret = -EFAULT;
2207                 goto out_unlock;
2208         }
2209
2210         /* Lookup the exact service by <protocol, addr, port> or fwmark */
2211         if (usvc.fwmark == 0)
2212                 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
2213                                            &usvc.addr, usvc.port);
2214         else
2215                 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
2216
2217         if (cmd != IP_VS_SO_SET_ADD
2218             && (svc == NULL || svc->protocol != usvc.protocol)) {
2219                 ret = -ESRCH;
2220                 goto out_unlock;
2221         }
2222
2223         switch (cmd) {
2224         case IP_VS_SO_SET_ADD:
2225                 if (svc != NULL)
2226                         ret = -EEXIST;
2227                 else
2228                         ret = ip_vs_add_service(net, &usvc, &svc);
2229                 break;
2230         case IP_VS_SO_SET_EDIT:
2231                 ret = ip_vs_edit_service(svc, &usvc);
2232                 break;
2233         case IP_VS_SO_SET_DEL:
2234                 ret = ip_vs_del_service(svc);
2235                 if (!ret)
2236                         goto out_unlock;
2237                 break;
2238         case IP_VS_SO_SET_ZERO:
2239                 ret = ip_vs_zero_service(svc);
2240                 break;
2241         case IP_VS_SO_SET_ADDDEST:
2242                 ret = ip_vs_add_dest(svc, &udest);
2243                 break;
2244         case IP_VS_SO_SET_EDITDEST:
2245                 ret = ip_vs_edit_dest(svc, &udest);
2246                 break;
2247         case IP_VS_SO_SET_DELDEST:
2248                 ret = ip_vs_del_dest(svc, &udest);
2249                 break;
2250         default:
2251                 ret = -EINVAL;
2252         }
2253
2254   out_unlock:
2255         mutex_unlock(&__ip_vs_mutex);
2256   out_dec:
2257         /* decrease the module use count */
2258         ip_vs_use_count_dec();
2259
2260         return ret;
2261 }
2262
2263
2264 static void
2265 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2266 {
2267         spin_lock_bh(&src->lock);
2268         memcpy(dst, &src->ustats, sizeof(*dst));
2269         spin_unlock_bh(&src->lock);
2270 }
2271
2272 static void
2273 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2274 {
2275         dst->protocol = src->protocol;
2276         dst->addr = src->addr.ip;
2277         dst->port = src->port;
2278         dst->fwmark = src->fwmark;
2279         strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2280         dst->flags = src->flags;
2281         dst->timeout = src->timeout / HZ;
2282         dst->netmask = src->netmask;
2283         dst->num_dests = src->num_dests;
2284         ip_vs_copy_stats(&dst->stats, &src->stats);
2285 }
2286
2287 static inline int
2288 __ip_vs_get_service_entries(struct net *net,
2289                             const struct ip_vs_get_services *get,
2290                             struct ip_vs_get_services __user *uptr)
2291 {
2292         int idx, count=0;
2293         struct ip_vs_service *svc;
2294         struct ip_vs_service_entry entry;
2295         int ret = 0;
2296
2297         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2298                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2299                         /* Only expose IPv4 entries to old interface */
2300                         if (svc->af != AF_INET || !net_eq(svc->net, net))
2301                                 continue;
2302
2303                         if (count >= get->num_services)
2304                                 goto out;
2305                         memset(&entry, 0, sizeof(entry));
2306                         ip_vs_copy_service(&entry, svc);
2307                         if (copy_to_user(&uptr->entrytable[count],
2308                                          &entry, sizeof(entry))) {
2309                                 ret = -EFAULT;
2310                                 goto out;
2311                         }
2312                         count++;
2313                 }
2314         }
2315
2316         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2317                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2318                         /* Only expose IPv4 entries to old interface */
2319                         if (svc->af != AF_INET || !net_eq(svc->net, net))
2320                                 continue;
2321
2322                         if (count >= get->num_services)
2323                                 goto out;
2324                         memset(&entry, 0, sizeof(entry));
2325                         ip_vs_copy_service(&entry, svc);
2326                         if (copy_to_user(&uptr->entrytable[count],
2327                                          &entry, sizeof(entry))) {
2328                                 ret = -EFAULT;
2329                                 goto out;
2330                         }
2331                         count++;
2332                 }
2333         }
2334   out:
2335         return ret;
2336 }
2337
2338 static inline int
2339 __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
2340                          struct ip_vs_get_dests __user *uptr)
2341 {
2342         struct ip_vs_service *svc;
2343         union nf_inet_addr addr = { .ip = get->addr };
2344         int ret = 0;
2345
2346         if (get->fwmark)
2347                 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
2348         else
2349                 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
2350                                            get->port);
2351
2352         if (svc) {
2353                 int count = 0;
2354                 struct ip_vs_dest *dest;
2355                 struct ip_vs_dest_entry entry;
2356
2357                 list_for_each_entry(dest, &svc->destinations, n_list) {
2358                         if (count >= get->num_dests)
2359                                 break;
2360
2361                         entry.addr = dest->addr.ip;
2362                         entry.port = dest->port;
2363                         entry.conn_flags = atomic_read(&dest->conn_flags);
2364                         entry.weight = atomic_read(&dest->weight);
2365                         entry.u_threshold = dest->u_threshold;
2366                         entry.l_threshold = dest->l_threshold;
2367                         entry.activeconns = atomic_read(&dest->activeconns);
2368                         entry.inactconns = atomic_read(&dest->inactconns);
2369                         entry.persistconns = atomic_read(&dest->persistconns);
2370                         ip_vs_copy_stats(&entry.stats, &dest->stats);
2371                         if (copy_to_user(&uptr->entrytable[count],
2372                                          &entry, sizeof(entry))) {
2373                                 ret = -EFAULT;
2374                                 break;
2375                         }
2376                         count++;
2377                 }
2378         } else
2379                 ret = -ESRCH;
2380         return ret;
2381 }
2382
2383 static inline void
2384 __ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
2385 {
2386         struct ip_vs_proto_data *pd;
2387
2388 #ifdef CONFIG_IP_VS_PROTO_TCP
2389         pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2390         u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2391         u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2392 #endif
2393 #ifdef CONFIG_IP_VS_PROTO_UDP
2394         pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2395         u->udp_timeout =
2396                         pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2397 #endif
2398 }
2399
2400
/*
 *      Helpers for the getsockopt argument table: GET_CMDID() maps a
 *      command number to its index in get_arglen[], the GET_*_ARG_LEN
 *      macros give the size of the fixed part of each command's argument.
 *      The macro argument is parenthesized so expressions can be passed.
 */
#define GET_CMDID(cmd)          ((cmd) - IP_VS_BASE_CTL)
#define GET_INFO_ARG_LEN        (sizeof(struct ip_vs_getinfo))
#define GET_SERVICES_ARG_LEN    (sizeof(struct ip_vs_get_services))
#define GET_SERVICE_ARG_LEN     (sizeof(struct ip_vs_service_entry))
#define GET_DESTS_ARG_LEN       (sizeof(struct ip_vs_get_dests))
#define GET_TIMEOUT_ARG_LEN     (sizeof(struct ip_vs_timeout_user))
#define GET_DAEMON_ARG_LEN      (sizeof(struct ip_vs_daemon_user) * 2)
2408
2409 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2410         [GET_CMDID(IP_VS_SO_GET_VERSION)]       = 64,
2411         [GET_CMDID(IP_VS_SO_GET_INFO)]          = GET_INFO_ARG_LEN,
2412         [GET_CMDID(IP_VS_SO_GET_SERVICES)]      = GET_SERVICES_ARG_LEN,
2413         [GET_CMDID(IP_VS_SO_GET_SERVICE)]       = GET_SERVICE_ARG_LEN,
2414         [GET_CMDID(IP_VS_SO_GET_DESTS)]         = GET_DESTS_ARG_LEN,
2415         [GET_CMDID(IP_VS_SO_GET_TIMEOUT)]       = GET_TIMEOUT_ARG_LEN,
2416         [GET_CMDID(IP_VS_SO_GET_DAEMON)]        = GET_DAEMON_ARG_LEN,
2417 };
2418
2419 static int
2420 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2421 {
2422         unsigned char arg[128];
2423         int ret = 0;
2424         unsigned int copylen;
2425         struct net *net = sock_net(sk);
2426
2427         BUG_ON(!net);
2428         if (!capable(CAP_NET_ADMIN))
2429                 return -EPERM;
2430
2431         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2432                 return -EINVAL;
2433
2434         if (*len < get_arglen[GET_CMDID(cmd)]) {
2435                 pr_err("get_ctl: len %u < %u\n",
2436                        *len, get_arglen[GET_CMDID(cmd)]);
2437                 return -EINVAL;
2438         }
2439
2440         copylen = get_arglen[GET_CMDID(cmd)];
2441         if (copylen > 128)
2442                 return -EINVAL;
2443
2444         if (copy_from_user(arg, user, copylen) != 0)
2445                 return -EFAULT;
2446
2447         if (mutex_lock_interruptible(&__ip_vs_mutex))
2448                 return -ERESTARTSYS;
2449
2450         switch (cmd) {
2451         case IP_VS_SO_GET_VERSION:
2452         {
2453                 char buf[64];
2454
2455                 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2456                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2457                 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2458                         ret = -EFAULT;
2459                         goto out;
2460                 }
2461                 *len = strlen(buf)+1;
2462         }
2463         break;
2464
2465         case IP_VS_SO_GET_INFO:
2466         {
2467                 struct ip_vs_getinfo info;
2468                 info.version = IP_VS_VERSION_CODE;
2469                 info.size = ip_vs_conn_tab_size;
2470                 info.num_services = ip_vs_num_services;
2471                 if (copy_to_user(user, &info, sizeof(info)) != 0)
2472                         ret = -EFAULT;
2473         }
2474         break;
2475
2476         case IP_VS_SO_GET_SERVICES:
2477         {
2478                 struct ip_vs_get_services *get;
2479                 int size;
2480
2481                 get = (struct ip_vs_get_services *)arg;
2482                 size = sizeof(*get) +
2483                         sizeof(struct ip_vs_service_entry) * get->num_services;
2484                 if (*len != size) {
2485                         pr_err("length: %u != %u\n", *len, size);
2486                         ret = -EINVAL;
2487                         goto out;
2488                 }
2489                 ret = __ip_vs_get_service_entries(net, get, user);
2490         }
2491         break;
2492
2493         case IP_VS_SO_GET_SERVICE:
2494         {
2495                 struct ip_vs_service_entry *entry;
2496                 struct ip_vs_service *svc;
2497                 union nf_inet_addr addr;
2498
2499                 entry = (struct ip_vs_service_entry *)arg;
2500                 addr.ip = entry->addr;
2501                 if (entry->fwmark)
2502                         svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
2503                 else
2504                         svc = __ip_vs_service_find(net, AF_INET,
2505                                                    entry->protocol, &addr,
2506                                                    entry->port);
2507                 if (svc) {
2508                         ip_vs_copy_service(entry, svc);
2509                         if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2510                                 ret = -EFAULT;
2511                 } else
2512                         ret = -ESRCH;
2513         }
2514         break;
2515
2516         case IP_VS_SO_GET_DESTS:
2517         {
2518                 struct ip_vs_get_dests *get;
2519                 int size;
2520
2521                 get = (struct ip_vs_get_dests *)arg;
2522                 size = sizeof(*get) +
2523                         sizeof(struct ip_vs_dest_entry) * get->num_dests;
2524                 if (*len != size) {
2525                         pr_err("length: %u != %u\n", *len, size);
2526                         ret = -EINVAL;
2527                         goto out;
2528                 }
2529                 ret = __ip_vs_get_dest_entries(net, get, user);
2530         }
2531         break;
2532
2533         case IP_VS_SO_GET_TIMEOUT:
2534         {
2535                 struct ip_vs_timeout_user t;
2536
2537                 __ip_vs_get_timeouts(net, &t);
2538                 if (copy_to_user(user, &t, sizeof(t)) != 0)
2539                         ret = -EFAULT;
2540         }
2541         break;
2542
2543         case IP_VS_SO_GET_DAEMON:
2544         {
2545                 struct ip_vs_daemon_user d[2];
2546
2547                 memset(&d, 0, sizeof(d));
2548                 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2549                         d[0].state = IP_VS_STATE_MASTER;
2550                         strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
2551                         d[0].syncid = ip_vs_master_syncid;
2552                 }
2553                 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2554                         d[1].state = IP_VS_STATE_BACKUP;
2555                         strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
2556                         d[1].syncid = ip_vs_backup_syncid;
2557                 }
2558                 if (copy_to_user(user, &d, sizeof(d)) != 0)
2559                         ret = -EFAULT;
2560         }
2561         break;
2562
2563         default:
2564                 ret = -EINVAL;
2565         }
2566
2567   out:
2568         mutex_unlock(&__ip_vs_mutex);
2569         return ret;
2570 }
2571
2572
2573 static struct nf_sockopt_ops ip_vs_sockopts = {
2574         .pf             = PF_INET,
2575         .set_optmin     = IP_VS_BASE_CTL,
2576         .set_optmax     = IP_VS_SO_SET_MAX+1,
2577         .set            = do_ip_vs_set_ctl,
2578         .get_optmin     = IP_VS_BASE_CTL,
2579         .get_optmax     = IP_VS_SO_GET_MAX+1,
2580         .get            = do_ip_vs_get_ctl,
2581         .owner          = THIS_MODULE,
2582 };
2583
2584 /*
2585  * Generic Netlink interface
2586  */
2587
2588 /* IPVS genetlink family */
2589 static struct genl_family ip_vs_genl_family = {
2590         .id             = GENL_ID_GENERATE,
2591         .hdrsize        = 0,
2592         .name           = IPVS_GENL_NAME,
2593         .version        = IPVS_GENL_VERSION,
2594         .maxattr        = IPVS_CMD_MAX,
2595 };
2596
2597 /* Policy used for first-level command attributes */
2598 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2599         [IPVS_CMD_ATTR_SERVICE]         = { .type = NLA_NESTED },
2600         [IPVS_CMD_ATTR_DEST]            = { .type = NLA_NESTED },
2601         [IPVS_CMD_ATTR_DAEMON]          = { .type = NLA_NESTED },
2602         [IPVS_CMD_ATTR_TIMEOUT_TCP]     = { .type = NLA_U32 },
2603         [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2604         [IPVS_CMD_ATTR_TIMEOUT_UDP]     = { .type = NLA_U32 },
2605 };
2606
2607 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2608 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2609         [IPVS_DAEMON_ATTR_STATE]        = { .type = NLA_U32 },
2610         [IPVS_DAEMON_ATTR_MCAST_IFN]    = { .type = NLA_NUL_STRING,
2611                                             .len = IP_VS_IFNAME_MAXLEN },
2612         [IPVS_DAEMON_ATTR_SYNC_ID]      = { .type = NLA_U32 },
2613 };
2614
2615 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2616 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2617         [IPVS_SVC_ATTR_AF]              = { .type = NLA_U16 },
2618         [IPVS_SVC_ATTR_PROTOCOL]        = { .type = NLA_U16 },
2619         [IPVS_SVC_ATTR_ADDR]            = { .type = NLA_BINARY,
2620                                             .len = sizeof(union nf_inet_addr) },
2621         [IPVS_SVC_ATTR_PORT]            = { .type = NLA_U16 },
2622         [IPVS_SVC_ATTR_FWMARK]          = { .type = NLA_U32 },
2623         [IPVS_SVC_ATTR_SCHED_NAME]      = { .type = NLA_NUL_STRING,
2624                                             .len = IP_VS_SCHEDNAME_MAXLEN },
2625         [IPVS_SVC_ATTR_PE_NAME]         = { .type = NLA_NUL_STRING,
2626                                             .len = IP_VS_PENAME_MAXLEN },
2627         [IPVS_SVC_ATTR_FLAGS]           = { .type = NLA_BINARY,
2628                                             .len = sizeof(struct ip_vs_flags) },
2629         [IPVS_SVC_ATTR_TIMEOUT]         = { .type = NLA_U32 },
2630         [IPVS_SVC_ATTR_NETMASK]         = { .type = NLA_U32 },
2631         [IPVS_SVC_ATTR_STATS]           = { .type = NLA_NESTED },
2632 };
2633
2634 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2635 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2636         [IPVS_DEST_ATTR_ADDR]           = { .type = NLA_BINARY,
2637                                             .len = sizeof(union nf_inet_addr) },
2638         [IPVS_DEST_ATTR_PORT]           = { .type = NLA_U16 },
2639         [IPVS_DEST_ATTR_FWD_METHOD]     = { .type = NLA_U32 },
2640         [IPVS_DEST_ATTR_WEIGHT]         = { .type = NLA_U32 },
2641         [IPVS_DEST_ATTR_U_THRESH]       = { .type = NLA_U32 },
2642         [IPVS_DEST_ATTR_L_THRESH]       = { .type = NLA_U32 },
2643         [IPVS_DEST_ATTR_ACTIVE_CONNS]   = { .type = NLA_U32 },
2644         [IPVS_DEST_ATTR_INACT_CONNS]    = { .type = NLA_U32 },
2645         [IPVS_DEST_ATTR_PERSIST_CONNS]  = { .type = NLA_U32 },
2646         [IPVS_DEST_ATTR_STATS]          = { .type = NLA_NESTED },
2647 };
2648
2649 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2650                                  struct ip_vs_stats *stats)
2651 {
2652         struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2653         if (!nl_stats)
2654                 return -EMSGSIZE;
2655
2656         spin_lock_bh(&stats->lock);
2657
2658         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2659         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2660         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2661         NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2662         NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2663         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2664         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2665         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2666         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2667         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
2668
2669         spin_unlock_bh(&stats->lock);
2670
2671         nla_nest_end(skb, nl_stats);
2672
2673         return 0;
2674
2675 nla_put_failure:
2676         spin_unlock_bh(&stats->lock);
2677         nla_nest_cancel(skb, nl_stats);
2678         return -EMSGSIZE;
2679 }
2680
2681 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2682                                    struct ip_vs_service *svc)
2683 {
2684         struct nlattr *nl_service;
2685         struct ip_vs_flags flags = { .flags = svc->flags,
2686                                      .mask = ~0 };
2687
2688         nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2689         if (!nl_service)
2690                 return -EMSGSIZE;
2691
2692         NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
2693
2694         if (svc->fwmark) {
2695                 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2696         } else {
2697                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2698                 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2699                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2700         }
2701
2702         NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2703         if (svc->pe)
2704                 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
2705         NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2706         NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2707         NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2708
2709         if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2710                 goto nla_put_failure;
2711
2712         nla_nest_end(skb, nl_service);
2713
2714         return 0;
2715
2716 nla_put_failure:
2717         nla_nest_cancel(skb, nl_service);
2718         return -EMSGSIZE;
2719 }
2720
2721 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2722                                    struct ip_vs_service *svc,
2723                                    struct netlink_callback *cb)
2724 {
2725         void *hdr;
2726
2727         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2728                           &ip_vs_genl_family, NLM_F_MULTI,
2729                           IPVS_CMD_NEW_SERVICE);
2730         if (!hdr)
2731                 return -EMSGSIZE;
2732
2733         if (ip_vs_genl_fill_service(skb, svc) < 0)
2734                 goto nla_put_failure;
2735
2736         return genlmsg_end(skb, hdr);
2737
2738 nla_put_failure:
2739         genlmsg_cancel(skb, hdr);
2740         return -EMSGSIZE;
2741 }
2742
2743 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2744                                     struct netlink_callback *cb)
2745 {
2746         int idx = 0, i;
2747         int start = cb->args[0];
2748         struct ip_vs_service *svc;
2749         struct net *net = skb_sknet(skb);
2750
2751         mutex_lock(&__ip_vs_mutex);
2752         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2753                 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2754                         if (++idx <= start || !net_eq(svc->net, net))
2755                                 continue;
2756                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2757                                 idx--;
2758                                 goto nla_put_failure;
2759                         }
2760                 }
2761         }
2762
2763         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2764                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2765                         if (++idx <= start || !net_eq(svc->net, net))
2766                                 continue;
2767                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2768                                 idx--;
2769                                 goto nla_put_failure;
2770                         }
2771                 }
2772         }
2773
2774 nla_put_failure:
2775         mutex_unlock(&__ip_vs_mutex);
2776         cb->args[0] = idx;
2777
2778         return skb->len;
2779 }
2780
2781 static int ip_vs_genl_parse_service(struct net *net,
2782                                     struct ip_vs_service_user_kern *usvc,
2783                                     struct nlattr *nla, int full_entry,
2784                                     struct ip_vs_service **ret_svc)
2785 {
2786         struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2787         struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2788         struct ip_vs_service *svc;
2789
2790         /* Parse mandatory identifying service fields first */
2791         if (nla == NULL ||
2792             nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2793                 return -EINVAL;
2794
2795         nla_af          = attrs[IPVS_SVC_ATTR_AF];
2796         nla_protocol    = attrs[IPVS_SVC_ATTR_PROTOCOL];
2797         nla_addr        = attrs[IPVS_SVC_ATTR_ADDR];
2798         nla_port        = attrs[IPVS_SVC_ATTR_PORT];
2799         nla_fwmark      = attrs[IPVS_SVC_ATTR_FWMARK];
2800
2801         if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2802                 return -EINVAL;
2803
2804         memset(usvc, 0, sizeof(*usvc));
2805
2806         usvc->af = nla_get_u16(nla_af);
2807 #ifdef CONFIG_IP_VS_IPV6
2808         if (usvc->af != AF_INET && usvc->af != AF_INET6)
2809 #else
2810         if (usvc->af != AF_INET)
2811 #endif
2812                 return -EAFNOSUPPORT;
2813
2814         if (nla_fwmark) {
2815                 usvc->protocol = IPPROTO_TCP;
2816                 usvc->fwmark = nla_get_u32(nla_fwmark);
2817         } else {
2818                 usvc->protocol = nla_get_u16(nla_protocol);
2819                 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2820                 usvc->port = nla_get_u16(nla_port);
2821                 usvc->fwmark = 0;
2822         }
2823
2824         if (usvc->fwmark)
2825                 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
2826         else
2827                 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
2828                                            &usvc->addr, usvc->port);
2829         *ret_svc = svc;
2830
2831         /* If a full entry was requested, check for the additional fields */
2832         if (full_entry) {
2833                 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
2834                               *nla_netmask;
2835                 struct ip_vs_flags flags;
2836
2837                 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2838                 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
2839                 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2840                 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2841                 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2842
2843                 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2844                         return -EINVAL;
2845
2846                 nla_memcpy(&flags, nla_flags, sizeof(flags));
2847
2848                 /* prefill flags from service if it already exists */
2849                 if (svc)
2850                         usvc->flags = svc->flags;
2851
2852                 /* set new flags from userland */
2853                 usvc->flags = (usvc->flags & ~flags.mask) |
2854                               (flags.flags & flags.mask);
2855                 usvc->sched_name = nla_data(nla_sched);
2856                 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
2857                 usvc->timeout = nla_get_u32(nla_timeout);
2858                 usvc->netmask = nla_get_u32(nla_netmask);
2859         }
2860
2861         return 0;
2862 }
2863
2864 static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
2865                                                      struct nlattr *nla)
2866 {
2867         struct ip_vs_service_user_kern usvc;
2868         struct ip_vs_service *svc;
2869         int ret;
2870
2871         ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
2872         return ret ? ERR_PTR(ret) : svc;
2873 }
2874
2875 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2876 {
2877         struct nlattr *nl_dest;
2878
2879         nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2880         if (!nl_dest)
2881                 return -EMSGSIZE;
2882
2883         NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2884         NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2885
2886         NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2887                     atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2888         NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2889         NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2890         NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2891         NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2892                     atomic_read(&dest->activeconns));
2893         NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2894                     atomic_read(&dest->inactconns));
2895         NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2896                     atomic_read(&dest->persistconns));
2897
2898         if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2899                 goto nla_put_failure;
2900
2901         nla_nest_end(skb, nl_dest);
2902
2903         return 0;
2904
2905 nla_put_failure:
2906         nla_nest_cancel(skb, nl_dest);
2907         return -EMSGSIZE;
2908 }
2909
2910 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2911                                 struct netlink_callback *cb)
2912 {
2913         void *hdr;
2914
2915         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2916                           &ip_vs_genl_family, NLM_F_MULTI,
2917                           IPVS_CMD_NEW_DEST);
2918         if (!hdr)
2919                 return -EMSGSIZE;
2920
2921         if (ip_vs_genl_fill_dest(skb, dest) < 0)
2922                 goto nla_put_failure;
2923
2924         return genlmsg_end(skb, hdr);
2925
2926 nla_put_failure:
2927         genlmsg_cancel(skb, hdr);
2928         return -EMSGSIZE;
2929 }
2930
2931 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2932                                  struct netlink_callback *cb)
2933 {
2934         int idx = 0;
2935         int start = cb->args[0];
2936         struct ip_vs_service *svc;
2937         struct ip_vs_dest *dest;
2938         struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2939         struct net *net;
2940
2941         mutex_lock(&__ip_vs_mutex);
2942
2943         /* Try to find the service for which to dump destinations */
2944         if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2945                         IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2946                 goto out_err;
2947
2948         net = skb_sknet(skb);
2949         svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
2950         if (IS_ERR(svc) || svc == NULL)
2951                 goto out_err;
2952
2953         /* Dump the destinations */
2954         list_for_each_entry(dest, &svc->destinations, n_list) {
2955                 if (++idx <= start)
2956                         continue;
2957                 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2958                         idx--;
2959                         goto nla_put_failure;
2960                 }
2961         }
2962
2963 nla_put_failure:
2964         cb->args[0] = idx;
2965
2966 out_err:
2967         mutex_unlock(&__ip_vs_mutex);
2968
2969         return skb->len;
2970 }
2971
2972 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
2973                                  struct nlattr *nla, int full_entry)
2974 {
2975         struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2976         struct nlattr *nla_addr, *nla_port;
2977
2978         /* Parse mandatory identifying destination fields first */
2979         if (nla == NULL ||
2980             nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2981                 return -EINVAL;
2982
2983         nla_addr        = attrs[IPVS_DEST_ATTR_ADDR];
2984         nla_port        = attrs[IPVS_DEST_ATTR_PORT];
2985
2986         if (!(nla_addr && nla_port))
2987                 return -EINVAL;
2988
2989         memset(udest, 0, sizeof(*udest));
2990
2991         nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2992         udest->port = nla_get_u16(nla_port);
2993
2994         /* If a full entry was requested, check for the additional fields */
2995         if (full_entry) {
2996                 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2997                               *nla_l_thresh;
2998
2999                 nla_fwd         = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3000                 nla_weight      = attrs[IPVS_DEST_ATTR_WEIGHT];
3001                 nla_u_thresh    = attrs[IPVS_DEST_ATTR_U_THRESH];
3002                 nla_l_thresh    = attrs[IPVS_DEST_ATTR_L_THRESH];
3003
3004                 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3005                         return -EINVAL;
3006
3007                 udest->conn_flags = nla_get_u32(nla_fwd)
3008                                     & IP_VS_CONN_F_FWD_MASK;
3009                 udest->weight = nla_get_u32(nla_weight);
3010                 udest->u_threshold = nla_get_u32(nla_u_thresh);
3011                 udest->l_threshold = nla_get_u32(nla_l_thresh);
3012         }
3013
3014         return 0;
3015 }
3016
3017 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3018                                   const char *mcast_ifn, __be32 syncid)
3019 {
3020         struct nlattr *nl_daemon;
3021
3022         nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3023         if (!nl_daemon)
3024                 return -EMSGSIZE;
3025
3026         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
3027         NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
3028         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3029
3030         nla_nest_end(skb, nl_daemon);
3031
3032         return 0;
3033
3034 nla_put_failure:
3035         nla_nest_cancel(skb, nl_daemon);
3036         return -EMSGSIZE;
3037 }
3038
3039 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3040                                   const char *mcast_ifn, __be32 syncid,
3041                                   struct netlink_callback *cb)
3042 {
3043         void *hdr;
3044         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3045                           &ip_vs_genl_family, NLM_F_MULTI,
3046                           IPVS_CMD_NEW_DAEMON);
3047         if (!hdr)
3048                 return -EMSGSIZE;
3049
3050         if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3051                 goto nla_put_failure;
3052
3053         return genlmsg_end(skb, hdr);
3054
3055 nla_put_failure:
3056         genlmsg_cancel(skb, hdr);
3057         return -EMSGSIZE;
3058 }
3059
3060 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3061                                    struct netlink_callback *cb)
3062 {
3063         mutex_lock(&__ip_vs_mutex);
3064         if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3065                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3066                                            ip_vs_master_mcast_ifn,
3067                                            ip_vs_master_syncid, cb) < 0)
3068                         goto nla_put_failure;
3069
3070                 cb->args[0] = 1;
3071         }
3072
3073         if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3074                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3075                                            ip_vs_backup_mcast_ifn,
3076                                            ip_vs_backup_syncid, cb) < 0)
3077                         goto nla_put_failure;
3078
3079                 cb->args[1] = 1;
3080         }
3081
3082 nla_put_failure:
3083         mutex_unlock(&__ip_vs_mutex);
3084
3085         return skb->len;
3086 }
3087
3088 static int ip_vs_genl_new_daemon(struct nlattr **attrs)
3089 {
3090         if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3091               attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3092               attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3093                 return -EINVAL;
3094
3095         return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3096                                  nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3097                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3098 }
3099
3100 static int ip_vs_genl_del_daemon(struct nlattr **attrs)
3101 {
3102         if (!attrs[IPVS_DAEMON_ATTR_STATE])
3103                 return -EINVAL;
3104
3105         return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3106 }
3107
3108 static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
3109 {
3110         struct ip_vs_timeout_user t;
3111
3112         __ip_vs_get_timeouts(net, &t);
3113
3114         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3115                 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3116
3117         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3118                 t.tcp_fin_timeout =
3119                         nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3120
3121         if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3122                 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3123
3124         return ip_vs_set_timeout(net, &t);
3125 }
3126
3127 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3128 {
3129         struct ip_vs_service *svc = NULL;
3130         struct ip_vs_service_user_kern usvc;
3131         struct ip_vs_dest_user_kern udest;
3132         int ret = 0, cmd;
3133         int need_full_svc = 0, need_full_dest = 0;
3134         struct net *net;
3135
3136         net = skb_sknet(skb);
3137         cmd = info->genlhdr->cmd;
3138
3139         mutex_lock(&__ip_vs_mutex);
3140
3141         if (cmd == IPVS_CMD_FLUSH) {
3142                 ret = ip_vs_flush(net);
3143                 goto out;
3144         } else if (cmd == IPVS_CMD_SET_CONFIG) {
3145                 ret = ip_vs_genl_set_config(net, info->attrs);
3146                 goto out;
3147         } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3148                    cmd == IPVS_CMD_DEL_DAEMON) {
3149
3150                 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3151
3152                 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3153                     nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3154                                      info->attrs[IPVS_CMD_ATTR_DAEMON],
3155                                      ip_vs_daemon_policy)) {
3156                         ret = -EINVAL;
3157                         goto out;
3158                 }
3159
3160                 if (cmd == IPVS_CMD_NEW_DAEMON)
3161                         ret = ip_vs_genl_new_daemon(daemon_attrs);
3162                 else
3163                         ret = ip_vs_genl_del_daemon(daemon_attrs);
3164                 goto out;
3165         } else if (cmd == IPVS_CMD_ZERO &&
3166                    !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3167                 ret = ip_vs_zero_all(net);
3168                 goto out;
3169         }
3170
3171         /* All following commands require a service argument, so check if we
3172          * received a valid one. We need a full service specification when
3173          * adding / editing a service. Only identifying members otherwise. */
3174         if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3175                 need_full_svc = 1;
3176
3177         ret = ip_vs_genl_parse_service(net, &usvc,
3178                                        info->attrs[IPVS_CMD_ATTR_SERVICE],
3179                                        need_full_svc, &svc);
3180         if (ret)
3181                 goto out;
3182
3183         /* Unless we're adding a new service, the service must already exist */
3184         if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3185                 ret = -ESRCH;
3186                 goto out;
3187         }
3188
3189         /* Destination commands require a valid destination argument. For
3190          * adding / editing a destination, we need a full destination
3191          * specification. */
3192         if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3193             cmd == IPVS_CMD_DEL_DEST) {
3194                 if (cmd != IPVS_CMD_DEL_DEST)
3195                         need_full_dest = 1;
3196
3197                 ret = ip_vs_genl_parse_dest(&udest,
3198                                             info->attrs[IPVS_CMD_ATTR_DEST],
3199                                             need_full_dest);
3200                 if (ret)
3201                         goto out;
3202         }
3203
3204         switch (cmd) {
3205         case IPVS_CMD_NEW_SERVICE:
3206                 if (svc == NULL)
3207                         ret = ip_vs_add_service(net, &usvc, &svc);
3208                 else
3209                         ret = -EEXIST;
3210                 break;
3211         case IPVS_CMD_SET_SERVICE:
3212                 ret = ip_vs_edit_service(svc, &usvc);
3213                 break;
3214         case IPVS_CMD_DEL_SERVICE:
3215                 ret = ip_vs_del_service(svc);
3216                 /* do not use svc, it can be freed */
3217                 break;
3218         case IPVS_CMD_NEW_DEST:
3219                 ret = ip_vs_add_dest(svc, &udest);
3220                 break;
3221         case IPVS_CMD_SET_DEST:
3222                 ret = ip_vs_edit_dest(svc, &udest);
3223                 break;
3224         case IPVS_CMD_DEL_DEST:
3225                 ret = ip_vs_del_dest(svc, &udest);
3226                 break;
3227         case IPVS_CMD_ZERO:
3228                 ret = ip_vs_zero_service(svc);
3229                 break;
3230         default:
3231                 ret = -EINVAL;
3232         }
3233
3234 out:
3235         mutex_unlock(&__ip_vs_mutex);
3236
3237         return ret;
3238 }
3239
3240 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3241 {
3242         struct sk_buff *msg;
3243         void *reply;
3244         int ret, cmd, reply_cmd;
3245         struct net *net;
3246
3247         net = skb_sknet(skb);
3248         cmd = info->genlhdr->cmd;
3249
3250         if (cmd == IPVS_CMD_GET_SERVICE)
3251                 reply_cmd = IPVS_CMD_NEW_SERVICE;
3252         else if (cmd == IPVS_CMD_GET_INFO)
3253                 reply_cmd = IPVS_CMD_SET_INFO;
3254         else if (cmd == IPVS_CMD_GET_CONFIG)
3255                 reply_cmd = IPVS_CMD_SET_CONFIG;
3256         else {
3257                 pr_err("unknown Generic Netlink command\n");
3258                 return -EINVAL;
3259         }
3260
3261         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3262         if (!msg)
3263                 return -ENOMEM;
3264
3265         mutex_lock(&__ip_vs_mutex);
3266
3267         reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3268         if (reply == NULL)
3269                 goto nla_put_failure;
3270
3271         switch (cmd) {
3272         case IPVS_CMD_GET_SERVICE:
3273         {
3274                 struct ip_vs_service *svc;
3275
3276                 svc = ip_vs_genl_find_service(net,
3277                                               info->attrs[IPVS_CMD_ATTR_SERVICE]);
3278                 if (IS_ERR(svc)) {
3279                         ret = PTR_ERR(svc);
3280                         goto out_err;
3281                 } else if (svc) {
3282                         ret = ip_vs_genl_fill_service(msg, svc);
3283                         if (ret)
3284                                 goto nla_put_failure;
3285                 } else {
3286                         ret = -ESRCH;
3287                         goto out_err;
3288                 }
3289
3290                 break;
3291         }
3292
3293         case IPVS_CMD_GET_CONFIG:
3294         {
3295                 struct ip_vs_timeout_user t;
3296
3297                 __ip_vs_get_timeouts(net, &t);
3298 #ifdef CONFIG_IP_VS_PROTO_TCP
3299                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3300                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3301                             t.tcp_fin_timeout);
3302 #endif
3303 #ifdef CONFIG_IP_VS_PROTO_UDP
3304                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3305 #endif
3306
3307                 break;
3308         }
3309
3310         case IPVS_CMD_GET_INFO:
3311                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3312                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3313                             ip_vs_conn_tab_size);
3314                 break;
3315         }
3316
3317         genlmsg_end(msg, reply);
3318         ret = genlmsg_reply(msg, info);
3319         goto out;
3320
3321 nla_put_failure:
3322         pr_err("not enough space in Netlink message\n");
3323         ret = -EMSGSIZE;
3324
3325 out_err:
3326         nlmsg_free(msg);
3327 out:
3328         mutex_unlock(&__ip_vs_mutex);
3329
3330         return ret;
3331 }
3332
3333
3334 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3335         {
3336                 .cmd    = IPVS_CMD_NEW_SERVICE,
3337                 .flags  = GENL_ADMIN_PERM,
3338                 .policy = ip_vs_cmd_policy,
3339                 .doit   = ip_vs_genl_set_cmd,
3340         },
3341         {
3342                 .cmd    = IPVS_CMD_SET_SERVICE,
3343                 .flags  = GENL_ADMIN_PERM,
3344                 .policy = ip_vs_cmd_policy,
3345                 .doit   = ip_vs_genl_set_cmd,
3346         },
3347         {
3348                 .cmd    = IPVS_CMD_DEL_SERVICE,
3349                 .flags  = GENL_ADMIN_PERM,
3350                 .policy = ip_vs_cmd_policy,
3351                 .doit   = ip_vs_genl_set_cmd,
3352         },
3353         {
3354                 .cmd    = IPVS_CMD_GET_SERVICE,
3355                 .flags  = GENL_ADMIN_PERM,
3356                 .doit   = ip_vs_genl_get_cmd,
3357                 .dumpit = ip_vs_genl_dump_services,
3358                 .policy = ip_vs_cmd_policy,
3359         },
3360         {
3361                 .cmd    = IPVS_CMD_NEW_DEST,
3362                 .flags  = GENL_ADMIN_PERM,
3363                 .policy = ip_vs_cmd_policy,
3364                 .doit   = ip_vs_genl_set_cmd,
3365         },
3366         {
3367                 .cmd    = IPVS_CMD_SET_DEST,
3368                 .flags  = GENL_ADMIN_PERM,
3369                 .policy = ip_vs_cmd_policy,
3370                 .doit   = ip_vs_genl_set_cmd,
3371         },
3372         {
3373                 .cmd    = IPVS_CMD_DEL_DEST,
3374                 .flags  = GENL_ADMIN_PERM,
3375                 .policy = ip_vs_cmd_policy,
3376                 .doit   = ip_vs_genl_set_cmd,
3377         },
3378         {
3379                 .cmd    = IPVS_CMD_GET_DEST,
3380                 .flags  = GENL_ADMIN_PERM,
3381                 .policy = ip_vs_cmd_policy,
3382                 .dumpit = ip_vs_genl_dump_dests,
3383         },
3384         {
3385                 .cmd    = IPVS_CMD_NEW_DAEMON,
3386                 .flags  = GENL_ADMIN_PERM,
3387                 .policy = ip_vs_cmd_policy,
3388                 .doit   = ip_vs_genl_set_cmd,
3389         },
3390         {
3391                 .cmd    = IPVS_CMD_DEL_DAEMON,
3392                 .flags  = GENL_ADMIN_PERM,
3393                 .policy = ip_vs_cmd_policy,
3394                 .doit   = ip_vs_genl_set_cmd,
3395         },
3396         {
3397                 .cmd    = IPVS_CMD_GET_DAEMON,
3398                 .flags  = GENL_ADMIN_PERM,
3399                 .dumpit = ip_vs_genl_dump_daemons,
3400         },
3401         {
3402                 .cmd    = IPVS_CMD_SET_CONFIG,
3403                 .flags  = GENL_ADMIN_PERM,
3404                 .policy = ip_vs_cmd_policy,
3405                 .doit   = ip_vs_genl_set_cmd,
3406         },
3407         {
3408                 .cmd    = IPVS_CMD_GET_CONFIG,
3409                 .flags  = GENL_ADMIN_PERM,
3410                 .doit   = ip_vs_genl_get_cmd,
3411         },
3412         {
3413                 .cmd    = IPVS_CMD_GET_INFO,
3414                 .flags  = GENL_ADMIN_PERM,
3415                 .doit   = ip_vs_genl_get_cmd,
3416         },
3417         {
3418                 .cmd    = IPVS_CMD_ZERO,
3419                 .flags  = GENL_ADMIN_PERM,
3420                 .policy = ip_vs_cmd_policy,
3421                 .doit   = ip_vs_genl_set_cmd,
3422         },
3423         {
3424                 .cmd    = IPVS_CMD_FLUSH,
3425                 .flags  = GENL_ADMIN_PERM,
3426                 .doit   = ip_vs_genl_set_cmd,
3427         },
3428 };
3429
3430 static int __init ip_vs_genl_register(void)
3431 {
3432         return genl_register_family_with_ops(&ip_vs_genl_family,
3433                 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
3434 }
3435
3436 static void ip_vs_genl_unregister(void)
3437 {
3438         genl_unregister_family(&ip_vs_genl_family);
3439 }
3440
3441 /* End of Generic Netlink interface definitions */
3442
3443 /*
3444  * per netns init/exit func.
3445  */
3446 int __net_init __ip_vs_control_init(struct net *net)
3447 {
3448         int idx;
3449         struct netns_ipvs *ipvs = net_ipvs(net);
3450
3451         if (!net_eq(net, &init_net))    /* netns not enabled yet */
3452                 return -EPERM;
3453
3454         for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3455                 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3456
3457         proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3458         proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
3459         sysctl_header = register_net_sysctl_table(net, net_vs_ctl_path,
3460                                                   vs_vars);
3461         if (sysctl_header == NULL)
3462                 goto err_reg;
3463         ip_vs_new_estimator(&ip_vs_stats);
3464         return 0;
3465
3466 err_reg:
3467         return -ENOMEM;
3468 }
3469
3470 static void __net_exit __ip_vs_control_cleanup(struct net *net)
3471 {
3472         if (!net_eq(net, &init_net))    /* netns not enabled yet */
3473                 return;
3474
3475         ip_vs_kill_estimator(&ip_vs_stats);
3476         unregister_net_sysctl_table(sysctl_header);
3477         proc_net_remove(net, "ip_vs_stats");
3478         proc_net_remove(net, "ip_vs");
3479 }
3480
3481 static struct pernet_operations ipvs_control_ops = {
3482         .init = __ip_vs_control_init,
3483         .exit = __ip_vs_control_cleanup,
3484 };
3485
3486 int __init ip_vs_control_init(void)
3487 {
3488         int idx;
3489         int ret;
3490
3491         EnterFunction(2);
3492
3493         /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
3494         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
3495                 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3496                 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3497         }
3498
3499         ret = register_pernet_subsys(&ipvs_control_ops);
3500         if (ret) {
3501                 pr_err("cannot register namespace.\n");
3502                 goto err;
3503         }
3504
3505         smp_wmb();      /* Do we really need it now ? */
3506
3507         ret = nf_register_sockopt(&ip_vs_sockopts);
3508         if (ret) {
3509                 pr_err("cannot register sockopt.\n");
3510                 goto err_net;
3511         }
3512
3513         ret = ip_vs_genl_register();
3514         if (ret) {
3515                 pr_err("cannot register Generic Netlink interface.\n");
3516                 nf_unregister_sockopt(&ip_vs_sockopts);
3517                 goto err_net;
3518         }
3519
3520         /* Hook the defense timer */
3521         schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3522
3523         LeaveFunction(2);
3524         return 0;
3525
3526 err_net:
3527         unregister_pernet_subsys(&ipvs_control_ops);
3528 err:
3529         return ret;
3530 }
3531
3532
3533 void ip_vs_control_cleanup(void)
3534 {
3535         EnterFunction(2);
3536         ip_vs_trash_cleanup();
3537         cancel_delayed_work_sync(&defense_work);
3538         cancel_work_sync(&defense_work.work);
3539         ip_vs_kill_estimator(&ip_vs_stats);
3540         unregister_pernet_subsys(&ipvs_control_ops);
3541         ip_vs_genl_unregister();
3542         nf_unregister_sockopt(&ip_vs_sockopts);
3543         LeaveFunction(2);
3544 }