ipvs: fix synchronization on connection close
[linux-2.6.git] / net / netfilter / ipvs / ip_vs_ctl.c
1 /*
2  * IPVS         An implementation of the IP virtual server support for the
3  *              LINUX operating system.  IPVS is now implemented as a module
4  *              over the NetFilter framework. IPVS can be used to build a
5  *              high-performance and highly available server based on a
6  *              cluster of servers.
7  *
8  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
9  *              Peter Kese <peter.kese@ijs.si>
10  *              Julian Anastasov <ja@ssi.bg>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  *
17  * Changes:
18  *
19  */
20
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
28 #include <linux/fs.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34
35 #include <linux/netfilter.h>
36 #include <linux/netfilter_ipv4.h>
37 #include <linux/mutex.h>
38
39 #include <net/net_namespace.h>
40 #include <net/ip.h>
41 #ifdef CONFIG_IP_VS_IPV6
42 #include <net/ipv6.h>
43 #include <net/ip6_route.h>
44 #endif
45 #include <net/route.h>
46 #include <net/sock.h>
47 #include <net/genetlink.h>
48
49 #include <asm/uaccess.h>
50
51 #include <net/ip_vs.h>
52
53 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
54 static DEFINE_MUTEX(__ip_vs_mutex);
55
56 /* lock for service table */
57 static DEFINE_RWLOCK(__ip_vs_svc_lock);
58
59 /* lock for table with the real services */
60 static DEFINE_RWLOCK(__ip_vs_rs_lock);
61
62 /* lock for state and timeout tables */
63 static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
64
65 /* lock for drop entry handling */
66 static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
67
68 /* lock for drop packet handling */
69 static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
70
71 /* 1/rate drop and drop-entry variables */
72 int ip_vs_drop_rate = 0;
73 int ip_vs_drop_counter = 0;
74 static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
75
76 /* number of virtual services */
77 static int ip_vs_num_services = 0;
78
79 /* sysctl variables */
80 static int sysctl_ip_vs_drop_entry = 0;
81 static int sysctl_ip_vs_drop_packet = 0;
82 static int sysctl_ip_vs_secure_tcp = 0;
83 static int sysctl_ip_vs_amemthresh = 1024;
84 static int sysctl_ip_vs_am_droprate = 10;
85 int sysctl_ip_vs_cache_bypass = 0;
86 int sysctl_ip_vs_expire_nodest_conn = 0;
87 int sysctl_ip_vs_expire_quiescent_template = 0;
88 int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
89 int sysctl_ip_vs_nat_icmp_send = 0;
90
91
92 #ifdef CONFIG_IP_VS_DEBUG
/* debug level; zero until changed */
static int sysctl_ip_vs_debug_level;

/* Returns the current value of sysctl_ip_vs_debug_level. */
int ip_vs_get_debug_level(void)
{
	const int level = sysctl_ip_vs_debug_level;

	return level;
}
99 #endif
100
101 #ifdef CONFIG_IP_VS_IPV6
102 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
103 static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
104 {
105         struct rt6_info *rt;
106         struct flowi fl = {
107                 .oif = 0,
108                 .nl_u = {
109                         .ip6_u = {
110                                 .daddr = *addr,
111                                 .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
112         };
113
114         rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
115         if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
116                         return 1;
117
118         return 0;
119 }
120 #endif
121 /*
122  *      update_defense_level is called from keventd and from sysctl,
123  *      so it needs to protect itself from softirqs
124  */
125 static void update_defense_level(void)
126 {
127         struct sysinfo i;
128         static int old_secure_tcp = 0;
129         int availmem;
130         int nomem;
131         int to_change = -1;
132
133         /* we only count free and buffered memory (in pages) */
134         si_meminfo(&i);
135         availmem = i.freeram + i.bufferram;
136         /* however in linux 2.5 the i.bufferram is total page cache size,
137            we need adjust it */
138         /* si_swapinfo(&i); */
139         /* availmem = availmem - (i.totalswap - i.freeswap); */
140
141         nomem = (availmem < sysctl_ip_vs_amemthresh);
142
143         local_bh_disable();
144
145         /* drop_entry */
146         spin_lock(&__ip_vs_dropentry_lock);
147         switch (sysctl_ip_vs_drop_entry) {
148         case 0:
149                 atomic_set(&ip_vs_dropentry, 0);
150                 break;
151         case 1:
152                 if (nomem) {
153                         atomic_set(&ip_vs_dropentry, 1);
154                         sysctl_ip_vs_drop_entry = 2;
155                 } else {
156                         atomic_set(&ip_vs_dropentry, 0);
157                 }
158                 break;
159         case 2:
160                 if (nomem) {
161                         atomic_set(&ip_vs_dropentry, 1);
162                 } else {
163                         atomic_set(&ip_vs_dropentry, 0);
164                         sysctl_ip_vs_drop_entry = 1;
165                 };
166                 break;
167         case 3:
168                 atomic_set(&ip_vs_dropentry, 1);
169                 break;
170         }
171         spin_unlock(&__ip_vs_dropentry_lock);
172
173         /* drop_packet */
174         spin_lock(&__ip_vs_droppacket_lock);
175         switch (sysctl_ip_vs_drop_packet) {
176         case 0:
177                 ip_vs_drop_rate = 0;
178                 break;
179         case 1:
180                 if (nomem) {
181                         ip_vs_drop_rate = ip_vs_drop_counter
182                                 = sysctl_ip_vs_amemthresh /
183                                 (sysctl_ip_vs_amemthresh-availmem);
184                         sysctl_ip_vs_drop_packet = 2;
185                 } else {
186                         ip_vs_drop_rate = 0;
187                 }
188                 break;
189         case 2:
190                 if (nomem) {
191                         ip_vs_drop_rate = ip_vs_drop_counter
192                                 = sysctl_ip_vs_amemthresh /
193                                 (sysctl_ip_vs_amemthresh-availmem);
194                 } else {
195                         ip_vs_drop_rate = 0;
196                         sysctl_ip_vs_drop_packet = 1;
197                 }
198                 break;
199         case 3:
200                 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
201                 break;
202         }
203         spin_unlock(&__ip_vs_droppacket_lock);
204
205         /* secure_tcp */
206         write_lock(&__ip_vs_securetcp_lock);
207         switch (sysctl_ip_vs_secure_tcp) {
208         case 0:
209                 if (old_secure_tcp >= 2)
210                         to_change = 0;
211                 break;
212         case 1:
213                 if (nomem) {
214                         if (old_secure_tcp < 2)
215                                 to_change = 1;
216                         sysctl_ip_vs_secure_tcp = 2;
217                 } else {
218                         if (old_secure_tcp >= 2)
219                                 to_change = 0;
220                 }
221                 break;
222         case 2:
223                 if (nomem) {
224                         if (old_secure_tcp < 2)
225                                 to_change = 1;
226                 } else {
227                         if (old_secure_tcp >= 2)
228                                 to_change = 0;
229                         sysctl_ip_vs_secure_tcp = 1;
230                 }
231                 break;
232         case 3:
233                 if (old_secure_tcp < 2)
234                         to_change = 1;
235                 break;
236         }
237         old_secure_tcp = sysctl_ip_vs_secure_tcp;
238         if (to_change >= 0)
239                 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
240         write_unlock(&__ip_vs_securetcp_lock);
241
242         local_bh_enable();
243 }
244
245
246 /*
247  *      Timer for checking the defense
248  */
249 #define DEFENSE_TIMER_PERIOD    1*HZ
250 static void defense_work_handler(struct work_struct *work);
251 static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
252
253 static void defense_work_handler(struct work_struct *work)
254 {
255         update_defense_level();
256         if (atomic_read(&ip_vs_dropentry))
257                 ip_vs_random_dropentry();
258
259         schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
260 }
261
262 int
263 ip_vs_use_count_inc(void)
264 {
265         return try_module_get(THIS_MODULE);
266 }
267
268 void
269 ip_vs_use_count_dec(void)
270 {
271         module_put(THIS_MODULE);
272 }
273
274
275 /*
276  *      Hash table: for virtual service lookups
277  */
278 #define IP_VS_SVC_TAB_BITS 8
279 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
280 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
281
282 /* the service table hashed by <protocol, addr, port> */
283 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
284 /* the service table hashed by fwmark */
285 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
286
287 /*
288  *      Hash table: for real service lookups
289  */
290 #define IP_VS_RTAB_BITS 4
291 #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
292 #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
293
294 static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
295
296 /*
297  *      Trash for destinations
298  */
299 static LIST_HEAD(ip_vs_dest_trash);
300
301 /*
302  *      FTP & NULL virtual service counters
303  */
304 static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
305 static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
306
307
308 /*
309  *      Returns hash value for virtual service
310  */
311 static __inline__ unsigned
312 ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
313                   __be16 port)
314 {
315         register unsigned porth = ntohs(port);
316         __be32 addr_fold = addr->ip;
317
318 #ifdef CONFIG_IP_VS_IPV6
319         if (af == AF_INET6)
320                 addr_fold = addr->ip6[0]^addr->ip6[1]^
321                             addr->ip6[2]^addr->ip6[3];
322 #endif
323
324         return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
325                 & IP_VS_SVC_TAB_MASK;
326 }
327
328 /*
329  *      Returns hash value of fwmark for virtual service lookup
330  */
331 static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
332 {
333         return fwmark & IP_VS_SVC_TAB_MASK;
334 }
335
336 /*
337  *      Hashes a service in the ip_vs_svc_table by <proto,addr,port>
338  *      or in the ip_vs_svc_fwm_table by fwmark.
339  *      Should be called with locked tables.
340  */
341 static int ip_vs_svc_hash(struct ip_vs_service *svc)
342 {
343         unsigned hash;
344
345         if (svc->flags & IP_VS_SVC_F_HASHED) {
346                 pr_err("%s(): request for already hashed, called from %pF\n",
347                        __func__, __builtin_return_address(0));
348                 return 0;
349         }
350
351         if (svc->fwmark == 0) {
352                 /*
353                  *  Hash it by <protocol,addr,port> in ip_vs_svc_table
354                  */
355                 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
356                                          svc->port);
357                 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
358         } else {
359                 /*
360                  *  Hash it by fwmark in ip_vs_svc_fwm_table
361                  */
362                 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
363                 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
364         }
365
366         svc->flags |= IP_VS_SVC_F_HASHED;
367         /* increase its refcnt because it is referenced by the svc table */
368         atomic_inc(&svc->refcnt);
369         return 1;
370 }
371
372
373 /*
374  *      Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
375  *      Should be called with locked tables.
376  */
377 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
378 {
379         if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
380                 pr_err("%s(): request for unhash flagged, called from %pF\n",
381                        __func__, __builtin_return_address(0));
382                 return 0;
383         }
384
385         if (svc->fwmark == 0) {
386                 /* Remove it from the ip_vs_svc_table table */
387                 list_del(&svc->s_list);
388         } else {
389                 /* Remove it from the ip_vs_svc_fwm_table table */
390                 list_del(&svc->f_list);
391         }
392
393         svc->flags &= ~IP_VS_SVC_F_HASHED;
394         atomic_dec(&svc->refcnt);
395         return 1;
396 }
397
398
399 /*
400  *      Get service by {proto,addr,port} in the service table.
401  */
402 static inline struct ip_vs_service *
403 __ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
404                     __be16 vport)
405 {
406         unsigned hash;
407         struct ip_vs_service *svc;
408
409         /* Check for "full" addressed entries */
410         hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
411
412         list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
413                 if ((svc->af == af)
414                     && ip_vs_addr_equal(af, &svc->addr, vaddr)
415                     && (svc->port == vport)
416                     && (svc->protocol == protocol)) {
417                         /* HIT */
418                         atomic_inc(&svc->usecnt);
419                         return svc;
420                 }
421         }
422
423         return NULL;
424 }
425
426
427 /*
428  *      Get service by {fwmark} in the service table.
429  */
430 static inline struct ip_vs_service *
431 __ip_vs_svc_fwm_get(int af, __u32 fwmark)
432 {
433         unsigned hash;
434         struct ip_vs_service *svc;
435
436         /* Check for fwmark addressed entries */
437         hash = ip_vs_svc_fwm_hashkey(fwmark);
438
439         list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
440                 if (svc->fwmark == fwmark && svc->af == af) {
441                         /* HIT */
442                         atomic_inc(&svc->usecnt);
443                         return svc;
444                 }
445         }
446
447         return NULL;
448 }
449
450 struct ip_vs_service *
451 ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
452                   const union nf_inet_addr *vaddr, __be16 vport)
453 {
454         struct ip_vs_service *svc;
455
456         read_lock(&__ip_vs_svc_lock);
457
458         /*
459          *      Check the table hashed by fwmark first
460          */
461         if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
462                 goto out;
463
464         /*
465          *      Check the table hashed by <protocol,addr,port>
466          *      for "full" addressed entries
467          */
468         svc = __ip_vs_service_get(af, protocol, vaddr, vport);
469
470         if (svc == NULL
471             && protocol == IPPROTO_TCP
472             && atomic_read(&ip_vs_ftpsvc_counter)
473             && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
474                 /*
475                  * Check if ftp service entry exists, the packet
476                  * might belong to FTP data connections.
477                  */
478                 svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
479         }
480
481         if (svc == NULL
482             && atomic_read(&ip_vs_nullsvc_counter)) {
483                 /*
484                  * Check if the catch-all port (port zero) exists
485                  */
486                 svc = __ip_vs_service_get(af, protocol, vaddr, 0);
487         }
488
489   out:
490         read_unlock(&__ip_vs_svc_lock);
491
492         IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
493                       fwmark, ip_vs_proto_name(protocol),
494                       IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
495                       svc ? "hit" : "not hit");
496
497         return svc;
498 }
499
500
501 static inline void
502 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
503 {
504         atomic_inc(&svc->refcnt);
505         dest->svc = svc;
506 }
507
508 static inline void
509 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
510 {
511         struct ip_vs_service *svc = dest->svc;
512
513         dest->svc = NULL;
514         if (atomic_dec_and_test(&svc->refcnt))
515                 kfree(svc);
516 }
517
518
519 /*
520  *      Returns hash value for real service
521  */
522 static inline unsigned ip_vs_rs_hashkey(int af,
523                                             const union nf_inet_addr *addr,
524                                             __be16 port)
525 {
526         register unsigned porth = ntohs(port);
527         __be32 addr_fold = addr->ip;
528
529 #ifdef CONFIG_IP_VS_IPV6
530         if (af == AF_INET6)
531                 addr_fold = addr->ip6[0]^addr->ip6[1]^
532                             addr->ip6[2]^addr->ip6[3];
533 #endif
534
535         return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
536                 & IP_VS_RTAB_MASK;
537 }
538
539 /*
540  *      Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
541  *      should be called with locked tables.
542  */
543 static int ip_vs_rs_hash(struct ip_vs_dest *dest)
544 {
545         unsigned hash;
546
547         if (!list_empty(&dest->d_list)) {
548                 return 0;
549         }
550
551         /*
552          *      Hash by proto,addr,port,
553          *      which are the parameters of the real service.
554          */
555         hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
556
557         list_add(&dest->d_list, &ip_vs_rtable[hash]);
558
559         return 1;
560 }
561
562 /*
563  *      UNhashes ip_vs_dest from ip_vs_rtable.
564  *      should be called with locked tables.
565  */
566 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
567 {
568         /*
569          * Remove it from the ip_vs_rtable table.
570          */
571         if (!list_empty(&dest->d_list)) {
572                 list_del(&dest->d_list);
573                 INIT_LIST_HEAD(&dest->d_list);
574         }
575
576         return 1;
577 }
578
579 /*
580  *      Lookup real service by <proto,addr,port> in the real service table.
581  */
582 struct ip_vs_dest *
583 ip_vs_lookup_real_service(int af, __u16 protocol,
584                           const union nf_inet_addr *daddr,
585                           __be16 dport)
586 {
587         unsigned hash;
588         struct ip_vs_dest *dest;
589
590         /*
591          *      Check for "full" addressed entries
592          *      Return the first found entry
593          */
594         hash = ip_vs_rs_hashkey(af, daddr, dport);
595
596         read_lock(&__ip_vs_rs_lock);
597         list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
598                 if ((dest->af == af)
599                     && ip_vs_addr_equal(af, &dest->addr, daddr)
600                     && (dest->port == dport)
601                     && ((dest->protocol == protocol) ||
602                         dest->vfwmark)) {
603                         /* HIT */
604                         read_unlock(&__ip_vs_rs_lock);
605                         return dest;
606                 }
607         }
608         read_unlock(&__ip_vs_rs_lock);
609
610         return NULL;
611 }
612
613 /*
614  *      Lookup destination by {addr,port} in the given service
615  */
616 static struct ip_vs_dest *
617 ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
618                   __be16 dport)
619 {
620         struct ip_vs_dest *dest;
621
622         /*
623          * Find the destination for the given service
624          */
625         list_for_each_entry(dest, &svc->destinations, n_list) {
626                 if ((dest->af == svc->af)
627                     && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
628                     && (dest->port == dport)) {
629                         /* HIT */
630                         return dest;
631                 }
632         }
633
634         return NULL;
635 }
636
637 /*
638  * Find destination by {daddr,dport,vaddr,protocol}
639  * Cretaed to be used in ip_vs_process_message() in
640  * the backup synchronization daemon. It finds the
641  * destination to be bound to the received connection
642  * on the backup.
643  *
644  * ip_vs_lookup_real_service() looked promissing, but
645  * seems not working as expected.
646  */
647 struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
648                                    __be16 dport,
649                                    const union nf_inet_addr *vaddr,
650                                    __be16 vport, __u16 protocol)
651 {
652         struct ip_vs_dest *dest;
653         struct ip_vs_service *svc;
654
655         svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
656         if (!svc)
657                 return NULL;
658         dest = ip_vs_lookup_dest(svc, daddr, dport);
659         if (dest)
660                 atomic_inc(&dest->refcnt);
661         ip_vs_service_put(svc);
662         return dest;
663 }
664
665 /*
666  *  Lookup dest by {svc,addr,port} in the destination trash.
667  *  The destination trash is used to hold the destinations that are removed
668  *  from the service table but are still referenced by some conn entries.
669  *  The reason to add the destination trash is when the dest is temporary
670  *  down (either by administrator or by monitor program), the dest can be
671  *  picked back from the trash, the remaining connections to the dest can
672  *  continue, and the counting information of the dest is also useful for
673  *  scheduling.
674  */
675 static struct ip_vs_dest *
676 ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
677                      __be16 dport)
678 {
679         struct ip_vs_dest *dest, *nxt;
680
681         /*
682          * Find the destination in trash
683          */
684         list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
685                 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
686                               "dest->refcnt=%d\n",
687                               dest->vfwmark,
688                               IP_VS_DBG_ADDR(svc->af, &dest->addr),
689                               ntohs(dest->port),
690                               atomic_read(&dest->refcnt));
691                 if (dest->af == svc->af &&
692                     ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
693                     dest->port == dport &&
694                     dest->vfwmark == svc->fwmark &&
695                     dest->protocol == svc->protocol &&
696                     (svc->fwmark ||
697                      (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
698                       dest->vport == svc->port))) {
699                         /* HIT */
700                         return dest;
701                 }
702
703                 /*
704                  * Try to purge the destination from trash if not referenced
705                  */
706                 if (atomic_read(&dest->refcnt) == 1) {
707                         IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
708                                       "from trash\n",
709                                       dest->vfwmark,
710                                       IP_VS_DBG_ADDR(svc->af, &dest->addr),
711                                       ntohs(dest->port));
712                         list_del(&dest->n_list);
713                         ip_vs_dst_reset(dest);
714                         __ip_vs_unbind_svc(dest);
715                         kfree(dest);
716                 }
717         }
718
719         return NULL;
720 }
721
722
723 /*
724  *  Clean up all the destinations in the trash
725  *  Called by the ip_vs_control_cleanup()
726  *
727  *  When the ip_vs_control_clearup is activated by ipvs module exit,
728  *  the service tables must have been flushed and all the connections
729  *  are expired, and the refcnt of each destination in the trash must
730  *  be 1, so we simply release them here.
731  */
732 static void ip_vs_trash_cleanup(void)
733 {
734         struct ip_vs_dest *dest, *nxt;
735
736         list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
737                 list_del(&dest->n_list);
738                 ip_vs_dst_reset(dest);
739                 __ip_vs_unbind_svc(dest);
740                 kfree(dest);
741         }
742 }
743
744
745 static void
746 ip_vs_zero_stats(struct ip_vs_stats *stats)
747 {
748         spin_lock_bh(&stats->lock);
749
750         memset(&stats->ustats, 0, sizeof(stats->ustats));
751         ip_vs_zero_estimator(stats);
752
753         spin_unlock_bh(&stats->lock);
754 }
755
756 /*
757  *      Update a destination in the given service
758  */
759 static void
760 __ip_vs_update_dest(struct ip_vs_service *svc,
761                     struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
762 {
763         int conn_flags;
764
765         /* set the weight and the flags */
766         atomic_set(&dest->weight, udest->weight);
767         conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
768
769         /* check if local node and update the flags */
770 #ifdef CONFIG_IP_VS_IPV6
771         if (svc->af == AF_INET6) {
772                 if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
773                         conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
774                                 | IP_VS_CONN_F_LOCALNODE;
775                 }
776         } else
777 #endif
778                 if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
779                         conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
780                                 | IP_VS_CONN_F_LOCALNODE;
781                 }
782
783         /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
784         if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
785                 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
786         } else {
787                 /*
788                  *    Put the real service in ip_vs_rtable if not present.
789                  *    For now only for NAT!
790                  */
791                 write_lock_bh(&__ip_vs_rs_lock);
792                 ip_vs_rs_hash(dest);
793                 write_unlock_bh(&__ip_vs_rs_lock);
794         }
795         atomic_set(&dest->conn_flags, conn_flags);
796
797         /* bind the service */
798         if (!dest->svc) {
799                 __ip_vs_bind_svc(dest, svc);
800         } else {
801                 if (dest->svc != svc) {
802                         __ip_vs_unbind_svc(dest);
803                         ip_vs_zero_stats(&dest->stats);
804                         __ip_vs_bind_svc(dest, svc);
805                 }
806         }
807
808         /* set the dest status flags */
809         dest->flags |= IP_VS_DEST_F_AVAILABLE;
810
811         if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
812                 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
813         dest->u_threshold = udest->u_threshold;
814         dest->l_threshold = udest->l_threshold;
815 }
816
817
818 /*
819  *      Create a destination for the given service
820  */
821 static int
822 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
823                struct ip_vs_dest **dest_p)
824 {
825         struct ip_vs_dest *dest;
826         unsigned atype;
827
828         EnterFunction(2);
829
830 #ifdef CONFIG_IP_VS_IPV6
831         if (svc->af == AF_INET6) {
832                 atype = ipv6_addr_type(&udest->addr.in6);
833                 if ((!(atype & IPV6_ADDR_UNICAST) ||
834                         atype & IPV6_ADDR_LINKLOCAL) &&
835                         !__ip_vs_addr_is_local_v6(&udest->addr.in6))
836                         return -EINVAL;
837         } else
838 #endif
839         {
840                 atype = inet_addr_type(&init_net, udest->addr.ip);
841                 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
842                         return -EINVAL;
843         }
844
845         dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
846         if (dest == NULL) {
847                 pr_err("%s(): no memory.\n", __func__);
848                 return -ENOMEM;
849         }
850
851         dest->af = svc->af;
852         dest->protocol = svc->protocol;
853         dest->vaddr = svc->addr;
854         dest->vport = svc->port;
855         dest->vfwmark = svc->fwmark;
856         ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
857         dest->port = udest->port;
858
859         atomic_set(&dest->activeconns, 0);
860         atomic_set(&dest->inactconns, 0);
861         atomic_set(&dest->persistconns, 0);
862         atomic_set(&dest->refcnt, 0);
863
864         INIT_LIST_HEAD(&dest->d_list);
865         spin_lock_init(&dest->dst_lock);
866         spin_lock_init(&dest->stats.lock);
867         __ip_vs_update_dest(svc, dest, udest);
868         ip_vs_new_estimator(&dest->stats);
869
870         *dest_p = dest;
871
872         LeaveFunction(2);
873         return 0;
874 }
875
876
877 /*
878  *      Add a destination into an existing service
879  */
880 static int
881 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
882 {
883         struct ip_vs_dest *dest;
884         union nf_inet_addr daddr;
885         __be16 dport = udest->port;
886         int ret;
887
888         EnterFunction(2);
889
890         if (udest->weight < 0) {
891                 pr_err("%s(): server weight less than zero\n", __func__);
892                 return -ERANGE;
893         }
894
895         if (udest->l_threshold > udest->u_threshold) {
896                 pr_err("%s(): lower threshold is higher than upper threshold\n",
897                         __func__);
898                 return -ERANGE;
899         }
900
901         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
902
903         /*
904          * Check if the dest already exists in the list
905          */
906         dest = ip_vs_lookup_dest(svc, &daddr, dport);
907
908         if (dest != NULL) {
909                 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
910                 return -EEXIST;
911         }
912
913         /*
914          * Check if the dest already exists in the trash and
915          * is from the same service
916          */
917         dest = ip_vs_trash_get_dest(svc, &daddr, dport);
918
919         if (dest != NULL) {
920                 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
921                               "dest->refcnt=%d, service %u/%s:%u\n",
922                               IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
923                               atomic_read(&dest->refcnt),
924                               dest->vfwmark,
925                               IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
926                               ntohs(dest->vport));
927
928                 __ip_vs_update_dest(svc, dest, udest);
929
930                 /*
931                  * Get the destination from the trash
932                  */
933                 list_del(&dest->n_list);
934
935                 ip_vs_new_estimator(&dest->stats);
936
937                 write_lock_bh(&__ip_vs_svc_lock);
938
939                 /*
940                  * Wait until all other svc users go away.
941                  */
942                 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
943
944                 list_add(&dest->n_list, &svc->destinations);
945                 svc->num_dests++;
946
947                 /* call the update_service function of its scheduler */
948                 if (svc->scheduler->update_service)
949                         svc->scheduler->update_service(svc);
950
951                 write_unlock_bh(&__ip_vs_svc_lock);
952                 return 0;
953         }
954
955         /*
956          * Allocate and initialize the dest structure
957          */
958         ret = ip_vs_new_dest(svc, udest, &dest);
959         if (ret) {
960                 return ret;
961         }
962
963         /*
964          * Add the dest entry into the list
965          */
966         atomic_inc(&dest->refcnt);
967
968         write_lock_bh(&__ip_vs_svc_lock);
969
970         /*
971          * Wait until all other svc users go away.
972          */
973         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
974
975         list_add(&dest->n_list, &svc->destinations);
976         svc->num_dests++;
977
978         /* call the update_service function of its scheduler */
979         if (svc->scheduler->update_service)
980                 svc->scheduler->update_service(svc);
981
982         write_unlock_bh(&__ip_vs_svc_lock);
983
984         LeaveFunction(2);
985
986         return 0;
987 }
988
989
990 /*
991  *      Edit a destination in the given service
992  */
993 static int
994 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
995 {
996         struct ip_vs_dest *dest;
997         union nf_inet_addr daddr;
998         __be16 dport = udest->port;
999
1000         EnterFunction(2);
1001
1002         if (udest->weight < 0) {
1003                 pr_err("%s(): server weight less than zero\n", __func__);
1004                 return -ERANGE;
1005         }
1006
1007         if (udest->l_threshold > udest->u_threshold) {
1008                 pr_err("%s(): lower threshold is higher than upper threshold\n",
1009                         __func__);
1010                 return -ERANGE;
1011         }
1012
1013         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
1014
1015         /*
1016          *  Lookup the destination list
1017          */
1018         dest = ip_vs_lookup_dest(svc, &daddr, dport);
1019
1020         if (dest == NULL) {
1021                 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1022                 return -ENOENT;
1023         }
1024
1025         __ip_vs_update_dest(svc, dest, udest);
1026
1027         write_lock_bh(&__ip_vs_svc_lock);
1028
1029         /* Wait until all other svc users go away */
1030         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1031
1032         /* call the update_service, because server weight may be changed */
1033         if (svc->scheduler->update_service)
1034                 svc->scheduler->update_service(svc);
1035
1036         write_unlock_bh(&__ip_vs_svc_lock);
1037
1038         LeaveFunction(2);
1039
1040         return 0;
1041 }
1042
1043
1044 /*
1045  *      Delete a destination (must be already unlinked from the service)
1046  */
1047 static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1048 {
1049         ip_vs_kill_estimator(&dest->stats);
1050
1051         /*
1052          *  Remove it from the d-linked list with the real services.
1053          */
1054         write_lock_bh(&__ip_vs_rs_lock);
1055         ip_vs_rs_unhash(dest);
1056         write_unlock_bh(&__ip_vs_rs_lock);
1057
1058         /*
1059          *  Decrease the refcnt of the dest, and free the dest
1060          *  if nobody refers to it (refcnt=0). Otherwise, throw
1061          *  the destination into the trash.
1062          */
1063         if (atomic_dec_and_test(&dest->refcnt)) {
1064                 ip_vs_dst_reset(dest);
1065                 /* simply decrease svc->refcnt here, let the caller check
1066                    and release the service if nobody refers to it.
1067                    Only user context can release destination and service,
1068                    and only one user context can update virtual service at a
1069                    time, so the operation here is OK */
1070                 atomic_dec(&dest->svc->refcnt);
1071                 kfree(dest);
1072         } else {
1073                 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1074                               "dest->refcnt=%d\n",
1075                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1076                               ntohs(dest->port),
1077                               atomic_read(&dest->refcnt));
1078                 list_add(&dest->n_list, &ip_vs_dest_trash);
1079                 atomic_inc(&dest->refcnt);
1080         }
1081 }
1082
1083
1084 /*
1085  *      Unlink a destination from the given service
1086  */
1087 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1088                                 struct ip_vs_dest *dest,
1089                                 int svcupd)
1090 {
1091         dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1092
1093         /*
1094          *  Remove it from the d-linked destination list.
1095          */
1096         list_del(&dest->n_list);
1097         svc->num_dests--;
1098
1099         /*
1100          *  Call the update_service function of its scheduler
1101          */
1102         if (svcupd && svc->scheduler->update_service)
1103                         svc->scheduler->update_service(svc);
1104 }
1105
1106
1107 /*
1108  *      Delete a destination server in the given service
1109  */
1110 static int
1111 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1112 {
1113         struct ip_vs_dest *dest;
1114         __be16 dport = udest->port;
1115
1116         EnterFunction(2);
1117
1118         dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1119
1120         if (dest == NULL) {
1121                 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1122                 return -ENOENT;
1123         }
1124
1125         write_lock_bh(&__ip_vs_svc_lock);
1126
1127         /*
1128          *      Wait until all other svc users go away.
1129          */
1130         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1131
1132         /*
1133          *      Unlink dest from the service
1134          */
1135         __ip_vs_unlink_dest(svc, dest, 1);
1136
1137         write_unlock_bh(&__ip_vs_svc_lock);
1138
1139         /*
1140          *      Delete the destination
1141          */
1142         __ip_vs_del_dest(dest);
1143
1144         LeaveFunction(2);
1145
1146         return 0;
1147 }
1148
1149
1150 /*
1151  *      Add a service into the service hash table
1152  */
1153 static int
1154 ip_vs_add_service(struct ip_vs_service_user_kern *u,
1155                   struct ip_vs_service **svc_p)
1156 {
1157         int ret = 0;
1158         struct ip_vs_scheduler *sched = NULL;
1159         struct ip_vs_service *svc = NULL;
1160
1161         /* increase the module use count */
1162         ip_vs_use_count_inc();
1163
1164         /* Lookup the scheduler by 'u->sched_name' */
1165         sched = ip_vs_scheduler_get(u->sched_name);
1166         if (sched == NULL) {
1167                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1168                 ret = -ENOENT;
1169                 goto out_mod_dec;
1170         }
1171
1172 #ifdef CONFIG_IP_VS_IPV6
1173         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1174                 ret = -EINVAL;
1175                 goto out_err;
1176         }
1177 #endif
1178
1179         svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1180         if (svc == NULL) {
1181                 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1182                 ret = -ENOMEM;
1183                 goto out_err;
1184         }
1185
1186         /* I'm the first user of the service */
1187         atomic_set(&svc->usecnt, 1);
1188         atomic_set(&svc->refcnt, 0);
1189
1190         svc->af = u->af;
1191         svc->protocol = u->protocol;
1192         ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1193         svc->port = u->port;
1194         svc->fwmark = u->fwmark;
1195         svc->flags = u->flags;
1196         svc->timeout = u->timeout * HZ;
1197         svc->netmask = u->netmask;
1198
1199         INIT_LIST_HEAD(&svc->destinations);
1200         rwlock_init(&svc->sched_lock);
1201         spin_lock_init(&svc->stats.lock);
1202
1203         /* Bind the scheduler */
1204         ret = ip_vs_bind_scheduler(svc, sched);
1205         if (ret)
1206                 goto out_err;
1207         sched = NULL;
1208
1209         /* Update the virtual service counters */
1210         if (svc->port == FTPPORT)
1211                 atomic_inc(&ip_vs_ftpsvc_counter);
1212         else if (svc->port == 0)
1213                 atomic_inc(&ip_vs_nullsvc_counter);
1214
1215         ip_vs_new_estimator(&svc->stats);
1216
1217         /* Count only IPv4 services for old get/setsockopt interface */
1218         if (svc->af == AF_INET)
1219                 ip_vs_num_services++;
1220
1221         /* Hash the service into the service table */
1222         write_lock_bh(&__ip_vs_svc_lock);
1223         ip_vs_svc_hash(svc);
1224         write_unlock_bh(&__ip_vs_svc_lock);
1225
1226         *svc_p = svc;
1227         return 0;
1228
1229   out_err:
1230         if (svc != NULL) {
1231                 if (svc->scheduler)
1232                         ip_vs_unbind_scheduler(svc);
1233                 if (svc->inc) {
1234                         local_bh_disable();
1235                         ip_vs_app_inc_put(svc->inc);
1236                         local_bh_enable();
1237                 }
1238                 kfree(svc);
1239         }
1240         ip_vs_scheduler_put(sched);
1241
1242   out_mod_dec:
1243         /* decrease the module use count */
1244         ip_vs_use_count_dec();
1245
1246         return ret;
1247 }
1248
1249
1250 /*
1251  *      Edit a service and bind it with a new scheduler
1252  */
1253 static int
1254 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1255 {
1256         struct ip_vs_scheduler *sched, *old_sched;
1257         int ret = 0;
1258
1259         /*
1260          * Lookup the scheduler, by 'u->sched_name'
1261          */
1262         sched = ip_vs_scheduler_get(u->sched_name);
1263         if (sched == NULL) {
1264                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1265                 return -ENOENT;
1266         }
1267         old_sched = sched;
1268
1269 #ifdef CONFIG_IP_VS_IPV6
1270         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1271                 ret = -EINVAL;
1272                 goto out;
1273         }
1274 #endif
1275
1276         write_lock_bh(&__ip_vs_svc_lock);
1277
1278         /*
1279          * Wait until all other svc users go away.
1280          */
1281         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1282
1283         /*
1284          * Set the flags and timeout value
1285          */
1286         svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1287         svc->timeout = u->timeout * HZ;
1288         svc->netmask = u->netmask;
1289
1290         old_sched = svc->scheduler;
1291         if (sched != old_sched) {
1292                 /*
1293                  * Unbind the old scheduler
1294                  */
1295                 if ((ret = ip_vs_unbind_scheduler(svc))) {
1296                         old_sched = sched;
1297                         goto out_unlock;
1298                 }
1299
1300                 /*
1301                  * Bind the new scheduler
1302                  */
1303                 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1304                         /*
1305                          * If ip_vs_bind_scheduler fails, restore the old
1306                          * scheduler.
1307                          * The main reason of failure is out of memory.
1308                          *
1309                          * The question is if the old scheduler can be
1310                          * restored all the time. TODO: if it cannot be
1311                          * restored some time, we must delete the service,
1312                          * otherwise the system may crash.
1313                          */
1314                         ip_vs_bind_scheduler(svc, old_sched);
1315                         old_sched = sched;
1316                         goto out_unlock;
1317                 }
1318         }
1319
1320   out_unlock:
1321         write_unlock_bh(&__ip_vs_svc_lock);
1322 #ifdef CONFIG_IP_VS_IPV6
1323   out:
1324 #endif
1325
1326         if (old_sched)
1327                 ip_vs_scheduler_put(old_sched);
1328
1329         return ret;
1330 }
1331
1332
1333 /*
1334  *      Delete a service from the service list
1335  *      - The service must be unlinked, unlocked and not referenced!
1336  *      - We are called under _bh lock
1337  */
1338 static void __ip_vs_del_service(struct ip_vs_service *svc)
1339 {
1340         struct ip_vs_dest *dest, *nxt;
1341         struct ip_vs_scheduler *old_sched;
1342
1343         /* Count only IPv4 services for old get/setsockopt interface */
1344         if (svc->af == AF_INET)
1345                 ip_vs_num_services--;
1346
1347         ip_vs_kill_estimator(&svc->stats);
1348
1349         /* Unbind scheduler */
1350         old_sched = svc->scheduler;
1351         ip_vs_unbind_scheduler(svc);
1352         if (old_sched)
1353                 ip_vs_scheduler_put(old_sched);
1354
1355         /* Unbind app inc */
1356         if (svc->inc) {
1357                 ip_vs_app_inc_put(svc->inc);
1358                 svc->inc = NULL;
1359         }
1360
1361         /*
1362          *    Unlink the whole destination list
1363          */
1364         list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1365                 __ip_vs_unlink_dest(svc, dest, 0);
1366                 __ip_vs_del_dest(dest);
1367         }
1368
1369         /*
1370          *    Update the virtual service counters
1371          */
1372         if (svc->port == FTPPORT)
1373                 atomic_dec(&ip_vs_ftpsvc_counter);
1374         else if (svc->port == 0)
1375                 atomic_dec(&ip_vs_nullsvc_counter);
1376
1377         /*
1378          *    Free the service if nobody refers to it
1379          */
1380         if (atomic_read(&svc->refcnt) == 0)
1381                 kfree(svc);
1382
1383         /* decrease the module use count */
1384         ip_vs_use_count_dec();
1385 }
1386
1387 /*
1388  *      Delete a service from the service list
1389  */
1390 static int ip_vs_del_service(struct ip_vs_service *svc)
1391 {
1392         if (svc == NULL)
1393                 return -EEXIST;
1394
1395         /*
1396          * Unhash it from the service table
1397          */
1398         write_lock_bh(&__ip_vs_svc_lock);
1399
1400         ip_vs_svc_unhash(svc);
1401
1402         /*
1403          * Wait until all the svc users go away.
1404          */
1405         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1406
1407         __ip_vs_del_service(svc);
1408
1409         write_unlock_bh(&__ip_vs_svc_lock);
1410
1411         return 0;
1412 }
1413
1414
1415 /*
1416  *      Flush all the virtual services
1417  */
1418 static int ip_vs_flush(void)
1419 {
1420         int idx;
1421         struct ip_vs_service *svc, *nxt;
1422
1423         /*
1424          * Flush the service table hashed by <protocol,addr,port>
1425          */
1426         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1427                 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1428                         write_lock_bh(&__ip_vs_svc_lock);
1429                         ip_vs_svc_unhash(svc);
1430                         /*
1431                          * Wait until all the svc users go away.
1432                          */
1433                         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1434                         __ip_vs_del_service(svc);
1435                         write_unlock_bh(&__ip_vs_svc_lock);
1436                 }
1437         }
1438
1439         /*
1440          * Flush the service table hashed by fwmark
1441          */
1442         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1443                 list_for_each_entry_safe(svc, nxt,
1444                                          &ip_vs_svc_fwm_table[idx], f_list) {
1445                         write_lock_bh(&__ip_vs_svc_lock);
1446                         ip_vs_svc_unhash(svc);
1447                         /*
1448                          * Wait until all the svc users go away.
1449                          */
1450                         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1451                         __ip_vs_del_service(svc);
1452                         write_unlock_bh(&__ip_vs_svc_lock);
1453                 }
1454         }
1455
1456         return 0;
1457 }
1458
1459
1460 /*
1461  *      Zero counters in a service or all services
1462  */
1463 static int ip_vs_zero_service(struct ip_vs_service *svc)
1464 {
1465         struct ip_vs_dest *dest;
1466
1467         write_lock_bh(&__ip_vs_svc_lock);
1468         list_for_each_entry(dest, &svc->destinations, n_list) {
1469                 ip_vs_zero_stats(&dest->stats);
1470         }
1471         ip_vs_zero_stats(&svc->stats);
1472         write_unlock_bh(&__ip_vs_svc_lock);
1473         return 0;
1474 }
1475
1476 static int ip_vs_zero_all(void)
1477 {
1478         int idx;
1479         struct ip_vs_service *svc;
1480
1481         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1482                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1483                         ip_vs_zero_service(svc);
1484                 }
1485         }
1486
1487         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1488                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1489                         ip_vs_zero_service(svc);
1490                 }
1491         }
1492
1493         ip_vs_zero_stats(&ip_vs_stats);
1494         return 0;
1495 }
1496
1497
1498 static int
1499 proc_do_defense_mode(ctl_table *table, int write,
1500                      void __user *buffer, size_t *lenp, loff_t *ppos)
1501 {
1502         int *valp = table->data;
1503         int val = *valp;
1504         int rc;
1505
1506         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1507         if (write && (*valp != val)) {
1508                 if ((*valp < 0) || (*valp > 3)) {
1509                         /* Restore the correct value */
1510                         *valp = val;
1511                 } else {
1512                         update_defense_level();
1513                 }
1514         }
1515         return rc;
1516 }
1517
1518
1519 static int
1520 proc_do_sync_threshold(ctl_table *table, int write,
1521                        void __user *buffer, size_t *lenp, loff_t *ppos)
1522 {
1523         int *valp = table->data;
1524         int val[2];
1525         int rc;
1526
1527         /* backup the value first */
1528         memcpy(val, valp, sizeof(val));
1529
1530         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1531         if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1532                 /* Restore the correct value */
1533                 memcpy(valp, val, sizeof(val));
1534         }
1535         return rc;
1536 }
1537
1538
1539 /*
1540  *      IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1541  */
1542
1543 static struct ctl_table vs_vars[] = {
1544         {
1545                 .procname       = "amemthresh",
1546                 .data           = &sysctl_ip_vs_amemthresh,
1547                 .maxlen         = sizeof(int),
1548                 .mode           = 0644,
1549                 .proc_handler   = proc_dointvec,
1550         },
1551 #ifdef CONFIG_IP_VS_DEBUG
1552         {
1553                 .procname       = "debug_level",
1554                 .data           = &sysctl_ip_vs_debug_level,
1555                 .maxlen         = sizeof(int),
1556                 .mode           = 0644,
1557                 .proc_handler   = proc_dointvec,
1558         },
1559 #endif
1560         {
1561                 .procname       = "am_droprate",
1562                 .data           = &sysctl_ip_vs_am_droprate,
1563                 .maxlen         = sizeof(int),
1564                 .mode           = 0644,
1565                 .proc_handler   = proc_dointvec,
1566         },
1567         {
1568                 .procname       = "drop_entry",
1569                 .data           = &sysctl_ip_vs_drop_entry,
1570                 .maxlen         = sizeof(int),
1571                 .mode           = 0644,
1572                 .proc_handler   = proc_do_defense_mode,
1573         },
1574         {
1575                 .procname       = "drop_packet",
1576                 .data           = &sysctl_ip_vs_drop_packet,
1577                 .maxlen         = sizeof(int),
1578                 .mode           = 0644,
1579                 .proc_handler   = proc_do_defense_mode,
1580         },
1581         {
1582                 .procname       = "secure_tcp",
1583                 .data           = &sysctl_ip_vs_secure_tcp,
1584                 .maxlen         = sizeof(int),
1585                 .mode           = 0644,
1586                 .proc_handler   = proc_do_defense_mode,
1587         },
1588 #if 0
1589         {
1590                 .procname       = "timeout_established",
1591                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1592                 .maxlen         = sizeof(int),
1593                 .mode           = 0644,
1594                 .proc_handler   = proc_dointvec_jiffies,
1595         },
1596         {
1597                 .procname       = "timeout_synsent",
1598                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1599                 .maxlen         = sizeof(int),
1600                 .mode           = 0644,
1601                 .proc_handler   = proc_dointvec_jiffies,
1602         },
1603         {
1604                 .procname       = "timeout_synrecv",
1605                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1606                 .maxlen         = sizeof(int),
1607                 .mode           = 0644,
1608                 .proc_handler   = proc_dointvec_jiffies,
1609         },
1610         {
1611                 .procname       = "timeout_finwait",
1612                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1613                 .maxlen         = sizeof(int),
1614                 .mode           = 0644,
1615                 .proc_handler   = proc_dointvec_jiffies,
1616         },
1617         {
1618                 .procname       = "timeout_timewait",
1619                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1620                 .maxlen         = sizeof(int),
1621                 .mode           = 0644,
1622                 .proc_handler   = proc_dointvec_jiffies,
1623         },
1624         {
1625                 .procname       = "timeout_close",
1626                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1627                 .maxlen         = sizeof(int),
1628                 .mode           = 0644,
1629                 .proc_handler   = proc_dointvec_jiffies,
1630         },
1631         {
1632                 .procname       = "timeout_closewait",
1633                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1634                 .maxlen         = sizeof(int),
1635                 .mode           = 0644,
1636                 .proc_handler   = proc_dointvec_jiffies,
1637         },
1638         {
1639                 .procname       = "timeout_lastack",
1640                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1641                 .maxlen         = sizeof(int),
1642                 .mode           = 0644,
1643                 .proc_handler   = proc_dointvec_jiffies,
1644         },
1645         {
1646                 .procname       = "timeout_listen",
1647                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1648                 .maxlen         = sizeof(int),
1649                 .mode           = 0644,
1650                 .proc_handler   = proc_dointvec_jiffies,
1651         },
1652         {
1653                 .procname       = "timeout_synack",
1654                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1655                 .maxlen         = sizeof(int),
1656                 .mode           = 0644,
1657                 .proc_handler   = proc_dointvec_jiffies,
1658         },
1659         {
1660                 .procname       = "timeout_udp",
1661                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1662                 .maxlen         = sizeof(int),
1663                 .mode           = 0644,
1664                 .proc_handler   = proc_dointvec_jiffies,
1665         },
1666         {
1667                 .procname       = "timeout_icmp",
1668                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1669                 .maxlen         = sizeof(int),
1670                 .mode           = 0644,
1671                 .proc_handler   = proc_dointvec_jiffies,
1672         },
1673 #endif
1674         {
1675                 .procname       = "cache_bypass",
1676                 .data           = &sysctl_ip_vs_cache_bypass,
1677                 .maxlen         = sizeof(int),
1678                 .mode           = 0644,
1679                 .proc_handler   = proc_dointvec,
1680         },
1681         {
1682                 .procname       = "expire_nodest_conn",
1683                 .data           = &sysctl_ip_vs_expire_nodest_conn,
1684                 .maxlen         = sizeof(int),
1685                 .mode           = 0644,
1686                 .proc_handler   = proc_dointvec,
1687         },
1688         {
1689                 .procname       = "expire_quiescent_template",
1690                 .data           = &sysctl_ip_vs_expire_quiescent_template,
1691                 .maxlen         = sizeof(int),
1692                 .mode           = 0644,
1693                 .proc_handler   = proc_dointvec,
1694         },
1695         {
1696                 .procname       = "sync_threshold",
1697                 .data           = &sysctl_ip_vs_sync_threshold,
1698                 .maxlen         = sizeof(sysctl_ip_vs_sync_threshold),
1699                 .mode           = 0644,
1700                 .proc_handler   = proc_do_sync_threshold,
1701         },
1702         {
1703                 .procname       = "nat_icmp_send",
1704                 .data           = &sysctl_ip_vs_nat_icmp_send,
1705                 .maxlen         = sizeof(int),
1706                 .mode           = 0644,
1707                 .proc_handler   = proc_dointvec,
1708         },
1709         { }
1710 };
1711
1712 const struct ctl_path net_vs_ctl_path[] = {
1713         { .procname = "net", },
1714         { .procname = "ipv4", },
1715         { .procname = "vs", },
1716         { }
1717 };
1718 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1719
1720 static struct ctl_table_header * sysctl_header;
1721
1722 #ifdef CONFIG_PROC_FS
1723
/* Position of the /proc service listing, kept in seq->private */
struct ip_vs_iter {
        /* hash table being walked: ip_vs_svc_table or ip_vs_svc_fwm_table */
        struct list_head *table;
        /* index of the current bucket within that table */
        int bucket;
};
1728
1729 /*
1730  *      Write the contents of the VS rule table to a PROCfs file.
1731  *      (It is kept just for backward compatibility)
1732  */
1733 static inline const char *ip_vs_fwd_name(unsigned flags)
1734 {
1735         switch (flags & IP_VS_CONN_F_FWD_MASK) {
1736         case IP_VS_CONN_F_LOCALNODE:
1737                 return "Local";
1738         case IP_VS_CONN_F_TUNNEL:
1739                 return "Tunnel";
1740         case IP_VS_CONN_F_DROUTE:
1741                 return "Route";
1742         default:
1743                 return "Masq";
1744         }
1745 }
1746
1747
1748 /* Get the Nth entry in the two lists */
1749 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1750 {
1751         struct ip_vs_iter *iter = seq->private;
1752         int idx;
1753         struct ip_vs_service *svc;
1754
1755         /* look in hash by protocol */
1756         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1757                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1758                         if (pos-- == 0){
1759                                 iter->table = ip_vs_svc_table;
1760                                 iter->bucket = idx;
1761                                 return svc;
1762                         }
1763                 }
1764         }
1765
1766         /* keep looking in fwmark */
1767         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1768                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1769                         if (pos-- == 0) {
1770                                 iter->table = ip_vs_svc_fwm_table;
1771                                 iter->bucket = idx;
1772                                 return svc;
1773                         }
1774                 }
1775         }
1776
1777         return NULL;
1778 }
1779
1780 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1781 __acquires(__ip_vs_svc_lock)
1782 {
1783
1784         read_lock_bh(&__ip_vs_svc_lock);
1785         return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1786 }
1787
1788
1789 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1790 {
1791         struct list_head *e;
1792         struct ip_vs_iter *iter;
1793         struct ip_vs_service *svc;
1794
1795         ++*pos;
1796         if (v == SEQ_START_TOKEN)
1797                 return ip_vs_info_array(seq,0);
1798
1799         svc = v;
1800         iter = seq->private;
1801
1802         if (iter->table == ip_vs_svc_table) {
1803                 /* next service in table hashed by protocol */
1804                 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1805                         return list_entry(e, struct ip_vs_service, s_list);
1806
1807
1808                 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1809                         list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1810                                             s_list) {
1811                                 return svc;
1812                         }
1813                 }
1814
1815                 iter->table = ip_vs_svc_fwm_table;
1816                 iter->bucket = -1;
1817                 goto scan_fwmark;
1818         }
1819
1820         /* next service in hashed by fwmark */
1821         if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1822                 return list_entry(e, struct ip_vs_service, f_list);
1823
1824  scan_fwmark:
1825         while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1826                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1827                                     f_list)
1828                         return svc;
1829         }
1830
1831         return NULL;
1832 }
1833
1834 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1835 __releases(__ip_vs_svc_lock)
1836 {
1837         read_unlock_bh(&__ip_vs_svc_lock);
1838 }
1839
1840
1841 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1842 {
1843         if (v == SEQ_START_TOKEN) {
1844                 seq_printf(seq,
1845                         "IP Virtual Server version %d.%d.%d (size=%d)\n",
1846                         NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1847                 seq_puts(seq,
1848                          "Prot LocalAddress:Port Scheduler Flags\n");
1849                 seq_puts(seq,
1850                          "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1851         } else {
1852                 const struct ip_vs_service *svc = v;
1853                 const struct ip_vs_iter *iter = seq->private;
1854                 const struct ip_vs_dest *dest;
1855
1856                 if (iter->table == ip_vs_svc_table) {
1857 #ifdef CONFIG_IP_VS_IPV6
1858                         if (svc->af == AF_INET6)
1859                                 seq_printf(seq, "%s  [%pI6]:%04X %s ",
1860                                            ip_vs_proto_name(svc->protocol),
1861                                            &svc->addr.in6,
1862                                            ntohs(svc->port),
1863                                            svc->scheduler->name);
1864                         else
1865 #endif
1866                                 seq_printf(seq, "%s  %08X:%04X %s ",
1867                                            ip_vs_proto_name(svc->protocol),
1868                                            ntohl(svc->addr.ip),
1869                                            ntohs(svc->port),
1870                                            svc->scheduler->name);
1871                 } else {
1872                         seq_printf(seq, "FWM  %08X %s ",
1873                                    svc->fwmark, svc->scheduler->name);
1874                 }
1875
1876                 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1877                         seq_printf(seq, "persistent %d %08X\n",
1878                                 svc->timeout,
1879                                 ntohl(svc->netmask));
1880                 else
1881                         seq_putc(seq, '\n');
1882
1883                 list_for_each_entry(dest, &svc->destinations, n_list) {
1884 #ifdef CONFIG_IP_VS_IPV6
1885                         if (dest->af == AF_INET6)
1886                                 seq_printf(seq,
1887                                            "  -> [%pI6]:%04X"
1888                                            "      %-7s %-6d %-10d %-10d\n",
1889                                            &dest->addr.in6,
1890                                            ntohs(dest->port),
1891                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1892                                            atomic_read(&dest->weight),
1893                                            atomic_read(&dest->activeconns),
1894                                            atomic_read(&dest->inactconns));
1895                         else
1896 #endif
1897                                 seq_printf(seq,
1898                                            "  -> %08X:%04X      "
1899                                            "%-7s %-6d %-10d %-10d\n",
1900                                            ntohl(dest->addr.ip),
1901                                            ntohs(dest->port),
1902                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1903                                            atomic_read(&dest->weight),
1904                                            atomic_read(&dest->activeconns),
1905                                            atomic_read(&dest->inactconns));
1906
1907                 }
1908         }
1909         return 0;
1910 }
1911
1912 static const struct seq_operations ip_vs_info_seq_ops = {
1913         .start = ip_vs_info_seq_start,
1914         .next  = ip_vs_info_seq_next,
1915         .stop  = ip_vs_info_seq_stop,
1916         .show  = ip_vs_info_seq_show,
1917 };
1918
1919 static int ip_vs_info_open(struct inode *inode, struct file *file)
1920 {
1921         return seq_open_private(file, &ip_vs_info_seq_ops,
1922                         sizeof(struct ip_vs_iter));
1923 }
1924
1925 static const struct file_operations ip_vs_info_fops = {
1926         .owner   = THIS_MODULE,
1927         .open    = ip_vs_info_open,
1928         .read    = seq_read,
1929         .llseek  = seq_lseek,
1930         .release = seq_release_private,
1931 };
1932
1933 #endif
1934
1935 struct ip_vs_stats ip_vs_stats = {
1936         .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1937 };
1938
1939 #ifdef CONFIG_PROC_FS
1940 static int ip_vs_stats_show(struct seq_file *seq, void *v)
1941 {
1942
1943 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
1944         seq_puts(seq,
1945                  "   Total Incoming Outgoing         Incoming         Outgoing\n");
1946         seq_printf(seq,
1947                    "   Conns  Packets  Packets            Bytes            Bytes\n");
1948
1949         spin_lock_bh(&ip_vs_stats.lock);
1950         seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
1951                    ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
1952                    (unsigned long long) ip_vs_stats.ustats.inbytes,
1953                    (unsigned long long) ip_vs_stats.ustats.outbytes);
1954
1955 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1956         seq_puts(seq,
1957                    " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
1958         seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1959                         ip_vs_stats.ustats.cps,
1960                         ip_vs_stats.ustats.inpps,
1961                         ip_vs_stats.ustats.outpps,
1962                         ip_vs_stats.ustats.inbps,
1963                         ip_vs_stats.ustats.outbps);
1964         spin_unlock_bh(&ip_vs_stats.lock);
1965
1966         return 0;
1967 }
1968
1969 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1970 {
1971         return single_open(file, ip_vs_stats_show, NULL);
1972 }
1973
1974 static const struct file_operations ip_vs_stats_fops = {
1975         .owner = THIS_MODULE,
1976         .open = ip_vs_stats_seq_open,
1977         .read = seq_read,
1978         .llseek = seq_lseek,
1979         .release = single_release,
1980 };
1981
1982 #endif
1983
1984 /*
1985  *      Set timeout values for tcp tcpfin udp in the timeout_table.
1986  */
1987 static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
1988 {
1989         IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1990                   u->tcp_timeout,
1991                   u->tcp_fin_timeout,
1992                   u->udp_timeout);
1993
1994 #ifdef CONFIG_IP_VS_PROTO_TCP
1995         if (u->tcp_timeout) {
1996                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
1997                         = u->tcp_timeout * HZ;
1998         }
1999
2000         if (u->tcp_fin_timeout) {
2001                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
2002                         = u->tcp_fin_timeout * HZ;
2003         }
2004 #endif
2005
2006 #ifdef CONFIG_IP_VS_PROTO_UDP
2007         if (u->udp_timeout) {
2008                 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
2009                         = u->udp_timeout * HZ;
2010         }
2011 #endif
2012         return 0;
2013 }
2014
2015
/*
 * Helpers for validating setsockopt() arguments.
 *
 * SET_CMDID() turns a command number into an index relative to
 * IP_VS_BASE_CTL; its argument is parenthesized so that an expression can be
 * passed safely.  The *_ARG_LEN macros give the exact argument size expected
 * for each family of commands, and MAX_ARG_LEN is the largest of them.
 */
#define SET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
#define MAX_ARG_LEN		SVCDEST_ARG_LEN
2023
2024 static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2025         [SET_CMDID(IP_VS_SO_SET_ADD)]           = SERVICE_ARG_LEN,
2026         [SET_CMDID(IP_VS_SO_SET_EDIT)]          = SERVICE_ARG_LEN,
2027         [SET_CMDID(IP_VS_SO_SET_DEL)]           = SERVICE_ARG_LEN,
2028         [SET_CMDID(IP_VS_SO_SET_FLUSH)]         = 0,
2029         [SET_CMDID(IP_VS_SO_SET_ADDDEST)]       = SVCDEST_ARG_LEN,
2030         [SET_CMDID(IP_VS_SO_SET_DELDEST)]       = SVCDEST_ARG_LEN,
2031         [SET_CMDID(IP_VS_SO_SET_EDITDEST)]      = SVCDEST_ARG_LEN,
2032         [SET_CMDID(IP_VS_SO_SET_TIMEOUT)]       = TIMEOUT_ARG_LEN,
2033         [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]   = DAEMON_ARG_LEN,
2034         [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]    = DAEMON_ARG_LEN,
2035         [SET_CMDID(IP_VS_SO_SET_ZERO)]          = SERVICE_ARG_LEN,
2036 };
2037
2038 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2039                                   struct ip_vs_service_user *usvc_compat)
2040 {
2041         usvc->af                = AF_INET;
2042         usvc->protocol          = usvc_compat->protocol;
2043         usvc->addr.ip           = usvc_compat->addr;
2044         usvc->port              = usvc_compat->port;
2045         usvc->fwmark            = usvc_compat->fwmark;
2046
2047         /* Deep copy of sched_name is not needed here */
2048         usvc->sched_name        = usvc_compat->sched_name;
2049
2050         usvc->flags             = usvc_compat->flags;
2051         usvc->timeout           = usvc_compat->timeout;
2052         usvc->netmask           = usvc_compat->netmask;
2053 }
2054
2055 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2056                                    struct ip_vs_dest_user *udest_compat)
2057 {
2058         udest->addr.ip          = udest_compat->addr;
2059         udest->port             = udest_compat->port;
2060         udest->conn_flags       = udest_compat->conn_flags;
2061         udest->weight           = udest_compat->weight;
2062         udest->u_threshold      = udest_compat->u_threshold;
2063         udest->l_threshold      = udest_compat->l_threshold;
2064 }
2065
2066 static int
2067 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2068 {
2069         int ret;
2070         unsigned char arg[MAX_ARG_LEN];
2071         struct ip_vs_service_user *usvc_compat;
2072         struct ip_vs_service_user_kern usvc;
2073         struct ip_vs_service *svc;
2074         struct ip_vs_dest_user *udest_compat;
2075         struct ip_vs_dest_user_kern udest;
2076
2077         if (!capable(CAP_NET_ADMIN))
2078                 return -EPERM;
2079
2080         if (len != set_arglen[SET_CMDID(cmd)]) {
2081                 pr_err("set_ctl: len %u != %u\n",
2082                        len, set_arglen[SET_CMDID(cmd)]);
2083                 return -EINVAL;
2084         }
2085
2086         if (copy_from_user(arg, user, len) != 0)
2087                 return -EFAULT;
2088
2089         /* increase the module use count */
2090         ip_vs_use_count_inc();
2091
2092         if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2093                 ret = -ERESTARTSYS;
2094                 goto out_dec;
2095         }
2096
2097         if (cmd == IP_VS_SO_SET_FLUSH) {
2098                 /* Flush the virtual service */
2099                 ret = ip_vs_flush();
2100                 goto out_unlock;
2101         } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2102                 /* Set timeout values for (tcp tcpfin udp) */
2103                 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
2104                 goto out_unlock;
2105         } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2106                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2107                 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
2108                 goto out_unlock;
2109         } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2110                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2111                 ret = stop_sync_thread(dm->state);
2112                 goto out_unlock;
2113         }
2114
2115         usvc_compat = (struct ip_vs_service_user *)arg;
2116         udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2117
2118         /* We only use the new structs internally, so copy userspace compat
2119          * structs to extended internal versions */
2120         ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2121         ip_vs_copy_udest_compat(&udest, udest_compat);
2122
2123         if (cmd == IP_VS_SO_SET_ZERO) {
2124                 /* if no service address is set, zero counters in all */
2125                 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2126                         ret = ip_vs_zero_all();
2127                         goto out_unlock;
2128                 }
2129         }
2130
2131         /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
2132         if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
2133                 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2134                        usvc.protocol, &usvc.addr.ip,
2135                        ntohs(usvc.port), usvc.sched_name);
2136                 ret = -EFAULT;
2137                 goto out_unlock;
2138         }
2139
2140         /* Lookup the exact service by <protocol, addr, port> or fwmark */
2141         if (usvc.fwmark == 0)
2142                 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
2143                                           &usvc.addr, usvc.port);
2144         else
2145                 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
2146
2147         if (cmd != IP_VS_SO_SET_ADD
2148             && (svc == NULL || svc->protocol != usvc.protocol)) {
2149                 ret = -ESRCH;
2150                 goto out_unlock;
2151         }
2152
2153         switch (cmd) {
2154         case IP_VS_SO_SET_ADD:
2155                 if (svc != NULL)
2156                         ret = -EEXIST;
2157                 else
2158                         ret = ip_vs_add_service(&usvc, &svc);
2159                 break;
2160         case IP_VS_SO_SET_EDIT:
2161                 ret = ip_vs_edit_service(svc, &usvc);
2162                 break;
2163         case IP_VS_SO_SET_DEL:
2164                 ret = ip_vs_del_service(svc);
2165                 if (!ret)
2166                         goto out_unlock;
2167                 break;
2168         case IP_VS_SO_SET_ZERO:
2169                 ret = ip_vs_zero_service(svc);
2170                 break;
2171         case IP_VS_SO_SET_ADDDEST:
2172                 ret = ip_vs_add_dest(svc, &udest);
2173                 break;
2174         case IP_VS_SO_SET_EDITDEST:
2175                 ret = ip_vs_edit_dest(svc, &udest);
2176                 break;
2177         case IP_VS_SO_SET_DELDEST:
2178                 ret = ip_vs_del_dest(svc, &udest);
2179                 break;
2180         default:
2181                 ret = -EINVAL;
2182         }
2183
2184         if (svc)
2185                 ip_vs_service_put(svc);
2186
2187   out_unlock:
2188         mutex_unlock(&__ip_vs_mutex);
2189   out_dec:
2190         /* decrease the module use count */
2191         ip_vs_use_count_dec();
2192
2193         return ret;
2194 }
2195
2196
2197 static void
2198 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2199 {
2200         spin_lock_bh(&src->lock);
2201         memcpy(dst, &src->ustats, sizeof(*dst));
2202         spin_unlock_bh(&src->lock);
2203 }
2204
2205 static void
2206 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2207 {
2208         dst->protocol = src->protocol;
2209         dst->addr = src->addr.ip;
2210         dst->port = src->port;
2211         dst->fwmark = src->fwmark;
2212         strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2213         dst->flags = src->flags;
2214         dst->timeout = src->timeout / HZ;
2215         dst->netmask = src->netmask;
2216         dst->num_dests = src->num_dests;
2217         ip_vs_copy_stats(&dst->stats, &src->stats);
2218 }
2219
2220 static inline int
2221 __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2222                             struct ip_vs_get_services __user *uptr)
2223 {
2224         int idx, count=0;
2225         struct ip_vs_service *svc;
2226         struct ip_vs_service_entry entry;
2227         int ret = 0;
2228
2229         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2230                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2231                         /* Only expose IPv4 entries to old interface */
2232                         if (svc->af != AF_INET)
2233                                 continue;
2234
2235                         if (count >= get->num_services)
2236                                 goto out;
2237                         memset(&entry, 0, sizeof(entry));
2238                         ip_vs_copy_service(&entry, svc);
2239                         if (copy_to_user(&uptr->entrytable[count],
2240                                          &entry, sizeof(entry))) {
2241                                 ret = -EFAULT;
2242                                 goto out;
2243                         }
2244                         count++;
2245                 }
2246         }
2247
2248         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2249                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2250                         /* Only expose IPv4 entries to old interface */
2251                         if (svc->af != AF_INET)
2252                                 continue;
2253
2254                         if (count >= get->num_services)
2255                                 goto out;
2256                         memset(&entry, 0, sizeof(entry));
2257                         ip_vs_copy_service(&entry, svc);
2258                         if (copy_to_user(&uptr->entrytable[count],
2259                                          &entry, sizeof(entry))) {
2260                                 ret = -EFAULT;
2261                                 goto out;
2262                         }
2263                         count++;
2264                 }
2265         }
2266   out:
2267         return ret;
2268 }
2269
2270 static inline int
2271 __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2272                          struct ip_vs_get_dests __user *uptr)
2273 {
2274         struct ip_vs_service *svc;
2275         union nf_inet_addr addr = { .ip = get->addr };
2276         int ret = 0;
2277
2278         if (get->fwmark)
2279                 svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
2280         else
2281                 svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
2282                                           get->port);
2283
2284         if (svc) {
2285                 int count = 0;
2286                 struct ip_vs_dest *dest;
2287                 struct ip_vs_dest_entry entry;
2288
2289                 list_for_each_entry(dest, &svc->destinations, n_list) {
2290                         if (count >= get->num_dests)
2291                                 break;
2292
2293                         entry.addr = dest->addr.ip;
2294                         entry.port = dest->port;
2295                         entry.conn_flags = atomic_read(&dest->conn_flags);
2296                         entry.weight = atomic_read(&dest->weight);
2297                         entry.u_threshold = dest->u_threshold;
2298                         entry.l_threshold = dest->l_threshold;
2299                         entry.activeconns = atomic_read(&dest->activeconns);
2300                         entry.inactconns = atomic_read(&dest->inactconns);
2301                         entry.persistconns = atomic_read(&dest->persistconns);
2302                         ip_vs_copy_stats(&entry.stats, &dest->stats);
2303                         if (copy_to_user(&uptr->entrytable[count],
2304                                          &entry, sizeof(entry))) {
2305                                 ret = -EFAULT;
2306                                 break;
2307                         }
2308                         count++;
2309                 }
2310                 ip_vs_service_put(svc);
2311         } else
2312                 ret = -ESRCH;
2313         return ret;
2314 }
2315
2316 static inline void
2317 __ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2318 {
2319 #ifdef CONFIG_IP_VS_PROTO_TCP
2320         u->tcp_timeout =
2321                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2322         u->tcp_fin_timeout =
2323                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2324 #endif
2325 #ifdef CONFIG_IP_VS_PROTO_UDP
2326         u->udp_timeout =
2327                 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2328 #endif
2329 }
2330
2331
/*
 * Helpers for validating getsockopt() arguments.
 *
 * GET_CMDID() turns a command number into an index relative to
 * IP_VS_BASE_CTL; its argument is parenthesized so that an expression can be
 * passed safely.  The GET_*_ARG_LEN macros give the size of the fixed part
 * of each request.
 */
#define GET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
#define GET_INFO_ARG_LEN	(sizeof(struct ip_vs_getinfo))
#define GET_SERVICES_ARG_LEN	(sizeof(struct ip_vs_get_services))
#define GET_SERVICE_ARG_LEN	(sizeof(struct ip_vs_service_entry))
#define GET_DESTS_ARG_LEN	(sizeof(struct ip_vs_get_dests))
#define GET_TIMEOUT_ARG_LEN	(sizeof(struct ip_vs_timeout_user))
#define GET_DAEMON_ARG_LEN	(sizeof(struct ip_vs_daemon_user) * 2)
2339
2340 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2341         [GET_CMDID(IP_VS_SO_GET_VERSION)]       = 64,
2342         [GET_CMDID(IP_VS_SO_GET_INFO)]          = GET_INFO_ARG_LEN,
2343         [GET_CMDID(IP_VS_SO_GET_SERVICES)]      = GET_SERVICES_ARG_LEN,
2344         [GET_CMDID(IP_VS_SO_GET_SERVICE)]       = GET_SERVICE_ARG_LEN,
2345         [GET_CMDID(IP_VS_SO_GET_DESTS)]         = GET_DESTS_ARG_LEN,
2346         [GET_CMDID(IP_VS_SO_GET_TIMEOUT)]       = GET_TIMEOUT_ARG_LEN,
2347         [GET_CMDID(IP_VS_SO_GET_DAEMON)]        = GET_DAEMON_ARG_LEN,
2348 };
2349
2350 static int
2351 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2352 {
2353         unsigned char arg[128];
2354         int ret = 0;
2355
2356         if (!capable(CAP_NET_ADMIN))
2357                 return -EPERM;
2358
2359         if (*len < get_arglen[GET_CMDID(cmd)]) {
2360                 pr_err("get_ctl: len %u < %u\n",
2361                        *len, get_arglen[GET_CMDID(cmd)]);
2362                 return -EINVAL;
2363         }
2364
2365         if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
2366                 return -EFAULT;
2367
2368         if (mutex_lock_interruptible(&__ip_vs_mutex))
2369                 return -ERESTARTSYS;
2370
2371         switch (cmd) {
2372         case IP_VS_SO_GET_VERSION:
2373         {
2374                 char buf[64];
2375
2376                 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2377                         NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
2378                 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2379                         ret = -EFAULT;
2380                         goto out;
2381                 }
2382                 *len = strlen(buf)+1;
2383         }
2384         break;
2385
2386         case IP_VS_SO_GET_INFO:
2387         {
2388                 struct ip_vs_getinfo info;
2389                 info.version = IP_VS_VERSION_CODE;
2390                 info.size = IP_VS_CONN_TAB_SIZE;
2391                 info.num_services = ip_vs_num_services;
2392                 if (copy_to_user(user, &info, sizeof(info)) != 0)
2393                         ret = -EFAULT;
2394         }
2395         break;
2396
2397         case IP_VS_SO_GET_SERVICES:
2398         {
2399                 struct ip_vs_get_services *get;
2400                 int size;
2401
2402                 get = (struct ip_vs_get_services *)arg;
2403                 size = sizeof(*get) +
2404                         sizeof(struct ip_vs_service_entry) * get->num_services;
2405                 if (*len != size) {
2406                         pr_err("length: %u != %u\n", *len, size);
2407                         ret = -EINVAL;
2408                         goto out;
2409                 }
2410                 ret = __ip_vs_get_service_entries(get, user);
2411         }
2412         break;
2413
2414         case IP_VS_SO_GET_SERVICE:
2415         {
2416                 struct ip_vs_service_entry *entry;
2417                 struct ip_vs_service *svc;
2418                 union nf_inet_addr addr;
2419
2420                 entry = (struct ip_vs_service_entry *)arg;
2421                 addr.ip = entry->addr;
2422                 if (entry->fwmark)
2423                         svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
2424                 else
2425                         svc = __ip_vs_service_get(AF_INET, entry->protocol,
2426                                                   &addr, entry->port);
2427                 if (svc) {
2428                         ip_vs_copy_service(entry, svc);
2429                         if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2430                                 ret = -EFAULT;
2431                         ip_vs_service_put(svc);
2432                 } else
2433                         ret = -ESRCH;
2434         }
2435         break;
2436
2437         case IP_VS_SO_GET_DESTS:
2438         {
2439                 struct ip_vs_get_dests *get;
2440                 int size;
2441
2442                 get = (struct ip_vs_get_dests *)arg;
2443                 size = sizeof(*get) +
2444                         sizeof(struct ip_vs_dest_entry) * get->num_dests;
2445                 if (*len != size) {
2446                         pr_err("length: %u != %u\n", *len, size);
2447                         ret = -EINVAL;
2448                         goto out;
2449                 }
2450                 ret = __ip_vs_get_dest_entries(get, user);
2451         }
2452         break;
2453
2454         case IP_VS_SO_GET_TIMEOUT:
2455         {
2456                 struct ip_vs_timeout_user t;
2457
2458                 __ip_vs_get_timeouts(&t);
2459                 if (copy_to_user(user, &t, sizeof(t)) != 0)
2460                         ret = -EFAULT;
2461         }
2462         break;
2463
2464         case IP_VS_SO_GET_DAEMON:
2465         {
2466                 struct ip_vs_daemon_user d[2];
2467
2468                 memset(&d, 0, sizeof(d));
2469                 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2470                         d[0].state = IP_VS_STATE_MASTER;
2471                         strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
2472                         d[0].syncid = ip_vs_master_syncid;
2473                 }
2474                 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2475                         d[1].state = IP_VS_STATE_BACKUP;
2476                         strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
2477                         d[1].syncid = ip_vs_backup_syncid;
2478                 }
2479                 if (copy_to_user(user, &d, sizeof(d)) != 0)
2480                         ret = -EFAULT;
2481         }
2482         break;
2483
2484         default:
2485                 ret = -EINVAL;
2486         }
2487
2488   out:
2489         mutex_unlock(&__ip_vs_mutex);
2490         return ret;
2491 }
2492
2493
2494 static struct nf_sockopt_ops ip_vs_sockopts = {
2495         .pf             = PF_INET,
2496         .set_optmin     = IP_VS_BASE_CTL,
2497         .set_optmax     = IP_VS_SO_SET_MAX+1,
2498         .set            = do_ip_vs_set_ctl,
2499         .get_optmin     = IP_VS_BASE_CTL,
2500         .get_optmax     = IP_VS_SO_GET_MAX+1,
2501         .get            = do_ip_vs_get_ctl,
2502         .owner          = THIS_MODULE,
2503 };
2504
2505 /*
2506  * Generic Netlink interface
2507  */
2508
2509 /* IPVS genetlink family */
2510 static struct genl_family ip_vs_genl_family = {
2511         .id             = GENL_ID_GENERATE,
2512         .hdrsize        = 0,
2513         .name           = IPVS_GENL_NAME,
2514         .version        = IPVS_GENL_VERSION,
2515         .maxattr        = IPVS_CMD_MAX,
2516 };
2517
2518 /* Policy used for first-level command attributes */
2519 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2520         [IPVS_CMD_ATTR_SERVICE]         = { .type = NLA_NESTED },
2521         [IPVS_CMD_ATTR_DEST]            = { .type = NLA_NESTED },
2522         [IPVS_CMD_ATTR_DAEMON]          = { .type = NLA_NESTED },
2523         [IPVS_CMD_ATTR_TIMEOUT_TCP]     = { .type = NLA_U32 },
2524         [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2525         [IPVS_CMD_ATTR_TIMEOUT_UDP]     = { .type = NLA_U32 },
2526 };
2527
2528 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2529 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2530         [IPVS_DAEMON_ATTR_STATE]        = { .type = NLA_U32 },
2531         [IPVS_DAEMON_ATTR_MCAST_IFN]    = { .type = NLA_NUL_STRING,
2532                                             .len = IP_VS_IFNAME_MAXLEN },
2533         [IPVS_DAEMON_ATTR_SYNC_ID]      = { .type = NLA_U32 },
2534 };
2535
2536 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2537 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2538         [IPVS_SVC_ATTR_AF]              = { .type = NLA_U16 },
2539         [IPVS_SVC_ATTR_PROTOCOL]        = { .type = NLA_U16 },
2540         [IPVS_SVC_ATTR_ADDR]            = { .type = NLA_BINARY,
2541                                             .len = sizeof(union nf_inet_addr) },
2542         [IPVS_SVC_ATTR_PORT]            = { .type = NLA_U16 },
2543         [IPVS_SVC_ATTR_FWMARK]          = { .type = NLA_U32 },
2544         [IPVS_SVC_ATTR_SCHED_NAME]      = { .type = NLA_NUL_STRING,
2545                                             .len = IP_VS_SCHEDNAME_MAXLEN },
2546         [IPVS_SVC_ATTR_FLAGS]           = { .type = NLA_BINARY,
2547                                             .len = sizeof(struct ip_vs_flags) },
2548         [IPVS_SVC_ATTR_TIMEOUT]         = { .type = NLA_U32 },
2549         [IPVS_SVC_ATTR_NETMASK]         = { .type = NLA_U32 },
2550         [IPVS_SVC_ATTR_STATS]           = { .type = NLA_NESTED },
2551 };
2552
2553 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2554 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2555         [IPVS_DEST_ATTR_ADDR]           = { .type = NLA_BINARY,
2556                                             .len = sizeof(union nf_inet_addr) },
2557         [IPVS_DEST_ATTR_PORT]           = { .type = NLA_U16 },
2558         [IPVS_DEST_ATTR_FWD_METHOD]     = { .type = NLA_U32 },
2559         [IPVS_DEST_ATTR_WEIGHT]         = { .type = NLA_U32 },
2560         [IPVS_DEST_ATTR_U_THRESH]       = { .type = NLA_U32 },
2561         [IPVS_DEST_ATTR_L_THRESH]       = { .type = NLA_U32 },
2562         [IPVS_DEST_ATTR_ACTIVE_CONNS]   = { .type = NLA_U32 },
2563         [IPVS_DEST_ATTR_INACT_CONNS]    = { .type = NLA_U32 },
2564         [IPVS_DEST_ATTR_PERSIST_CONNS]  = { .type = NLA_U32 },
2565         [IPVS_DEST_ATTR_STATS]          = { .type = NLA_NESTED },
2566 };
2567
2568 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2569                                  struct ip_vs_stats *stats)
2570 {
2571         struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2572         if (!nl_stats)
2573                 return -EMSGSIZE;
2574
2575         spin_lock_bh(&stats->lock);
2576
2577         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2578         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2579         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2580         NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2581         NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2582         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2583         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2584         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2585         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2586         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
2587
2588         spin_unlock_bh(&stats->lock);
2589
2590         nla_nest_end(skb, nl_stats);
2591
2592         return 0;
2593
2594 nla_put_failure:
2595         spin_unlock_bh(&stats->lock);
2596         nla_nest_cancel(skb, nl_stats);
2597         return -EMSGSIZE;
2598 }
2599
2600 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2601                                    struct ip_vs_service *svc)
2602 {
2603         struct nlattr *nl_service;
2604         struct ip_vs_flags flags = { .flags = svc->flags,
2605                                      .mask = ~0 };
2606
2607         nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2608         if (!nl_service)
2609                 return -EMSGSIZE;
2610
2611         NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
2612
2613         if (svc->fwmark) {
2614                 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2615         } else {
2616                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2617                 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2618                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2619         }
2620
2621         NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2622         NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2623         NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2624         NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2625
2626         if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2627                 goto nla_put_failure;
2628
2629         nla_nest_end(skb, nl_service);
2630
2631         return 0;
2632
2633 nla_put_failure:
2634         nla_nest_cancel(skb, nl_service);
2635         return -EMSGSIZE;
2636 }
2637
2638 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2639                                    struct ip_vs_service *svc,
2640                                    struct netlink_callback *cb)
2641 {
2642         void *hdr;
2643
2644         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2645                           &ip_vs_genl_family, NLM_F_MULTI,
2646                           IPVS_CMD_NEW_SERVICE);
2647         if (!hdr)
2648                 return -EMSGSIZE;
2649
2650         if (ip_vs_genl_fill_service(skb, svc) < 0)
2651                 goto nla_put_failure;
2652
2653         return genlmsg_end(skb, hdr);
2654
2655 nla_put_failure:
2656         genlmsg_cancel(skb, hdr);
2657         return -EMSGSIZE;
2658 }
2659
2660 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2661                                     struct netlink_callback *cb)
2662 {
2663         int idx = 0, i;
2664         int start = cb->args[0];
2665         struct ip_vs_service *svc;
2666
2667         mutex_lock(&__ip_vs_mutex);
2668         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2669                 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2670                         if (++idx <= start)
2671                                 continue;
2672                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2673                                 idx--;
2674                                 goto nla_put_failure;
2675                         }
2676                 }
2677         }
2678
2679         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2680                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2681                         if (++idx <= start)
2682                                 continue;
2683                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2684                                 idx--;
2685                                 goto nla_put_failure;
2686                         }
2687                 }
2688         }
2689
2690 nla_put_failure:
2691         mutex_unlock(&__ip_vs_mutex);
2692         cb->args[0] = idx;
2693
2694         return skb->len;
2695 }
2696
2697 static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2698                                     struct nlattr *nla, int full_entry)
2699 {
2700         struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2701         struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2702
2703         /* Parse mandatory identifying service fields first */
2704         if (nla == NULL ||
2705             nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2706                 return -EINVAL;
2707
2708         nla_af          = attrs[IPVS_SVC_ATTR_AF];
2709         nla_protocol    = attrs[IPVS_SVC_ATTR_PROTOCOL];
2710         nla_addr        = attrs[IPVS_SVC_ATTR_ADDR];
2711         nla_port        = attrs[IPVS_SVC_ATTR_PORT];
2712         nla_fwmark      = attrs[IPVS_SVC_ATTR_FWMARK];
2713
2714         if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2715                 return -EINVAL;
2716
2717         usvc->af = nla_get_u16(nla_af);
2718 #ifdef CONFIG_IP_VS_IPV6
2719         if (usvc->af != AF_INET && usvc->af != AF_INET6)
2720 #else
2721         if (usvc->af != AF_INET)
2722 #endif
2723                 return -EAFNOSUPPORT;
2724
2725         if (nla_fwmark) {
2726                 usvc->protocol = IPPROTO_TCP;
2727                 usvc->fwmark = nla_get_u32(nla_fwmark);
2728         } else {
2729                 usvc->protocol = nla_get_u16(nla_protocol);
2730                 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2731                 usvc->port = nla_get_u16(nla_port);
2732                 usvc->fwmark = 0;
2733         }
2734
2735         /* If a full entry was requested, check for the additional fields */
2736         if (full_entry) {
2737                 struct nlattr *nla_sched, *nla_flags, *nla_timeout,
2738                               *nla_netmask;
2739                 struct ip_vs_flags flags;
2740                 struct ip_vs_service *svc;
2741
2742                 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2743                 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2744                 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2745                 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2746
2747                 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2748                         return -EINVAL;
2749
2750                 nla_memcpy(&flags, nla_flags, sizeof(flags));
2751
2752                 /* prefill flags from service if it already exists */
2753                 if (usvc->fwmark)
2754                         svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
2755                 else
2756                         svc = __ip_vs_service_get(usvc->af, usvc->protocol,
2757                                                   &usvc->addr, usvc->port);
2758                 if (svc) {
2759                         usvc->flags = svc->flags;
2760                         ip_vs_service_put(svc);
2761                 } else
2762                         usvc->flags = 0;
2763
2764                 /* set new flags from userland */
2765                 usvc->flags = (usvc->flags & ~flags.mask) |
2766                               (flags.flags & flags.mask);
2767                 usvc->sched_name = nla_data(nla_sched);
2768                 usvc->timeout = nla_get_u32(nla_timeout);
2769                 usvc->netmask = nla_get_u32(nla_netmask);
2770         }
2771
2772         return 0;
2773 }
2774
2775 static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2776 {
2777         struct ip_vs_service_user_kern usvc;
2778         int ret;
2779
2780         ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2781         if (ret)
2782                 return ERR_PTR(ret);
2783
2784         if (usvc.fwmark)
2785                 return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
2786         else
2787                 return __ip_vs_service_get(usvc.af, usvc.protocol,
2788                                            &usvc.addr, usvc.port);
2789 }
2790
2791 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2792 {
2793         struct nlattr *nl_dest;
2794
2795         nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2796         if (!nl_dest)
2797                 return -EMSGSIZE;
2798
2799         NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2800         NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2801
2802         NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2803                     atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2804         NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2805         NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2806         NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2807         NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2808                     atomic_read(&dest->activeconns));
2809         NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2810                     atomic_read(&dest->inactconns));
2811         NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2812                     atomic_read(&dest->persistconns));
2813
2814         if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2815                 goto nla_put_failure;
2816
2817         nla_nest_end(skb, nl_dest);
2818
2819         return 0;
2820
2821 nla_put_failure:
2822         nla_nest_cancel(skb, nl_dest);
2823         return -EMSGSIZE;
2824 }
2825
2826 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2827                                 struct netlink_callback *cb)
2828 {
2829         void *hdr;
2830
2831         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2832                           &ip_vs_genl_family, NLM_F_MULTI,
2833                           IPVS_CMD_NEW_DEST);
2834         if (!hdr)
2835                 return -EMSGSIZE;
2836
2837         if (ip_vs_genl_fill_dest(skb, dest) < 0)
2838                 goto nla_put_failure;
2839
2840         return genlmsg_end(skb, hdr);
2841
2842 nla_put_failure:
2843         genlmsg_cancel(skb, hdr);
2844         return -EMSGSIZE;
2845 }
2846
2847 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2848                                  struct netlink_callback *cb)
2849 {
2850         int idx = 0;
2851         int start = cb->args[0];
2852         struct ip_vs_service *svc;
2853         struct ip_vs_dest *dest;
2854         struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2855
2856         mutex_lock(&__ip_vs_mutex);
2857
2858         /* Try to find the service for which to dump destinations */
2859         if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2860                         IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2861                 goto out_err;
2862
2863         svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2864         if (IS_ERR(svc) || svc == NULL)
2865                 goto out_err;
2866
2867         /* Dump the destinations */
2868         list_for_each_entry(dest, &svc->destinations, n_list) {
2869                 if (++idx <= start)
2870                         continue;
2871                 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2872                         idx--;
2873                         goto nla_put_failure;
2874                 }
2875         }
2876
2877 nla_put_failure:
2878         cb->args[0] = idx;
2879         ip_vs_service_put(svc);
2880
2881 out_err:
2882         mutex_unlock(&__ip_vs_mutex);
2883
2884         return skb->len;
2885 }
2886
2887 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
2888                                  struct nlattr *nla, int full_entry)
2889 {
2890         struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2891         struct nlattr *nla_addr, *nla_port;
2892
2893         /* Parse mandatory identifying destination fields first */
2894         if (nla == NULL ||
2895             nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2896                 return -EINVAL;
2897
2898         nla_addr        = attrs[IPVS_DEST_ATTR_ADDR];
2899         nla_port        = attrs[IPVS_DEST_ATTR_PORT];
2900
2901         if (!(nla_addr && nla_port))
2902                 return -EINVAL;
2903
2904         nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2905         udest->port = nla_get_u16(nla_port);
2906
2907         /* If a full entry was requested, check for the additional fields */
2908         if (full_entry) {
2909                 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2910                               *nla_l_thresh;
2911
2912                 nla_fwd         = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2913                 nla_weight      = attrs[IPVS_DEST_ATTR_WEIGHT];
2914                 nla_u_thresh    = attrs[IPVS_DEST_ATTR_U_THRESH];
2915                 nla_l_thresh    = attrs[IPVS_DEST_ATTR_L_THRESH];
2916
2917                 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2918                         return -EINVAL;
2919
2920                 udest->conn_flags = nla_get_u32(nla_fwd)
2921                                     & IP_VS_CONN_F_FWD_MASK;
2922                 udest->weight = nla_get_u32(nla_weight);
2923                 udest->u_threshold = nla_get_u32(nla_u_thresh);
2924                 udest->l_threshold = nla_get_u32(nla_l_thresh);
2925         }
2926
2927         return 0;
2928 }
2929
2930 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2931                                   const char *mcast_ifn, __be32 syncid)
2932 {
2933         struct nlattr *nl_daemon;
2934
2935         nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2936         if (!nl_daemon)
2937                 return -EMSGSIZE;
2938
2939         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2940         NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2941         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2942
2943         nla_nest_end(skb, nl_daemon);
2944
2945         return 0;
2946
2947 nla_put_failure:
2948         nla_nest_cancel(skb, nl_daemon);
2949         return -EMSGSIZE;
2950 }
2951
2952 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2953                                   const char *mcast_ifn, __be32 syncid,
2954                                   struct netlink_callback *cb)
2955 {
2956         void *hdr;
2957         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2958                           &ip_vs_genl_family, NLM_F_MULTI,
2959                           IPVS_CMD_NEW_DAEMON);
2960         if (!hdr)
2961                 return -EMSGSIZE;
2962
2963         if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2964                 goto nla_put_failure;
2965
2966         return genlmsg_end(skb, hdr);
2967
2968 nla_put_failure:
2969         genlmsg_cancel(skb, hdr);
2970         return -EMSGSIZE;
2971 }
2972
2973 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
2974                                    struct netlink_callback *cb)
2975 {
2976         mutex_lock(&__ip_vs_mutex);
2977         if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
2978                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
2979                                            ip_vs_master_mcast_ifn,
2980                                            ip_vs_master_syncid, cb) < 0)
2981                         goto nla_put_failure;
2982
2983                 cb->args[0] = 1;
2984         }
2985
2986         if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
2987                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
2988                                            ip_vs_backup_mcast_ifn,
2989                                            ip_vs_backup_syncid, cb) < 0)
2990                         goto nla_put_failure;
2991
2992                 cb->args[1] = 1;
2993         }
2994
2995 nla_put_failure:
2996         mutex_unlock(&__ip_vs_mutex);
2997
2998         return skb->len;
2999 }
3000
3001 static int ip_vs_genl_new_daemon(struct nlattr **attrs)
3002 {
3003         if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3004               attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3005               attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3006                 return -EINVAL;
3007
3008         return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3009                                  nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3010                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3011 }
3012
3013 static int ip_vs_genl_del_daemon(struct nlattr **attrs)
3014 {
3015         if (!attrs[IPVS_DAEMON_ATTR_STATE])
3016                 return -EINVAL;
3017
3018         return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3019 }
3020
3021 static int ip_vs_genl_set_config(struct nlattr **attrs)
3022 {
3023         struct ip_vs_timeout_user t;
3024
3025         __ip_vs_get_timeouts(&t);
3026
3027         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3028                 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3029
3030         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3031                 t.tcp_fin_timeout =
3032                         nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3033
3034         if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3035                 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3036
3037         return ip_vs_set_timeout(&t);
3038 }
3039
3040 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3041 {
3042         struct ip_vs_service *svc = NULL;
3043         struct ip_vs_service_user_kern usvc;
3044         struct ip_vs_dest_user_kern udest;
3045         int ret = 0, cmd;
3046         int need_full_svc = 0, need_full_dest = 0;
3047
3048         cmd = info->genlhdr->cmd;
3049
3050         mutex_lock(&__ip_vs_mutex);
3051
3052         if (cmd == IPVS_CMD_FLUSH) {
3053                 ret = ip_vs_flush();
3054                 goto out;
3055         } else if (cmd == IPVS_CMD_SET_CONFIG) {
3056                 ret = ip_vs_genl_set_config(info->attrs);
3057                 goto out;
3058         } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3059                    cmd == IPVS_CMD_DEL_DAEMON) {
3060
3061                 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3062
3063                 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3064                     nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3065                                      info->attrs[IPVS_CMD_ATTR_DAEMON],
3066                                      ip_vs_daemon_policy)) {
3067                         ret = -EINVAL;
3068                         goto out;
3069                 }
3070
3071                 if (cmd == IPVS_CMD_NEW_DAEMON)
3072                         ret = ip_vs_genl_new_daemon(daemon_attrs);
3073                 else
3074                         ret = ip_vs_genl_del_daemon(daemon_attrs);
3075                 goto out;
3076         } else if (cmd == IPVS_CMD_ZERO &&
3077                    !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3078                 ret = ip_vs_zero_all();
3079                 goto out;
3080         }
3081
3082         /* All following commands require a service argument, so check if we
3083          * received a valid one. We need a full service specification when
3084          * adding / editing a service. Only identifying members otherwise. */
3085         if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3086                 need_full_svc = 1;
3087
3088         ret = ip_vs_genl_parse_service(&usvc,
3089                                        info->attrs[IPVS_CMD_ATTR_SERVICE],
3090                                        need_full_svc);
3091         if (ret)
3092                 goto out;
3093
3094         /* Lookup the exact service by <protocol, addr, port> or fwmark */
3095         if (usvc.fwmark == 0)
3096                 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
3097                                           &usvc.addr, usvc.port);
3098         else
3099                 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
3100
3101         /* Unless we're adding a new service, the service must already exist */
3102         if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3103                 ret = -ESRCH;
3104                 goto out;
3105         }
3106
3107         /* Destination commands require a valid destination argument. For
3108          * adding / editing a destination, we need a full destination
3109          * specification. */
3110         if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3111             cmd == IPVS_CMD_DEL_DEST) {
3112                 if (cmd != IPVS_CMD_DEL_DEST)
3113                         need_full_dest = 1;
3114
3115                 ret = ip_vs_genl_parse_dest(&udest,
3116                                             info->attrs[IPVS_CMD_ATTR_DEST],
3117                                             need_full_dest);
3118                 if (ret)
3119                         goto out;
3120         }
3121
3122         switch (cmd) {
3123         case IPVS_CMD_NEW_SERVICE:
3124                 if (svc == NULL)
3125                         ret = ip_vs_add_service(&usvc, &svc);
3126                 else
3127                         ret = -EEXIST;
3128                 break;
3129         case IPVS_CMD_SET_SERVICE:
3130                 ret = ip_vs_edit_service(svc, &usvc);
3131                 break;
3132         case IPVS_CMD_DEL_SERVICE:
3133                 ret = ip_vs_del_service(svc);
3134                 break;
3135         case IPVS_CMD_NEW_DEST:
3136                 ret = ip_vs_add_dest(svc, &udest);
3137                 break;
3138         case IPVS_CMD_SET_DEST:
3139                 ret = ip_vs_edit_dest(svc, &udest);
3140                 break;
3141         case IPVS_CMD_DEL_DEST:
3142                 ret = ip_vs_del_dest(svc, &udest);
3143                 break;
3144         case IPVS_CMD_ZERO:
3145                 ret = ip_vs_zero_service(svc);
3146                 break;
3147         default:
3148                 ret = -EINVAL;
3149         }
3150
3151 out:
3152         if (svc)
3153                 ip_vs_service_put(svc);
3154         mutex_unlock(&__ip_vs_mutex);
3155
3156         return ret;
3157 }
3158
3159 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3160 {
3161         struct sk_buff *msg;
3162         void *reply;
3163         int ret, cmd, reply_cmd;
3164
3165         cmd = info->genlhdr->cmd;
3166
3167         if (cmd == IPVS_CMD_GET_SERVICE)
3168                 reply_cmd = IPVS_CMD_NEW_SERVICE;
3169         else if (cmd == IPVS_CMD_GET_INFO)
3170                 reply_cmd = IPVS_CMD_SET_INFO;
3171         else if (cmd == IPVS_CMD_GET_CONFIG)
3172                 reply_cmd = IPVS_CMD_SET_CONFIG;
3173         else {
3174                 pr_err("unknown Generic Netlink command\n");
3175                 return -EINVAL;
3176         }
3177
3178         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3179         if (!msg)
3180                 return -ENOMEM;
3181
3182         mutex_lock(&__ip_vs_mutex);
3183
3184         reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3185         if (reply == NULL)
3186                 goto nla_put_failure;
3187
3188         switch (cmd) {
3189         case IPVS_CMD_GET_SERVICE:
3190         {
3191                 struct ip_vs_service *svc;
3192
3193                 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3194                 if (IS_ERR(svc)) {
3195                         ret = PTR_ERR(svc);
3196                         goto out_err;
3197                 } else if (svc) {
3198                         ret = ip_vs_genl_fill_service(msg, svc);
3199                         ip_vs_service_put(svc);
3200                         if (ret)
3201                                 goto nla_put_failure;
3202                 } else {
3203                         ret = -ESRCH;
3204                         goto out_err;
3205                 }
3206
3207                 break;
3208         }
3209
3210         case IPVS_CMD_GET_CONFIG:
3211         {
3212                 struct ip_vs_timeout_user t;
3213
3214                 __ip_vs_get_timeouts(&t);
3215 #ifdef CONFIG_IP_VS_PROTO_TCP
3216                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3217                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3218                             t.tcp_fin_timeout);
3219 #endif
3220 #ifdef CONFIG_IP_VS_PROTO_UDP
3221                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3222 #endif
3223
3224                 break;
3225         }
3226
3227         case IPVS_CMD_GET_INFO:
3228                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3229                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3230                             IP_VS_CONN_TAB_SIZE);
3231                 break;
3232         }
3233
3234         genlmsg_end(msg, reply);
3235         ret = genlmsg_reply(msg, info);
3236         goto out;
3237
3238 nla_put_failure:
3239         pr_err("not enough space in Netlink message\n");
3240         ret = -EMSGSIZE;
3241
3242 out_err:
3243         nlmsg_free(msg);
3244 out:
3245         mutex_unlock(&__ip_vs_mutex);
3246
3247         return ret;
3248 }
3249
3250
/*
 * Generic netlink operations of the IPVS family.
 *
 * Every command carries GENL_ADMIN_PERM. Modifying commands are routed
 * to ip_vs_genl_set_cmd(), single-reply queries to ip_vs_genl_get_cmd(),
 * and list queries to the matching dump callback. Commands whose
 * top-level attributes are parsed by the handler use ip_vs_cmd_policy.
 */
static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
	/* virtual services */
	{
		.cmd	= IPVS_CMD_NEW_SERVICE,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_SET_SERVICE,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_DEL_SERVICE,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		/* doit answers for one service, dumpit lists all of them */
		.cmd	= IPVS_CMD_GET_SERVICE,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_get_cmd,
		.dumpit	= ip_vs_genl_dump_services,
		.policy	= ip_vs_cmd_policy,
	},
	/* destinations (real servers) of a service */
	{
		.cmd	= IPVS_CMD_NEW_DEST,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_SET_DEST,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_DEL_DEST,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		/* dump only: lists the destinations of one service */
		.cmd	= IPVS_CMD_GET_DEST,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.dumpit	= ip_vs_genl_dump_dests,
	},
	/* sync daemons */
	{
		.cmd	= IPVS_CMD_NEW_DAEMON,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_DEL_DAEMON,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		/* dump only; the callback reads no request attributes */
		.cmd	= IPVS_CMD_GET_DAEMON,
		.flags	= GENL_ADMIN_PERM,
		.dumpit	= ip_vs_genl_dump_daemons,
	},
	/* global configuration and information */
	{
		.cmd	= IPVS_CMD_SET_CONFIG,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_GET_CONFIG,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_get_cmd,
	},
	{
		.cmd	= IPVS_CMD_GET_INFO,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_get_cmd,
	},
	{
		/* with a service attribute: that service; without: all */
		.cmd	= IPVS_CMD_ZERO,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_FLUSH,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_set_cmd,
	},
};
3346
3347 static int __init ip_vs_genl_register(void)
3348 {
3349         return genl_register_family_with_ops(&ip_vs_genl_family,
3350                 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
3351 }
3352
/*
 * Unregister the IPVS generic netlink family; counterpart of
 * ip_vs_genl_register().
 */
static void ip_vs_genl_unregister(void)
{
	genl_unregister_family(&ip_vs_genl_family);
}
3357
3358 /* End of Generic Netlink interface definitions */
3359
3360
3361 int __init ip_vs_control_init(void)
3362 {
3363         int ret;
3364         int idx;
3365
3366         EnterFunction(2);
3367
3368         ret = nf_register_sockopt(&ip_vs_sockopts);
3369         if (ret) {
3370                 pr_err("cannot register sockopt.\n");
3371                 return ret;
3372         }
3373
3374         ret = ip_vs_genl_register();
3375         if (ret) {
3376                 pr_err("cannot register Generic Netlink interface.\n");
3377                 nf_unregister_sockopt(&ip_vs_sockopts);
3378                 return ret;
3379         }
3380
3381         proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3382         proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
3383
3384         sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
3385
3386         /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3387         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
3388                 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3389                 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3390         }
3391         for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
3392                 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3393         }
3394
3395         ip_vs_new_estimator(&ip_vs_stats);
3396
3397         /* Hook the defense timer */
3398         schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3399
3400         LeaveFunction(2);
3401         return 0;
3402 }
3403
3404
/*
 * Tear down everything ip_vs_control_init() set up: the defense work,
 * the global statistics estimator, the sysctl table, the /proc entries
 * and finally the generic netlink and sockopt interfaces (in the reverse
 * of their registration order). The destination trash is emptied first.
 */
void ip_vs_control_cleanup(void)
{
	EnterFunction(2);
	ip_vs_trash_cleanup();
	/* stop the defense work scheduled by ip_vs_control_init() */
	cancel_rearming_delayed_work(&defense_work);
	cancel_work_sync(&defense_work.work);
	ip_vs_kill_estimator(&ip_vs_stats);
	unregister_sysctl_table(sysctl_header);
	proc_net_remove(&init_net, "ip_vs_stats");
	proc_net_remove(&init_net, "ip_vs");
	ip_vs_genl_unregister();
	nf_unregister_sockopt(&ip_vs_sockopts);
	LeaveFunction(2);
}