ipvs: properly zero stats and rates
[linux-2.6.git] / net / netfilter / ipvs / ip_vs_ctl.c
1 /*
2  * IPVS         An implementation of the IP virtual server support for the
3  *              LINUX operating system.  IPVS is now implemented as a module
4  *              over the NetFilter framework. IPVS can be used to build a
5  *              high-performance and highly available server based on a
6  *              cluster of servers.
7  *
8  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
9  *              Peter Kese <peter.kese@ijs.si>
10  *              Julian Anastasov <ja@ssi.bg>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  *
17  * Changes:
18  *
19  */
20
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
28 #include <linux/fs.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34 #include <linux/slab.h>
35
36 #include <linux/netfilter.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/mutex.h>
39
40 #include <net/net_namespace.h>
41 #include <linux/nsproxy.h>
42 #include <net/ip.h>
43 #ifdef CONFIG_IP_VS_IPV6
44 #include <net/ipv6.h>
45 #include <net/ip6_route.h>
46 #endif
47 #include <net/route.h>
48 #include <net/sock.h>
49 #include <net/genetlink.h>
50
51 #include <asm/uaccess.h>
52
53 #include <net/ip_vs.h>
54
55 /* mutex serializing IPVS sockopts; a sleeping lock is used because [gs]etsockopt may sleep. */
56 static DEFINE_MUTEX(__ip_vs_mutex);
57
58 /* lock for service table */
59 static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
61 /* sysctl variables */
62
63 #ifdef CONFIG_IP_VS_DEBUG
/* IPVS debug verbosity, read through ip_vs_get_debug_level() */
static int sysctl_ip_vs_debug_level;

/*
 *	Return the current IPVS debug level.
 */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
70 #endif
71
72 #ifdef CONFIG_IP_VS_IPV6
73 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
74 static int __ip_vs_addr_is_local_v6(struct net *net,
75                                     const struct in6_addr *addr)
76 {
77         struct rt6_info *rt;
78         struct flowi fl = {
79                 .oif = 0,
80                 .fl6_dst = *addr,
81                 .fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
82         };
83
84         rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl);
85         if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
86                         return 1;
87
88         return 0;
89 }
90 #endif
91 /*
92  *      update_defense_level is called from keventd and from sysctl,
93  *      so it needs to protect itself from softirqs
94  */
95 static void update_defense_level(struct netns_ipvs *ipvs)
96 {
97         struct sysinfo i;
98         static int old_secure_tcp = 0;
99         int availmem;
100         int nomem;
101         int to_change = -1;
102
103         /* we only count free and buffered memory (in pages) */
104         si_meminfo(&i);
105         availmem = i.freeram + i.bufferram;
106         /* however in linux 2.5 the i.bufferram is total page cache size,
107            we need adjust it */
108         /* si_swapinfo(&i); */
109         /* availmem = availmem - (i.totalswap - i.freeswap); */
110
111         nomem = (availmem < ipvs->sysctl_amemthresh);
112
113         local_bh_disable();
114
115         /* drop_entry */
116         spin_lock(&ipvs->dropentry_lock);
117         switch (ipvs->sysctl_drop_entry) {
118         case 0:
119                 atomic_set(&ipvs->dropentry, 0);
120                 break;
121         case 1:
122                 if (nomem) {
123                         atomic_set(&ipvs->dropentry, 1);
124                         ipvs->sysctl_drop_entry = 2;
125                 } else {
126                         atomic_set(&ipvs->dropentry, 0);
127                 }
128                 break;
129         case 2:
130                 if (nomem) {
131                         atomic_set(&ipvs->dropentry, 1);
132                 } else {
133                         atomic_set(&ipvs->dropentry, 0);
134                         ipvs->sysctl_drop_entry = 1;
135                 };
136                 break;
137         case 3:
138                 atomic_set(&ipvs->dropentry, 1);
139                 break;
140         }
141         spin_unlock(&ipvs->dropentry_lock);
142
143         /* drop_packet */
144         spin_lock(&ipvs->droppacket_lock);
145         switch (ipvs->sysctl_drop_packet) {
146         case 0:
147                 ipvs->drop_rate = 0;
148                 break;
149         case 1:
150                 if (nomem) {
151                         ipvs->drop_rate = ipvs->drop_counter
152                                 = ipvs->sysctl_amemthresh /
153                                 (ipvs->sysctl_amemthresh-availmem);
154                         ipvs->sysctl_drop_packet = 2;
155                 } else {
156                         ipvs->drop_rate = 0;
157                 }
158                 break;
159         case 2:
160                 if (nomem) {
161                         ipvs->drop_rate = ipvs->drop_counter
162                                 = ipvs->sysctl_amemthresh /
163                                 (ipvs->sysctl_amemthresh-availmem);
164                 } else {
165                         ipvs->drop_rate = 0;
166                         ipvs->sysctl_drop_packet = 1;
167                 }
168                 break;
169         case 3:
170                 ipvs->drop_rate = ipvs->sysctl_am_droprate;
171                 break;
172         }
173         spin_unlock(&ipvs->droppacket_lock);
174
175         /* secure_tcp */
176         spin_lock(&ipvs->securetcp_lock);
177         switch (ipvs->sysctl_secure_tcp) {
178         case 0:
179                 if (old_secure_tcp >= 2)
180                         to_change = 0;
181                 break;
182         case 1:
183                 if (nomem) {
184                         if (old_secure_tcp < 2)
185                                 to_change = 1;
186                         ipvs->sysctl_secure_tcp = 2;
187                 } else {
188                         if (old_secure_tcp >= 2)
189                                 to_change = 0;
190                 }
191                 break;
192         case 2:
193                 if (nomem) {
194                         if (old_secure_tcp < 2)
195                                 to_change = 1;
196                 } else {
197                         if (old_secure_tcp >= 2)
198                                 to_change = 0;
199                         ipvs->sysctl_secure_tcp = 1;
200                 }
201                 break;
202         case 3:
203                 if (old_secure_tcp < 2)
204                         to_change = 1;
205                 break;
206         }
207         old_secure_tcp = ipvs->sysctl_secure_tcp;
208         if (to_change >= 0)
209                 ip_vs_protocol_timeout_change(ipvs,
210                                               ipvs->sysctl_secure_tcp > 1);
211         spin_unlock(&ipvs->securetcp_lock);
212
213         local_bh_enable();
214 }
215
216
/*
 *	Timer for checking the defense: period of the defense work, in
 *	jiffies.  Parenthesized so the expansion stays a single operand in
 *	whatever expression it is used in.
 */
#define DEFENSE_TIMER_PERIOD	(1*HZ)
221
222 static void defense_work_handler(struct work_struct *work)
223 {
224         struct netns_ipvs *ipvs =
225                 container_of(work, struct netns_ipvs, defense_work.work);
226
227         update_defense_level(ipvs);
228         if (atomic_read(&ipvs->dropentry))
229                 ip_vs_random_dropentry(ipvs->net);
230         schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
231 }
232
233 int
234 ip_vs_use_count_inc(void)
235 {
236         return try_module_get(THIS_MODULE);
237 }
238
239 void
240 ip_vs_use_count_dec(void)
241 {
242         module_put(THIS_MODULE);
243 }
244
245
246 /*
247  *      Hash table: for virtual service lookups
248  */
249 #define IP_VS_SVC_TAB_BITS 8
250 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
251 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
252
253 /* the service table hashed by <protocol, addr, port> */
254 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
255 /* the service table hashed by fwmark */
256 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
257
258
259 /*
260  *      Returns hash value for virtual service
261  */
262 static inline unsigned
263 ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
264                   const union nf_inet_addr *addr, __be16 port)
265 {
266         register unsigned porth = ntohs(port);
267         __be32 addr_fold = addr->ip;
268
269 #ifdef CONFIG_IP_VS_IPV6
270         if (af == AF_INET6)
271                 addr_fold = addr->ip6[0]^addr->ip6[1]^
272                             addr->ip6[2]^addr->ip6[3];
273 #endif
274         addr_fold ^= ((size_t)net>>8);
275
276         return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
277                 & IP_VS_SVC_TAB_MASK;
278 }
279
280 /*
281  *      Returns hash value of fwmark for virtual service lookup
282  */
283 static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
284 {
285         return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
286 }
287
288 /*
289  *      Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
290  *      or in the ip_vs_svc_fwm_table by fwmark.
291  *      Should be called with locked tables.
292  */
293 static int ip_vs_svc_hash(struct ip_vs_service *svc)
294 {
295         unsigned hash;
296
297         if (svc->flags & IP_VS_SVC_F_HASHED) {
298                 pr_err("%s(): request for already hashed, called from %pF\n",
299                        __func__, __builtin_return_address(0));
300                 return 0;
301         }
302
303         if (svc->fwmark == 0) {
304                 /*
305                  *  Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
306                  */
307                 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
308                                          &svc->addr, svc->port);
309                 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
310         } else {
311                 /*
312                  *  Hash it by fwmark in svc_fwm_table
313                  */
314                 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
315                 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
316         }
317
318         svc->flags |= IP_VS_SVC_F_HASHED;
319         /* increase its refcnt because it is referenced by the svc table */
320         atomic_inc(&svc->refcnt);
321         return 1;
322 }
323
324
325 /*
326  *      Unhashes a service from svc_table / svc_fwm_table.
327  *      Should be called with locked tables.
328  */
329 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
330 {
331         if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
332                 pr_err("%s(): request for unhash flagged, called from %pF\n",
333                        __func__, __builtin_return_address(0));
334                 return 0;
335         }
336
337         if (svc->fwmark == 0) {
338                 /* Remove it from the svc_table table */
339                 list_del(&svc->s_list);
340         } else {
341                 /* Remove it from the svc_fwm_table table */
342                 list_del(&svc->f_list);
343         }
344
345         svc->flags &= ~IP_VS_SVC_F_HASHED;
346         atomic_dec(&svc->refcnt);
347         return 1;
348 }
349
350
351 /*
352  *      Get service by {netns, proto,addr,port} in the service table.
353  */
354 static inline struct ip_vs_service *
355 __ip_vs_service_find(struct net *net, int af, __u16 protocol,
356                      const union nf_inet_addr *vaddr, __be16 vport)
357 {
358         unsigned hash;
359         struct ip_vs_service *svc;
360
361         /* Check for "full" addressed entries */
362         hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
363
364         list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
365                 if ((svc->af == af)
366                     && ip_vs_addr_equal(af, &svc->addr, vaddr)
367                     && (svc->port == vport)
368                     && (svc->protocol == protocol)
369                     && net_eq(svc->net, net)) {
370                         /* HIT */
371                         return svc;
372                 }
373         }
374
375         return NULL;
376 }
377
378
379 /*
380  *      Get service by {fwmark} in the service table.
381  */
382 static inline struct ip_vs_service *
383 __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
384 {
385         unsigned hash;
386         struct ip_vs_service *svc;
387
388         /* Check for fwmark addressed entries */
389         hash = ip_vs_svc_fwm_hashkey(net, fwmark);
390
391         list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
392                 if (svc->fwmark == fwmark && svc->af == af
393                     && net_eq(svc->net, net)) {
394                         /* HIT */
395                         return svc;
396                 }
397         }
398
399         return NULL;
400 }
401
402 struct ip_vs_service *
403 ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
404                   const union nf_inet_addr *vaddr, __be16 vport)
405 {
406         struct ip_vs_service *svc;
407         struct netns_ipvs *ipvs = net_ipvs(net);
408
409         read_lock(&__ip_vs_svc_lock);
410
411         /*
412          *      Check the table hashed by fwmark first
413          */
414         if (fwmark) {
415                 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
416                 if (svc)
417                         goto out;
418         }
419
420         /*
421          *      Check the table hashed by <protocol,addr,port>
422          *      for "full" addressed entries
423          */
424         svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
425
426         if (svc == NULL
427             && protocol == IPPROTO_TCP
428             && atomic_read(&ipvs->ftpsvc_counter)
429             && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
430                 /*
431                  * Check if ftp service entry exists, the packet
432                  * might belong to FTP data connections.
433                  */
434                 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
435         }
436
437         if (svc == NULL
438             && atomic_read(&ipvs->nullsvc_counter)) {
439                 /*
440                  * Check if the catch-all port (port zero) exists
441                  */
442                 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
443         }
444
445   out:
446         if (svc)
447                 atomic_inc(&svc->usecnt);
448         read_unlock(&__ip_vs_svc_lock);
449
450         IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
451                       fwmark, ip_vs_proto_name(protocol),
452                       IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
453                       svc ? "hit" : "not hit");
454
455         return svc;
456 }
457
458
459 static inline void
460 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
461 {
462         atomic_inc(&svc->refcnt);
463         dest->svc = svc;
464 }
465
466 static void
467 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
468 {
469         struct ip_vs_service *svc = dest->svc;
470
471         dest->svc = NULL;
472         if (atomic_dec_and_test(&svc->refcnt)) {
473                 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
474                               svc->fwmark,
475                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
476                               ntohs(svc->port), atomic_read(&svc->usecnt));
477                 free_percpu(svc->stats.cpustats);
478                 kfree(svc);
479         }
480 }
481
482
483 /*
484  *      Returns hash value for real service
485  */
486 static inline unsigned ip_vs_rs_hashkey(int af,
487                                             const union nf_inet_addr *addr,
488                                             __be16 port)
489 {
490         register unsigned porth = ntohs(port);
491         __be32 addr_fold = addr->ip;
492
493 #ifdef CONFIG_IP_VS_IPV6
494         if (af == AF_INET6)
495                 addr_fold = addr->ip6[0]^addr->ip6[1]^
496                             addr->ip6[2]^addr->ip6[3];
497 #endif
498
499         return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
500                 & IP_VS_RTAB_MASK;
501 }
502
503 /*
504  *      Hashes ip_vs_dest in rs_table by <proto,addr,port>.
505  *      should be called with locked tables.
506  */
507 static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
508 {
509         unsigned hash;
510
511         if (!list_empty(&dest->d_list)) {
512                 return 0;
513         }
514
515         /*
516          *      Hash by proto,addr,port,
517          *      which are the parameters of the real service.
518          */
519         hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
520
521         list_add(&dest->d_list, &ipvs->rs_table[hash]);
522
523         return 1;
524 }
525
526 /*
527  *      UNhashes ip_vs_dest from rs_table.
528  *      should be called with locked tables.
529  */
530 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
531 {
532         /*
533          * Remove it from the rs_table table.
534          */
535         if (!list_empty(&dest->d_list)) {
536                 list_del(&dest->d_list);
537                 INIT_LIST_HEAD(&dest->d_list);
538         }
539
540         return 1;
541 }
542
543 /*
544  *      Lookup real service by <proto,addr,port> in the real service table.
545  */
546 struct ip_vs_dest *
547 ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
548                           const union nf_inet_addr *daddr,
549                           __be16 dport)
550 {
551         struct netns_ipvs *ipvs = net_ipvs(net);
552         unsigned hash;
553         struct ip_vs_dest *dest;
554
555         /*
556          *      Check for "full" addressed entries
557          *      Return the first found entry
558          */
559         hash = ip_vs_rs_hashkey(af, daddr, dport);
560
561         read_lock(&ipvs->rs_lock);
562         list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
563                 if ((dest->af == af)
564                     && ip_vs_addr_equal(af, &dest->addr, daddr)
565                     && (dest->port == dport)
566                     && ((dest->protocol == protocol) ||
567                         dest->vfwmark)) {
568                         /* HIT */
569                         read_unlock(&ipvs->rs_lock);
570                         return dest;
571                 }
572         }
573         read_unlock(&ipvs->rs_lock);
574
575         return NULL;
576 }
577
578 /*
579  *      Lookup destination by {addr,port} in the given service
580  */
581 static struct ip_vs_dest *
582 ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
583                   __be16 dport)
584 {
585         struct ip_vs_dest *dest;
586
587         /*
588          * Find the destination for the given service
589          */
590         list_for_each_entry(dest, &svc->destinations, n_list) {
591                 if ((dest->af == svc->af)
592                     && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
593                     && (dest->port == dport)) {
594                         /* HIT */
595                         return dest;
596                 }
597         }
598
599         return NULL;
600 }
601
602 /*
603  * Find destination by {daddr,dport,vaddr,protocol}
604  * Cretaed to be used in ip_vs_process_message() in
605  * the backup synchronization daemon. It finds the
606  * destination to be bound to the received connection
607  * on the backup.
608  *
609  * ip_vs_lookup_real_service() looked promissing, but
610  * seems not working as expected.
611  */
612 struct ip_vs_dest *ip_vs_find_dest(struct net  *net, int af,
613                                    const union nf_inet_addr *daddr,
614                                    __be16 dport,
615                                    const union nf_inet_addr *vaddr,
616                                    __be16 vport, __u16 protocol, __u32 fwmark)
617 {
618         struct ip_vs_dest *dest;
619         struct ip_vs_service *svc;
620
621         svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
622         if (!svc)
623                 return NULL;
624         dest = ip_vs_lookup_dest(svc, daddr, dport);
625         if (dest)
626                 atomic_inc(&dest->refcnt);
627         ip_vs_service_put(svc);
628         return dest;
629 }
630
631 /*
632  *  Lookup dest by {svc,addr,port} in the destination trash.
633  *  The destination trash is used to hold the destinations that are removed
634  *  from the service table but are still referenced by some conn entries.
635  *  The reason to add the destination trash is when the dest is temporary
636  *  down (either by administrator or by monitor program), the dest can be
637  *  picked back from the trash, the remaining connections to the dest can
638  *  continue, and the counting information of the dest is also useful for
639  *  scheduling.
640  */
641 static struct ip_vs_dest *
642 ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
643                      __be16 dport)
644 {
645         struct ip_vs_dest *dest, *nxt;
646         struct netns_ipvs *ipvs = net_ipvs(svc->net);
647
648         /*
649          * Find the destination in trash
650          */
651         list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
652                 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
653                               "dest->refcnt=%d\n",
654                               dest->vfwmark,
655                               IP_VS_DBG_ADDR(svc->af, &dest->addr),
656                               ntohs(dest->port),
657                               atomic_read(&dest->refcnt));
658                 if (dest->af == svc->af &&
659                     ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
660                     dest->port == dport &&
661                     dest->vfwmark == svc->fwmark &&
662                     dest->protocol == svc->protocol &&
663                     (svc->fwmark ||
664                      (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
665                       dest->vport == svc->port))) {
666                         /* HIT */
667                         return dest;
668                 }
669
670                 /*
671                  * Try to purge the destination from trash if not referenced
672                  */
673                 if (atomic_read(&dest->refcnt) == 1) {
674                         IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
675                                       "from trash\n",
676                                       dest->vfwmark,
677                                       IP_VS_DBG_ADDR(svc->af, &dest->addr),
678                                       ntohs(dest->port));
679                         list_del(&dest->n_list);
680                         ip_vs_dst_reset(dest);
681                         __ip_vs_unbind_svc(dest);
682                         free_percpu(dest->stats.cpustats);
683                         kfree(dest);
684                 }
685         }
686
687         return NULL;
688 }
689
690
691 /*
692  *  Clean up all the destinations in the trash
693  *  Called by the ip_vs_control_cleanup()
694  *
695  *  When the ip_vs_control_clearup is activated by ipvs module exit,
696  *  the service tables must have been flushed and all the connections
697  *  are expired, and the refcnt of each destination in the trash must
698  *  be 1, so we simply release them here.
699  */
700 static void ip_vs_trash_cleanup(struct net *net)
701 {
702         struct ip_vs_dest *dest, *nxt;
703         struct netns_ipvs *ipvs = net_ipvs(net);
704
705         list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
706                 list_del(&dest->n_list);
707                 ip_vs_dst_reset(dest);
708                 __ip_vs_unbind_svc(dest);
709                 free_percpu(dest->stats.cpustats);
710                 kfree(dest);
711         }
712 }
713
714 static void
715 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
716 {
717 #define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
718 #define IP_VS_SHOW_STATS_RATE(r) dst->r = src->ustats.r
719
720         spin_lock_bh(&src->lock);
721
722         IP_VS_SHOW_STATS_COUNTER(conns);
723         IP_VS_SHOW_STATS_COUNTER(inpkts);
724         IP_VS_SHOW_STATS_COUNTER(outpkts);
725         IP_VS_SHOW_STATS_COUNTER(inbytes);
726         IP_VS_SHOW_STATS_COUNTER(outbytes);
727
728         IP_VS_SHOW_STATS_RATE(cps);
729         IP_VS_SHOW_STATS_RATE(inpps);
730         IP_VS_SHOW_STATS_RATE(outpps);
731         IP_VS_SHOW_STATS_RATE(inbps);
732         IP_VS_SHOW_STATS_RATE(outbps);
733
734         spin_unlock_bh(&src->lock);
735 }
736
737 static void
738 ip_vs_zero_stats(struct ip_vs_stats *stats)
739 {
740         spin_lock_bh(&stats->lock);
741
742         /* get current counters as zero point, rates are zeroed */
743
744 #define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
745 #define IP_VS_ZERO_STATS_RATE(r) stats->ustats.r = 0
746
747         IP_VS_ZERO_STATS_COUNTER(conns);
748         IP_VS_ZERO_STATS_COUNTER(inpkts);
749         IP_VS_ZERO_STATS_COUNTER(outpkts);
750         IP_VS_ZERO_STATS_COUNTER(inbytes);
751         IP_VS_ZERO_STATS_COUNTER(outbytes);
752
753         IP_VS_ZERO_STATS_RATE(cps);
754         IP_VS_ZERO_STATS_RATE(inpps);
755         IP_VS_ZERO_STATS_RATE(outpps);
756         IP_VS_ZERO_STATS_RATE(inbps);
757         IP_VS_ZERO_STATS_RATE(outbps);
758
759         ip_vs_zero_estimator(stats);
760
761         spin_unlock_bh(&stats->lock);
762 }
763
764 /*
765  *      Update a destination in the given service
766  */
767 static void
768 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
769                     struct ip_vs_dest_user_kern *udest, int add)
770 {
771         struct netns_ipvs *ipvs = net_ipvs(svc->net);
772         int conn_flags;
773
774         /* set the weight and the flags */
775         atomic_set(&dest->weight, udest->weight);
776         conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
777         conn_flags |= IP_VS_CONN_F_INACTIVE;
778
779         /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
780         if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
781                 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
782         } else {
783                 /*
784                  *    Put the real service in rs_table if not present.
785                  *    For now only for NAT!
786                  */
787                 write_lock_bh(&ipvs->rs_lock);
788                 ip_vs_rs_hash(ipvs, dest);
789                 write_unlock_bh(&ipvs->rs_lock);
790         }
791         atomic_set(&dest->conn_flags, conn_flags);
792
793         /* bind the service */
794         if (!dest->svc) {
795                 __ip_vs_bind_svc(dest, svc);
796         } else {
797                 if (dest->svc != svc) {
798                         __ip_vs_unbind_svc(dest);
799                         ip_vs_zero_stats(&dest->stats);
800                         __ip_vs_bind_svc(dest, svc);
801                 }
802         }
803
804         /* set the dest status flags */
805         dest->flags |= IP_VS_DEST_F_AVAILABLE;
806
807         if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
808                 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
809         dest->u_threshold = udest->u_threshold;
810         dest->l_threshold = udest->l_threshold;
811
812         spin_lock(&dest->dst_lock);
813         ip_vs_dst_reset(dest);
814         spin_unlock(&dest->dst_lock);
815
816         if (add)
817                 ip_vs_new_estimator(svc->net, &dest->stats);
818
819         write_lock_bh(&__ip_vs_svc_lock);
820
821         /* Wait until all other svc users go away */
822         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
823
824         if (add) {
825                 list_add(&dest->n_list, &svc->destinations);
826                 svc->num_dests++;
827         }
828
829         /* call the update_service, because server weight may be changed */
830         if (svc->scheduler->update_service)
831                 svc->scheduler->update_service(svc);
832
833         write_unlock_bh(&__ip_vs_svc_lock);
834 }
835
836
837 /*
838  *      Create a destination for the given service
839  */
840 static int
841 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
842                struct ip_vs_dest **dest_p)
843 {
844         struct ip_vs_dest *dest;
845         unsigned atype;
846
847         EnterFunction(2);
848
849 #ifdef CONFIG_IP_VS_IPV6
850         if (svc->af == AF_INET6) {
851                 atype = ipv6_addr_type(&udest->addr.in6);
852                 if ((!(atype & IPV6_ADDR_UNICAST) ||
853                         atype & IPV6_ADDR_LINKLOCAL) &&
854                         !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
855                         return -EINVAL;
856         } else
857 #endif
858         {
859                 atype = inet_addr_type(svc->net, udest->addr.ip);
860                 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
861                         return -EINVAL;
862         }
863
864         dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
865         if (dest == NULL) {
866                 pr_err("%s(): no memory.\n", __func__);
867                 return -ENOMEM;
868         }
869         dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
870         if (!dest->stats.cpustats) {
871                 pr_err("%s() alloc_percpu failed\n", __func__);
872                 goto err_alloc;
873         }
874
875         dest->af = svc->af;
876         dest->protocol = svc->protocol;
877         dest->vaddr = svc->addr;
878         dest->vport = svc->port;
879         dest->vfwmark = svc->fwmark;
880         ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
881         dest->port = udest->port;
882
883         atomic_set(&dest->activeconns, 0);
884         atomic_set(&dest->inactconns, 0);
885         atomic_set(&dest->persistconns, 0);
886         atomic_set(&dest->refcnt, 1);
887
888         INIT_LIST_HEAD(&dest->d_list);
889         spin_lock_init(&dest->dst_lock);
890         spin_lock_init(&dest->stats.lock);
891         __ip_vs_update_dest(svc, dest, udest, 1);
892
893         *dest_p = dest;
894
895         LeaveFunction(2);
896         return 0;
897
898 err_alloc:
899         kfree(dest);
900         return -ENOMEM;
901 }
902
903
904 /*
905  *      Add a destination into an existing service
906  */
907 static int
908 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
909 {
910         struct ip_vs_dest *dest;
911         union nf_inet_addr daddr;
912         __be16 dport = udest->port;
913         int ret;
914
915         EnterFunction(2);
916
917         if (udest->weight < 0) {
918                 pr_err("%s(): server weight less than zero\n", __func__);
919                 return -ERANGE;
920         }
921
922         if (udest->l_threshold > udest->u_threshold) {
923                 pr_err("%s(): lower threshold is higher than upper threshold\n",
924                         __func__);
925                 return -ERANGE;
926         }
927
928         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
929
930         /*
931          * Check if the dest already exists in the list
932          */
933         dest = ip_vs_lookup_dest(svc, &daddr, dport);
934
935         if (dest != NULL) {
936                 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
937                 return -EEXIST;
938         }
939
940         /*
941          * Check if the dest already exists in the trash and
942          * is from the same service
943          */
944         dest = ip_vs_trash_get_dest(svc, &daddr, dport);
945
946         if (dest != NULL) {
947                 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
948                               "dest->refcnt=%d, service %u/%s:%u\n",
949                               IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
950                               atomic_read(&dest->refcnt),
951                               dest->vfwmark,
952                               IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
953                               ntohs(dest->vport));
954
955                 /*
956                  * Get the destination from the trash
957                  */
958                 list_del(&dest->n_list);
959
960                 __ip_vs_update_dest(svc, dest, udest, 1);
961                 ret = 0;
962         } else {
963                 /*
964                  * Allocate and initialize the dest structure
965                  */
966                 ret = ip_vs_new_dest(svc, udest, &dest);
967         }
968         LeaveFunction(2);
969
970         return ret;
971 }
972
973
974 /*
975  *      Edit a destination in the given service
976  */
977 static int
978 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
979 {
980         struct ip_vs_dest *dest;
981         union nf_inet_addr daddr;
982         __be16 dport = udest->port;
983
984         EnterFunction(2);
985
986         if (udest->weight < 0) {
987                 pr_err("%s(): server weight less than zero\n", __func__);
988                 return -ERANGE;
989         }
990
991         if (udest->l_threshold > udest->u_threshold) {
992                 pr_err("%s(): lower threshold is higher than upper threshold\n",
993                         __func__);
994                 return -ERANGE;
995         }
996
997         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
998
999         /*
1000          *  Lookup the destination list
1001          */
1002         dest = ip_vs_lookup_dest(svc, &daddr, dport);
1003
1004         if (dest == NULL) {
1005                 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1006                 return -ENOENT;
1007         }
1008
1009         __ip_vs_update_dest(svc, dest, udest, 0);
1010         LeaveFunction(2);
1011
1012         return 0;
1013 }
1014
1015
1016 /*
1017  *      Delete a destination (must be already unlinked from the service)
1018  */
1019 static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
1020 {
1021         struct netns_ipvs *ipvs = net_ipvs(net);
1022
1023         ip_vs_kill_estimator(net, &dest->stats);
1024
1025         /*
1026          *  Remove it from the d-linked list with the real services.
1027          */
1028         write_lock_bh(&ipvs->rs_lock);
1029         ip_vs_rs_unhash(dest);
1030         write_unlock_bh(&ipvs->rs_lock);
1031
1032         /*
1033          *  Decrease the refcnt of the dest, and free the dest
1034          *  if nobody refers to it (refcnt=0). Otherwise, throw
1035          *  the destination into the trash.
1036          */
1037         if (atomic_dec_and_test(&dest->refcnt)) {
1038                 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1039                               dest->vfwmark,
1040                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1041                               ntohs(dest->port));
1042                 ip_vs_dst_reset(dest);
1043                 /* simply decrease svc->refcnt here, let the caller check
1044                    and release the service if nobody refers to it.
1045                    Only user context can release destination and service,
1046                    and only one user context can update virtual service at a
1047                    time, so the operation here is OK */
1048                 atomic_dec(&dest->svc->refcnt);
1049                 free_percpu(dest->stats.cpustats);
1050                 kfree(dest);
1051         } else {
1052                 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1053                               "dest->refcnt=%d\n",
1054                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1055                               ntohs(dest->port),
1056                               atomic_read(&dest->refcnt));
1057                 list_add(&dest->n_list, &ipvs->dest_trash);
1058                 atomic_inc(&dest->refcnt);
1059         }
1060 }
1061
1062
1063 /*
1064  *      Unlink a destination from the given service
1065  */
1066 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1067                                 struct ip_vs_dest *dest,
1068                                 int svcupd)
1069 {
1070         dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1071
1072         /*
1073          *  Remove it from the d-linked destination list.
1074          */
1075         list_del(&dest->n_list);
1076         svc->num_dests--;
1077
1078         /*
1079          *  Call the update_service function of its scheduler
1080          */
1081         if (svcupd && svc->scheduler->update_service)
1082                         svc->scheduler->update_service(svc);
1083 }
1084
1085
1086 /*
1087  *      Delete a destination server in the given service
1088  */
1089 static int
1090 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1091 {
1092         struct ip_vs_dest *dest;
1093         __be16 dport = udest->port;
1094
1095         EnterFunction(2);
1096
1097         dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1098
1099         if (dest == NULL) {
1100                 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1101                 return -ENOENT;
1102         }
1103
1104         write_lock_bh(&__ip_vs_svc_lock);
1105
1106         /*
1107          *      Wait until all other svc users go away.
1108          */
1109         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1110
1111         /*
1112          *      Unlink dest from the service
1113          */
1114         __ip_vs_unlink_dest(svc, dest, 1);
1115
1116         write_unlock_bh(&__ip_vs_svc_lock);
1117
1118         /*
1119          *      Delete the destination
1120          */
1121         __ip_vs_del_dest(svc->net, dest);
1122
1123         LeaveFunction(2);
1124
1125         return 0;
1126 }
1127
1128
1129 /*
1130  *      Add a service into the service hash table
1131  */
1132 static int
1133 ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1134                   struct ip_vs_service **svc_p)
1135 {
1136         int ret = 0;
1137         struct ip_vs_scheduler *sched = NULL;
1138         struct ip_vs_pe *pe = NULL;
1139         struct ip_vs_service *svc = NULL;
1140         struct netns_ipvs *ipvs = net_ipvs(net);
1141
1142         /* increase the module use count */
1143         ip_vs_use_count_inc();
1144
1145         /* Lookup the scheduler by 'u->sched_name' */
1146         sched = ip_vs_scheduler_get(u->sched_name);
1147         if (sched == NULL) {
1148                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1149                 ret = -ENOENT;
1150                 goto out_err;
1151         }
1152
1153         if (u->pe_name && *u->pe_name) {
1154                 pe = ip_vs_pe_getbyname(u->pe_name);
1155                 if (pe == NULL) {
1156                         pr_info("persistence engine module ip_vs_pe_%s "
1157                                 "not found\n", u->pe_name);
1158                         ret = -ENOENT;
1159                         goto out_err;
1160                 }
1161         }
1162
1163 #ifdef CONFIG_IP_VS_IPV6
1164         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1165                 ret = -EINVAL;
1166                 goto out_err;
1167         }
1168 #endif
1169
1170         svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1171         if (svc == NULL) {
1172                 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1173                 ret = -ENOMEM;
1174                 goto out_err;
1175         }
1176         svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1177         if (!svc->stats.cpustats) {
1178                 pr_err("%s() alloc_percpu failed\n", __func__);
1179                 goto out_err;
1180         }
1181
1182         /* I'm the first user of the service */
1183         atomic_set(&svc->usecnt, 0);
1184         atomic_set(&svc->refcnt, 0);
1185
1186         svc->af = u->af;
1187         svc->protocol = u->protocol;
1188         ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1189         svc->port = u->port;
1190         svc->fwmark = u->fwmark;
1191         svc->flags = u->flags;
1192         svc->timeout = u->timeout * HZ;
1193         svc->netmask = u->netmask;
1194         svc->net = net;
1195
1196         INIT_LIST_HEAD(&svc->destinations);
1197         rwlock_init(&svc->sched_lock);
1198         spin_lock_init(&svc->stats.lock);
1199
1200         /* Bind the scheduler */
1201         ret = ip_vs_bind_scheduler(svc, sched);
1202         if (ret)
1203                 goto out_err;
1204         sched = NULL;
1205
1206         /* Bind the ct retriever */
1207         ip_vs_bind_pe(svc, pe);
1208         pe = NULL;
1209
1210         /* Update the virtual service counters */
1211         if (svc->port == FTPPORT)
1212                 atomic_inc(&ipvs->ftpsvc_counter);
1213         else if (svc->port == 0)
1214                 atomic_inc(&ipvs->nullsvc_counter);
1215
1216         ip_vs_new_estimator(net, &svc->stats);
1217
1218         /* Count only IPv4 services for old get/setsockopt interface */
1219         if (svc->af == AF_INET)
1220                 ipvs->num_services++;
1221
1222         /* Hash the service into the service table */
1223         write_lock_bh(&__ip_vs_svc_lock);
1224         ip_vs_svc_hash(svc);
1225         write_unlock_bh(&__ip_vs_svc_lock);
1226
1227         *svc_p = svc;
1228         return 0;
1229
1230
1231  out_err:
1232         if (svc != NULL) {
1233                 ip_vs_unbind_scheduler(svc);
1234                 if (svc->inc) {
1235                         local_bh_disable();
1236                         ip_vs_app_inc_put(svc->inc);
1237                         local_bh_enable();
1238                 }
1239                 if (svc->stats.cpustats)
1240                         free_percpu(svc->stats.cpustats);
1241                 kfree(svc);
1242         }
1243         ip_vs_scheduler_put(sched);
1244         ip_vs_pe_put(pe);
1245
1246         /* decrease the module use count */
1247         ip_vs_use_count_dec();
1248
1249         return ret;
1250 }
1251
1252
1253 /*
1254  *      Edit a service and bind it with a new scheduler
1255  */
1256 static int
1257 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1258 {
1259         struct ip_vs_scheduler *sched, *old_sched;
1260         struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1261         int ret = 0;
1262
1263         /*
1264          * Lookup the scheduler, by 'u->sched_name'
1265          */
1266         sched = ip_vs_scheduler_get(u->sched_name);
1267         if (sched == NULL) {
1268                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1269                 return -ENOENT;
1270         }
1271         old_sched = sched;
1272
1273         if (u->pe_name && *u->pe_name) {
1274                 pe = ip_vs_pe_getbyname(u->pe_name);
1275                 if (pe == NULL) {
1276                         pr_info("persistence engine module ip_vs_pe_%s "
1277                                 "not found\n", u->pe_name);
1278                         ret = -ENOENT;
1279                         goto out;
1280                 }
1281                 old_pe = pe;
1282         }
1283
1284 #ifdef CONFIG_IP_VS_IPV6
1285         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1286                 ret = -EINVAL;
1287                 goto out;
1288         }
1289 #endif
1290
1291         write_lock_bh(&__ip_vs_svc_lock);
1292
1293         /*
1294          * Wait until all other svc users go away.
1295          */
1296         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1297
1298         /*
1299          * Set the flags and timeout value
1300          */
1301         svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1302         svc->timeout = u->timeout * HZ;
1303         svc->netmask = u->netmask;
1304
1305         old_sched = svc->scheduler;
1306         if (sched != old_sched) {
1307                 /*
1308                  * Unbind the old scheduler
1309                  */
1310                 if ((ret = ip_vs_unbind_scheduler(svc))) {
1311                         old_sched = sched;
1312                         goto out_unlock;
1313                 }
1314
1315                 /*
1316                  * Bind the new scheduler
1317                  */
1318                 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1319                         /*
1320                          * If ip_vs_bind_scheduler fails, restore the old
1321                          * scheduler.
1322                          * The main reason of failure is out of memory.
1323                          *
1324                          * The question is if the old scheduler can be
1325                          * restored all the time. TODO: if it cannot be
1326                          * restored some time, we must delete the service,
1327                          * otherwise the system may crash.
1328                          */
1329                         ip_vs_bind_scheduler(svc, old_sched);
1330                         old_sched = sched;
1331                         goto out_unlock;
1332                 }
1333         }
1334
1335         old_pe = svc->pe;
1336         if (pe != old_pe) {
1337                 ip_vs_unbind_pe(svc);
1338                 ip_vs_bind_pe(svc, pe);
1339         }
1340
1341   out_unlock:
1342         write_unlock_bh(&__ip_vs_svc_lock);
1343   out:
1344         ip_vs_scheduler_put(old_sched);
1345         ip_vs_pe_put(old_pe);
1346         return ret;
1347 }
1348
1349
1350 /*
1351  *      Delete a service from the service list
1352  *      - The service must be unlinked, unlocked and not referenced!
1353  *      - We are called under _bh lock
1354  */
1355 static void __ip_vs_del_service(struct ip_vs_service *svc)
1356 {
1357         struct ip_vs_dest *dest, *nxt;
1358         struct ip_vs_scheduler *old_sched;
1359         struct ip_vs_pe *old_pe;
1360         struct netns_ipvs *ipvs = net_ipvs(svc->net);
1361
1362         pr_info("%s: enter\n", __func__);
1363
1364         /* Count only IPv4 services for old get/setsockopt interface */
1365         if (svc->af == AF_INET)
1366                 ipvs->num_services--;
1367
1368         ip_vs_kill_estimator(svc->net, &svc->stats);
1369
1370         /* Unbind scheduler */
1371         old_sched = svc->scheduler;
1372         ip_vs_unbind_scheduler(svc);
1373         ip_vs_scheduler_put(old_sched);
1374
1375         /* Unbind persistence engine */
1376         old_pe = svc->pe;
1377         ip_vs_unbind_pe(svc);
1378         ip_vs_pe_put(old_pe);
1379
1380         /* Unbind app inc */
1381         if (svc->inc) {
1382                 ip_vs_app_inc_put(svc->inc);
1383                 svc->inc = NULL;
1384         }
1385
1386         /*
1387          *    Unlink the whole destination list
1388          */
1389         list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1390                 __ip_vs_unlink_dest(svc, dest, 0);
1391                 __ip_vs_del_dest(svc->net, dest);
1392         }
1393
1394         /*
1395          *    Update the virtual service counters
1396          */
1397         if (svc->port == FTPPORT)
1398                 atomic_dec(&ipvs->ftpsvc_counter);
1399         else if (svc->port == 0)
1400                 atomic_dec(&ipvs->nullsvc_counter);
1401
1402         /*
1403          *    Free the service if nobody refers to it
1404          */
1405         if (atomic_read(&svc->refcnt) == 0) {
1406                 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1407                               svc->fwmark,
1408                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
1409                               ntohs(svc->port), atomic_read(&svc->usecnt));
1410                 free_percpu(svc->stats.cpustats);
1411                 kfree(svc);
1412         }
1413
1414         /* decrease the module use count */
1415         ip_vs_use_count_dec();
1416 }
1417
1418 /*
1419  * Unlink a service from list and try to delete it if its refcnt reached 0
1420  */
1421 static void ip_vs_unlink_service(struct ip_vs_service *svc)
1422 {
1423         /*
1424          * Unhash it from the service table
1425          */
1426         write_lock_bh(&__ip_vs_svc_lock);
1427
1428         ip_vs_svc_unhash(svc);
1429
1430         /*
1431          * Wait until all the svc users go away.
1432          */
1433         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1434
1435         __ip_vs_del_service(svc);
1436
1437         write_unlock_bh(&__ip_vs_svc_lock);
1438 }
1439
1440 /*
1441  *      Delete a service from the service list
1442  */
1443 static int ip_vs_del_service(struct ip_vs_service *svc)
1444 {
1445         if (svc == NULL)
1446                 return -EEXIST;
1447         ip_vs_unlink_service(svc);
1448
1449         return 0;
1450 }
1451
1452
1453 /*
1454  *      Flush all the virtual services
1455  */
1456 static int ip_vs_flush(struct net *net)
1457 {
1458         int idx;
1459         struct ip_vs_service *svc, *nxt;
1460
1461         /*
1462          * Flush the service table hashed by <netns,protocol,addr,port>
1463          */
1464         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1465                 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1466                                          s_list) {
1467                         if (net_eq(svc->net, net))
1468                                 ip_vs_unlink_service(svc);
1469                 }
1470         }
1471
1472         /*
1473          * Flush the service table hashed by fwmark
1474          */
1475         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1476                 list_for_each_entry_safe(svc, nxt,
1477                                          &ip_vs_svc_fwm_table[idx], f_list) {
1478                         if (net_eq(svc->net, net))
1479                                 ip_vs_unlink_service(svc);
1480                 }
1481         }
1482
1483         return 0;
1484 }
1485
1486
1487 /*
1488  *      Zero counters in a service or all services
1489  */
1490 static int ip_vs_zero_service(struct ip_vs_service *svc)
1491 {
1492         struct ip_vs_dest *dest;
1493
1494         write_lock_bh(&__ip_vs_svc_lock);
1495         list_for_each_entry(dest, &svc->destinations, n_list) {
1496                 ip_vs_zero_stats(&dest->stats);
1497         }
1498         ip_vs_zero_stats(&svc->stats);
1499         write_unlock_bh(&__ip_vs_svc_lock);
1500         return 0;
1501 }
1502
1503 static int ip_vs_zero_all(struct net *net)
1504 {
1505         int idx;
1506         struct ip_vs_service *svc;
1507
1508         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1509                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1510                         if (net_eq(svc->net, net))
1511                                 ip_vs_zero_service(svc);
1512                 }
1513         }
1514
1515         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1516                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1517                         if (net_eq(svc->net, net))
1518                                 ip_vs_zero_service(svc);
1519                 }
1520         }
1521
1522         ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
1523         return 0;
1524 }
1525
1526
1527 static int
1528 proc_do_defense_mode(ctl_table *table, int write,
1529                      void __user *buffer, size_t *lenp, loff_t *ppos)
1530 {
1531         struct net *net = current->nsproxy->net_ns;
1532         int *valp = table->data;
1533         int val = *valp;
1534         int rc;
1535
1536         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1537         if (write && (*valp != val)) {
1538                 if ((*valp < 0) || (*valp > 3)) {
1539                         /* Restore the correct value */
1540                         *valp = val;
1541                 } else {
1542                         update_defense_level(net_ipvs(net));
1543                 }
1544         }
1545         return rc;
1546 }
1547
1548
1549 static int
1550 proc_do_sync_threshold(ctl_table *table, int write,
1551                        void __user *buffer, size_t *lenp, loff_t *ppos)
1552 {
1553         int *valp = table->data;
1554         int val[2];
1555         int rc;
1556
1557         /* backup the value first */
1558         memcpy(val, valp, sizeof(val));
1559
1560         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1561         if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1562                 /* Restore the correct value */
1563                 memcpy(valp, val, sizeof(val));
1564         }
1565         return rc;
1566 }
1567
1568 static int
1569 proc_do_sync_mode(ctl_table *table, int write,
1570                      void __user *buffer, size_t *lenp, loff_t *ppos)
1571 {
1572         int *valp = table->data;
1573         int val = *valp;
1574         int rc;
1575
1576         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1577         if (write && (*valp != val)) {
1578                 if ((*valp < 0) || (*valp > 1)) {
1579                         /* Restore the correct value */
1580                         *valp = val;
1581                 } else {
1582                         struct net *net = current->nsproxy->net_ns;
1583                         ip_vs_sync_switch_mode(net, val);
1584                 }
1585         }
1586         return rc;
1587 }
1588
1589 /*
1590  *      IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1591  *      Do not change order or insert new entries without
1592  *      align with netns init in __ip_vs_control_init()
1593  */
1594
1595 static struct ctl_table vs_vars[] = {
1596         {
1597                 .procname       = "amemthresh",
1598                 .maxlen         = sizeof(int),
1599                 .mode           = 0644,
1600                 .proc_handler   = proc_dointvec,
1601         },
1602         {
1603                 .procname       = "am_droprate",
1604                 .maxlen         = sizeof(int),
1605                 .mode           = 0644,
1606                 .proc_handler   = proc_dointvec,
1607         },
1608         {
1609                 .procname       = "drop_entry",
1610                 .maxlen         = sizeof(int),
1611                 .mode           = 0644,
1612                 .proc_handler   = proc_do_defense_mode,
1613         },
1614         {
1615                 .procname       = "drop_packet",
1616                 .maxlen         = sizeof(int),
1617                 .mode           = 0644,
1618                 .proc_handler   = proc_do_defense_mode,
1619         },
1620 #ifdef CONFIG_IP_VS_NFCT
1621         {
1622                 .procname       = "conntrack",
1623                 .maxlen         = sizeof(int),
1624                 .mode           = 0644,
1625                 .proc_handler   = &proc_dointvec,
1626         },
1627 #endif
1628         {
1629                 .procname       = "secure_tcp",
1630                 .maxlen         = sizeof(int),
1631                 .mode           = 0644,
1632                 .proc_handler   = proc_do_defense_mode,
1633         },
1634         {
1635                 .procname       = "snat_reroute",
1636                 .maxlen         = sizeof(int),
1637                 .mode           = 0644,
1638                 .proc_handler   = &proc_dointvec,
1639         },
1640         {
1641                 .procname       = "sync_version",
1642                 .maxlen         = sizeof(int),
1643                 .mode           = 0644,
1644                 .proc_handler   = &proc_do_sync_mode,
1645         },
1646         {
1647                 .procname       = "cache_bypass",
1648                 .maxlen         = sizeof(int),
1649                 .mode           = 0644,
1650                 .proc_handler   = proc_dointvec,
1651         },
1652         {
1653                 .procname       = "expire_nodest_conn",
1654                 .maxlen         = sizeof(int),
1655                 .mode           = 0644,
1656                 .proc_handler   = proc_dointvec,
1657         },
1658         {
1659                 .procname       = "expire_quiescent_template",
1660                 .maxlen         = sizeof(int),
1661                 .mode           = 0644,
1662                 .proc_handler   = proc_dointvec,
1663         },
1664         {
1665                 .procname       = "sync_threshold",
1666                 .maxlen         =
1667                         sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1668                 .mode           = 0644,
1669                 .proc_handler   = proc_do_sync_threshold,
1670         },
1671         {
1672                 .procname       = "nat_icmp_send",
1673                 .maxlen         = sizeof(int),
1674                 .mode           = 0644,
1675                 .proc_handler   = proc_dointvec,
1676         },
1677 #ifdef CONFIG_IP_VS_DEBUG
1678         {
1679                 .procname       = "debug_level",
1680                 .data           = &sysctl_ip_vs_debug_level,
1681                 .maxlen         = sizeof(int),
1682                 .mode           = 0644,
1683                 .proc_handler   = proc_dointvec,
1684         },
1685 #endif
1686 #if 0
1687         {
1688                 .procname       = "timeout_established",
1689                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1690                 .maxlen         = sizeof(int),
1691                 .mode           = 0644,
1692                 .proc_handler   = proc_dointvec_jiffies,
1693         },
1694         {
1695                 .procname       = "timeout_synsent",
1696                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1697                 .maxlen         = sizeof(int),
1698                 .mode           = 0644,
1699                 .proc_handler   = proc_dointvec_jiffies,
1700         },
1701         {
1702                 .procname       = "timeout_synrecv",
1703                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1704                 .maxlen         = sizeof(int),
1705                 .mode           = 0644,
1706                 .proc_handler   = proc_dointvec_jiffies,
1707         },
1708         {
1709                 .procname       = "timeout_finwait",
1710                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1711                 .maxlen         = sizeof(int),
1712                 .mode           = 0644,
1713                 .proc_handler   = proc_dointvec_jiffies,
1714         },
1715         {
1716                 .procname       = "timeout_timewait",
1717                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1718                 .maxlen         = sizeof(int),
1719                 .mode           = 0644,
1720                 .proc_handler   = proc_dointvec_jiffies,
1721         },
1722         {
1723                 .procname       = "timeout_close",
1724                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1725                 .maxlen         = sizeof(int),
1726                 .mode           = 0644,
1727                 .proc_handler   = proc_dointvec_jiffies,
1728         },
1729         {
1730                 .procname       = "timeout_closewait",
1731                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1732                 .maxlen         = sizeof(int),
1733                 .mode           = 0644,
1734                 .proc_handler   = proc_dointvec_jiffies,
1735         },
1736         {
1737                 .procname       = "timeout_lastack",
1738                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1739                 .maxlen         = sizeof(int),
1740                 .mode           = 0644,
1741                 .proc_handler   = proc_dointvec_jiffies,
1742         },
1743         {
1744                 .procname       = "timeout_listen",
1745                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1746                 .maxlen         = sizeof(int),
1747                 .mode           = 0644,
1748                 .proc_handler   = proc_dointvec_jiffies,
1749         },
1750         {
1751                 .procname       = "timeout_synack",
1752                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1753                 .maxlen         = sizeof(int),
1754                 .mode           = 0644,
1755                 .proc_handler   = proc_dointvec_jiffies,
1756         },
1757         {
1758                 .procname       = "timeout_udp",
1759                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1760                 .maxlen         = sizeof(int),
1761                 .mode           = 0644,
1762                 .proc_handler   = proc_dointvec_jiffies,
1763         },
1764         {
1765                 .procname       = "timeout_icmp",
1766                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1767                 .maxlen         = sizeof(int),
1768                 .mode           = 0644,
1769                 .proc_handler   = proc_dointvec_jiffies,
1770         },
1771 #endif
1772         { }
1773 };
1774
1775 const struct ctl_path net_vs_ctl_path[] = {
1776         { .procname = "net", },
1777         { .procname = "ipv4", },
1778         { .procname = "vs", },
1779         { }
1780 };
1781 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1782
1783 #ifdef CONFIG_PROC_FS
1784
1785 struct ip_vs_iter {
1786         struct seq_net_private p;  /* Do not move this, netns depends upon it*/
1787         struct list_head *table;
1788         int bucket;
1789 };
1790
1791 /*
1792  *      Write the contents of the VS rule table to a PROCfs file.
1793  *      (It is kept just for backward compatibility)
1794  */
1795 static inline const char *ip_vs_fwd_name(unsigned flags)
1796 {
1797         switch (flags & IP_VS_CONN_F_FWD_MASK) {
1798         case IP_VS_CONN_F_LOCALNODE:
1799                 return "Local";
1800         case IP_VS_CONN_F_TUNNEL:
1801                 return "Tunnel";
1802         case IP_VS_CONN_F_DROUTE:
1803                 return "Route";
1804         default:
1805                 return "Masq";
1806         }
1807 }
1808
1809
1810 /* Get the Nth entry in the two lists */
1811 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1812 {
1813         struct net *net = seq_file_net(seq);
1814         struct ip_vs_iter *iter = seq->private;
1815         int idx;
1816         struct ip_vs_service *svc;
1817
1818         /* look in hash by protocol */
1819         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1820                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1821                         if (net_eq(svc->net, net) && pos-- == 0) {
1822                                 iter->table = ip_vs_svc_table;
1823                                 iter->bucket = idx;
1824                                 return svc;
1825                         }
1826                 }
1827         }
1828
1829         /* keep looking in fwmark */
1830         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1831                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1832                         if (net_eq(svc->net, net) && pos-- == 0) {
1833                                 iter->table = ip_vs_svc_fwm_table;
1834                                 iter->bucket = idx;
1835                                 return svc;
1836                         }
1837                 }
1838         }
1839
1840         return NULL;
1841 }
1842
1843 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1844 __acquires(__ip_vs_svc_lock)
1845 {
1846
1847         read_lock_bh(&__ip_vs_svc_lock);
1848         return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1849 }
1850
1851
1852 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1853 {
1854         struct list_head *e;
1855         struct ip_vs_iter *iter;
1856         struct ip_vs_service *svc;
1857
1858         ++*pos;
1859         if (v == SEQ_START_TOKEN)
1860                 return ip_vs_info_array(seq,0);
1861
1862         svc = v;
1863         iter = seq->private;
1864
1865         if (iter->table == ip_vs_svc_table) {
1866                 /* next service in table hashed by protocol */
1867                 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1868                         return list_entry(e, struct ip_vs_service, s_list);
1869
1870
1871                 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1872                         list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1873                                             s_list) {
1874                                 return svc;
1875                         }
1876                 }
1877
1878                 iter->table = ip_vs_svc_fwm_table;
1879                 iter->bucket = -1;
1880                 goto scan_fwmark;
1881         }
1882
1883         /* next service in hashed by fwmark */
1884         if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1885                 return list_entry(e, struct ip_vs_service, f_list);
1886
1887  scan_fwmark:
1888         while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1889                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1890                                     f_list)
1891                         return svc;
1892         }
1893
1894         return NULL;
1895 }
1896
/*
 * seq_file ->stop: drop the read lock on the service table that
 * ip_vs_info_seq_start() acquired.  Neither argument is used.
 */
static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
__releases(__ip_vs_svc_lock)
{
        read_unlock_bh(&__ip_vs_svc_lock);
}
1902
1903
1904 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1905 {
1906         if (v == SEQ_START_TOKEN) {
1907                 seq_printf(seq,
1908                         "IP Virtual Server version %d.%d.%d (size=%d)\n",
1909                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1910                 seq_puts(seq,
1911                          "Prot LocalAddress:Port Scheduler Flags\n");
1912                 seq_puts(seq,
1913                          "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1914         } else {
1915                 const struct ip_vs_service *svc = v;
1916                 const struct ip_vs_iter *iter = seq->private;
1917                 const struct ip_vs_dest *dest;
1918
1919                 if (iter->table == ip_vs_svc_table) {
1920 #ifdef CONFIG_IP_VS_IPV6
1921                         if (svc->af == AF_INET6)
1922                                 seq_printf(seq, "%s  [%pI6]:%04X %s ",
1923                                            ip_vs_proto_name(svc->protocol),
1924                                            &svc->addr.in6,
1925                                            ntohs(svc->port),
1926                                            svc->scheduler->name);
1927                         else
1928 #endif
1929                                 seq_printf(seq, "%s  %08X:%04X %s %s ",
1930                                            ip_vs_proto_name(svc->protocol),
1931                                            ntohl(svc->addr.ip),
1932                                            ntohs(svc->port),
1933                                            svc->scheduler->name,
1934                                            (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1935                 } else {
1936                         seq_printf(seq, "FWM  %08X %s %s",
1937                                    svc->fwmark, svc->scheduler->name,
1938                                    (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1939                 }
1940
1941                 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1942                         seq_printf(seq, "persistent %d %08X\n",
1943                                 svc->timeout,
1944                                 ntohl(svc->netmask));
1945                 else
1946                         seq_putc(seq, '\n');
1947
1948                 list_for_each_entry(dest, &svc->destinations, n_list) {
1949 #ifdef CONFIG_IP_VS_IPV6
1950                         if (dest->af == AF_INET6)
1951                                 seq_printf(seq,
1952                                            "  -> [%pI6]:%04X"
1953                                            "      %-7s %-6d %-10d %-10d\n",
1954                                            &dest->addr.in6,
1955                                            ntohs(dest->port),
1956                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1957                                            atomic_read(&dest->weight),
1958                                            atomic_read(&dest->activeconns),
1959                                            atomic_read(&dest->inactconns));
1960                         else
1961 #endif
1962                                 seq_printf(seq,
1963                                            "  -> %08X:%04X      "
1964                                            "%-7s %-6d %-10d %-10d\n",
1965                                            ntohl(dest->addr.ip),
1966                                            ntohs(dest->port),
1967                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1968                                            atomic_read(&dest->weight),
1969                                            atomic_read(&dest->activeconns),
1970                                            atomic_read(&dest->inactconns));
1971
1972                 }
1973         }
1974         return 0;
1975 }
1976
/*
 * seq_file callbacks for the service listing: ->start takes the service
 * table read lock, ->stop releases it, ->next walks both hash tables and
 * ->show prints the header or one service with its destinations.
 */
static const struct seq_operations ip_vs_info_seq_ops = {
        .start = ip_vs_info_seq_start,
        .next  = ip_vs_info_seq_next,
        .stop  = ip_vs_info_seq_stop,
        .show  = ip_vs_info_seq_show,
};
1983
1984 static int ip_vs_info_open(struct inode *inode, struct file *file)
1985 {
1986         return seq_open_net(inode, file, &ip_vs_info_seq_ops,
1987                         sizeof(struct ip_vs_iter));
1988 }
1989
1990 static const struct file_operations ip_vs_info_fops = {
1991         .owner   = THIS_MODULE,
1992         .open    = ip_vs_info_open,
1993         .read    = seq_read,
1994         .llseek  = seq_lseek,
1995         .release = seq_release_private,
1996 };
1997
1998 #endif
1999
2000 #ifdef CONFIG_PROC_FS
2001 static int ip_vs_stats_show(struct seq_file *seq, void *v)
2002 {
2003         struct net *net = seq_file_single_net(seq);
2004         struct ip_vs_stats_user show;
2005
2006 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2007         seq_puts(seq,
2008                  "   Total Incoming Outgoing         Incoming         Outgoing\n");
2009         seq_printf(seq,
2010                    "   Conns  Packets  Packets            Bytes            Bytes\n");
2011
2012         ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
2013         seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
2014                    show.inpkts, show.outpkts,
2015                    (unsigned long long) show.inbytes,
2016                    (unsigned long long) show.outbytes);
2017
2018 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2019         seq_puts(seq,
2020                    " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2021         seq_printf(seq, "%8X %8X %8X %16X %16X\n",
2022                         show.cps, show.inpps, show.outpps,
2023                         show.inbps, show.outbps);
2024
2025         return 0;
2026 }
2027
/*
 * ->open for the statistics file: a netns-aware single-record seq_file
 * whose content is produced by ip_vs_stats_show().
 */
static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
{
        return single_open_net(inode, file, ip_vs_stats_show);
}
2032
2033 static const struct file_operations ip_vs_stats_fops = {
2034         .owner = THIS_MODULE,
2035         .open = ip_vs_stats_seq_open,
2036         .read = seq_read,
2037         .llseek = seq_lseek,
2038         .release = single_release,
2039 };
2040
2041 static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2042 {
2043         struct net *net = seq_file_single_net(seq);
2044         struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
2045         struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
2046         int i;
2047
2048 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2049         seq_puts(seq,
2050                  "       Total Incoming Outgoing         Incoming         Outgoing\n");
2051         seq_printf(seq,
2052                    "CPU    Conns  Packets  Packets            Bytes            Bytes\n");
2053
2054         for_each_possible_cpu(i) {
2055                 struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
2056                 unsigned int start;
2057                 __u64 inbytes, outbytes;
2058
2059                 do {
2060                         start = u64_stats_fetch_begin_bh(&u->syncp);
2061                         inbytes = u->ustats.inbytes;
2062                         outbytes = u->ustats.outbytes;
2063                 } while (u64_stats_fetch_retry_bh(&u->syncp, start));
2064
2065                 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2066                            i, u->ustats.conns, u->ustats.inpkts,
2067                            u->ustats.outpkts, (__u64)inbytes,
2068                            (__u64)outbytes);
2069         }
2070
2071         spin_lock_bh(&tot_stats->lock);
2072         seq_printf(seq, "  ~ %8X %8X %8X %16LX %16LX\n\n",
2073                    tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2074                    tot_stats->ustats.outpkts,
2075                    (unsigned long long) tot_stats->ustats.inbytes,
2076                    (unsigned long long) tot_stats->ustats.outbytes);
2077
2078 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2079         seq_puts(seq,
2080                    "     Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2081         seq_printf(seq, "    %8X %8X %8X %16X %16X\n",
2082                         tot_stats->ustats.cps,
2083                         tot_stats->ustats.inpps,
2084                         tot_stats->ustats.outpps,
2085                         tot_stats->ustats.inbps,
2086                         tot_stats->ustats.outbps);
2087         spin_unlock_bh(&tot_stats->lock);
2088
2089         return 0;
2090 }
2091
/*
 * ->open for the per-CPU statistics file: a netns-aware single-record
 * seq_file whose content is produced by ip_vs_stats_percpu_show().
 */
static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
{
        return single_open_net(inode, file, ip_vs_stats_percpu_show);
}
2096
2097 static const struct file_operations ip_vs_stats_percpu_fops = {
2098         .owner = THIS_MODULE,
2099         .open = ip_vs_stats_percpu_seq_open,
2100         .read = seq_read,
2101         .llseek = seq_lseek,
2102         .release = single_release,
2103 };
2104 #endif
2105
2106 /*
2107  *      Set timeout values for tcp tcpfin udp in the timeout_table.
2108  */
2109 static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
2110 {
2111 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2112         struct ip_vs_proto_data *pd;
2113 #endif
2114
2115         IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2116                   u->tcp_timeout,
2117                   u->tcp_fin_timeout,
2118                   u->udp_timeout);
2119
2120 #ifdef CONFIG_IP_VS_PROTO_TCP
2121         if (u->tcp_timeout) {
2122                 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2123                 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
2124                         = u->tcp_timeout * HZ;
2125         }
2126
2127         if (u->tcp_fin_timeout) {
2128                 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2129                 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
2130                         = u->tcp_fin_timeout * HZ;
2131         }
2132 #endif
2133
2134 #ifdef CONFIG_IP_VS_PROTO_UDP
2135         if (u->udp_timeout) {
2136                 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2137                 pd->timeout_table[IP_VS_UDP_S_NORMAL]
2138                         = u->udp_timeout * HZ;
2139         }
2140 #endif
2141         return 0;
2142 }
2143
2144
/* Index of a set command in set_arglen[] (offset from IP_VS_BASE_CTL). */
#define SET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
/* Argument sizes of the individual set commands. */
#define SERVICE_ARG_LEN         (sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN         (sizeof(struct ip_vs_service_user) +    \
                                 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN         (sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN          (sizeof(struct ip_vs_daemon_user))
/* Size of the on-stack argument buffer in do_ip_vs_set_ctl(). */
#define MAX_ARG_LEN             SVCDEST_ARG_LEN

/*
 * Exact argument length that do_ip_vs_set_ctl() requires for each set
 * command; a request with any other length is rejected with -EINVAL.
 */
static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
        [SET_CMDID(IP_VS_SO_SET_ADD)]           = SERVICE_ARG_LEN,
        [SET_CMDID(IP_VS_SO_SET_EDIT)]          = SERVICE_ARG_LEN,
        [SET_CMDID(IP_VS_SO_SET_DEL)]           = SERVICE_ARG_LEN,
        [SET_CMDID(IP_VS_SO_SET_FLUSH)]         = 0,
        [SET_CMDID(IP_VS_SO_SET_ADDDEST)]       = SVCDEST_ARG_LEN,
        [SET_CMDID(IP_VS_SO_SET_DELDEST)]       = SVCDEST_ARG_LEN,
        [SET_CMDID(IP_VS_SO_SET_EDITDEST)]      = SVCDEST_ARG_LEN,
        [SET_CMDID(IP_VS_SO_SET_TIMEOUT)]       = TIMEOUT_ARG_LEN,
        [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]   = DAEMON_ARG_LEN,
        [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]    = DAEMON_ARG_LEN,
        [SET_CMDID(IP_VS_SO_SET_ZERO)]          = SERVICE_ARG_LEN,
};
2166
2167 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2168                                   struct ip_vs_service_user *usvc_compat)
2169 {
2170         memset(usvc, 0, sizeof(*usvc));
2171
2172         usvc->af                = AF_INET;
2173         usvc->protocol          = usvc_compat->protocol;
2174         usvc->addr.ip           = usvc_compat->addr;
2175         usvc->port              = usvc_compat->port;
2176         usvc->fwmark            = usvc_compat->fwmark;
2177
2178         /* Deep copy of sched_name is not needed here */
2179         usvc->sched_name        = usvc_compat->sched_name;
2180
2181         usvc->flags             = usvc_compat->flags;
2182         usvc->timeout           = usvc_compat->timeout;
2183         usvc->netmask           = usvc_compat->netmask;
2184 }
2185
2186 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2187                                    struct ip_vs_dest_user *udest_compat)
2188 {
2189         memset(udest, 0, sizeof(*udest));
2190
2191         udest->addr.ip          = udest_compat->addr;
2192         udest->port             = udest_compat->port;
2193         udest->conn_flags       = udest_compat->conn_flags;
2194         udest->weight           = udest_compat->weight;
2195         udest->u_threshold      = udest_compat->u_threshold;
2196         udest->l_threshold      = udest_compat->l_threshold;
2197 }
2198
2199 static int
2200 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2201 {
2202         struct net *net = sock_net(sk);
2203         int ret;
2204         unsigned char arg[MAX_ARG_LEN];
2205         struct ip_vs_service_user *usvc_compat;
2206         struct ip_vs_service_user_kern usvc;
2207         struct ip_vs_service *svc;
2208         struct ip_vs_dest_user *udest_compat;
2209         struct ip_vs_dest_user_kern udest;
2210
2211         if (!capable(CAP_NET_ADMIN))
2212                 return -EPERM;
2213
2214         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2215                 return -EINVAL;
2216         if (len < 0 || len >  MAX_ARG_LEN)
2217                 return -EINVAL;
2218         if (len != set_arglen[SET_CMDID(cmd)]) {
2219                 pr_err("set_ctl: len %u != %u\n",
2220                        len, set_arglen[SET_CMDID(cmd)]);
2221                 return -EINVAL;
2222         }
2223
2224         if (copy_from_user(arg, user, len) != 0)
2225                 return -EFAULT;
2226
2227         /* increase the module use count */
2228         ip_vs_use_count_inc();
2229
2230         if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2231                 ret = -ERESTARTSYS;
2232                 goto out_dec;
2233         }
2234
2235         if (cmd == IP_VS_SO_SET_FLUSH) {
2236                 /* Flush the virtual service */
2237                 ret = ip_vs_flush(net);
2238                 goto out_unlock;
2239         } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2240                 /* Set timeout values for (tcp tcpfin udp) */
2241                 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
2242                 goto out_unlock;
2243         } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2244                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2245                 ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2246                                         dm->syncid);
2247                 goto out_unlock;
2248         } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2249                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2250                 ret = stop_sync_thread(net, dm->state);
2251                 goto out_unlock;
2252         }
2253
2254         usvc_compat = (struct ip_vs_service_user *)arg;
2255         udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2256
2257         /* We only use the new structs internally, so copy userspace compat
2258          * structs to extended internal versions */
2259         ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2260         ip_vs_copy_udest_compat(&udest, udest_compat);
2261
2262         if (cmd == IP_VS_SO_SET_ZERO) {
2263                 /* if no service address is set, zero counters in all */
2264                 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2265                         ret = ip_vs_zero_all(net);
2266                         goto out_unlock;
2267                 }
2268         }
2269
2270         /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2271         if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2272             usvc.protocol != IPPROTO_SCTP) {
2273                 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2274                        usvc.protocol, &usvc.addr.ip,
2275                        ntohs(usvc.port), usvc.sched_name);
2276                 ret = -EFAULT;
2277                 goto out_unlock;
2278         }
2279
2280         /* Lookup the exact service by <protocol, addr, port> or fwmark */
2281         if (usvc.fwmark == 0)
2282                 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
2283                                            &usvc.addr, usvc.port);
2284         else
2285                 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
2286
2287         if (cmd != IP_VS_SO_SET_ADD
2288             && (svc == NULL || svc->protocol != usvc.protocol)) {
2289                 ret = -ESRCH;
2290                 goto out_unlock;
2291         }
2292
2293         switch (cmd) {
2294         case IP_VS_SO_SET_ADD:
2295                 if (svc != NULL)
2296                         ret = -EEXIST;
2297                 else
2298                         ret = ip_vs_add_service(net, &usvc, &svc);
2299                 break;
2300         case IP_VS_SO_SET_EDIT:
2301                 ret = ip_vs_edit_service(svc, &usvc);
2302                 break;
2303         case IP_VS_SO_SET_DEL:
2304                 ret = ip_vs_del_service(svc);
2305                 if (!ret)
2306                         goto out_unlock;
2307                 break;
2308         case IP_VS_SO_SET_ZERO:
2309                 ret = ip_vs_zero_service(svc);
2310                 break;
2311         case IP_VS_SO_SET_ADDDEST:
2312                 ret = ip_vs_add_dest(svc, &udest);
2313                 break;
2314         case IP_VS_SO_SET_EDITDEST:
2315                 ret = ip_vs_edit_dest(svc, &udest);
2316                 break;
2317         case IP_VS_SO_SET_DELDEST:
2318                 ret = ip_vs_del_dest(svc, &udest);
2319                 break;
2320         default:
2321                 ret = -EINVAL;
2322         }
2323
2324   out_unlock:
2325         mutex_unlock(&__ip_vs_mutex);
2326   out_dec:
2327         /* decrease the module use count */
2328         ip_vs_use_count_dec();
2329
2330         return ret;
2331 }
2332
2333
2334 static void
2335 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2336 {
2337         dst->protocol = src->protocol;
2338         dst->addr = src->addr.ip;
2339         dst->port = src->port;
2340         dst->fwmark = src->fwmark;
2341         strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2342         dst->flags = src->flags;
2343         dst->timeout = src->timeout / HZ;
2344         dst->netmask = src->netmask;
2345         dst->num_dests = src->num_dests;
2346         ip_vs_copy_stats(&dst->stats, &src->stats);
2347 }
2348
2349 static inline int
2350 __ip_vs_get_service_entries(struct net *net,
2351                             const struct ip_vs_get_services *get,
2352                             struct ip_vs_get_services __user *uptr)
2353 {
2354         int idx, count=0;
2355         struct ip_vs_service *svc;
2356         struct ip_vs_service_entry entry;
2357         int ret = 0;
2358
2359         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2360                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2361                         /* Only expose IPv4 entries to old interface */
2362                         if (svc->af != AF_INET || !net_eq(svc->net, net))
2363                                 continue;
2364
2365                         if (count >= get->num_services)
2366                                 goto out;
2367                         memset(&entry, 0, sizeof(entry));
2368                         ip_vs_copy_service(&entry, svc);
2369                         if (copy_to_user(&uptr->entrytable[count],
2370                                          &entry, sizeof(entry))) {
2371                                 ret = -EFAULT;
2372                                 goto out;
2373                         }
2374                         count++;
2375                 }
2376         }
2377
2378         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2379                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2380                         /* Only expose IPv4 entries to old interface */
2381                         if (svc->af != AF_INET || !net_eq(svc->net, net))
2382                                 continue;
2383
2384                         if (count >= get->num_services)
2385                                 goto out;
2386                         memset(&entry, 0, sizeof(entry));
2387                         ip_vs_copy_service(&entry, svc);
2388                         if (copy_to_user(&uptr->entrytable[count],
2389                                          &entry, sizeof(entry))) {
2390                                 ret = -EFAULT;
2391                                 goto out;
2392                         }
2393                         count++;
2394                 }
2395         }
2396   out:
2397         return ret;
2398 }
2399
2400 static inline int
2401 __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
2402                          struct ip_vs_get_dests __user *uptr)
2403 {
2404         struct ip_vs_service *svc;
2405         union nf_inet_addr addr = { .ip = get->addr };
2406         int ret = 0;
2407
2408         if (get->fwmark)
2409                 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
2410         else
2411                 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
2412                                            get->port);
2413
2414         if (svc) {
2415                 int count = 0;
2416                 struct ip_vs_dest *dest;
2417                 struct ip_vs_dest_entry entry;
2418
2419                 list_for_each_entry(dest, &svc->destinations, n_list) {
2420                         if (count >= get->num_dests)
2421                                 break;
2422
2423                         entry.addr = dest->addr.ip;
2424                         entry.port = dest->port;
2425                         entry.conn_flags = atomic_read(&dest->conn_flags);
2426                         entry.weight = atomic_read(&dest->weight);
2427                         entry.u_threshold = dest->u_threshold;
2428                         entry.l_threshold = dest->l_threshold;
2429                         entry.activeconns = atomic_read(&dest->activeconns);
2430                         entry.inactconns = atomic_read(&dest->inactconns);
2431                         entry.persistconns = atomic_read(&dest->persistconns);
2432                         ip_vs_copy_stats(&entry.stats, &dest->stats);
2433                         if (copy_to_user(&uptr->entrytable[count],
2434                                          &entry, sizeof(entry))) {
2435                                 ret = -EFAULT;
2436                                 break;
2437                         }
2438                         count++;
2439                 }
2440         } else
2441                 ret = -ESRCH;
2442         return ret;
2443 }
2444
2445 static inline void
2446 __ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
2447 {
2448 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2449         struct ip_vs_proto_data *pd;
2450 #endif
2451
2452 #ifdef CONFIG_IP_VS_PROTO_TCP
2453         pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2454         u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2455         u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2456 #endif
2457 #ifdef CONFIG_IP_VS_PROTO_UDP
2458         pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2459         u->udp_timeout =
2460                         pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2461 #endif
2462 }
2463
2464
/* Index of a get command in get_arglen[] (offset from IP_VS_BASE_CTL). */
#define GET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
/* Sizes of the fixed part of the argument of the individual get commands. */
#define GET_INFO_ARG_LEN        (sizeof(struct ip_vs_getinfo))
#define GET_SERVICES_ARG_LEN    (sizeof(struct ip_vs_get_services))
#define GET_SERVICE_ARG_LEN     (sizeof(struct ip_vs_service_entry))
#define GET_DESTS_ARG_LEN       (sizeof(struct ip_vs_get_dests))
#define GET_TIMEOUT_ARG_LEN     (sizeof(struct ip_vs_timeout_user))
#define GET_DAEMON_ARG_LEN      (sizeof(struct ip_vs_daemon_user) * 2)

/*
 * Minimum argument length that do_ip_vs_get_ctl() accepts for each get
 * command; it is also the number of bytes copied in from user space
 * before the command is processed.
 */
static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
        [GET_CMDID(IP_VS_SO_GET_VERSION)]       = 64,
        [GET_CMDID(IP_VS_SO_GET_INFO)]          = GET_INFO_ARG_LEN,
        [GET_CMDID(IP_VS_SO_GET_SERVICES)]      = GET_SERVICES_ARG_LEN,
        [GET_CMDID(IP_VS_SO_GET_SERVICE)]       = GET_SERVICE_ARG_LEN,
        [GET_CMDID(IP_VS_SO_GET_DESTS)]         = GET_DESTS_ARG_LEN,
        [GET_CMDID(IP_VS_SO_GET_TIMEOUT)]       = GET_TIMEOUT_ARG_LEN,
        [GET_CMDID(IP_VS_SO_GET_DAEMON)]        = GET_DAEMON_ARG_LEN,
};
2482
2483 static int
2484 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2485 {
2486         unsigned char arg[128];
2487         int ret = 0;
2488         unsigned int copylen;
2489         struct net *net = sock_net(sk);
2490         struct netns_ipvs *ipvs = net_ipvs(net);
2491
2492         BUG_ON(!net);
2493         if (!capable(CAP_NET_ADMIN))
2494                 return -EPERM;
2495
2496         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2497                 return -EINVAL;
2498
2499         if (*len < get_arglen[GET_CMDID(cmd)]) {
2500                 pr_err("get_ctl: len %u < %u\n",
2501                        *len, get_arglen[GET_CMDID(cmd)]);
2502                 return -EINVAL;
2503         }
2504
2505         copylen = get_arglen[GET_CMDID(cmd)];
2506         if (copylen > 128)
2507                 return -EINVAL;
2508
2509         if (copy_from_user(arg, user, copylen) != 0)
2510                 return -EFAULT;
2511
2512         if (mutex_lock_interruptible(&__ip_vs_mutex))
2513                 return -ERESTARTSYS;
2514
2515         switch (cmd) {
2516         case IP_VS_SO_GET_VERSION:
2517         {
2518                 char buf[64];
2519
2520                 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2521                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2522                 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2523                         ret = -EFAULT;
2524                         goto out;
2525                 }
2526                 *len = strlen(buf)+1;
2527         }
2528         break;
2529
2530         case IP_VS_SO_GET_INFO:
2531         {
2532                 struct ip_vs_getinfo info;
2533                 info.version = IP_VS_VERSION_CODE;
2534                 info.size = ip_vs_conn_tab_size;
2535                 info.num_services = ipvs->num_services;
2536                 if (copy_to_user(user, &info, sizeof(info)) != 0)
2537                         ret = -EFAULT;
2538         }
2539         break;
2540
2541         case IP_VS_SO_GET_SERVICES:
2542         {
2543                 struct ip_vs_get_services *get;
2544                 int size;
2545
2546                 get = (struct ip_vs_get_services *)arg;
2547                 size = sizeof(*get) +
2548                         sizeof(struct ip_vs_service_entry) * get->num_services;
2549                 if (*len != size) {
2550                         pr_err("length: %u != %u\n", *len, size);
2551                         ret = -EINVAL;
2552                         goto out;
2553                 }
2554                 ret = __ip_vs_get_service_entries(net, get, user);
2555         }
2556         break;
2557
2558         case IP_VS_SO_GET_SERVICE:
2559         {
2560                 struct ip_vs_service_entry *entry;
2561                 struct ip_vs_service *svc;
2562                 union nf_inet_addr addr;
2563
2564                 entry = (struct ip_vs_service_entry *)arg;
2565                 addr.ip = entry->addr;
2566                 if (entry->fwmark)
2567                         svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
2568                 else
2569                         svc = __ip_vs_service_find(net, AF_INET,
2570                                                    entry->protocol, &addr,
2571                                                    entry->port);
2572                 if (svc) {
2573                         ip_vs_copy_service(entry, svc);
2574                         if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2575                                 ret = -EFAULT;
2576                 } else
2577                         ret = -ESRCH;
2578         }
2579         break;
2580
2581         case IP_VS_SO_GET_DESTS:
2582         {
2583                 struct ip_vs_get_dests *get;
2584                 int size;
2585
2586                 get = (struct ip_vs_get_dests *)arg;
2587                 size = sizeof(*get) +
2588                         sizeof(struct ip_vs_dest_entry) * get->num_dests;
2589                 if (*len != size) {
2590                         pr_err("length: %u != %u\n", *len, size);
2591                         ret = -EINVAL;
2592                         goto out;
2593                 }
2594                 ret = __ip_vs_get_dest_entries(net, get, user);
2595         }
2596         break;
2597
2598         case IP_VS_SO_GET_TIMEOUT:
2599         {
2600                 struct ip_vs_timeout_user t;
2601
2602                 __ip_vs_get_timeouts(net, &t);
2603                 if (copy_to_user(user, &t, sizeof(t)) != 0)
2604                         ret = -EFAULT;
2605         }
2606         break;
2607
2608         case IP_VS_SO_GET_DAEMON:
2609         {
2610                 struct ip_vs_daemon_user d[2];
2611
2612                 memset(&d, 0, sizeof(d));
2613                 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
2614                         d[0].state = IP_VS_STATE_MASTER;
2615                         strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2616                                 sizeof(d[0].mcast_ifn));
2617                         d[0].syncid = ipvs->master_syncid;
2618                 }
2619                 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
2620                         d[1].state = IP_VS_STATE_BACKUP;
2621                         strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2622                                 sizeof(d[1].mcast_ifn));
2623                         d[1].syncid = ipvs->backup_syncid;
2624                 }
2625                 if (copy_to_user(user, &d, sizeof(d)) != 0)
2626                         ret = -EFAULT;
2627         }
2628         break;
2629
2630         default:
2631                 ret = -EINVAL;
2632         }
2633
2634   out:
2635         mutex_unlock(&__ip_vs_mutex);
2636         return ret;
2637 }
2638
2639
2640 static struct nf_sockopt_ops ip_vs_sockopts = {
2641         .pf             = PF_INET,
2642         .set_optmin     = IP_VS_BASE_CTL,
2643         .set_optmax     = IP_VS_SO_SET_MAX+1,
2644         .set            = do_ip_vs_set_ctl,
2645         .get_optmin     = IP_VS_BASE_CTL,
2646         .get_optmax     = IP_VS_SO_GET_MAX+1,
2647         .get            = do_ip_vs_get_ctl,
2648         .owner          = THIS_MODULE,
2649 };
2650
2651 /*
2652  * Generic Netlink interface
2653  */
2654
2655 /* IPVS genetlink family */
2656 static struct genl_family ip_vs_genl_family = {
2657         .id             = GENL_ID_GENERATE,
2658         .hdrsize        = 0,
2659         .name           = IPVS_GENL_NAME,
2660         .version        = IPVS_GENL_VERSION,
2661         .maxattr        = IPVS_CMD_MAX,
2662         .netnsok        = true,         /* Make ipvsadm to work on netns */
2663 };
2664
2665 /* Policy used for first-level command attributes */
2666 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2667         [IPVS_CMD_ATTR_SERVICE]         = { .type = NLA_NESTED },
2668         [IPVS_CMD_ATTR_DEST]            = { .type = NLA_NESTED },
2669         [IPVS_CMD_ATTR_DAEMON]          = { .type = NLA_NESTED },
2670         [IPVS_CMD_ATTR_TIMEOUT_TCP]     = { .type = NLA_U32 },
2671         [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2672         [IPVS_CMD_ATTR_TIMEOUT_UDP]     = { .type = NLA_U32 },
2673 };
2674
2675 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2676 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2677         [IPVS_DAEMON_ATTR_STATE]        = { .type = NLA_U32 },
2678         [IPVS_DAEMON_ATTR_MCAST_IFN]    = { .type = NLA_NUL_STRING,
2679                                             .len = IP_VS_IFNAME_MAXLEN },
2680         [IPVS_DAEMON_ATTR_SYNC_ID]      = { .type = NLA_U32 },
2681 };
2682
2683 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2684 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2685         [IPVS_SVC_ATTR_AF]              = { .type = NLA_U16 },
2686         [IPVS_SVC_ATTR_PROTOCOL]        = { .type = NLA_U16 },
2687         [IPVS_SVC_ATTR_ADDR]            = { .type = NLA_BINARY,
2688                                             .len = sizeof(union nf_inet_addr) },
2689         [IPVS_SVC_ATTR_PORT]            = { .type = NLA_U16 },
2690         [IPVS_SVC_ATTR_FWMARK]          = { .type = NLA_U32 },
2691         [IPVS_SVC_ATTR_SCHED_NAME]      = { .type = NLA_NUL_STRING,
2692                                             .len = IP_VS_SCHEDNAME_MAXLEN },
2693         [IPVS_SVC_ATTR_PE_NAME]         = { .type = NLA_NUL_STRING,
2694                                             .len = IP_VS_PENAME_MAXLEN },
2695         [IPVS_SVC_ATTR_FLAGS]           = { .type = NLA_BINARY,
2696                                             .len = sizeof(struct ip_vs_flags) },
2697         [IPVS_SVC_ATTR_TIMEOUT]         = { .type = NLA_U32 },
2698         [IPVS_SVC_ATTR_NETMASK]         = { .type = NLA_U32 },
2699         [IPVS_SVC_ATTR_STATS]           = { .type = NLA_NESTED },
2700 };
2701
2702 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2703 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2704         [IPVS_DEST_ATTR_ADDR]           = { .type = NLA_BINARY,
2705                                             .len = sizeof(union nf_inet_addr) },
2706         [IPVS_DEST_ATTR_PORT]           = { .type = NLA_U16 },
2707         [IPVS_DEST_ATTR_FWD_METHOD]     = { .type = NLA_U32 },
2708         [IPVS_DEST_ATTR_WEIGHT]         = { .type = NLA_U32 },
2709         [IPVS_DEST_ATTR_U_THRESH]       = { .type = NLA_U32 },
2710         [IPVS_DEST_ATTR_L_THRESH]       = { .type = NLA_U32 },
2711         [IPVS_DEST_ATTR_ACTIVE_CONNS]   = { .type = NLA_U32 },
2712         [IPVS_DEST_ATTR_INACT_CONNS]    = { .type = NLA_U32 },
2713         [IPVS_DEST_ATTR_PERSIST_CONNS]  = { .type = NLA_U32 },
2714         [IPVS_DEST_ATTR_STATS]          = { .type = NLA_NESTED },
2715 };
2716
2717 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2718                                  struct ip_vs_stats *stats)
2719 {
2720         struct ip_vs_stats_user ustats;
2721         struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2722         if (!nl_stats)
2723                 return -EMSGSIZE;
2724
2725         ip_vs_copy_stats(&ustats, stats);
2726
2727         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns);
2728         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts);
2729         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts);
2730         NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes);
2731         NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes);
2732         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, ustats.cps);
2733         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps);
2734         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps);
2735         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps);
2736         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps);
2737
2738         nla_nest_end(skb, nl_stats);
2739
2740         return 0;
2741
2742 nla_put_failure:
2743         nla_nest_cancel(skb, nl_stats);
2744         return -EMSGSIZE;
2745 }
2746
2747 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2748                                    struct ip_vs_service *svc)
2749 {
2750         struct nlattr *nl_service;
2751         struct ip_vs_flags flags = { .flags = svc->flags,
2752                                      .mask = ~0 };
2753
2754         nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2755         if (!nl_service)
2756                 return -EMSGSIZE;
2757
2758         NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
2759
2760         if (svc->fwmark) {
2761                 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2762         } else {
2763                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2764                 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2765                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2766         }
2767
2768         NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2769         if (svc->pe)
2770                 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
2771         NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2772         NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2773         NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2774
2775         if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2776                 goto nla_put_failure;
2777
2778         nla_nest_end(skb, nl_service);
2779
2780         return 0;
2781
2782 nla_put_failure:
2783         nla_nest_cancel(skb, nl_service);
2784         return -EMSGSIZE;
2785 }
2786
2787 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2788                                    struct ip_vs_service *svc,
2789                                    struct netlink_callback *cb)
2790 {
2791         void *hdr;
2792
2793         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2794                           &ip_vs_genl_family, NLM_F_MULTI,
2795                           IPVS_CMD_NEW_SERVICE);
2796         if (!hdr)
2797                 return -EMSGSIZE;
2798
2799         if (ip_vs_genl_fill_service(skb, svc) < 0)
2800                 goto nla_put_failure;
2801
2802         return genlmsg_end(skb, hdr);
2803
2804 nla_put_failure:
2805         genlmsg_cancel(skb, hdr);
2806         return -EMSGSIZE;
2807 }
2808
2809 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2810                                     struct netlink_callback *cb)
2811 {
2812         int idx = 0, i;
2813         int start = cb->args[0];
2814         struct ip_vs_service *svc;
2815         struct net *net = skb_sknet(skb);
2816
2817         mutex_lock(&__ip_vs_mutex);
2818         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2819                 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2820                         if (++idx <= start || !net_eq(svc->net, net))
2821                                 continue;
2822                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2823                                 idx--;
2824                                 goto nla_put_failure;
2825                         }
2826                 }
2827         }
2828
2829         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2830                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2831                         if (++idx <= start || !net_eq(svc->net, net))
2832                                 continue;
2833                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2834                                 idx--;
2835                                 goto nla_put_failure;
2836                         }
2837                 }
2838         }
2839
2840 nla_put_failure:
2841         mutex_unlock(&__ip_vs_mutex);
2842         cb->args[0] = idx;
2843
2844         return skb->len;
2845 }
2846
2847 static int ip_vs_genl_parse_service(struct net *net,
2848                                     struct ip_vs_service_user_kern *usvc,
2849                                     struct nlattr *nla, int full_entry,
2850                                     struct ip_vs_service **ret_svc)
2851 {
2852         struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2853         struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2854         struct ip_vs_service *svc;
2855
2856         /* Parse mandatory identifying service fields first */
2857         if (nla == NULL ||
2858             nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2859                 return -EINVAL;
2860
2861         nla_af          = attrs[IPVS_SVC_ATTR_AF];
2862         nla_protocol    = attrs[IPVS_SVC_ATTR_PROTOCOL];
2863         nla_addr        = attrs[IPVS_SVC_ATTR_ADDR];
2864         nla_port        = attrs[IPVS_SVC_ATTR_PORT];
2865         nla_fwmark      = attrs[IPVS_SVC_ATTR_FWMARK];
2866
2867         if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2868                 return -EINVAL;
2869
2870         memset(usvc, 0, sizeof(*usvc));
2871
2872         usvc->af = nla_get_u16(nla_af);
2873 #ifdef CONFIG_IP_VS_IPV6
2874         if (usvc->af != AF_INET && usvc->af != AF_INET6)
2875 #else
2876         if (usvc->af != AF_INET)
2877 #endif
2878                 return -EAFNOSUPPORT;
2879
2880         if (nla_fwmark) {
2881                 usvc->protocol = IPPROTO_TCP;
2882                 usvc->fwmark = nla_get_u32(nla_fwmark);
2883         } else {
2884                 usvc->protocol = nla_get_u16(nla_protocol);
2885                 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2886                 usvc->port = nla_get_u16(nla_port);
2887                 usvc->fwmark = 0;
2888         }
2889
2890         if (usvc->fwmark)
2891                 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
2892         else
2893                 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
2894                                            &usvc->addr, usvc->port);
2895         *ret_svc = svc;
2896
2897         /* If a full entry was requested, check for the additional fields */
2898         if (full_entry) {
2899                 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
2900                               *nla_netmask;
2901                 struct ip_vs_flags flags;
2902
2903                 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2904                 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
2905                 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2906                 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2907                 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2908
2909                 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2910                         return -EINVAL;
2911
2912                 nla_memcpy(&flags, nla_flags, sizeof(flags));
2913
2914                 /* prefill flags from service if it already exists */
2915                 if (svc)
2916                         usvc->flags = svc->flags;
2917
2918                 /* set new flags from userland */
2919                 usvc->flags = (usvc->flags & ~flags.mask) |
2920                               (flags.flags & flags.mask);
2921                 usvc->sched_name = nla_data(nla_sched);
2922                 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
2923                 usvc->timeout = nla_get_u32(nla_timeout);
2924                 usvc->netmask = nla_get_u32(nla_netmask);
2925         }
2926
2927         return 0;
2928 }
2929
2930 static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
2931                                                      struct nlattr *nla)
2932 {
2933         struct ip_vs_service_user_kern usvc;
2934         struct ip_vs_service *svc;
2935         int ret;
2936
2937         ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
2938         return ret ? ERR_PTR(ret) : svc;
2939 }
2940
2941 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2942 {
2943         struct nlattr *nl_dest;
2944
2945         nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2946         if (!nl_dest)
2947                 return -EMSGSIZE;
2948
2949         NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2950         NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2951
2952         NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2953                     atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2954         NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2955         NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2956         NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2957         NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2958                     atomic_read(&dest->activeconns));
2959         NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2960                     atomic_read(&dest->inactconns));
2961         NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2962                     atomic_read(&dest->persistconns));
2963
2964         if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2965                 goto nla_put_failure;
2966
2967         nla_nest_end(skb, nl_dest);
2968
2969         return 0;
2970
2971 nla_put_failure:
2972         nla_nest_cancel(skb, nl_dest);
2973         return -EMSGSIZE;
2974 }
2975
2976 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2977                                 struct netlink_callback *cb)
2978 {
2979         void *hdr;
2980
2981         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2982                           &ip_vs_genl_family, NLM_F_MULTI,
2983                           IPVS_CMD_NEW_DEST);
2984         if (!hdr)
2985                 return -EMSGSIZE;
2986
2987         if (ip_vs_genl_fill_dest(skb, dest) < 0)
2988                 goto nla_put_failure;
2989
2990         return genlmsg_end(skb, hdr);
2991
2992 nla_put_failure:
2993         genlmsg_cancel(skb, hdr);
2994         return -EMSGSIZE;
2995 }
2996
2997 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2998                                  struct netlink_callback *cb)
2999 {
3000         int idx = 0;
3001         int start = cb->args[0];
3002         struct ip_vs_service *svc;
3003         struct ip_vs_dest *dest;
3004         struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
3005         struct net *net = skb_sknet(skb);
3006
3007         mutex_lock(&__ip_vs_mutex);
3008
3009         /* Try to find the service for which to dump destinations */
3010         if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
3011                         IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
3012                 goto out_err;
3013
3014
3015         svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
3016         if (IS_ERR(svc) || svc == NULL)
3017                 goto out_err;
3018
3019         /* Dump the destinations */
3020         list_for_each_entry(dest, &svc->destinations, n_list) {
3021                 if (++idx <= start)
3022                         continue;
3023                 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
3024                         idx--;
3025                         goto nla_put_failure;
3026                 }
3027         }
3028
3029 nla_put_failure:
3030         cb->args[0] = idx;
3031
3032 out_err:
3033         mutex_unlock(&__ip_vs_mutex);
3034
3035         return skb->len;
3036 }
3037
3038 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
3039                                  struct nlattr *nla, int full_entry)
3040 {
3041         struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3042         struct nlattr *nla_addr, *nla_port;
3043
3044         /* Parse mandatory identifying destination fields first */
3045         if (nla == NULL ||
3046             nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3047                 return -EINVAL;
3048
3049         nla_addr        = attrs[IPVS_DEST_ATTR_ADDR];
3050         nla_port        = attrs[IPVS_DEST_ATTR_PORT];
3051
3052         if (!(nla_addr && nla_port))
3053                 return -EINVAL;
3054
3055         memset(udest, 0, sizeof(*udest));
3056
3057         nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3058         udest->port = nla_get_u16(nla_port);
3059
3060         /* If a full entry was requested, check for the additional fields */
3061         if (full_entry) {
3062                 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3063                               *nla_l_thresh;
3064
3065                 nla_fwd         = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3066                 nla_weight      = attrs[IPVS_DEST_ATTR_WEIGHT];
3067                 nla_u_thresh    = attrs[IPVS_DEST_ATTR_U_THRESH];
3068                 nla_l_thresh    = attrs[IPVS_DEST_ATTR_L_THRESH];
3069
3070                 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3071                         return -EINVAL;
3072
3073                 udest->conn_flags = nla_get_u32(nla_fwd)
3074                                     & IP_VS_CONN_F_FWD_MASK;
3075                 udest->weight = nla_get_u32(nla_weight);
3076                 udest->u_threshold = nla_get_u32(nla_u_thresh);
3077                 udest->l_threshold = nla_get_u32(nla_l_thresh);
3078         }
3079
3080         return 0;
3081 }
3082
3083 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3084                                   const char *mcast_ifn, __be32 syncid)
3085 {
3086         struct nlattr *nl_daemon;
3087
3088         nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3089         if (!nl_daemon)
3090                 return -EMSGSIZE;
3091
3092         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
3093         NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
3094         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3095
3096         nla_nest_end(skb, nl_daemon);
3097
3098         return 0;
3099
3100 nla_put_failure:
3101         nla_nest_cancel(skb, nl_daemon);
3102         return -EMSGSIZE;
3103 }
3104
3105 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3106                                   const char *mcast_ifn, __be32 syncid,
3107                                   struct netlink_callback *cb)
3108 {
3109         void *hdr;
3110         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3111                           &ip_vs_genl_family, NLM_F_MULTI,
3112                           IPVS_CMD_NEW_DAEMON);
3113         if (!hdr)
3114                 return -EMSGSIZE;
3115
3116         if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3117                 goto nla_put_failure;
3118
3119         return genlmsg_end(skb, hdr);
3120
3121 nla_put_failure:
3122         genlmsg_cancel(skb, hdr);
3123         return -EMSGSIZE;
3124 }
3125
3126 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3127                                    struct netlink_callback *cb)
3128 {
3129         struct net *net = skb_net(skb);
3130         struct netns_ipvs *ipvs = net_ipvs(net);
3131
3132         mutex_lock(&__ip_vs_mutex);
3133         if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3134                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3135                                            ipvs->master_mcast_ifn,
3136                                            ipvs->master_syncid, cb) < 0)
3137                         goto nla_put_failure;
3138
3139                 cb->args[0] = 1;
3140         }
3141
3142         if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3143                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3144                                            ipvs->backup_mcast_ifn,
3145                                            ipvs->backup_syncid, cb) < 0)
3146                         goto nla_put_failure;
3147
3148                 cb->args[1] = 1;
3149         }
3150
3151 nla_put_failure:
3152         mutex_unlock(&__ip_vs_mutex);
3153
3154         return skb->len;
3155 }
3156
3157 static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
3158 {
3159         if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3160               attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3161               attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3162                 return -EINVAL;
3163
3164         return start_sync_thread(net,
3165                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3166                                  nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3167                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3168 }
3169
3170 static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
3171 {
3172         if (!attrs[IPVS_DAEMON_ATTR_STATE])
3173                 return -EINVAL;
3174
3175         return stop_sync_thread(net,
3176                                 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3177 }
3178
3179 static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
3180 {
3181         struct ip_vs_timeout_user t;
3182
3183         __ip_vs_get_timeouts(net, &t);
3184
3185         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3186                 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3187
3188         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3189                 t.tcp_fin_timeout =
3190                         nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3191
3192         if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3193                 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3194
3195         return ip_vs_set_timeout(net, &t);
3196 }
3197
3198 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3199 {
3200         struct ip_vs_service *svc = NULL;
3201         struct ip_vs_service_user_kern usvc;
3202         struct ip_vs_dest_user_kern udest;
3203         int ret = 0, cmd;
3204         int need_full_svc = 0, need_full_dest = 0;
3205         struct net *net;
3206         struct netns_ipvs *ipvs;
3207
3208         net = skb_sknet(skb);
3209         ipvs = net_ipvs(net);
3210         cmd = info->genlhdr->cmd;
3211
3212         mutex_lock(&__ip_vs_mutex);
3213
3214         if (cmd == IPVS_CMD_FLUSH) {
3215                 ret = ip_vs_flush(net);
3216                 goto out;
3217         } else if (cmd == IPVS_CMD_SET_CONFIG) {
3218                 ret = ip_vs_genl_set_config(net, info->attrs);
3219                 goto out;
3220         } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3221                    cmd == IPVS_CMD_DEL_DAEMON) {
3222
3223                 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3224
3225                 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3226                     nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3227                                      info->attrs[IPVS_CMD_ATTR_DAEMON],
3228                                      ip_vs_daemon_policy)) {
3229                         ret = -EINVAL;
3230                         goto out;
3231                 }
3232
3233                 if (cmd == IPVS_CMD_NEW_DAEMON)
3234                         ret = ip_vs_genl_new_daemon(net, daemon_attrs);
3235                 else
3236                         ret = ip_vs_genl_del_daemon(net, daemon_attrs);
3237                 goto out;
3238         } else if (cmd == IPVS_CMD_ZERO &&
3239                    !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3240                 ret = ip_vs_zero_all(net);
3241                 goto out;
3242         }
3243
3244         /* All following commands require a service argument, so check if we
3245          * received a valid one. We need a full service specification when
3246          * adding / editing a service. Only identifying members otherwise. */
3247         if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3248                 need_full_svc = 1;
3249
3250         ret = ip_vs_genl_parse_service(net, &usvc,
3251                                        info->attrs[IPVS_CMD_ATTR_SERVICE],
3252                                        need_full_svc, &svc);
3253         if (ret)
3254                 goto out;
3255
3256         /* Unless we're adding a new service, the service must already exist */
3257         if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3258                 ret = -ESRCH;
3259                 goto out;
3260         }
3261
3262         /* Destination commands require a valid destination argument. For
3263          * adding / editing a destination, we need a full destination
3264          * specification. */
3265         if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3266             cmd == IPVS_CMD_DEL_DEST) {
3267                 if (cmd != IPVS_CMD_DEL_DEST)
3268                         need_full_dest = 1;
3269
3270                 ret = ip_vs_genl_parse_dest(&udest,
3271                                             info->attrs[IPVS_CMD_ATTR_DEST],
3272                                             need_full_dest);
3273                 if (ret)
3274                         goto out;
3275         }
3276
3277         switch (cmd) {
3278         case IPVS_CMD_NEW_SERVICE:
3279                 if (svc == NULL)
3280                         ret = ip_vs_add_service(net, &usvc, &svc);
3281                 else
3282                         ret = -EEXIST;
3283                 break;
3284         case IPVS_CMD_SET_SERVICE:
3285                 ret = ip_vs_edit_service(svc, &usvc);
3286                 break;
3287         case IPVS_CMD_DEL_SERVICE:
3288                 ret = ip_vs_del_service(svc);
3289                 /* do not use svc, it can be freed */
3290                 break;
3291         case IPVS_CMD_NEW_DEST:
3292                 ret = ip_vs_add_dest(svc, &udest);
3293                 break;
3294         case IPVS_CMD_SET_DEST:
3295                 ret = ip_vs_edit_dest(svc, &udest);
3296                 break;
3297         case IPVS_CMD_DEL_DEST:
3298                 ret = ip_vs_del_dest(svc, &udest);
3299                 break;
3300         case IPVS_CMD_ZERO:
3301                 ret = ip_vs_zero_service(svc);
3302                 break;
3303         default:
3304                 ret = -EINVAL;
3305         }
3306
3307 out:
3308         mutex_unlock(&__ip_vs_mutex);
3309
3310         return ret;
3311 }
3312
3313 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3314 {
3315         struct sk_buff *msg;
3316         void *reply;
3317         int ret, cmd, reply_cmd;
3318         struct net *net;
3319         struct netns_ipvs *ipvs;
3320
3321         net = skb_sknet(skb);
3322         ipvs = net_ipvs(net);
3323         cmd = info->genlhdr->cmd;
3324
3325         if (cmd == IPVS_CMD_GET_SERVICE)
3326                 reply_cmd = IPVS_CMD_NEW_SERVICE;
3327         else if (cmd == IPVS_CMD_GET_INFO)
3328                 reply_cmd = IPVS_CMD_SET_INFO;
3329         else if (cmd == IPVS_CMD_GET_CONFIG)
3330                 reply_cmd = IPVS_CMD_SET_CONFIG;
3331         else {
3332                 pr_err("unknown Generic Netlink command\n");
3333                 return -EINVAL;
3334         }
3335
3336         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3337         if (!msg)
3338                 return -ENOMEM;
3339
3340         mutex_lock(&__ip_vs_mutex);
3341
3342         reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3343         if (reply == NULL)
3344                 goto nla_put_failure;
3345
3346         switch (cmd) {
3347         case IPVS_CMD_GET_SERVICE:
3348         {
3349                 struct ip_vs_service *svc;
3350
3351                 svc = ip_vs_genl_find_service(net,
3352                                               info->attrs[IPVS_CMD_ATTR_SERVICE]);
3353                 if (IS_ERR(svc)) {
3354                         ret = PTR_ERR(svc);
3355                         goto out_err;
3356                 } else if (svc) {
3357                         ret = ip_vs_genl_fill_service(msg, svc);
3358                         if (ret)
3359                                 goto nla_put_failure;
3360                 } else {
3361                         ret = -ESRCH;
3362                         goto out_err;
3363                 }
3364
3365                 break;
3366         }
3367
3368         case IPVS_CMD_GET_CONFIG:
3369         {
3370                 struct ip_vs_timeout_user t;
3371
3372                 __ip_vs_get_timeouts(net, &t);
3373 #ifdef CONFIG_IP_VS_PROTO_TCP
3374                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3375                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3376                             t.tcp_fin_timeout);
3377 #endif
3378 #ifdef CONFIG_IP_VS_PROTO_UDP
3379                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3380 #endif
3381
3382                 break;
3383         }
3384
3385         case IPVS_CMD_GET_INFO:
3386                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3387                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3388                             ip_vs_conn_tab_size);
3389                 break;
3390         }
3391
3392         genlmsg_end(msg, reply);
3393         ret = genlmsg_reply(msg, info);
3394         goto out;
3395
3396 nla_put_failure:
3397         pr_err("not enough space in Netlink message\n");
3398         ret = -EMSGSIZE;
3399
3400 out_err:
3401         nlmsg_free(msg);
3402 out:
3403         mutex_unlock(&__ip_vs_mutex);
3404
3405         return ret;
3406 }
3407
3408
3409 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3410         {
3411                 .cmd    = IPVS_CMD_NEW_SERVICE,
3412                 .flags  = GENL_ADMIN_PERM,
3413                 .policy = ip_vs_cmd_policy,
3414                 .doit   = ip_vs_genl_set_cmd,
3415         },
3416         {
3417                 .cmd    = IPVS_CMD_SET_SERVICE,
3418                 .flags  = GENL_ADMIN_PERM,
3419                 .policy = ip_vs_cmd_policy,
3420                 .doit   = ip_vs_genl_set_cmd,
3421         },
3422         {
3423                 .cmd    = IPVS_CMD_DEL_SERVICE,
3424                 .flags  = GENL_ADMIN_PERM,
3425                 .policy = ip_vs_cmd_policy,
3426                 .doit   = ip_vs_genl_set_cmd,
3427         },
3428         {
3429                 .cmd    = IPVS_CMD_GET_SERVICE,
3430                 .flags  = GENL_ADMIN_PERM,
3431                 .doit   = ip_vs_genl_get_cmd,
3432                 .dumpit = ip_vs_genl_dump_services,
3433                 .policy = ip_vs_cmd_policy,
3434         },
3435         {
3436                 .cmd    = IPVS_CMD_NEW_DEST,
3437                 .flags  = GENL_ADMIN_PERM,
3438                 .policy = ip_vs_cmd_policy,
3439                 .doit   = ip_vs_genl_set_cmd,
3440         },
3441         {
3442                 .cmd    = IPVS_CMD_SET_DEST,
3443                 .flags  = GENL_ADMIN_PERM,
3444                 .policy = ip_vs_cmd_policy,
3445                 .doit   = ip_vs_genl_set_cmd,
3446         },
3447         {
3448                 .cmd    = IPVS_CMD_DEL_DEST,
3449                 .flags  = GENL_ADMIN_PERM,
3450                 .policy = ip_vs_cmd_policy,
3451                 .doit   = ip_vs_genl_set_cmd,
3452         },
3453         {
3454                 .cmd    = IPVS_CMD_GET_DEST,
3455                 .flags  = GENL_ADMIN_PERM,
3456                 .policy = ip_vs_cmd_policy,
3457                 .dumpit = ip_vs_genl_dump_dests,
3458         },
3459         {
3460                 .cmd    = IPVS_CMD_NEW_DAEMON,
3461                 .flags  = GENL_ADMIN_PERM,
3462                 .policy = ip_vs_cmd_policy,
3463                 .doit   = ip_vs_genl_set_cmd,
3464         },
3465         {
3466                 .cmd    = IPVS_CMD_DEL_DAEMON,
3467                 .flags  = GENL_ADMIN_PERM,
3468                 .policy = ip_vs_cmd_policy,
3469                 .doit   = ip_vs_genl_set_cmd,
3470         },
3471         {
3472                 .cmd    = IPVS_CMD_GET_DAEMON,
3473                 .flags  = GENL_ADMIN_PERM,
3474                 .dumpit = ip_vs_genl_dump_daemons,
3475         },
3476         {
3477                 .cmd    = IPVS_CMD_SET_CONFIG,
3478                 .flags  = GENL_ADMIN_PERM,
3479                 .policy = ip_vs_cmd_policy,
3480                 .doit   = ip_vs_genl_set_cmd,
3481         },
3482         {
3483                 .cmd    = IPVS_CMD_GET_CONFIG,
3484                 .flags  = GENL_ADMIN_PERM,
3485                 .doit   = ip_vs_genl_get_cmd,
3486         },
3487         {
3488                 .cmd    = IPVS_CMD_GET_INFO,
3489                 .flags  = GENL_ADMIN_PERM,
3490                 .doit   = ip_vs_genl_get_cmd,
3491         },
3492         {
3493                 .cmd    = IPVS_CMD_ZERO,
3494                 .flags  = GENL_ADMIN_PERM,
3495                 .policy = ip_vs_cmd_policy,
3496                 .doit   = ip_vs_genl_set_cmd,
3497         },
3498         {
3499                 .cmd    = IPVS_CMD_FLUSH,
3500                 .flags  = GENL_ADMIN_PERM,
3501                 .doit   = ip_vs_genl_set_cmd,
3502         },
3503 };
3504
3505 static int __init ip_vs_genl_register(void)
3506 {
3507         return genl_register_family_with_ops(&ip_vs_genl_family,
3508                 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
3509 }
3510
3511 static void ip_vs_genl_unregister(void)
3512 {
3513         genl_unregister_family(&ip_vs_genl_family);
3514 }
3515
3516 /* End of Generic Netlink interface definitions */
3517
3518 /*
3519  * per netns init/exit func.
3520  */
3521 int __net_init __ip_vs_control_init(struct net *net)
3522 {
3523         int idx;
3524         struct netns_ipvs *ipvs = net_ipvs(net);
3525         struct ctl_table *tbl;
3526
3527         atomic_set(&ipvs->dropentry, 0);
3528         spin_lock_init(&ipvs->dropentry_lock);
3529         spin_lock_init(&ipvs->droppacket_lock);
3530         spin_lock_init(&ipvs->securetcp_lock);
3531         ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
3532
3533         /* Initialize rs_table */
3534         for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3535                 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3536
3537         INIT_LIST_HEAD(&ipvs->dest_trash);
3538         atomic_set(&ipvs->ftpsvc_counter, 0);
3539         atomic_set(&ipvs->nullsvc_counter, 0);
3540
3541         /* procfs stats */
3542         ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3543         if (!ipvs->tot_stats.cpustats) {
3544                 pr_err("%s() alloc_percpu failed\n", __func__);
3545                 goto err_alloc;
3546         }
3547         spin_lock_init(&ipvs->tot_stats.lock);
3548
3549         proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3550         proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
3551         proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3552                              &ip_vs_stats_percpu_fops);
3553
3554         if (!net_eq(net, &init_net)) {
3555                 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3556                 if (tbl == NULL)
3557                         goto err_dup;
3558         } else
3559                 tbl = vs_vars;
3560         /* Initialize sysctl defaults */
3561         idx = 0;
3562         ipvs->sysctl_amemthresh = 1024;
3563         tbl[idx++].data = &ipvs->sysctl_amemthresh;
3564         ipvs->sysctl_am_droprate = 10;
3565         tbl[idx++].data = &ipvs->sysctl_am_droprate;
3566         tbl[idx++].data = &ipvs->sysctl_drop_entry;
3567         tbl[idx++].data = &ipvs->sysctl_drop_packet;
3568 #ifdef CONFIG_IP_VS_NFCT
3569         tbl[idx++].data = &ipvs->sysctl_conntrack;
3570 #endif
3571         tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3572         ipvs->sysctl_snat_reroute = 1;
3573         tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3574         ipvs->sysctl_sync_ver = 1;
3575         tbl[idx++].data = &ipvs->sysctl_sync_ver;
3576         tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3577         tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3578         tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
3579         ipvs->sysctl_sync_threshold[0] = 3;
3580         ipvs->sysctl_sync_threshold[1] = 50;
3581         tbl[idx].data = &ipvs->sysctl_sync_threshold;
3582         tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3583         tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3584
3585
3586 #ifdef CONFIG_SYSCTL
3587         ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
3588                                                      tbl);
3589         if (ipvs->sysctl_hdr == NULL) {
3590                 if (!net_eq(net, &init_net))
3591                         kfree(tbl);
3592                 goto err_dup;
3593         }
3594 #endif
3595         ip_vs_new_estimator(net, &ipvs->tot_stats);
3596         ipvs->sysctl_tbl = tbl;
3597         /* Schedule defense work */
3598         INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3599         schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
3600         return 0;
3601
3602 err_dup:
3603         free_percpu(ipvs->tot_stats.cpustats);
3604 err_alloc:
3605         return -ENOMEM;
3606 }
3607
3608 static void __net_exit __ip_vs_control_cleanup(struct net *net)
3609 {
3610         struct netns_ipvs *ipvs = net_ipvs(net);
3611
3612         ip_vs_trash_cleanup(net);
3613         ip_vs_kill_estimator(net, &ipvs->tot_stats);
3614         cancel_delayed_work_sync(&ipvs->defense_work);
3615         cancel_work_sync(&ipvs->defense_work.work);
3616 #ifdef CONFIG_SYSCTL
3617         unregister_net_sysctl_table(ipvs->sysctl_hdr);
3618 #endif
3619         proc_net_remove(net, "ip_vs_stats_percpu");
3620         proc_net_remove(net, "ip_vs_stats");
3621         proc_net_remove(net, "ip_vs");
3622         free_percpu(ipvs->tot_stats.cpustats);
3623 }
3624
3625 static struct pernet_operations ipvs_control_ops = {
3626         .init = __ip_vs_control_init,
3627         .exit = __ip_vs_control_cleanup,
3628 };
3629
3630 int __init ip_vs_control_init(void)
3631 {
3632         int idx;
3633         int ret;
3634
3635         EnterFunction(2);
3636
3637         /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
3638         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
3639                 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3640                 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3641         }
3642
3643         ret = register_pernet_subsys(&ipvs_control_ops);
3644         if (ret) {
3645                 pr_err("cannot register namespace.\n");
3646                 goto err;
3647         }
3648
3649         smp_wmb();      /* Do we really need it now ? */
3650
3651         ret = nf_register_sockopt(&ip_vs_sockopts);
3652         if (ret) {
3653                 pr_err("cannot register sockopt.\n");
3654                 goto err_net;
3655         }
3656
3657         ret = ip_vs_genl_register();
3658         if (ret) {
3659                 pr_err("cannot register Generic Netlink interface.\n");
3660                 nf_unregister_sockopt(&ip_vs_sockopts);
3661                 goto err_net;
3662         }
3663
3664         LeaveFunction(2);
3665         return 0;
3666
3667 err_net:
3668         unregister_pernet_subsys(&ipvs_control_ops);
3669 err:
3670         return ret;
3671 }
3672
3673
3674 void ip_vs_control_cleanup(void)
3675 {
3676         EnterFunction(2);
3677         unregister_pernet_subsys(&ipvs_control_ops);
3678         ip_vs_genl_unregister();
3679         nf_unregister_sockopt(&ip_vs_sockopts);
3680         LeaveFunction(2);
3681 }