ipv6: Convert to use flowi6 where applicable.
[linux-2.6.git] / net / netfilter / ipvs / ip_vs_ctl.c
1 /*
2  * IPVS         An implementation of the IP virtual server support for the
3  *              LINUX operating system.  IPVS is now implemented as a module
4  *              over the NetFilter framework. IPVS can be used to build a
5  *              high-performance and highly available server based on a
6  *              cluster of servers.
7  *
8  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
9  *              Peter Kese <peter.kese@ijs.si>
10  *              Julian Anastasov <ja@ssi.bg>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  *
17  * Changes:
18  *
19  */
20
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
28 #include <linux/fs.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34 #include <linux/slab.h>
35
36 #include <linux/netfilter.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/mutex.h>
39
40 #include <net/net_namespace.h>
41 #include <linux/nsproxy.h>
42 #include <net/ip.h>
43 #ifdef CONFIG_IP_VS_IPV6
44 #include <net/ipv6.h>
45 #include <net/ip6_route.h>
46 #endif
47 #include <net/route.h>
48 #include <net/sock.h>
49 #include <net/genetlink.h>
50
51 #include <asm/uaccess.h>
52
53 #include <net/ip_vs.h>
54
55 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
56 static DEFINE_MUTEX(__ip_vs_mutex);
57
58 /* lock for service table */
59 static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
61 /* sysctl variables */
62
63 #ifdef CONFIG_IP_VS_DEBUG
64 static int sysctl_ip_vs_debug_level = 0;
65
66 int ip_vs_get_debug_level(void)
67 {
68         return sysctl_ip_vs_debug_level;
69 }
70 #endif
71
72 #ifdef CONFIG_IP_VS_IPV6
73 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
74 static int __ip_vs_addr_is_local_v6(struct net *net,
75                                     const struct in6_addr *addr)
76 {
77         struct rt6_info *rt;
78         struct flowi6 fl6 = {
79                 .daddr = *addr,
80         };
81
82         rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
83         if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
84                 return 1;
85
86         return 0;
87 }
88 #endif
89 /*
90  *      update_defense_level is called from keventd and from sysctl,
91  *      so it needs to protect itself from softirqs
92  */
93 static void update_defense_level(struct netns_ipvs *ipvs)
94 {
95         struct sysinfo i;
96         static int old_secure_tcp = 0;
97         int availmem;
98         int nomem;
99         int to_change = -1;
100
101         /* we only count free and buffered memory (in pages) */
102         si_meminfo(&i);
103         availmem = i.freeram + i.bufferram;
104         /* however in linux 2.5 the i.bufferram is total page cache size,
105            we need adjust it */
106         /* si_swapinfo(&i); */
107         /* availmem = availmem - (i.totalswap - i.freeswap); */
108
109         nomem = (availmem < ipvs->sysctl_amemthresh);
110
111         local_bh_disable();
112
113         /* drop_entry */
114         spin_lock(&ipvs->dropentry_lock);
115         switch (ipvs->sysctl_drop_entry) {
116         case 0:
117                 atomic_set(&ipvs->dropentry, 0);
118                 break;
119         case 1:
120                 if (nomem) {
121                         atomic_set(&ipvs->dropentry, 1);
122                         ipvs->sysctl_drop_entry = 2;
123                 } else {
124                         atomic_set(&ipvs->dropentry, 0);
125                 }
126                 break;
127         case 2:
128                 if (nomem) {
129                         atomic_set(&ipvs->dropentry, 1);
130                 } else {
131                         atomic_set(&ipvs->dropentry, 0);
132                         ipvs->sysctl_drop_entry = 1;
133                 };
134                 break;
135         case 3:
136                 atomic_set(&ipvs->dropentry, 1);
137                 break;
138         }
139         spin_unlock(&ipvs->dropentry_lock);
140
141         /* drop_packet */
142         spin_lock(&ipvs->droppacket_lock);
143         switch (ipvs->sysctl_drop_packet) {
144         case 0:
145                 ipvs->drop_rate = 0;
146                 break;
147         case 1:
148                 if (nomem) {
149                         ipvs->drop_rate = ipvs->drop_counter
150                                 = ipvs->sysctl_amemthresh /
151                                 (ipvs->sysctl_amemthresh-availmem);
152                         ipvs->sysctl_drop_packet = 2;
153                 } else {
154                         ipvs->drop_rate = 0;
155                 }
156                 break;
157         case 2:
158                 if (nomem) {
159                         ipvs->drop_rate = ipvs->drop_counter
160                                 = ipvs->sysctl_amemthresh /
161                                 (ipvs->sysctl_amemthresh-availmem);
162                 } else {
163                         ipvs->drop_rate = 0;
164                         ipvs->sysctl_drop_packet = 1;
165                 }
166                 break;
167         case 3:
168                 ipvs->drop_rate = ipvs->sysctl_am_droprate;
169                 break;
170         }
171         spin_unlock(&ipvs->droppacket_lock);
172
173         /* secure_tcp */
174         spin_lock(&ipvs->securetcp_lock);
175         switch (ipvs->sysctl_secure_tcp) {
176         case 0:
177                 if (old_secure_tcp >= 2)
178                         to_change = 0;
179                 break;
180         case 1:
181                 if (nomem) {
182                         if (old_secure_tcp < 2)
183                                 to_change = 1;
184                         ipvs->sysctl_secure_tcp = 2;
185                 } else {
186                         if (old_secure_tcp >= 2)
187                                 to_change = 0;
188                 }
189                 break;
190         case 2:
191                 if (nomem) {
192                         if (old_secure_tcp < 2)
193                                 to_change = 1;
194                 } else {
195                         if (old_secure_tcp >= 2)
196                                 to_change = 0;
197                         ipvs->sysctl_secure_tcp = 1;
198                 }
199                 break;
200         case 3:
201                 if (old_secure_tcp < 2)
202                         to_change = 1;
203                 break;
204         }
205         old_secure_tcp = ipvs->sysctl_secure_tcp;
206         if (to_change >= 0)
207                 ip_vs_protocol_timeout_change(ipvs,
208                                               ipvs->sysctl_secure_tcp > 1);
209         spin_unlock(&ipvs->securetcp_lock);
210
211         local_bh_enable();
212 }
213
214
215 /*
216  *      Timer for checking the defense
217  */
218 #define DEFENSE_TIMER_PERIOD    1*HZ
219
220 static void defense_work_handler(struct work_struct *work)
221 {
222         struct netns_ipvs *ipvs =
223                 container_of(work, struct netns_ipvs, defense_work.work);
224
225         update_defense_level(ipvs);
226         if (atomic_read(&ipvs->dropentry))
227                 ip_vs_random_dropentry(ipvs->net);
228         schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
229 }
230
231 int
232 ip_vs_use_count_inc(void)
233 {
234         return try_module_get(THIS_MODULE);
235 }
236
237 void
238 ip_vs_use_count_dec(void)
239 {
240         module_put(THIS_MODULE);
241 }
242
243
244 /*
245  *      Hash table: for virtual service lookups
246  */
247 #define IP_VS_SVC_TAB_BITS 8
248 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
249 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
250
251 /* the service table hashed by <protocol, addr, port> */
252 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
253 /* the service table hashed by fwmark */
254 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
255
256
257 /*
258  *      Returns hash value for virtual service
259  */
260 static inline unsigned
261 ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
262                   const union nf_inet_addr *addr, __be16 port)
263 {
264         register unsigned porth = ntohs(port);
265         __be32 addr_fold = addr->ip;
266
267 #ifdef CONFIG_IP_VS_IPV6
268         if (af == AF_INET6)
269                 addr_fold = addr->ip6[0]^addr->ip6[1]^
270                             addr->ip6[2]^addr->ip6[3];
271 #endif
272         addr_fold ^= ((size_t)net>>8);
273
274         return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
275                 & IP_VS_SVC_TAB_MASK;
276 }
277
278 /*
279  *      Returns hash value of fwmark for virtual service lookup
280  */
281 static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
282 {
283         return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
284 }
285
286 /*
287  *      Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
288  *      or in the ip_vs_svc_fwm_table by fwmark.
289  *      Should be called with locked tables.
290  */
291 static int ip_vs_svc_hash(struct ip_vs_service *svc)
292 {
293         unsigned hash;
294
295         if (svc->flags & IP_VS_SVC_F_HASHED) {
296                 pr_err("%s(): request for already hashed, called from %pF\n",
297                        __func__, __builtin_return_address(0));
298                 return 0;
299         }
300
301         if (svc->fwmark == 0) {
302                 /*
303                  *  Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
304                  */
305                 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
306                                          &svc->addr, svc->port);
307                 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
308         } else {
309                 /*
310                  *  Hash it by fwmark in svc_fwm_table
311                  */
312                 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
313                 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
314         }
315
316         svc->flags |= IP_VS_SVC_F_HASHED;
317         /* increase its refcnt because it is referenced by the svc table */
318         atomic_inc(&svc->refcnt);
319         return 1;
320 }
321
322
323 /*
324  *      Unhashes a service from svc_table / svc_fwm_table.
325  *      Should be called with locked tables.
326  */
327 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
328 {
329         if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
330                 pr_err("%s(): request for unhash flagged, called from %pF\n",
331                        __func__, __builtin_return_address(0));
332                 return 0;
333         }
334
335         if (svc->fwmark == 0) {
336                 /* Remove it from the svc_table table */
337                 list_del(&svc->s_list);
338         } else {
339                 /* Remove it from the svc_fwm_table table */
340                 list_del(&svc->f_list);
341         }
342
343         svc->flags &= ~IP_VS_SVC_F_HASHED;
344         atomic_dec(&svc->refcnt);
345         return 1;
346 }
347
348
349 /*
350  *      Get service by {netns, proto,addr,port} in the service table.
351  */
352 static inline struct ip_vs_service *
353 __ip_vs_service_find(struct net *net, int af, __u16 protocol,
354                      const union nf_inet_addr *vaddr, __be16 vport)
355 {
356         unsigned hash;
357         struct ip_vs_service *svc;
358
359         /* Check for "full" addressed entries */
360         hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
361
362         list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
363                 if ((svc->af == af)
364                     && ip_vs_addr_equal(af, &svc->addr, vaddr)
365                     && (svc->port == vport)
366                     && (svc->protocol == protocol)
367                     && net_eq(svc->net, net)) {
368                         /* HIT */
369                         return svc;
370                 }
371         }
372
373         return NULL;
374 }
375
376
377 /*
378  *      Get service by {fwmark} in the service table.
379  */
380 static inline struct ip_vs_service *
381 __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
382 {
383         unsigned hash;
384         struct ip_vs_service *svc;
385
386         /* Check for fwmark addressed entries */
387         hash = ip_vs_svc_fwm_hashkey(net, fwmark);
388
389         list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
390                 if (svc->fwmark == fwmark && svc->af == af
391                     && net_eq(svc->net, net)) {
392                         /* HIT */
393                         return svc;
394                 }
395         }
396
397         return NULL;
398 }
399
400 struct ip_vs_service *
401 ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
402                   const union nf_inet_addr *vaddr, __be16 vport)
403 {
404         struct ip_vs_service *svc;
405         struct netns_ipvs *ipvs = net_ipvs(net);
406
407         read_lock(&__ip_vs_svc_lock);
408
409         /*
410          *      Check the table hashed by fwmark first
411          */
412         svc = __ip_vs_svc_fwm_find(net, af, fwmark);
413         if (fwmark && svc)
414                 goto out;
415
416         /*
417          *      Check the table hashed by <protocol,addr,port>
418          *      for "full" addressed entries
419          */
420         svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
421
422         if (svc == NULL
423             && protocol == IPPROTO_TCP
424             && atomic_read(&ipvs->ftpsvc_counter)
425             && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
426                 /*
427                  * Check if ftp service entry exists, the packet
428                  * might belong to FTP data connections.
429                  */
430                 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
431         }
432
433         if (svc == NULL
434             && atomic_read(&ipvs->nullsvc_counter)) {
435                 /*
436                  * Check if the catch-all port (port zero) exists
437                  */
438                 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
439         }
440
441   out:
442         if (svc)
443                 atomic_inc(&svc->usecnt);
444         read_unlock(&__ip_vs_svc_lock);
445
446         IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
447                       fwmark, ip_vs_proto_name(protocol),
448                       IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
449                       svc ? "hit" : "not hit");
450
451         return svc;
452 }
453
454
455 static inline void
456 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
457 {
458         atomic_inc(&svc->refcnt);
459         dest->svc = svc;
460 }
461
462 static void
463 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
464 {
465         struct ip_vs_service *svc = dest->svc;
466
467         dest->svc = NULL;
468         if (atomic_dec_and_test(&svc->refcnt)) {
469                 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
470                               svc->fwmark,
471                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
472                               ntohs(svc->port), atomic_read(&svc->usecnt));
473                 free_percpu(svc->stats.cpustats);
474                 kfree(svc);
475         }
476 }
477
478
479 /*
480  *      Returns hash value for real service
481  */
482 static inline unsigned ip_vs_rs_hashkey(int af,
483                                             const union nf_inet_addr *addr,
484                                             __be16 port)
485 {
486         register unsigned porth = ntohs(port);
487         __be32 addr_fold = addr->ip;
488
489 #ifdef CONFIG_IP_VS_IPV6
490         if (af == AF_INET6)
491                 addr_fold = addr->ip6[0]^addr->ip6[1]^
492                             addr->ip6[2]^addr->ip6[3];
493 #endif
494
495         return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
496                 & IP_VS_RTAB_MASK;
497 }
498
499 /*
500  *      Hashes ip_vs_dest in rs_table by <proto,addr,port>.
501  *      should be called with locked tables.
502  */
503 static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
504 {
505         unsigned hash;
506
507         if (!list_empty(&dest->d_list)) {
508                 return 0;
509         }
510
511         /*
512          *      Hash by proto,addr,port,
513          *      which are the parameters of the real service.
514          */
515         hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
516
517         list_add(&dest->d_list, &ipvs->rs_table[hash]);
518
519         return 1;
520 }
521
522 /*
523  *      UNhashes ip_vs_dest from rs_table.
524  *      should be called with locked tables.
525  */
526 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
527 {
528         /*
529          * Remove it from the rs_table table.
530          */
531         if (!list_empty(&dest->d_list)) {
532                 list_del(&dest->d_list);
533                 INIT_LIST_HEAD(&dest->d_list);
534         }
535
536         return 1;
537 }
538
539 /*
540  *      Lookup real service by <proto,addr,port> in the real service table.
541  */
542 struct ip_vs_dest *
543 ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
544                           const union nf_inet_addr *daddr,
545                           __be16 dport)
546 {
547         struct netns_ipvs *ipvs = net_ipvs(net);
548         unsigned hash;
549         struct ip_vs_dest *dest;
550
551         /*
552          *      Check for "full" addressed entries
553          *      Return the first found entry
554          */
555         hash = ip_vs_rs_hashkey(af, daddr, dport);
556
557         read_lock(&ipvs->rs_lock);
558         list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
559                 if ((dest->af == af)
560                     && ip_vs_addr_equal(af, &dest->addr, daddr)
561                     && (dest->port == dport)
562                     && ((dest->protocol == protocol) ||
563                         dest->vfwmark)) {
564                         /* HIT */
565                         read_unlock(&ipvs->rs_lock);
566                         return dest;
567                 }
568         }
569         read_unlock(&ipvs->rs_lock);
570
571         return NULL;
572 }
573
574 /*
575  *      Lookup destination by {addr,port} in the given service
576  */
577 static struct ip_vs_dest *
578 ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
579                   __be16 dport)
580 {
581         struct ip_vs_dest *dest;
582
583         /*
584          * Find the destination for the given service
585          */
586         list_for_each_entry(dest, &svc->destinations, n_list) {
587                 if ((dest->af == svc->af)
588                     && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
589                     && (dest->port == dport)) {
590                         /* HIT */
591                         return dest;
592                 }
593         }
594
595         return NULL;
596 }
597
598 /*
599  * Find destination by {daddr,dport,vaddr,protocol}
600  * Cretaed to be used in ip_vs_process_message() in
601  * the backup synchronization daemon. It finds the
602  * destination to be bound to the received connection
603  * on the backup.
604  *
605  * ip_vs_lookup_real_service() looked promissing, but
606  * seems not working as expected.
607  */
608 struct ip_vs_dest *ip_vs_find_dest(struct net  *net, int af,
609                                    const union nf_inet_addr *daddr,
610                                    __be16 dport,
611                                    const union nf_inet_addr *vaddr,
612                                    __be16 vport, __u16 protocol, __u32 fwmark)
613 {
614         struct ip_vs_dest *dest;
615         struct ip_vs_service *svc;
616
617         svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
618         if (!svc)
619                 return NULL;
620         dest = ip_vs_lookup_dest(svc, daddr, dport);
621         if (dest)
622                 atomic_inc(&dest->refcnt);
623         ip_vs_service_put(svc);
624         return dest;
625 }
626
627 /*
628  *  Lookup dest by {svc,addr,port} in the destination trash.
629  *  The destination trash is used to hold the destinations that are removed
630  *  from the service table but are still referenced by some conn entries.
631  *  The reason to add the destination trash is when the dest is temporary
632  *  down (either by administrator or by monitor program), the dest can be
633  *  picked back from the trash, the remaining connections to the dest can
634  *  continue, and the counting information of the dest is also useful for
635  *  scheduling.
636  */
637 static struct ip_vs_dest *
638 ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
639                      __be16 dport)
640 {
641         struct ip_vs_dest *dest, *nxt;
642         struct netns_ipvs *ipvs = net_ipvs(svc->net);
643
644         /*
645          * Find the destination in trash
646          */
647         list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
648                 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
649                               "dest->refcnt=%d\n",
650                               dest->vfwmark,
651                               IP_VS_DBG_ADDR(svc->af, &dest->addr),
652                               ntohs(dest->port),
653                               atomic_read(&dest->refcnt));
654                 if (dest->af == svc->af &&
655                     ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
656                     dest->port == dport &&
657                     dest->vfwmark == svc->fwmark &&
658                     dest->protocol == svc->protocol &&
659                     (svc->fwmark ||
660                      (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
661                       dest->vport == svc->port))) {
662                         /* HIT */
663                         return dest;
664                 }
665
666                 /*
667                  * Try to purge the destination from trash if not referenced
668                  */
669                 if (atomic_read(&dest->refcnt) == 1) {
670                         IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
671                                       "from trash\n",
672                                       dest->vfwmark,
673                                       IP_VS_DBG_ADDR(svc->af, &dest->addr),
674                                       ntohs(dest->port));
675                         list_del(&dest->n_list);
676                         ip_vs_dst_reset(dest);
677                         __ip_vs_unbind_svc(dest);
678                         free_percpu(dest->stats.cpustats);
679                         kfree(dest);
680                 }
681         }
682
683         return NULL;
684 }
685
686
687 /*
688  *  Clean up all the destinations in the trash
689  *  Called by the ip_vs_control_cleanup()
690  *
691  *  When the ip_vs_control_clearup is activated by ipvs module exit,
692  *  the service tables must have been flushed and all the connections
693  *  are expired, and the refcnt of each destination in the trash must
694  *  be 1, so we simply release them here.
695  */
696 static void ip_vs_trash_cleanup(struct net *net)
697 {
698         struct ip_vs_dest *dest, *nxt;
699         struct netns_ipvs *ipvs = net_ipvs(net);
700
701         list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
702                 list_del(&dest->n_list);
703                 ip_vs_dst_reset(dest);
704                 __ip_vs_unbind_svc(dest);
705                 free_percpu(dest->stats.cpustats);
706                 kfree(dest);
707         }
708 }
709
710
711 static void
712 ip_vs_zero_stats(struct ip_vs_stats *stats)
713 {
714         spin_lock_bh(&stats->lock);
715
716         memset(&stats->ustats, 0, sizeof(stats->ustats));
717         ip_vs_zero_estimator(stats);
718
719         spin_unlock_bh(&stats->lock);
720 }
721
722 /*
723  *      Update a destination in the given service
724  */
725 static void
726 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
727                     struct ip_vs_dest_user_kern *udest, int add)
728 {
729         struct netns_ipvs *ipvs = net_ipvs(svc->net);
730         int conn_flags;
731
732         /* set the weight and the flags */
733         atomic_set(&dest->weight, udest->weight);
734         conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
735         conn_flags |= IP_VS_CONN_F_INACTIVE;
736
737         /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
738         if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
739                 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
740         } else {
741                 /*
742                  *    Put the real service in rs_table if not present.
743                  *    For now only for NAT!
744                  */
745                 write_lock_bh(&ipvs->rs_lock);
746                 ip_vs_rs_hash(ipvs, dest);
747                 write_unlock_bh(&ipvs->rs_lock);
748         }
749         atomic_set(&dest->conn_flags, conn_flags);
750
751         /* bind the service */
752         if (!dest->svc) {
753                 __ip_vs_bind_svc(dest, svc);
754         } else {
755                 if (dest->svc != svc) {
756                         __ip_vs_unbind_svc(dest);
757                         ip_vs_zero_stats(&dest->stats);
758                         __ip_vs_bind_svc(dest, svc);
759                 }
760         }
761
762         /* set the dest status flags */
763         dest->flags |= IP_VS_DEST_F_AVAILABLE;
764
765         if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
766                 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
767         dest->u_threshold = udest->u_threshold;
768         dest->l_threshold = udest->l_threshold;
769
770         spin_lock_bh(&dest->dst_lock);
771         ip_vs_dst_reset(dest);
772         spin_unlock_bh(&dest->dst_lock);
773
774         if (add)
775                 ip_vs_new_estimator(svc->net, &dest->stats);
776
777         write_lock_bh(&__ip_vs_svc_lock);
778
779         /* Wait until all other svc users go away */
780         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
781
782         if (add) {
783                 list_add(&dest->n_list, &svc->destinations);
784                 svc->num_dests++;
785         }
786
787         /* call the update_service, because server weight may be changed */
788         if (svc->scheduler->update_service)
789                 svc->scheduler->update_service(svc);
790
791         write_unlock_bh(&__ip_vs_svc_lock);
792 }
793
794
795 /*
796  *      Create a destination for the given service
797  */
798 static int
799 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
800                struct ip_vs_dest **dest_p)
801 {
802         struct ip_vs_dest *dest;
803         unsigned atype;
804
805         EnterFunction(2);
806
807 #ifdef CONFIG_IP_VS_IPV6
808         if (svc->af == AF_INET6) {
809                 atype = ipv6_addr_type(&udest->addr.in6);
810                 if ((!(atype & IPV6_ADDR_UNICAST) ||
811                         atype & IPV6_ADDR_LINKLOCAL) &&
812                         !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
813                         return -EINVAL;
814         } else
815 #endif
816         {
817                 atype = inet_addr_type(svc->net, udest->addr.ip);
818                 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
819                         return -EINVAL;
820         }
821
822         dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
823         if (dest == NULL) {
824                 pr_err("%s(): no memory.\n", __func__);
825                 return -ENOMEM;
826         }
827         dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
828         if (!dest->stats.cpustats) {
829                 pr_err("%s() alloc_percpu failed\n", __func__);
830                 goto err_alloc;
831         }
832
833         dest->af = svc->af;
834         dest->protocol = svc->protocol;
835         dest->vaddr = svc->addr;
836         dest->vport = svc->port;
837         dest->vfwmark = svc->fwmark;
838         ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
839         dest->port = udest->port;
840
841         atomic_set(&dest->activeconns, 0);
842         atomic_set(&dest->inactconns, 0);
843         atomic_set(&dest->persistconns, 0);
844         atomic_set(&dest->refcnt, 1);
845
846         INIT_LIST_HEAD(&dest->d_list);
847         spin_lock_init(&dest->dst_lock);
848         spin_lock_init(&dest->stats.lock);
849         __ip_vs_update_dest(svc, dest, udest, 1);
850
851         *dest_p = dest;
852
853         LeaveFunction(2);
854         return 0;
855
856 err_alloc:
857         kfree(dest);
858         return -ENOMEM;
859 }
860
861
862 /*
863  *      Add a destination into an existing service
864  */
865 static int
866 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
867 {
868         struct ip_vs_dest *dest;
869         union nf_inet_addr daddr;
870         __be16 dport = udest->port;
871         int ret;
872
873         EnterFunction(2);
874
875         if (udest->weight < 0) {
876                 pr_err("%s(): server weight less than zero\n", __func__);
877                 return -ERANGE;
878         }
879
880         if (udest->l_threshold > udest->u_threshold) {
881                 pr_err("%s(): lower threshold is higher than upper threshold\n",
882                         __func__);
883                 return -ERANGE;
884         }
885
886         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
887
888         /*
889          * Check if the dest already exists in the list
890          */
891         dest = ip_vs_lookup_dest(svc, &daddr, dport);
892
893         if (dest != NULL) {
894                 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
895                 return -EEXIST;
896         }
897
898         /*
899          * Check if the dest already exists in the trash and
900          * is from the same service
901          */
902         dest = ip_vs_trash_get_dest(svc, &daddr, dport);
903
904         if (dest != NULL) {
905                 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
906                               "dest->refcnt=%d, service %u/%s:%u\n",
907                               IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
908                               atomic_read(&dest->refcnt),
909                               dest->vfwmark,
910                               IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
911                               ntohs(dest->vport));
912
913                 /*
914                  * Get the destination from the trash
915                  */
916                 list_del(&dest->n_list);
917
918                 __ip_vs_update_dest(svc, dest, udest, 1);
919                 ret = 0;
920         } else {
921                 /*
922                  * Allocate and initialize the dest structure
923                  */
924                 ret = ip_vs_new_dest(svc, udest, &dest);
925         }
926         LeaveFunction(2);
927
928         return ret;
929 }
930
931
932 /*
933  *      Edit a destination in the given service
934  */
935 static int
936 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
937 {
938         struct ip_vs_dest *dest;
939         union nf_inet_addr daddr;
940         __be16 dport = udest->port;
941
942         EnterFunction(2);
943
944         if (udest->weight < 0) {
945                 pr_err("%s(): server weight less than zero\n", __func__);
946                 return -ERANGE;
947         }
948
949         if (udest->l_threshold > udest->u_threshold) {
950                 pr_err("%s(): lower threshold is higher than upper threshold\n",
951                         __func__);
952                 return -ERANGE;
953         }
954
955         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
956
957         /*
958          *  Lookup the destination list
959          */
960         dest = ip_vs_lookup_dest(svc, &daddr, dport);
961
962         if (dest == NULL) {
963                 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
964                 return -ENOENT;
965         }
966
967         __ip_vs_update_dest(svc, dest, udest, 0);
968         LeaveFunction(2);
969
970         return 0;
971 }
972
973
974 /*
975  *      Delete a destination (must be already unlinked from the service)
976  */
977 static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
978 {
979         struct netns_ipvs *ipvs = net_ipvs(net);
980
981         ip_vs_kill_estimator(net, &dest->stats);
982
983         /*
984          *  Remove it from the d-linked list with the real services.
985          */
986         write_lock_bh(&ipvs->rs_lock);
987         ip_vs_rs_unhash(dest);
988         write_unlock_bh(&ipvs->rs_lock);
989
990         /*
991          *  Decrease the refcnt of the dest, and free the dest
992          *  if nobody refers to it (refcnt=0). Otherwise, throw
993          *  the destination into the trash.
994          */
995         if (atomic_dec_and_test(&dest->refcnt)) {
996                 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
997                               dest->vfwmark,
998                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
999                               ntohs(dest->port));
1000                 ip_vs_dst_reset(dest);
1001                 /* simply decrease svc->refcnt here, let the caller check
1002                    and release the service if nobody refers to it.
1003                    Only user context can release destination and service,
1004                    and only one user context can update virtual service at a
1005                    time, so the operation here is OK */
1006                 atomic_dec(&dest->svc->refcnt);
1007                 free_percpu(dest->stats.cpustats);
1008                 kfree(dest);
1009         } else {
1010                 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1011                               "dest->refcnt=%d\n",
1012                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1013                               ntohs(dest->port),
1014                               atomic_read(&dest->refcnt));
1015                 list_add(&dest->n_list, &ipvs->dest_trash);
1016                 atomic_inc(&dest->refcnt);
1017         }
1018 }
1019
1020
1021 /*
1022  *      Unlink a destination from the given service
1023  */
1024 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1025                                 struct ip_vs_dest *dest,
1026                                 int svcupd)
1027 {
1028         dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1029
1030         /*
1031          *  Remove it from the d-linked destination list.
1032          */
1033         list_del(&dest->n_list);
1034         svc->num_dests--;
1035
1036         /*
1037          *  Call the update_service function of its scheduler
1038          */
1039         if (svcupd && svc->scheduler->update_service)
1040                         svc->scheduler->update_service(svc);
1041 }
1042
1043
1044 /*
1045  *      Delete a destination server in the given service
1046  */
1047 static int
1048 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1049 {
1050         struct ip_vs_dest *dest;
1051         __be16 dport = udest->port;
1052
1053         EnterFunction(2);
1054
1055         dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1056
1057         if (dest == NULL) {
1058                 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1059                 return -ENOENT;
1060         }
1061
1062         write_lock_bh(&__ip_vs_svc_lock);
1063
1064         /*
1065          *      Wait until all other svc users go away.
1066          */
1067         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1068
1069         /*
1070          *      Unlink dest from the service
1071          */
1072         __ip_vs_unlink_dest(svc, dest, 1);
1073
1074         write_unlock_bh(&__ip_vs_svc_lock);
1075
1076         /*
1077          *      Delete the destination
1078          */
1079         __ip_vs_del_dest(svc->net, dest);
1080
1081         LeaveFunction(2);
1082
1083         return 0;
1084 }
1085
1086
1087 /*
1088  *      Add a service into the service hash table
1089  */
1090 static int
1091 ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1092                   struct ip_vs_service **svc_p)
1093 {
1094         int ret = 0;
1095         struct ip_vs_scheduler *sched = NULL;
1096         struct ip_vs_pe *pe = NULL;
1097         struct ip_vs_service *svc = NULL;
1098         struct netns_ipvs *ipvs = net_ipvs(net);
1099
1100         /* increase the module use count */
1101         ip_vs_use_count_inc();
1102
1103         /* Lookup the scheduler by 'u->sched_name' */
1104         sched = ip_vs_scheduler_get(u->sched_name);
1105         if (sched == NULL) {
1106                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1107                 ret = -ENOENT;
1108                 goto out_err;
1109         }
1110
1111         if (u->pe_name && *u->pe_name) {
1112                 pe = ip_vs_pe_getbyname(u->pe_name);
1113                 if (pe == NULL) {
1114                         pr_info("persistence engine module ip_vs_pe_%s "
1115                                 "not found\n", u->pe_name);
1116                         ret = -ENOENT;
1117                         goto out_err;
1118                 }
1119         }
1120
1121 #ifdef CONFIG_IP_VS_IPV6
1122         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1123                 ret = -EINVAL;
1124                 goto out_err;
1125         }
1126 #endif
1127
1128         svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1129         if (svc == NULL) {
1130                 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1131                 ret = -ENOMEM;
1132                 goto out_err;
1133         }
1134         svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1135         if (!svc->stats.cpustats) {
1136                 pr_err("%s() alloc_percpu failed\n", __func__);
1137                 goto out_err;
1138         }
1139
1140         /* I'm the first user of the service */
1141         atomic_set(&svc->usecnt, 0);
1142         atomic_set(&svc->refcnt, 0);
1143
1144         svc->af = u->af;
1145         svc->protocol = u->protocol;
1146         ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1147         svc->port = u->port;
1148         svc->fwmark = u->fwmark;
1149         svc->flags = u->flags;
1150         svc->timeout = u->timeout * HZ;
1151         svc->netmask = u->netmask;
1152         svc->net = net;
1153
1154         INIT_LIST_HEAD(&svc->destinations);
1155         rwlock_init(&svc->sched_lock);
1156         spin_lock_init(&svc->stats.lock);
1157
1158         /* Bind the scheduler */
1159         ret = ip_vs_bind_scheduler(svc, sched);
1160         if (ret)
1161                 goto out_err;
1162         sched = NULL;
1163
1164         /* Bind the ct retriever */
1165         ip_vs_bind_pe(svc, pe);
1166         pe = NULL;
1167
1168         /* Update the virtual service counters */
1169         if (svc->port == FTPPORT)
1170                 atomic_inc(&ipvs->ftpsvc_counter);
1171         else if (svc->port == 0)
1172                 atomic_inc(&ipvs->nullsvc_counter);
1173
1174         ip_vs_new_estimator(net, &svc->stats);
1175
1176         /* Count only IPv4 services for old get/setsockopt interface */
1177         if (svc->af == AF_INET)
1178                 ipvs->num_services++;
1179
1180         /* Hash the service into the service table */
1181         write_lock_bh(&__ip_vs_svc_lock);
1182         ip_vs_svc_hash(svc);
1183         write_unlock_bh(&__ip_vs_svc_lock);
1184
1185         *svc_p = svc;
1186         return 0;
1187
1188
1189  out_err:
1190         if (svc != NULL) {
1191                 ip_vs_unbind_scheduler(svc);
1192                 if (svc->inc) {
1193                         local_bh_disable();
1194                         ip_vs_app_inc_put(svc->inc);
1195                         local_bh_enable();
1196                 }
1197                 if (svc->stats.cpustats)
1198                         free_percpu(svc->stats.cpustats);
1199                 kfree(svc);
1200         }
1201         ip_vs_scheduler_put(sched);
1202         ip_vs_pe_put(pe);
1203
1204         /* decrease the module use count */
1205         ip_vs_use_count_dec();
1206
1207         return ret;
1208 }
1209
1210
1211 /*
1212  *      Edit a service and bind it with a new scheduler
1213  */
1214 static int
1215 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1216 {
1217         struct ip_vs_scheduler *sched, *old_sched;
1218         struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1219         int ret = 0;
1220
1221         /*
1222          * Lookup the scheduler, by 'u->sched_name'
1223          */
1224         sched = ip_vs_scheduler_get(u->sched_name);
1225         if (sched == NULL) {
1226                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1227                 return -ENOENT;
1228         }
1229         old_sched = sched;
1230
1231         if (u->pe_name && *u->pe_name) {
1232                 pe = ip_vs_pe_getbyname(u->pe_name);
1233                 if (pe == NULL) {
1234                         pr_info("persistence engine module ip_vs_pe_%s "
1235                                 "not found\n", u->pe_name);
1236                         ret = -ENOENT;
1237                         goto out;
1238                 }
1239                 old_pe = pe;
1240         }
1241
1242 #ifdef CONFIG_IP_VS_IPV6
1243         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1244                 ret = -EINVAL;
1245                 goto out;
1246         }
1247 #endif
1248
1249         write_lock_bh(&__ip_vs_svc_lock);
1250
1251         /*
1252          * Wait until all other svc users go away.
1253          */
1254         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1255
1256         /*
1257          * Set the flags and timeout value
1258          */
1259         svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1260         svc->timeout = u->timeout * HZ;
1261         svc->netmask = u->netmask;
1262
1263         old_sched = svc->scheduler;
1264         if (sched != old_sched) {
1265                 /*
1266                  * Unbind the old scheduler
1267                  */
1268                 if ((ret = ip_vs_unbind_scheduler(svc))) {
1269                         old_sched = sched;
1270                         goto out_unlock;
1271                 }
1272
1273                 /*
1274                  * Bind the new scheduler
1275                  */
1276                 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1277                         /*
1278                          * If ip_vs_bind_scheduler fails, restore the old
1279                          * scheduler.
1280                          * The main reason of failure is out of memory.
1281                          *
1282                          * The question is if the old scheduler can be
1283                          * restored all the time. TODO: if it cannot be
1284                          * restored some time, we must delete the service,
1285                          * otherwise the system may crash.
1286                          */
1287                         ip_vs_bind_scheduler(svc, old_sched);
1288                         old_sched = sched;
1289                         goto out_unlock;
1290                 }
1291         }
1292
1293         old_pe = svc->pe;
1294         if (pe != old_pe) {
1295                 ip_vs_unbind_pe(svc);
1296                 ip_vs_bind_pe(svc, pe);
1297         }
1298
1299   out_unlock:
1300         write_unlock_bh(&__ip_vs_svc_lock);
1301   out:
1302         ip_vs_scheduler_put(old_sched);
1303         ip_vs_pe_put(old_pe);
1304         return ret;
1305 }
1306
1307
1308 /*
1309  *      Delete a service from the service list
1310  *      - The service must be unlinked, unlocked and not referenced!
1311  *      - We are called under _bh lock
1312  */
1313 static void __ip_vs_del_service(struct ip_vs_service *svc)
1314 {
1315         struct ip_vs_dest *dest, *nxt;
1316         struct ip_vs_scheduler *old_sched;
1317         struct ip_vs_pe *old_pe;
1318         struct netns_ipvs *ipvs = net_ipvs(svc->net);
1319
1320         pr_info("%s: enter\n", __func__);
1321
1322         /* Count only IPv4 services for old get/setsockopt interface */
1323         if (svc->af == AF_INET)
1324                 ipvs->num_services--;
1325
1326         ip_vs_kill_estimator(svc->net, &svc->stats);
1327
1328         /* Unbind scheduler */
1329         old_sched = svc->scheduler;
1330         ip_vs_unbind_scheduler(svc);
1331         ip_vs_scheduler_put(old_sched);
1332
1333         /* Unbind persistence engine */
1334         old_pe = svc->pe;
1335         ip_vs_unbind_pe(svc);
1336         ip_vs_pe_put(old_pe);
1337
1338         /* Unbind app inc */
1339         if (svc->inc) {
1340                 ip_vs_app_inc_put(svc->inc);
1341                 svc->inc = NULL;
1342         }
1343
1344         /*
1345          *    Unlink the whole destination list
1346          */
1347         list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1348                 __ip_vs_unlink_dest(svc, dest, 0);
1349                 __ip_vs_del_dest(svc->net, dest);
1350         }
1351
1352         /*
1353          *    Update the virtual service counters
1354          */
1355         if (svc->port == FTPPORT)
1356                 atomic_dec(&ipvs->ftpsvc_counter);
1357         else if (svc->port == 0)
1358                 atomic_dec(&ipvs->nullsvc_counter);
1359
1360         /*
1361          *    Free the service if nobody refers to it
1362          */
1363         if (atomic_read(&svc->refcnt) == 0) {
1364                 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1365                               svc->fwmark,
1366                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
1367                               ntohs(svc->port), atomic_read(&svc->usecnt));
1368                 free_percpu(svc->stats.cpustats);
1369                 kfree(svc);
1370         }
1371
1372         /* decrease the module use count */
1373         ip_vs_use_count_dec();
1374 }
1375
1376 /*
1377  * Unlink a service from list and try to delete it if its refcnt reached 0
1378  */
1379 static void ip_vs_unlink_service(struct ip_vs_service *svc)
1380 {
1381         /*
1382          * Unhash it from the service table
1383          */
1384         write_lock_bh(&__ip_vs_svc_lock);
1385
1386         ip_vs_svc_unhash(svc);
1387
1388         /*
1389          * Wait until all the svc users go away.
1390          */
1391         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1392
1393         __ip_vs_del_service(svc);
1394
1395         write_unlock_bh(&__ip_vs_svc_lock);
1396 }
1397
1398 /*
1399  *      Delete a service from the service list
1400  */
1401 static int ip_vs_del_service(struct ip_vs_service *svc)
1402 {
1403         if (svc == NULL)
1404                 return -EEXIST;
1405         ip_vs_unlink_service(svc);
1406
1407         return 0;
1408 }
1409
1410
1411 /*
1412  *      Flush all the virtual services
1413  */
1414 static int ip_vs_flush(struct net *net)
1415 {
1416         int idx;
1417         struct ip_vs_service *svc, *nxt;
1418
1419         /*
1420          * Flush the service table hashed by <netns,protocol,addr,port>
1421          */
1422         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1423                 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1424                                          s_list) {
1425                         if (net_eq(svc->net, net))
1426                                 ip_vs_unlink_service(svc);
1427                 }
1428         }
1429
1430         /*
1431          * Flush the service table hashed by fwmark
1432          */
1433         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1434                 list_for_each_entry_safe(svc, nxt,
1435                                          &ip_vs_svc_fwm_table[idx], f_list) {
1436                         if (net_eq(svc->net, net))
1437                                 ip_vs_unlink_service(svc);
1438                 }
1439         }
1440
1441         return 0;
1442 }
1443
1444
1445 /*
1446  *      Zero counters in a service or all services
1447  */
1448 static int ip_vs_zero_service(struct ip_vs_service *svc)
1449 {
1450         struct ip_vs_dest *dest;
1451
1452         write_lock_bh(&__ip_vs_svc_lock);
1453         list_for_each_entry(dest, &svc->destinations, n_list) {
1454                 ip_vs_zero_stats(&dest->stats);
1455         }
1456         ip_vs_zero_stats(&svc->stats);
1457         write_unlock_bh(&__ip_vs_svc_lock);
1458         return 0;
1459 }
1460
1461 static int ip_vs_zero_all(struct net *net)
1462 {
1463         int idx;
1464         struct ip_vs_service *svc;
1465
1466         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1467                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1468                         if (net_eq(svc->net, net))
1469                                 ip_vs_zero_service(svc);
1470                 }
1471         }
1472
1473         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1474                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1475                         if (net_eq(svc->net, net))
1476                                 ip_vs_zero_service(svc);
1477                 }
1478         }
1479
1480         ip_vs_zero_stats(net_ipvs(net)->tot_stats);
1481         return 0;
1482 }
1483
1484
1485 static int
1486 proc_do_defense_mode(ctl_table *table, int write,
1487                      void __user *buffer, size_t *lenp, loff_t *ppos)
1488 {
1489         struct net *net = current->nsproxy->net_ns;
1490         int *valp = table->data;
1491         int val = *valp;
1492         int rc;
1493
1494         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1495         if (write && (*valp != val)) {
1496                 if ((*valp < 0) || (*valp > 3)) {
1497                         /* Restore the correct value */
1498                         *valp = val;
1499                 } else {
1500                         update_defense_level(net_ipvs(net));
1501                 }
1502         }
1503         return rc;
1504 }
1505
1506
1507 static int
1508 proc_do_sync_threshold(ctl_table *table, int write,
1509                        void __user *buffer, size_t *lenp, loff_t *ppos)
1510 {
1511         int *valp = table->data;
1512         int val[2];
1513         int rc;
1514
1515         /* backup the value first */
1516         memcpy(val, valp, sizeof(val));
1517
1518         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1519         if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1520                 /* Restore the correct value */
1521                 memcpy(valp, val, sizeof(val));
1522         }
1523         return rc;
1524 }
1525
1526 static int
1527 proc_do_sync_mode(ctl_table *table, int write,
1528                      void __user *buffer, size_t *lenp, loff_t *ppos)
1529 {
1530         int *valp = table->data;
1531         int val = *valp;
1532         int rc;
1533
1534         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1535         if (write && (*valp != val)) {
1536                 if ((*valp < 0) || (*valp > 1)) {
1537                         /* Restore the correct value */
1538                         *valp = val;
1539                 } else {
1540                         struct net *net = current->nsproxy->net_ns;
1541                         ip_vs_sync_switch_mode(net, val);
1542                 }
1543         }
1544         return rc;
1545 }
1546
1547 /*
1548  *      IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1549  *      Do not change order or insert new entries without
1550  *      align with netns init in __ip_vs_control_init()
1551  */
1552
1553 static struct ctl_table vs_vars[] = {
1554         {
1555                 .procname       = "amemthresh",
1556                 .maxlen         = sizeof(int),
1557                 .mode           = 0644,
1558                 .proc_handler   = proc_dointvec,
1559         },
1560         {
1561                 .procname       = "am_droprate",
1562                 .maxlen         = sizeof(int),
1563                 .mode           = 0644,
1564                 .proc_handler   = proc_dointvec,
1565         },
1566         {
1567                 .procname       = "drop_entry",
1568                 .maxlen         = sizeof(int),
1569                 .mode           = 0644,
1570                 .proc_handler   = proc_do_defense_mode,
1571         },
1572         {
1573                 .procname       = "drop_packet",
1574                 .maxlen         = sizeof(int),
1575                 .mode           = 0644,
1576                 .proc_handler   = proc_do_defense_mode,
1577         },
1578 #ifdef CONFIG_IP_VS_NFCT
1579         {
1580                 .procname       = "conntrack",
1581                 .maxlen         = sizeof(int),
1582                 .mode           = 0644,
1583                 .proc_handler   = &proc_dointvec,
1584         },
1585 #endif
1586         {
1587                 .procname       = "secure_tcp",
1588                 .maxlen         = sizeof(int),
1589                 .mode           = 0644,
1590                 .proc_handler   = proc_do_defense_mode,
1591         },
1592         {
1593                 .procname       = "snat_reroute",
1594                 .maxlen         = sizeof(int),
1595                 .mode           = 0644,
1596                 .proc_handler   = &proc_dointvec,
1597         },
1598         {
1599                 .procname       = "sync_version",
1600                 .maxlen         = sizeof(int),
1601                 .mode           = 0644,
1602                 .proc_handler   = &proc_do_sync_mode,
1603         },
1604         {
1605                 .procname       = "cache_bypass",
1606                 .maxlen         = sizeof(int),
1607                 .mode           = 0644,
1608                 .proc_handler   = proc_dointvec,
1609         },
1610         {
1611                 .procname       = "expire_nodest_conn",
1612                 .maxlen         = sizeof(int),
1613                 .mode           = 0644,
1614                 .proc_handler   = proc_dointvec,
1615         },
1616         {
1617                 .procname       = "expire_quiescent_template",
1618                 .maxlen         = sizeof(int),
1619                 .mode           = 0644,
1620                 .proc_handler   = proc_dointvec,
1621         },
1622         {
1623                 .procname       = "sync_threshold",
1624                 .maxlen         =
1625                         sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1626                 .mode           = 0644,
1627                 .proc_handler   = proc_do_sync_threshold,
1628         },
1629         {
1630                 .procname       = "nat_icmp_send",
1631                 .maxlen         = sizeof(int),
1632                 .mode           = 0644,
1633                 .proc_handler   = proc_dointvec,
1634         },
1635 #ifdef CONFIG_IP_VS_DEBUG
1636         {
1637                 .procname       = "debug_level",
1638                 .data           = &sysctl_ip_vs_debug_level,
1639                 .maxlen         = sizeof(int),
1640                 .mode           = 0644,
1641                 .proc_handler   = proc_dointvec,
1642         },
1643 #endif
1644 #if 0
1645         {
1646                 .procname       = "timeout_established",
1647                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1648                 .maxlen         = sizeof(int),
1649                 .mode           = 0644,
1650                 .proc_handler   = proc_dointvec_jiffies,
1651         },
1652         {
1653                 .procname       = "timeout_synsent",
1654                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1655                 .maxlen         = sizeof(int),
1656                 .mode           = 0644,
1657                 .proc_handler   = proc_dointvec_jiffies,
1658         },
1659         {
1660                 .procname       = "timeout_synrecv",
1661                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1662                 .maxlen         = sizeof(int),
1663                 .mode           = 0644,
1664                 .proc_handler   = proc_dointvec_jiffies,
1665         },
1666         {
1667                 .procname       = "timeout_finwait",
1668                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1669                 .maxlen         = sizeof(int),
1670                 .mode           = 0644,
1671                 .proc_handler   = proc_dointvec_jiffies,
1672         },
1673         {
1674                 .procname       = "timeout_timewait",
1675                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1676                 .maxlen         = sizeof(int),
1677                 .mode           = 0644,
1678                 .proc_handler   = proc_dointvec_jiffies,
1679         },
1680         {
1681                 .procname       = "timeout_close",
1682                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1683                 .maxlen         = sizeof(int),
1684                 .mode           = 0644,
1685                 .proc_handler   = proc_dointvec_jiffies,
1686         },
1687         {
1688                 .procname       = "timeout_closewait",
1689                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1690                 .maxlen         = sizeof(int),
1691                 .mode           = 0644,
1692                 .proc_handler   = proc_dointvec_jiffies,
1693         },
1694         {
1695                 .procname       = "timeout_lastack",
1696                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1697                 .maxlen         = sizeof(int),
1698                 .mode           = 0644,
1699                 .proc_handler   = proc_dointvec_jiffies,
1700         },
1701         {
1702                 .procname       = "timeout_listen",
1703                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1704                 .maxlen         = sizeof(int),
1705                 .mode           = 0644,
1706                 .proc_handler   = proc_dointvec_jiffies,
1707         },
1708         {
1709                 .procname       = "timeout_synack",
1710                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1711                 .maxlen         = sizeof(int),
1712                 .mode           = 0644,
1713                 .proc_handler   = proc_dointvec_jiffies,
1714         },
1715         {
1716                 .procname       = "timeout_udp",
1717                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1718                 .maxlen         = sizeof(int),
1719                 .mode           = 0644,
1720                 .proc_handler   = proc_dointvec_jiffies,
1721         },
1722         {
1723                 .procname       = "timeout_icmp",
1724                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1725                 .maxlen         = sizeof(int),
1726                 .mode           = 0644,
1727                 .proc_handler   = proc_dointvec_jiffies,
1728         },
1729 #endif
1730         { }
1731 };
1732
1733 const struct ctl_path net_vs_ctl_path[] = {
1734         { .procname = "net", },
1735         { .procname = "ipv4", },
1736         { .procname = "vs", },
1737         { }
1738 };
1739 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1740
1741 #ifdef CONFIG_PROC_FS
1742
1743 struct ip_vs_iter {
1744         struct seq_net_private p;  /* Do not move this, netns depends upon it*/
1745         struct list_head *table;
1746         int bucket;
1747 };
1748
1749 /*
1750  *      Write the contents of the VS rule table to a PROCfs file.
1751  *      (It is kept just for backward compatibility)
1752  */
1753 static inline const char *ip_vs_fwd_name(unsigned flags)
1754 {
1755         switch (flags & IP_VS_CONN_F_FWD_MASK) {
1756         case IP_VS_CONN_F_LOCALNODE:
1757                 return "Local";
1758         case IP_VS_CONN_F_TUNNEL:
1759                 return "Tunnel";
1760         case IP_VS_CONN_F_DROUTE:
1761                 return "Route";
1762         default:
1763                 return "Masq";
1764         }
1765 }
1766
1767
1768 /* Get the Nth entry in the two lists */
1769 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1770 {
1771         struct net *net = seq_file_net(seq);
1772         struct ip_vs_iter *iter = seq->private;
1773         int idx;
1774         struct ip_vs_service *svc;
1775
1776         /* look in hash by protocol */
1777         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1778                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1779                         if (net_eq(svc->net, net) && pos-- == 0) {
1780                                 iter->table = ip_vs_svc_table;
1781                                 iter->bucket = idx;
1782                                 return svc;
1783                         }
1784                 }
1785         }
1786
1787         /* keep looking in fwmark */
1788         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1789                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1790                         if (net_eq(svc->net, net) && pos-- == 0) {
1791                                 iter->table = ip_vs_svc_fwm_table;
1792                                 iter->bucket = idx;
1793                                 return svc;
1794                         }
1795                 }
1796         }
1797
1798         return NULL;
1799 }
1800
1801 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1802 __acquires(__ip_vs_svc_lock)
1803 {
1804
1805         read_lock_bh(&__ip_vs_svc_lock);
1806         return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1807 }
1808
1809
1810 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1811 {
1812         struct list_head *e;
1813         struct ip_vs_iter *iter;
1814         struct ip_vs_service *svc;
1815
1816         ++*pos;
1817         if (v == SEQ_START_TOKEN)
1818                 return ip_vs_info_array(seq,0);
1819
1820         svc = v;
1821         iter = seq->private;
1822
1823         if (iter->table == ip_vs_svc_table) {
1824                 /* next service in table hashed by protocol */
1825                 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1826                         return list_entry(e, struct ip_vs_service, s_list);
1827
1828
1829                 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1830                         list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1831                                             s_list) {
1832                                 return svc;
1833                         }
1834                 }
1835
1836                 iter->table = ip_vs_svc_fwm_table;
1837                 iter->bucket = -1;
1838                 goto scan_fwmark;
1839         }
1840
1841         /* next service in hashed by fwmark */
1842         if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1843                 return list_entry(e, struct ip_vs_service, f_list);
1844
1845  scan_fwmark:
1846         while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1847                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1848                                     f_list)
1849                         return svc;
1850         }
1851
1852         return NULL;
1853 }
1854
1855 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1856 __releases(__ip_vs_svc_lock)
1857 {
1858         read_unlock_bh(&__ip_vs_svc_lock);
1859 }
1860
1861
1862 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1863 {
1864         if (v == SEQ_START_TOKEN) {
1865                 seq_printf(seq,
1866                         "IP Virtual Server version %d.%d.%d (size=%d)\n",
1867                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1868                 seq_puts(seq,
1869                          "Prot LocalAddress:Port Scheduler Flags\n");
1870                 seq_puts(seq,
1871                          "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1872         } else {
1873                 const struct ip_vs_service *svc = v;
1874                 const struct ip_vs_iter *iter = seq->private;
1875                 const struct ip_vs_dest *dest;
1876
1877                 if (iter->table == ip_vs_svc_table) {
1878 #ifdef CONFIG_IP_VS_IPV6
1879                         if (svc->af == AF_INET6)
1880                                 seq_printf(seq, "%s  [%pI6]:%04X %s ",
1881                                            ip_vs_proto_name(svc->protocol),
1882                                            &svc->addr.in6,
1883                                            ntohs(svc->port),
1884                                            svc->scheduler->name);
1885                         else
1886 #endif
1887                                 seq_printf(seq, "%s  %08X:%04X %s %s ",
1888                                            ip_vs_proto_name(svc->protocol),
1889                                            ntohl(svc->addr.ip),
1890                                            ntohs(svc->port),
1891                                            svc->scheduler->name,
1892                                            (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1893                 } else {
1894                         seq_printf(seq, "FWM  %08X %s %s",
1895                                    svc->fwmark, svc->scheduler->name,
1896                                    (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1897                 }
1898
1899                 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1900                         seq_printf(seq, "persistent %d %08X\n",
1901                                 svc->timeout,
1902                                 ntohl(svc->netmask));
1903                 else
1904                         seq_putc(seq, '\n');
1905
1906                 list_for_each_entry(dest, &svc->destinations, n_list) {
1907 #ifdef CONFIG_IP_VS_IPV6
1908                         if (dest->af == AF_INET6)
1909                                 seq_printf(seq,
1910                                            "  -> [%pI6]:%04X"
1911                                            "      %-7s %-6d %-10d %-10d\n",
1912                                            &dest->addr.in6,
1913                                            ntohs(dest->port),
1914                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1915                                            atomic_read(&dest->weight),
1916                                            atomic_read(&dest->activeconns),
1917                                            atomic_read(&dest->inactconns));
1918                         else
1919 #endif
1920                                 seq_printf(seq,
1921                                            "  -> %08X:%04X      "
1922                                            "%-7s %-6d %-10d %-10d\n",
1923                                            ntohl(dest->addr.ip),
1924                                            ntohs(dest->port),
1925                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1926                                            atomic_read(&dest->weight),
1927                                            atomic_read(&dest->activeconns),
1928                                            atomic_read(&dest->inactconns));
1929
1930                 }
1931         }
1932         return 0;
1933 }
1934
1935 static const struct seq_operations ip_vs_info_seq_ops = {
1936         .start = ip_vs_info_seq_start,
1937         .next  = ip_vs_info_seq_next,
1938         .stop  = ip_vs_info_seq_stop,
1939         .show  = ip_vs_info_seq_show,
1940 };
1941
1942 static int ip_vs_info_open(struct inode *inode, struct file *file)
1943 {
1944         return seq_open_net(inode, file, &ip_vs_info_seq_ops,
1945                         sizeof(struct ip_vs_iter));
1946 }
1947
1948 static const struct file_operations ip_vs_info_fops = {
1949         .owner   = THIS_MODULE,
1950         .open    = ip_vs_info_open,
1951         .read    = seq_read,
1952         .llseek  = seq_lseek,
1953         .release = seq_release_private,
1954 };
1955
1956 #endif
1957
1958 #ifdef CONFIG_PROC_FS
1959 static int ip_vs_stats_show(struct seq_file *seq, void *v)
1960 {
1961         struct net *net = seq_file_single_net(seq);
1962         struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
1963
1964 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
1965         seq_puts(seq,
1966                  "   Total Incoming Outgoing         Incoming         Outgoing\n");
1967         seq_printf(seq,
1968                    "   Conns  Packets  Packets            Bytes            Bytes\n");
1969
1970         spin_lock_bh(&tot_stats->lock);
1971         seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns,
1972                    tot_stats->ustats.inpkts, tot_stats->ustats.outpkts,
1973                    (unsigned long long) tot_stats->ustats.inbytes,
1974                    (unsigned long long) tot_stats->ustats.outbytes);
1975
1976 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1977         seq_puts(seq,
1978                    " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
1979         seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1980                         tot_stats->ustats.cps,
1981                         tot_stats->ustats.inpps,
1982                         tot_stats->ustats.outpps,
1983                         tot_stats->ustats.inbps,
1984                         tot_stats->ustats.outbps);
1985         spin_unlock_bh(&tot_stats->lock);
1986
1987         return 0;
1988 }
1989
1990 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1991 {
1992         return single_open_net(inode, file, ip_vs_stats_show);
1993 }
1994
1995 static const struct file_operations ip_vs_stats_fops = {
1996         .owner = THIS_MODULE,
1997         .open = ip_vs_stats_seq_open,
1998         .read = seq_read,
1999         .llseek = seq_lseek,
2000         .release = single_release,
2001 };
2002
2003 static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2004 {
2005         struct net *net = seq_file_single_net(seq);
2006         struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
2007         int i;
2008
2009 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2010         seq_puts(seq,
2011                  "       Total Incoming Outgoing         Incoming         Outgoing\n");
2012         seq_printf(seq,
2013                    "CPU    Conns  Packets  Packets            Bytes            Bytes\n");
2014
2015         for_each_possible_cpu(i) {
2016                 struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i);
2017                 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2018                             i, u->ustats.conns, u->ustats.inpkts,
2019                             u->ustats.outpkts, (__u64)u->ustats.inbytes,
2020                             (__u64)u->ustats.outbytes);
2021         }
2022
2023         spin_lock_bh(&tot_stats->lock);
2024         seq_printf(seq, "  ~ %8X %8X %8X %16LX %16LX\n\n",
2025                    tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2026                    tot_stats->ustats.outpkts,
2027                    (unsigned long long) tot_stats->ustats.inbytes,
2028                    (unsigned long long) tot_stats->ustats.outbytes);
2029
2030 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2031         seq_puts(seq,
2032                    "     Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2033         seq_printf(seq, "    %8X %8X %8X %16X %16X\n",
2034                         tot_stats->ustats.cps,
2035                         tot_stats->ustats.inpps,
2036                         tot_stats->ustats.outpps,
2037                         tot_stats->ustats.inbps,
2038                         tot_stats->ustats.outbps);
2039         spin_unlock_bh(&tot_stats->lock);
2040
2041         return 0;
2042 }
2043
2044 static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2045 {
2046         return single_open_net(inode, file, ip_vs_stats_percpu_show);
2047 }
2048
2049 static const struct file_operations ip_vs_stats_percpu_fops = {
2050         .owner = THIS_MODULE,
2051         .open = ip_vs_stats_percpu_seq_open,
2052         .read = seq_read,
2053         .llseek = seq_lseek,
2054         .release = single_release,
2055 };
2056 #endif
2057
2058 /*
2059  *      Set timeout values for tcp tcpfin udp in the timeout_table.
2060  */
2061 static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
2062 {
2063 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2064         struct ip_vs_proto_data *pd;
2065 #endif
2066
2067         IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2068                   u->tcp_timeout,
2069                   u->tcp_fin_timeout,
2070                   u->udp_timeout);
2071
2072 #ifdef CONFIG_IP_VS_PROTO_TCP
2073         if (u->tcp_timeout) {
2074                 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2075                 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
2076                         = u->tcp_timeout * HZ;
2077         }
2078
2079         if (u->tcp_fin_timeout) {
2080                 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2081                 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
2082                         = u->tcp_fin_timeout * HZ;
2083         }
2084 #endif
2085
2086 #ifdef CONFIG_IP_VS_PROTO_UDP
2087         if (u->udp_timeout) {
2088                 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2089                 pd->timeout_table[IP_VS_UDP_S_NORMAL]
2090                         = u->udp_timeout * HZ;
2091         }
2092 #endif
2093         return 0;
2094 }
2095
2096
2097 #define SET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
2098 #define SERVICE_ARG_LEN         (sizeof(struct ip_vs_service_user))
2099 #define SVCDEST_ARG_LEN         (sizeof(struct ip_vs_service_user) +    \
2100                                  sizeof(struct ip_vs_dest_user))
2101 #define TIMEOUT_ARG_LEN         (sizeof(struct ip_vs_timeout_user))
2102 #define DAEMON_ARG_LEN          (sizeof(struct ip_vs_daemon_user))
2103 #define MAX_ARG_LEN             SVCDEST_ARG_LEN
2104
2105 static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2106         [SET_CMDID(IP_VS_SO_SET_ADD)]           = SERVICE_ARG_LEN,
2107         [SET_CMDID(IP_VS_SO_SET_EDIT)]          = SERVICE_ARG_LEN,
2108         [SET_CMDID(IP_VS_SO_SET_DEL)]           = SERVICE_ARG_LEN,
2109         [SET_CMDID(IP_VS_SO_SET_FLUSH)]         = 0,
2110         [SET_CMDID(IP_VS_SO_SET_ADDDEST)]       = SVCDEST_ARG_LEN,
2111         [SET_CMDID(IP_VS_SO_SET_DELDEST)]       = SVCDEST_ARG_LEN,
2112         [SET_CMDID(IP_VS_SO_SET_EDITDEST)]      = SVCDEST_ARG_LEN,
2113         [SET_CMDID(IP_VS_SO_SET_TIMEOUT)]       = TIMEOUT_ARG_LEN,
2114         [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]   = DAEMON_ARG_LEN,
2115         [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]    = DAEMON_ARG_LEN,
2116         [SET_CMDID(IP_VS_SO_SET_ZERO)]          = SERVICE_ARG_LEN,
2117 };
2118
2119 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2120                                   struct ip_vs_service_user *usvc_compat)
2121 {
2122         memset(usvc, 0, sizeof(*usvc));
2123
2124         usvc->af                = AF_INET;
2125         usvc->protocol          = usvc_compat->protocol;
2126         usvc->addr.ip           = usvc_compat->addr;
2127         usvc->port              = usvc_compat->port;
2128         usvc->fwmark            = usvc_compat->fwmark;
2129
2130         /* Deep copy of sched_name is not needed here */
2131         usvc->sched_name        = usvc_compat->sched_name;
2132
2133         usvc->flags             = usvc_compat->flags;
2134         usvc->timeout           = usvc_compat->timeout;
2135         usvc->netmask           = usvc_compat->netmask;
2136 }
2137
2138 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2139                                    struct ip_vs_dest_user *udest_compat)
2140 {
2141         memset(udest, 0, sizeof(*udest));
2142
2143         udest->addr.ip          = udest_compat->addr;
2144         udest->port             = udest_compat->port;
2145         udest->conn_flags       = udest_compat->conn_flags;
2146         udest->weight           = udest_compat->weight;
2147         udest->u_threshold      = udest_compat->u_threshold;
2148         udest->l_threshold      = udest_compat->l_threshold;
2149 }
2150
2151 static int
2152 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2153 {
2154         struct net *net = sock_net(sk);
2155         int ret;
2156         unsigned char arg[MAX_ARG_LEN];
2157         struct ip_vs_service_user *usvc_compat;
2158         struct ip_vs_service_user_kern usvc;
2159         struct ip_vs_service *svc;
2160         struct ip_vs_dest_user *udest_compat;
2161         struct ip_vs_dest_user_kern udest;
2162
2163         if (!capable(CAP_NET_ADMIN))
2164                 return -EPERM;
2165
2166         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2167                 return -EINVAL;
2168         if (len < 0 || len >  MAX_ARG_LEN)
2169                 return -EINVAL;
2170         if (len != set_arglen[SET_CMDID(cmd)]) {
2171                 pr_err("set_ctl: len %u != %u\n",
2172                        len, set_arglen[SET_CMDID(cmd)]);
2173                 return -EINVAL;
2174         }
2175
2176         if (copy_from_user(arg, user, len) != 0)
2177                 return -EFAULT;
2178
2179         /* increase the module use count */
2180         ip_vs_use_count_inc();
2181
2182         if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2183                 ret = -ERESTARTSYS;
2184                 goto out_dec;
2185         }
2186
2187         if (cmd == IP_VS_SO_SET_FLUSH) {
2188                 /* Flush the virtual service */
2189                 ret = ip_vs_flush(net);
2190                 goto out_unlock;
2191         } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2192                 /* Set timeout values for (tcp tcpfin udp) */
2193                 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
2194                 goto out_unlock;
2195         } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2196                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2197                 ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2198                                         dm->syncid);
2199                 goto out_unlock;
2200         } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2201                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2202                 ret = stop_sync_thread(net, dm->state);
2203                 goto out_unlock;
2204         }
2205
2206         usvc_compat = (struct ip_vs_service_user *)arg;
2207         udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2208
2209         /* We only use the new structs internally, so copy userspace compat
2210          * structs to extended internal versions */
2211         ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2212         ip_vs_copy_udest_compat(&udest, udest_compat);
2213
2214         if (cmd == IP_VS_SO_SET_ZERO) {
2215                 /* if no service address is set, zero counters in all */
2216                 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2217                         ret = ip_vs_zero_all(net);
2218                         goto out_unlock;
2219                 }
2220         }
2221
2222         /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2223         if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2224             usvc.protocol != IPPROTO_SCTP) {
2225                 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2226                        usvc.protocol, &usvc.addr.ip,
2227                        ntohs(usvc.port), usvc.sched_name);
2228                 ret = -EFAULT;
2229                 goto out_unlock;
2230         }
2231
2232         /* Lookup the exact service by <protocol, addr, port> or fwmark */
2233         if (usvc.fwmark == 0)
2234                 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
2235                                            &usvc.addr, usvc.port);
2236         else
2237                 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
2238
2239         if (cmd != IP_VS_SO_SET_ADD
2240             && (svc == NULL || svc->protocol != usvc.protocol)) {
2241                 ret = -ESRCH;
2242                 goto out_unlock;
2243         }
2244
2245         switch (cmd) {
2246         case IP_VS_SO_SET_ADD:
2247                 if (svc != NULL)
2248                         ret = -EEXIST;
2249                 else
2250                         ret = ip_vs_add_service(net, &usvc, &svc);
2251                 break;
2252         case IP_VS_SO_SET_EDIT:
2253                 ret = ip_vs_edit_service(svc, &usvc);
2254                 break;
2255         case IP_VS_SO_SET_DEL:
2256                 ret = ip_vs_del_service(svc);
2257                 if (!ret)
2258                         goto out_unlock;
2259                 break;
2260         case IP_VS_SO_SET_ZERO:
2261                 ret = ip_vs_zero_service(svc);
2262                 break;
2263         case IP_VS_SO_SET_ADDDEST:
2264                 ret = ip_vs_add_dest(svc, &udest);
2265                 break;
2266         case IP_VS_SO_SET_EDITDEST:
2267                 ret = ip_vs_edit_dest(svc, &udest);
2268                 break;
2269         case IP_VS_SO_SET_DELDEST:
2270                 ret = ip_vs_del_dest(svc, &udest);
2271                 break;
2272         default:
2273                 ret = -EINVAL;
2274         }
2275
2276   out_unlock:
2277         mutex_unlock(&__ip_vs_mutex);
2278   out_dec:
2279         /* decrease the module use count */
2280         ip_vs_use_count_dec();
2281
2282         return ret;
2283 }
2284
2285
2286 static void
2287 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2288 {
2289         spin_lock_bh(&src->lock);
2290         memcpy(dst, &src->ustats, sizeof(*dst));
2291         spin_unlock_bh(&src->lock);
2292 }
2293
2294 static void
2295 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2296 {
2297         dst->protocol = src->protocol;
2298         dst->addr = src->addr.ip;
2299         dst->port = src->port;
2300         dst->fwmark = src->fwmark;
2301         strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2302         dst->flags = src->flags;
2303         dst->timeout = src->timeout / HZ;
2304         dst->netmask = src->netmask;
2305         dst->num_dests = src->num_dests;
2306         ip_vs_copy_stats(&dst->stats, &src->stats);
2307 }
2308
2309 static inline int
2310 __ip_vs_get_service_entries(struct net *net,
2311                             const struct ip_vs_get_services *get,
2312                             struct ip_vs_get_services __user *uptr)
2313 {
2314         int idx, count=0;
2315         struct ip_vs_service *svc;
2316         struct ip_vs_service_entry entry;
2317         int ret = 0;
2318
2319         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2320                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2321                         /* Only expose IPv4 entries to old interface */
2322                         if (svc->af != AF_INET || !net_eq(svc->net, net))
2323                                 continue;
2324
2325                         if (count >= get->num_services)
2326                                 goto out;
2327                         memset(&entry, 0, sizeof(entry));
2328                         ip_vs_copy_service(&entry, svc);
2329                         if (copy_to_user(&uptr->entrytable[count],
2330                                          &entry, sizeof(entry))) {
2331                                 ret = -EFAULT;
2332                                 goto out;
2333                         }
2334                         count++;
2335                 }
2336         }
2337
2338         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2339                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2340                         /* Only expose IPv4 entries to old interface */
2341                         if (svc->af != AF_INET || !net_eq(svc->net, net))
2342                                 continue;
2343
2344                         if (count >= get->num_services)
2345                                 goto out;
2346                         memset(&entry, 0, sizeof(entry));
2347                         ip_vs_copy_service(&entry, svc);
2348                         if (copy_to_user(&uptr->entrytable[count],
2349                                          &entry, sizeof(entry))) {
2350                                 ret = -EFAULT;
2351                                 goto out;
2352                         }
2353                         count++;
2354                 }
2355         }
2356   out:
2357         return ret;
2358 }
2359
2360 static inline int
2361 __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
2362                          struct ip_vs_get_dests __user *uptr)
2363 {
2364         struct ip_vs_service *svc;
2365         union nf_inet_addr addr = { .ip = get->addr };
2366         int ret = 0;
2367
2368         if (get->fwmark)
2369                 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
2370         else
2371                 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
2372                                            get->port);
2373
2374         if (svc) {
2375                 int count = 0;
2376                 struct ip_vs_dest *dest;
2377                 struct ip_vs_dest_entry entry;
2378
2379                 list_for_each_entry(dest, &svc->destinations, n_list) {
2380                         if (count >= get->num_dests)
2381                                 break;
2382
2383                         entry.addr = dest->addr.ip;
2384                         entry.port = dest->port;
2385                         entry.conn_flags = atomic_read(&dest->conn_flags);
2386                         entry.weight = atomic_read(&dest->weight);
2387                         entry.u_threshold = dest->u_threshold;
2388                         entry.l_threshold = dest->l_threshold;
2389                         entry.activeconns = atomic_read(&dest->activeconns);
2390                         entry.inactconns = atomic_read(&dest->inactconns);
2391                         entry.persistconns = atomic_read(&dest->persistconns);
2392                         ip_vs_copy_stats(&entry.stats, &dest->stats);
2393                         if (copy_to_user(&uptr->entrytable[count],
2394                                          &entry, sizeof(entry))) {
2395                                 ret = -EFAULT;
2396                                 break;
2397                         }
2398                         count++;
2399                 }
2400         } else
2401                 ret = -ESRCH;
2402         return ret;
2403 }
2404
2405 static inline void
2406 __ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
2407 {
2408 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2409         struct ip_vs_proto_data *pd;
2410 #endif
2411
2412 #ifdef CONFIG_IP_VS_PROTO_TCP
2413         pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2414         u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2415         u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2416 #endif
2417 #ifdef CONFIG_IP_VS_PROTO_UDP
2418         pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2419         u->udp_timeout =
2420                         pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2421 #endif
2422 }
2423
2424
2425 #define GET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
2426 #define GET_INFO_ARG_LEN        (sizeof(struct ip_vs_getinfo))
2427 #define GET_SERVICES_ARG_LEN    (sizeof(struct ip_vs_get_services))
2428 #define GET_SERVICE_ARG_LEN     (sizeof(struct ip_vs_service_entry))
2429 #define GET_DESTS_ARG_LEN       (sizeof(struct ip_vs_get_dests))
2430 #define GET_TIMEOUT_ARG_LEN     (sizeof(struct ip_vs_timeout_user))
2431 #define GET_DAEMON_ARG_LEN      (sizeof(struct ip_vs_daemon_user) * 2)
2432
2433 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2434         [GET_CMDID(IP_VS_SO_GET_VERSION)]       = 64,
2435         [GET_CMDID(IP_VS_SO_GET_INFO)]          = GET_INFO_ARG_LEN,
2436         [GET_CMDID(IP_VS_SO_GET_SERVICES)]      = GET_SERVICES_ARG_LEN,
2437         [GET_CMDID(IP_VS_SO_GET_SERVICE)]       = GET_SERVICE_ARG_LEN,
2438         [GET_CMDID(IP_VS_SO_GET_DESTS)]         = GET_DESTS_ARG_LEN,
2439         [GET_CMDID(IP_VS_SO_GET_TIMEOUT)]       = GET_TIMEOUT_ARG_LEN,
2440         [GET_CMDID(IP_VS_SO_GET_DAEMON)]        = GET_DAEMON_ARG_LEN,
2441 };
2442
2443 static int
2444 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2445 {
2446         unsigned char arg[128];
2447         int ret = 0;
2448         unsigned int copylen;
2449         struct net *net = sock_net(sk);
2450         struct netns_ipvs *ipvs = net_ipvs(net);
2451
2452         BUG_ON(!net);
2453         if (!capable(CAP_NET_ADMIN))
2454                 return -EPERM;
2455
2456         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2457                 return -EINVAL;
2458
2459         if (*len < get_arglen[GET_CMDID(cmd)]) {
2460                 pr_err("get_ctl: len %u < %u\n",
2461                        *len, get_arglen[GET_CMDID(cmd)]);
2462                 return -EINVAL;
2463         }
2464
2465         copylen = get_arglen[GET_CMDID(cmd)];
2466         if (copylen > 128)
2467                 return -EINVAL;
2468
2469         if (copy_from_user(arg, user, copylen) != 0)
2470                 return -EFAULT;
2471
2472         if (mutex_lock_interruptible(&__ip_vs_mutex))
2473                 return -ERESTARTSYS;
2474
2475         switch (cmd) {
2476         case IP_VS_SO_GET_VERSION:
2477         {
2478                 char buf[64];
2479
2480                 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2481                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2482                 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2483                         ret = -EFAULT;
2484                         goto out;
2485                 }
2486                 *len = strlen(buf)+1;
2487         }
2488         break;
2489
2490         case IP_VS_SO_GET_INFO:
2491         {
2492                 struct ip_vs_getinfo info;
2493                 info.version = IP_VS_VERSION_CODE;
2494                 info.size = ip_vs_conn_tab_size;
2495                 info.num_services = ipvs->num_services;
2496                 if (copy_to_user(user, &info, sizeof(info)) != 0)
2497                         ret = -EFAULT;
2498         }
2499         break;
2500
2501         case IP_VS_SO_GET_SERVICES:
2502         {
2503                 struct ip_vs_get_services *get;
2504                 int size;
2505
2506                 get = (struct ip_vs_get_services *)arg;
2507                 size = sizeof(*get) +
2508                         sizeof(struct ip_vs_service_entry) * get->num_services;
2509                 if (*len != size) {
2510                         pr_err("length: %u != %u\n", *len, size);
2511                         ret = -EINVAL;
2512                         goto out;
2513                 }
2514                 ret = __ip_vs_get_service_entries(net, get, user);
2515         }
2516         break;
2517
2518         case IP_VS_SO_GET_SERVICE:
2519         {
2520                 struct ip_vs_service_entry *entry;
2521                 struct ip_vs_service *svc;
2522                 union nf_inet_addr addr;
2523
2524                 entry = (struct ip_vs_service_entry *)arg;
2525                 addr.ip = entry->addr;
2526                 if (entry->fwmark)
2527                         svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
2528                 else
2529                         svc = __ip_vs_service_find(net, AF_INET,
2530                                                    entry->protocol, &addr,
2531                                                    entry->port);
2532                 if (svc) {
2533                         ip_vs_copy_service(entry, svc);
2534                         if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2535                                 ret = -EFAULT;
2536                 } else
2537                         ret = -ESRCH;
2538         }
2539         break;
2540
2541         case IP_VS_SO_GET_DESTS:
2542         {
2543                 struct ip_vs_get_dests *get;
2544                 int size;
2545
2546                 get = (struct ip_vs_get_dests *)arg;
2547                 size = sizeof(*get) +
2548                         sizeof(struct ip_vs_dest_entry) * get->num_dests;
2549                 if (*len != size) {
2550                         pr_err("length: %u != %u\n", *len, size);
2551                         ret = -EINVAL;
2552                         goto out;
2553                 }
2554                 ret = __ip_vs_get_dest_entries(net, get, user);
2555         }
2556         break;
2557
2558         case IP_VS_SO_GET_TIMEOUT:
2559         {
2560                 struct ip_vs_timeout_user t;
2561
2562                 __ip_vs_get_timeouts(net, &t);
2563                 if (copy_to_user(user, &t, sizeof(t)) != 0)
2564                         ret = -EFAULT;
2565         }
2566         break;
2567
2568         case IP_VS_SO_GET_DAEMON:
2569         {
2570                 struct ip_vs_daemon_user d[2];
2571
2572                 memset(&d, 0, sizeof(d));
2573                 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
2574                         d[0].state = IP_VS_STATE_MASTER;
2575                         strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2576                                 sizeof(d[0].mcast_ifn));
2577                         d[0].syncid = ipvs->master_syncid;
2578                 }
2579                 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
2580                         d[1].state = IP_VS_STATE_BACKUP;
2581                         strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2582                                 sizeof(d[1].mcast_ifn));
2583                         d[1].syncid = ipvs->backup_syncid;
2584                 }
2585                 if (copy_to_user(user, &d, sizeof(d)) != 0)
2586                         ret = -EFAULT;
2587         }
2588         break;
2589
2590         default:
2591                 ret = -EINVAL;
2592         }
2593
2594   out:
2595         mutex_unlock(&__ip_vs_mutex);
2596         return ret;
2597 }
2598
2599
2600 static struct nf_sockopt_ops ip_vs_sockopts = {
2601         .pf             = PF_INET,
2602         .set_optmin     = IP_VS_BASE_CTL,
2603         .set_optmax     = IP_VS_SO_SET_MAX+1,
2604         .set            = do_ip_vs_set_ctl,
2605         .get_optmin     = IP_VS_BASE_CTL,
2606         .get_optmax     = IP_VS_SO_GET_MAX+1,
2607         .get            = do_ip_vs_get_ctl,
2608         .owner          = THIS_MODULE,
2609 };
2610
2611 /*
2612  * Generic Netlink interface
2613  */
2614
2615 /* IPVS genetlink family */
2616 static struct genl_family ip_vs_genl_family = {
2617         .id             = GENL_ID_GENERATE,
2618         .hdrsize        = 0,
2619         .name           = IPVS_GENL_NAME,
2620         .version        = IPVS_GENL_VERSION,
2621         .maxattr        = IPVS_CMD_MAX,
2622         .netnsok        = true,         /* Make ipvsadm to work on netns */
2623 };
2624
2625 /* Policy used for first-level command attributes */
2626 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2627         [IPVS_CMD_ATTR_SERVICE]         = { .type = NLA_NESTED },
2628         [IPVS_CMD_ATTR_DEST]            = { .type = NLA_NESTED },
2629         [IPVS_CMD_ATTR_DAEMON]          = { .type = NLA_NESTED },
2630         [IPVS_CMD_ATTR_TIMEOUT_TCP]     = { .type = NLA_U32 },
2631         [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2632         [IPVS_CMD_ATTR_TIMEOUT_UDP]     = { .type = NLA_U32 },
2633 };
2634
2635 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2636 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2637         [IPVS_DAEMON_ATTR_STATE]        = { .type = NLA_U32 },
2638         [IPVS_DAEMON_ATTR_MCAST_IFN]    = { .type = NLA_NUL_STRING,
2639                                             .len = IP_VS_IFNAME_MAXLEN },
2640         [IPVS_DAEMON_ATTR_SYNC_ID]      = { .type = NLA_U32 },
2641 };
2642
2643 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2644 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2645         [IPVS_SVC_ATTR_AF]              = { .type = NLA_U16 },
2646         [IPVS_SVC_ATTR_PROTOCOL]        = { .type = NLA_U16 },
2647         [IPVS_SVC_ATTR_ADDR]            = { .type = NLA_BINARY,
2648                                             .len = sizeof(union nf_inet_addr) },
2649         [IPVS_SVC_ATTR_PORT]            = { .type = NLA_U16 },
2650         [IPVS_SVC_ATTR_FWMARK]          = { .type = NLA_U32 },
2651         [IPVS_SVC_ATTR_SCHED_NAME]      = { .type = NLA_NUL_STRING,
2652                                             .len = IP_VS_SCHEDNAME_MAXLEN },
2653         [IPVS_SVC_ATTR_PE_NAME]         = { .type = NLA_NUL_STRING,
2654                                             .len = IP_VS_PENAME_MAXLEN },
2655         [IPVS_SVC_ATTR_FLAGS]           = { .type = NLA_BINARY,
2656                                             .len = sizeof(struct ip_vs_flags) },
2657         [IPVS_SVC_ATTR_TIMEOUT]         = { .type = NLA_U32 },
2658         [IPVS_SVC_ATTR_NETMASK]         = { .type = NLA_U32 },
2659         [IPVS_SVC_ATTR_STATS]           = { .type = NLA_NESTED },
2660 };
2661
2662 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2663 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2664         [IPVS_DEST_ATTR_ADDR]           = { .type = NLA_BINARY,
2665                                             .len = sizeof(union nf_inet_addr) },
2666         [IPVS_DEST_ATTR_PORT]           = { .type = NLA_U16 },
2667         [IPVS_DEST_ATTR_FWD_METHOD]     = { .type = NLA_U32 },
2668         [IPVS_DEST_ATTR_WEIGHT]         = { .type = NLA_U32 },
2669         [IPVS_DEST_ATTR_U_THRESH]       = { .type = NLA_U32 },
2670         [IPVS_DEST_ATTR_L_THRESH]       = { .type = NLA_U32 },
2671         [IPVS_DEST_ATTR_ACTIVE_CONNS]   = { .type = NLA_U32 },
2672         [IPVS_DEST_ATTR_INACT_CONNS]    = { .type = NLA_U32 },
2673         [IPVS_DEST_ATTR_PERSIST_CONNS]  = { .type = NLA_U32 },
2674         [IPVS_DEST_ATTR_STATS]          = { .type = NLA_NESTED },
2675 };
2676
2677 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2678                                  struct ip_vs_stats *stats)
2679 {
2680         struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2681         if (!nl_stats)
2682                 return -EMSGSIZE;
2683
2684         spin_lock_bh(&stats->lock);
2685
2686         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2687         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2688         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2689         NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2690         NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2691         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2692         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2693         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2694         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2695         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
2696
2697         spin_unlock_bh(&stats->lock);
2698
2699         nla_nest_end(skb, nl_stats);
2700
2701         return 0;
2702
2703 nla_put_failure:
2704         spin_unlock_bh(&stats->lock);
2705         nla_nest_cancel(skb, nl_stats);
2706         return -EMSGSIZE;
2707 }
2708
2709 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2710                                    struct ip_vs_service *svc)
2711 {
2712         struct nlattr *nl_service;
2713         struct ip_vs_flags flags = { .flags = svc->flags,
2714                                      .mask = ~0 };
2715
2716         nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2717         if (!nl_service)
2718                 return -EMSGSIZE;
2719
2720         NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
2721
2722         if (svc->fwmark) {
2723                 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2724         } else {
2725                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2726                 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2727                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2728         }
2729
2730         NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2731         if (svc->pe)
2732                 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
2733         NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2734         NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2735         NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2736
2737         if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2738                 goto nla_put_failure;
2739
2740         nla_nest_end(skb, nl_service);
2741
2742         return 0;
2743
2744 nla_put_failure:
2745         nla_nest_cancel(skb, nl_service);
2746         return -EMSGSIZE;
2747 }
2748
2749 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2750                                    struct ip_vs_service *svc,
2751                                    struct netlink_callback *cb)
2752 {
2753         void *hdr;
2754
2755         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2756                           &ip_vs_genl_family, NLM_F_MULTI,
2757                           IPVS_CMD_NEW_SERVICE);
2758         if (!hdr)
2759                 return -EMSGSIZE;
2760
2761         if (ip_vs_genl_fill_service(skb, svc) < 0)
2762                 goto nla_put_failure;
2763
2764         return genlmsg_end(skb, hdr);
2765
2766 nla_put_failure:
2767         genlmsg_cancel(skb, hdr);
2768         return -EMSGSIZE;
2769 }
2770
2771 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2772                                     struct netlink_callback *cb)
2773 {
2774         int idx = 0, i;
2775         int start = cb->args[0];
2776         struct ip_vs_service *svc;
2777         struct net *net = skb_sknet(skb);
2778
2779         mutex_lock(&__ip_vs_mutex);
2780         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2781                 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2782                         if (++idx <= start || !net_eq(svc->net, net))
2783                                 continue;
2784                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2785                                 idx--;
2786                                 goto nla_put_failure;
2787                         }
2788                 }
2789         }
2790
2791         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2792                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2793                         if (++idx <= start || !net_eq(svc->net, net))
2794                                 continue;
2795                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2796                                 idx--;
2797                                 goto nla_put_failure;
2798                         }
2799                 }
2800         }
2801
2802 nla_put_failure:
2803         mutex_unlock(&__ip_vs_mutex);
2804         cb->args[0] = idx;
2805
2806         return skb->len;
2807 }
2808
2809 static int ip_vs_genl_parse_service(struct net *net,
2810                                     struct ip_vs_service_user_kern *usvc,
2811                                     struct nlattr *nla, int full_entry,
2812                                     struct ip_vs_service **ret_svc)
2813 {
2814         struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2815         struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2816         struct ip_vs_service *svc;
2817
2818         /* Parse mandatory identifying service fields first */
2819         if (nla == NULL ||
2820             nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2821                 return -EINVAL;
2822
2823         nla_af          = attrs[IPVS_SVC_ATTR_AF];
2824         nla_protocol    = attrs[IPVS_SVC_ATTR_PROTOCOL];
2825         nla_addr        = attrs[IPVS_SVC_ATTR_ADDR];
2826         nla_port        = attrs[IPVS_SVC_ATTR_PORT];
2827         nla_fwmark      = attrs[IPVS_SVC_ATTR_FWMARK];
2828
2829         if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2830                 return -EINVAL;
2831
2832         memset(usvc, 0, sizeof(*usvc));
2833
2834         usvc->af = nla_get_u16(nla_af);
2835 #ifdef CONFIG_IP_VS_IPV6
2836         if (usvc->af != AF_INET && usvc->af != AF_INET6)
2837 #else
2838         if (usvc->af != AF_INET)
2839 #endif
2840                 return -EAFNOSUPPORT;
2841
2842         if (nla_fwmark) {
2843                 usvc->protocol = IPPROTO_TCP;
2844                 usvc->fwmark = nla_get_u32(nla_fwmark);
2845         } else {
2846                 usvc->protocol = nla_get_u16(nla_protocol);
2847                 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2848                 usvc->port = nla_get_u16(nla_port);
2849                 usvc->fwmark = 0;
2850         }
2851
2852         if (usvc->fwmark)
2853                 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
2854         else
2855                 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
2856                                            &usvc->addr, usvc->port);
2857         *ret_svc = svc;
2858
2859         /* If a full entry was requested, check for the additional fields */
2860         if (full_entry) {
2861                 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
2862                               *nla_netmask;
2863                 struct ip_vs_flags flags;
2864
2865                 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2866                 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
2867                 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2868                 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2869                 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2870
2871                 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2872                         return -EINVAL;
2873
2874                 nla_memcpy(&flags, nla_flags, sizeof(flags));
2875
2876                 /* prefill flags from service if it already exists */
2877                 if (svc)
2878                         usvc->flags = svc->flags;
2879
2880                 /* set new flags from userland */
2881                 usvc->flags = (usvc->flags & ~flags.mask) |
2882                               (flags.flags & flags.mask);
2883                 usvc->sched_name = nla_data(nla_sched);
2884                 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
2885                 usvc->timeout = nla_get_u32(nla_timeout);
2886                 usvc->netmask = nla_get_u32(nla_netmask);
2887         }
2888
2889         return 0;
2890 }
2891
2892 static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
2893                                                      struct nlattr *nla)
2894 {
2895         struct ip_vs_service_user_kern usvc;
2896         struct ip_vs_service *svc;
2897         int ret;
2898
2899         ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
2900         return ret ? ERR_PTR(ret) : svc;
2901 }
2902
2903 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2904 {
2905         struct nlattr *nl_dest;
2906
2907         nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2908         if (!nl_dest)
2909                 return -EMSGSIZE;
2910
2911         NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2912         NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2913
2914         NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2915                     atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2916         NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2917         NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2918         NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2919         NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2920                     atomic_read(&dest->activeconns));
2921         NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2922                     atomic_read(&dest->inactconns));
2923         NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2924                     atomic_read(&dest->persistconns));
2925
2926         if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2927                 goto nla_put_failure;
2928
2929         nla_nest_end(skb, nl_dest);
2930
2931         return 0;
2932
2933 nla_put_failure:
2934         nla_nest_cancel(skb, nl_dest);
2935         return -EMSGSIZE;
2936 }
2937
2938 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2939                                 struct netlink_callback *cb)
2940 {
2941         void *hdr;
2942
2943         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2944                           &ip_vs_genl_family, NLM_F_MULTI,
2945                           IPVS_CMD_NEW_DEST);
2946         if (!hdr)
2947                 return -EMSGSIZE;
2948
2949         if (ip_vs_genl_fill_dest(skb, dest) < 0)
2950                 goto nla_put_failure;
2951
2952         return genlmsg_end(skb, hdr);
2953
2954 nla_put_failure:
2955         genlmsg_cancel(skb, hdr);
2956         return -EMSGSIZE;
2957 }
2958
2959 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2960                                  struct netlink_callback *cb)
2961 {
2962         int idx = 0;
2963         int start = cb->args[0];
2964         struct ip_vs_service *svc;
2965         struct ip_vs_dest *dest;
2966         struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2967         struct net *net = skb_sknet(skb);
2968
2969         mutex_lock(&__ip_vs_mutex);
2970
2971         /* Try to find the service for which to dump destinations */
2972         if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2973                         IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2974                 goto out_err;
2975
2976
2977         svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
2978         if (IS_ERR(svc) || svc == NULL)
2979                 goto out_err;
2980
2981         /* Dump the destinations */
2982         list_for_each_entry(dest, &svc->destinations, n_list) {
2983                 if (++idx <= start)
2984                         continue;
2985                 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2986                         idx--;
2987                         goto nla_put_failure;
2988                 }
2989         }
2990
2991 nla_put_failure:
2992         cb->args[0] = idx;
2993
2994 out_err:
2995         mutex_unlock(&__ip_vs_mutex);
2996
2997         return skb->len;
2998 }
2999
3000 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
3001                                  struct nlattr *nla, int full_entry)
3002 {
3003         struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3004         struct nlattr *nla_addr, *nla_port;
3005
3006         /* Parse mandatory identifying destination fields first */
3007         if (nla == NULL ||
3008             nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3009                 return -EINVAL;
3010
3011         nla_addr        = attrs[IPVS_DEST_ATTR_ADDR];
3012         nla_port        = attrs[IPVS_DEST_ATTR_PORT];
3013
3014         if (!(nla_addr && nla_port))
3015                 return -EINVAL;
3016
3017         memset(udest, 0, sizeof(*udest));
3018
3019         nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3020         udest->port = nla_get_u16(nla_port);
3021
3022         /* If a full entry was requested, check for the additional fields */
3023         if (full_entry) {
3024                 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3025                               *nla_l_thresh;
3026
3027                 nla_fwd         = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3028                 nla_weight      = attrs[IPVS_DEST_ATTR_WEIGHT];
3029                 nla_u_thresh    = attrs[IPVS_DEST_ATTR_U_THRESH];
3030                 nla_l_thresh    = attrs[IPVS_DEST_ATTR_L_THRESH];
3031
3032                 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3033                         return -EINVAL;
3034
3035                 udest->conn_flags = nla_get_u32(nla_fwd)
3036                                     & IP_VS_CONN_F_FWD_MASK;
3037                 udest->weight = nla_get_u32(nla_weight);
3038                 udest->u_threshold = nla_get_u32(nla_u_thresh);
3039                 udest->l_threshold = nla_get_u32(nla_l_thresh);
3040         }
3041
3042         return 0;
3043 }
3044
3045 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3046                                   const char *mcast_ifn, __be32 syncid)
3047 {
3048         struct nlattr *nl_daemon;
3049
3050         nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3051         if (!nl_daemon)
3052                 return -EMSGSIZE;
3053
3054         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
3055         NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
3056         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3057
3058         nla_nest_end(skb, nl_daemon);
3059
3060         return 0;
3061
3062 nla_put_failure:
3063         nla_nest_cancel(skb, nl_daemon);
3064         return -EMSGSIZE;
3065 }
3066
3067 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3068                                   const char *mcast_ifn, __be32 syncid,
3069                                   struct netlink_callback *cb)
3070 {
3071         void *hdr;
3072         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3073                           &ip_vs_genl_family, NLM_F_MULTI,
3074                           IPVS_CMD_NEW_DAEMON);
3075         if (!hdr)
3076                 return -EMSGSIZE;
3077
3078         if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3079                 goto nla_put_failure;
3080
3081         return genlmsg_end(skb, hdr);
3082
3083 nla_put_failure:
3084         genlmsg_cancel(skb, hdr);
3085         return -EMSGSIZE;
3086 }
3087
3088 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3089                                    struct netlink_callback *cb)
3090 {
3091         struct net *net = skb_net(skb);
3092         struct netns_ipvs *ipvs = net_ipvs(net);
3093
3094         mutex_lock(&__ip_vs_mutex);
3095         if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3096                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3097                                            ipvs->master_mcast_ifn,
3098                                            ipvs->master_syncid, cb) < 0)
3099                         goto nla_put_failure;
3100
3101                 cb->args[0] = 1;
3102         }
3103
3104         if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3105                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3106                                            ipvs->backup_mcast_ifn,
3107                                            ipvs->backup_syncid, cb) < 0)
3108                         goto nla_put_failure;
3109
3110                 cb->args[1] = 1;
3111         }
3112
3113 nla_put_failure:
3114         mutex_unlock(&__ip_vs_mutex);
3115
3116         return skb->len;
3117 }
3118
3119 static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
3120 {
3121         if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3122               attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3123               attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3124                 return -EINVAL;
3125
3126         return start_sync_thread(net,
3127                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3128                                  nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3129                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3130 }
3131
3132 static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
3133 {
3134         if (!attrs[IPVS_DAEMON_ATTR_STATE])
3135                 return -EINVAL;
3136
3137         return stop_sync_thread(net,
3138                                 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3139 }
3140
3141 static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
3142 {
3143         struct ip_vs_timeout_user t;
3144
3145         __ip_vs_get_timeouts(net, &t);
3146
3147         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3148                 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3149
3150         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3151                 t.tcp_fin_timeout =
3152                         nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3153
3154         if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3155                 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3156
3157         return ip_vs_set_timeout(net, &t);
3158 }
3159
3160 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3161 {
3162         struct ip_vs_service *svc = NULL;
3163         struct ip_vs_service_user_kern usvc;
3164         struct ip_vs_dest_user_kern udest;
3165         int ret = 0, cmd;
3166         int need_full_svc = 0, need_full_dest = 0;
3167         struct net *net;
3168         struct netns_ipvs *ipvs;
3169
3170         net = skb_sknet(skb);
3171         ipvs = net_ipvs(net);
3172         cmd = info->genlhdr->cmd;
3173
3174         mutex_lock(&__ip_vs_mutex);
3175
3176         if (cmd == IPVS_CMD_FLUSH) {
3177                 ret = ip_vs_flush(net);
3178                 goto out;
3179         } else if (cmd == IPVS_CMD_SET_CONFIG) {
3180                 ret = ip_vs_genl_set_config(net, info->attrs);
3181                 goto out;
3182         } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3183                    cmd == IPVS_CMD_DEL_DAEMON) {
3184
3185                 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3186
3187                 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3188                     nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3189                                      info->attrs[IPVS_CMD_ATTR_DAEMON],
3190                                      ip_vs_daemon_policy)) {
3191                         ret = -EINVAL;
3192                         goto out;
3193                 }
3194
3195                 if (cmd == IPVS_CMD_NEW_DAEMON)
3196                         ret = ip_vs_genl_new_daemon(net, daemon_attrs);
3197                 else
3198                         ret = ip_vs_genl_del_daemon(net, daemon_attrs);
3199                 goto out;
3200         } else if (cmd == IPVS_CMD_ZERO &&
3201                    !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3202                 ret = ip_vs_zero_all(net);
3203                 goto out;
3204         }
3205
3206         /* All following commands require a service argument, so check if we
3207          * received a valid one. We need a full service specification when
3208          * adding / editing a service. Only identifying members otherwise. */
3209         if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3210                 need_full_svc = 1;
3211
3212         ret = ip_vs_genl_parse_service(net, &usvc,
3213                                        info->attrs[IPVS_CMD_ATTR_SERVICE],
3214                                        need_full_svc, &svc);
3215         if (ret)
3216                 goto out;
3217
3218         /* Unless we're adding a new service, the service must already exist */
3219         if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3220                 ret = -ESRCH;
3221                 goto out;
3222         }
3223
3224         /* Destination commands require a valid destination argument. For
3225          * adding / editing a destination, we need a full destination
3226          * specification. */
3227         if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3228             cmd == IPVS_CMD_DEL_DEST) {
3229                 if (cmd != IPVS_CMD_DEL_DEST)
3230                         need_full_dest = 1;
3231
3232                 ret = ip_vs_genl_parse_dest(&udest,
3233                                             info->attrs[IPVS_CMD_ATTR_DEST],
3234                                             need_full_dest);
3235                 if (ret)
3236                         goto out;
3237         }
3238
3239         switch (cmd) {
3240         case IPVS_CMD_NEW_SERVICE:
3241                 if (svc == NULL)
3242                         ret = ip_vs_add_service(net, &usvc, &svc);
3243                 else
3244                         ret = -EEXIST;
3245                 break;
3246         case IPVS_CMD_SET_SERVICE:
3247                 ret = ip_vs_edit_service(svc, &usvc);
3248                 break;
3249         case IPVS_CMD_DEL_SERVICE:
3250                 ret = ip_vs_del_service(svc);
3251                 /* do not use svc, it can be freed */
3252                 break;
3253         case IPVS_CMD_NEW_DEST:
3254                 ret = ip_vs_add_dest(svc, &udest);
3255                 break;
3256         case IPVS_CMD_SET_DEST:
3257                 ret = ip_vs_edit_dest(svc, &udest);
3258                 break;
3259         case IPVS_CMD_DEL_DEST:
3260                 ret = ip_vs_del_dest(svc, &udest);
3261                 break;
3262         case IPVS_CMD_ZERO:
3263                 ret = ip_vs_zero_service(svc);
3264                 break;
3265         default:
3266                 ret = -EINVAL;
3267         }
3268
3269 out:
3270         mutex_unlock(&__ip_vs_mutex);
3271
3272         return ret;
3273 }
3274
3275 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3276 {
3277         struct sk_buff *msg;
3278         void *reply;
3279         int ret, cmd, reply_cmd;
3280         struct net *net;
3281         struct netns_ipvs *ipvs;
3282
3283         net = skb_sknet(skb);
3284         ipvs = net_ipvs(net);
3285         cmd = info->genlhdr->cmd;
3286
3287         if (cmd == IPVS_CMD_GET_SERVICE)
3288                 reply_cmd = IPVS_CMD_NEW_SERVICE;
3289         else if (cmd == IPVS_CMD_GET_INFO)
3290                 reply_cmd = IPVS_CMD_SET_INFO;
3291         else if (cmd == IPVS_CMD_GET_CONFIG)
3292                 reply_cmd = IPVS_CMD_SET_CONFIG;
3293         else {
3294                 pr_err("unknown Generic Netlink command\n");
3295                 return -EINVAL;
3296         }
3297
3298         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3299         if (!msg)
3300                 return -ENOMEM;
3301
3302         mutex_lock(&__ip_vs_mutex);
3303
3304         reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3305         if (reply == NULL)
3306                 goto nla_put_failure;
3307
3308         switch (cmd) {
3309         case IPVS_CMD_GET_SERVICE:
3310         {
3311                 struct ip_vs_service *svc;
3312
3313                 svc = ip_vs_genl_find_service(net,
3314                                               info->attrs[IPVS_CMD_ATTR_SERVICE]);
3315                 if (IS_ERR(svc)) {
3316                         ret = PTR_ERR(svc);
3317                         goto out_err;
3318                 } else if (svc) {
3319                         ret = ip_vs_genl_fill_service(msg, svc);
3320                         if (ret)
3321                                 goto nla_put_failure;
3322                 } else {
3323                         ret = -ESRCH;
3324                         goto out_err;
3325                 }
3326
3327                 break;
3328         }
3329
3330         case IPVS_CMD_GET_CONFIG:
3331         {
3332                 struct ip_vs_timeout_user t;
3333
3334                 __ip_vs_get_timeouts(net, &t);
3335 #ifdef CONFIG_IP_VS_PROTO_TCP
3336                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3337                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3338                             t.tcp_fin_timeout);
3339 #endif
3340 #ifdef CONFIG_IP_VS_PROTO_UDP
3341                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3342 #endif
3343
3344                 break;
3345         }
3346
3347         case IPVS_CMD_GET_INFO:
3348                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3349                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3350                             ip_vs_conn_tab_size);
3351                 break;
3352         }
3353
3354         genlmsg_end(msg, reply);
3355         ret = genlmsg_reply(msg, info);
3356         goto out;
3357
3358 nla_put_failure:
3359         pr_err("not enough space in Netlink message\n");
3360         ret = -EMSGSIZE;
3361
3362 out_err:
3363         nlmsg_free(msg);
3364 out:
3365         mutex_unlock(&__ip_vs_mutex);
3366
3367         return ret;
3368 }
3369
3370
3371 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3372         {
3373                 .cmd    = IPVS_CMD_NEW_SERVICE,
3374                 .flags  = GENL_ADMIN_PERM,
3375                 .policy = ip_vs_cmd_policy,
3376                 .doit   = ip_vs_genl_set_cmd,
3377         },
3378         {
3379                 .cmd    = IPVS_CMD_SET_SERVICE,
3380                 .flags  = GENL_ADMIN_PERM,
3381                 .policy = ip_vs_cmd_policy,
3382                 .doit   = ip_vs_genl_set_cmd,
3383         },
3384         {
3385                 .cmd    = IPVS_CMD_DEL_SERVICE,
3386                 .flags  = GENL_ADMIN_PERM,
3387                 .policy = ip_vs_cmd_policy,
3388                 .doit   = ip_vs_genl_set_cmd,
3389         },
3390         {
3391                 .cmd    = IPVS_CMD_GET_SERVICE,
3392                 .flags  = GENL_ADMIN_PERM,
3393                 .doit   = ip_vs_genl_get_cmd,
3394                 .dumpit = ip_vs_genl_dump_services,
3395                 .policy = ip_vs_cmd_policy,
3396         },
3397         {
3398                 .cmd    = IPVS_CMD_NEW_DEST,
3399                 .flags  = GENL_ADMIN_PERM,
3400                 .policy = ip_vs_cmd_policy,
3401                 .doit   = ip_vs_genl_set_cmd,
3402         },
3403         {
3404                 .cmd    = IPVS_CMD_SET_DEST,
3405                 .flags  = GENL_ADMIN_PERM,
3406                 .policy = ip_vs_cmd_policy,
3407                 .doit   = ip_vs_genl_set_cmd,
3408         },
3409         {
3410                 .cmd    = IPVS_CMD_DEL_DEST,
3411                 .flags  = GENL_ADMIN_PERM,
3412                 .policy = ip_vs_cmd_policy,
3413                 .doit   = ip_vs_genl_set_cmd,
3414         },
3415         {
3416                 .cmd    = IPVS_CMD_GET_DEST,
3417                 .flags  = GENL_ADMIN_PERM,
3418                 .policy = ip_vs_cmd_policy,
3419                 .dumpit = ip_vs_genl_dump_dests,
3420         },
3421         {
3422                 .cmd    = IPVS_CMD_NEW_DAEMON,
3423                 .flags  = GENL_ADMIN_PERM,
3424                 .policy = ip_vs_cmd_policy,
3425                 .doit   = ip_vs_genl_set_cmd,
3426         },
3427         {
3428                 .cmd    = IPVS_CMD_DEL_DAEMON,
3429                 .flags  = GENL_ADMIN_PERM,
3430                 .policy = ip_vs_cmd_policy,
3431                 .doit   = ip_vs_genl_set_cmd,
3432         },
3433         {
3434                 .cmd    = IPVS_CMD_GET_DAEMON,
3435                 .flags  = GENL_ADMIN_PERM,
3436                 .dumpit = ip_vs_genl_dump_daemons,
3437         },
3438         {
3439                 .cmd    = IPVS_CMD_SET_CONFIG,
3440                 .flags  = GENL_ADMIN_PERM,
3441                 .policy = ip_vs_cmd_policy,
3442                 .doit   = ip_vs_genl_set_cmd,
3443         },
3444         {
3445                 .cmd    = IPVS_CMD_GET_CONFIG,
3446                 .flags  = GENL_ADMIN_PERM,
3447                 .doit   = ip_vs_genl_get_cmd,
3448         },
3449         {
3450                 .cmd    = IPVS_CMD_GET_INFO,
3451                 .flags  = GENL_ADMIN_PERM,
3452                 .doit   = ip_vs_genl_get_cmd,
3453         },
3454         {
3455                 .cmd    = IPVS_CMD_ZERO,
3456                 .flags  = GENL_ADMIN_PERM,
3457                 .policy = ip_vs_cmd_policy,
3458                 .doit   = ip_vs_genl_set_cmd,
3459         },
3460         {
3461                 .cmd    = IPVS_CMD_FLUSH,
3462                 .flags  = GENL_ADMIN_PERM,
3463                 .doit   = ip_vs_genl_set_cmd,
3464         },
3465 };
3466
3467 static int __init ip_vs_genl_register(void)
3468 {
3469         return genl_register_family_with_ops(&ip_vs_genl_family,
3470                 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
3471 }
3472
/* Unregister the IPVS Generic Netlink family; the call's result is ignored. */
static void ip_vs_genl_unregister(void)
{
        genl_unregister_family(&ip_vs_genl_family);
}
3477
3478 /* End of Generic Netlink interface definitions */
3479
/*
 * Per-netns init/exit functions.
 */
3483 int __net_init __ip_vs_control_init(struct net *net)
3484 {
3485         int idx;
3486         struct netns_ipvs *ipvs = net_ipvs(net);
3487         struct ctl_table *tbl;
3488
3489         atomic_set(&ipvs->dropentry, 0);
3490         spin_lock_init(&ipvs->dropentry_lock);
3491         spin_lock_init(&ipvs->droppacket_lock);
3492         spin_lock_init(&ipvs->securetcp_lock);
3493         ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
3494
3495         /* Initialize rs_table */
3496         for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3497                 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3498
3499         INIT_LIST_HEAD(&ipvs->dest_trash);
3500         atomic_set(&ipvs->ftpsvc_counter, 0);
3501         atomic_set(&ipvs->nullsvc_counter, 0);
3502
3503         /* procfs stats */
3504         ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL);
3505         if (ipvs->tot_stats == NULL) {
3506                 pr_err("%s(): no memory.\n", __func__);
3507                 return -ENOMEM;
3508         }
3509         ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3510         if (!ipvs->cpustats) {
3511                 pr_err("%s() alloc_percpu failed\n", __func__);
3512                 goto err_alloc;
3513         }
3514         spin_lock_init(&ipvs->tot_stats->lock);
3515
3516         proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3517         proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
3518         proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3519                              &ip_vs_stats_percpu_fops);
3520
3521         if (!net_eq(net, &init_net)) {
3522                 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3523                 if (tbl == NULL)
3524                         goto err_dup;
3525         } else
3526                 tbl = vs_vars;
3527         /* Initialize sysctl defaults */
3528         idx = 0;
3529         ipvs->sysctl_amemthresh = 1024;
3530         tbl[idx++].data = &ipvs->sysctl_amemthresh;
3531         ipvs->sysctl_am_droprate = 10;
3532         tbl[idx++].data = &ipvs->sysctl_am_droprate;
3533         tbl[idx++].data = &ipvs->sysctl_drop_entry;
3534         tbl[idx++].data = &ipvs->sysctl_drop_packet;
3535 #ifdef CONFIG_IP_VS_NFCT
3536         tbl[idx++].data = &ipvs->sysctl_conntrack;
3537 #endif
3538         tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3539         ipvs->sysctl_snat_reroute = 1;
3540         tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3541         ipvs->sysctl_sync_ver = 1;
3542         tbl[idx++].data = &ipvs->sysctl_sync_ver;
3543         tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3544         tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3545         tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
3546         ipvs->sysctl_sync_threshold[0] = 3;
3547         ipvs->sysctl_sync_threshold[1] = 50;
3548         tbl[idx].data = &ipvs->sysctl_sync_threshold;
3549         tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3550         tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3551
3552
3553 #ifdef CONFIG_SYSCTL
3554         ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
3555                                                      tbl);
3556         if (ipvs->sysctl_hdr == NULL) {
3557                 if (!net_eq(net, &init_net))
3558                         kfree(tbl);
3559                 goto err_dup;
3560         }
3561 #endif
3562         ip_vs_new_estimator(net, ipvs->tot_stats);
3563         ipvs->sysctl_tbl = tbl;
3564         /* Schedule defense work */
3565         INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3566         schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
3567         return 0;
3568
3569 err_dup:
3570         free_percpu(ipvs->cpustats);
3571 err_alloc:
3572         kfree(ipvs->tot_stats);
3573         return -ENOMEM;
3574 }
3575
3576 static void __net_exit __ip_vs_control_cleanup(struct net *net)
3577 {
3578         struct netns_ipvs *ipvs = net_ipvs(net);
3579
3580         ip_vs_trash_cleanup(net);
3581         ip_vs_kill_estimator(net, ipvs->tot_stats);
3582         cancel_delayed_work_sync(&ipvs->defense_work);
3583         cancel_work_sync(&ipvs->defense_work.work);
3584 #ifdef CONFIG_SYSCTL
3585         unregister_net_sysctl_table(ipvs->sysctl_hdr);
3586 #endif
3587         proc_net_remove(net, "ip_vs_stats_percpu");
3588         proc_net_remove(net, "ip_vs_stats");
3589         proc_net_remove(net, "ip_vs");
3590         free_percpu(ipvs->cpustats);
3591         kfree(ipvs->tot_stats);
3592 }
3593
/* Per-netns hooks, registered by ip_vs_control_init(). */
static struct pernet_operations ipvs_control_ops = {
        .init = __ip_vs_control_init,
        .exit = __ip_vs_control_cleanup,
};
3598
3599 int __init ip_vs_control_init(void)
3600 {
3601         int idx;
3602         int ret;
3603
3604         EnterFunction(2);
3605
3606         /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
3607         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
3608                 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3609                 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3610         }
3611
3612         ret = register_pernet_subsys(&ipvs_control_ops);
3613         if (ret) {
3614                 pr_err("cannot register namespace.\n");
3615                 goto err;
3616         }
3617
3618         smp_wmb();      /* Do we really need it now ? */
3619
3620         ret = nf_register_sockopt(&ip_vs_sockopts);
3621         if (ret) {
3622                 pr_err("cannot register sockopt.\n");
3623                 goto err_net;
3624         }
3625
3626         ret = ip_vs_genl_register();
3627         if (ret) {
3628                 pr_err("cannot register Generic Netlink interface.\n");
3629                 nf_unregister_sockopt(&ip_vs_sockopts);
3630                 goto err_net;
3631         }
3632
3633         LeaveFunction(2);
3634         return 0;
3635
3636 err_net:
3637         unregister_pernet_subsys(&ipvs_control_ops);
3638 err:
3639         return ret;
3640 }
3641
3642
/*
 * Module-wide teardown of the IPVS control interface: unregisters the
 * per-netns operations, the Generic Netlink family and the sockopt
 * interface that ip_vs_control_init() registered.
 */
void ip_vs_control_cleanup(void)
{
        EnterFunction(2);
        unregister_pernet_subsys(&ipvs_control_ops);
        ip_vs_genl_unregister();
        nf_unregister_sockopt(&ip_vs_sockopts);
        LeaveFunction(2);
}