IPv6: fix race between cleanup and add/delete address
[linux-2.6.git] / net / netfilter / ipvs / ip_vs_proto_udp.c
1 /*
2  * ip_vs_proto_udp.c:   UDP load balancing support for IPVS
3  *
4  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
5  *              Julian Anastasov <ja@ssi.bg>
6  *
7  *              This program is free software; you can redistribute it and/or
8  *              modify it under the terms of the GNU General Public License
9  *              as published by the Free Software Foundation; either version
10  *              2 of the License, or (at your option) any later version.
11  *
12  * Changes:
13  *
14  */
15
16 #define KMSG_COMPONENT "IPVS"
17 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
18
19 #include <linux/in.h>
20 #include <linux/ip.h>
21 #include <linux/kernel.h>
22 #include <linux/netfilter.h>
23 #include <linux/netfilter_ipv4.h>
24 #include <linux/udp.h>
25
26 #include <net/ip_vs.h>
27 #include <net/ip.h>
28 #include <net/ip6_checksum.h>
29
30 static struct ip_vs_conn *
31 udp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
32                 const struct ip_vs_iphdr *iph, unsigned int proto_off,
33                 int inverse)
34 {
35         struct ip_vs_conn *cp;
36         __be16 _ports[2], *pptr;
37
38         pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
39         if (pptr == NULL)
40                 return NULL;
41
42         if (likely(!inverse)) {
43                 cp = ip_vs_conn_in_get(af, iph->protocol,
44                                        &iph->saddr, pptr[0],
45                                        &iph->daddr, pptr[1]);
46         } else {
47                 cp = ip_vs_conn_in_get(af, iph->protocol,
48                                        &iph->daddr, pptr[1],
49                                        &iph->saddr, pptr[0]);
50         }
51
52         return cp;
53 }
54
55
56 static struct ip_vs_conn *
57 udp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
58                  const struct ip_vs_iphdr *iph, unsigned int proto_off,
59                  int inverse)
60 {
61         struct ip_vs_conn *cp;
62         __be16 _ports[2], *pptr;
63
64         pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
65         if (pptr == NULL)
66                 return NULL;
67
68         if (likely(!inverse)) {
69                 cp = ip_vs_conn_out_get(af, iph->protocol,
70                                         &iph->saddr, pptr[0],
71                                         &iph->daddr, pptr[1]);
72         } else {
73                 cp = ip_vs_conn_out_get(af, iph->protocol,
74                                         &iph->daddr, pptr[1],
75                                         &iph->saddr, pptr[0]);
76         }
77
78         return cp;
79 }
80
81
82 static int
83 udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
84                   int *verdict, struct ip_vs_conn **cpp)
85 {
86         struct ip_vs_service *svc;
87         struct udphdr _udph, *uh;
88         struct ip_vs_iphdr iph;
89
90         ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
91
92         uh = skb_header_pointer(skb, iph.len, sizeof(_udph), &_udph);
93         if (uh == NULL) {
94                 *verdict = NF_DROP;
95                 return 0;
96         }
97
98         svc = ip_vs_service_get(af, skb->mark, iph.protocol,
99                                 &iph.daddr, uh->dest);
100         if (svc) {
101                 if (ip_vs_todrop()) {
102                         /*
103                          * It seems that we are very loaded.
104                          * We have to drop this packet :(
105                          */
106                         ip_vs_service_put(svc);
107                         *verdict = NF_DROP;
108                         return 0;
109                 }
110
111                 /*
112                  * Let the virtual server select a real server for the
113                  * incoming connection, and create a connection entry.
114                  */
115                 *cpp = ip_vs_schedule(svc, skb);
116                 if (!*cpp) {
117                         *verdict = ip_vs_leave(svc, skb, pp);
118                         return 0;
119                 }
120                 ip_vs_service_put(svc);
121         }
122         return 1;
123 }
124
125
126 static inline void
127 udp_fast_csum_update(int af, struct udphdr *uhdr,
128                      const union nf_inet_addr *oldip,
129                      const union nf_inet_addr *newip,
130                      __be16 oldport, __be16 newport)
131 {
132 #ifdef CONFIG_IP_VS_IPV6
133         if (af == AF_INET6)
134                 uhdr->check =
135                         csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
136                                          ip_vs_check_diff2(oldport, newport,
137                                                 ~csum_unfold(uhdr->check))));
138         else
139 #endif
140                 uhdr->check =
141                         csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
142                                          ip_vs_check_diff2(oldport, newport,
143                                                 ~csum_unfold(uhdr->check))));
144         if (!uhdr->check)
145                 uhdr->check = CSUM_MANGLED_0;
146 }
147
148 static inline void
149 udp_partial_csum_update(int af, struct udphdr *uhdr,
150                      const union nf_inet_addr *oldip,
151                      const union nf_inet_addr *newip,
152                      __be16 oldlen, __be16 newlen)
153 {
154 #ifdef CONFIG_IP_VS_IPV6
155         if (af == AF_INET6)
156                 uhdr->check =
157                         csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
158                                          ip_vs_check_diff2(oldlen, newlen,
159                                                 ~csum_unfold(uhdr->check))));
160         else
161 #endif
162         uhdr->check =
163                 csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
164                                 ip_vs_check_diff2(oldlen, newlen,
165                                                 ~csum_unfold(uhdr->check))));
166 }
167
168
169 static int
170 udp_snat_handler(struct sk_buff *skb,
171                  struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
172 {
173         struct udphdr *udph;
174         unsigned int udphoff;
175         int oldlen;
176
177 #ifdef CONFIG_IP_VS_IPV6
178         if (cp->af == AF_INET6)
179                 udphoff = sizeof(struct ipv6hdr);
180         else
181 #endif
182                 udphoff = ip_hdrlen(skb);
183         oldlen = skb->len - udphoff;
184
185         /* csum_check requires unshared skb */
186         if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
187                 return 0;
188
189         if (unlikely(cp->app != NULL)) {
190                 /* Some checks before mangling */
191                 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
192                         return 0;
193
194                 /*
195                  *      Call application helper if needed
196                  */
197                 if (!ip_vs_app_pkt_out(cp, skb))
198                         return 0;
199         }
200
201         udph = (void *)skb_network_header(skb) + udphoff;
202         udph->source = cp->vport;
203
204         /*
205          *      Adjust UDP checksums
206          */
207         if (skb->ip_summed == CHECKSUM_PARTIAL) {
208                 udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
209                                         htons(oldlen),
210                                         htons(skb->len - udphoff));
211         } else if (!cp->app && (udph->check != 0)) {
212                 /* Only port and addr are changed, do fast csum update */
213                 udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
214                                      cp->dport, cp->vport);
215                 if (skb->ip_summed == CHECKSUM_COMPLETE)
216                         skb->ip_summed = CHECKSUM_NONE;
217         } else {
218                 /* full checksum calculation */
219                 udph->check = 0;
220                 skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
221 #ifdef CONFIG_IP_VS_IPV6
222                 if (cp->af == AF_INET6)
223                         udph->check = csum_ipv6_magic(&cp->vaddr.in6,
224                                                       &cp->caddr.in6,
225                                                       skb->len - udphoff,
226                                                       cp->protocol, skb->csum);
227                 else
228 #endif
229                         udph->check = csum_tcpudp_magic(cp->vaddr.ip,
230                                                         cp->caddr.ip,
231                                                         skb->len - udphoff,
232                                                         cp->protocol,
233                                                         skb->csum);
234                 if (udph->check == 0)
235                         udph->check = CSUM_MANGLED_0;
236                 IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
237                           pp->name, udph->check,
238                           (char*)&(udph->check) - (char*)udph);
239         }
240         return 1;
241 }
242
243
244 static int
245 udp_dnat_handler(struct sk_buff *skb,
246                  struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
247 {
248         struct udphdr *udph;
249         unsigned int udphoff;
250         int oldlen;
251
252 #ifdef CONFIG_IP_VS_IPV6
253         if (cp->af == AF_INET6)
254                 udphoff = sizeof(struct ipv6hdr);
255         else
256 #endif
257                 udphoff = ip_hdrlen(skb);
258         oldlen = skb->len - udphoff;
259
260         /* csum_check requires unshared skb */
261         if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
262                 return 0;
263
264         if (unlikely(cp->app != NULL)) {
265                 /* Some checks before mangling */
266                 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
267                         return 0;
268
269                 /*
270                  *      Attempt ip_vs_app call.
271                  *      It will fix ip_vs_conn
272                  */
273                 if (!ip_vs_app_pkt_in(cp, skb))
274                         return 0;
275         }
276
277         udph = (void *)skb_network_header(skb) + udphoff;
278         udph->dest = cp->dport;
279
280         /*
281          *      Adjust UDP checksums
282          */
283         if (skb->ip_summed == CHECKSUM_PARTIAL) {
284                 udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
285                                         htons(oldlen),
286                                         htons(skb->len - udphoff));
287         } else if (!cp->app && (udph->check != 0)) {
288                 /* Only port and addr are changed, do fast csum update */
289                 udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
290                                      cp->vport, cp->dport);
291                 if (skb->ip_summed == CHECKSUM_COMPLETE)
292                         skb->ip_summed = CHECKSUM_NONE;
293         } else {
294                 /* full checksum calculation */
295                 udph->check = 0;
296                 skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
297 #ifdef CONFIG_IP_VS_IPV6
298                 if (cp->af == AF_INET6)
299                         udph->check = csum_ipv6_magic(&cp->caddr.in6,
300                                                       &cp->daddr.in6,
301                                                       skb->len - udphoff,
302                                                       cp->protocol, skb->csum);
303                 else
304 #endif
305                         udph->check = csum_tcpudp_magic(cp->caddr.ip,
306                                                         cp->daddr.ip,
307                                                         skb->len - udphoff,
308                                                         cp->protocol,
309                                                         skb->csum);
310                 if (udph->check == 0)
311                         udph->check = CSUM_MANGLED_0;
312                 skb->ip_summed = CHECKSUM_UNNECESSARY;
313         }
314         return 1;
315 }
316
317
318 static int
319 udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
320 {
321         struct udphdr _udph, *uh;
322         unsigned int udphoff;
323
324 #ifdef CONFIG_IP_VS_IPV6
325         if (af == AF_INET6)
326                 udphoff = sizeof(struct ipv6hdr);
327         else
328 #endif
329                 udphoff = ip_hdrlen(skb);
330
331         uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
332         if (uh == NULL)
333                 return 0;
334
335         if (uh->check != 0) {
336                 switch (skb->ip_summed) {
337                 case CHECKSUM_NONE:
338                         skb->csum = skb_checksum(skb, udphoff,
339                                                  skb->len - udphoff, 0);
340                 case CHECKSUM_COMPLETE:
341 #ifdef CONFIG_IP_VS_IPV6
342                         if (af == AF_INET6) {
343                                 if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
344                                                     &ipv6_hdr(skb)->daddr,
345                                                     skb->len - udphoff,
346                                                     ipv6_hdr(skb)->nexthdr,
347                                                     skb->csum)) {
348                                         IP_VS_DBG_RL_PKT(0, pp, skb, 0,
349                                                          "Failed checksum for");
350                                         return 0;
351                                 }
352                         } else
353 #endif
354                                 if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
355                                                       ip_hdr(skb)->daddr,
356                                                       skb->len - udphoff,
357                                                       ip_hdr(skb)->protocol,
358                                                       skb->csum)) {
359                                         IP_VS_DBG_RL_PKT(0, pp, skb, 0,
360                                                          "Failed checksum for");
361                                         return 0;
362                                 }
363                         break;
364                 default:
365                         /* No need to checksum. */
366                         break;
367                 }
368         }
369         return 1;
370 }
371
372
373 /*
374  *      Note: the caller guarantees that only one of register_app,
375  *      unregister_app or app_conn_bind is called each time.
376  */
377
378 #define UDP_APP_TAB_BITS        4
379 #define UDP_APP_TAB_SIZE        (1 << UDP_APP_TAB_BITS)
380 #define UDP_APP_TAB_MASK        (UDP_APP_TAB_SIZE - 1)
381
382 static struct list_head udp_apps[UDP_APP_TAB_SIZE];
383 static DEFINE_SPINLOCK(udp_app_lock);
384
385 static inline __u16 udp_app_hashkey(__be16 port)
386 {
387         return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)
388                 & UDP_APP_TAB_MASK;
389 }
390
391
392 static int udp_register_app(struct ip_vs_app *inc)
393 {
394         struct ip_vs_app *i;
395         __u16 hash;
396         __be16 port = inc->port;
397         int ret = 0;
398
399         hash = udp_app_hashkey(port);
400
401
402         spin_lock_bh(&udp_app_lock);
403         list_for_each_entry(i, &udp_apps[hash], p_list) {
404                 if (i->port == port) {
405                         ret = -EEXIST;
406                         goto out;
407                 }
408         }
409         list_add(&inc->p_list, &udp_apps[hash]);
410         atomic_inc(&ip_vs_protocol_udp.appcnt);
411
412   out:
413         spin_unlock_bh(&udp_app_lock);
414         return ret;
415 }
416
417
418 static void
419 udp_unregister_app(struct ip_vs_app *inc)
420 {
421         spin_lock_bh(&udp_app_lock);
422         atomic_dec(&ip_vs_protocol_udp.appcnt);
423         list_del(&inc->p_list);
424         spin_unlock_bh(&udp_app_lock);
425 }
426
427
428 static int udp_app_conn_bind(struct ip_vs_conn *cp)
429 {
430         int hash;
431         struct ip_vs_app *inc;
432         int result = 0;
433
434         /* Default binding: bind app only for NAT */
435         if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
436                 return 0;
437
438         /* Lookup application incarnations and bind the right one */
439         hash = udp_app_hashkey(cp->vport);
440
441         spin_lock(&udp_app_lock);
442         list_for_each_entry(inc, &udp_apps[hash], p_list) {
443                 if (inc->port == cp->vport) {
444                         if (unlikely(!ip_vs_app_inc_get(inc)))
445                                 break;
446                         spin_unlock(&udp_app_lock);
447
448                         IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
449                                       "%s:%u to app %s on port %u\n",
450                                       __func__,
451                                       IP_VS_DBG_ADDR(cp->af, &cp->caddr),
452                                       ntohs(cp->cport),
453                                       IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
454                                       ntohs(cp->vport),
455                                       inc->name, ntohs(inc->port));
456
457                         cp->app = inc;
458                         if (inc->init_conn)
459                                 result = inc->init_conn(inc, cp);
460                         goto out;
461                 }
462         }
463         spin_unlock(&udp_app_lock);
464
465   out:
466         return result;
467 }
468
469
470 static int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
471         [IP_VS_UDP_S_NORMAL]            =       5*60*HZ,
472         [IP_VS_UDP_S_LAST]              =       2*HZ,
473 };
474
475 static const char *const udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
476         [IP_VS_UDP_S_NORMAL]            =       "UDP",
477         [IP_VS_UDP_S_LAST]              =       "BUG!",
478 };
479
480
481 static int
482 udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
483 {
484         return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
485                                        udp_state_name_table, sname, to);
486 }
487
488 static const char * udp_state_name(int state)
489 {
490         if (state >= IP_VS_UDP_S_LAST)
491                 return "ERR!";
492         return udp_state_name_table[state] ? udp_state_name_table[state] : "?";
493 }
494
495 static int
496 udp_state_transition(struct ip_vs_conn *cp, int direction,
497                      const struct sk_buff *skb,
498                      struct ip_vs_protocol *pp)
499 {
500         cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
501         return 1;
502 }
503
504 static void udp_init(struct ip_vs_protocol *pp)
505 {
506         IP_VS_INIT_HASH_TABLE(udp_apps);
507         pp->timeout_table = udp_timeouts;
508 }
509
/* Protocol exit hook: nothing to do for UDP. */
static void udp_exit(struct ip_vs_protocol *pp)
{
}
513
514
515 struct ip_vs_protocol ip_vs_protocol_udp = {
516         .name =                 "UDP",
517         .protocol =             IPPROTO_UDP,
518         .num_states =           IP_VS_UDP_S_LAST,
519         .dont_defrag =          0,
520         .init =                 udp_init,
521         .exit =                 udp_exit,
522         .conn_schedule =        udp_conn_schedule,
523         .conn_in_get =          udp_conn_in_get,
524         .conn_out_get =         udp_conn_out_get,
525         .snat_handler =         udp_snat_handler,
526         .dnat_handler =         udp_dnat_handler,
527         .csum_check =           udp_csum_check,
528         .state_transition =     udp_state_transition,
529         .state_name =           udp_state_name,
530         .register_app =         udp_register_app,
531         .unregister_app =       udp_unregister_app,
532         .app_conn_bind =        udp_app_conn_bind,
533         .debug_packet =         ip_vs_tcpudp_debug_packet,
534         .timeout_change =       NULL,
535         .set_state_timeout =    udp_set_state_timeout,
536 };