net: replace NIPQUAD() in net/netfilter/
[linux-2.6.git] / net / netfilter / ipvs / ip_vs_xmit.c
1 /*
2  * ip_vs_xmit.c: various packet transmitters for IPVS
3  *
4  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
5  *              Julian Anastasov <ja@ssi.bg>
6  *
7  *              This program is free software; you can redistribute it and/or
8  *              modify it under the terms of the GNU General Public License
9  *              as published by the Free Software Foundation; either version
10  *              2 of the License, or (at your option) any later version.
11  *
12  * Changes:
13  *
14  */
15
16 #include <linux/kernel.h>
17 #include <linux/tcp.h>                  /* for tcphdr */
18 #include <net/ip.h>
19 #include <net/tcp.h>                    /* for csum_tcpudp_magic */
20 #include <net/udp.h>
21 #include <net/icmp.h>                   /* for icmp_send */
22 #include <net/route.h>                  /* for ip_route_output */
23 #include <net/ipv6.h>
24 #include <net/ip6_route.h>
25 #include <linux/icmpv6.h>
26 #include <linux/netfilter.h>
27 #include <linux/netfilter_ipv4.h>
28
29 #include <net/ip_vs.h>
30
31
32 /*
33  *      Destination cache to speed up outgoing route lookup
34  */
35 static inline void
36 __ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst)
37 {
38         struct dst_entry *old_dst;
39
40         old_dst = dest->dst_cache;
41         dest->dst_cache = dst;
42         dest->dst_rtos = rtos;
43         dst_release(old_dst);
44 }
45
46 static inline struct dst_entry *
47 __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie)
48 {
49         struct dst_entry *dst = dest->dst_cache;
50
51         if (!dst)
52                 return NULL;
53         if ((dst->obsolete
54              || (dest->af == AF_INET && rtos != dest->dst_rtos)) &&
55             dst->ops->check(dst, cookie) == NULL) {
56                 dest->dst_cache = NULL;
57                 dst_release(dst);
58                 return NULL;
59         }
60         dst_hold(dst);
61         return dst;
62 }
63
64 static struct rtable *
65 __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
66 {
67         struct rtable *rt;                      /* Route to the other host */
68         struct ip_vs_dest *dest = cp->dest;
69
70         if (dest) {
71                 spin_lock(&dest->dst_lock);
72                 if (!(rt = (struct rtable *)
73                       __ip_vs_dst_check(dest, rtos, 0))) {
74                         struct flowi fl = {
75                                 .oif = 0,
76                                 .nl_u = {
77                                         .ip4_u = {
78                                                 .daddr = dest->addr.ip,
79                                                 .saddr = 0,
80                                                 .tos = rtos, } },
81                         };
82
83                         if (ip_route_output_key(&init_net, &rt, &fl)) {
84                                 spin_unlock(&dest->dst_lock);
85                                 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
86                                              &dest->addr.ip);
87                                 return NULL;
88                         }
89                         __ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst));
90                         IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n",
91                                   &dest->addr.ip,
92                                   atomic_read(&rt->u.dst.__refcnt), rtos);
93                 }
94                 spin_unlock(&dest->dst_lock);
95         } else {
96                 struct flowi fl = {
97                         .oif = 0,
98                         .nl_u = {
99                                 .ip4_u = {
100                                         .daddr = cp->daddr.ip,
101                                         .saddr = 0,
102                                         .tos = rtos, } },
103                 };
104
105                 if (ip_route_output_key(&init_net, &rt, &fl)) {
106                         IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
107                                      &cp->daddr.ip);
108                         return NULL;
109                 }
110         }
111
112         return rt;
113 }
114
115 #ifdef CONFIG_IP_VS_IPV6
116 static struct rt6_info *
117 __ip_vs_get_out_rt_v6(struct ip_vs_conn *cp)
118 {
119         struct rt6_info *rt;                    /* Route to the other host */
120         struct ip_vs_dest *dest = cp->dest;
121
122         if (dest) {
123                 spin_lock(&dest->dst_lock);
124                 rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0, 0);
125                 if (!rt) {
126                         struct flowi fl = {
127                                 .oif = 0,
128                                 .nl_u = {
129                                         .ip6_u = {
130                                                 .daddr = dest->addr.in6,
131                                                 .saddr = {
132                                                         .s6_addr32 =
133                                                                 { 0, 0, 0, 0 },
134                                                 },
135                                         },
136                                 },
137                         };
138
139                         rt = (struct rt6_info *)ip6_route_output(&init_net,
140                                                                  NULL, &fl);
141                         if (!rt) {
142                                 spin_unlock(&dest->dst_lock);
143                                 IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n",
144                                              &dest->addr.in6);
145                                 return NULL;
146                         }
147                         __ip_vs_dst_set(dest, 0, dst_clone(&rt->u.dst));
148                         IP_VS_DBG(10, "new dst %pI6, refcnt=%d\n",
149                                   &dest->addr.in6,
150                                   atomic_read(&rt->u.dst.__refcnt));
151                 }
152                 spin_unlock(&dest->dst_lock);
153         } else {
154                 struct flowi fl = {
155                         .oif = 0,
156                         .nl_u = {
157                                 .ip6_u = {
158                                         .daddr = cp->daddr.in6,
159                                         .saddr = {
160                                                 .s6_addr32 = { 0, 0, 0, 0 },
161                                         },
162                                 },
163                         },
164                 };
165
166                 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
167                 if (!rt) {
168                         IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n",
169                                      &cp->daddr.in6);
170                         return NULL;
171                 }
172         }
173
174         return rt;
175 }
176 #endif
177
178
179 /*
180  *      Release dest->dst_cache before a dest is removed
181  */
182 void
183 ip_vs_dst_reset(struct ip_vs_dest *dest)
184 {
185         struct dst_entry *old_dst;
186
187         old_dst = dest->dst_cache;
188         dest->dst_cache = NULL;
189         dst_release(old_dst);
190 }
191
/*
 * Common send step of the transmitters: flag the skb as an IPVS packet,
 * call skb_forward_csum() on it, then run it through the
 * NF_INET_LOCAL_OUT hook of protocol family `pf` with the route's device
 * as output device and dst_output() as the continuation.
 */
#define IP_VS_XMIT(pf, skb, rt)                                 \
do {                                                            \
        (skb)->ipvs_property = 1;                               \
        skb_forward_csum((skb));                                \
        NF_HOOK((pf), NF_INET_LOCAL_OUT, (skb), NULL,           \
                (rt)->u.dst.dev, dst_output);                   \
} while (0)
199
200
201 /*
202  *      NULL transmitter (do nothing except return NF_ACCEPT)
203  */
204 int
205 ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
206                 struct ip_vs_protocol *pp)
207 {
208         /* we do not touch skb and do not need pskb ptr */
209         return NF_ACCEPT;
210 }
211
212
213 /*
214  *      Bypass transmitter
215  *      Let packets bypass the destination when the destination is not
216  *      available, it may be only used in transparent cache cluster.
217  */
218 int
219 ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
220                   struct ip_vs_protocol *pp)
221 {
222         struct rtable *rt;                      /* Route to the other host */
223         struct iphdr  *iph = ip_hdr(skb);
224         u8     tos = iph->tos;
225         int    mtu;
226         struct flowi fl = {
227                 .oif = 0,
228                 .nl_u = {
229                         .ip4_u = {
230                                 .daddr = iph->daddr,
231                                 .saddr = 0,
232                                 .tos = RT_TOS(tos), } },
233         };
234
235         EnterFunction(10);
236
237         if (ip_route_output_key(&init_net, &rt, &fl)) {
238                 IP_VS_DBG_RL("ip_vs_bypass_xmit(): ip_route_output error, dest: %pI4\n",
239                              &iph->daddr);
240                 goto tx_error_icmp;
241         }
242
243         /* MTU checking */
244         mtu = dst_mtu(&rt->u.dst);
245         if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
246                 ip_rt_put(rt);
247                 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
248                 IP_VS_DBG_RL("ip_vs_bypass_xmit(): frag needed\n");
249                 goto tx_error;
250         }
251
252         /*
253          * Call ip_send_check because we are not sure it is called
254          * after ip_defrag. Is copy-on-write needed?
255          */
256         if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
257                 ip_rt_put(rt);
258                 return NF_STOLEN;
259         }
260         ip_send_check(ip_hdr(skb));
261
262         /* drop old route */
263         dst_release(skb->dst);
264         skb->dst = &rt->u.dst;
265
266         /* Another hack: avoid icmp_send in ip_fragment */
267         skb->local_df = 1;
268
269         IP_VS_XMIT(PF_INET, skb, rt);
270
271         LeaveFunction(10);
272         return NF_STOLEN;
273
274  tx_error_icmp:
275         dst_link_failure(skb);
276  tx_error:
277         kfree_skb(skb);
278         LeaveFunction(10);
279         return NF_STOLEN;
280 }
281
282 #ifdef CONFIG_IP_VS_IPV6
283 int
284 ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
285                      struct ip_vs_protocol *pp)
286 {
287         struct rt6_info *rt;                    /* Route to the other host */
288         struct ipv6hdr  *iph = ipv6_hdr(skb);
289         int    mtu;
290         struct flowi fl = {
291                 .oif = 0,
292                 .nl_u = {
293                         .ip6_u = {
294                                 .daddr = iph->daddr,
295                                 .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
296         };
297
298         EnterFunction(10);
299
300         rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
301         if (!rt) {
302                 IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): ip6_route_output error, dest: %pI6\n",
303                              &iph->daddr);
304                 goto tx_error_icmp;
305         }
306
307         /* MTU checking */
308         mtu = dst_mtu(&rt->u.dst);
309         if (skb->len > mtu) {
310                 dst_release(&rt->u.dst);
311                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
312                 IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): frag needed\n");
313                 goto tx_error;
314         }
315
316         /*
317          * Call ip_send_check because we are not sure it is called
318          * after ip_defrag. Is copy-on-write needed?
319          */
320         skb = skb_share_check(skb, GFP_ATOMIC);
321         if (unlikely(skb == NULL)) {
322                 dst_release(&rt->u.dst);
323                 return NF_STOLEN;
324         }
325
326         /* drop old route */
327         dst_release(skb->dst);
328         skb->dst = &rt->u.dst;
329
330         /* Another hack: avoid icmp_send in ip_fragment */
331         skb->local_df = 1;
332
333         IP_VS_XMIT(PF_INET6, skb, rt);
334
335         LeaveFunction(10);
336         return NF_STOLEN;
337
338  tx_error_icmp:
339         dst_link_failure(skb);
340  tx_error:
341         kfree_skb(skb);
342         LeaveFunction(10);
343         return NF_STOLEN;
344 }
345 #endif
346
347 /*
348  *      NAT transmitter (only for outside-to-inside nat forwarding)
349  *      Not used for related ICMP
350  */
351 int
352 ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
353                struct ip_vs_protocol *pp)
354 {
355         struct rtable *rt;              /* Route to the other host */
356         int mtu;
357         struct iphdr *iph = ip_hdr(skb);
358
359         EnterFunction(10);
360
361         /* check if it is a connection of no-client-port */
362         if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
363                 __be16 _pt, *p;
364                 p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
365                 if (p == NULL)
366                         goto tx_error;
367                 ip_vs_conn_fill_cport(cp, *p);
368                 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
369         }
370
371         if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
372                 goto tx_error_icmp;
373
374         /* MTU checking */
375         mtu = dst_mtu(&rt->u.dst);
376         if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
377                 ip_rt_put(rt);
378                 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
379                 IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for");
380                 goto tx_error;
381         }
382
383         /* copy-on-write the packet before mangling it */
384         if (!skb_make_writable(skb, sizeof(struct iphdr)))
385                 goto tx_error_put;
386
387         if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
388                 goto tx_error_put;
389
390         /* drop old route */
391         dst_release(skb->dst);
392         skb->dst = &rt->u.dst;
393
394         /* mangle the packet */
395         if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
396                 goto tx_error;
397         ip_hdr(skb)->daddr = cp->daddr.ip;
398         ip_send_check(ip_hdr(skb));
399
400         IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
401
402         /* FIXME: when application helper enlarges the packet and the length
403            is larger than the MTU of outgoing device, there will be still
404            MTU problem. */
405
406         /* Another hack: avoid icmp_send in ip_fragment */
407         skb->local_df = 1;
408
409         IP_VS_XMIT(PF_INET, skb, rt);
410
411         LeaveFunction(10);
412         return NF_STOLEN;
413
414   tx_error_icmp:
415         dst_link_failure(skb);
416   tx_error:
417         LeaveFunction(10);
418         kfree_skb(skb);
419         return NF_STOLEN;
420   tx_error_put:
421         ip_rt_put(rt);
422         goto tx_error;
423 }
424
425 #ifdef CONFIG_IP_VS_IPV6
426 int
427 ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
428                   struct ip_vs_protocol *pp)
429 {
430         struct rt6_info *rt;            /* Route to the other host */
431         int mtu;
432
433         EnterFunction(10);
434
435         /* check if it is a connection of no-client-port */
436         if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
437                 __be16 _pt, *p;
438                 p = skb_header_pointer(skb, sizeof(struct ipv6hdr),
439                                        sizeof(_pt), &_pt);
440                 if (p == NULL)
441                         goto tx_error;
442                 ip_vs_conn_fill_cport(cp, *p);
443                 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
444         }
445
446         rt = __ip_vs_get_out_rt_v6(cp);
447         if (!rt)
448                 goto tx_error_icmp;
449
450         /* MTU checking */
451         mtu = dst_mtu(&rt->u.dst);
452         if (skb->len > mtu) {
453                 dst_release(&rt->u.dst);
454                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
455                 IP_VS_DBG_RL_PKT(0, pp, skb, 0,
456                                  "ip_vs_nat_xmit_v6(): frag needed for");
457                 goto tx_error;
458         }
459
460         /* copy-on-write the packet before mangling it */
461         if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
462                 goto tx_error_put;
463
464         if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
465                 goto tx_error_put;
466
467         /* drop old route */
468         dst_release(skb->dst);
469         skb->dst = &rt->u.dst;
470
471         /* mangle the packet */
472         if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
473                 goto tx_error;
474         ipv6_hdr(skb)->daddr = cp->daddr.in6;
475
476         IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
477
478         /* FIXME: when application helper enlarges the packet and the length
479            is larger than the MTU of outgoing device, there will be still
480            MTU problem. */
481
482         /* Another hack: avoid icmp_send in ip_fragment */
483         skb->local_df = 1;
484
485         IP_VS_XMIT(PF_INET6, skb, rt);
486
487         LeaveFunction(10);
488         return NF_STOLEN;
489
490 tx_error_icmp:
491         dst_link_failure(skb);
492 tx_error:
493         LeaveFunction(10);
494         kfree_skb(skb);
495         return NF_STOLEN;
496 tx_error_put:
497         dst_release(&rt->u.dst);
498         goto tx_error;
499 }
500 #endif
501
502
503 /*
504  *   IP Tunneling transmitter
505  *
506  *   This function encapsulates the packet in a new IP packet, its
507  *   destination will be set to cp->daddr. Most code of this function
508  *   is taken from ipip.c.
509  *
510  *   It is used in VS/TUN cluster. The load balancer selects a real
511  *   server from a cluster based on a scheduling algorithm,
512  *   encapsulates the request packet and forwards it to the selected
513  *   server. For example, all real servers are configured with
514  *   "ifconfig tunl0 <Virtual IP Address> up". When the server receives
515  *   the encapsulated packet, it will decapsulate the packet, process
516  *   the request and return the response packets directly to the client
517  *   without passing the load balancer. This can greatly increase the
518  *   scalability of virtual server.
519  *
520  *   Used for ANY protocol
521  */
522 int
523 ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
524                   struct ip_vs_protocol *pp)
525 {
526         struct rtable *rt;                      /* Route to the other host */
527         struct net_device *tdev;                /* Device to other host */
528         struct iphdr  *old_iph = ip_hdr(skb);
529         u8     tos = old_iph->tos;
530         __be16 df = old_iph->frag_off;
531         sk_buff_data_t old_transport_header = skb->transport_header;
532         struct iphdr  *iph;                     /* Our new IP header */
533         unsigned int max_headroom;              /* The extra header space needed */
534         int    mtu;
535
536         EnterFunction(10);
537
538         if (skb->protocol != htons(ETH_P_IP)) {
539                 IP_VS_DBG_RL("ip_vs_tunnel_xmit(): protocol error, "
540                              "ETH_P_IP: %d, skb protocol: %d\n",
541                              htons(ETH_P_IP), skb->protocol);
542                 goto tx_error;
543         }
544
545         if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos))))
546                 goto tx_error_icmp;
547
548         tdev = rt->u.dst.dev;
549
550         mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
551         if (mtu < 68) {
552                 ip_rt_put(rt);
553                 IP_VS_DBG_RL("ip_vs_tunnel_xmit(): mtu less than 68\n");
554                 goto tx_error;
555         }
556         if (skb->dst)
557                 skb->dst->ops->update_pmtu(skb->dst, mtu);
558
559         df |= (old_iph->frag_off & htons(IP_DF));
560
561         if ((old_iph->frag_off & htons(IP_DF))
562             && mtu < ntohs(old_iph->tot_len)) {
563                 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
564                 ip_rt_put(rt);
565                 IP_VS_DBG_RL("ip_vs_tunnel_xmit(): frag needed\n");
566                 goto tx_error;
567         }
568
569         /*
570          * Okay, now see if we can stuff it in the buffer as-is.
571          */
572         max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
573
574         if (skb_headroom(skb) < max_headroom
575             || skb_cloned(skb) || skb_shared(skb)) {
576                 struct sk_buff *new_skb =
577                         skb_realloc_headroom(skb, max_headroom);
578                 if (!new_skb) {
579                         ip_rt_put(rt);
580                         kfree_skb(skb);
581                         IP_VS_ERR_RL("ip_vs_tunnel_xmit(): no memory\n");
582                         return NF_STOLEN;
583                 }
584                 kfree_skb(skb);
585                 skb = new_skb;
586                 old_iph = ip_hdr(skb);
587         }
588
589         skb->transport_header = old_transport_header;
590
591         /* fix old IP header checksum */
592         ip_send_check(old_iph);
593
594         skb_push(skb, sizeof(struct iphdr));
595         skb_reset_network_header(skb);
596         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
597
598         /* drop old route */
599         dst_release(skb->dst);
600         skb->dst = &rt->u.dst;
601
602         /*
603          *      Push down and install the IPIP header.
604          */
605         iph                     =       ip_hdr(skb);
606         iph->version            =       4;
607         iph->ihl                =       sizeof(struct iphdr)>>2;
608         iph->frag_off           =       df;
609         iph->protocol           =       IPPROTO_IPIP;
610         iph->tos                =       tos;
611         iph->daddr              =       rt->rt_dst;
612         iph->saddr              =       rt->rt_src;
613         iph->ttl                =       old_iph->ttl;
614         ip_select_ident(iph, &rt->u.dst, NULL);
615
616         /* Another hack: avoid icmp_send in ip_fragment */
617         skb->local_df = 1;
618
619         ip_local_out(skb);
620
621         LeaveFunction(10);
622
623         return NF_STOLEN;
624
625   tx_error_icmp:
626         dst_link_failure(skb);
627   tx_error:
628         kfree_skb(skb);
629         LeaveFunction(10);
630         return NF_STOLEN;
631 }
632
633 #ifdef CONFIG_IP_VS_IPV6
634 int
635 ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
636                      struct ip_vs_protocol *pp)
637 {
638         struct rt6_info *rt;            /* Route to the other host */
639         struct net_device *tdev;        /* Device to other host */
640         struct ipv6hdr  *old_iph = ipv6_hdr(skb);
641         sk_buff_data_t old_transport_header = skb->transport_header;
642         struct ipv6hdr  *iph;           /* Our new IP header */
643         unsigned int max_headroom;      /* The extra header space needed */
644         int    mtu;
645
646         EnterFunction(10);
647
648         if (skb->protocol != htons(ETH_P_IPV6)) {
649                 IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): protocol error, "
650                              "ETH_P_IPV6: %d, skb protocol: %d\n",
651                              htons(ETH_P_IPV6), skb->protocol);
652                 goto tx_error;
653         }
654
655         rt = __ip_vs_get_out_rt_v6(cp);
656         if (!rt)
657                 goto tx_error_icmp;
658
659         tdev = rt->u.dst.dev;
660
661         mtu = dst_mtu(&rt->u.dst) - sizeof(struct ipv6hdr);
662         /* TODO IPv6: do we need this check in IPv6? */
663         if (mtu < 1280) {
664                 dst_release(&rt->u.dst);
665                 IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): mtu less than 1280\n");
666                 goto tx_error;
667         }
668         if (skb->dst)
669                 skb->dst->ops->update_pmtu(skb->dst, mtu);
670
671         if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
672                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
673                 dst_release(&rt->u.dst);
674                 IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): frag needed\n");
675                 goto tx_error;
676         }
677
678         /*
679          * Okay, now see if we can stuff it in the buffer as-is.
680          */
681         max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
682
683         if (skb_headroom(skb) < max_headroom
684             || skb_cloned(skb) || skb_shared(skb)) {
685                 struct sk_buff *new_skb =
686                         skb_realloc_headroom(skb, max_headroom);
687                 if (!new_skb) {
688                         dst_release(&rt->u.dst);
689                         kfree_skb(skb);
690                         IP_VS_ERR_RL("ip_vs_tunnel_xmit_v6(): no memory\n");
691                         return NF_STOLEN;
692                 }
693                 kfree_skb(skb);
694                 skb = new_skb;
695                 old_iph = ipv6_hdr(skb);
696         }
697
698         skb->transport_header = old_transport_header;
699
700         skb_push(skb, sizeof(struct ipv6hdr));
701         skb_reset_network_header(skb);
702         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
703
704         /* drop old route */
705         dst_release(skb->dst);
706         skb->dst = &rt->u.dst;
707
708         /*
709          *      Push down and install the IPIP header.
710          */
711         iph                     =       ipv6_hdr(skb);
712         iph->version            =       6;
713         iph->nexthdr            =       IPPROTO_IPV6;
714         iph->payload_len        =       old_iph->payload_len + sizeof(old_iph);
715         iph->priority           =       old_iph->priority;
716         memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
717         iph->daddr              =       rt->rt6i_dst.addr;
718         iph->saddr              =       cp->vaddr.in6; /* rt->rt6i_src.addr; */
719         iph->hop_limit          =       old_iph->hop_limit;
720
721         /* Another hack: avoid icmp_send in ip_fragment */
722         skb->local_df = 1;
723
724         ip6_local_out(skb);
725
726         LeaveFunction(10);
727
728         return NF_STOLEN;
729
730 tx_error_icmp:
731         dst_link_failure(skb);
732 tx_error:
733         kfree_skb(skb);
734         LeaveFunction(10);
735         return NF_STOLEN;
736 }
737 #endif
738
739
740 /*
741  *      Direct Routing transmitter
742  *      Used for ANY protocol
743  */
744 int
745 ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
746               struct ip_vs_protocol *pp)
747 {
748         struct rtable *rt;                      /* Route to the other host */
749         struct iphdr  *iph = ip_hdr(skb);
750         int    mtu;
751
752         EnterFunction(10);
753
754         if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
755                 goto tx_error_icmp;
756
757         /* MTU checking */
758         mtu = dst_mtu(&rt->u.dst);
759         if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
760                 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
761                 ip_rt_put(rt);
762                 IP_VS_DBG_RL("ip_vs_dr_xmit(): frag needed\n");
763                 goto tx_error;
764         }
765
766         /*
767          * Call ip_send_check because we are not sure it is called
768          * after ip_defrag. Is copy-on-write needed?
769          */
770         if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
771                 ip_rt_put(rt);
772                 return NF_STOLEN;
773         }
774         ip_send_check(ip_hdr(skb));
775
776         /* drop old route */
777         dst_release(skb->dst);
778         skb->dst = &rt->u.dst;
779
780         /* Another hack: avoid icmp_send in ip_fragment */
781         skb->local_df = 1;
782
783         IP_VS_XMIT(PF_INET, skb, rt);
784
785         LeaveFunction(10);
786         return NF_STOLEN;
787
788   tx_error_icmp:
789         dst_link_failure(skb);
790   tx_error:
791         kfree_skb(skb);
792         LeaveFunction(10);
793         return NF_STOLEN;
794 }
795
796 #ifdef CONFIG_IP_VS_IPV6
797 int
798 ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
799                  struct ip_vs_protocol *pp)
800 {
801         struct rt6_info *rt;                    /* Route to the other host */
802         int    mtu;
803
804         EnterFunction(10);
805
806         rt = __ip_vs_get_out_rt_v6(cp);
807         if (!rt)
808                 goto tx_error_icmp;
809
810         /* MTU checking */
811         mtu = dst_mtu(&rt->u.dst);
812         if (skb->len > mtu) {
813                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
814                 dst_release(&rt->u.dst);
815                 IP_VS_DBG_RL("ip_vs_dr_xmit_v6(): frag needed\n");
816                 goto tx_error;
817         }
818
819         /*
820          * Call ip_send_check because we are not sure it is called
821          * after ip_defrag. Is copy-on-write needed?
822          */
823         skb = skb_share_check(skb, GFP_ATOMIC);
824         if (unlikely(skb == NULL)) {
825                 dst_release(&rt->u.dst);
826                 return NF_STOLEN;
827         }
828
829         /* drop old route */
830         dst_release(skb->dst);
831         skb->dst = &rt->u.dst;
832
833         /* Another hack: avoid icmp_send in ip_fragment */
834         skb->local_df = 1;
835
836         IP_VS_XMIT(PF_INET6, skb, rt);
837
838         LeaveFunction(10);
839         return NF_STOLEN;
840
841 tx_error_icmp:
842         dst_link_failure(skb);
843 tx_error:
844         kfree_skb(skb);
845         LeaveFunction(10);
846         return NF_STOLEN;
847 }
848 #endif
849
850
851 /*
852  *      ICMP packet transmitter
853  *      called by the ip_vs_in_icmp
854  */
855 int
856 ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
857                 struct ip_vs_protocol *pp, int offset)
858 {
859         struct rtable   *rt;    /* Route to the other host */
860         int mtu;
861         int rc;
862
863         EnterFunction(10);
864
865         /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
866            forwarded directly here, because there is no need to
867            translate address/port back */
868         if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
869                 if (cp->packet_xmit)
870                         rc = cp->packet_xmit(skb, cp, pp);
871                 else
872                         rc = NF_ACCEPT;
873                 /* do not touch skb anymore */
874                 atomic_inc(&cp->in_pkts);
875                 goto out;
876         }
877
878         /*
879          * mangle and send the packet here (only for VS/NAT)
880          */
881
882         if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(ip_hdr(skb)->tos))))
883                 goto tx_error_icmp;
884
885         /* MTU checking */
886         mtu = dst_mtu(&rt->u.dst);
887         if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
888                 ip_rt_put(rt);
889                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
890                 IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
891                 goto tx_error;
892         }
893
894         /* copy-on-write the packet before mangling it */
895         if (!skb_make_writable(skb, offset))
896                 goto tx_error_put;
897
898         if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
899                 goto tx_error_put;
900
901         /* drop the old route when skb is not shared */
902         dst_release(skb->dst);
903         skb->dst = &rt->u.dst;
904
905         ip_vs_nat_icmp(skb, pp, cp, 0);
906
907         /* Another hack: avoid icmp_send in ip_fragment */
908         skb->local_df = 1;
909
910         IP_VS_XMIT(PF_INET, skb, rt);
911
912         rc = NF_STOLEN;
913         goto out;
914
915   tx_error_icmp:
916         dst_link_failure(skb);
917   tx_error:
918         dev_kfree_skb(skb);
919         rc = NF_STOLEN;
920   out:
921         LeaveFunction(10);
922         return rc;
923   tx_error_put:
924         ip_rt_put(rt);
925         goto tx_error;
926 }
927
928 #ifdef CONFIG_IP_VS_IPV6
929 int
930 ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
931                 struct ip_vs_protocol *pp, int offset)
932 {
933         struct rt6_info *rt;    /* Route to the other host */
934         int mtu;
935         int rc;
936
937         EnterFunction(10);
938
939         /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
940            forwarded directly here, because there is no need to
941            translate address/port back */
942         if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
943                 if (cp->packet_xmit)
944                         rc = cp->packet_xmit(skb, cp, pp);
945                 else
946                         rc = NF_ACCEPT;
947                 /* do not touch skb anymore */
948                 atomic_inc(&cp->in_pkts);
949                 goto out;
950         }
951
952         /*
953          * mangle and send the packet here (only for VS/NAT)
954          */
955
956         rt = __ip_vs_get_out_rt_v6(cp);
957         if (!rt)
958                 goto tx_error_icmp;
959
960         /* MTU checking */
961         mtu = dst_mtu(&rt->u.dst);
962         if (skb->len > mtu) {
963                 dst_release(&rt->u.dst);
964                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
965                 IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
966                 goto tx_error;
967         }
968
969         /* copy-on-write the packet before mangling it */
970         if (!skb_make_writable(skb, offset))
971                 goto tx_error_put;
972
973         if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
974                 goto tx_error_put;
975
976         /* drop the old route when skb is not shared */
977         dst_release(skb->dst);
978         skb->dst = &rt->u.dst;
979
980         ip_vs_nat_icmp_v6(skb, pp, cp, 0);
981
982         /* Another hack: avoid icmp_send in ip_fragment */
983         skb->local_df = 1;
984
985         IP_VS_XMIT(PF_INET6, skb, rt);
986
987         rc = NF_STOLEN;
988         goto out;
989
990 tx_error_icmp:
991         dst_link_failure(skb);
992 tx_error:
993         dev_kfree_skb(skb);
994         rc = NF_STOLEN;
995 out:
996         LeaveFunction(10);
997         return rc;
998 tx_error_put:
999         dst_release(&rt->u.dst);
1000         goto tx_error;
1001 }
1002 #endif