/*
 * Source: linux-2.6.git / net/netfilter/ipvs/ip_vs_xmit.c
 * (commit fc342dda950a654e2068d1e68f4657971259a102)
 */
1 /*
2  * ip_vs_xmit.c: various packet transmitters for IPVS
3  *
4  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
5  *              Julian Anastasov <ja@ssi.bg>
6  *
7  *              This program is free software; you can redistribute it and/or
8  *              modify it under the terms of the GNU General Public License
9  *              as published by the Free Software Foundation; either version
10  *              2 of the License, or (at your option) any later version.
11  *
12  * Changes:
13  *
14  */
15
16 #include <linux/kernel.h>
17 #include <linux/tcp.h>                  /* for tcphdr */
18 #include <net/ip.h>
19 #include <net/tcp.h>                    /* for csum_tcpudp_magic */
20 #include <net/udp.h>
21 #include <net/icmp.h>                   /* for icmp_send */
22 #include <net/route.h>                  /* for ip_route_output */
23 #include <net/ipv6.h>
24 #include <net/ip6_route.h>
25 #include <linux/icmpv6.h>
26 #include <linux/netfilter.h>
27 #include <linux/netfilter_ipv4.h>
28
29 #include <net/ip_vs.h>
30
31
32 /*
33  *      Destination cache to speed up outgoing route lookup
34  */
35 static inline void
36 __ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst)
37 {
38         struct dst_entry *old_dst;
39
40         old_dst = dest->dst_cache;
41         dest->dst_cache = dst;
42         dest->dst_rtos = rtos;
43         dst_release(old_dst);
44 }
45
46 static inline struct dst_entry *
47 __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie)
48 {
49         struct dst_entry *dst = dest->dst_cache;
50
51         if (!dst)
52                 return NULL;
53         if ((dst->obsolete
54              || (dest->af == AF_INET && rtos != dest->dst_rtos)) &&
55             dst->ops->check(dst, cookie) == NULL) {
56                 dest->dst_cache = NULL;
57                 dst_release(dst);
58                 return NULL;
59         }
60         dst_hold(dst);
61         return dst;
62 }
63
/*
 * Look up the IPv4 route towards the connection's real server.
 * When cp->dest exists, the per-destination route cache is consulted
 * (and refilled) under dest->dst_lock; otherwise a plain uncached
 * lookup on cp->daddr is done.  Returns a route the caller must
 * release, or NULL on lookup failure.
 */
static struct rtable *
__ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
{
	struct rtable *rt;			/* Route to the other host */
	struct ip_vs_dest *dest = cp->dest;

	if (dest) {
		spin_lock(&dest->dst_lock);
		if (!(rt = (struct rtable *)
		      __ip_vs_dst_check(dest, rtos, 0))) {
			struct flowi fl = {
				.oif = 0,
				.nl_u = {
					.ip4_u = {
						.daddr = dest->addr.ip,
						.saddr = 0,
						.tos = rtos, } },
			};

			if (ip_route_output_key(&init_net, &rt, &fl)) {
				spin_unlock(&dest->dst_lock);
				IP_VS_DBG_RL("ip_route_output error, "
					     "dest: %u.%u.%u.%u\n",
					     NIPQUAD(dest->addr.ip));
				return NULL;
			}
			/* Cache a clone so the cache holds its own
			 * reference; the caller keeps rt's reference. */
			__ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst));
			IP_VS_DBG(10, "new dst %u.%u.%u.%u, refcnt=%d, rtos=%X\n",
				  NIPQUAD(dest->addr.ip),
				  atomic_read(&rt->u.dst.__refcnt), rtos);
		}
		spin_unlock(&dest->dst_lock);
	} else {
		/* No dest attached (e.g. bypass-type forwarding):
		 * uncached lookup keyed on the connection address. */
		struct flowi fl = {
			.oif = 0,
			.nl_u = {
				.ip4_u = {
					.daddr = cp->daddr.ip,
					.saddr = 0,
					.tos = rtos, } },
		};

		if (ip_route_output_key(&init_net, &rt, &fl)) {
			IP_VS_DBG_RL("ip_route_output error, dest: "
				     "%u.%u.%u.%u\n", NIPQUAD(cp->daddr.ip));
			return NULL;
		}
	}

	return rt;
}
115
#ifdef CONFIG_IP_VS_IPV6
/*
 * IPv6 counterpart of __ip_vs_get_out_rt(): look up (or pull from the
 * per-destination cache) the route to the real server.  IPv6 routes
 * carry no TOS, so the cache is checked with rtos == 0.  Returns a
 * route the caller must release, or NULL on failure.
 */
static struct rt6_info *
__ip_vs_get_out_rt_v6(struct ip_vs_conn *cp)
{
	struct rt6_info *rt;			/* Route to the other host */
	struct ip_vs_dest *dest = cp->dest;

	if (dest) {
		spin_lock(&dest->dst_lock);
		rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0, 0);
		if (!rt) {
			struct flowi fl = {
				.oif = 0,
				.nl_u = {
					.ip6_u = {
						.daddr = dest->addr.in6,
						.saddr = {
							.s6_addr32 =
								{ 0, 0, 0, 0 },
						},
					},
				},
			};

			rt = (struct rt6_info *)ip6_route_output(&init_net,
								 NULL, &fl);
			if (!rt) {
				spin_unlock(&dest->dst_lock);
				IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n",
					     &dest->addr.in6);
				return NULL;
			}
			/* Cache a clone; the caller keeps rt's reference. */
			__ip_vs_dst_set(dest, 0, dst_clone(&rt->u.dst));
			IP_VS_DBG(10, "new dst %pI6, refcnt=%d\n",
				  &dest->addr.in6,
				  atomic_read(&rt->u.dst.__refcnt));
		}
		spin_unlock(&dest->dst_lock);
	} else {
		/* No dest attached: uncached lookup on cp->daddr. */
		struct flowi fl = {
			.oif = 0,
			.nl_u = {
				.ip6_u = {
					.daddr = cp->daddr.in6,
					.saddr = {
						.s6_addr32 = { 0, 0, 0, 0 },
					},
				},
			},
		};

		rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
		if (!rt) {
			IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n",
				     &cp->daddr.in6);
			return NULL;
		}
	}

	return rt;
}
#endif
178
179
180 /*
181  *      Release dest->dst_cache before a dest is removed
182  */
183 void
184 ip_vs_dst_reset(struct ip_vs_dest *dest)
185 {
186         struct dst_entry *old_dst;
187
188         old_dst = dest->dst_cache;
189         dest->dst_cache = NULL;
190         dst_release(old_dst);
191 }
192
/*
 * Hand the skb to netfilter's LOCAL_OUT hook for transmission through
 * the route's output device.  Setting ipvs_property marks the packet as
 * already handled by IPVS so later IPVS hooks leave it alone;
 * skb_forward_csum() adjusts the skb's checksum state for forwarding.
 */
#define IP_VS_XMIT(pf, skb, rt)                         \
do {                                                    \
	(skb)->ipvs_property = 1;                       \
	skb_forward_csum(skb);                          \
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,     \
		(rt)->u.dst.dev, dst_output);           \
} while (0)
200
201
202 /*
203  *      NULL transmitter (do nothing except return NF_ACCEPT)
204  */
205 int
206 ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
207                 struct ip_vs_protocol *pp)
208 {
209         /* we do not touch skb and do not need pskb ptr */
210         return NF_ACCEPT;
211 }
212
213
/*
 *      Bypass transmitter
 *      Let packets bypass the destination when the destination is not
 *      available, it may be only used in transparent cache cluster.
 *
 *      Routes on the packet's own daddr (not a real server) and sends
 *      it unchanged.  Always returns NF_STOLEN: the skb is either
 *      transmitted or freed here.
 */
int
ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp)
{
	struct rtable *rt;			/* Route to the other host */
	struct iphdr  *iph = ip_hdr(skb);
	u8     tos = iph->tos;
	int    mtu;
	struct flowi fl = {
		.oif = 0,
		.nl_u = {
			.ip4_u = {
				.daddr = iph->daddr,
				.saddr = 0,
				.tos = RT_TOS(tos), } },
	};

	EnterFunction(10);

	if (ip_route_output_key(&init_net, &rt, &fl)) {
		IP_VS_DBG_RL("ip_vs_bypass_xmit(): ip_route_output error, "
			     "dest: %u.%u.%u.%u\n", NIPQUAD(iph->daddr));
		goto tx_error_icmp;
	}

	/* MTU checking: oversized DF packets get an ICMP frag-needed
	 * back to the sender instead of being forwarded. */
	mtu = dst_mtu(&rt->u.dst);
	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
		ip_rt_put(rt);
		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
		IP_VS_DBG_RL("ip_vs_bypass_xmit(): frag needed\n");
		goto tx_error;
	}

	/*
	 * Call ip_send_check because we are not sure it is called
	 * after ip_defrag. Is copy-on-write needed?
	 */
	if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
		/* skb_share_check freed/consumed the skb on failure;
		 * only the route reference remains to drop. */
		ip_rt_put(rt);
		return NF_STOLEN;
	}
	ip_send_check(ip_hdr(skb));

	/* drop old route; skb now owns rt's reference */
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(PF_INET, skb, rt);

	LeaveFunction(10);
	return NF_STOLEN;

 tx_error_icmp:
	dst_link_failure(skb);
 tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}
282
#ifdef CONFIG_IP_VS_IPV6
/*
 * IPv6 bypass transmitter: route on the packet's own daddr and send it
 * unchanged.  Always returns NF_STOLEN; the skb is consumed here.
 */
int
ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		     struct ip_vs_protocol *pp)
{
	struct rt6_info *rt;			/* Route to the other host */
	struct ipv6hdr  *iph = ipv6_hdr(skb);
	int    mtu;
	struct flowi fl = {
		.oif = 0,
		.nl_u = {
			.ip6_u = {
				.daddr = iph->daddr,
				.saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
	};

	EnterFunction(10);

	rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
	if (!rt) {
		IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): ip6_route_output error, dest: %pI6\n",
			     &iph->daddr);
		goto tx_error_icmp;
	}

	/* MTU checking: IPv6 routers never fragment, so an oversized
	 * packet always triggers Packet-Too-Big back to the sender. */
	mtu = dst_mtu(&rt->u.dst);
	if (skb->len > mtu) {
		dst_release(&rt->u.dst);
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
		IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): frag needed\n");
		goto tx_error;
	}

	/*
	 * Call ip_send_check because we are not sure it is called
	 * after ip_defrag. Is copy-on-write needed?
	 */
	skb = skb_share_check(skb, GFP_ATOMIC);
	if (unlikely(skb == NULL)) {
		dst_release(&rt->u.dst);
		return NF_STOLEN;
	}

	/* drop old route; skb now owns rt's reference */
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(PF_INET6, skb, rt);

	LeaveFunction(10);
	return NF_STOLEN;

 tx_error_icmp:
	dst_link_failure(skb);
 tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}
#endif
347
/*
 *      NAT transmitter (only for outside-to-inside nat forwarding)
 *      Not used for related ICMP
 *
 *      Rewrites the destination address (and, via the protocol's
 *      dnat_handler, the transport header) to the real server, then
 *      transmits.  Always returns NF_STOLEN; the skb is consumed here.
 */
int
ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
	       struct ip_vs_protocol *pp)
{
	struct rtable *rt;		/* Route to the other host */
	int mtu;
	struct iphdr *iph = ip_hdr(skb);

	EnterFunction(10);

	/* check if it is a connection of no-client-port: learn the
	 * client port from the first port field after the IP header */
	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
		__be16 _pt, *p;
		p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
		if (p == NULL)
			goto tx_error;
		ip_vs_conn_fill_cport(cp, *p);
		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
	}

	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
		goto tx_error_icmp;

	/* MTU checking */
	mtu = dst_mtu(&rt->u.dst);
	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
		ip_rt_put(rt);
		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
		IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for");
		goto tx_error;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, sizeof(struct iphdr)))
		goto tx_error_put;

	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
		goto tx_error_put;

	/* drop old route; skb now owns rt's reference, so later
	 * kfree_skb() on the error path also releases the route */
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/* mangle the packet: transport-level DNAT first, then the
	 * IP destination address, then refresh the IP checksum */
	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
		goto tx_error;
	ip_hdr(skb)->daddr = cp->daddr.ip;
	ip_send_check(ip_hdr(skb));

	IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");

	/* FIXME: when application helper enlarges the packet and the length
	   is larger than the MTU of outgoing device, there will be still
	   MTU problem. */

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(PF_INET, skb, rt);

	LeaveFunction(10);
	return NF_STOLEN;

  tx_error_icmp:
	dst_link_failure(skb);
  tx_error:
	LeaveFunction(10);
	kfree_skb(skb);
	return NF_STOLEN;
  tx_error_put:
	/* route not yet attached to skb: drop its reference separately */
	ip_rt_put(rt);
	goto tx_error;
}
425
#ifdef CONFIG_IP_VS_IPV6
/*
 * IPv6 NAT transmitter: rewrite the destination address (and transport
 * header via dnat_handler) to the real server, then transmit.  Always
 * returns NF_STOLEN; the skb is consumed here.  No checksum refresh is
 * needed at the IP level: IPv6 headers carry no checksum.
 */
int
ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp)
{
	struct rt6_info *rt;		/* Route to the other host */
	int mtu;

	EnterFunction(10);

	/* check if it is a connection of no-client-port: learn the
	 * client port from the first port field after the IPv6 header */
	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
		__be16 _pt, *p;
		p = skb_header_pointer(skb, sizeof(struct ipv6hdr),
				       sizeof(_pt), &_pt);
		if (p == NULL)
			goto tx_error;
		ip_vs_conn_fill_cport(cp, *p);
		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
	}

	rt = __ip_vs_get_out_rt_v6(cp);
	if (!rt)
		goto tx_error_icmp;

	/* MTU checking */
	mtu = dst_mtu(&rt->u.dst);
	if (skb->len > mtu) {
		dst_release(&rt->u.dst);
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
		IP_VS_DBG_RL_PKT(0, pp, skb, 0,
				 "ip_vs_nat_xmit_v6(): frag needed for");
		goto tx_error;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
		goto tx_error_put;

	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
		goto tx_error_put;

	/* drop old route; skb now owns rt's reference, so later
	 * kfree_skb() on the error path also releases the route */
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/* mangle the packet */
	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
		goto tx_error;
	ipv6_hdr(skb)->daddr = cp->daddr.in6;

	IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");

	/* FIXME: when application helper enlarges the packet and the length
	   is larger than the MTU of outgoing device, there will be still
	   MTU problem. */

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(PF_INET6, skb, rt);

	LeaveFunction(10);
	return NF_STOLEN;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	LeaveFunction(10);
	kfree_skb(skb);
	return NF_STOLEN;
tx_error_put:
	/* route not yet attached to skb: drop its reference separately */
	dst_release(&rt->u.dst);
	goto tx_error;
}
#endif
502
503
/*
 *   IP Tunneling transmitter
 *
 *   This function encapsulates the packet in a new IP packet, its
 *   destination will be set to cp->daddr. Most code of this function
 *   is taken from ipip.c.
 *
 *   It is used in VS/TUN cluster. The load balancer selects a real
 *   server from a cluster based on a scheduling algorithm,
 *   encapsulates the request packet and forwards it to the selected
 *   server. For example, all real servers are configured with
 *   "ifconfig tunl0 <Virtual IP Address> up". When the server receives
 *   the encapsulated packet, it will decapsulate the packet, processe
 *   the request and return the response packets directly to the client
 *   without passing the load balancer. This can greatly increase the
 *   scalability of virtual server.
 *
 *   Used for ANY protocol
 *
 *   Always returns NF_STOLEN; the skb is consumed here.
 */
int
ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp)
{
	struct rtable *rt;			/* Route to the other host */
	struct net_device *tdev;		/* Device to other host */
	struct iphdr  *old_iph = ip_hdr(skb);
	u8     tos = old_iph->tos;
	__be16 df = old_iph->frag_off;
	sk_buff_data_t old_transport_header = skb->transport_header;
	struct iphdr  *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	int    mtu;

	EnterFunction(10);

	if (skb->protocol != htons(ETH_P_IP)) {
		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): protocol error, "
			     "ETH_P_IP: %d, skb protocol: %d\n",
			     htons(ETH_P_IP), skb->protocol);
		goto tx_error;
	}

	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos))))
		goto tx_error_icmp;

	tdev = rt->u.dst.dev;

	/* Effective MTU for the inner packet: the outer IPIP header
	 * costs sizeof(struct iphdr); 68 is the IPv4 minimum MTU. */
	mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
	if (mtu < 68) {
		ip_rt_put(rt);
		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): mtu less than 68\n");
		goto tx_error;
	}
	if (skb->dst)
		skb->dst->ops->update_pmtu(skb->dst, mtu);

	df |= (old_iph->frag_off & htons(IP_DF));

	/* DF set and inner packet does not fit: tell the sender */
	if ((old_iph->frag_off & htons(IP_DF))
	    && mtu < ntohs(old_iph->tot_len)) {
		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
		ip_rt_put(rt);
		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): frag needed\n");
		goto tx_error;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);

	if (skb_headroom(skb) < max_headroom
	    || skb_cloned(skb) || skb_shared(skb)) {
		struct sk_buff *new_skb =
			skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			kfree_skb(skb);
			IP_VS_ERR_RL("ip_vs_tunnel_xmit(): no memory\n");
			return NF_STOLEN;
		}
		kfree_skb(skb);
		skb = new_skb;
		/* header pointer moved with the reallocated buffer */
		old_iph = ip_hdr(skb);
	}

	skb->transport_header = old_transport_header;

	/* fix old IP header checksum */
	ip_send_check(old_iph);

	/* make room for the outer header; old_iph keeps pointing at
	 * the (now inner) original header */
	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	/* drop old route; skb now owns rt's reference */
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/*
	 *	Push down and install the IPIP header.
	 */
	iph			=	ip_hdr(skb);
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr)>>2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_IPIP;
	iph->tos		=	tos;
	iph->daddr		=	rt->rt_dst;
	iph->saddr		=	rt->rt_src;
	iph->ttl		=	old_iph->ttl;
	ip_select_ident(iph, &rt->u.dst, NULL);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	ip_local_out(skb);

	LeaveFunction(10);

	return NF_STOLEN;

  tx_error_icmp:
	dst_link_failure(skb);
  tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}
633
#ifdef CONFIG_IP_VS_IPV6
/*
 * IPv6-in-IPv6 tunneling transmitter (VS/TUN counterpart of
 * ip_vs_tunnel_xmit): prepends an outer IPv6 header addressed to the
 * real server and transmits the encapsulated packet.
 *
 * Always returns NF_STOLEN; the skb is consumed here.
 */
int
ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		     struct ip_vs_protocol *pp)
{
	struct rt6_info *rt;		/* Route to the other host */
	struct net_device *tdev;	/* Device to other host */
	struct ipv6hdr  *old_iph = ipv6_hdr(skb);
	sk_buff_data_t old_transport_header = skb->transport_header;
	struct ipv6hdr  *iph;		/* Our new IP header */
	unsigned int max_headroom;	/* The extra header space needed */
	int    mtu;

	EnterFunction(10);

	if (skb->protocol != htons(ETH_P_IPV6)) {
		IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): protocol error, "
			     "ETH_P_IPV6: %d, skb protocol: %d\n",
			     htons(ETH_P_IPV6), skb->protocol);
		goto tx_error;
	}

	rt = __ip_vs_get_out_rt_v6(cp);
	if (!rt)
		goto tx_error_icmp;

	tdev = rt->u.dst.dev;

	/* Effective MTU for the inner packet: the outer IPv6 header
	 * costs sizeof(struct ipv6hdr); 1280 is the IPv6 minimum MTU. */
	mtu = dst_mtu(&rt->u.dst) - sizeof(struct ipv6hdr);
	/* TODO IPv6: do we need this check in IPv6? */
	if (mtu < 1280) {
		dst_release(&rt->u.dst);
		IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): mtu less than 1280\n");
		goto tx_error;
	}
	if (skb->dst)
		skb->dst->ops->update_pmtu(skb->dst, mtu);

	if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
		dst_release(&rt->u.dst);
		IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): frag needed\n");
		goto tx_error;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);

	if (skb_headroom(skb) < max_headroom
	    || skb_cloned(skb) || skb_shared(skb)) {
		struct sk_buff *new_skb =
			skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			dst_release(&rt->u.dst);
			kfree_skb(skb);
			IP_VS_ERR_RL("ip_vs_tunnel_xmit_v6(): no memory\n");
			return NF_STOLEN;
		}
		kfree_skb(skb);
		skb = new_skb;
		/* header pointer moved with the reallocated buffer */
		old_iph = ipv6_hdr(skb);
	}

	skb->transport_header = old_transport_header;

	/* make room for the outer header; old_iph keeps pointing at
	 * the (now inner) original header */
	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	/* drop old route; skb now owns rt's reference */
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/*
	 *	Push down and install the IPIP header.
	 */
	iph			=	ipv6_hdr(skb);
	iph->version		=	6;
	iph->nexthdr		=	IPPROTO_IPV6;
	/*
	 * Outer payload = inner payload plus one full inner IPv6 header.
	 * The previous code did "old_iph->payload_len + sizeof(old_iph)",
	 * which added the size of a *pointer* (not the 40-byte header)
	 * directly to a big-endian __be16 without byte-order conversion.
	 */
	iph->payload_len	=	htons(ntohs(old_iph->payload_len) +
					      sizeof(struct ipv6hdr));
	iph->priority		=	old_iph->priority;
	memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
	iph->daddr		=	rt->rt6i_dst.addr;
	iph->saddr		=	cp->vaddr.in6; /* rt->rt6i_src.addr; */
	iph->hop_limit		=	old_iph->hop_limit;

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	ip6_local_out(skb);

	LeaveFunction(10);

	return NF_STOLEN;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}
#endif
739
740
/*
 *      Direct Routing transmitter
 *      Used for ANY protocol
 *
 *      Forwards the packet unchanged to the real server's route (the
 *      real server must be configured to accept the VIP locally).
 *      Always returns NF_STOLEN; the skb is consumed here.
 */
int
ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
	      struct ip_vs_protocol *pp)
{
	struct rtable *rt;			/* Route to the other host */
	struct iphdr  *iph = ip_hdr(skb);
	int    mtu;

	EnterFunction(10);

	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
		goto tx_error_icmp;

	/* MTU checking: oversized DF packets get an ICMP frag-needed
	 * back to the sender instead of being forwarded. */
	mtu = dst_mtu(&rt->u.dst);
	if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
		ip_rt_put(rt);
		IP_VS_DBG_RL("ip_vs_dr_xmit(): frag needed\n");
		goto tx_error;
	}

	/*
	 * Call ip_send_check because we are not sure it is called
	 * after ip_defrag. Is copy-on-write needed?
	 */
	if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
		ip_rt_put(rt);
		return NF_STOLEN;
	}
	ip_send_check(ip_hdr(skb));

	/* drop old route; skb now owns rt's reference */
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(PF_INET, skb, rt);

	LeaveFunction(10);
	return NF_STOLEN;

  tx_error_icmp:
	dst_link_failure(skb);
  tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}
796
#ifdef CONFIG_IP_VS_IPV6
/*
 * IPv6 Direct Routing transmitter: forward the packet unchanged to
 * the real server's route.  Always returns NF_STOLEN; the skb is
 * consumed here.
 */
int
ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		 struct ip_vs_protocol *pp)
{
	struct rt6_info *rt;			/* Route to the other host */
	int    mtu;

	EnterFunction(10);

	rt = __ip_vs_get_out_rt_v6(cp);
	if (!rt)
		goto tx_error_icmp;

	/* MTU checking: send Packet-Too-Big back to the sender for
	 * oversized packets (no router fragmentation in IPv6) */
	mtu = dst_mtu(&rt->u.dst);
	if (skb->len > mtu) {
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
		dst_release(&rt->u.dst);
		IP_VS_DBG_RL("ip_vs_dr_xmit_v6(): frag needed\n");
		goto tx_error;
	}

	/*
	 * Call ip_send_check because we are not sure it is called
	 * after ip_defrag. Is copy-on-write needed?
	 */
	skb = skb_share_check(skb, GFP_ATOMIC);
	if (unlikely(skb == NULL)) {
		dst_release(&rt->u.dst);
		return NF_STOLEN;
	}

	/* drop old route; skb now owns rt's reference */
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(PF_INET6, skb, rt);

	LeaveFunction(10);
	return NF_STOLEN;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}
#endif
850
851
/*
 *      ICMP packet transmitter
 *      called by the ip_vs_in_icmp
 *
 *      For non-NAT forwarding methods the related ICMP packet is handed
 *      to the connection's packet_xmit unchanged; for VS/NAT the
 *      embedded addresses are mangled here via ip_vs_nat_icmp() before
 *      transmission.  @offset is how many bytes of the packet must be
 *      made writable for the mangling.
 */
int
ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		struct ip_vs_protocol *pp, int offset)
{
	struct rtable	*rt;	/* Route to the other host */
	int mtu;
	int rc;

	EnterFunction(10);

	/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
	   forwarded directly here, because there is no need to
	   translate address/port back */
	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
		if (cp->packet_xmit)
			rc = cp->packet_xmit(skb, cp, pp);
		else
			rc = NF_ACCEPT;
		/* do not touch skb anymore */
		atomic_inc(&cp->in_pkts);
		goto out;
	}

	/*
	 * mangle and send the packet here (only for VS/NAT)
	 */

	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(ip_hdr(skb)->tos))))
		goto tx_error_icmp;

	/* MTU checking */
	mtu = dst_mtu(&rt->u.dst);
	if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
		ip_rt_put(rt);
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
		IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
		goto tx_error;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, offset))
		goto tx_error_put;

	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
		goto tx_error_put;

	/* drop the old route when skb is not shared */
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	ip_vs_nat_icmp(skb, pp, cp, 0);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(PF_INET, skb, rt);

	rc = NF_STOLEN;
	goto out;

  tx_error_icmp:
	dst_link_failure(skb);
  tx_error:
	dev_kfree_skb(skb);
	rc = NF_STOLEN;
  out:
	LeaveFunction(10);
	return rc;
  tx_error_put:
	/* route not yet attached to skb: drop its reference separately */
	ip_rt_put(rt);
	goto tx_error;
}
928
#ifdef CONFIG_IP_VS_IPV6
/*
 * IPv6 counterpart of ip_vs_icmp_xmit(): forward related ICMPv6
 * unchanged for non-NAT methods, or mangle the embedded addresses via
 * ip_vs_nat_icmp_v6() for VS/NAT.  @offset is how many bytes must be
 * made writable for the mangling.
 */
int
ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		struct ip_vs_protocol *pp, int offset)
{
	struct rt6_info *rt;	/* Route to the other host */
	int mtu;
	int rc;

	EnterFunction(10);

	/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
	   forwarded directly here, because there is no need to
	   translate address/port back */
	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
		if (cp->packet_xmit)
			rc = cp->packet_xmit(skb, cp, pp);
		else
			rc = NF_ACCEPT;
		/* do not touch skb anymore */
		atomic_inc(&cp->in_pkts);
		goto out;
	}

	/*
	 * mangle and send the packet here (only for VS/NAT)
	 */

	rt = __ip_vs_get_out_rt_v6(cp);
	if (!rt)
		goto tx_error_icmp;

	/* MTU checking */
	mtu = dst_mtu(&rt->u.dst);
	if (skb->len > mtu) {
		dst_release(&rt->u.dst);
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
		IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
		goto tx_error;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, offset))
		goto tx_error_put;

	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
		goto tx_error_put;

	/* drop the old route when skb is not shared */
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	ip_vs_nat_icmp_v6(skb, pp, cp, 0);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(PF_INET6, skb, rt);

	rc = NF_STOLEN;
	goto out;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	dev_kfree_skb(skb);
	rc = NF_STOLEN;
out:
	LeaveFunction(10);
	return rc;
tx_error_put:
	/* route not yet attached to skb: drop its reference separately */
	dst_release(&rt->u.dst);
	goto tx_error;
}
#endif