ipvs: Create init functions for estimator code
[linux-2.6.git] / net / ipv4 / ipvs / ip_vs_core.c
index 5fb257d..9fbf0a6 100644 (file)
@@ -5,8 +5,6 @@
  *              high-performance and highly available server based on a
  *              cluster of servers.
  *
- * Version:     $Id: ip_vs_core.c,v 1.34 2003/05/10 03:05:23 wensong Exp $
- *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *              Peter Kese <peter.kese@ijs.si>
  *              Julian Anastasov <ja@ssi.bg>
@@ -22,6 +20,7 @@
  *
  * Changes:
  *     Paul `Rusty' Russell            properly handle non-linear skbs
+ *     Harald Welte                    don't use nfcache
  *
  */
 
@@ -57,7 +56,6 @@ EXPORT_SYMBOL(ip_vs_conn_put);
 #ifdef CONFIG_IP_VS_DEBUG
 EXPORT_SYMBOL(ip_vs_get_debug_level);
 #endif
-EXPORT_SYMBOL(ip_vs_make_skb_writable);
 
 
 /* ID used in ICMP lookups */
@@ -162,42 +160,6 @@ ip_vs_set_state(struct ip_vs_conn *cp, int direction,
 }
 
 
-int ip_vs_make_skb_writable(struct sk_buff **pskb, int writable_len)
-{
-       struct sk_buff *skb = *pskb;
-
-       /* skb is already used, better copy skb and its payload */
-       if (unlikely(skb_shared(skb) || skb->sk))
-               goto copy_skb;
-
-       /* skb data is already used, copy it */
-       if (unlikely(skb_cloned(skb)))
-               goto copy_data;
-
-       return pskb_may_pull(skb, writable_len);
-
-  copy_data:
-       if (unlikely(writable_len > skb->len))
-               return 0;
-       return !pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
-
-  copy_skb:
-       if (unlikely(writable_len > skb->len))
-               return 0;
-       skb = skb_copy(skb, GFP_ATOMIC);
-       if (!skb)
-               return 0;
-       BUG_ON(skb_is_nonlinear(skb));
-
-       /* Rest of kernel will get very unhappy if we pass it a
-          suddenly-orphaned skbuff */
-       if ((*pskb)->sk)
-               skb_set_owner_w(skb, (*pskb)->sk);
-       kfree_skb(*pskb);
-       *pskb = skb;
-       return 1;
-}
-
 /*
  *  IPVS persistent scheduling function
  *  It creates a connection entry according to its template if exists,
@@ -208,14 +170,14 @@ int ip_vs_make_skb_writable(struct sk_buff **pskb, int writable_len)
 static struct ip_vs_conn *
 ip_vs_sched_persist(struct ip_vs_service *svc,
                    const struct sk_buff *skb,
-                   __u16 ports[2])
+                   __be16 ports[2])
 {
        struct ip_vs_conn *cp = NULL;
-       struct iphdr *iph = skb->nh.iph;
+       struct iphdr *iph = ip_hdr(skb);
        struct ip_vs_dest *dest;
        struct ip_vs_conn *ct;
-       __u16  dport;    /* destination port to forward */
-       __u32  snet;     /* source network of the client, after masking */
+       __be16  dport;   /* destination port to forward */
+       __be32  snet;    /* source network of the client, after masking */
 
        /* Mask saddr with the netmask to adjust template granularity */
        snet = iph->saddr & svc->netmask;
@@ -242,10 +204,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
        if (ports[1] == svc->port) {
                /* Check if a template already exists */
                if (svc->port != FTPPORT)
-                       ct = ip_vs_conn_in_get(iph->protocol, snet, 0,
+                       ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
                                               iph->daddr, ports[1]);
                else
-                       ct = ip_vs_conn_in_get(iph->protocol, snet, 0,
+                       ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
                                               iph->daddr, 0);
 
                if (!ct || !ip_vs_check_template(ct)) {
@@ -271,14 +233,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
                                                    iph->daddr,
                                                    ports[1],
                                                    dest->addr, dest->port,
-                                                   0,
+                                                   IP_VS_CONN_F_TEMPLATE,
                                                    dest);
                        else
                                ct = ip_vs_conn_new(iph->protocol,
                                                    snet, 0,
                                                    iph->daddr, 0,
                                                    dest->addr, 0,
-                                                   0,
+                                                   IP_VS_CONN_F_TEMPLATE,
                                                    dest);
                        if (ct == NULL)
                                return NULL;
@@ -297,10 +259,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
                 * port zero template: <protocol,caddr,0,vaddr,0,daddr,0>
                 */
                if (svc->fwmark)
-                       ct = ip_vs_conn_in_get(IPPROTO_IP, snet, 0,
+                       ct = ip_vs_ct_in_get(IPPROTO_IP, snet, 0,
                                               htonl(svc->fwmark), 0);
                else
-                       ct = ip_vs_conn_in_get(iph->protocol, snet, 0,
+                       ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
                                               iph->daddr, 0);
 
                if (!ct || !ip_vs_check_template(ct)) {
@@ -325,14 +287,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
                                                    snet, 0,
                                                    htonl(svc->fwmark), 0,
                                                    dest->addr, 0,
-                                                   0,
+                                                   IP_VS_CONN_F_TEMPLATE,
                                                    dest);
                        else
                                ct = ip_vs_conn_new(iph->protocol,
                                                    snet, 0,
                                                    iph->daddr, 0,
                                                    dest->addr, 0,
-                                                   0,
+                                                   IP_VS_CONN_F_TEMPLATE,
                                                    dest);
                        if (ct == NULL)
                                return NULL;
@@ -380,9 +342,9 @@ struct ip_vs_conn *
 ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 {
        struct ip_vs_conn *cp = NULL;
-       struct iphdr *iph = skb->nh.iph;
+       struct iphdr *iph = ip_hdr(skb);
        struct ip_vs_dest *dest;
-       __u16 _ports[2], *pptr;
+       __be16 _ports[2], *pptr;
 
        pptr = skb_header_pointer(skb, iph->ihl*4,
                                  sizeof(_ports), _ports);
@@ -425,7 +387,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
                return NULL;
 
        IP_VS_DBG(6, "Schedule fwd:%c c:%u.%u.%u.%u:%u v:%u.%u.%u.%u:%u "
-                 "d:%u.%u.%u.%u:%u flg:%X cnt:%d\n",
+                 "d:%u.%u.%u.%u:%u conn->flags:%X conn->refcnt:%d\n",
                  ip_vs_fwd_tag(cp),
                  NIPQUAD(cp->caddr), ntohs(cp->cport),
                  NIPQUAD(cp->vaddr), ntohs(cp->vport),
@@ -445,8 +407,8 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
                struct ip_vs_protocol *pp)
 {
-       __u16 _ports[2], *pptr;
-       struct iphdr *iph = skb->nh.iph;
+       __be16 _ports[2], *pptr;
+       struct iphdr *iph = ip_hdr(skb);
 
        pptr = skb_header_pointer(skb, iph->ihl*4,
                                  sizeof(_ports), _ports);
@@ -459,7 +421,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
           and the destination is RTN_UNICAST (and not local), then create
           a cache_bypass connection entry */
        if (sysctl_ip_vs_cache_bypass && svc->fwmark
-           && (inet_addr_type(iph->daddr) == RTN_UNICAST)) {
+           && (inet_addr_type(&init_net, iph->daddr) == RTN_UNICAST)) {
                int ret, cs;
                struct ip_vs_conn *cp;
 
@@ -517,39 +479,37 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 
 
 /*
- *      It is hooked before NF_IP_PRI_NAT_SRC at the NF_IP_POST_ROUTING
+ *      It is hooked before NF_IP_PRI_NAT_SRC at the NF_INET_POST_ROUTING
  *      chain, and is used for VS/NAT.
  *      It detects packets for VS/NAT connections and sends the packets
  *      immediately. This can avoid that iptable_nat mangles the packets
  *      for VS/NAT.
  */
 static unsigned int ip_vs_post_routing(unsigned int hooknum,
-                                      struct sk_buff **pskb,
+                                      struct sk_buff *skb,
                                       const struct net_device *in,
                                       const struct net_device *out,
                                       int (*okfn)(struct sk_buff *))
 {
-       if (!((*pskb)->nfcache & NFC_IPVS_PROPERTY))
+       if (!skb->ipvs_property)
                return NF_ACCEPT;
-
        /* The packet was sent from IPVS, exit this chain */
-       (*okfn)(*pskb);
-
-       return NF_STOLEN;
+       return NF_STOP;
 }
 
-u16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
+__sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
 {
-       return (u16) csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
+       return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
 }
 
-static inline struct sk_buff *
-ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
+static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
 {
-       skb = ip_defrag(skb, user);
-       if (skb)
-               ip_send_check(skb->nh.iph);
-       return skb;
+       int err = ip_defrag(skb, user);
+
+       if (!err)
+               ip_send_check(ip_hdr(skb));
+
+       return err;
 }
 
 /*
@@ -559,9 +519,10 @@ ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
 void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
                    struct ip_vs_conn *cp, int inout)
 {
-       struct iphdr *iph        = skb->nh.iph;
+       struct iphdr *iph        = ip_hdr(skb);
        unsigned int icmp_offset = iph->ihl*4;
-       struct icmphdr *icmph    = (struct icmphdr *)(skb->nh.raw + icmp_offset);
+       struct icmphdr *icmph    = (struct icmphdr *)(skb_network_header(skb) +
+                                                     icmp_offset);
        struct iphdr *ciph       = (struct iphdr *)(icmph + 1);
 
        if (inout) {
@@ -578,7 +539,7 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
 
        /* the TCP/UDP port */
        if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol) {
-               __u16 *ports = (void *)ciph + ciph->ihl*4;
+               __be16 *ports = (void *)ciph + ciph->ihl*4;
 
                if (inout)
                        ports[1] = cp->vport;
@@ -606,9 +567,8 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
  *     Currently handles error types - unreachable, quench, ttl exceeded.
  *     (Only used in VS/NAT)
  */
-static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
+static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
 {
-       struct sk_buff *skb = *pskb;
        struct iphdr *iph;
        struct icmphdr  _icmph, *ic;
        struct iphdr    _ciph, *cih;    /* The ip header contained within the ICMP */
@@ -619,14 +579,12 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
        *related = 1;
 
        /* reassemble IP fragments */
-       if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
-               skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
-               if (!skb)
+       if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
+               if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
                        return NF_STOLEN;
-               *pskb = skb;
        }
 
-       iph = skb->nh.iph;
+       iph = ip_hdr(skb);
        offset = ihl = iph->ihl * 4;
        ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
        if (ic == NULL)
@@ -661,7 +619,7 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
                return NF_ACCEPT;
 
        /* Is the embedded protocol header present? */
-       if (unlikely(cih->frag_off & __constant_htons(IP_OFFSET) &&
+       if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
                     pp->dont_defrag))
                return NF_ACCEPT;
 
@@ -677,13 +635,12 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
        verdict = NF_DROP;
 
        if (IP_VS_FWD_METHOD(cp) != 0) {
-               IP_VS_ERR("shouldn't reach here, because the box is on the"
+               IP_VS_ERR("shouldn't reach here, because the box is on the "
                          "half connection in the tun/dr module.\n");
        }
 
        /* Ensure the checksum is correct */
-       if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
-           ip_vs_checksum_complete(skb, ihl)) {
+       if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
                /* Failed checksum! */
                IP_VS_DBG(1, "Forward ICMP: failed checksum from %d.%d.%d.%d!\n",
                          NIPQUAD(iph->saddr));
@@ -692,16 +649,15 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
 
        if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
                offset += 2 * sizeof(__u16);
-       if (!ip_vs_make_skb_writable(pskb, offset))
+       if (!skb_make_writable(skb, offset))
                goto out;
-       skb = *pskb;
 
        ip_vs_nat_icmp(skb, pp, cp, 1);
 
        /* do the statistics and put it back */
        ip_vs_out_stats(cp, skb);
 
-       skb->nfcache |= NFC_IPVS_PROPERTY;
+       skb->ipvs_property = 1;
        verdict = NF_ACCEPT;
 
   out:
@@ -714,24 +670,22 @@ static inline int is_tcp_reset(const struct sk_buff *skb)
 {
        struct tcphdr _tcph, *th;
 
-       th = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
-                               sizeof(_tcph), &_tcph);
+       th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
        if (th == NULL)
                return 0;
        return th->rst;
 }
 
 /*
- *     It is hooked at the NF_IP_FORWARD chain, used only for VS/NAT.
+ *     It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
  *     Check if outgoing packet belongs to the established ip_vs_conn,
  *      rewrite addresses of the packet and send it on its way...
  */
 static unsigned int
-ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
+ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
          const struct net_device *in, const struct net_device *out,
          int (*okfn)(struct sk_buff *))
 {
-       struct sk_buff  *skb = *pskb;
        struct iphdr    *iph;
        struct ip_vs_protocol *pp;
        struct ip_vs_conn *cp;
@@ -739,17 +693,16 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
 
        EnterFunction(11);
 
-       if (skb->nfcache & NFC_IPVS_PROPERTY)
+       if (skb->ipvs_property)
                return NF_ACCEPT;
 
-       iph = skb->nh.iph;
+       iph = ip_hdr(skb);
        if (unlikely(iph->protocol == IPPROTO_ICMP)) {
-               int related, verdict = ip_vs_out_icmp(pskb, &related);
+               int related, verdict = ip_vs_out_icmp(skb, &related);
 
                if (related)
                        return verdict;
-               skb = *pskb;
-               iph = skb->nh.iph;
+               iph = ip_hdr(skb);
        }
 
        pp = ip_vs_proto_get(iph->protocol);
@@ -757,13 +710,11 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
                return NF_ACCEPT;
 
        /* reassemble IP fragments */
-       if (unlikely(iph->frag_off & __constant_htons(IP_MF|IP_OFFSET) &&
+       if (unlikely(iph->frag_off & htons(IP_MF|IP_OFFSET) &&
                     !pp->dont_defrag)) {
-               skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
-               if (!skb)
+               if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
                        return NF_STOLEN;
-               iph = skb->nh.iph;
-               *pskb = skb;
+               iph = ip_hdr(skb);
        }
 
        ihl = iph->ihl << 2;
@@ -777,7 +728,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
                if (sysctl_ip_vs_nat_icmp_send &&
                    (pp->protocol == IPPROTO_TCP ||
                     pp->protocol == IPPROTO_UDP)) {
-                       __u16 _ports[2], *pptr;
+                       __be16 _ports[2], *pptr;
 
                        pptr = skb_header_pointer(skb, ihl,
                                                  sizeof(_ports), _ports);
@@ -805,15 +756,23 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
 
        IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
 
-       if (!ip_vs_make_skb_writable(pskb, ihl))
+       if (!skb_make_writable(skb, ihl))
                goto drop;
 
        /* mangle the packet */
-       if (pp->snat_handler && !pp->snat_handler(pskb, pp, cp))
+       if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
+               goto drop;
+       ip_hdr(skb)->saddr = cp->vaddr;
+       ip_send_check(ip_hdr(skb));
+
+       /* For policy routing, packets originating from this
+        * machine itself may be routed differently to packets
+        * passing through.  We want this packet to be routed as
+        * if it came from this machine itself.  So re-compute
+        * the routing information.
+        */
+       if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
                goto drop;
-       skb = *pskb;
-       skb->nh.iph->saddr = cp->vaddr;
-       ip_send_check(skb->nh.iph);
 
        IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
 
@@ -821,14 +780,14 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
        ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
        ip_vs_conn_put(cp);
 
-       skb->nfcache |= NFC_IPVS_PROPERTY;
+       skb->ipvs_property = 1;
 
        LeaveFunction(11);
        return NF_ACCEPT;
 
   drop:
        ip_vs_conn_put(cp);
-       kfree_skb(*pskb);
+       kfree_skb(skb);
        return NF_STOLEN;
 }
 
@@ -839,10 +798,9 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
  *     forward to the right destination host if relevant.
  *     Currently handles error types - unreachable, quench, ttl exceeded.
  */
-static int 
-ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
+static int
+ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 {
-       struct sk_buff *skb = *pskb;
        struct iphdr *iph;
        struct icmphdr  _icmph, *ic;
        struct iphdr    _ciph, *cih;    /* The ip header contained within the ICMP */
@@ -853,16 +811,13 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
        *related = 1;
 
        /* reassemble IP fragments */
-       if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
-               skb = ip_vs_gather_frags(skb,
-                                        hooknum == NF_IP_LOCAL_IN ?
-                                        IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD);
-               if (!skb)
+       if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
+               if (ip_vs_gather_frags(skb, hooknum == NF_INET_LOCAL_IN ?
+                                           IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD))
                        return NF_STOLEN;
-               *pskb = skb;
        }
 
-       iph = skb->nh.iph;
+       iph = ip_hdr(skb);
        offset = ihl = iph->ihl * 4;
        ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
        if (ic == NULL)
@@ -897,7 +852,7 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
                return NF_ACCEPT;
 
        /* Is the embedded protocol header present? */
-       if (unlikely(cih->frag_off & __constant_htons(IP_OFFSET) &&
+       if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
                     pp->dont_defrag))
                return NF_ACCEPT;
 
@@ -913,8 +868,7 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
        verdict = NF_DROP;
 
        /* Ensure the checksum is correct */
-       if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
-           ip_vs_checksum_complete(skb, ihl)) {
+       if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
                /* Failed checksum! */
                IP_VS_DBG(1, "Incoming ICMP: failed checksum from %d.%d.%d.%d!\n",
                          NIPQUAD(iph->saddr));
@@ -939,11 +893,10 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
  *     and send it on its way...
  */
 static unsigned int
-ip_vs_in(unsigned int hooknum, struct sk_buff **pskb,
+ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
         const struct net_device *in, const struct net_device *out,
         int (*okfn)(struct sk_buff *))
 {
-       struct sk_buff  *skb = *pskb;
        struct iphdr    *iph;
        struct ip_vs_protocol *pp;
        struct ip_vs_conn *cp;
@@ -955,22 +908,21 @@ ip_vs_in(unsigned int hooknum, struct sk_buff **pskb,
         *      ... don't know why 1st test DOES NOT include 2nd (?)
         */
        if (unlikely(skb->pkt_type != PACKET_HOST
-                    || skb->dev == &loopback_dev || skb->sk)) {
+                    || skb->dev->flags & IFF_LOOPBACK || skb->sk)) {
                IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n",
                          skb->pkt_type,
-                         skb->nh.iph->protocol,
-                         NIPQUAD(skb->nh.iph->daddr));
+                         ip_hdr(skb)->protocol,
+                         NIPQUAD(ip_hdr(skb)->daddr));
                return NF_ACCEPT;
        }
 
-       iph = skb->nh.iph;
+       iph = ip_hdr(skb);
        if (unlikely(iph->protocol == IPPROTO_ICMP)) {
-               int related, verdict = ip_vs_in_icmp(pskb, &related, hooknum);
+               int related, verdict = ip_vs_in_icmp(skb, &related, hooknum);
 
                if (related)
                        return verdict;
-               skb = *pskb;
-               iph = skb->nh.iph;
+               iph = ip_hdr(skb);
        }
 
        /* Protocol supported? */
@@ -1008,11 +960,10 @@ ip_vs_in(unsigned int hooknum, struct sk_buff **pskb,
                if (sysctl_ip_vs_expire_nodest_conn) {
                        /* try to expire the connection immediately */
                        ip_vs_conn_expire_now(cp);
-               } else {
-                       /* don't restart its timer, and silently
-                          drop the packet. */
-                       __ip_vs_conn_put(cp);
                }
+               /* don't restart its timer, and silently
+                  drop the packet. */
+               __ip_vs_conn_put(cp);
                return NF_DROP;
        }
 
@@ -1026,15 +977,24 @@ ip_vs_in(unsigned int hooknum, struct sk_buff **pskb,
                ret = NF_ACCEPT;
        }
 
-       /* increase its packet counter and check if it is needed
-          to be synchronized */
+       /* Increase its packet counter and check if it is needed
+        * to be synchronized
+        *
+        * Sync connection if it is about to close to
+        * encorage the standby servers to update the connections timeout
+        */
        atomic_inc(&cp->in_pkts);
        if ((ip_vs_sync_state & IP_VS_STATE_MASTER) &&
-           (cp->protocol != IPPROTO_TCP ||
-            cp->state == IP_VS_TCP_S_ESTABLISHED) &&
-           (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1]
-            == sysctl_ip_vs_sync_threshold[0]))
+           (((cp->protocol != IPPROTO_TCP ||
+              cp->state == IP_VS_TCP_S_ESTABLISHED) &&
+             (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1]
+              == sysctl_ip_vs_sync_threshold[0])) ||
+            ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
+             ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
+              (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
+              (cp->state == IP_VS_TCP_S_TIME_WAIT)))))
                ip_vs_sync_conn(cp);
+       cp->old_state = cp->state;
 
        ip_vs_conn_put(cp);
        return ret;
@@ -1042,65 +1002,64 @@ ip_vs_in(unsigned int hooknum, struct sk_buff **pskb,
 
 
 /*
- *     It is hooked at the NF_IP_FORWARD chain, in order to catch ICMP
+ *     It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP
  *      related packets destined for 0.0.0.0/0.
  *      When fwmark-based virtual service is used, such as transparent
  *      cache cluster, TCP packets can be marked and routed to ip_vs_in,
  *      but ICMP destined for 0.0.0.0/0 cannot not be easily marked and
- *      sent to ip_vs_in_icmp. So, catch them at the NF_IP_FORWARD chain
+ *      sent to ip_vs_in_icmp. So, catch them at the NF_INET_FORWARD chain
  *      and send them to ip_vs_in_icmp.
  */
 static unsigned int
-ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff **pskb,
+ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
                   const struct net_device *in, const struct net_device *out,
                   int (*okfn)(struct sk_buff *))
 {
        int r;
 
-       if ((*pskb)->nh.iph->protocol != IPPROTO_ICMP)
+       if (ip_hdr(skb)->protocol != IPPROTO_ICMP)
                return NF_ACCEPT;
 
-       return ip_vs_in_icmp(pskb, &r, hooknum);
+       return ip_vs_in_icmp(skb, &r, hooknum);
 }
 
 
-/* After packet filtering, forward packet through VS/DR, VS/TUN,
-   or VS/NAT(change destination), so that filtering rules can be
-   applied to IPVS. */
-static struct nf_hook_ops ip_vs_in_ops = {
-       .hook           = ip_vs_in,
-       .owner          = THIS_MODULE,
-       .pf             = PF_INET,
-       .hooknum        = NF_IP_LOCAL_IN,
-       .priority       = 100,
-};
-
-/* After packet filtering, change source only for VS/NAT */
-static struct nf_hook_ops ip_vs_out_ops = {
-       .hook           = ip_vs_out,
-       .owner          = THIS_MODULE,
-       .pf             = PF_INET,
-       .hooknum        = NF_IP_FORWARD,
-       .priority       = 100,
-};
-
-/* After packet filtering (but before ip_vs_out_icmp), catch icmp
-   destined for 0.0.0.0/0, which is for incoming IPVS connections */
-static struct nf_hook_ops ip_vs_forward_icmp_ops = {
-       .hook           = ip_vs_forward_icmp,
-       .owner          = THIS_MODULE,
-       .pf             = PF_INET,
-       .hooknum        = NF_IP_FORWARD,
-       .priority       = 99,
-};
-
-/* Before the netfilter connection tracking, exit from POST_ROUTING */
-static struct nf_hook_ops ip_vs_post_routing_ops = {
-       .hook           = ip_vs_post_routing,
-       .owner          = THIS_MODULE,
-       .pf             = PF_INET,
-       .hooknum        = NF_IP_POST_ROUTING,
-       .priority       = NF_IP_PRI_NAT_SRC-1,
+static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
+       /* After packet filtering, forward packet through VS/DR, VS/TUN,
+        * or VS/NAT(change destination), so that filtering rules can be
+        * applied to IPVS. */
+       {
+               .hook           = ip_vs_in,
+               .owner          = THIS_MODULE,
+               .pf             = PF_INET,
+               .hooknum        = NF_INET_LOCAL_IN,
+               .priority       = 100,
+       },
+       /* After packet filtering, change source only for VS/NAT */
+       {
+               .hook           = ip_vs_out,
+               .owner          = THIS_MODULE,
+               .pf             = PF_INET,
+               .hooknum        = NF_INET_FORWARD,
+               .priority       = 100,
+       },
+       /* After packet filtering (but before ip_vs_out_icmp), catch icmp
+        * destined for 0.0.0.0/0, which is for incoming IPVS connections */
+       {
+               .hook           = ip_vs_forward_icmp,
+               .owner          = THIS_MODULE,
+               .pf             = PF_INET,
+               .hooknum        = NF_INET_FORWARD,
+               .priority       = 99,
+       },
+       /* Before the netfilter connection tracking, exit from POST_ROUTING */
+       {
+               .hook           = ip_vs_post_routing,
+               .owner          = THIS_MODULE,
+               .pf             = PF_INET,
+               .hooknum        = NF_INET_POST_ROUTING,
+               .priority       = NF_IP_PRI_NAT_SRC-1,
+       },
 };
 
 
@@ -1111,10 +1070,12 @@ static int __init ip_vs_init(void)
 {
        int ret;
 
+       ip_vs_estimator_init();
+
        ret = ip_vs_control_init();
        if (ret < 0) {
                IP_VS_ERR("can't setup control.\n");
-               goto cleanup_nothing;
+               goto cleanup_estimator;
        }
 
        ip_vs_protocol_init();
@@ -1131,37 +1092,15 @@ static int __init ip_vs_init(void)
                goto cleanup_app;
        }
 
-       ret = nf_register_hook(&ip_vs_in_ops);
+       ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
        if (ret < 0) {
-               IP_VS_ERR("can't register in hook.\n");
+               IP_VS_ERR("can't register hooks.\n");
                goto cleanup_conn;
        }
 
-       ret = nf_register_hook(&ip_vs_out_ops);
-       if (ret < 0) {
-               IP_VS_ERR("can't register out hook.\n");
-               goto cleanup_inops;
-       }
-       ret = nf_register_hook(&ip_vs_post_routing_ops);
-       if (ret < 0) {
-               IP_VS_ERR("can't register post_routing hook.\n");
-               goto cleanup_outops;
-       }
-       ret = nf_register_hook(&ip_vs_forward_icmp_ops);
-       if (ret < 0) {
-               IP_VS_ERR("can't register forward_icmp hook.\n");
-               goto cleanup_postroutingops;
-       }
-
        IP_VS_INFO("ipvs loaded.\n");
        return ret;
 
-  cleanup_postroutingops:
-       nf_unregister_hook(&ip_vs_post_routing_ops);
-  cleanup_outops:
-       nf_unregister_hook(&ip_vs_out_ops);
-  cleanup_inops:
-       nf_unregister_hook(&ip_vs_in_ops);
   cleanup_conn:
        ip_vs_conn_cleanup();
   cleanup_app:
@@ -1169,20 +1108,19 @@ static int __init ip_vs_init(void)
   cleanup_protocol:
        ip_vs_protocol_cleanup();
        ip_vs_control_cleanup();
-  cleanup_nothing:
+  cleanup_estimator:
+       ip_vs_estimator_cleanup();
        return ret;
 }
 
 static void __exit ip_vs_cleanup(void)
 {
-       nf_unregister_hook(&ip_vs_forward_icmp_ops);
-       nf_unregister_hook(&ip_vs_post_routing_ops);
-       nf_unregister_hook(&ip_vs_out_ops);
-       nf_unregister_hook(&ip_vs_in_ops);
+       nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
        ip_vs_conn_cleanup();
        ip_vs_app_cleanup();
        ip_vs_protocol_cleanup();
        ip_vs_control_cleanup();
+       ip_vs_estimator_cleanup();
        IP_VS_INFO("ipvs unloaded.\n");
 }