netfilter: push reasm skb through instead of original frag skbs
Jiri Pirko [Wed, 6 Nov 2013 16:52:20 +0000 (17:52 +0100)]
[ Upstream commit 6aafeef03b9d9ecf255f3a80ed85ee070260e1ae ]

Pushing the original fragments through causes several problems. For example,
when matching, fragments may not be matched correctly. Take the following
example:

<example>
On HOSTA do:
ip6tables -I INPUT -p icmpv6 -j DROP
ip6tables -I INPUT -p icmpv6 -m icmp6 --icmpv6-type 128 -j ACCEPT

and on HOSTB you do:
ping6 HOSTA -s2000    (MTU is 1500)

Incoming echo requests will be filtered out on HOSTA. This issue does
not occur with packets smaller than the MTU (where fragmentation does not happen).
</example>

As was discussed previously, the only correct solution seems to be to use
the reassembled skb instead of separate frags. Doing this has the positive side
effects of reducing sk_buff by one pointer (nfct_reasm) and allowing the reasm
dances in ipvs and conntrack to be removed.

Future plan is to remove net/ipv6/netfilter/nf_conntrack_reasm.c
entirely and use code in net/ipv6/reassembly.c instead.

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Marcelo Ricardo Leitner <mleitner@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

include/linux/skbuff.h
include/net/ip_vs.h
include/net/netfilter/ipv6/nf_defrag_ipv6.h
net/core/skbuff.c
net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
net/ipv6/netfilter/nf_conntrack_reasm.c
net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
net/netfilter/ipvs/ip_vs_core.c
net/netfilter/ipvs/ip_vs_pe_sip.c

index eaf6027..74db47e 100644 (file)
@@ -331,11 +331,6 @@ typedef unsigned int sk_buff_data_t;
 typedef unsigned char *sk_buff_data_t;
 #endif
 
-#if defined(CONFIG_NF_DEFRAG_IPV4) || defined(CONFIG_NF_DEFRAG_IPV4_MODULE) || \
-    defined(CONFIG_NF_DEFRAG_IPV6) || defined(CONFIG_NF_DEFRAG_IPV6_MODULE)
-#define NET_SKBUFF_NF_DEFRAG_NEEDED 1
-#endif
-
 /** 
  *     struct sk_buff - socket buffer
  *     @next: Next buffer in list
@@ -368,7 +363,6 @@ typedef unsigned char *sk_buff_data_t;
  *     @protocol: Packet protocol from driver
  *     @destructor: Destruct function
  *     @nfct: Associated connection, if any
- *     @nfct_reasm: netfilter conntrack re-assembly pointer
  *     @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
  *     @skb_iif: ifindex of device we arrived on
  *     @tc_index: Traffic control index
@@ -455,9 +449,6 @@ struct sk_buff {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
        struct nf_conntrack     *nfct;
 #endif
-#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
-       struct sk_buff          *nfct_reasm;
-#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
        struct nf_bridge_info   *nf_bridge;
 #endif
@@ -2700,18 +2691,6 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct)
                atomic_inc(&nfct->use);
 }
 #endif
-#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
-static inline void nf_conntrack_get_reasm(struct sk_buff *skb)
-{
-       if (skb)
-               atomic_inc(&skb->users);
-}
-static inline void nf_conntrack_put_reasm(struct sk_buff *skb)
-{
-       if (skb)
-               kfree_skb(skb);
-}
-#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge)
 {
@@ -2730,10 +2709,6 @@ static inline void nf_reset(struct sk_buff *skb)
        nf_conntrack_put(skb->nfct);
        skb->nfct = NULL;
 #endif
-#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
-       nf_conntrack_put_reasm(skb->nfct_reasm);
-       skb->nfct_reasm = NULL;
-#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
        nf_bridge_put(skb->nf_bridge);
        skb->nf_bridge = NULL;
@@ -2755,10 +2730,6 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src)
        nf_conntrack_get(src->nfct);
        dst->nfctinfo = src->nfctinfo;
 #endif
-#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
-       dst->nfct_reasm = src->nfct_reasm;
-       nf_conntrack_get_reasm(src->nfct_reasm);
-#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
        dst->nf_bridge  = src->nf_bridge;
        nf_bridge_get(src->nf_bridge);
@@ -2770,9 +2741,6 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
        nf_conntrack_put(dst->nfct);
 #endif
-#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
-       nf_conntrack_put_reasm(dst->nfct_reasm);
-#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
        nf_bridge_put(dst->nf_bridge);
 #endif
index 4c062cc..f0c13a3 100644 (file)
@@ -109,7 +109,6 @@ extern int ip_vs_conn_tab_size;
 struct ip_vs_iphdr {
        __u32 len;      /* IPv4 simply where L4 starts
                           IPv6 where L4 Transport Header starts */
-       __u32 thoff_reasm; /* Transport Header Offset in nfct_reasm skb */
        __u16 fragoffs; /* IPv6 fragment offset, 0 if first frag (or not frag)*/
        __s16 protocol;
        __s32 flags;
@@ -117,34 +116,12 @@ struct ip_vs_iphdr {
        union nf_inet_addr daddr;
 };
 
-/* Dependency to module: nf_defrag_ipv6 */
-#if defined(CONFIG_NF_DEFRAG_IPV6) || defined(CONFIG_NF_DEFRAG_IPV6_MODULE)
-static inline struct sk_buff *skb_nfct_reasm(const struct sk_buff *skb)
-{
-       return skb->nfct_reasm;
-}
-static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset,
-                                     int len, void *buffer,
-                                     const struct ip_vs_iphdr *ipvsh)
-{
-       if (unlikely(ipvsh->fragoffs && skb_nfct_reasm(skb)))
-               return skb_header_pointer(skb_nfct_reasm(skb),
-                                         ipvsh->thoff_reasm, len, buffer);
-
-       return skb_header_pointer(skb, offset, len, buffer);
-}
-#else
-static inline struct sk_buff *skb_nfct_reasm(const struct sk_buff *skb)
-{
-       return NULL;
-}
 static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset,
                                      int len, void *buffer,
                                      const struct ip_vs_iphdr *ipvsh)
 {
        return skb_header_pointer(skb, offset, len, buffer);
 }
-#endif
 
 static inline void
 ip_vs_fill_ip4hdr(const void *nh, struct ip_vs_iphdr *iphdr)
@@ -171,19 +148,12 @@ ip_vs_fill_iph_skb(int af, const struct sk_buff *skb, struct ip_vs_iphdr *iphdr)
                        (struct ipv6hdr *)skb_network_header(skb);
                iphdr->saddr.in6 = iph->saddr;
                iphdr->daddr.in6 = iph->daddr;
-               /* ipv6_find_hdr() updates len, flags, thoff_reasm */
-               iphdr->thoff_reasm = 0;
+               /* ipv6_find_hdr() updates len, flags */
                iphdr->len       = 0;
                iphdr->flags     = 0;
                iphdr->protocol  = ipv6_find_hdr(skb, &iphdr->len, -1,
                                                 &iphdr->fragoffs,
                                                 &iphdr->flags);
-               /* get proto from re-assembled packet and it's offset */
-               if (skb_nfct_reasm(skb))
-                       iphdr->protocol = ipv6_find_hdr(skb_nfct_reasm(skb),
-                                                       &iphdr->thoff_reasm,
-                                                       -1, NULL, NULL);
-
        } else
 #endif
        {
index fd79c9a..17920d8 100644 (file)
@@ -6,10 +6,7 @@ extern void nf_defrag_ipv6_enable(void);
 extern int nf_ct_frag6_init(void);
 extern void nf_ct_frag6_cleanup(void);
 extern struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user);
-extern void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
-                              struct net_device *in,
-                              struct net_device *out,
-                              int (*okfn)(struct sk_buff *));
+extern void nf_ct_frag6_consume_orig(struct sk_buff *skb);
 
 struct inet_frags_ctl;
 
index 1c1738c..d9e8736 100644 (file)
@@ -585,9 +585,6 @@ static void skb_release_head_state(struct sk_buff *skb)
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
        nf_conntrack_put(skb->nfct);
 #endif
-#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
-       nf_conntrack_put_reasm(skb->nfct_reasm);
-#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
        nf_bridge_put(skb->nf_bridge);
 #endif
index c9b6a6e..97cd750 100644 (file)
@@ -172,63 +172,13 @@ out:
        return nf_conntrack_confirm(skb);
 }
 
-static unsigned int __ipv6_conntrack_in(struct net *net,
-                                       unsigned int hooknum,
-                                       struct sk_buff *skb,
-                                       const struct net_device *in,
-                                       const struct net_device *out,
-                                       int (*okfn)(struct sk_buff *))
-{
-       struct sk_buff *reasm = skb->nfct_reasm;
-       const struct nf_conn_help *help;
-       struct nf_conn *ct;
-       enum ip_conntrack_info ctinfo;
-
-       /* This packet is fragmented and has reassembled packet. */
-       if (reasm) {
-               /* Reassembled packet isn't parsed yet ? */
-               if (!reasm->nfct) {
-                       unsigned int ret;
-
-                       ret = nf_conntrack_in(net, PF_INET6, hooknum, reasm);
-                       if (ret != NF_ACCEPT)
-                               return ret;
-               }
-
-               /* Conntrack helpers need the entire reassembled packet in the
-                * POST_ROUTING hook. In case of unconfirmed connections NAT
-                * might reassign a helper, so the entire packet is also
-                * required.
-                */
-               ct = nf_ct_get(reasm, &ctinfo);
-               if (ct != NULL && !nf_ct_is_untracked(ct)) {
-                       help = nfct_help(ct);
-                       if ((help && help->helper) || !nf_ct_is_confirmed(ct)) {
-                               nf_conntrack_get_reasm(reasm);
-                               NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm,
-                                              (struct net_device *)in,
-                                              (struct net_device *)out,
-                                              okfn, NF_IP6_PRI_CONNTRACK + 1);
-                               return NF_DROP_ERR(-ECANCELED);
-                       }
-               }
-
-               nf_conntrack_get(reasm->nfct);
-               skb->nfct = reasm->nfct;
-               skb->nfctinfo = reasm->nfctinfo;
-               return NF_ACCEPT;
-       }
-
-       return nf_conntrack_in(net, PF_INET6, hooknum, skb);
-}
-
 static unsigned int ipv6_conntrack_in(unsigned int hooknum,
                                      struct sk_buff *skb,
                                      const struct net_device *in,
                                      const struct net_device *out,
                                      int (*okfn)(struct sk_buff *))
 {
-       return __ipv6_conntrack_in(dev_net(in), hooknum, skb, in, out, okfn);
+       return nf_conntrack_in(dev_net(in), PF_INET6, hooknum, skb);
 }
 
 static unsigned int ipv6_conntrack_local(unsigned int hooknum,
@@ -242,7 +192,7 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum,
                net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
                return NF_ACCEPT;
        }
-       return __ipv6_conntrack_in(dev_net(out), hooknum, skb, in, out, okfn);
+       return nf_conntrack_in(dev_net(out), PF_INET6, hooknum, skb);
 }
 
 static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
index dffdc1a..253566a 100644 (file)
@@ -621,31 +621,16 @@ ret_orig:
        return skb;
 }
 
-void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
-                       struct net_device *in, struct net_device *out,
-                       int (*okfn)(struct sk_buff *))
+void nf_ct_frag6_consume_orig(struct sk_buff *skb)
 {
        struct sk_buff *s, *s2;
-       unsigned int ret = 0;
 
        for (s = NFCT_FRAG6_CB(skb)->orig; s;) {
-               nf_conntrack_put_reasm(s->nfct_reasm);
-               nf_conntrack_get_reasm(skb);
-               s->nfct_reasm = skb;
-
                s2 = s->next;
                s->next = NULL;
-
-               if (ret != -ECANCELED)
-                       ret = NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s,
-                                            in, out, okfn,
-                                            NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
-               else
-                       kfree_skb(s);
-
+               consume_skb(s);
                s = s2;
        }
-       nf_conntrack_put_reasm(skb);
 }
 
 static int nf_ct_net_init(struct net *net)
index aacd121..581dd9e 100644 (file)
@@ -75,8 +75,11 @@ static unsigned int ipv6_defrag(unsigned int hooknum,
        if (reasm == skb)
                return NF_ACCEPT;
 
-       nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in,
-                          (struct net_device *)out, okfn);
+       nf_ct_frag6_consume_orig(reasm);
+
+       NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm,
+                      (struct net_device *) in, (struct net_device *) out,
+                      okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
 
        return NF_STOLEN;
 }
index 23b8eb5..21a3a47 100644 (file)
@@ -1131,12 +1131,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
        ip_vs_fill_iph_skb(af, skb, &iph);
 #ifdef CONFIG_IP_VS_IPV6
        if (af == AF_INET6) {
-               if (!iph.fragoffs && skb_nfct_reasm(skb)) {
-                       struct sk_buff *reasm = skb_nfct_reasm(skb);
-                       /* Save fw mark for coming frags */
-                       reasm->ipvs_property = 1;
-                       reasm->mark = skb->mark;
-               }
                if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
                        int related;
                        int verdict = ip_vs_out_icmp_v6(skb, &related,
@@ -1606,12 +1600,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 
 #ifdef CONFIG_IP_VS_IPV6
        if (af == AF_INET6) {
-               if (!iph.fragoffs && skb_nfct_reasm(skb)) {
-                       struct sk_buff *reasm = skb_nfct_reasm(skb);
-                       /* Save fw mark for coming frags. */
-                       reasm->ipvs_property = 1;
-                       reasm->mark = skb->mark;
-               }
                if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
                        int related;
                        int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum,
@@ -1663,9 +1651,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
                /* sorry, all this trouble for a no-hit :) */
                IP_VS_DBG_PKT(12, af, pp, skb, 0,
                              "ip_vs_in: packet continues traversal as normal");
-               if (iph.fragoffs && !skb_nfct_reasm(skb)) {
+               if (iph.fragoffs) {
                        /* Fragment that couldn't be mapped to a conn entry
-                        * and don't have any pointer to a reasm skb
                         * is missing module nf_defrag_ipv6
                         */
                        IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n");
@@ -1748,38 +1735,6 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb,
 #ifdef CONFIG_IP_VS_IPV6
 
 /*
- * AF_INET6 fragment handling
- * Copy info from first fragment, to the rest of them.
- */
-static unsigned int
-ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb,
-                    const struct net_device *in,
-                    const struct net_device *out,
-                    int (*okfn)(struct sk_buff *))
-{
-       struct sk_buff *reasm = skb_nfct_reasm(skb);
-       struct net *net;
-
-       /* Skip if not a "replay" from nf_ct_frag6_output or first fragment.
-        * ipvs_property is set when checking first fragment
-        * in ip_vs_in() and ip_vs_out().
-        */
-       if (reasm)
-               IP_VS_DBG(2, "Fragment recv prop:%d\n", reasm->ipvs_property);
-       if (!reasm || !reasm->ipvs_property)
-               return NF_ACCEPT;
-
-       net = skb_net(skb);
-       if (!net_ipvs(net)->enable)
-               return NF_ACCEPT;
-
-       /* Copy stored fw mark, saved in ip_vs_{in,out} */
-       skb->mark = reasm->mark;
-
-       return NF_ACCEPT;
-}
-
-/*
  *     AF_INET6 handler in NF_INET_LOCAL_IN chain
  *     Schedule and forward packets from remote clients
  */
@@ -1916,14 +1871,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
                .priority       = 100,
        },
 #ifdef CONFIG_IP_VS_IPV6
-       /* After mangle & nat fetch 2:nd fragment and following */
-       {
-               .hook           = ip_vs_preroute_frag6,
-               .owner          = THIS_MODULE,
-               .pf             = NFPROTO_IPV6,
-               .hooknum        = NF_INET_PRE_ROUTING,
-               .priority       = NF_IP6_PRI_NAT_DST + 1,
-       },
        /* After packet filtering, change source only for VS/NAT */
        {
                .hook           = ip_vs_reply6,
index 9ef22bd..bed5f70 100644 (file)
@@ -65,7 +65,6 @@ static int get_callid(const char *dptr, unsigned int dataoff,
 static int
 ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
 {
-       struct sk_buff *reasm = skb_nfct_reasm(skb);
        struct ip_vs_iphdr iph;
        unsigned int dataoff, datalen, matchoff, matchlen;
        const char *dptr;
@@ -79,15 +78,10 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
        /* todo: IPv6 fragments:
         *       I think this only should be done for the first fragment. /HS
         */
-       if (reasm) {
-               skb = reasm;
-               dataoff = iph.thoff_reasm + sizeof(struct udphdr);
-       } else
-               dataoff = iph.len + sizeof(struct udphdr);
+       dataoff = iph.len + sizeof(struct udphdr);
 
        if (dataoff >= skb->len)
                return -EINVAL;
-       /* todo: Check if this will mess-up the reasm skb !!! /HS */
        retc = skb_linearize(skb);
        if (retc < 0)
                return retc;