mac80211: fix another race in aggregation start
[linux-2.6.git] / net / netfilter / nf_queue.c
index bfc2928..84d0fd4 100644 (file)
@@ -1,4 +1,5 @@
 #include <linux/kernel.h>
+#include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/proc_fs.h>
@@ -8,6 +9,7 @@
 #include <linux/rcupdate.h>
 #include <net/protocol.h>
 #include <net/netfilter/nf_queue.h>
+#include <net/dst.h>
 
 #include "nf_internals.h"
 
  * long term mutex.  The handler must provide an an outfn() to accept packets
  * for queueing and must reinject all packets it receives, no matter what.
  */
-static const struct nf_queue_handler *queue_handler[NPROTO];
+static const struct nf_queue_handler __rcu *queue_handler[NFPROTO_NUMPROTO] __read_mostly;
 
 static DEFINE_MUTEX(queue_handler_mutex);
 
 /* return EBUSY when somebody else is registered, return EEXIST if the
  * same handler is registered, return 0 in case of success. */
-int nf_register_queue_handler(int pf, const struct nf_queue_handler *qh)
+int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
 {
        int ret;
+       const struct nf_queue_handler *old;
 
-       if (pf >= NPROTO)
+       if (pf >= ARRAY_SIZE(queue_handler))
                return -EINVAL;
 
        mutex_lock(&queue_handler_mutex);
-       if (queue_handler[pf] == qh)
+       old = rcu_dereference_protected(queue_handler[pf],
+                                       lockdep_is_held(&queue_handler_mutex));
+       if (old == qh)
                ret = -EEXIST;
-       else if (queue_handler[pf])
+       else if (old)
                ret = -EBUSY;
        else {
                rcu_assign_pointer(queue_handler[pf], qh);
@@ -45,13 +50,17 @@ int nf_register_queue_handler(int pf, const struct nf_queue_handler *qh)
 EXPORT_SYMBOL(nf_register_queue_handler);
 
 /* The caller must flush their queue before this */
-int nf_unregister_queue_handler(int pf, const struct nf_queue_handler *qh)
+int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
 {
-       if (pf >= NPROTO)
+       const struct nf_queue_handler *old;
+
+       if (pf >= ARRAY_SIZE(queue_handler))
                return -EINVAL;
 
        mutex_lock(&queue_handler_mutex);
-       if (queue_handler[pf] != qh) {
+       old = rcu_dereference_protected(queue_handler[pf],
+                                       lockdep_is_held(&queue_handler_mutex));
+       if (old && old != qh) {
                mutex_unlock(&queue_handler_mutex);
                return -EINVAL;
        }
@@ -67,11 +76,14 @@ EXPORT_SYMBOL(nf_unregister_queue_handler);
 
 void nf_unregister_queue_handlers(const struct nf_queue_handler *qh)
 {
-       int pf;
+       u_int8_t pf;
 
        mutex_lock(&queue_handler_mutex);
-       for (pf = 0; pf < NPROTO; pf++)  {
-               if (queue_handler[pf] == qh)
+       for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++)  {
+               if (rcu_dereference_protected(
+                               queue_handler[pf],
+                               lockdep_is_held(&queue_handler_mutex)
+                               ) == qh)
                        rcu_assign_pointer(queue_handler[pf], NULL);
        }
        mutex_unlock(&queue_handler_mutex);
@@ -107,13 +119,13 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
  */
 static int __nf_queue(struct sk_buff *skb,
                      struct list_head *elem,
-                     int pf, unsigned int hook,
+                     u_int8_t pf, unsigned int hook,
                      struct net_device *indev,
                      struct net_device *outdev,
                      int (*okfn)(struct sk_buff *),
                      unsigned int queuenum)
 {
-       int status;
+       int status = -ENOENT;
        struct nf_queue_entry *entry = NULL;
 #ifdef CONFIG_BRIDGE_NETFILTER
        struct net_device *physindev;
@@ -122,20 +134,24 @@ static int __nf_queue(struct sk_buff *skb,
        const struct nf_afinfo *afinfo;
        const struct nf_queue_handler *qh;
 
-       /* QUEUE == DROP if noone is waiting, to be safe. */
+       /* QUEUE == DROP if no one is waiting, to be safe. */
        rcu_read_lock();
 
        qh = rcu_dereference(queue_handler[pf]);
-       if (!qh)
+       if (!qh) {
+               status = -ESRCH;
                goto err_unlock;
+       }
 
        afinfo = nf_get_afinfo(pf);
        if (!afinfo)
                goto err_unlock;
 
        entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC);
-       if (!entry)
+       if (!entry) {
+               status = -ENOMEM;
                goto err_unlock;
+       }
 
        *entry = (struct nf_queue_entry) {
                .skb    = skb,
@@ -149,11 +165,9 @@ static int __nf_queue(struct sk_buff *skb,
 
        /* If it's going away, ignore hook. */
        if (!try_module_get(entry->elem->owner)) {
-               rcu_read_unlock();
-               kfree(entry);
-               return 0;
+               status = -ECANCELED;
+               goto err_unlock;
        }
-
        /* Bump dev refs so they don't vanish while packet is out */
        if (indev)
                dev_hold(indev);
@@ -169,6 +183,7 @@ static int __nf_queue(struct sk_buff *skb,
                        dev_hold(physoutdev);
        }
 #endif
+       skb_dst_force(skb);
        afinfo->saveroute(skb, entry);
        status = qh->outfn(entry, queuenum);
 
@@ -179,54 +194,70 @@ static int __nf_queue(struct sk_buff *skb,
                goto err;
        }
 
-       return 1;
+       return 0;
 
 err_unlock:
        rcu_read_unlock();
 err:
-       kfree_skb(skb);
        kfree(entry);
-       return 1;
+       return status;
 }
 
 int nf_queue(struct sk_buff *skb,
             struct list_head *elem,
-            int pf, unsigned int hook,
+            u_int8_t pf, unsigned int hook,
             struct net_device *indev,
             struct net_device *outdev,
             int (*okfn)(struct sk_buff *),
             unsigned int queuenum)
 {
        struct sk_buff *segs;
+       int err;
+       unsigned int queued;
 
        if (!skb_is_gso(skb))
                return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
                                  queuenum);
 
        switch (pf) {
-       case AF_INET:
+       case NFPROTO_IPV4:
                skb->protocol = htons(ETH_P_IP);
                break;
-       case AF_INET6:
+       case NFPROTO_IPV6:
                skb->protocol = htons(ETH_P_IPV6);
                break;
        }
 
        segs = skb_gso_segment(skb, 0);
-       kfree_skb(skb);
-       if (unlikely(IS_ERR(segs)))
-               return 1;
+       /* Does not use PTR_ERR to limit the number of error codes that can be
+        * returned by nf_queue.  For instance, callers rely on -ECANCELED to mean
+        * 'ignore this hook'.
+        */
+       if (IS_ERR(segs))
+               return -EINVAL;
 
+       queued = 0;
+       err = 0;
        do {
                struct sk_buff *nskb = segs->next;
 
                segs->next = NULL;
-               if (!__nf_queue(segs, elem, pf, hook, indev, outdev, okfn,
-                               queuenum))
+               if (err == 0)
+                       err = __nf_queue(segs, elem, pf, hook, indev,
+                                          outdev, okfn, queuenum);
+               if (err == 0)
+                       queued++;
+               else
                        kfree_skb(segs);
                segs = nskb;
        } while (segs);
-       return 1;
+
+       /* also free orig skb if only some segments were queued */
+       if (unlikely(err && queued))
+               err = 0;
+       if (err == 0)
+               kfree_skb(skb);
+       return err;
 }
 
 void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
@@ -234,6 +265,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
        struct sk_buff *skb = entry->skb;
        struct list_head *elem = &entry->elem->list;
        const struct nf_afinfo *afinfo;
+       int err;
 
        rcu_read_lock();
 
@@ -265,27 +297,34 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
                local_bh_disable();
                entry->okfn(skb);
                local_bh_enable();
-       case NF_STOLEN:
                break;
        case NF_QUEUE:
-               if (!__nf_queue(skb, elem, entry->pf, entry->hook,
-                               entry->indev, entry->outdev, entry->okfn,
-                               verdict >> NF_VERDICT_BITS))
-                       goto next_hook;
+               err = __nf_queue(skb, elem, entry->pf, entry->hook,
+                                entry->indev, entry->outdev, entry->okfn,
+                                verdict >> NF_VERDICT_QBITS);
+               if (err < 0) {
+                       if (err == -ECANCELED)
+                               goto next_hook;
+                       if (err == -ESRCH &&
+                          (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
+                               goto next_hook;
+                       kfree_skb(skb);
+               }
+               break;
+       case NF_STOLEN:
                break;
        default:
                kfree_skb(skb);
        }
        rcu_read_unlock();
        kfree(entry);
-       return;
 }
 EXPORT_SYMBOL(nf_reinject);
 
 #ifdef CONFIG_PROC_FS
 static void *seq_start(struct seq_file *seq, loff_t *pos)
 {
-       if (*pos >= NPROTO)
+       if (*pos >= ARRAY_SIZE(queue_handler))
                return NULL;
 
        return pos;
@@ -295,7 +334,7 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
 {
        (*pos)++;
 
-       if (*pos >= NPROTO)
+       if (*pos >= ARRAY_SIZE(queue_handler))
                return NULL;
 
        return pos;
@@ -348,12 +387,9 @@ static const struct file_operations nfqueue_file_ops = {
 int __init netfilter_queue_init(void)
 {
 #ifdef CONFIG_PROC_FS
-       struct proc_dir_entry *pde;
-
-       pde = create_proc_entry("nf_queue", S_IRUGO, proc_net_netfilter);
-       if (!pde)
+       if (!proc_create("nf_queue", S_IRUGO,
+                        proc_net_netfilter, &nfqueue_file_ops))
                return -1;
-       pde->proc_fops = &nfqueue_file_ops;
 #endif
        return 0;
 }