diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 823fbf4..84d0fd4 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -1,4 +1,5 @@
 #include <linux/kernel.h>
+#include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/proc_fs.h>
@@ -7,6 +8,8 @@
 #include <linux/seq_file.h>
 #include <linux/rcupdate.h>
 #include <net/protocol.h>
+#include <net/netfilter/nf_queue.h>
+#include <net/dst.h>
 
 #include "nf_internals.h"
 
@@ -15,23 +18,26 @@
  * long term mutex.  The handler must provide an outfn() to accept packets
  * for queueing and must reinject all packets it receives, no matter what.
  */
-static struct nf_queue_handler *queue_handler[NPROTO];
+static const struct nf_queue_handler __rcu *queue_handler[NFPROTO_NUMPROTO] __read_mostly;
 
 static DEFINE_MUTEX(queue_handler_mutex);
 
 /* return EBUSY when somebody else is registered, return EEXIST if the
  * same handler is registered, return 0 in case of success. */
-int nf_register_queue_handler(int pf, struct nf_queue_handler *qh)
+int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
 {
        int ret;
+       const struct nf_queue_handler *old;
 
-       if (pf >= NPROTO)
+       if (pf >= ARRAY_SIZE(queue_handler))
                return -EINVAL;
 
        mutex_lock(&queue_handler_mutex);
-       if (queue_handler[pf] == qh)
+       old = rcu_dereference_protected(queue_handler[pf],
+                                       lockdep_is_held(&queue_handler_mutex));
+       if (old == qh)
                ret = -EEXIST;
-       else if (queue_handler[pf])
+       else if (old)
                ret = -EBUSY;
        else {
                rcu_assign_pointer(queue_handler[pf], qh);
@@ -44,13 +50,17 @@ int nf_register_queue_handler(int pf, struct nf_queue_handler *qh)
 EXPORT_SYMBOL(nf_register_queue_handler);
 
 /* The caller must flush their queue before this */
-int nf_unregister_queue_handler(int pf, struct nf_queue_handler *qh)
+int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
 {
-       if (pf >= NPROTO)
+       const struct nf_queue_handler *old;
+
+       if (pf >= ARRAY_SIZE(queue_handler))
                return -EINVAL;
 
        mutex_lock(&queue_handler_mutex);
-       if (queue_handler[pf] != qh) {
+       old = rcu_dereference_protected(queue_handler[pf],
+                                       lockdep_is_held(&queue_handler_mutex));
+       if (old && old != qh) {
                mutex_unlock(&queue_handler_mutex);
                return -EINVAL;
        }
@@ -64,13 +74,16 @@ int nf_unregister_queue_handler(int pf, struct nf_queue_handler *qh)
 }
 EXPORT_SYMBOL(nf_unregister_queue_handler);
 
-void nf_unregister_queue_handlers(struct nf_queue_handler *qh)
+void nf_unregister_queue_handlers(const struct nf_queue_handler *qh)
 {
-       int pf;
+       u_int8_t pf;
 
        mutex_lock(&queue_handler_mutex);
-       for (pf = 0; pf < NPROTO; pf++)  {
-               if (queue_handler[pf] == qh)
+       for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++)  {
+               if (rcu_dereference_protected(
+                               queue_handler[pf],
+                               lockdep_is_held(&queue_handler_mutex)
+                               ) == qh)
                        rcu_assign_pointer(queue_handler[pf], NULL);
        }
        mutex_unlock(&queue_handler_mutex);
@@ -79,174 +92,184 @@ void nf_unregister_queue_handlers(struct nf_queue_handler *qh)
 }
 EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers);
 
+static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
+{
+       /* Release those devices we held, or Alexey will kill me. */
+       if (entry->indev)
+               dev_put(entry->indev);
+       if (entry->outdev)
+               dev_put(entry->outdev);
+#ifdef CONFIG_BRIDGE_NETFILTER
+       if (entry->skb->nf_bridge) {
+               struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge;
+
+               if (nf_bridge->physindev)
+                       dev_put(nf_bridge->physindev);
+               if (nf_bridge->physoutdev)
+                       dev_put(nf_bridge->physoutdev);
+       }
+#endif
+       /* Drop reference to owner of hook which queued us. */
+       module_put(entry->elem->owner);
+}
+
 /*
  * Any packet that leaves via this function must come back
  * through nf_reinject().
  */
 static int __nf_queue(struct sk_buff *skb,
                      struct list_head *elem,
-                     int pf, unsigned int hook,
+                     u_int8_t pf, unsigned int hook,
                      struct net_device *indev,
                      struct net_device *outdev,
                      int (*okfn)(struct sk_buff *),
                      unsigned int queuenum)
 {
-       int status;
-       struct nf_info *info;
+       int status = -ENOENT;
+       struct nf_queue_entry *entry = NULL;
 #ifdef CONFIG_BRIDGE_NETFILTER
-       struct net_device *physindev = NULL;
-       struct net_device *physoutdev = NULL;
+       struct net_device *physindev;
+       struct net_device *physoutdev;
 #endif
-       struct nf_afinfo *afinfo;
-       struct nf_queue_handler *qh;
+       const struct nf_afinfo *afinfo;
+       const struct nf_queue_handler *qh;
 
-       /* QUEUE == DROP if noone is waiting, to be safe. */
+       /* QUEUE == DROP if no one is waiting, to be safe. */
        rcu_read_lock();
 
        qh = rcu_dereference(queue_handler[pf]);
        if (!qh) {
-               rcu_read_unlock();
-               kfree_skb(skb);
-               return 1;
+               status = -ESRCH;
+               goto err_unlock;
        }
 
        afinfo = nf_get_afinfo(pf);
-       if (!afinfo) {
-               rcu_read_unlock();
-               kfree_skb(skb);
-               return 1;
-       }
+       if (!afinfo)
+               goto err_unlock;
 
-       info = kmalloc(sizeof(*info) + afinfo->route_key_size, GFP_ATOMIC);
-       if (!info) {
-               if (net_ratelimit())
-                       printk(KERN_ERR "OOM queueing packet %p\n",
-                              skb);
-               rcu_read_unlock();
-               kfree_skb(skb);
-               return 1;
+       entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC);
+       if (!entry) {
+               status = -ENOMEM;
+               goto err_unlock;
        }
 
-       *info = (struct nf_info) {
-               (struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };
+       *entry = (struct nf_queue_entry) {
+               .skb    = skb,
+               .elem   = list_entry(elem, struct nf_hook_ops, list),
+               .pf     = pf,
+               .hook   = hook,
+               .indev  = indev,
+               .outdev = outdev,
+               .okfn   = okfn,
+       };
 
        /* If it's going away, ignore hook. */
-       if (!try_module_get(info->elem->owner)) {
-               rcu_read_unlock();
-               kfree(info);
-               return 0;
+       if (!try_module_get(entry->elem->owner)) {
+               status = -ECANCELED;
+               goto err_unlock;
        }
-
        /* Bump dev refs so they don't vanish while packet is out */
-       if (indev) dev_hold(indev);
-       if (outdev) dev_hold(outdev);
-
+       if (indev)
+               dev_hold(indev);
+       if (outdev)
+               dev_hold(outdev);
 #ifdef CONFIG_BRIDGE_NETFILTER
        if (skb->nf_bridge) {
                physindev = skb->nf_bridge->physindev;
-               if (physindev) dev_hold(physindev);
+               if (physindev)
+                       dev_hold(physindev);
                physoutdev = skb->nf_bridge->physoutdev;
-               if (physoutdev) dev_hold(physoutdev);
+               if (physoutdev)
+                       dev_hold(physoutdev);
        }
 #endif
-       afinfo->saveroute(skb, info);
-       status = qh->outfn(skb, info, queuenum, qh->data);
+       skb_dst_force(skb);
+       afinfo->saveroute(skb, entry);
+       status = qh->outfn(entry, queuenum);
 
        rcu_read_unlock();
 
        if (status < 0) {
-               /* James M doesn't say fuck enough. */
-               if (indev) dev_put(indev);
-               if (outdev) dev_put(outdev);
-#ifdef CONFIG_BRIDGE_NETFILTER
-               if (physindev) dev_put(physindev);
-               if (physoutdev) dev_put(physoutdev);
-#endif
-               module_put(info->elem->owner);
-               kfree(info);
-               kfree_skb(skb);
-
-               return 1;
+               nf_queue_entry_release_refs(entry);
+               goto err;
        }
 
-       return 1;
+       return 0;
+
+err_unlock:
+       rcu_read_unlock();
+err:
+       kfree(entry);
+       return status;
 }
 
 int nf_queue(struct sk_buff *skb,
             struct list_head *elem,
-            int pf, unsigned int hook,
+            u_int8_t pf, unsigned int hook,
             struct net_device *indev,
             struct net_device *outdev,
             int (*okfn)(struct sk_buff *),
             unsigned int queuenum)
 {
        struct sk_buff *segs;
+       int err;
+       unsigned int queued;
 
        if (!skb_is_gso(skb))
                return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
                                  queuenum);
 
        switch (pf) {
-       case AF_INET:
+       case NFPROTO_IPV4:
                skb->protocol = htons(ETH_P_IP);
                break;
-       case AF_INET6:
+       case NFPROTO_IPV6:
                skb->protocol = htons(ETH_P_IPV6);
                break;
        }
 
        segs = skb_gso_segment(skb, 0);
-       kfree_skb(skb);
-       if (unlikely(IS_ERR(segs)))
-               return 1;
+       /* Does not use PTR_ERR to limit the number of error codes that can be
+        * returned by nf_queue.  For instance, callers rely on -ECANCELED to mean
+        * 'ignore this hook'.
+        */
+       if (IS_ERR(segs))
+               return -EINVAL;
 
+       queued = 0;
+       err = 0;
        do {
                struct sk_buff *nskb = segs->next;
 
                segs->next = NULL;
-               if (!__nf_queue(segs, elem, pf, hook, indev, outdev, okfn,
-                               queuenum))
+               if (err == 0)
+                       err = __nf_queue(segs, elem, pf, hook, indev,
+                                          outdev, okfn, queuenum);
+               if (err == 0)
+                       queued++;
+               else
                        kfree_skb(segs);
                segs = nskb;
        } while (segs);
-       return 1;
+
+       /* also free orig skb if only some segments were queued */
+       if (unlikely(err && queued))
+               err = 0;
+       if (err == 0)
+               kfree_skb(skb);
+       return err;
 }
 
-void nf_reinject(struct sk_buff *skb, struct nf_info *info,
-                unsigned int verdict)
+void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 {
-       struct list_head *elem = &info->elem->list;
-       struct list_head *i;
-       struct nf_afinfo *afinfo;
+       struct sk_buff *skb = entry->skb;
+       struct list_head *elem = &entry->elem->list;
+       const struct nf_afinfo *afinfo;
+       int err;
 
        rcu_read_lock();
 
-       /* Release those devices we held, or Alexey will kill me. */
-       if (info->indev) dev_put(info->indev);
-       if (info->outdev) dev_put(info->outdev);
-#ifdef CONFIG_BRIDGE_NETFILTER
-       if (skb->nf_bridge) {
-               if (skb->nf_bridge->physindev)
-                       dev_put(skb->nf_bridge->physindev);
-               if (skb->nf_bridge->physoutdev)
-                       dev_put(skb->nf_bridge->physoutdev);
-       }
-#endif
-
-       /* Drop reference to owner of hook which queued us. */
-       module_put(info->elem->owner);
-
-       list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) {
-               if (i == elem)
-                       break;
-       }
-
-       if (i == &nf_hooks[info->pf][info->hook]) {
-               /* The module which sent it to userspace is gone. */
-               NFDEBUG("%s: module disappeared, dropping packet.\n",
-                       __FUNCTION__);
-               verdict = NF_DROP;
-       }
+       nf_queue_entry_release_refs(entry);
 
        /* Continue traversal iff userspace said ok... */
        if (verdict == NF_REPEAT) {
@@ -255,44 +278,53 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info,
        }
 
        if (verdict == NF_ACCEPT) {
-               afinfo = nf_get_afinfo(info->pf);
-               if (!afinfo || afinfo->reroute(&skb, info) < 0)
+               afinfo = nf_get_afinfo(entry->pf);
+               if (!afinfo || afinfo->reroute(skb, entry) < 0)
                        verdict = NF_DROP;
        }
 
        if (verdict == NF_ACCEPT) {
        next_hook:
-               verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
-                                    &skb, info->hook,
-                                    info->indev, info->outdev, &elem,
-                                    info->okfn, INT_MIN);
+               verdict = nf_iterate(&nf_hooks[entry->pf][entry->hook],
+                                    skb, entry->hook,
+                                    entry->indev, entry->outdev, &elem,
+                                    entry->okfn, INT_MIN);
        }
 
        switch (verdict & NF_VERDICT_MASK) {
        case NF_ACCEPT:
        case NF_STOP:
-               info->okfn(skb);
-       case NF_STOLEN:
+               local_bh_disable();
+               entry->okfn(skb);
+               local_bh_enable();
                break;
        case NF_QUEUE:
-               if (!__nf_queue(skb, elem, info->pf, info->hook,
-                               info->indev, info->outdev, info->okfn,
-                               verdict >> NF_VERDICT_BITS))
-                       goto next_hook;
+               err = __nf_queue(skb, elem, entry->pf, entry->hook,
+                                entry->indev, entry->outdev, entry->okfn,
+                                verdict >> NF_VERDICT_QBITS);
+               if (err < 0) {
+                       if (err == -ECANCELED)
+                               goto next_hook;
+                       if (err == -ESRCH &&
+                          (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
+                               goto next_hook;
+                       kfree_skb(skb);
+               }
+               break;
+       case NF_STOLEN:
                break;
        default:
                kfree_skb(skb);
        }
        rcu_read_unlock();
-       kfree(info);
-       return;
+       kfree(entry);
 }
 EXPORT_SYMBOL(nf_reinject);
 
 #ifdef CONFIG_PROC_FS
 static void *seq_start(struct seq_file *seq, loff_t *pos)
 {
-       if (*pos >= NPROTO)
+       if (*pos >= ARRAY_SIZE(queue_handler))
                return NULL;
 
        return pos;
@@ -302,7 +334,7 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
 {
        (*pos)++;
 
-       if (*pos >= NPROTO)
+       if (*pos >= ARRAY_SIZE(queue_handler))
                return NULL;
 
        return pos;
@@ -317,7 +349,7 @@ static int seq_show(struct seq_file *s, void *v)
 {
        int ret;
        loff_t *pos = v;
-       struct nf_queue_handler *qh;
+       const struct nf_queue_handler *qh;
 
        rcu_read_lock();
        qh = rcu_dereference(queue_handler[*pos]);
@@ -330,7 +362,7 @@ static int seq_show(struct seq_file *s, void *v)
        return ret;
 }
 
-static struct seq_operations nfqueue_seq_ops = {
+static const struct seq_operations nfqueue_seq_ops = {
        .start  = seq_start,
        .next   = seq_next,
        .stop   = seq_stop,
@@ -355,12 +387,9 @@ static const struct file_operations nfqueue_file_ops = {
 int __init netfilter_queue_init(void)
 {
 #ifdef CONFIG_PROC_FS
-       struct proc_dir_entry *pde;
-
-       pde = create_proc_entry("nf_queue", S_IRUGO, proc_net_netfilter);
-       if (!pde)
+       if (!proc_create("nf_queue", S_IRUGO,
+                        proc_net_netfilter, &nfqueue_file_ops))
                return -1;
-       pde->proc_fops = &nfqueue_file_ops;
 #endif
        return 0;
 }
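
For reference, a minimal sketch (not part of this diff) of a queue handler written against the reworked API above: outfn() now receives a struct nf_queue_entry and a queue number instead of a separate skb/nf_info pair, registration takes a const handler and a u_int8_t protocol family, and reinjection passes the same entry back to nf_reinject(). The handler name, the .name field (assumed from the contemporary <net/netfilter/nf_queue.h>), the choice of NFPROTO_IPV4, and the module boilerplate are illustrative assumptions, not taken from this change.

/*
 * Hedged sketch of a queue handler against the reworked nf_queue API.
 * Assumption: struct nf_queue_handler exposes .name and .outfn as in the
 * contemporary <net/netfilter/nf_queue.h>; NFPROTO_IPV4 is just an example.
 */
#include <linux/module.h>
#include <linux/netfilter.h>
#include <net/netfilter/nf_queue.h>

static int example_outfn(struct nf_queue_entry *entry, unsigned int queuenum)
{
	/* A real handler would hand entry->skb to userspace and call
	 * nf_reinject() later with the user's verdict; accepting
	 * immediately just keeps the sketch self-contained. */
	nf_reinject(entry, NF_ACCEPT);
	return 0;			/* 0 = packet queued successfully */
}

static const struct nf_queue_handler example_qh = {
	.name  = "example",
	.outfn = example_outfn,
};

static int __init example_init(void)
{
	/* -EBUSY if another handler already owns this protocol family,
	 * -EEXIST if this same handler is registered twice. */
	return nf_register_queue_handler(NFPROTO_IPV4, &example_qh);
}

static void __exit example_exit(void)
{
	nf_unregister_queue_handler(NFPROTO_IPV4, &example_qh);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");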
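
On the caller side, the queue number recovered by "verdict >> NF_VERDICT_QBITS" in nf_reinject() is the one a hook encoded into its verdict. The sketch below is an assumption-laden illustration, not code from this diff: the hook function, queue number 3, and the decision to always queue are hypothetical; NF_QUEUE_NR() and NF_VERDICT_FLAG_QUEUE_BYPASS come from <linux/netfilter.h> of the same era. OR-ing in the bypass flag pairs with the -ESRCH check added above, so packets are passed to the next hook instead of being dropped when no queue handler is registered.

static unsigned int example_hook(unsigned int hooknum, struct sk_buff *skb,
				 const struct net_device *in,
				 const struct net_device *out,
				 int (*okfn)(struct sk_buff *))
{
	/* Ask for queue #3; fall through to the next hook (rather than
	 * drop) if no handler is listening on this protocol family. */
	return NF_QUEUE_NR(3) | NF_VERDICT_FLAG_QUEUE_BYPASS;
}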