net: sk_dst_cache RCUification
[linux-2.6.git] / net / core / filter.c
index a52665f..ff943be 100644 (file)
@@ -18,7 +18,6 @@
 
 #include <linux/module.h>
 #include <linux/types.h>
-#include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/fcntl.h>
 #include <linux/socket.h>
 #include <linux/inet.h>
 #include <linux/netdevice.h>
 #include <linux/if_packet.h>
+#include <linux/gfp.h>
 #include <net/ip.h>
 #include <net/protocol.h>
+#include <net/netlink.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
 #include <linux/errno.h>
 #include <linux/timer.h>
 #include <asm/system.h>
 #include <asm/uaccess.h>
+#include <asm/unaligned.h>
 #include <linux/filter.h>
 
 /* No hurry in this branch */
@@ -42,17 +44,17 @@ static void *__load_pointer(struct sk_buff *skb, int k)
        u8 *ptr = NULL;
 
        if (k >= SKF_NET_OFF)
-               ptr = skb->nh.raw + k - SKF_NET_OFF;
+               ptr = skb_network_header(skb) + k - SKF_NET_OFF;
        else if (k >= SKF_LL_OFF)
-               ptr = skb->mac.raw + k - SKF_LL_OFF;
+               ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
 
-       if (ptr >= skb->head && ptr < skb->tail)
+       if (ptr >= skb->head && ptr < skb_tail_pointer(skb))
                return ptr;
        return NULL;
 }
 
 static inline void *load_pointer(struct sk_buff *skb, int k,
-                                 unsigned int size, void *buffer)
+                                unsigned int size, void *buffer)
 {
        if (k >= 0)
                return skb_header_pointer(skb, k, size, buffer);
@@ -64,7 +66,41 @@ static inline void *load_pointer(struct sk_buff *skb, int k,
 }
 
 /**
- *     sk_run_filter   -       run a filter on a socket
+ *     sk_filter - run a packet through a socket filter
+ *     @sk: sock associated with &sk_buff
+ *     @skb: buffer to filter
+ *
+ * Run the filter code and then cut skb->data to correct size returned by
+ * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
+ * than pkt_len we keep whole skb->data. This is the socket level
+ * wrapper to sk_run_filter. It returns 0 if the packet should
+ * be accepted or -EPERM if the packet should be tossed.
+ *
+ */
+int sk_filter(struct sock *sk, struct sk_buff *skb)
+{
+       struct sk_filter *fp;
+       unsigned int keep;
+       int rc = security_sock_rcv_skb(sk, skb);
+
+       /* A non-zero result from the security hook is returned as-is. */
+       if (rc)
+               return rc;
+
+       rcu_read_lock_bh();
+       fp = rcu_dereference_bh(sk->sk_filter);
+       if (fp) {
+               /* A zero verdict tosses the packet; otherwise trim to it. */
+               keep = sk_run_filter(skb, fp->insns, fp->len);
+               rc = keep ? pskb_trim(skb, keep) : -EPERM;
+       }
+       rcu_read_unlock_bh();
+       return rc;
+}
+EXPORT_SYMBOL(sk_filter);
+
+/**
+ *     sk_run_filter - run a filter on a socket
  *     @skb: buffer to run the filter on
  *     @filter: filter to apply
  *     @flen: length of filter
@@ -74,13 +110,12 @@ static inline void *load_pointer(struct sk_buff *skb, int k,
  * filtering, filter is the array of filter instructions, and
  * len is the number of filter blocks in the array.
  */
 unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
 {
        struct sock_filter *fentry;     /* We walk down these */
        void *ptr;
-       u32 A = 0;                      /* Accumulator */
-       u32 X = 0;                      /* Index Register */
+       u32 A = 0;                      /* Accumulator */
+       u32 X = 0;                      /* Index Register */
        u32 mem[BPF_MEMWORDS];          /* Scratch Memory Store */
        u32 tmp;
        int k;
@@ -91,7 +126,7 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
         */
        for (pc = 0; pc < flen; pc++) {
                fentry = &filter[pc];
-                       
+
                switch (fentry->code) {
                case BPF_ALU|BPF_ADD|BPF_X:
                        A += X;
@@ -175,19 +210,19 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
                        continue;
                case BPF_LD|BPF_W|BPF_ABS:
                        k = fentry->k;
- load_w:
+load_w:
                        ptr = load_pointer(skb, k, 4, &tmp);
                        if (ptr != NULL) {
-                               A = ntohl(*(u32 *)ptr);
+                               A = get_unaligned_be32(ptr);
                                continue;
                        }
                        break;
                case BPF_LD|BPF_H|BPF_ABS:
                        k = fentry->k;
- load_h:
+load_h:
                        ptr = load_pointer(skb, k, 2, &tmp);
                        if (ptr != NULL) {
-                               A = ntohs(*(u16 *)ptr);
+                               A = get_unaligned_be16(ptr);
                                continue;
                        }
                        break;
@@ -261,7 +296,7 @@ load_b:
                 */
                switch (k-SKF_AD_OFF) {
                case SKF_AD_PROTOCOL:
-                       A = htons(skb->protocol);
+                       A = ntohs(skb->protocol);
                        continue;
                case SKF_AD_PKTTYPE:
                        A = skb->pkt_type;
@@ -269,6 +304,47 @@ load_b:
                case SKF_AD_IFINDEX:
                        A = skb->dev->ifindex;
                        continue;
+               case SKF_AD_MARK:
+                       A = skb->mark;
+                       continue;
+               case SKF_AD_QUEUE:
+                       A = skb->queue_mapping;
+                       continue;
+               case SKF_AD_NLATTR: {
+                       struct nlattr *nla;
+
+                       if (skb_is_nonlinear(skb))
+                               return 0;
+                       /* check len first: the subtraction below is unsigned */
+                       if (skb->len < sizeof(struct nlattr) ||
+                           A > skb->len - sizeof(struct nlattr))
+                               return 0;
+
+                       nla = nla_find((struct nlattr *)&skb->data[A],
+                                      skb->len - A, X);
+                       /* A becomes the attribute's offset, or 0 if absent */
+                       A = nla ? (void *)nla - (void *)skb->data : 0;
+                       continue;
+               }
+               case SKF_AD_NLATTR_NEST: {
+                       struct nlattr *nla;
+
+                       if (skb_is_nonlinear(skb))
+                               return 0;
+                       /* check len first: the subtraction below is unsigned */
+                       if (skb->len < sizeof(struct nlattr) ||
+                           A > skb->len - sizeof(struct nlattr))
+                               return 0;
+
+                       nla = (struct nlattr *)&skb->data[A];
+                       /* the attribute must end inside the bytes left after A */
+                       if (nla->nla_len > skb->len - A)
+                               return 0;
+
+                       nla = nla_find_nested(nla, X);
+                       A = nla ? (void *)nla - (void *)skb->data : 0;
+                       continue;
+               }
                default:
                        return 0;
                }
@@ -276,6 +352,7 @@ load_b:
 
        return 0;
 }
+EXPORT_SYMBOL(sk_run_filter);
 
 /**
  *     sk_chk_filter - verify socket filter code
@@ -374,7 +451,7 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
                case BPF_JMP|BPF_JSET|BPF_K:
                case BPF_JMP|BPF_JSET|BPF_X:
                        /* for conditionals both must be safe */
-                       if (pc + ftest->jt + 1 >= flen ||
+                       if (pc + ftest->jt + 1 >= flen ||
                            pc + ftest->jf + 1 >= flen)
                                return -EINVAL;
                        break;
@@ -384,7 +461,27 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
                }
        }
 
-        return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL;
+       return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL;
+}
+EXPORT_SYMBOL(sk_chk_filter);
+
+/**
+ *     sk_filter_rcu_release - release a socket filter from an RCU callback
+ *     @rcu: rcu_head embedded in the sk_filter to release
+ *
+ *     Passed to call_rcu_bh() by sk_filter_delayed_uncharge().
+ */
+static void sk_filter_rcu_release(struct rcu_head *rcu)
+{
+       sk_filter_release(container_of(rcu, struct sk_filter, rcu));
+}
+
+static void sk_filter_delayed_uncharge(struct sock *sk, struct sk_filter *fp)
+{
+       /* Uncharge sk->sk_omem_alloc right away; the filter itself is only
+        * handed to sk_filter_rcu_release() through call_rcu_bh(). */
+       atomic_sub(sk_filter_len(fp), &sk->sk_omem_alloc);
+       call_rcu_bh(&fp->rcu, sk_filter_rcu_release);
 }
 
 /**
@@ -399,19 +496,19 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
  */
 int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 {
-       struct sk_filter *fp; 
+       struct sk_filter *fp, *old_fp;
        unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
        int err;
 
        /* Make sure new filter is there and in the right amounts. */
-        if (fprog->filter == NULL)
-                return -EINVAL;
+       if (fprog->filter == NULL)
+               return -EINVAL;
 
        fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
        if (!fp)
                return -ENOMEM;
        if (copy_from_user(fp->insns, fprog->filter, fsize)) {
-               sock_kfree_s(sk, fp, fsize+sizeof(*fp)); 
+               sock_kfree_s(sk, fp, fsize+sizeof(*fp));
                return -EFAULT;
        }
 
@@ -419,20 +516,35 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
        fp->len = fprog->len;
 
        err = sk_chk_filter(fp->insns, fp->len);
-       if (!err) {
-               struct sk_filter *old_fp;
-
-               spin_lock_bh(&sk->sk_lock.slock);
-               old_fp = sk->sk_filter;
-               sk->sk_filter = fp;
-               spin_unlock_bh(&sk->sk_lock.slock);
-               fp = old_fp;
+       if (err) {
+               sk_filter_uncharge(sk, fp);
+               return err;
        }
 
-       if (fp)
-               sk_filter_release(sk, fp);
-       return err;
+       rcu_read_lock_bh();
+       old_fp = rcu_dereference_bh(sk->sk_filter);
+       rcu_assign_pointer(sk->sk_filter, fp);
+       rcu_read_unlock_bh();
+
+       if (old_fp)
+               sk_filter_delayed_uncharge(sk, old_fp);
+       return 0;
 }
+EXPORT_SYMBOL_GPL(sk_attach_filter);
 
-EXPORT_SYMBOL(sk_chk_filter);
-EXPORT_SYMBOL(sk_run_filter);
+int sk_detach_filter(struct sock *sk)
+{
+       struct sk_filter *old_fp;
+
+       rcu_read_lock_bh();
+       old_fp = rcu_dereference_bh(sk->sk_filter);
+       if (old_fp) {
+               /* clear the socket's pointer before queueing the release */
+               rcu_assign_pointer(sk->sk_filter, NULL);
+               sk_filter_delayed_uncharge(sk, old_fp);
+       }
+       rcu_read_unlock_bh();
+       /* -ENOENT tells the caller that no filter was attached */
+       return old_fp ? 0 : -ENOENT;
+}
+EXPORT_SYMBOL_GPL(sk_detach_filter);