filter: optimize sk_run_filter
Eric Dumazet [Fri, 19 Nov 2010 17:49:59 +0000 (09:49 -0800)]
Remove pc variable to avoid arithmetic to compute fentry at each filter
instruction. Jumps directly manipulate fentry pointer.

As the last instruction of filter[] is guaranteed to be a RETURN, and
all jumps are before the last instruction, we don't need to check filter
bounds (number of instructions in filter array) at each iteration, so we
remove the flen argument from sk_run_filter() params.

On x86_32 remove f_k var introduced in commit 57fe93b374a6b871
(filter: make sure filters dont read uninitialized memory)

Note : We could use a CONFIG_ARCH_HAS_{FEW|MANY}_REGISTERS in order to
avoid too many ifdefs in this code.

This helps the compiler use CPU registers to hold fentry and the A
accumulator.

On x86_32, this saves 401 bytes and, more importantly, sk_run_filter()
runs much faster because of lower register pressure (one less conditional
branch per BPF instruction).

# size net/core/filter.o net/core/filter_pre.o
   text    data     bss     dec     hex filename
   2948       0       0    2948     b84 net/core/filter.o
   3349       0       0    3349     d15 net/core/filter_pre.o

on x86_64 :
# size net/core/filter.o net/core/filter_pre.o
   text    data     bss     dec     hex filename
   5173       0       0    5173    1435 net/core/filter.o
   5224       0       0    5224    1468 net/core/filter_pre.o

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

drivers/isdn/i4l/isdn_ppp.c
drivers/net/ppp_generic.c
include/linux/filter.h
net/core/filter.c
net/core/timestamping.c
net/packet/af_packet.c

index 97c5cc2..9e8162c 100644 (file)
@@ -1147,15 +1147,14 @@ isdn_ppp_push_higher(isdn_net_dev * net_dev, isdn_net_local * lp, struct sk_buff
        }
 
        if (is->pass_filter
-           && sk_run_filter(skb, is->pass_filter, is->pass_len) == 0) {
+           && sk_run_filter(skb, is->pass_filter) == 0) {
                if (is->debug & 0x2)
                        printk(KERN_DEBUG "IPPP: inbound frame filtered.\n");
                kfree_skb(skb);
                return;
        }
        if (!(is->active_filter
-             && sk_run_filter(skb, is->active_filter,
-                              is->active_len) == 0)) {
+             && sk_run_filter(skb, is->active_filter) == 0)) {
                if (is->debug & 0x2)
                        printk(KERN_DEBUG "IPPP: link-active filter: reseting huptimer.\n");
                lp->huptimer = 0;
@@ -1294,15 +1293,14 @@ isdn_ppp_xmit(struct sk_buff *skb, struct net_device *netdev)
        }
 
        if (ipt->pass_filter
-           && sk_run_filter(skb, ipt->pass_filter, ipt->pass_len) == 0) {
+           && sk_run_filter(skb, ipt->pass_filter) == 0) {
                if (ipt->debug & 0x4)
                        printk(KERN_DEBUG "IPPP: outbound frame filtered.\n");
                kfree_skb(skb);
                goto unlock;
        }
        if (!(ipt->active_filter
-             && sk_run_filter(skb, ipt->active_filter,
-                              ipt->active_len) == 0)) {
+             && sk_run_filter(skb, ipt->active_filter) == 0)) {
                if (ipt->debug & 0x4)
                        printk(KERN_DEBUG "IPPP: link-active filter: reseting huptimer.\n");
                lp->huptimer = 0;
@@ -1492,9 +1490,9 @@ int isdn_ppp_autodial_filter(struct sk_buff *skb, isdn_net_local *lp)
        }
        
        drop |= is->pass_filter
-               && sk_run_filter(skb, is->pass_filter, is->pass_len) == 0;
+               && sk_run_filter(skb, is->pass_filter) == 0;
        drop |= is->active_filter
-               && sk_run_filter(skb, is->active_filter, is->active_len) == 0;
+               && sk_run_filter(skb, is->active_filter) == 0;
        
        skb_push(skb, IPPP_MAX_HEADER - 4);
        return drop;
index 09cf56d..0c91598 100644 (file)
@@ -1136,8 +1136,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb)
                   a four-byte PPP header on each packet */
                *skb_push(skb, 2) = 1;
                if (ppp->pass_filter &&
-                   sk_run_filter(skb, ppp->pass_filter,
-                                 ppp->pass_len) == 0) {
+                   sk_run_filter(skb, ppp->pass_filter) == 0) {
                        if (ppp->debug & 1)
                                printk(KERN_DEBUG "PPP: outbound frame not passed\n");
                        kfree_skb(skb);
@@ -1145,8 +1144,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb)
                }
                /* if this packet passes the active filter, record the time */
                if (!(ppp->active_filter &&
-                     sk_run_filter(skb, ppp->active_filter,
-                                   ppp->active_len) == 0))
+                     sk_run_filter(skb, ppp->active_filter) == 0))
                        ppp->last_xmit = jiffies;
                skb_pull(skb, 2);
 #else
@@ -1758,8 +1756,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
 
                        *skb_push(skb, 2) = 0;
                        if (ppp->pass_filter &&
-                           sk_run_filter(skb, ppp->pass_filter,
-                                         ppp->pass_len) == 0) {
+                           sk_run_filter(skb, ppp->pass_filter) == 0) {
                                if (ppp->debug & 1)
                                        printk(KERN_DEBUG "PPP: inbound frame "
                                               "not passed\n");
@@ -1767,8 +1764,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
                                return;
                        }
                        if (!(ppp->active_filter &&
-                             sk_run_filter(skb, ppp->active_filter,
-                                           ppp->active_len) == 0))
+                             sk_run_filter(skb, ppp->active_filter) == 0))
                                ppp->last_recv = jiffies;
                        __skb_pull(skb, 2);
                } else
index 151f5d7..447a775 100644 (file)
@@ -147,7 +147,7 @@ struct sock;
 
 extern int sk_filter(struct sock *sk, struct sk_buff *skb);
 extern unsigned int sk_run_filter(struct sk_buff *skb,
-                                 struct sock_filter *filter, int flen);
+                                 const struct sock_filter *filter);
 extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
 extern int sk_detach_filter(struct sock *sk);
 extern int sk_chk_filter(struct sock_filter *filter, int flen);
index 15a545d..9e77b3c 100644 (file)
@@ -137,7 +137,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
        rcu_read_lock_bh();
        filter = rcu_dereference_bh(sk->sk_filter);
        if (filter) {
-               unsigned int pkt_len = sk_run_filter(skb, filter->insns, filter->len);
+               unsigned int pkt_len = sk_run_filter(skb, filter->insns);
 
                err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
        }
@@ -151,14 +151,15 @@ EXPORT_SYMBOL(sk_filter);
  *     sk_run_filter - run a filter on a socket
  *     @skb: buffer to run the filter on
  *     @filter: filter to apply
- *     @flen: length of filter
  *
  * Decode and apply filter instructions to the skb->data.
- * Return length to keep, 0 for none. skb is the data we are
- * filtering, filter is the array of filter instructions, and
- * len is the number of filter blocks in the array.
+ * Return length to keep, 0 for none. @skb is the data we are
+ * filtering, @filter is the array of filter instructions.
+ * Because all jumps are guaranteed to be before last instruction,
+ * and last instruction guaranteed to be a RET, we dont need to check
+ * flen. (We used to pass to this function the length of filter)
  */
-unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
+unsigned int sk_run_filter(struct sk_buff *skb, const struct sock_filter *fentry)
 {
        void *ptr;
        u32 A = 0;                      /* Accumulator */
@@ -167,34 +168,36 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
        unsigned long memvalid = 0;
        u32 tmp;
        int k;
-       int pc;
 
        BUILD_BUG_ON(BPF_MEMWORDS > BITS_PER_LONG);
        /*
         * Process array of filter instructions.
         */
-       for (pc = 0; pc < flen; pc++) {
-               const struct sock_filter *fentry = &filter[pc];
-               u32 f_k = fentry->k;
+       for (;; fentry++) {
+#if defined(CONFIG_X86_32)
+#define        K (fentry->k)
+#else
+               const u32 K = fentry->k;
+#endif
 
                switch (fentry->code) {
                case BPF_S_ALU_ADD_X:
                        A += X;
                        continue;
                case BPF_S_ALU_ADD_K:
-                       A += f_k;
+                       A += K;
                        continue;
                case BPF_S_ALU_SUB_X:
                        A -= X;
                        continue;
                case BPF_S_ALU_SUB_K:
-                       A -= f_k;
+                       A -= K;
                        continue;
                case BPF_S_ALU_MUL_X:
                        A *= X;
                        continue;
                case BPF_S_ALU_MUL_K:
-                       A *= f_k;
+                       A *= K;
                        continue;
                case BPF_S_ALU_DIV_X:
                        if (X == 0)
@@ -202,64 +205,64 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
                        A /= X;
                        continue;
                case BPF_S_ALU_DIV_K:
-                       A /= f_k;
+                       A /= K;
                        continue;
                case BPF_S_ALU_AND_X:
                        A &= X;
                        continue;
                case BPF_S_ALU_AND_K:
-                       A &= f_k;
+                       A &= K;
                        continue;
                case BPF_S_ALU_OR_X:
                        A |= X;
                        continue;
                case BPF_S_ALU_OR_K:
-                       A |= f_k;
+                       A |= K;
                        continue;
                case BPF_S_ALU_LSH_X:
                        A <<= X;
                        continue;
                case BPF_S_ALU_LSH_K:
-                       A <<= f_k;
+                       A <<= K;
                        continue;
                case BPF_S_ALU_RSH_X:
                        A >>= X;
                        continue;
                case BPF_S_ALU_RSH_K:
-                       A >>= f_k;
+                       A >>= K;
                        continue;
                case BPF_S_ALU_NEG:
                        A = -A;
                        continue;
                case BPF_S_JMP_JA:
-                       pc += f_k;
+                       fentry += K;
                        continue;
                case BPF_S_JMP_JGT_K:
-                       pc += (A > f_k) ? fentry->jt : fentry->jf;
+                       fentry += (A > K) ? fentry->jt : fentry->jf;
                        continue;
                case BPF_S_JMP_JGE_K:
-                       pc += (A >= f_k) ? fentry->jt : fentry->jf;
+                       fentry += (A >= K) ? fentry->jt : fentry->jf;
                        continue;
                case BPF_S_JMP_JEQ_K:
-                       pc += (A == f_k) ? fentry->jt : fentry->jf;
+                       fentry += (A == K) ? fentry->jt : fentry->jf;
                        continue;
                case BPF_S_JMP_JSET_K:
-                       pc += (A & f_k) ? fentry->jt : fentry->jf;
+                       fentry += (A & K) ? fentry->jt : fentry->jf;
                        continue;
                case BPF_S_JMP_JGT_X:
-                       pc += (A > X) ? fentry->jt : fentry->jf;
+                       fentry += (A > X) ? fentry->jt : fentry->jf;
                        continue;
                case BPF_S_JMP_JGE_X:
-                       pc += (A >= X) ? fentry->jt : fentry->jf;
+                       fentry += (A >= X) ? fentry->jt : fentry->jf;
                        continue;
                case BPF_S_JMP_JEQ_X:
-                       pc += (A == X) ? fentry->jt : fentry->jf;
+                       fentry += (A == X) ? fentry->jt : fentry->jf;
                        continue;
                case BPF_S_JMP_JSET_X:
-                       pc += (A & X) ? fentry->jt : fentry->jf;
+                       fentry += (A & X) ? fentry->jt : fentry->jf;
                        continue;
                case BPF_S_LD_W_ABS:
-                       k = f_k;
+                       k = K;
 load_w:
                        ptr = load_pointer(skb, k, 4, &tmp);
                        if (ptr != NULL) {
@@ -268,7 +271,7 @@ load_w:
                        }
                        break;
                case BPF_S_LD_H_ABS:
-                       k = f_k;
+                       k = K;
 load_h:
                        ptr = load_pointer(skb, k, 2, &tmp);
                        if (ptr != NULL) {
@@ -277,7 +280,7 @@ load_h:
                        }
                        break;
                case BPF_S_LD_B_ABS:
-                       k = f_k;
+                       k = K;
 load_b:
                        ptr = load_pointer(skb, k, 1, &tmp);
                        if (ptr != NULL) {
@@ -292,34 +295,34 @@ load_b:
                        X = skb->len;
                        continue;
                case BPF_S_LD_W_IND:
-                       k = X + f_k;
+                       k = X + K;
                        goto load_w;
                case BPF_S_LD_H_IND:
-                       k = X + f_k;
+                       k = X + K;
                        goto load_h;
                case BPF_S_LD_B_IND:
-                       k = X + f_k;
+                       k = X + K;
                        goto load_b;
                case BPF_S_LDX_B_MSH:
-                       ptr = load_pointer(skb, f_k, 1, &tmp);
+                       ptr = load_pointer(skb, K, 1, &tmp);
                        if (ptr != NULL) {
                                X = (*(u8 *)ptr & 0xf) << 2;
                                continue;
                        }
                        return 0;
                case BPF_S_LD_IMM:
-                       A = f_k;
+                       A = K;
                        continue;
                case BPF_S_LDX_IMM:
-                       X = f_k;
+                       X = K;
                        continue;
                case BPF_S_LD_MEM:
-                       A = (memvalid & (1UL << f_k)) ?
-                               mem[f_k] : 0;
+                       A = (memvalid & (1UL << K)) ?
+                               mem[K] : 0;
                        continue;
                case BPF_S_LDX_MEM:
-                       X = (memvalid & (1UL << f_k)) ?
-                               mem[f_k] : 0;
+                       X = (memvalid & (1UL << K)) ?
+                               mem[K] : 0;
                        continue;
                case BPF_S_MISC_TAX:
                        X = A;
@@ -328,16 +331,16 @@ load_b:
                        A = X;
                        continue;
                case BPF_S_RET_K:
-                       return f_k;
+                       return K;
                case BPF_S_RET_A:
                        return A;
                case BPF_S_ST:
-                       memvalid |= 1UL << f_k;
-                       mem[f_k] = A;
+                       memvalid |= 1UL << K;
+                       mem[K] = A;
                        continue;
                case BPF_S_STX:
-                       memvalid |= 1UL << f_k;
-                       mem[f_k] = X;
+                       memvalid |= 1UL << K;
+                       mem[K] = X;
                        continue;
                default:
                        WARN_ON(1);
index 0ae6c22..dac7ed6 100644 (file)
@@ -31,7 +31,7 @@ static unsigned int classify(struct sk_buff *skb)
        if (likely(skb->dev &&
                   skb->dev->phydev &&
                   skb->dev->phydev->drv))
-               return sk_run_filter(skb, ptp_filter, ARRAY_SIZE(ptp_filter));
+               return sk_run_filter(skb, ptp_filter);
        else
                return PTP_CLASS_NONE;
 }
index 2096456..b6372dd 100644 (file)
@@ -519,7 +519,7 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
        rcu_read_lock_bh();
        filter = rcu_dereference_bh(sk->sk_filter);
        if (filter != NULL)
-               res = sk_run_filter(skb, filter->insns, filter->len);
+               res = sk_run_filter(skb, filter->insns);
        rcu_read_unlock_bh();
 
        return res;