/*
 * net/core/filter.c (from linux-2.6.git)
 * NOTE: gitweb page residue removed; the scraped page header read
 * "net: Optimize hard_start_xmit() return checking".
 */
1 /*
2  * Linux Socket Filter - Kernel level socket filtering
3  *
4  * Author:
5  *     Jay Schulist <jschlst@samba.org>
6  *
7  * Based on the design of:
8  *     - The Berkeley Packet Filter
9  *
10  * This program is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU General Public License
12  * as published by the Free Software Foundation; either version
13  * 2 of the License, or (at your option) any later version.
14  *
15  * Andi Kleen - Fix a few bad bugs and races.
16  * Kris Katterjohn - Added many additional checks in sk_chk_filter()
17  */
18
19 #include <linux/module.h>
20 #include <linux/types.h>
21 #include <linux/mm.h>
22 #include <linux/fcntl.h>
23 #include <linux/socket.h>
24 #include <linux/in.h>
25 #include <linux/inet.h>
26 #include <linux/netdevice.h>
27 #include <linux/if_packet.h>
28 #include <net/ip.h>
29 #include <net/protocol.h>
30 #include <net/netlink.h>
31 #include <linux/skbuff.h>
32 #include <net/sock.h>
33 #include <linux/errno.h>
34 #include <linux/timer.h>
35 #include <asm/system.h>
36 #include <asm/uaccess.h>
37 #include <asm/unaligned.h>
38 #include <linux/filter.h>
39
40 /* No hurry in this branch */
41 static void *__load_pointer(struct sk_buff *skb, int k)
42 {
43         u8 *ptr = NULL;
44
45         if (k >= SKF_NET_OFF)
46                 ptr = skb_network_header(skb) + k - SKF_NET_OFF;
47         else if (k >= SKF_LL_OFF)
48                 ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
49
50         if (ptr >= skb->head && ptr < skb_tail_pointer(skb))
51                 return ptr;
52         return NULL;
53 }
54
55 static inline void *load_pointer(struct sk_buff *skb, int k,
56                                  unsigned int size, void *buffer)
57 {
58         if (k >= 0)
59                 return skb_header_pointer(skb, k, size, buffer);
60         else {
61                 if (k >= SKF_AD_OFF)
62                         return NULL;
63                 return __load_pointer(skb, k);
64         }
65 }
66
67 /**
68  *      sk_filter - run a packet through a socket filter
69  *      @sk: sock associated with &sk_buff
70  *      @skb: buffer to filter
71  *
72  * Run the filter code and then cut skb->data to correct size returned by
73  * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
74  * than pkt_len we keep whole skb->data. This is the socket level
75  * wrapper to sk_run_filter. It returns 0 if the packet should
76  * be accepted or -EPERM if the packet should be tossed.
77  *
78  */
79 int sk_filter(struct sock *sk, struct sk_buff *skb)
80 {
81         int err;
82         struct sk_filter *filter;
83
84         err = security_sock_rcv_skb(sk, skb);
85         if (err)
86                 return err;
87
88         rcu_read_lock_bh();
89         filter = rcu_dereference(sk->sk_filter);
90         if (filter) {
91                 unsigned int pkt_len = sk_run_filter(skb, filter->insns,
92                                 filter->len);
93                 err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
94         }
95         rcu_read_unlock_bh();
96
97         return err;
98 }
99 EXPORT_SYMBOL(sk_filter);
100
101 /**
102  *      sk_run_filter - run a filter on a socket
103  *      @skb: buffer to run the filter on
104  *      @filter: filter to apply
105  *      @flen: length of filter
106  *
107  * Decode and apply filter instructions to the skb->data.
108  * Return length to keep, 0 for none. skb is the data we are
109  * filtering, filter is the array of filter instructions, and
110  * len is the number of filter blocks in the array.
111  */
112 unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
113 {
114         struct sock_filter *fentry;     /* We walk down these */
115         void *ptr;
116         u32 A = 0;                      /* Accumulator */
117         u32 X = 0;                      /* Index Register */
118         u32 mem[BPF_MEMWORDS];          /* Scratch Memory Store */
119         u32 tmp;
120         int k;
121         int pc;
122
123         /*
124          * Process array of filter instructions.
125          */
126         for (pc = 0; pc < flen; pc++) {
127                 fentry = &filter[pc];
128
129                 switch (fentry->code) {
130                 case BPF_ALU|BPF_ADD|BPF_X:
131                         A += X;
132                         continue;
133                 case BPF_ALU|BPF_ADD|BPF_K:
134                         A += fentry->k;
135                         continue;
136                 case BPF_ALU|BPF_SUB|BPF_X:
137                         A -= X;
138                         continue;
139                 case BPF_ALU|BPF_SUB|BPF_K:
140                         A -= fentry->k;
141                         continue;
142                 case BPF_ALU|BPF_MUL|BPF_X:
143                         A *= X;
144                         continue;
145                 case BPF_ALU|BPF_MUL|BPF_K:
146                         A *= fentry->k;
147                         continue;
148                 case BPF_ALU|BPF_DIV|BPF_X:
149                         if (X == 0)
150                                 return 0;
151                         A /= X;
152                         continue;
153                 case BPF_ALU|BPF_DIV|BPF_K:
154                         A /= fentry->k;
155                         continue;
156                 case BPF_ALU|BPF_AND|BPF_X:
157                         A &= X;
158                         continue;
159                 case BPF_ALU|BPF_AND|BPF_K:
160                         A &= fentry->k;
161                         continue;
162                 case BPF_ALU|BPF_OR|BPF_X:
163                         A |= X;
164                         continue;
165                 case BPF_ALU|BPF_OR|BPF_K:
166                         A |= fentry->k;
167                         continue;
168                 case BPF_ALU|BPF_LSH|BPF_X:
169                         A <<= X;
170                         continue;
171                 case BPF_ALU|BPF_LSH|BPF_K:
172                         A <<= fentry->k;
173                         continue;
174                 case BPF_ALU|BPF_RSH|BPF_X:
175                         A >>= X;
176                         continue;
177                 case BPF_ALU|BPF_RSH|BPF_K:
178                         A >>= fentry->k;
179                         continue;
180                 case BPF_ALU|BPF_NEG:
181                         A = -A;
182                         continue;
183                 case BPF_JMP|BPF_JA:
184                         pc += fentry->k;
185                         continue;
186                 case BPF_JMP|BPF_JGT|BPF_K:
187                         pc += (A > fentry->k) ? fentry->jt : fentry->jf;
188                         continue;
189                 case BPF_JMP|BPF_JGE|BPF_K:
190                         pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
191                         continue;
192                 case BPF_JMP|BPF_JEQ|BPF_K:
193                         pc += (A == fentry->k) ? fentry->jt : fentry->jf;
194                         continue;
195                 case BPF_JMP|BPF_JSET|BPF_K:
196                         pc += (A & fentry->k) ? fentry->jt : fentry->jf;
197                         continue;
198                 case BPF_JMP|BPF_JGT|BPF_X:
199                         pc += (A > X) ? fentry->jt : fentry->jf;
200                         continue;
201                 case BPF_JMP|BPF_JGE|BPF_X:
202                         pc += (A >= X) ? fentry->jt : fentry->jf;
203                         continue;
204                 case BPF_JMP|BPF_JEQ|BPF_X:
205                         pc += (A == X) ? fentry->jt : fentry->jf;
206                         continue;
207                 case BPF_JMP|BPF_JSET|BPF_X:
208                         pc += (A & X) ? fentry->jt : fentry->jf;
209                         continue;
210                 case BPF_LD|BPF_W|BPF_ABS:
211                         k = fentry->k;
212 load_w:
213                         ptr = load_pointer(skb, k, 4, &tmp);
214                         if (ptr != NULL) {
215                                 A = get_unaligned_be32(ptr);
216                                 continue;
217                         }
218                         break;
219                 case BPF_LD|BPF_H|BPF_ABS:
220                         k = fentry->k;
221 load_h:
222                         ptr = load_pointer(skb, k, 2, &tmp);
223                         if (ptr != NULL) {
224                                 A = get_unaligned_be16(ptr);
225                                 continue;
226                         }
227                         break;
228                 case BPF_LD|BPF_B|BPF_ABS:
229                         k = fentry->k;
230 load_b:
231                         ptr = load_pointer(skb, k, 1, &tmp);
232                         if (ptr != NULL) {
233                                 A = *(u8 *)ptr;
234                                 continue;
235                         }
236                         break;
237                 case BPF_LD|BPF_W|BPF_LEN:
238                         A = skb->len;
239                         continue;
240                 case BPF_LDX|BPF_W|BPF_LEN:
241                         X = skb->len;
242                         continue;
243                 case BPF_LD|BPF_W|BPF_IND:
244                         k = X + fentry->k;
245                         goto load_w;
246                 case BPF_LD|BPF_H|BPF_IND:
247                         k = X + fentry->k;
248                         goto load_h;
249                 case BPF_LD|BPF_B|BPF_IND:
250                         k = X + fentry->k;
251                         goto load_b;
252                 case BPF_LDX|BPF_B|BPF_MSH:
253                         ptr = load_pointer(skb, fentry->k, 1, &tmp);
254                         if (ptr != NULL) {
255                                 X = (*(u8 *)ptr & 0xf) << 2;
256                                 continue;
257                         }
258                         return 0;
259                 case BPF_LD|BPF_IMM:
260                         A = fentry->k;
261                         continue;
262                 case BPF_LDX|BPF_IMM:
263                         X = fentry->k;
264                         continue;
265                 case BPF_LD|BPF_MEM:
266                         A = mem[fentry->k];
267                         continue;
268                 case BPF_LDX|BPF_MEM:
269                         X = mem[fentry->k];
270                         continue;
271                 case BPF_MISC|BPF_TAX:
272                         X = A;
273                         continue;
274                 case BPF_MISC|BPF_TXA:
275                         A = X;
276                         continue;
277                 case BPF_RET|BPF_K:
278                         return fentry->k;
279                 case BPF_RET|BPF_A:
280                         return A;
281                 case BPF_ST:
282                         mem[fentry->k] = A;
283                         continue;
284                 case BPF_STX:
285                         mem[fentry->k] = X;
286                         continue;
287                 default:
288                         WARN_ON(1);
289                         return 0;
290                 }
291
292                 /*
293                  * Handle ancillary data, which are impossible
294                  * (or very difficult) to get parsing packet contents.
295                  */
296                 switch (k-SKF_AD_OFF) {
297                 case SKF_AD_PROTOCOL:
298                         A = ntohs(skb->protocol);
299                         continue;
300                 case SKF_AD_PKTTYPE:
301                         A = skb->pkt_type;
302                         continue;
303                 case SKF_AD_IFINDEX:
304                         A = skb->dev->ifindex;
305                         continue;
306                 case SKF_AD_MARK:
307                         A = skb->mark;
308                         continue;
309                 case SKF_AD_QUEUE:
310                         A = skb->queue_mapping;
311                         continue;
312                 case SKF_AD_NLATTR: {
313                         struct nlattr *nla;
314
315                         if (skb_is_nonlinear(skb))
316                                 return 0;
317                         if (A > skb->len - sizeof(struct nlattr))
318                                 return 0;
319
320                         nla = nla_find((struct nlattr *)&skb->data[A],
321                                        skb->len - A, X);
322                         if (nla)
323                                 A = (void *)nla - (void *)skb->data;
324                         else
325                                 A = 0;
326                         continue;
327                 }
328                 case SKF_AD_NLATTR_NEST: {
329                         struct nlattr *nla;
330
331                         if (skb_is_nonlinear(skb))
332                                 return 0;
333                         if (A > skb->len - sizeof(struct nlattr))
334                                 return 0;
335
336                         nla = (struct nlattr *)&skb->data[A];
337                         if (nla->nla_len > A - skb->len)
338                                 return 0;
339
340                         nla = nla_find_nested(nla, X);
341                         if (nla)
342                                 A = (void *)nla - (void *)skb->data;
343                         else
344                                 A = 0;
345                         continue;
346                 }
347                 default:
348                         return 0;
349                 }
350         }
351
352         return 0;
353 }
354 EXPORT_SYMBOL(sk_run_filter);
355
356 /**
357  *      sk_chk_filter - verify socket filter code
358  *      @filter: filter to verify
359  *      @flen: length of filter
360  *
361  * Check the user's filter code. If we let some ugly
362  * filter code slip through kaboom! The filter must contain
363  * no references or jumps that are out of range, no illegal
364  * instructions, and must end with a RET instruction.
365  *
366  * All jumps are forward as they are not signed.
367  *
368  * Returns 0 if the rule set is legal or -EINVAL if not.
369  */
370 int sk_chk_filter(struct sock_filter *filter, int flen)
371 {
372         struct sock_filter *ftest;
373         int pc;
374
375         if (flen == 0 || flen > BPF_MAXINSNS)
376                 return -EINVAL;
377
378         /* check the filter code now */
379         for (pc = 0; pc < flen; pc++) {
380                 ftest = &filter[pc];
381
382                 /* Only allow valid instructions */
383                 switch (ftest->code) {
384                 case BPF_ALU|BPF_ADD|BPF_K:
385                 case BPF_ALU|BPF_ADD|BPF_X:
386                 case BPF_ALU|BPF_SUB|BPF_K:
387                 case BPF_ALU|BPF_SUB|BPF_X:
388                 case BPF_ALU|BPF_MUL|BPF_K:
389                 case BPF_ALU|BPF_MUL|BPF_X:
390                 case BPF_ALU|BPF_DIV|BPF_X:
391                 case BPF_ALU|BPF_AND|BPF_K:
392                 case BPF_ALU|BPF_AND|BPF_X:
393                 case BPF_ALU|BPF_OR|BPF_K:
394                 case BPF_ALU|BPF_OR|BPF_X:
395                 case BPF_ALU|BPF_LSH|BPF_K:
396                 case BPF_ALU|BPF_LSH|BPF_X:
397                 case BPF_ALU|BPF_RSH|BPF_K:
398                 case BPF_ALU|BPF_RSH|BPF_X:
399                 case BPF_ALU|BPF_NEG:
400                 case BPF_LD|BPF_W|BPF_ABS:
401                 case BPF_LD|BPF_H|BPF_ABS:
402                 case BPF_LD|BPF_B|BPF_ABS:
403                 case BPF_LD|BPF_W|BPF_LEN:
404                 case BPF_LD|BPF_W|BPF_IND:
405                 case BPF_LD|BPF_H|BPF_IND:
406                 case BPF_LD|BPF_B|BPF_IND:
407                 case BPF_LD|BPF_IMM:
408                 case BPF_LDX|BPF_W|BPF_LEN:
409                 case BPF_LDX|BPF_B|BPF_MSH:
410                 case BPF_LDX|BPF_IMM:
411                 case BPF_MISC|BPF_TAX:
412                 case BPF_MISC|BPF_TXA:
413                 case BPF_RET|BPF_K:
414                 case BPF_RET|BPF_A:
415                         break;
416
417                 /* Some instructions need special checks */
418
419                 case BPF_ALU|BPF_DIV|BPF_K:
420                         /* check for division by zero */
421                         if (ftest->k == 0)
422                                 return -EINVAL;
423                         break;
424
425                 case BPF_LD|BPF_MEM:
426                 case BPF_LDX|BPF_MEM:
427                 case BPF_ST:
428                 case BPF_STX:
429                         /* check for invalid memory addresses */
430                         if (ftest->k >= BPF_MEMWORDS)
431                                 return -EINVAL;
432                         break;
433
434                 case BPF_JMP|BPF_JA:
435                         /*
436                          * Note, the large ftest->k might cause loops.
437                          * Compare this with conditional jumps below,
438                          * where offsets are limited. --ANK (981016)
439                          */
440                         if (ftest->k >= (unsigned)(flen-pc-1))
441                                 return -EINVAL;
442                         break;
443
444                 case BPF_JMP|BPF_JEQ|BPF_K:
445                 case BPF_JMP|BPF_JEQ|BPF_X:
446                 case BPF_JMP|BPF_JGE|BPF_K:
447                 case BPF_JMP|BPF_JGE|BPF_X:
448                 case BPF_JMP|BPF_JGT|BPF_K:
449                 case BPF_JMP|BPF_JGT|BPF_X:
450                 case BPF_JMP|BPF_JSET|BPF_K:
451                 case BPF_JMP|BPF_JSET|BPF_X:
452                         /* for conditionals both must be safe */
453                         if (pc + ftest->jt + 1 >= flen ||
454                             pc + ftest->jf + 1 >= flen)
455                                 return -EINVAL;
456                         break;
457
458                 default:
459                         return -EINVAL;
460                 }
461         }
462
463         return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL;
464 }
465 EXPORT_SYMBOL(sk_chk_filter);
466
467 /**
468  *      sk_filter_rcu_release: Release a socket filter by rcu_head
469  *      @rcu: rcu_head that contains the sk_filter to free
470  */
471 static void sk_filter_rcu_release(struct rcu_head *rcu)
472 {
473         struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
474
475         sk_filter_release(fp);
476 }
477
478 static void sk_filter_delayed_uncharge(struct sock *sk, struct sk_filter *fp)
479 {
480         unsigned int size = sk_filter_len(fp);
481
482         atomic_sub(size, &sk->sk_omem_alloc);
483         call_rcu_bh(&fp->rcu, sk_filter_rcu_release);
484 }
485
/**
 *	sk_attach_filter - attach a socket filter
 *	@fprog: the filter program
 *	@sk: the socket to use
 *
 * Attach the user's filter code. We first run some sanity checks on
 * it to make sure it does not explode on us later. If an error
 * occurs or there is insufficient memory for the filter a negative
 * errno code is returned. On success the return is zero.
 */
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
	struct sk_filter *fp, *old_fp;
	/* fprog->len is a u16, so this multiply cannot overflow. */
	unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
	int err;

	/* Make sure new filter is there and in the right amounts. */
	if (fprog->filter == NULL)
		return -EINVAL;

	/* Header plus instruction array in one allocation, charged to sk. */
	fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
	if (!fp)
		return -ENOMEM;
	if (copy_from_user(fp->insns, fprog->filter, fsize)) {
		/* refcnt not yet initialized: free directly, not via release */
		sock_kfree_s(sk, fp, fsize+sizeof(*fp));
		return -EFAULT;
	}

	atomic_set(&fp->refcnt, 1);
	fp->len = fprog->len;

	/* Validate before the filter becomes reachable by sk_filter(). */
	err = sk_chk_filter(fp->insns, fp->len);
	if (err) {
		/* refcnt is live now, so drop it through the uncharge path */
		sk_filter_uncharge(sk, fp);
		return err;
	}

	/* Publish the new filter; rcu_assign_pointer orders the filter
	 * contents before the pointer store for concurrent readers. */
	rcu_read_lock_bh();
	old_fp = rcu_dereference(sk->sk_filter);
	rcu_assign_pointer(sk->sk_filter, fp);
	rcu_read_unlock_bh();

	/* Old filter (if any) is freed after an RCU grace period. */
	if (old_fp)
		sk_filter_delayed_uncharge(sk, old_fp);
	return 0;
}
532
533 int sk_detach_filter(struct sock *sk)
534 {
535         int ret = -ENOENT;
536         struct sk_filter *filter;
537
538         rcu_read_lock_bh();
539         filter = rcu_dereference(sk->sk_filter);
540         if (filter) {
541                 rcu_assign_pointer(sk->sk_filter, NULL);
542                 sk_filter_delayed_uncharge(sk, filter);
543                 ret = 0;
544         }
545         rcu_read_unlock_bh();
546         return ret;
547 }