net: Tyop of sk_filter() comment
[linux-2.6.git] / net / core / filter.c
1 /*
2  * Linux Socket Filter - Kernel level socket filtering
3  *
4  * Author:
5  *     Jay Schulist <jschlst@samba.org>
6  *
7  * Based on the design of:
8  *     - The Berkeley Packet Filter
9  *
10  * This program is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU General Public License
12  * as published by the Free Software Foundation; either version
13  * 2 of the License, or (at your option) any later version.
14  *
15  * Andi Kleen - Fix a few bad bugs and races.
16  * Kris Katterjohn - Added many additional checks in sk_chk_filter()
17  */
18
19 #include <linux/module.h>
20 #include <linux/types.h>
21 #include <linux/mm.h>
22 #include <linux/fcntl.h>
23 #include <linux/socket.h>
24 #include <linux/in.h>
25 #include <linux/inet.h>
26 #include <linux/netdevice.h>
27 #include <linux/if_packet.h>
28 #include <net/ip.h>
29 #include <net/protocol.h>
30 #include <net/netlink.h>
31 #include <linux/skbuff.h>
32 #include <net/sock.h>
33 #include <linux/errno.h>
34 #include <linux/timer.h>
35 #include <asm/system.h>
36 #include <asm/uaccess.h>
37 #include <asm/unaligned.h>
38 #include <linux/filter.h>
39
40 /* No hurry in this branch */
41 static void *__load_pointer(struct sk_buff *skb, int k)
42 {
43         u8 *ptr = NULL;
44
45         if (k >= SKF_NET_OFF)
46                 ptr = skb_network_header(skb) + k - SKF_NET_OFF;
47         else if (k >= SKF_LL_OFF)
48                 ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
49
50         if (ptr >= skb->head && ptr < skb_tail_pointer(skb))
51                 return ptr;
52         return NULL;
53 }
54
55 static inline void *load_pointer(struct sk_buff *skb, int k,
56                                  unsigned int size, void *buffer)
57 {
58         if (k >= 0)
59                 return skb_header_pointer(skb, k, size, buffer);
60         else {
61                 if (k >= SKF_AD_OFF)
62                         return NULL;
63                 return __load_pointer(skb, k);
64         }
65 }
66
67 /**
68  *      sk_filter - run a packet through a socket filter
69  *      @sk: sock associated with &sk_buff
70  *      @skb: buffer to filter
71  *
72  * Run the filter code and then cut skb->data to correct size returned by
73  * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
74  * than pkt_len we keep whole skb->data. This is the socket level
75  * wrapper to sk_run_filter. It returns 0 if the packet should
76  * be accepted or -EPERM if the packet should be tossed.
77  *
78  */
79 int sk_filter(struct sock *sk, struct sk_buff *skb)
80 {
81         int err;
82         struct sk_filter *filter;
83
84         err = security_sock_rcv_skb(sk, skb);
85         if (err)
86                 return err;
87
88         rcu_read_lock_bh();
89         filter = rcu_dereference(sk->sk_filter);
90         if (filter) {
91                 unsigned int pkt_len = sk_run_filter(skb, filter->insns,
92                                 filter->len);
93                 err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
94         }
95         rcu_read_unlock_bh();
96
97         return err;
98 }
99 EXPORT_SYMBOL(sk_filter);
100
101 /**
102  *      sk_run_filter - run a filter on a socket
103  *      @skb: buffer to run the filter on
104  *      @filter: filter to apply
105  *      @flen: length of filter
106  *
107  * Decode and apply filter instructions to the skb->data.
108  * Return length to keep, 0 for none. skb is the data we are
109  * filtering, filter is the array of filter instructions, and
110  * len is the number of filter blocks in the array.
111  */
112 unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
113 {
114         struct sock_filter *fentry;     /* We walk down these */
115         void *ptr;
116         u32 A = 0;                      /* Accumulator */
117         u32 X = 0;                      /* Index Register */
118         u32 mem[BPF_MEMWORDS];          /* Scratch Memory Store */
119         u32 tmp;
120         int k;
121         int pc;
122
123         /*
124          * Process array of filter instructions.
125          */
126         for (pc = 0; pc < flen; pc++) {
127                 fentry = &filter[pc];
128
129                 switch (fentry->code) {
130                 case BPF_ALU|BPF_ADD|BPF_X:
131                         A += X;
132                         continue;
133                 case BPF_ALU|BPF_ADD|BPF_K:
134                         A += fentry->k;
135                         continue;
136                 case BPF_ALU|BPF_SUB|BPF_X:
137                         A -= X;
138                         continue;
139                 case BPF_ALU|BPF_SUB|BPF_K:
140                         A -= fentry->k;
141                         continue;
142                 case BPF_ALU|BPF_MUL|BPF_X:
143                         A *= X;
144                         continue;
145                 case BPF_ALU|BPF_MUL|BPF_K:
146                         A *= fentry->k;
147                         continue;
148                 case BPF_ALU|BPF_DIV|BPF_X:
149                         if (X == 0)
150                                 return 0;
151                         A /= X;
152                         continue;
153                 case BPF_ALU|BPF_DIV|BPF_K:
154                         A /= fentry->k;
155                         continue;
156                 case BPF_ALU|BPF_AND|BPF_X:
157                         A &= X;
158                         continue;
159                 case BPF_ALU|BPF_AND|BPF_K:
160                         A &= fentry->k;
161                         continue;
162                 case BPF_ALU|BPF_OR|BPF_X:
163                         A |= X;
164                         continue;
165                 case BPF_ALU|BPF_OR|BPF_K:
166                         A |= fentry->k;
167                         continue;
168                 case BPF_ALU|BPF_LSH|BPF_X:
169                         A <<= X;
170                         continue;
171                 case BPF_ALU|BPF_LSH|BPF_K:
172                         A <<= fentry->k;
173                         continue;
174                 case BPF_ALU|BPF_RSH|BPF_X:
175                         A >>= X;
176                         continue;
177                 case BPF_ALU|BPF_RSH|BPF_K:
178                         A >>= fentry->k;
179                         continue;
180                 case BPF_ALU|BPF_NEG:
181                         A = -A;
182                         continue;
183                 case BPF_JMP|BPF_JA:
184                         pc += fentry->k;
185                         continue;
186                 case BPF_JMP|BPF_JGT|BPF_K:
187                         pc += (A > fentry->k) ? fentry->jt : fentry->jf;
188                         continue;
189                 case BPF_JMP|BPF_JGE|BPF_K:
190                         pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
191                         continue;
192                 case BPF_JMP|BPF_JEQ|BPF_K:
193                         pc += (A == fentry->k) ? fentry->jt : fentry->jf;
194                         continue;
195                 case BPF_JMP|BPF_JSET|BPF_K:
196                         pc += (A & fentry->k) ? fentry->jt : fentry->jf;
197                         continue;
198                 case BPF_JMP|BPF_JGT|BPF_X:
199                         pc += (A > X) ? fentry->jt : fentry->jf;
200                         continue;
201                 case BPF_JMP|BPF_JGE|BPF_X:
202                         pc += (A >= X) ? fentry->jt : fentry->jf;
203                         continue;
204                 case BPF_JMP|BPF_JEQ|BPF_X:
205                         pc += (A == X) ? fentry->jt : fentry->jf;
206                         continue;
207                 case BPF_JMP|BPF_JSET|BPF_X:
208                         pc += (A & X) ? fentry->jt : fentry->jf;
209                         continue;
210                 case BPF_LD|BPF_W|BPF_ABS:
211                         k = fentry->k;
212 load_w:
213                         ptr = load_pointer(skb, k, 4, &tmp);
214                         if (ptr != NULL) {
215                                 A = get_unaligned_be32(ptr);
216                                 continue;
217                         }
218                         break;
219                 case BPF_LD|BPF_H|BPF_ABS:
220                         k = fentry->k;
221 load_h:
222                         ptr = load_pointer(skb, k, 2, &tmp);
223                         if (ptr != NULL) {
224                                 A = get_unaligned_be16(ptr);
225                                 continue;
226                         }
227                         break;
228                 case BPF_LD|BPF_B|BPF_ABS:
229                         k = fentry->k;
230 load_b:
231                         ptr = load_pointer(skb, k, 1, &tmp);
232                         if (ptr != NULL) {
233                                 A = *(u8 *)ptr;
234                                 continue;
235                         }
236                         break;
237                 case BPF_LD|BPF_W|BPF_LEN:
238                         A = skb->len;
239                         continue;
240                 case BPF_LDX|BPF_W|BPF_LEN:
241                         X = skb->len;
242                         continue;
243                 case BPF_LD|BPF_W|BPF_IND:
244                         k = X + fentry->k;
245                         goto load_w;
246                 case BPF_LD|BPF_H|BPF_IND:
247                         k = X + fentry->k;
248                         goto load_h;
249                 case BPF_LD|BPF_B|BPF_IND:
250                         k = X + fentry->k;
251                         goto load_b;
252                 case BPF_LDX|BPF_B|BPF_MSH:
253                         ptr = load_pointer(skb, fentry->k, 1, &tmp);
254                         if (ptr != NULL) {
255                                 X = (*(u8 *)ptr & 0xf) << 2;
256                                 continue;
257                         }
258                         return 0;
259                 case BPF_LD|BPF_IMM:
260                         A = fentry->k;
261                         continue;
262                 case BPF_LDX|BPF_IMM:
263                         X = fentry->k;
264                         continue;
265                 case BPF_LD|BPF_MEM:
266                         A = mem[fentry->k];
267                         continue;
268                 case BPF_LDX|BPF_MEM:
269                         X = mem[fentry->k];
270                         continue;
271                 case BPF_MISC|BPF_TAX:
272                         X = A;
273                         continue;
274                 case BPF_MISC|BPF_TXA:
275                         A = X;
276                         continue;
277                 case BPF_RET|BPF_K:
278                         return fentry->k;
279                 case BPF_RET|BPF_A:
280                         return A;
281                 case BPF_ST:
282                         mem[fentry->k] = A;
283                         continue;
284                 case BPF_STX:
285                         mem[fentry->k] = X;
286                         continue;
287                 default:
288                         WARN_ON(1);
289                         return 0;
290                 }
291
292                 /*
293                  * Handle ancillary data, which are impossible
294                  * (or very difficult) to get parsing packet contents.
295                  */
296                 switch (k-SKF_AD_OFF) {
297                 case SKF_AD_PROTOCOL:
298                         A = ntohs(skb->protocol);
299                         continue;
300                 case SKF_AD_PKTTYPE:
301                         A = skb->pkt_type;
302                         continue;
303                 case SKF_AD_IFINDEX:
304                         A = skb->dev->ifindex;
305                         continue;
306                 case SKF_AD_NLATTR: {
307                         struct nlattr *nla;
308
309                         if (skb_is_nonlinear(skb))
310                                 return 0;
311                         if (A > skb->len - sizeof(struct nlattr))
312                                 return 0;
313
314                         nla = nla_find((struct nlattr *)&skb->data[A],
315                                        skb->len - A, X);
316                         if (nla)
317                                 A = (void *)nla - (void *)skb->data;
318                         else
319                                 A = 0;
320                         continue;
321                 }
322                 default:
323                         return 0;
324                 }
325         }
326
327         return 0;
328 }
329 EXPORT_SYMBOL(sk_run_filter);
330
331 /**
332  *      sk_chk_filter - verify socket filter code
333  *      @filter: filter to verify
334  *      @flen: length of filter
335  *
336  * Check the user's filter code. If we let some ugly
337  * filter code slip through kaboom! The filter must contain
338  * no references or jumps that are out of range, no illegal
339  * instructions, and must end with a RET instruction.
340  *
341  * All jumps are forward as they are not signed.
342  *
343  * Returns 0 if the rule set is legal or -EINVAL if not.
344  */
345 int sk_chk_filter(struct sock_filter *filter, int flen)
346 {
347         struct sock_filter *ftest;
348         int pc;
349
350         if (flen == 0 || flen > BPF_MAXINSNS)
351                 return -EINVAL;
352
353         /* check the filter code now */
354         for (pc = 0; pc < flen; pc++) {
355                 ftest = &filter[pc];
356
357                 /* Only allow valid instructions */
358                 switch (ftest->code) {
359                 case BPF_ALU|BPF_ADD|BPF_K:
360                 case BPF_ALU|BPF_ADD|BPF_X:
361                 case BPF_ALU|BPF_SUB|BPF_K:
362                 case BPF_ALU|BPF_SUB|BPF_X:
363                 case BPF_ALU|BPF_MUL|BPF_K:
364                 case BPF_ALU|BPF_MUL|BPF_X:
365                 case BPF_ALU|BPF_DIV|BPF_X:
366                 case BPF_ALU|BPF_AND|BPF_K:
367                 case BPF_ALU|BPF_AND|BPF_X:
368                 case BPF_ALU|BPF_OR|BPF_K:
369                 case BPF_ALU|BPF_OR|BPF_X:
370                 case BPF_ALU|BPF_LSH|BPF_K:
371                 case BPF_ALU|BPF_LSH|BPF_X:
372                 case BPF_ALU|BPF_RSH|BPF_K:
373                 case BPF_ALU|BPF_RSH|BPF_X:
374                 case BPF_ALU|BPF_NEG:
375                 case BPF_LD|BPF_W|BPF_ABS:
376                 case BPF_LD|BPF_H|BPF_ABS:
377                 case BPF_LD|BPF_B|BPF_ABS:
378                 case BPF_LD|BPF_W|BPF_LEN:
379                 case BPF_LD|BPF_W|BPF_IND:
380                 case BPF_LD|BPF_H|BPF_IND:
381                 case BPF_LD|BPF_B|BPF_IND:
382                 case BPF_LD|BPF_IMM:
383                 case BPF_LDX|BPF_W|BPF_LEN:
384                 case BPF_LDX|BPF_B|BPF_MSH:
385                 case BPF_LDX|BPF_IMM:
386                 case BPF_MISC|BPF_TAX:
387                 case BPF_MISC|BPF_TXA:
388                 case BPF_RET|BPF_K:
389                 case BPF_RET|BPF_A:
390                         break;
391
392                 /* Some instructions need special checks */
393
394                 case BPF_ALU|BPF_DIV|BPF_K:
395                         /* check for division by zero */
396                         if (ftest->k == 0)
397                                 return -EINVAL;
398                         break;
399
400                 case BPF_LD|BPF_MEM:
401                 case BPF_LDX|BPF_MEM:
402                 case BPF_ST:
403                 case BPF_STX:
404                         /* check for invalid memory addresses */
405                         if (ftest->k >= BPF_MEMWORDS)
406                                 return -EINVAL;
407                         break;
408
409                 case BPF_JMP|BPF_JA:
410                         /*
411                          * Note, the large ftest->k might cause loops.
412                          * Compare this with conditional jumps below,
413                          * where offsets are limited. --ANK (981016)
414                          */
415                         if (ftest->k >= (unsigned)(flen-pc-1))
416                                 return -EINVAL;
417                         break;
418
419                 case BPF_JMP|BPF_JEQ|BPF_K:
420                 case BPF_JMP|BPF_JEQ|BPF_X:
421                 case BPF_JMP|BPF_JGE|BPF_K:
422                 case BPF_JMP|BPF_JGE|BPF_X:
423                 case BPF_JMP|BPF_JGT|BPF_K:
424                 case BPF_JMP|BPF_JGT|BPF_X:
425                 case BPF_JMP|BPF_JSET|BPF_K:
426                 case BPF_JMP|BPF_JSET|BPF_X:
427                         /* for conditionals both must be safe */
428                         if (pc + ftest->jt + 1 >= flen ||
429                             pc + ftest->jf + 1 >= flen)
430                                 return -EINVAL;
431                         break;
432
433                 default:
434                         return -EINVAL;
435                 }
436         }
437
438         return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL;
439 }
440 EXPORT_SYMBOL(sk_chk_filter);
441
442 /**
443  *      sk_filter_rcu_release: Release a socket filter by rcu_head
444  *      @rcu: rcu_head that contains the sk_filter to free
445  */
446 static void sk_filter_rcu_release(struct rcu_head *rcu)
447 {
448         struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
449
450         sk_filter_release(fp);
451 }
452
453 static void sk_filter_delayed_uncharge(struct sock *sk, struct sk_filter *fp)
454 {
455         unsigned int size = sk_filter_len(fp);
456
457         atomic_sub(size, &sk->sk_omem_alloc);
458         call_rcu_bh(&fp->rcu, sk_filter_rcu_release);
459 }
460
461 /**
462  *      sk_attach_filter - attach a socket filter
463  *      @fprog: the filter program
464  *      @sk: the socket to use
465  *
466  * Attach the user's filter code. We first run some sanity checks on
467  * it to make sure it does not explode on us later. If an error
468  * occurs or there is insufficient memory for the filter a negative
469  * errno code is returned. On success the return is zero.
470  */
471 int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
472 {
473         struct sk_filter *fp, *old_fp;
474         unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
475         int err;
476
477         /* Make sure new filter is there and in the right amounts. */
478         if (fprog->filter == NULL)
479                 return -EINVAL;
480
481         fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
482         if (!fp)
483                 return -ENOMEM;
484         if (copy_from_user(fp->insns, fprog->filter, fsize)) {
485                 sock_kfree_s(sk, fp, fsize+sizeof(*fp));
486                 return -EFAULT;
487         }
488
489         atomic_set(&fp->refcnt, 1);
490         fp->len = fprog->len;
491
492         err = sk_chk_filter(fp->insns, fp->len);
493         if (err) {
494                 sk_filter_uncharge(sk, fp);
495                 return err;
496         }
497
498         rcu_read_lock_bh();
499         old_fp = rcu_dereference(sk->sk_filter);
500         rcu_assign_pointer(sk->sk_filter, fp);
501         rcu_read_unlock_bh();
502
503         if (old_fp)
504                 sk_filter_delayed_uncharge(sk, old_fp);
505         return 0;
506 }
507
508 int sk_detach_filter(struct sock *sk)
509 {
510         int ret = -ENOENT;
511         struct sk_filter *filter;
512
513         rcu_read_lock_bh();
514         filter = rcu_dereference(sk->sk_filter);
515         if (filter) {
516                 rcu_assign_pointer(sk->sk_filter, NULL);
517                 sk_filter_delayed_uncharge(sk, filter);
518                 ret = 0;
519         }
520         rcu_read_unlock_bh();
521         return ret;
522 }