net/ipv4/netfilter/ip_tables.c
1 /*
2  * Packet matching code.
3  *
4  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5  * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  *
11  * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
12  *      - increase module usage count as soon as we have rules inside
13  *        a table
14  */
15 #include <linux/config.h>
16 #include <linux/cache.h>
17 #include <linux/capability.h>
18 #include <linux/skbuff.h>
19 #include <linux/kmod.h>
20 #include <linux/vmalloc.h>
21 #include <linux/netdevice.h>
22 #include <linux/module.h>
23 #include <linux/tcp.h>
24 #include <linux/udp.h>
25 #include <linux/icmp.h>
26 #include <net/ip.h>
27 #include <asm/uaccess.h>
28 #include <asm/semaphore.h>
29 #include <linux/proc_fs.h>
30 #include <linux/err.h>
31 #include <linux/cpumask.h>
32
33 #include <linux/netfilter_ipv4/ip_tables.h>
34
35 MODULE_LICENSE("GPL");
36 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
37 MODULE_DESCRIPTION("IPv4 packet filter");
38
39 /*#define DEBUG_IP_FIREWALL*/
40 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
41 /*#define DEBUG_IP_FIREWALL_USER*/
42
43 #ifdef DEBUG_IP_FIREWALL
44 #define dprintf(format, args...)  printk(format , ## args)
45 #else
46 #define dprintf(format, args...)
47 #endif
48
49 #ifdef DEBUG_IP_FIREWALL_USER
50 #define duprintf(format, args...) printk(format , ## args)
51 #else
52 #define duprintf(format, args...)
53 #endif
54
55 #ifdef CONFIG_NETFILTER_DEBUG
56 #define IP_NF_ASSERT(x)                                         \
57 do {                                                            \
58         if (!(x))                                               \
59                 printk("IP_NF_ASSERT: %s:%s:%u\n",              \
60                        __FUNCTION__, __FILE__, __LINE__);       \
61 } while(0)
62 #else
63 #define IP_NF_ASSERT(x)
64 #endif
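/* Round x up to the next multiple of SMP_CACHE_BYTES. */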
65 #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
66
67 static DECLARE_MUTEX(ipt_mutex);
68
69 /* Must have mutex */
70 #define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
71 #define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
72 #include <linux/netfilter_ipv4/listhelp.h>
73
74 #if 0
75 /* All the better to debug you with... */
76 #define static
77 #define inline
78 #endif
79
80 /*
81    We keep a set of rules for each CPU, so we can avoid write-locking
82    them in the softirq when updating the counters and therefore
83    only need to read-lock in the softirq; doing a write_lock_bh() in user
84    context stops packets coming through and allows user context to read
85    the counters or update the rules.
86
87    Hence the start of any table is given by get_table() below.  */
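/* The lock referred to above is table->lock (an rwlock): ipt_do_table()
   takes read_lock_bh() per packet, while user-context operations
   (rule replacement, reading or adding counters) take write_lock_bh()
   to get a stable view of the per-cpu copies. */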
88
89 /* The table itself */
90 struct ipt_table_info
91 {
92         /* Size per table */
93         unsigned int size;
94         /* Number of entries: FIXME. --RR */
95         unsigned int number;
96         /* Initial number of entries. Needed for module usage count */
97         unsigned int initial_entries;
98
99         /* Entry points and underflows */
100         unsigned int hook_entry[NF_IP_NUMHOOKS];
101         unsigned int underflow[NF_IP_NUMHOOKS];
102
103         /* ipt_entry tables: one per CPU */
104         void *entries[NR_CPUS];
105 };
106
107 static LIST_HEAD(ipt_target);
108 static LIST_HEAD(ipt_match);
109 static LIST_HEAD(ipt_tables);
110 #define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0)
111 #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
112
113 #if 0
114 #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
115 #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
116 #define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
117 #endif
118
119 /* Returns whether the packet matches the rule or not. */
120 static inline int
121 ip_packet_match(const struct iphdr *ip,
122                 const char *indev,
123                 const char *outdev,
124                 const struct ipt_ip *ipinfo,
125                 int isfrag)
126 {
127         size_t i;
128         unsigned long ret;
129
130 #define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
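/* FWINV(x, flag): x, with its sense inverted when the corresponding
   IPT_INV_* flag is set in the rule, i.e. "matches" becomes "does not
   match" for a negated rule. */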
131
132         if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
133                   IPT_INV_SRCIP)
134             || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
135                      IPT_INV_DSTIP)) {
136                 dprintf("Source or dest mismatch.\n");
137
138                 dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
139                         NIPQUAD(ip->saddr),
140                         NIPQUAD(ipinfo->smsk.s_addr),
141                         NIPQUAD(ipinfo->src.s_addr),
142                         ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
143                 dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
144                         NIPQUAD(ip->daddr),
145                         NIPQUAD(ipinfo->dmsk.s_addr),
146                         NIPQUAD(ipinfo->dst.s_addr),
147                         ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
148                 return 0;
149         }
150
151         /* Look for ifname matches; this should unroll nicely. */
152         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
153                 ret |= (((const unsigned long *)indev)[i]
154                         ^ ((const unsigned long *)ipinfo->iniface)[i])
155                         & ((const unsigned long *)ipinfo->iniface_mask)[i];
156         }
157
158         if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
159                 dprintf("VIA in mismatch (%s vs %s).%s\n",
160                         indev, ipinfo->iniface,
161                         ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
162                 return 0;
163         }
164
165         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
166                 ret |= (((const unsigned long *)outdev)[i]
167                         ^ ((const unsigned long *)ipinfo->outiface)[i])
168                         & ((const unsigned long *)ipinfo->outiface_mask)[i];
169         }
170
171         if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
172                 dprintf("VIA out mismatch (%s vs %s).%s\n",
173                         outdev, ipinfo->outiface,
174                         ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
175                 return 0;
176         }
177
178         /* Check specific protocol */
179         if (ipinfo->proto
180             && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
181                 dprintf("Packet protocol %hi does not match %hi.%s\n",
182                         ip->protocol, ipinfo->proto,
183                         ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
184                 return 0;
185         }
186
187         /* If we have a fragment rule but the packet is not a fragment
188          * then we return zero */
189         if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
190                 dprintf("Fragment rule but not fragment.%s\n",
191                         ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
192                 return 0;
193         }
194
195         return 1;
196 }
197
198 static inline int
199 ip_checkentry(const struct ipt_ip *ip)
200 {
201         if (ip->flags & ~IPT_F_MASK) {
202                 duprintf("Unknown flag bits set: %08X\n",
203                          ip->flags & ~IPT_F_MASK);
204                 return 0;
205         }
206         if (ip->invflags & ~IPT_INV_MASK) {
207                 duprintf("Unknown invflag bits set: %08X\n",
208                          ip->invflags & ~IPT_INV_MASK);
209                 return 0;
210         }
211         return 1;
212 }
213
214 static unsigned int
215 ipt_error(struct sk_buff **pskb,
216           const struct net_device *in,
217           const struct net_device *out,
218           unsigned int hooknum,
219           const void *targinfo,
220           void *userinfo)
221 {
222         if (net_ratelimit())
223                 printk("ip_tables: error: `%s'\n", (char *)targinfo);
224
225         return NF_DROP;
226 }
227
228 static inline
229 int do_match(struct ipt_entry_match *m,
230              const struct sk_buff *skb,
231              const struct net_device *in,
232              const struct net_device *out,
233              int offset,
234              int *hotdrop)
235 {
236         /* Stop iteration if it doesn't match */
237         if (!m->u.kernel.match->match(skb, in, out, m->data, offset, hotdrop))
238                 return 1;
239         else
240                 return 0;
241 }
242
243 static inline struct ipt_entry *
244 get_entry(void *base, unsigned int offset)
245 {
246         return (struct ipt_entry *)(base + offset);
247 }
248
249 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
250 unsigned int
251 ipt_do_table(struct sk_buff **pskb,
252              unsigned int hook,
253              const struct net_device *in,
254              const struct net_device *out,
255              struct ipt_table *table,
256              void *userdata)
257 {
258         static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
259         u_int16_t offset;
260         struct iphdr *ip;
261         u_int16_t datalen;
262         int hotdrop = 0;
263         /* Initializing verdict to NF_DROP keeps gcc happy. */
264         unsigned int verdict = NF_DROP;
265         const char *indev, *outdev;
266         void *table_base;
267         struct ipt_entry *e, *back;
268
269         /* Initialization */
270         ip = (*pskb)->nh.iph;
271         datalen = (*pskb)->len - ip->ihl * 4;
272         indev = in ? in->name : nulldevname;
273         outdev = out ? out->name : nulldevname;
274         /* We handle fragments by dealing with the first fragment as
275          * if it was a normal packet.  All other fragments are treated
276          * normally, except that they will NEVER match rules that ask
277          * things we don't know (ie. tcp syn flag or ports).  If the
278          * rule is also a fragment-specific rule, non-fragments won't
279          * match it. */
280         offset = ntohs(ip->frag_off) & IP_OFFSET;
281
282         read_lock_bh(&table->lock);
283         IP_NF_ASSERT(table->valid_hooks & (1 << hook));
284         table_base = (void *)table->private->entries[smp_processor_id()];
285         e = get_entry(table_base, table->private->hook_entry[hook]);
286
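        /* comefrom of the first entry doubles as a debug flag for this
           per-cpu copy: 0xdead57ac = no traversal in progress,
           0x57acc001 = this CPU is currently walking the table,
           0xeeeeeeec = set around a target call that may legitimately
           re-enter ipt_do_table(). */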
287 #ifdef CONFIG_NETFILTER_DEBUG
288         /* Check no one else is using our table */
289         if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
290             && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
291                 printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
292                        smp_processor_id(),
293                        table->name,
294                        &((struct ipt_entry *)table_base)->comefrom,
295                        ((struct ipt_entry *)table_base)->comefrom);
296         }
297         ((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
298 #endif
299
300         /* For return from builtin chain */
301         back = get_entry(table_base, table->private->underflow[hook]);
302
303         do {
304                 IP_NF_ASSERT(e);
305                 IP_NF_ASSERT(back);
306                 if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
307                         struct ipt_entry_target *t;
308
309                         if (IPT_MATCH_ITERATE(e, do_match,
310                                               *pskb, in, out,
311                                               offset, &hotdrop) != 0)
312                                 goto no_match;
313
314                         ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
315
316                         t = ipt_get_target(e);
317                         IP_NF_ASSERT(t->u.kernel.target);
318                         /* Standard target? */
319                         if (!t->u.kernel.target->target) {
320                                 int v;
321
322                                 v = ((struct ipt_standard_target *)t)->verdict;
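                                /* Negative verdicts encode the NF_*
                                   verdicts (decoded as -v - 1 below);
                                   IPT_RETURN pops back to the calling
                                   chain, and v >= 0 is a jump offset
                                   into this table. */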
323                                 if (v < 0) {
324                                         /* Pop from stack? */
325                                         if (v != IPT_RETURN) {
326                                                 verdict = (unsigned)(-v) - 1;
327                                                 break;
328                                         }
329                                         e = back;
330                                         back = get_entry(table_base,
331                                                          back->comefrom);
332                                         continue;
333                                 }
334                                 if (table_base + v != (void *)e + e->next_offset
335                                     && !(e->ip.flags & IPT_F_GOTO)) {
336                                         /* Save old back ptr in next entry */
337                                         struct ipt_entry *next
338                                                 = (void *)e + e->next_offset;
339                                         next->comefrom
340                                                 = (void *)back - table_base;
341                                         /* set back pointer to next entry */
342                                         back = next;
343                                 }
344
345                                 e = get_entry(table_base, v);
346                         } else {
347                                 /* Targets which reenter must return
348                                    abs. verdicts */
349 #ifdef CONFIG_NETFILTER_DEBUG
350                                 ((struct ipt_entry *)table_base)->comefrom
351                                         = 0xeeeeeeec;
352 #endif
353                                 verdict = t->u.kernel.target->target(pskb,
354                                                                      in, out,
355                                                                      hook,
356                                                                      t->data,
357                                                                      userdata);
358
359 #ifdef CONFIG_NETFILTER_DEBUG
360                                 if (((struct ipt_entry *)table_base)->comefrom
361                                     != 0xeeeeeeec
362                                     && verdict == IPT_CONTINUE) {
363                                         printk("Target %s reentered!\n",
364                                                t->u.kernel.target->name);
365                                         verdict = NF_DROP;
366                                 }
367                                 ((struct ipt_entry *)table_base)->comefrom
368                                         = 0x57acc001;
369 #endif
370                                 /* Target might have changed the skb, so reload the IP header. */
371                                 ip = (*pskb)->nh.iph;
372                                 datalen = (*pskb)->len - ip->ihl * 4;
373
374                                 if (verdict == IPT_CONTINUE)
375                                         e = (void *)e + e->next_offset;
376                                 else
377                                         /* Verdict */
378                                         break;
379                         }
380                 } else {
381
382                 no_match:
383                         e = (void *)e + e->next_offset;
384                 }
385         } while (!hotdrop);
386
387 #ifdef CONFIG_NETFILTER_DEBUG
388         ((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
389 #endif
390         read_unlock_bh(&table->lock);
391
392 #ifdef DEBUG_ALLOW_ALL
393         return NF_ACCEPT;
394 #else
395         if (hotdrop)
396                 return NF_DROP;
397         else return verdict;
398 #endif
399 }
400
401 /*
402  * These are weird, but module loading must not be done with mutex
403  * held (since they will register), and we have to have a single
404  * function to use try_then_request_module().
405  */
406
407 /* Find table by name, grabs mutex & ref.  Returns ERR_PTR() on error. */
408 static inline struct ipt_table *find_table_lock(const char *name)
409 {
410         struct ipt_table *t;
411
412         if (down_interruptible(&ipt_mutex) != 0)
413                 return ERR_PTR(-EINTR);
414
415         list_for_each_entry(t, &ipt_tables, list)
416                 if (strcmp(t->name, name) == 0 && try_module_get(t->me))
417                         return t;
418         up(&ipt_mutex);
419         return NULL;
420 }
421
422 /* Find match, grabs ref.  Returns ERR_PTR() on error. */
423 static inline struct ipt_match *find_match(const char *name, u8 revision)
424 {
425         struct ipt_match *m;
426         int err = 0;
427
428         if (down_interruptible(&ipt_mutex) != 0)
429                 return ERR_PTR(-EINTR);
430
431         list_for_each_entry(m, &ipt_match, list) {
432                 if (strcmp(m->name, name) == 0) {
433                         if (m->revision == revision) {
434                                 if (try_module_get(m->me)) {
435                                         up(&ipt_mutex);
436                                         return m;
437                                 }
438                         } else
439                                 err = -EPROTOTYPE; /* Found something. */
440                 }
441         }
442         up(&ipt_mutex);
443         return ERR_PTR(err);
444 }
445
446 /* Find target, grabs ref.  Returns ERR_PTR() on error. */
447 static inline struct ipt_target *find_target(const char *name, u8 revision)
448 {
449         struct ipt_target *t;
450         int err = 0;
451
452         if (down_interruptible(&ipt_mutex) != 0)
453                 return ERR_PTR(-EINTR);
454
455         list_for_each_entry(t, &ipt_target, list) {
456                 if (strcmp(t->name, name) == 0) {
457                         if (t->revision == revision) {
458                                 if (try_module_get(t->me)) {
459                                         up(&ipt_mutex);
460                                         return t;
461                                 }
462                         } else
463                                 err = -EPROTOTYPE; /* Found something. */
464                 }
465         }
466         up(&ipt_mutex);
467         return ERR_PTR(err);
468 }
469
470 struct ipt_target *ipt_find_target(const char *name, u8 revision)
471 {
472         struct ipt_target *target;
473
474         target = try_then_request_module(find_target(name, revision),
475                                          "ipt_%s", name);
476         if (IS_ERR(target) || !target)
477                 return NULL;
478         return target;
479 }
480
481 static int match_revfn(const char *name, u8 revision, int *bestp)
482 {
483         struct ipt_match *m;
484         int have_rev = 0;
485
486         list_for_each_entry(m, &ipt_match, list) {
487                 if (strcmp(m->name, name) == 0) {
488                         if (m->revision > *bestp)
489                                 *bestp = m->revision;
490                         if (m->revision == revision)
491                                 have_rev = 1;
492                 }
493         }
494         return have_rev;
495 }
496
497 static int target_revfn(const char *name, u8 revision, int *bestp)
498 {
499         struct ipt_target *t;
500         int have_rev = 0;
501
502         list_for_each_entry(t, &ipt_target, list) {
503                 if (strcmp(t->name, name) == 0) {
504                         if (t->revision > *bestp)
505                                 *bestp = t->revision;
506                         if (t->revision == revision)
507                                 have_rev = 1;
508                 }
509         }
510         return have_rev;
511 }
512
513 /* Returns 0 if no such extension exists at all (caller should try loading a module); else 1, with *err set. */
514 static inline int find_revision(const char *name, u8 revision,
515                                 int (*revfn)(const char *, u8, int *),
516                                 int *err)
517 {
518         int have_rev, best = -1;
519
520         if (down_interruptible(&ipt_mutex) != 0) {
521                 *err = -EINTR;
522                 return 1;
523         }
524         have_rev = revfn(name, revision, &best);
525         up(&ipt_mutex);
526
527         /* Nothing at all?  Return 0 to try loading module. */
528         if (best == -1) {
529                 *err = -ENOENT;
530                 return 0;
531         }
532
533         *err = best;
534         if (!have_rev)
535                 *err = -EPROTONOSUPPORT;
536         return 1;
537 }
538
539
540 /* All zeroes == unconditional rule. */
541 static inline int
542 unconditional(const struct ipt_ip *ip)
543 {
544         unsigned int i;
545
546         for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
547                 if (((__u32 *)ip)[i])
548                         return 0;
549
550         return 1;
551 }
552
553 /* Figures out from what hook each rule can be called: returns 0 if
554    there are loops.  Puts hook bitmask in comefrom. */
555 static int
556 mark_source_chains(struct ipt_table_info *newinfo,
557                    unsigned int valid_hooks, void *entry0)
558 {
559         unsigned int hook;
560
561         /* No recursion; use packet counter to save back ptrs (reset
562            to 0 as we leave), and comefrom to save source hook bitmask */
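        /* The (1 << NF_IP_NUMHOOKS) bit in comefrom marks entries already
           seen during this walk; meeting a marked entry again below means
           the rules contain a loop. */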
563         for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
564                 unsigned int pos = newinfo->hook_entry[hook];
565                 struct ipt_entry *e
566                         = (struct ipt_entry *)(entry0 + pos);
567
568                 if (!(valid_hooks & (1 << hook)))
569                         continue;
570
571                 /* Set initial back pointer. */
572                 e->counters.pcnt = pos;
573
574                 for (;;) {
575                         struct ipt_standard_target *t
576                                 = (void *)ipt_get_target(e);
577
578                         if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
579                                 printk("iptables: loop hook %u pos %u %08X.\n",
580                                        hook, pos, e->comefrom);
581                                 return 0;
582                         }
583                         e->comefrom
584                                 |= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
585
586                         /* Unconditional return/END. */
587                         if (e->target_offset == sizeof(struct ipt_entry)
588                             && (strcmp(t->target.u.user.name,
589                                        IPT_STANDARD_TARGET) == 0)
590                             && t->verdict < 0
591                             && unconditional(&e->ip)) {
592                                 unsigned int oldpos, size;
593
594                                 /* Return: backtrack through the last
595                                    big jump. */
596                                 do {
597                                         e->comefrom ^= (1<<NF_IP_NUMHOOKS);
598 #ifdef DEBUG_IP_FIREWALL_USER
599                                         if (e->comefrom
600                                             & (1 << NF_IP_NUMHOOKS)) {
601                                                 duprintf("Back unset "
602                                                          "on hook %u "
603                                                          "rule %u\n",
604                                                          hook, pos);
605                                         }
606 #endif
607                                         oldpos = pos;
608                                         pos = e->counters.pcnt;
609                                         e->counters.pcnt = 0;
610
611                                         /* We're at the start. */
612                                         if (pos == oldpos)
613                                                 goto next;
614
615                                         e = (struct ipt_entry *)
616                                                 (entry0 + pos);
617                                 } while (oldpos == pos + e->next_offset);
618
619                                 /* Move along one */
620                                 size = e->next_offset;
621                                 e = (struct ipt_entry *)
622                                         (entry0 + pos + size);
623                                 e->counters.pcnt = pos;
624                                 pos += size;
625                         } else {
626                                 int newpos = t->verdict;
627
628                                 if (strcmp(t->target.u.user.name,
629                                            IPT_STANDARD_TARGET) == 0
630                                     && newpos >= 0) {
631                                         /* This is a jump; chase it. */
632                                         duprintf("Jump rule %u -> %u\n",
633                                                  pos, newpos);
634                                 } else {
635                                         /* ... this is a fallthru */
636                                         newpos = pos + e->next_offset;
637                                 }
638                                 e = (struct ipt_entry *)
639                                         (entry0 + newpos);
640                                 e->counters.pcnt = pos;
641                                 pos = newpos;
642                         }
643                 }
644                 next:
645                 duprintf("Finished chain %u\n", hook);
646         }
647         return 1;
648 }
649
650 static inline int
651 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
652 {
653         if (i && (*i)-- == 0)
654                 return 1;
655
656         if (m->u.kernel.match->destroy)
657                 m->u.kernel.match->destroy(m->data,
658                                            m->u.match_size - sizeof(*m));
659         module_put(m->u.kernel.match->me);
660         return 0;
661 }
662
663 static inline int
664 standard_check(const struct ipt_entry_target *t,
665                unsigned int max_offset)
666 {
667         struct ipt_standard_target *targ = (void *)t;
668
669         /* Check standard info. */
670         if (t->u.target_size
671             != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
672                 duprintf("standard_check: target size %u != %u\n",
673                          t->u.target_size,
674                          IPT_ALIGN(sizeof(struct ipt_standard_target)));
675                 return 0;
676         }
677
678         if (targ->verdict >= 0
679             && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
680                 duprintf("ipt_standard_check: bad verdict (%i)\n",
681                          targ->verdict);
682                 return 0;
683         }
684
685         if (targ->verdict < -NF_MAX_VERDICT - 1) {
686                 duprintf("ipt_standard_check: bad negative verdict (%i)\n",
687                          targ->verdict);
688                 return 0;
689         }
690         return 1;
691 }
692
693 static inline int
694 check_match(struct ipt_entry_match *m,
695             const char *name,
696             const struct ipt_ip *ip,
697             unsigned int hookmask,
698             unsigned int *i)
699 {
700         struct ipt_match *match;
701
702         match = try_then_request_module(find_match(m->u.user.name,
703                                                    m->u.user.revision),
704                                         "ipt_%s", m->u.user.name);
705         if (IS_ERR(match) || !match) {
706                 duprintf("check_match: `%s' not found\n", m->u.user.name);
707                 return match ? PTR_ERR(match) : -ENOENT;
708         }
709         m->u.kernel.match = match;
710
711         if (m->u.kernel.match->checkentry
712             && !m->u.kernel.match->checkentry(name, ip, m->data,
713                                               m->u.match_size - sizeof(*m),
714                                               hookmask)) {
715                 module_put(m->u.kernel.match->me);
716                 duprintf("ip_tables: check failed for `%s'.\n",
717                          m->u.kernel.match->name);
718                 return -EINVAL;
719         }
720
721         (*i)++;
722         return 0;
723 }
724
725 static struct ipt_target ipt_standard_target;
726
727 static inline int
728 check_entry(struct ipt_entry *e, const char *name, unsigned int size,
729             unsigned int *i)
730 {
731         struct ipt_entry_target *t;
732         struct ipt_target *target;
733         int ret;
734         unsigned int j;
735
736         if (!ip_checkentry(&e->ip)) {
737                 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
738                 return -EINVAL;
739         }
740
741         j = 0;
742         ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
743         if (ret != 0)
744                 goto cleanup_matches;
745
746         t = ipt_get_target(e);
747         target = try_then_request_module(find_target(t->u.user.name,
748                                                      t->u.user.revision),
749                                          "ipt_%s", t->u.user.name);
750         if (IS_ERR(target) || !target) {
751                 duprintf("check_entry: `%s' not found\n", t->u.user.name);
752                 ret = target ? PTR_ERR(target) : -ENOENT;
753                 goto cleanup_matches;
754         }
755         t->u.kernel.target = target;
756
757         if (t->u.kernel.target == &ipt_standard_target) {
758                 if (!standard_check(t, size)) {
759                         ret = -EINVAL;
760                         goto cleanup_matches;
761                 }
762         } else if (t->u.kernel.target->checkentry
763                    && !t->u.kernel.target->checkentry(name, e, t->data,
764                                                       t->u.target_size
765                                                       - sizeof(*t),
766                                                       e->comefrom)) {
767                 module_put(t->u.kernel.target->me);
768                 duprintf("ip_tables: check failed for `%s'.\n",
769                          t->u.kernel.target->name);
770                 ret = -EINVAL;
771                 goto cleanup_matches;
772         }
773
774         (*i)++;
775         return 0;
776
777  cleanup_matches:
778         IPT_MATCH_ITERATE(e, cleanup_match, &j);
779         return ret;
780 }
781
782 static inline int
783 check_entry_size_and_hooks(struct ipt_entry *e,
784                            struct ipt_table_info *newinfo,
785                            unsigned char *base,
786                            unsigned char *limit,
787                            const unsigned int *hook_entries,
788                            const unsigned int *underflows,
789                            unsigned int *i)
790 {
791         unsigned int h;
792
793         if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
794             || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
795                 duprintf("Bad offset %p\n", e);
796                 return -EINVAL;
797         }
798
799         if (e->next_offset
800             < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
801                 duprintf("checking: element %p size %u\n",
802                          e, e->next_offset);
803                 return -EINVAL;
804         }
805
806         /* Check hooks & underflows */
807         for (h = 0; h < NF_IP_NUMHOOKS; h++) {
808                 if ((unsigned char *)e - base == hook_entries[h])
809                         newinfo->hook_entry[h] = hook_entries[h];
810                 if ((unsigned char *)e - base == underflows[h])
811                         newinfo->underflow[h] = underflows[h];
812         }
813
814         /* FIXME: underflows must be unconditional, standard verdicts
815            < 0 (not IPT_RETURN). --RR */
816
817         /* Clear counters and comefrom */
818         e->counters = ((struct ipt_counters) { 0, 0 });
819         e->comefrom = 0;
820
821         (*i)++;
822         return 0;
823 }
824
825 static inline int
826 cleanup_entry(struct ipt_entry *e, unsigned int *i)
827 {
828         struct ipt_entry_target *t;
829
830         if (i && (*i)-- == 0)
831                 return 1;
832
833         /* Cleanup all matches */
834         IPT_MATCH_ITERATE(e, cleanup_match, NULL);
835         t = ipt_get_target(e);
836         if (t->u.kernel.target->destroy)
837                 t->u.kernel.target->destroy(t->data,
838                                             t->u.target_size - sizeof(*t));
839         module_put(t->u.kernel.target->me);
840         return 0;
841 }
842
843 /* Checks and translates the user-supplied table segment (held in
844    newinfo) */
845 static int
846 translate_table(const char *name,
847                 unsigned int valid_hooks,
848                 struct ipt_table_info *newinfo,
849                 void *entry0,
850                 unsigned int size,
851                 unsigned int number,
852                 const unsigned int *hook_entries,
853                 const unsigned int *underflows)
854 {
855         unsigned int i;
856         int ret;
857
858         newinfo->size = size;
859         newinfo->number = number;
860
861         /* Init all hooks to impossible value. */
862         for (i = 0; i < NF_IP_NUMHOOKS; i++) {
863                 newinfo->hook_entry[i] = 0xFFFFFFFF;
864                 newinfo->underflow[i] = 0xFFFFFFFF;
865         }
866
867         duprintf("translate_table: size %u\n", newinfo->size);
868         i = 0;
869         /* Walk through entries, checking offsets. */
870         ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
871                                 check_entry_size_and_hooks,
872                                 newinfo,
873                                 entry0,
874                                 entry0 + size,
875                                 hook_entries, underflows, &i);
876         if (ret != 0)
877                 return ret;
878
879         if (i != number) {
880                 duprintf("translate_table: %u not %u entries\n",
881                          i, number);
882                 return -EINVAL;
883         }
884
885         /* Check hooks all assigned */
886         for (i = 0; i < NF_IP_NUMHOOKS; i++) {
887                 /* Only hooks which are valid */
888                 if (!(valid_hooks & (1 << i)))
889                         continue;
890                 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
891                         duprintf("Invalid hook entry %u %u\n",
892                                  i, hook_entries[i]);
893                         return -EINVAL;
894                 }
895                 if (newinfo->underflow[i] == 0xFFFFFFFF) {
896                         duprintf("Invalid underflow %u %u\n",
897                                  i, underflows[i]);
898                         return -EINVAL;
899                 }
900         }
901
902         if (!mark_source_chains(newinfo, valid_hooks, entry0))
903                 return -ELOOP;
904
905         /* Finally, each sanity check must pass */
906         i = 0;
907         ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
908                                 check_entry, name, size, &i);
909
910         if (ret != 0) {
911                 IPT_ENTRY_ITERATE(entry0, newinfo->size,
912                                   cleanup_entry, &i);
913                 return ret;
914         }
915
916         /* And one copy for every other CPU */
917         for_each_cpu(i) {
918                 if (newinfo->entries[i] && newinfo->entries[i] != entry0)
919                         memcpy(newinfo->entries[i], entry0, newinfo->size);
920         }
921
922         return ret;
923 }
924
925 static struct ipt_table_info *
926 replace_table(struct ipt_table *table,
927               unsigned int num_counters,
928               struct ipt_table_info *newinfo,
929               int *error)
930 {
931         struct ipt_table_info *oldinfo;
932
933 #ifdef CONFIG_NETFILTER_DEBUG
934         {
935                 int cpu;
936
937                 for_each_cpu(cpu) {
938                         struct ipt_entry *table_base = newinfo->entries[cpu];
939                         if (table_base)
940                                 table_base->comefrom = 0xdead57ac;
941                 }
942         }
943 #endif
944
945         /* Do the substitution. */
946         write_lock_bh(&table->lock);
947         /* Check inside lock: is the old number correct? */
948         if (num_counters != table->private->number) {
949                 duprintf("num_counters != table->private->number (%u/%u)\n",
950                          num_counters, table->private->number);
951                 write_unlock_bh(&table->lock);
952                 *error = -EAGAIN;
953                 return NULL;
954         }
955         oldinfo = table->private;
956         table->private = newinfo;
957         newinfo->initial_entries = oldinfo->initial_entries;
958         write_unlock_bh(&table->lock);
959
960         return oldinfo;
961 }
962
963 /* Gets counters. */
964 static inline int
965 add_entry_to_counter(const struct ipt_entry *e,
966                      struct ipt_counters total[],
967                      unsigned int *i)
968 {
969         ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
970
971         (*i)++;
972         return 0;
973 }
974
975 static inline int
976 set_entry_to_counter(const struct ipt_entry *e,
977                      struct ipt_counters total[],
978                      unsigned int *i)
979 {
980         SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
981
982         (*i)++;
983         return 0;
984 }
985
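/* Sum every CPU's counter copy into counters[].  Callers must keep the
   table from changing underneath us: copy_entries_to_user() holds
   write_lock_bh(&table->lock) around the call, and do_replace() only
   calls this on an oldinfo that has already been swapped out. */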
986 static void
987 get_counters(const struct ipt_table_info *t,
988              struct ipt_counters counters[])
989 {
990         unsigned int cpu;
991         unsigned int i;
992         unsigned int curcpu;
993
994         /* Instead of clearing the counters (with a prior memset())
995          * and then adding, we initialise them from the current CPU's
996          * copy and add the other CPUs' copies on top.
997          * We don't care about preemption here.
998          */
999         curcpu = raw_smp_processor_id();
1000
1001         i = 0;
1002         IPT_ENTRY_ITERATE(t->entries[curcpu],
1003                           t->size,
1004                           set_entry_to_counter,
1005                           counters,
1006                           &i);
1007
1008         for_each_cpu(cpu) {
1009                 if (cpu == curcpu)
1010                         continue;
1011                 i = 0;
1012                 IPT_ENTRY_ITERATE(t->entries[cpu],
1013                                   t->size,
1014                                   add_entry_to_counter,
1015                                   counters,
1016                                   &i);
1017         }
1018 }
1019
1020 static int
1021 copy_entries_to_user(unsigned int total_size,
1022                      struct ipt_table *table,
1023                      void __user *userptr)
1024 {
1025         unsigned int off, num, countersize;
1026         struct ipt_entry *e;
1027         struct ipt_counters *counters;
1028         int ret = 0;
1029         void *loc_cpu_entry;
1030
1031         /* We need an atomic snapshot of the counters: the rest doesn't
1032            change (other than comefrom, which userspace doesn't care
1033            about). */
1034         countersize = sizeof(struct ipt_counters) * table->private->number;
1035         counters = vmalloc_node(countersize, numa_node_id());
1036
1037         if (counters == NULL)
1038                 return -ENOMEM;
1039
1040         /* First, sum counters... */
1041         write_lock_bh(&table->lock);
1042         get_counters(table->private, counters);
1043         write_unlock_bh(&table->lock);
1044
1045         /* Choose the copy that is on our node/cpu.  This choice is
1046          * only a hint: the current thread may migrate to another cpu,
1047          * but every copy holds the same rules, so any one will do.
1048          */
1049         loc_cpu_entry = table->private->entries[raw_smp_processor_id()];
1050         /* ... then copy entire thing ... */
1051         if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
1052                 ret = -EFAULT;
1053                 goto free_counters;
1054         }
1055
1056         /* FIXME: use iterator macros --RR */
1057         /* ... then go back and fix counters and names */
1058         for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
1059                 unsigned int i;
1060                 struct ipt_entry_match *m;
1061                 struct ipt_entry_target *t;
1062
1063                 e = (struct ipt_entry *)(loc_cpu_entry + off);
1064                 if (copy_to_user(userptr + off
1065                                  + offsetof(struct ipt_entry, counters),
1066                                  &counters[num],
1067                                  sizeof(counters[num])) != 0) {
1068                         ret = -EFAULT;
1069                         goto free_counters;
1070                 }
1071
1072                 for (i = sizeof(struct ipt_entry);
1073                      i < e->target_offset;
1074                      i += m->u.match_size) {
1075                         m = (void *)e + i;
1076
1077                         if (copy_to_user(userptr + off + i
1078                                          + offsetof(struct ipt_entry_match,
1079                                                     u.user.name),
1080                                          m->u.kernel.match->name,
1081                                          strlen(m->u.kernel.match->name)+1)
1082                             != 0) {
1083                                 ret = -EFAULT;
1084                                 goto free_counters;
1085                         }
1086                 }
1087
1088                 t = ipt_get_target(e);
1089                 if (copy_to_user(userptr + off + e->target_offset
1090                                  + offsetof(struct ipt_entry_target,
1091                                             u.user.name),
1092                                  t->u.kernel.target->name,
1093                                  strlen(t->u.kernel.target->name)+1) != 0) {
1094                         ret = -EFAULT;
1095                         goto free_counters;
1096                 }
1097         }
1098
1099  free_counters:
1100         vfree(counters);
1101         return ret;
1102 }
1103
1104 static int
1105 get_entries(const struct ipt_get_entries *entries,
1106             struct ipt_get_entries __user *uptr)
1107 {
1108         int ret;
1109         struct ipt_table *t;
1110
1111         t = find_table_lock(entries->name);
1112         if (t && !IS_ERR(t)) {
1113                 duprintf("t->private->number = %u\n",
1114                          t->private->number);
1115                 if (entries->size == t->private->size)
1116                         ret = copy_entries_to_user(t->private->size,
1117                                                    t, uptr->entrytable);
1118                 else {
1119                         duprintf("get_entries: I've got %u not %u!\n",
1120                                  t->private->size,
1121                                  entries->size);
1122                         ret = -EINVAL;
1123                 }
1124                 module_put(t->me);
1125                 up(&ipt_mutex);
1126         } else
1127                 ret = t ? PTR_ERR(t) : -ENOENT;
1128
1129         return ret;
1130 }
1131
1132 static void free_table_info(struct ipt_table_info *info)
1133 {
1134         int cpu;
1135         for_each_cpu(cpu) {
1136                 if (info->size <= PAGE_SIZE)
1137                         kfree(info->entries[cpu]);
1138                 else
1139                         vfree(info->entries[cpu]);
1140         }
1141         kfree(info);
1142 }
1143
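/* One rule blob per possible CPU, allocated on that CPU's memory node:
   kmalloc for tables that fit within a page, vmalloc for larger ones.
   free_table_info() above makes the same size test when choosing
   between kfree() and vfree(). */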
1144 static struct ipt_table_info *alloc_table_info(unsigned int size)
1145 {
1146         struct ipt_table_info *newinfo;
1147         int cpu;
1148
1149         newinfo = kzalloc(sizeof(struct ipt_table_info), GFP_KERNEL);
1150         if (!newinfo)
1151                 return NULL;
1152
1153         newinfo->size = size;
1154
1155         for_each_cpu(cpu) {
1156                 if (size <= PAGE_SIZE)
1157                         newinfo->entries[cpu] = kmalloc_node(size,
1158                                 GFP_KERNEL,
1159                                 cpu_to_node(cpu));
1160                 else
1161                         newinfo->entries[cpu] = vmalloc_node(size, cpu_to_node(cpu));
1162                 if (newinfo->entries[cpu] == NULL) {
1163                         free_table_info(newinfo);
1164                         return NULL;
1165                 }
1166         }
1167
1168         return newinfo;
1169 }
1170
1171 static int
1172 do_replace(void __user *user, unsigned int len)
1173 {
1174         int ret;
1175         struct ipt_replace tmp;
1176         struct ipt_table *t;
1177         struct ipt_table_info *newinfo, *oldinfo;
1178         struct ipt_counters *counters;
1179         void *loc_cpu_entry, *loc_cpu_old_entry;
1180
1181         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1182                 return -EFAULT;
1183
1184         /* Hack: Causes ipchains to give correct error msg --RR */
1185         if (len != sizeof(tmp) + tmp.size)
1186                 return -ENOPROTOOPT;
1187
1188         /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
1189         if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
1190                 return -ENOMEM;
1191
1192         newinfo = alloc_table_info(tmp.size);
1193         if (!newinfo)
1194                 return -ENOMEM;
1195
1196         /* Choose the copy that is on our node/cpu */
1197         loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1198         if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1199                            tmp.size) != 0) {
1200                 ret = -EFAULT;
1201                 goto free_newinfo;
1202         }
1203
1204         counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
1205         if (!counters) {
1206                 ret = -ENOMEM;
1207                 goto free_newinfo;
1208         }
1209
1210         ret = translate_table(tmp.name, tmp.valid_hooks,
1211                               newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
1212                               tmp.hook_entry, tmp.underflow);
1213         if (ret != 0)
1214                 goto free_newinfo_counters;
1215
1216         duprintf("ip_tables: Translated table\n");
1217
1218         t = try_then_request_module(find_table_lock(tmp.name),
1219                                     "iptable_%s", tmp.name);
1220         if (!t || IS_ERR(t)) {
1221                 ret = t ? PTR_ERR(t) : -ENOENT;
1222                 goto free_newinfo_counters_untrans;
1223         }
1224
1225         /* You lied! */
1226         if (tmp.valid_hooks != t->valid_hooks) {
1227                 duprintf("Valid hook crap: %08X vs %08X\n",
1228                          tmp.valid_hooks, t->valid_hooks);
1229                 ret = -EINVAL;
1230                 goto put_module;
1231         }
1232
1233         oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
1234         if (!oldinfo)
1235                 goto put_module;
1236
1237         /* Update module usage count based on number of rules */
1238         duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
1239                 oldinfo->number, oldinfo->initial_entries, newinfo->number);
1240         if ((oldinfo->number > oldinfo->initial_entries) || 
1241             (newinfo->number <= oldinfo->initial_entries)) 
1242                 module_put(t->me);
1243         if ((oldinfo->number > oldinfo->initial_entries) &&
1244             (newinfo->number <= oldinfo->initial_entries))
1245                 module_put(t->me);
1246
1247         /* Get the old counters. */
1248         get_counters(oldinfo, counters);
1249         /* Decrease module usage counts and free resource */
1250         loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
1251         IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
1252         free_table_info(oldinfo);
1253         if (copy_to_user(tmp.counters, counters,
1254                          sizeof(struct ipt_counters) * tmp.num_counters) != 0)
1255                 ret = -EFAULT;
1256         vfree(counters);
1257         up(&ipt_mutex);
1258         return ret;
1259
1260  put_module:
1261         module_put(t->me);
1262         up(&ipt_mutex);
1263  free_newinfo_counters_untrans:
1264         IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
1265  free_newinfo_counters:
1266         vfree(counters);
1267  free_newinfo:
1268         free_table_info(newinfo);
1269         return ret;
1270 }
1271
1272 /* We're lazy, and add only to the current CPU's copy; get_counters()
1273  * sums the per-cpu copies when they are read, so everything is OK. */
1274 static inline int
1275 add_counter_to_entry(struct ipt_entry *e,
1276                      const struct ipt_counters addme[],
1277                      unsigned int *i)
1278 {
1279 #if 0
1280         duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1281                  *i,
1282                  (long unsigned int)e->counters.pcnt,
1283                  (long unsigned int)e->counters.bcnt,
1284                  (long unsigned int)addme[*i].pcnt,
1285                  (long unsigned int)addme[*i].bcnt);
1286 #endif
1287
1288         ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1289
1290         (*i)++;
1291         return 0;
1292 }
1293
1294 static int
1295 do_add_counters(void __user *user, unsigned int len)
1296 {
1297         unsigned int i;
1298         struct ipt_counters_info tmp, *paddc;
1299         struct ipt_table *t;
1300         int ret = 0;
1301         void *loc_cpu_entry;
1302
1303         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1304                 return -EFAULT;
1305
1306         if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
1307                 return -EINVAL;
1308
1309         paddc = vmalloc_node(len, numa_node_id());
1310         if (!paddc)
1311                 return -ENOMEM;
1312
1313         if (copy_from_user(paddc, user, len) != 0) {
1314                 ret = -EFAULT;
1315                 goto free;
1316         }
1317
1318         t = find_table_lock(tmp.name);
1319         if (!t || IS_ERR(t)) {
1320                 ret = t ? PTR_ERR(t) : -ENOENT;
1321                 goto free;
1322         }
1323
1324         write_lock_bh(&t->lock);
1325         if (t->private->number != paddc->num_counters) {
1326                 ret = -EINVAL;
1327                 goto unlock_up_free;
1328         }
1329
1330         i = 0;
1331         /* Choose the copy that is on our node */
1332         loc_cpu_entry = t->private->entries[raw_smp_processor_id()];
1333         IPT_ENTRY_ITERATE(loc_cpu_entry,
1334                           t->private->size,
1335                           add_counter_to_entry,
1336                           paddc->counters,
1337                           &i);
1338  unlock_up_free:
1339         write_unlock_bh(&t->lock);
1340         up(&ipt_mutex);
1341         module_put(t->me);
1342  free:
1343         vfree(paddc);
1344
1345         return ret;
1346 }
1347
1348 static int
1349 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1350 {
1351         int ret;
1352
1353         if (!capable(CAP_NET_ADMIN))
1354                 return -EPERM;
1355
1356         switch (cmd) {
1357         case IPT_SO_SET_REPLACE:
1358                 ret = do_replace(user, len);
1359                 break;
1360
1361         case IPT_SO_SET_ADD_COUNTERS:
1362                 ret = do_add_counters(user, len);
1363                 break;
1364
1365         default:
1366                 duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
1367                 ret = -EINVAL;
1368         }
1369
1370         return ret;
1371 }
1372
1373 static int
1374 do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1375 {
1376         int ret;
1377
1378         if (!capable(CAP_NET_ADMIN))
1379                 return -EPERM;
1380
1381         switch (cmd) {
1382         case IPT_SO_GET_INFO: {
1383                 char name[IPT_TABLE_MAXNAMELEN];
1384                 struct ipt_table *t;
1385
1386                 if (*len != sizeof(struct ipt_getinfo)) {
1387                         duprintf("length %u != %u\n", *len,
1388                                  sizeof(struct ipt_getinfo));
1389                         ret = -EINVAL;
1390                         break;
1391                 }
1392
1393                 if (copy_from_user(name, user, sizeof(name)) != 0) {
1394                         ret = -EFAULT;
1395                         break;
1396                 }
1397                 name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1398
1399                 t = try_then_request_module(find_table_lock(name),
1400                                             "iptable_%s", name);
1401                 if (t && !IS_ERR(t)) {
1402                         struct ipt_getinfo info;
1403
1404                         info.valid_hooks = t->valid_hooks;
1405                         memcpy(info.hook_entry, t->private->hook_entry,
1406                                sizeof(info.hook_entry));
1407                         memcpy(info.underflow, t->private->underflow,
1408                                sizeof(info.underflow));
1409                         info.num_entries = t->private->number;
1410                         info.size = t->private->size;
1411                         memcpy(info.name, name, sizeof(info.name));
1412
1413                         if (copy_to_user(user, &info, *len) != 0)
1414                                 ret = -EFAULT;
1415                         else
1416                                 ret = 0;
1417                         up(&ipt_mutex);
1418                         module_put(t->me);
1419                 } else
1420                         ret = t ? PTR_ERR(t) : -ENOENT;
1421         }
1422         break;
1423
1424         case IPT_SO_GET_ENTRIES: {
1425                 struct ipt_get_entries get;
1426
1427                 if (*len < sizeof(get)) {
1428                         duprintf("get_entries: %u < %u\n", *len, sizeof(get));
1429                         ret = -EINVAL;
1430                 } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
1431                         ret = -EFAULT;
1432                 } else if (*len != sizeof(struct ipt_get_entries) + get.size) {
1433                         duprintf("get_entries: %u != %u\n", *len,
1434                                  sizeof(struct ipt_get_entries) + get.size);
1435                         ret = -EINVAL;
1436                 } else
1437                         ret = get_entries(&get, user);
1438                 break;
1439         }
1440
1441         case IPT_SO_GET_REVISION_MATCH:
1442         case IPT_SO_GET_REVISION_TARGET: {
1443                 struct ipt_get_revision rev;
1444                 int (*revfn)(const char *, u8, int *);
1445
1446                 if (*len != sizeof(rev)) {
1447                         ret = -EINVAL;
1448                         break;
1449                 }
1450                 if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
1451                         ret = -EFAULT;
1452                         break;
1453                 }
1454
1455                 if (cmd == IPT_SO_GET_REVISION_TARGET)
1456                         revfn = target_revfn;
1457                 else
1458                         revfn = match_revfn;
1459
1460                 try_then_request_module(find_revision(rev.name, rev.revision,
1461                                                       revfn, &ret),
1462                                         "ipt_%s", rev.name);
1463                 break;
1464         }
1465
1466         default:
1467                 duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
1468                 ret = -EINVAL;
1469         }
1470
1471         return ret;
1472 }
1473
1474 /* Registration hooks for targets and matches. */
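/*
 * Extension modules ("ipt_<name>") call these to add their
 * struct ipt_target / struct ipt_match to the global lists, protected
 * by ipt_mutex; the core then finds them by name when rules are loaded.
 *
 * Minimal sketch of a hypothetical extension (illustrative only; the
 * names are made up and only fields actually used in this file are shown):
 *
 *	static struct ipt_target my_target = {
 *		.name	= "MYTARGET",
 *		.target	= my_target_fn,
 *		.me	= THIS_MODULE,
 *	};
 *
 *	static int __init my_init(void)
 *	{
 *		return ipt_register_target(&my_target);
 *	}
 */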
1475 int
1476 ipt_register_target(struct ipt_target *target)
1477 {
1478         int ret;
1479
1480         ret = down_interruptible(&ipt_mutex);
1481         if (ret != 0)
1482                 return ret;
1483         list_add(&target->list, &ipt_target);
1484         up(&ipt_mutex);
1485         return ret;
1486 }
1487
1488 void
1489 ipt_unregister_target(struct ipt_target *target)
1490 {
1491         down(&ipt_mutex);
1492         LIST_DELETE(&ipt_target, target);
1493         up(&ipt_mutex);
1494 }
1495
1496 int
1497 ipt_register_match(struct ipt_match *match)
1498 {
1499         int ret;
1500
1501         ret = down_interruptible(&ipt_mutex);
1502         if (ret != 0)
1503                 return ret;
1504
1505         list_add(&match->list, &ipt_match);
1506         up(&ipt_mutex);
1507
1508         return ret;
1509 }
1510
1511 void
1512 ipt_unregister_match(struct ipt_match *match)
1513 {
1514         down(&ipt_mutex);
1515         LIST_DELETE(&ipt_match, match);
1516         up(&ipt_mutex);
1517 }
1518
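/*
 * Register a complete table, typically from a table module such as
 * iptable_filter at its init time.  The initial ruleset in @repl is
 * copied, checked and translated just like a userspace replacement,
 * then swapped in via replace_table() under ipt_mutex.  A table name
 * that is already registered is refused with -EEXIST; no autoload is
 * attempted here, since that would recurse into ourselves.
 */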
1519 int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl)
1520 {
1521         int ret;
1522         struct ipt_table_info *newinfo;
1523         static struct ipt_table_info bootstrap
1524                 = { 0, 0, 0, { 0 }, { 0 }, { } };
1525         void *loc_cpu_entry;
1526
1527         newinfo = alloc_table_info(repl->size);
1528         if (!newinfo)
1529                 return -ENOMEM;
1530
1531         /* Choose the copy on our node/cpu,
1532          * but don't care about preemption.
1533          */
1534         loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1535         memcpy(loc_cpu_entry, repl->entries, repl->size);
1536
1537         ret = translate_table(table->name, table->valid_hooks,
1538                               newinfo, loc_cpu_entry, repl->size,
1539                               repl->num_entries,
1540                               repl->hook_entry,
1541                               repl->underflow);
1542         if (ret != 0) {
1543                 free_table_info(newinfo);
1544                 return ret;
1545         }
1546
1547         ret = down_interruptible(&ipt_mutex);
1548         if (ret != 0) {
1549                 free_table_info(newinfo);
1550                 return ret;
1551         }
1552
1553         /* Don't autoload: we'd eat our tail... */
1554         if (list_named_find(&ipt_tables, table->name)) {
1555                 ret = -EEXIST;
1556                 goto free_unlock;
1557         }
1558
1559         /* Simplifies replace_table code. */
1560         table->private = &bootstrap;
1561         if (!replace_table(table, 0, newinfo, &ret))
1562                 goto free_unlock;
1563
1564         duprintf("table->private->number = %u\n",
1565                  table->private->number);
1566
1567         /* save number of initial entries */
1568         table->private->initial_entries = table->private->number;
1569
1570         rwlock_init(&table->lock);
1571         list_prepend(&ipt_tables, table);
1572
1573  unlock:
1574         up(&ipt_mutex);
1575         return ret;
1576
1577  free_unlock:
1578         free_table_info(newinfo);
1579         goto unlock;
1580 }
1581
1582 void ipt_unregister_table(struct ipt_table *table)
1583 {
1584         void *loc_cpu_entry;
1585
1586         down(&ipt_mutex);
1587         LIST_DELETE(&ipt_tables, table);
1588         up(&ipt_mutex);
1589
1590         /* Decrease module usage counts and free resources */
1591         loc_cpu_entry = table->private->entries[raw_smp_processor_id()];
1592         IPT_ENTRY_ITERATE(loc_cpu_entry, table->private->size,
1593                           cleanup_entry, NULL);
1594         free_table_info(table->private);
1595 }
1596
1597 /* Returns 1 if the port is matched by the range, 0 otherwise */
1598 static inline int
1599 port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
1600 {
1601         int ret;
1602
1603         ret = (port >= min && port <= max) ^ invert;
1604         return ret;
1605 }
1606
1607 static int
1608 tcp_find_option(u_int8_t option,
1609                 const struct sk_buff *skb,
1610                 unsigned int optlen,
1611                 int invert,
1612                 int *hotdrop)
1613 {
1614         /* tcp.doff is only 4 bits, i.e. max 15 * 4 bytes */
1615         u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
1616         unsigned int i;
1617
1618         duprintf("tcp_match: finding option\n");
1619
1620         if (!optlen)
1621                 return invert;
1622
1623         /* If we don't have the whole header, drop packet. */
1624         op = skb_header_pointer(skb,
1625                                 skb->nh.iph->ihl*4 + sizeof(struct tcphdr),
1626                                 optlen, _opt);
1627         if (op == NULL) {
1628                 *hotdrop = 1;
1629                 return 0;
1630         }
1631
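        /* Walk the TCP options: kinds 0 (EOL) and 1 (NOP) are single
         * bytes, every other option carries a length byte in op[i+1]
         * (a zero length is treated as 1 so we cannot loop forever). */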
1632         for (i = 0; i < optlen; ) {
1633                 if (op[i] == option) return !invert;
1634                 if (op[i] < 2) i++;
1635                 else i += op[i+1]?:1;
1636         }
1637
1638         return invert;
1639 }
1640
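/*
 * Match on the TCP header: source/destination port ranges, the flag
 * bits (byte 13 of the header, masked with flg_mask and compared to
 * flg_cmp) and, optionally, the presence of a specific TCP option.
 * Non-first fragments never match; an offset==1 fragment could be used
 * to overwrite the flags of the first fragment, so it is dropped
 * outright (see the quote from Alan below).
 */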
1641 static int
1642 tcp_match(const struct sk_buff *skb,
1643           const struct net_device *in,
1644           const struct net_device *out,
1645           const void *matchinfo,
1646           int offset,
1647           int *hotdrop)
1648 {
1649         struct tcphdr _tcph, *th;
1650         const struct ipt_tcp *tcpinfo = matchinfo;
1651
1652         if (offset) {
1653                 /* To quote Alan:
1654
1655                    Don't allow a fragment of TCP 8 bytes in. Nobody normal
1656            causes this. It's a cracker trying to break in by doing a
1657                    flag overwrite to pass the direction checks.
1658                 */
1659                 if (offset == 1) {
1660                         duprintf("Dropping evil TCP offset=1 frag.\n");
1661                         *hotdrop = 1;
1662                 }
1663                 /* Must not be a fragment. */
1664                 return 0;
1665         }
1666
1667 #define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
1668
1669         th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1670                                 sizeof(_tcph), &_tcph);
1671         if (th == NULL) {
1672                 /* We've been asked to examine this packet, and we
1673                    can't.  Hence, no choice but to drop. */
1674                 duprintf("Dropping evil TCP offset=0 tinygram.\n");
1675                 *hotdrop = 1;
1676                 return 0;
1677         }
1678
1679         if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
1680                         ntohs(th->source),
1681                         !!(tcpinfo->invflags & IPT_TCP_INV_SRCPT)))
1682                 return 0;
1683         if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
1684                         ntohs(th->dest),
1685                         !!(tcpinfo->invflags & IPT_TCP_INV_DSTPT)))
1686                 return 0;
1687         if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
1688                       == tcpinfo->flg_cmp,
1689                       IPT_TCP_INV_FLAGS))
1690                 return 0;
1691         if (tcpinfo->option) {
1692                 if (th->doff * 4 < sizeof(_tcph)) {
1693                         *hotdrop = 1;
1694                         return 0;
1695                 }
1696                 if (!tcp_find_option(tcpinfo->option, skb,
1697                                      th->doff*4 - sizeof(_tcph),
1698                                      tcpinfo->invflags & IPT_TCP_INV_OPTION,
1699                                      hotdrop))
1700                         return 0;
1701         }
1702         return 1;
1703 }
1704
1705 /* Called when user tries to insert an entry of this type. */
1706 static int
1707 tcp_checkentry(const char *tablename,
1708                const struct ipt_ip *ip,
1709                void *matchinfo,
1710                unsigned int matchsize,
1711                unsigned int hook_mask)
1712 {
1713         const struct ipt_tcp *tcpinfo = matchinfo;
1714
1715         /* Must specify proto == TCP, and no unknown invflags */
1716         return ip->proto == IPPROTO_TCP
1717                 && !(ip->invflags & IPT_INV_PROTO)
1718                 && matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
1719                 && !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
1720 }
1721
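/*
 * Match on the UDP header: source/destination port ranges only.
 * As with TCP, non-first fragments never match, and a packet too
 * short to carry a full UDP header is dropped on the spot.
 */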
1722 static int
1723 udp_match(const struct sk_buff *skb,
1724           const struct net_device *in,
1725           const struct net_device *out,
1726           const void *matchinfo,
1727           int offset,
1728           int *hotdrop)
1729 {
1730         struct udphdr _udph, *uh;
1731         const struct ipt_udp *udpinfo = matchinfo;
1732
1733         /* Must not be a fragment. */
1734         if (offset)
1735                 return 0;
1736
1737         uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1738                                 sizeof(_udph), &_udph);
1739         if (uh == NULL) {
1740                 /* We've been asked to examine this packet, and we
1741                    can't.  Hence, no choice but to drop. */
1742                 duprintf("Dropping evil UDP tinygram.\n");
1743                 *hotdrop = 1;
1744                 return 0;
1745         }
1746
1747         return port_match(udpinfo->spts[0], udpinfo->spts[1],
1748                           ntohs(uh->source),
1749                           !!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
1750                 && port_match(udpinfo->dpts[0], udpinfo->dpts[1],
1751                               ntohs(uh->dest),
1752                               !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
1753 }
1754
1755 /* Called when user tries to insert an entry of this type. */
1756 static int
1757 udp_checkentry(const char *tablename,
1758                const struct ipt_ip *ip,
1759                void *matchinfo,
1760                unsigned int matchinfosize,
1761                unsigned int hook_mask)
1762 {
1763         const struct ipt_udp *udpinfo = matchinfo;
1764
1765         /* Must specify proto == UDP, and no unknown invflags */
1766         if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
1767                 duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
1768                          IPPROTO_UDP);
1769                 return 0;
1770         }
1771         if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
1772                 duprintf("ipt_udp: matchsize %u != %u\n",
1773                          matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
1774                 return 0;
1775         }
1776         if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
1777                 duprintf("ipt_udp: unknown flags %X\n",
1778                          udpinfo->invflags);
1779                 return 0;
1780         }
1781
1782         return 1;
1783 }
1784
1785 /* Returns 1 if the type and code is matched by the range, 0 otherwise */
1786 static inline int
1787 icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
1788                      u_int8_t type, u_int8_t code,
1789                      int invert)
1790 {
1791         return ((test_type == 0xFF) || (type == test_type &&
1792                 code >= min_code && code <= max_code)) ^ invert;
1793 }
1794
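/*
 * Match on the ICMP header: type and code range, where a configured
 * type of 0xFF acts as a wildcard in icmp_type_code_match() above.
 * Non-first fragments never match; truncated ICMP headers are dropped.
 */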
1795 static int
1796 icmp_match(const struct sk_buff *skb,
1797            const struct net_device *in,
1798            const struct net_device *out,
1799            const void *matchinfo,
1800            int offset,
1801            int *hotdrop)
1802 {
1803         struct icmphdr _icmph, *ic;
1804         const struct ipt_icmp *icmpinfo = matchinfo;
1805
1806         /* Must not be a fragment. */
1807         if (offset)
1808                 return 0;
1809
1810         ic = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1811                                 sizeof(_icmph), &_icmph);
1812         if (ic == NULL) {
1813                 /* We've been asked to examine this packet, and we
1814                  * can't.  Hence, no choice but to drop.
1815                  */
1816                 duprintf("Dropping evil ICMP tinygram.\n");
1817                 *hotdrop = 1;
1818                 return 0;
1819         }
1820
1821         return icmp_type_code_match(icmpinfo->type,
1822                                     icmpinfo->code[0],
1823                                     icmpinfo->code[1],
1824                                     ic->type, ic->code,
1825                                     !!(icmpinfo->invflags&IPT_ICMP_INV));
1826 }
1827
1828 /* Called when user tries to insert an entry of this type. */
1829 static int
1830 icmp_checkentry(const char *tablename,
1831            const struct ipt_ip *ip,
1832            void *matchinfo,
1833            unsigned int matchsize,
1834            unsigned int hook_mask)
1835 {
1836         const struct ipt_icmp *icmpinfo = matchinfo;
1837
1838         /* Must specify proto == ICMP, and no unknown invflags */
1839         return ip->proto == IPPROTO_ICMP
1840                 && !(ip->invflags & IPT_INV_PROTO)
1841                 && matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
1842                 && !(icmpinfo->invflags & ~IPT_ICMP_INV);
1843 }
1844
1845 /* The built-in targets: standard (NULL) and error. */
1846 static struct ipt_target ipt_standard_target = {
1847         .name           = IPT_STANDARD_TARGET,
1848 };
1849
1850 static struct ipt_target ipt_error_target = {
1851         .name           = IPT_ERROR_TARGET,
1852         .target         = ipt_error,
1853 };
1854
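/*
 * Wire the set/getsockopt handlers above into the netfilter sockopt
 * machinery for PF_INET sockets, claiming the option numbers from
 * IPT_BASE_CTL up to IPT_SO_SET_MAX / IPT_SO_GET_MAX.
 */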
1855 static struct nf_sockopt_ops ipt_sockopts = {
1856         .pf             = PF_INET,
1857         .set_optmin     = IPT_BASE_CTL,
1858         .set_optmax     = IPT_SO_SET_MAX+1,
1859         .set            = do_ipt_set_ctl,
1860         .get_optmin     = IPT_BASE_CTL,
1861         .get_optmax     = IPT_SO_GET_MAX+1,
1862         .get            = do_ipt_get_ctl,
1863 };
1864
1865 static struct ipt_match tcp_matchstruct = {
1866         .name           = "tcp",
1867         .match          = &tcp_match,
1868         .checkentry     = &tcp_checkentry,
1869 };
1870
1871 static struct ipt_match udp_matchstruct = {
1872         .name           = "udp",
1873         .match          = &udp_match,
1874         .checkentry     = &udp_checkentry,
1875 };
1876
1877 static struct ipt_match icmp_matchstruct = {
1878         .name           = "icmp",
1879         .match          = &icmp_match,
1880         .checkentry     = &icmp_checkentry,
1881 };
1882
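/*
 * /proc interface: when CONFIG_PROC_FS is enabled, the registered
 * table, target and match names are exported read-only through
 * /proc/net/ip_tables_names, /proc/net/ip_tables_targets and
 * /proc/net/ip_tables_matches (see ipt_proc_entry[] below).
 */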
1883 #ifdef CONFIG_PROC_FS
1884 static inline int print_name(const char *i,
1885                              off_t start_offset, char *buffer, int length,
1886                              off_t *pos, unsigned int *count)
1887 {
1888         if ((*count)++ >= start_offset) {
1889                 unsigned int namelen;
1890
1891                 namelen = sprintf(buffer + *pos, "%s\n",
1892                                   i + sizeof(struct list_head));
1893                 if (*pos + namelen > length) {
1894                         /* Stop iterating */
1895                         return 1;
1896                 }
1897                 *pos += namelen;
1898         }
1899         return 0;
1900 }
1901
1902 static inline int print_target(const struct ipt_target *t,
1903                                off_t start_offset, char *buffer, int length,
1904                                off_t *pos, unsigned int *count)
1905 {
1906         if (t == &ipt_standard_target || t == &ipt_error_target)
1907                 return 0;
1908         return print_name((char *)t, start_offset, buffer, length, pos, count);
1909 }
1910
1911 static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
1912 {
1913         off_t pos = 0;
1914         unsigned int count = 0;
1915
1916         if (down_interruptible(&ipt_mutex) != 0)
1917                 return 0;
1918
1919         LIST_FIND(&ipt_tables, print_name, void *,
1920                   offset, buffer, length, &pos, &count);
1921
1922         up(&ipt_mutex);
1923
1924         /* `start' hack - see fs/proc/generic.c line ~105 */
1925         *start = (char *)((unsigned long)count - offset);
1926         return pos;
1927 }
1928
1929 static int ipt_get_targets(char *buffer, char **start, off_t offset, int length)
1930 {
1931         off_t pos = 0;
1932         unsigned int count = 0;
1933
1934         if (down_interruptible(&ipt_mutex) != 0)
1935                 return 0;
1936
1937         LIST_FIND(&ipt_target, print_target, struct ipt_target *,
1938                   offset, buffer, length, &pos, &count);
1939
1940         up(&ipt_mutex);
1941
1942         *start = (char *)((unsigned long)count - offset);
1943         return pos;
1944 }
1945
1946 static int ipt_get_matches(char *buffer, char **start, off_t offset, int length)
1947 {
1948         off_t pos = 0;
1949         unsigned int count = 0;
1950
1951         if (down_interruptible(&ipt_mutex) != 0)
1952                 return 0;
1953
1954         LIST_FIND(&ipt_match, print_name, void *,
1955                   offset, buffer, length, &pos, &count);
1956
1957         up(&ipt_mutex);
1958
1959         *start = (char *)((unsigned long)count - offset);
1960         return pos;
1961 }
1962
1963 static const struct { char *name; get_info_t *get_info; } ipt_proc_entry[] =
1964 { { "ip_tables_names", ipt_get_tables },
1965   { "ip_tables_targets", ipt_get_targets },
1966   { "ip_tables_matches", ipt_get_matches },
1967   { NULL, NULL} };
1968 #endif /*CONFIG_PROC_FS*/
1969
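/*
 * Module init: seed the target/match lists with the built-ins above,
 * register the sockopt interface and (optionally) the /proc entries.
 * Failure to create a /proc entry unwinds whatever was already set up.
 */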
1970 static int __init init(void)
1971 {
1972         int ret;
1973
1974         /* No one else will be downing the semaphore now, so we won't sleep */
1975         down(&ipt_mutex);
1976         list_append(&ipt_target, &ipt_standard_target);
1977         list_append(&ipt_target, &ipt_error_target);
1978         list_append(&ipt_match, &tcp_matchstruct);
1979         list_append(&ipt_match, &udp_matchstruct);
1980         list_append(&ipt_match, &icmp_matchstruct);
1981         up(&ipt_mutex);
1982
1983         /* Register setsockopt */
1984         ret = nf_register_sockopt(&ipt_sockopts);
1985         if (ret < 0) {
1986                 duprintf("Unable to register sockopts.\n");
1987                 return ret;
1988         }
1989
1990 #ifdef CONFIG_PROC_FS
1991         {
1992         struct proc_dir_entry *proc;
1993         int i;
1994
1995         for (i = 0; ipt_proc_entry[i].name; i++) {
1996                 proc = proc_net_create(ipt_proc_entry[i].name, 0,
1997                                        ipt_proc_entry[i].get_info);
1998                 if (!proc) {
1999                         while (--i >= 0)
2000                                 proc_net_remove(ipt_proc_entry[i].name);
2001                         nf_unregister_sockopt(&ipt_sockopts);
2002                         return -ENOMEM;
2003                 }
2004                 proc->owner = THIS_MODULE;
2005         }
2006         }
2007 #endif
2008
2009         printk("ip_tables: (C) 2000-2004 Netfilter core team\n");
2010         return 0;
2011 }
2012
2013 static void __exit fini(void)
2014 {
2015         nf_unregister_sockopt(&ipt_sockopts);
2016 #ifdef CONFIG_PROC_FS
2017         {
2018         int i;
2019         for (i = 0; ipt_proc_entry[i].name; i++)
2020                 proc_net_remove(ipt_proc_entry[i].name);
2021         }
2022 #endif
2023 }
2024
2025 EXPORT_SYMBOL(ipt_register_table);
2026 EXPORT_SYMBOL(ipt_unregister_table);
2027 EXPORT_SYMBOL(ipt_register_match);
2028 EXPORT_SYMBOL(ipt_unregister_match);
2029 EXPORT_SYMBOL(ipt_do_table);
2030 EXPORT_SYMBOL(ipt_register_target);
2031 EXPORT_SYMBOL(ipt_unregister_target);
2032 EXPORT_SYMBOL(ipt_find_target);
2033
2034 module_init(init);
2035 module_exit(fini);