[NETFILTER]: Fix OOPSes on machines with discontiguous cpu numbering.
[linux-2.6.git] / net / ipv4 / netfilter / ip_tables.c
1 /*
2  * Packet matching code.
3  *
4  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5  * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  *
11  * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
12  *      - increase module usage count as soon as we have rules inside
13  *        a table
14  */
15 #include <linux/config.h>
16 #include <linux/cache.h>
17 #include <linux/skbuff.h>
18 #include <linux/kmod.h>
19 #include <linux/vmalloc.h>
20 #include <linux/netdevice.h>
21 #include <linux/module.h>
22 #include <linux/tcp.h>
23 #include <linux/udp.h>
24 #include <linux/icmp.h>
25 #include <net/ip.h>
26 #include <asm/uaccess.h>
27 #include <asm/semaphore.h>
28 #include <linux/proc_fs.h>
29 #include <linux/err.h>
30 #include <linux/cpumask.h>
31
32 #include <linux/netfilter_ipv4/ip_tables.h>
33
34 MODULE_LICENSE("GPL");
35 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
36 MODULE_DESCRIPTION("IPv4 packet filter");
37
38 /*#define DEBUG_IP_FIREWALL*/
39 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
40 /*#define DEBUG_IP_FIREWALL_USER*/
41
/* dprintf(): tracing used by the packet-matching code; expands to nothing
 * unless DEBUG_IP_FIREWALL is defined. */
#ifndef DEBUG_IP_FIREWALL
#define dprintf(fmt, args...)
#else
#define dprintf(fmt, args...)	printk(fmt , ## args)
#endif

/* duprintf(): tracing used by the ruleset-checking code; expands to nothing
 * unless DEBUG_IP_FIREWALL_USER is defined. */
#ifndef DEBUG_IP_FIREWALL_USER
#define duprintf(fmt, args...)
#else
#define duprintf(fmt, args...)	printk(fmt , ## args)
#endif

/* IP_NF_ASSERT(cond): with CONFIG_NETFILTER_DEBUG, log function, file and
 * line when cond is false and carry on.  Without it the macro expands to
 * nothing, so cond is not evaluated at all. */
#ifndef CONFIG_NETFILTER_DEBUG
#define IP_NF_ASSERT(cond)
#else
#define IP_NF_ASSERT(cond)						\
do {									\
	if (!(cond))							\
		printk("IP_NF_ASSERT: %s:%s:%u\n",			\
		       __FUNCTION__, __FILE__, __LINE__);		\
} while(0)
#endif

/* Round len up to a multiple of SMP_CACHE_BYTES (assumes SMP_CACHE_BYTES
 * is a power of two -- TODO confirm). */
#define SMP_ALIGN(len) (((len) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
65
/* Mutex taken (via down_interruptible) by the lookup helpers in this file
 * before they walk the ipt_tables, ipt_match and ipt_target lists. */
66 static DECLARE_MUTEX(ipt_mutex);
67
/* Both assertions ignore their argument and check that down_trylock() on
 * ipt_mutex returns non-zero.  Presumably consumed by the listhelp.h
 * include that follows -- verify. */
68 /* Must have mutex */
69 #define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
70 #define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
71 #include <linux/netfilter_ipv4/listhelp.h>
72
73 #if 0
74 /* All the better to debug you with... */
75 #define static
76 #define inline
77 #endif
78
79 /*
80    We keep a set of rules for each CPU, so we can avoid write-locking
81    them in the softirq when updating the counters and therefore
82    only need to read-lock in the softirq; doing a write_lock_bh() in user
83    context stops packets coming through and allows user context to read
84    the counters or update the rules.
85
86    To be cache friendly on SMP, we arrange them like so:
87    [ n-entries ]
88    ... cache-align padding ...
89    [ n-entries ]
90
91    Hence the start of any CPU's copy is given by TABLE_OFFSET() below.  */
92
/*
 * Header placed in front of the rule entries of a table.  The entries
 * start at entries[]; on SMP, TABLE_OFFSET() locates a given CPU's copy
 * at a multiple of SMP_ALIGN(size) bytes from there.
 */
93 /* The table itself */
94 struct ipt_table_info
95 {
96         /* Size per table */
97         unsigned int size;
98         /* Number of entries: FIXME. --RR */
99         unsigned int number;
100         /* Initial number of entries. Needed for module usage count */
101         unsigned int initial_entries;
102
            /* Byte offsets relative to the start of one copy of the entries;
             * ipt_do_table() hands them to get_entry(). */
103         /* Entry points and underflows */
104         unsigned int hook_entry[NF_IP_NUMHOOKS];
105         unsigned int underflow[NF_IP_NUMHOOKS];
106
107         /* ipt_entry tables: one per CPU */
108         char entries[0] ____cacheline_aligned;
109 };
110
/* Lists of targets, matches and tables.  The lookup helpers in this file
 * walk them with ipt_mutex held. */
static LIST_HEAD(ipt_target);
static LIST_HEAD(ipt_match);
static LIST_HEAD(ipt_tables);

/* Add b bytes and p packets to the counter pair c. */
#define ADD_COUNTER(c,b,p)			\
do {						\
	(c).bcnt += (b);			\
	(c).pcnt += (p);			\
} while(0)

/* Byte offset of CPU p's copy of the entries inside table info t.
 * Without CONFIG_SMP the offset is always 0. */
#ifndef CONFIG_SMP
#define TABLE_OFFSET(t,p) 0
#else
#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
#endif
121
122 #if 0
123 #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
124 #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
125 #define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
126 #endif
127
128 /* Returns whether matches rule or not. */
129 static inline int
130 ip_packet_match(const struct iphdr *ip,
131                 const char *indev,
132                 const char *outdev,
133                 const struct ipt_ip *ipinfo,
134                 int isfrag)
135 {
136         size_t i;
137         unsigned long ret;
138
139 #define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
140
141         if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
142                   IPT_INV_SRCIP)
143             || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
144                      IPT_INV_DSTIP)) {
145                 dprintf("Source or dest mismatch.\n");
146
147                 dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
148                         NIPQUAD(ip->saddr),
149                         NIPQUAD(ipinfo->smsk.s_addr),
150                         NIPQUAD(ipinfo->src.s_addr),
151                         ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
152                 dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
153                         NIPQUAD(ip->daddr),
154                         NIPQUAD(ipinfo->dmsk.s_addr),
155                         NIPQUAD(ipinfo->dst.s_addr),
156                         ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
157                 return 0;
158         }
159
160         /* Look for ifname matches; this should unroll nicely. */
161         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
162                 ret |= (((const unsigned long *)indev)[i]
163                         ^ ((const unsigned long *)ipinfo->iniface)[i])
164                         & ((const unsigned long *)ipinfo->iniface_mask)[i];
165         }
166
167         if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
168                 dprintf("VIA in mismatch (%s vs %s).%s\n",
169                         indev, ipinfo->iniface,
170                         ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
171                 return 0;
172         }
173
174         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
175                 ret |= (((const unsigned long *)outdev)[i]
176                         ^ ((const unsigned long *)ipinfo->outiface)[i])
177                         & ((const unsigned long *)ipinfo->outiface_mask)[i];
178         }
179
180         if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
181                 dprintf("VIA out mismatch (%s vs %s).%s\n",
182                         outdev, ipinfo->outiface,
183                         ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
184                 return 0;
185         }
186
187         /* Check specific protocol */
188         if (ipinfo->proto
189             && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
190                 dprintf("Packet protocol %hi does not match %hi.%s\n",
191                         ip->protocol, ipinfo->proto,
192                         ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
193                 return 0;
194         }
195
196         /* If we have a fragment rule but the packet is not a fragment
197          * then we return zero */
198         if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
199                 dprintf("Fragment rule but not fragment.%s\n",
200                         ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
201                 return 0;
202         }
203
204         return 1;
205 }
206
207 static inline int
208 ip_checkentry(const struct ipt_ip *ip)
209 {
210         if (ip->flags & ~IPT_F_MASK) {
211                 duprintf("Unknown flag bits set: %08X\n",
212                          ip->flags & ~IPT_F_MASK);
213                 return 0;
214         }
215         if (ip->invflags & ~IPT_INV_MASK) {
216                 duprintf("Unknown invflag bits set: %08X\n",
217                          ip->invflags & ~IPT_INV_MASK);
218                 return 0;
219         }
220         return 1;
221 }
222
223 static unsigned int
224 ipt_error(struct sk_buff **pskb,
225           const struct net_device *in,
226           const struct net_device *out,
227           unsigned int hooknum,
228           const void *targinfo,
229           void *userinfo)
230 {
231         if (net_ratelimit())
232                 printk("ip_tables: error: `%s'\n", (char *)targinfo);
233
234         return NF_DROP;
235 }
236
237 static inline
238 int do_match(struct ipt_entry_match *m,
239              const struct sk_buff *skb,
240              const struct net_device *in,
241              const struct net_device *out,
242              int offset,
243              int *hotdrop)
244 {
245         /* Stop iteration if it doesn't match */
246         if (!m->u.kernel.match->match(skb, in, out, m->data, offset, hotdrop))
247                 return 1;
248         else
249                 return 0;
250 }
251
/* Return the entry located offset bytes past base. */
static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
	char *start = base;

	return (struct ipt_entry *)(start + offset);
}
257
/*
 * Run the packet in *pskb through the rules of `table' for netfilter hook
 * `hook'.
 *
 * The walk happens under read_lock_bh(&table->lock) on this CPU's copy of
 * the entries (TABLE_OFFSET() with smp_processor_id()).  It starts at
 * hook_entry[hook]; `back' starts at underflow[hook] and acts as the
 * return address for IPT_RETURN.
 *
 * For every entry whose IP part and all match extensions match, the
 * entry's counters are bumped and its target is evaluated:
 *   - standard target (no target function): a negative verdict other than
 *     IPT_RETURN ends the walk with verdict -v - 1; IPT_RETURN resumes at
 *     `back'; a non-negative verdict is an offset to jump to.
 *   - any other target: its function is called; IPT_CONTINUE moves on to
 *     the next entry, anything else ends the walk with that verdict.
 *
 * The loop also ends as soon as hotdrop is set.  Returns NF_DROP if
 * hotdrop is set, otherwise the verdict (always NF_ACCEPT when built with
 * DEBUG_ALLOW_ALL).
 */
258 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
259 unsigned int
260 ipt_do_table(struct sk_buff **pskb,
261              unsigned int hook,
262              const struct net_device *in,
263              const struct net_device *out,
264              struct ipt_table *table,
265              void *userdata)
266 {
267         static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
268         u_int16_t offset;
269         struct iphdr *ip;
270         u_int16_t datalen;
271         int hotdrop = 0;
272         /* Initializing verdict to NF_DROP keeps gcc happy. */
273         unsigned int verdict = NF_DROP;
274         const char *indev, *outdev;
275         void *table_base;
276         struct ipt_entry *e, *back;
277
278         /* Initialization */
279         ip = (*pskb)->nh.iph;
            /* NOTE(review): datalen is assigned here and again after a target
             * call, but it is never read in this function. */
280         datalen = (*pskb)->len - ip->ihl * 4;
            /* A missing device is represented by nulldevname. */
281         indev = in ? in->name : nulldevname;
282         outdev = out ? out->name : nulldevname;
283         /* We handle fragments by dealing with the first fragment as
284          * if it was a normal packet.  All other fragments are treated
285          * normally, except that they will NEVER match rules that ask
286          * things we don't know, ie. tcp syn flag or ports).  If the
287          * rule is also a fragment-specific rule, non-fragments won't
288          * match it. */
289         offset = ntohs(ip->frag_off) & IP_OFFSET;
290
291         read_lock_bh(&table->lock);
292         IP_NF_ASSERT(table->valid_hooks & (1 << hook));
            /* Select this CPU's copy of the entries. */
293         table_base = (void *)table->private->entries
294                 + TABLE_OFFSET(table->private, smp_processor_id());
295         e = get_entry(table_base, table->private->hook_entry[hook]);
296
297 #ifdef CONFIG_NETFILTER_DEBUG
298         /* Check noone else using our table */
299         if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
300             && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
301                 printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
302                        smp_processor_id(),
303                        table->name,
304                        &((struct ipt_entry *)table_base)->comefrom,
305                        ((struct ipt_entry *)table_base)->comefrom);
306         }
307         ((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
308 #endif
309
310         /* For return from builtin chain */
311         back = get_entry(table_base, table->private->underflow[hook]);
312
313         do {
314                 IP_NF_ASSERT(e);
315                 IP_NF_ASSERT(back);
316                 if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
317                         struct ipt_entry_target *t;
318
                            /* do_match() returns non-zero when an extension
                             * does not match. */
319                         if (IPT_MATCH_ITERATE(e, do_match,
320                                               *pskb, in, out,
321                                               offset, &hotdrop) != 0)
322                                 goto no_match;
323
324                         ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
325
326                         t = ipt_get_target(e);
327                         IP_NF_ASSERT(t->u.kernel.target);
328                         /* Standard target? */
329                         if (!t->u.kernel.target->target) {
330                                 int v;
331
332                                 v = ((struct ipt_standard_target *)t)->verdict;
333                                 if (v < 0) {
334                                         /* Pop from stack? */
335                                         if (v != IPT_RETURN) {
                                                    /* Final verdict, stored as -verdict - 1. */
336                                                 verdict = (unsigned)(-v) - 1;
337                                                 break;
338                                         }
                                            /* IPT_RETURN: resume at `back' and reload
                                             * the back pointer saved in its comefrom. */
339                                         e = back;
340                                         back = get_entry(table_base,
341                                                          back->comefrom);
342                                         continue;
343                                 }
                                    /* v is an offset.  Unless it is simply the next
                                     * entry or the rule has IPT_F_GOTO set, remember
                                     * where to resume after the jump. */
344                                 if (table_base + v != (void *)e + e->next_offset
345                                     && !(e->ip.flags & IPT_F_GOTO)) {
346                                         /* Save old back ptr in next entry */
347                                         struct ipt_entry *next
348                                                 = (void *)e + e->next_offset;
349                                         next->comefrom
350                                                 = (void *)back - table_base;
351                                         /* set back pointer to next entry */
352                                         back = next;
353                                 }
354
355                                 e = get_entry(table_base, v);
356                         } else {
357                                 /* Targets which reenter must return
358                                    abs. verdicts */
359 #ifdef CONFIG_NETFILTER_DEBUG
360                                 ((struct ipt_entry *)table_base)->comefrom
361                                         = 0xeeeeeeec;
362 #endif
363                                 verdict = t->u.kernel.target->target(pskb,
364                                                                      in, out,
365                                                                      hook,
366                                                                      t->data,
367                                                                      userdata);
368
369 #ifdef CONFIG_NETFILTER_DEBUG
370                                 if (((struct ipt_entry *)table_base)->comefrom
371                                     != 0xeeeeeeec
372                                     && verdict == IPT_CONTINUE) {
373                                         printk("Target %s reentered!\n",
374                                                t->u.kernel.target->name);
375                                         verdict = NF_DROP;
376                                 }
377                                 ((struct ipt_entry *)table_base)->comefrom
378                                         = 0x57acc001;
379 #endif
380                                 /* Target might have changed stuff. */
381                                 ip = (*pskb)->nh.iph;
382                                 datalen = (*pskb)->len - ip->ihl * 4;
383
384                                 if (verdict == IPT_CONTINUE)
385                                         e = (void *)e + e->next_offset;
386                                 else
387                                         /* Verdict */
388                                         break;
389                         }
390                 } else {
391
392                 no_match:
393                         e = (void *)e + e->next_offset;
394                 }
395         } while (!hotdrop);
396
397 #ifdef CONFIG_NETFILTER_DEBUG
398         ((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
399 #endif
400         read_unlock_bh(&table->lock);
401
402 #ifdef DEBUG_ALLOW_ALL
403         return NF_ACCEPT;
404 #else
405         if (hotdrop)
406                 return NF_DROP;
407         else return verdict;
408 #endif
409 }
410
411 /*
412  * These are weird, but module loading must not be done with mutex
413  * held (since they will register), and we have to have a single
414  * function to use try_then_request_module().
415  */
416
417 /* Find table by name, grabs mutex & ref.  Returns ERR_PTR() on error. */
418 static inline struct ipt_table *find_table_lock(const char *name)
419 {
420         struct ipt_table *t;
421
422         if (down_interruptible(&ipt_mutex) != 0)
423                 return ERR_PTR(-EINTR);
424
425         list_for_each_entry(t, &ipt_tables, list)
426                 if (strcmp(t->name, name) == 0 && try_module_get(t->me))
427                         return t;
428         up(&ipt_mutex);
429         return NULL;
430 }
431
432 /* Find match, grabs ref.  Returns ERR_PTR() on error. */
433 static inline struct ipt_match *find_match(const char *name, u8 revision)
434 {
435         struct ipt_match *m;
436         int err = 0;
437
438         if (down_interruptible(&ipt_mutex) != 0)
439                 return ERR_PTR(-EINTR);
440
441         list_for_each_entry(m, &ipt_match, list) {
442                 if (strcmp(m->name, name) == 0) {
443                         if (m->revision == revision) {
444                                 if (try_module_get(m->me)) {
445                                         up(&ipt_mutex);
446                                         return m;
447                                 }
448                         } else
449                                 err = -EPROTOTYPE; /* Found something. */
450                 }
451         }
452         up(&ipt_mutex);
453         return ERR_PTR(err);
454 }
455
456 /* Find target, grabs ref.  Returns ERR_PTR() on error. */
457 static inline struct ipt_target *find_target(const char *name, u8 revision)
458 {
459         struct ipt_target *t;
460         int err = 0;
461
462         if (down_interruptible(&ipt_mutex) != 0)
463                 return ERR_PTR(-EINTR);
464
465         list_for_each_entry(t, &ipt_target, list) {
466                 if (strcmp(t->name, name) == 0) {
467                         if (t->revision == revision) {
468                                 if (try_module_get(t->me)) {
469                                         up(&ipt_mutex);
470                                         return t;
471                                 }
472                         } else
473                                 err = -EPROTOTYPE; /* Found something. */
474                 }
475         }
476         up(&ipt_mutex);
477         return ERR_PTR(err);
478 }
479
480 struct ipt_target *ipt_find_target(const char *name, u8 revision)
481 {
482         struct ipt_target *target;
483
484         target = try_then_request_module(find_target(name, revision),
485                                          "ipt_%s", name);
486         if (IS_ERR(target) || !target)
487                 return NULL;
488         return target;
489 }
490
491 static int match_revfn(const char *name, u8 revision, int *bestp)
492 {
493         struct ipt_match *m;
494         int have_rev = 0;
495
496         list_for_each_entry(m, &ipt_match, list) {
497                 if (strcmp(m->name, name) == 0) {
498                         if (m->revision > *bestp)
499                                 *bestp = m->revision;
500                         if (m->revision == revision)
501                                 have_rev = 1;
502                 }
503         }
504         return have_rev;
505 }
506
507 static int target_revfn(const char *name, u8 revision, int *bestp)
508 {
509         struct ipt_target *t;
510         int have_rev = 0;
511
512         list_for_each_entry(t, &ipt_target, list) {
513                 if (strcmp(t->name, name) == 0) {
514                         if (t->revision > *bestp)
515                                 *bestp = t->revision;
516                         if (t->revision == revision)
517                                 have_rev = 1;
518                 }
519         }
520         return have_rev;
521 }
522
523 /* Returns true or false (if no such extension at all) */
524 static inline int find_revision(const char *name, u8 revision,
525                                 int (*revfn)(const char *, u8, int *),
526                                 int *err)
527 {
528         int have_rev, best = -1;
529
530         if (down_interruptible(&ipt_mutex) != 0) {
531                 *err = -EINTR;
532                 return 1;
533         }
534         have_rev = revfn(name, revision, &best);
535         up(&ipt_mutex);
536
537         /* Nothing at all?  Return 0 to try loading module. */
538         if (best == -1) {
539                 *err = -ENOENT;
540                 return 0;
541         }
542
543         *err = best;
544         if (!have_rev)
545                 *err = -EPROTONOSUPPORT;
546         return 1;
547 }
548
549
550 /* All zeroes == unconditional rule. */
551 static inline int
552 unconditional(const struct ipt_ip *ip)
553 {
554         unsigned int i;
555
556         for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
557                 if (((__u32 *)ip)[i])
558                         return 0;
559
560         return 1;
561 }
562
/*
 * For every hook set in valid_hooks, follow the rules reachable from
 * newinfo->hook_entry[hook] and OR (1 << hook) into each visited entry's
 * comefrom.
 *
 * The walk is iterative.  While an entry is on the current path, bit
 * NF_IP_NUMHOOKS of its comefrom is set and its counters.pcnt holds the
 * position it was reached from; both are cleared again while
 * backtracking.  Reaching an entry that still has that bit set means the
 * ruleset loops: a message is printed and 0 is returned.  Returns 1 once
 * all hooks have been walked.
 */
563 /* Figures out from what hook each rule can be called: returns 0 if
564    there are loops.  Puts hook bitmask in comefrom. */
565 static int
566 mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
567 {
568         unsigned int hook;
569
570         /* No recursion; use packet counter to save back ptrs (reset
571            to 0 as we leave), and comefrom to save source hook bitmask */
572         for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
573                 unsigned int pos = newinfo->hook_entry[hook];
574                 struct ipt_entry *e
575                         = (struct ipt_entry *)(newinfo->entries + pos);
576
577                 if (!(valid_hooks & (1 << hook)))
578                         continue;
579
580                 /* Set initial back pointer. */
581                 e->counters.pcnt = pos;
582
583                 for (;;) {
584                         struct ipt_standard_target *t
585                                 = (void *)ipt_get_target(e);
586
                            /* Entry is already on the current path: loop. */
587                         if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
588                                 printk("iptables: loop hook %u pos %u %08X.\n",
589                                        hook, pos, e->comefrom);
590                                 return 0;
591                         }
592                         e->comefrom
593                                 |= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
594
                            /* End of a chain: no match extensions
                             * (target_offset == sizeof(struct ipt_entry)),
                             * standard target with a negative verdict, and
                             * an all-zero IP part. */
595                         /* Unconditional return/END. */
596                         if (e->target_offset == sizeof(struct ipt_entry)
597                             && (strcmp(t->target.u.user.name,
598                                        IPT_STANDARD_TARGET) == 0)
599                             && t->verdict < 0
600                             && unconditional(&e->ip)) {
601                                 unsigned int oldpos, size;
602
603                                 /* Return: backtrack through the last
604                                    big jump. */
605                                 do {
606                                         e->comefrom ^= (1<<NF_IP_NUMHOOKS);
607 #ifdef DEBUG_IP_FIREWALL_USER
608                                         if (e->comefrom
609                                             & (1 << NF_IP_NUMHOOKS)) {
610                                                 duprintf("Back unset "
611                                                          "on hook %u "
612                                                          "rule %u\n",
613                                                          hook, pos);
614                                         }
615 #endif
616                                         oldpos = pos;
617                                         pos = e->counters.pcnt;
618                                         e->counters.pcnt = 0;
619
620                                         /* We're at the start. */
621                                         if (pos == oldpos)
622                                                 goto next;
623
624                                         e = (struct ipt_entry *)
625                                                 (newinfo->entries + pos);
                                            /* Keep unwinding while the step just undone
                                             * was a fall-through from the previous entry. */
626                                 } while (oldpos == pos + e->next_offset);
627
628                                 /* Move along one */
629                                 size = e->next_offset;
630                                 e = (struct ipt_entry *)
631                                         (newinfo->entries + pos + size);
632                                 e->counters.pcnt = pos;
633                                 pos += size;
634                         } else {
635                                 int newpos = t->verdict;
636
637                                 if (strcmp(t->target.u.user.name,
638                                            IPT_STANDARD_TARGET) == 0
639                                     && newpos >= 0) {
640                                         /* This a jump; chase it. */
641                                         duprintf("Jump rule %u -> %u\n",
642                                                  pos, newpos);
643                                 } else {
644                                         /* ... this is a fallthru */
645                                         newpos = pos + e->next_offset;
646                                 }
                                    /* Step to newpos, recording where we came from. */
647                                 e = (struct ipt_entry *)
648                                         (newinfo->entries + newpos);
649                                 e->counters.pcnt = pos;
650                                 pos = newpos;
651                         }
652                 }
653                 next:
654                 duprintf("Finished chain %u\n", hook);
655         }
656         return 1;
657 }
658
659 static inline int
660 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
661 {
662         if (i && (*i)-- == 0)
663                 return 1;
664
665         if (m->u.kernel.match->destroy)
666                 m->u.kernel.match->destroy(m->data,
667                                            m->u.match_size - sizeof(*m));
668         module_put(m->u.kernel.match->me);
669         return 0;
670 }
671
672 static inline int
673 standard_check(const struct ipt_entry_target *t,
674                unsigned int max_offset)
675 {
676         struct ipt_standard_target *targ = (void *)t;
677
678         /* Check standard info. */
679         if (t->u.target_size
680             != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
681                 duprintf("standard_check: target size %u != %u\n",
682                          t->u.target_size,
683                          IPT_ALIGN(sizeof(struct ipt_standard_target)));
684                 return 0;
685         }
686
687         if (targ->verdict >= 0
688             && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
689                 duprintf("ipt_standard_check: bad verdict (%i)\n",
690                          targ->verdict);
691                 return 0;
692         }
693
694         if (targ->verdict < -NF_MAX_VERDICT - 1) {
695                 duprintf("ipt_standard_check: bad negative verdict (%i)\n",
696                          targ->verdict);
697                 return 0;
698         }
699         return 1;
700 }
701
702 static inline int
703 check_match(struct ipt_entry_match *m,
704             const char *name,
705             const struct ipt_ip *ip,
706             unsigned int hookmask,
707             unsigned int *i)
708 {
709         struct ipt_match *match;
710
711         match = try_then_request_module(find_match(m->u.user.name,
712                                                    m->u.user.revision),
713                                         "ipt_%s", m->u.user.name);
714         if (IS_ERR(match) || !match) {
715                 duprintf("check_match: `%s' not found\n", m->u.user.name);
716                 return match ? PTR_ERR(match) : -ENOENT;
717         }
718         m->u.kernel.match = match;
719
720         if (m->u.kernel.match->checkentry
721             && !m->u.kernel.match->checkentry(name, ip, m->data,
722                                               m->u.match_size - sizeof(*m),
723                                               hookmask)) {
724                 module_put(m->u.kernel.match->me);
725                 duprintf("ip_tables: check failed for `%s'.\n",
726                          m->u.kernel.match->name);
727                 return -EINVAL;
728         }
729
730         (*i)++;
731         return 0;
732 }
733
734 static struct ipt_target ipt_standard_target;
735
736 static inline int
737 check_entry(struct ipt_entry *e, const char *name, unsigned int size,
738             unsigned int *i)
739 {
740         struct ipt_entry_target *t;
741         struct ipt_target *target;
742         int ret;
743         unsigned int j;
744
745         if (!ip_checkentry(&e->ip)) {
746                 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
747                 return -EINVAL;
748         }
749
750         j = 0;
751         ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
752         if (ret != 0)
753                 goto cleanup_matches;
754
755         t = ipt_get_target(e);
756         target = try_then_request_module(find_target(t->u.user.name,
757                                                      t->u.user.revision),
758                                          "ipt_%s", t->u.user.name);
759         if (IS_ERR(target) || !target) {
760                 duprintf("check_entry: `%s' not found\n", t->u.user.name);
761                 ret = target ? PTR_ERR(target) : -ENOENT;
762                 goto cleanup_matches;
763         }
764         t->u.kernel.target = target;
765
766         if (t->u.kernel.target == &ipt_standard_target) {
767                 if (!standard_check(t, size)) {
768                         ret = -EINVAL;
769                         goto cleanup_matches;
770                 }
771         } else if (t->u.kernel.target->checkentry
772                    && !t->u.kernel.target->checkentry(name, e, t->data,
773                                                       t->u.target_size
774                                                       - sizeof(*t),
775                                                       e->comefrom)) {
776                 module_put(t->u.kernel.target->me);
777                 duprintf("ip_tables: check failed for `%s'.\n",
778                          t->u.kernel.target->name);
779                 ret = -EINVAL;
780                 goto cleanup_matches;
781         }
782
783         (*i)++;
784         return 0;
785
786  cleanup_matches:
787         IPT_MATCH_ITERATE(e, cleanup_match, &j);
788         return ret;
789 }
790
791 static inline int
792 check_entry_size_and_hooks(struct ipt_entry *e,
793                            struct ipt_table_info *newinfo,
794                            unsigned char *base,
795                            unsigned char *limit,
796                            const unsigned int *hook_entries,
797                            const unsigned int *underflows,
798                            unsigned int *i)
799 {
800         unsigned int h;
801
802         if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
803             || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
804                 duprintf("Bad offset %p\n", e);
805                 return -EINVAL;
806         }
807
808         if (e->next_offset
809             < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
810                 duprintf("checking: element %p size %u\n",
811                          e, e->next_offset);
812                 return -EINVAL;
813         }
814
815         /* Check hooks & underflows */
816         for (h = 0; h < NF_IP_NUMHOOKS; h++) {
817                 if ((unsigned char *)e - base == hook_entries[h])
818                         newinfo->hook_entry[h] = hook_entries[h];
819                 if ((unsigned char *)e - base == underflows[h])
820                         newinfo->underflow[h] = underflows[h];
821         }
822
823         /* FIXME: underflows must be unconditional, standard verdicts
824            < 0 (not IPT_RETURN). --RR */
825
826         /* Clear counters and comefrom */
827         e->counters = ((struct ipt_counters) { 0, 0 });
828         e->comefrom = 0;
829
830         (*i)++;
831         return 0;
832 }
833
834 static inline int
835 cleanup_entry(struct ipt_entry *e, unsigned int *i)
836 {
837         struct ipt_entry_target *t;
838
839         if (i && (*i)-- == 0)
840                 return 1;
841
842         /* Cleanup all matches */
843         IPT_MATCH_ITERATE(e, cleanup_match, NULL);
844         t = ipt_get_target(e);
845         if (t->u.kernel.target->destroy)
846                 t->u.kernel.target->destroy(t->data,
847                                             t->u.target_size - sizeof(*t));
848         module_put(t->u.kernel.target->me);
849         return 0;
850 }
851
852 /* Checks and translates the user-supplied table segment (held in
853    newinfo) */
854 static int
855 translate_table(const char *name,
856                 unsigned int valid_hooks,
857                 struct ipt_table_info *newinfo,
858                 unsigned int size,
859                 unsigned int number,
860                 const unsigned int *hook_entries,
861                 const unsigned int *underflows)
862 {
863         unsigned int i;
864         int ret;
865
866         newinfo->size = size;
867         newinfo->number = number;
868
869         /* Init all hooks to impossible value. */
870         for (i = 0; i < NF_IP_NUMHOOKS; i++) {
871                 newinfo->hook_entry[i] = 0xFFFFFFFF;
872                 newinfo->underflow[i] = 0xFFFFFFFF;
873         }
874
875         duprintf("translate_table: size %u\n", newinfo->size);
876         i = 0;
877         /* Walk through entries, checking offsets. */
878         ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
879                                 check_entry_size_and_hooks,
880                                 newinfo,
881                                 newinfo->entries,
882                                 newinfo->entries + size,
883                                 hook_entries, underflows, &i);
884         if (ret != 0)
885                 return ret;
886
887         if (i != number) {
888                 duprintf("translate_table: %u not %u entries\n",
889                          i, number);
890                 return -EINVAL;
891         }
892
893         /* Check hooks all assigned */
894         for (i = 0; i < NF_IP_NUMHOOKS; i++) {
895                 /* Only hooks which are valid */
896                 if (!(valid_hooks & (1 << i)))
897                         continue;
898                 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
899                         duprintf("Invalid hook entry %u %u\n",
900                                  i, hook_entries[i]);
901                         return -EINVAL;
902                 }
903                 if (newinfo->underflow[i] == 0xFFFFFFFF) {
904                         duprintf("Invalid underflow %u %u\n",
905                                  i, underflows[i]);
906                         return -EINVAL;
907                 }
908         }
909
910         if (!mark_source_chains(newinfo, valid_hooks))
911                 return -ELOOP;
912
913         /* Finally, each sanity check must pass */
914         i = 0;
915         ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
916                                 check_entry, name, size, &i);
917
918         if (ret != 0) {
919                 IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
920                                   cleanup_entry, &i);
921                 return ret;
922         }
923
924         /* And one copy for every other CPU */
925         for_each_cpu(i) {
926                 if (i == 0)
927                         continue;
928                 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i,
929                        newinfo->entries,
930                        SMP_ALIGN(newinfo->size));
931         }
932
933         return ret;
934 }
935
936 static struct ipt_table_info *
937 replace_table(struct ipt_table *table,
938               unsigned int num_counters,
939               struct ipt_table_info *newinfo,
940               int *error)
941 {
942         struct ipt_table_info *oldinfo;
943
944 #ifdef CONFIG_NETFILTER_DEBUG
945         {
946                 struct ipt_entry *table_base;
947                 unsigned int i;
948
949                 for_each_cpu(i) {
950                         table_base =
951                                 (void *)newinfo->entries
952                                 + TABLE_OFFSET(newinfo, i);
953
954                         table_base->comefrom = 0xdead57ac;
955                 }
956         }
957 #endif
958
959         /* Do the substitution. */
960         write_lock_bh(&table->lock);
961         /* Check inside lock: is the old number correct? */
962         if (num_counters != table->private->number) {
963                 duprintf("num_counters != table->private->number (%u/%u)\n",
964                          num_counters, table->private->number);
965                 write_unlock_bh(&table->lock);
966                 *error = -EAGAIN;
967                 return NULL;
968         }
969         oldinfo = table->private;
970         table->private = newinfo;
971         newinfo->initial_entries = oldinfo->initial_entries;
972         write_unlock_bh(&table->lock);
973
974         return oldinfo;
975 }
976
977 /* Gets counters. */
978 static inline int
979 add_entry_to_counter(const struct ipt_entry *e,
980                      struct ipt_counters total[],
981                      unsigned int *i)
982 {
983         ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
984
985         (*i)++;
986         return 0;
987 }
988
989 static void
990 get_counters(const struct ipt_table_info *t,
991              struct ipt_counters counters[])
992 {
993         unsigned int cpu;
994         unsigned int i;
995
996         for_each_cpu(cpu) {
997                 i = 0;
998                 IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
999                                   t->size,
1000                                   add_entry_to_counter,
1001                                   counters,
1002                                   &i);
1003         }
1004 }
1005
1006 static int
1007 copy_entries_to_user(unsigned int total_size,
1008                      struct ipt_table *table,
1009                      void __user *userptr)
1010 {
1011         unsigned int off, num, countersize;
1012         struct ipt_entry *e;
1013         struct ipt_counters *counters;
1014         int ret = 0;
1015
1016         /* We need atomic snapshot of counters: rest doesn't change
1017            (other than comefrom, which userspace doesn't care
1018            about). */
1019         countersize = sizeof(struct ipt_counters) * table->private->number;
1020         counters = vmalloc(countersize);
1021
1022         if (counters == NULL)
1023                 return -ENOMEM;
1024
1025         /* First, sum counters... */
1026         memset(counters, 0, countersize);
1027         write_lock_bh(&table->lock);
1028         get_counters(table->private, counters);
1029         write_unlock_bh(&table->lock);
1030
1031         /* ... then copy entire thing from CPU 0... */
1032         if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
1033                 ret = -EFAULT;
1034                 goto free_counters;
1035         }
1036
1037         /* FIXME: use iterator macros --RR */
1038         /* ... then go back and fix counters and names */
1039         for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
1040                 unsigned int i;
1041                 struct ipt_entry_match *m;
1042                 struct ipt_entry_target *t;
1043
1044                 e = (struct ipt_entry *)(table->private->entries + off);
1045                 if (copy_to_user(userptr + off
1046                                  + offsetof(struct ipt_entry, counters),
1047                                  &counters[num],
1048                                  sizeof(counters[num])) != 0) {
1049                         ret = -EFAULT;
1050                         goto free_counters;
1051                 }
1052
1053                 for (i = sizeof(struct ipt_entry);
1054                      i < e->target_offset;
1055                      i += m->u.match_size) {
1056                         m = (void *)e + i;
1057
1058                         if (copy_to_user(userptr + off + i
1059                                          + offsetof(struct ipt_entry_match,
1060                                                     u.user.name),
1061                                          m->u.kernel.match->name,
1062                                          strlen(m->u.kernel.match->name)+1)
1063                             != 0) {
1064                                 ret = -EFAULT;
1065                                 goto free_counters;
1066                         }
1067                 }
1068
1069                 t = ipt_get_target(e);
1070                 if (copy_to_user(userptr + off + e->target_offset
1071                                  + offsetof(struct ipt_entry_target,
1072                                             u.user.name),
1073                                  t->u.kernel.target->name,
1074                                  strlen(t->u.kernel.target->name)+1) != 0) {
1075                         ret = -EFAULT;
1076                         goto free_counters;
1077                 }
1078         }
1079
1080  free_counters:
1081         vfree(counters);
1082         return ret;
1083 }
1084
1085 static int
1086 get_entries(const struct ipt_get_entries *entries,
1087             struct ipt_get_entries __user *uptr)
1088 {
1089         int ret;
1090         struct ipt_table *t;
1091
1092         t = find_table_lock(entries->name);
1093         if (t && !IS_ERR(t)) {
1094                 duprintf("t->private->number = %u\n",
1095                          t->private->number);
1096                 if (entries->size == t->private->size)
1097                         ret = copy_entries_to_user(t->private->size,
1098                                                    t, uptr->entrytable);
1099                 else {
1100                         duprintf("get_entries: I've got %u not %u!\n",
1101                                  t->private->size,
1102                                  entries->size);
1103                         ret = -EINVAL;
1104                 }
1105                 module_put(t->me);
1106                 up(&ipt_mutex);
1107         } else
1108                 ret = t ? PTR_ERR(t) : -ENOENT;
1109
1110         return ret;
1111 }
1112
1113 static int
1114 do_replace(void __user *user, unsigned int len)
1115 {
1116         int ret;
1117         struct ipt_replace tmp;
1118         struct ipt_table *t;
1119         struct ipt_table_info *newinfo, *oldinfo;
1120         struct ipt_counters *counters;
1121
1122         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1123                 return -EFAULT;
1124
1125         /* Hack: Causes ipchains to give correct error msg --RR */
1126         if (len != sizeof(tmp) + tmp.size)
1127                 return -ENOPROTOOPT;
1128
1129         /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
1130         if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
1131                 return -ENOMEM;
1132
1133         newinfo = vmalloc(sizeof(struct ipt_table_info)
1134                           + SMP_ALIGN(tmp.size) * 
1135                                 (highest_possible_processor_id()+1));
1136         if (!newinfo)
1137                 return -ENOMEM;
1138
1139         if (copy_from_user(newinfo->entries, user + sizeof(tmp),
1140                            tmp.size) != 0) {
1141                 ret = -EFAULT;
1142                 goto free_newinfo;
1143         }
1144
1145         counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
1146         if (!counters) {
1147                 ret = -ENOMEM;
1148                 goto free_newinfo;
1149         }
1150         memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));
1151
1152         ret = translate_table(tmp.name, tmp.valid_hooks,
1153                               newinfo, tmp.size, tmp.num_entries,
1154                               tmp.hook_entry, tmp.underflow);
1155         if (ret != 0)
1156                 goto free_newinfo_counters;
1157
1158         duprintf("ip_tables: Translated table\n");
1159
1160         t = try_then_request_module(find_table_lock(tmp.name),
1161                                     "iptable_%s", tmp.name);
1162         if (!t || IS_ERR(t)) {
1163                 ret = t ? PTR_ERR(t) : -ENOENT;
1164                 goto free_newinfo_counters_untrans;
1165         }
1166
1167         /* You lied! */
1168         if (tmp.valid_hooks != t->valid_hooks) {
1169                 duprintf("Valid hook crap: %08X vs %08X\n",
1170                          tmp.valid_hooks, t->valid_hooks);
1171                 ret = -EINVAL;
1172                 goto put_module;
1173         }
1174
1175         oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
1176         if (!oldinfo)
1177                 goto put_module;
1178
1179         /* Update module usage count based on number of rules */
1180         duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
1181                 oldinfo->number, oldinfo->initial_entries, newinfo->number);
1182         if ((oldinfo->number > oldinfo->initial_entries) || 
1183             (newinfo->number <= oldinfo->initial_entries)) 
1184                 module_put(t->me);
1185         if ((oldinfo->number > oldinfo->initial_entries) &&
1186             (newinfo->number <= oldinfo->initial_entries))
1187                 module_put(t->me);
1188
1189         /* Get the old counters. */
1190         get_counters(oldinfo, counters);
1191         /* Decrease module usage counts and free resource */
1192         IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
1193         vfree(oldinfo);
1194         if (copy_to_user(tmp.counters, counters,
1195                          sizeof(struct ipt_counters) * tmp.num_counters) != 0)
1196                 ret = -EFAULT;
1197         vfree(counters);
1198         up(&ipt_mutex);
1199         return ret;
1200
1201  put_module:
1202         module_put(t->me);
1203         up(&ipt_mutex);
1204  free_newinfo_counters_untrans:
1205         IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
1206  free_newinfo_counters:
1207         vfree(counters);
1208  free_newinfo:
1209         vfree(newinfo);
1210         return ret;
1211 }
1212
1213 /* We're lazy, and add to the first CPU; overflow works its fey magic
1214  * and everything is OK. */
1215 static inline int
1216 add_counter_to_entry(struct ipt_entry *e,
1217                      const struct ipt_counters addme[],
1218                      unsigned int *i)
1219 {
1220 #if 0
1221         duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1222                  *i,
1223                  (long unsigned int)e->counters.pcnt,
1224                  (long unsigned int)e->counters.bcnt,
1225                  (long unsigned int)addme[*i].pcnt,
1226                  (long unsigned int)addme[*i].bcnt);
1227 #endif
1228
1229         ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1230
1231         (*i)++;
1232         return 0;
1233 }
1234
1235 static int
1236 do_add_counters(void __user *user, unsigned int len)
1237 {
1238         unsigned int i;
1239         struct ipt_counters_info tmp, *paddc;
1240         struct ipt_table *t;
1241         int ret = 0;
1242
1243         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1244                 return -EFAULT;
1245
1246         if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
1247                 return -EINVAL;
1248
1249         paddc = vmalloc(len);
1250         if (!paddc)
1251                 return -ENOMEM;
1252
1253         if (copy_from_user(paddc, user, len) != 0) {
1254                 ret = -EFAULT;
1255                 goto free;
1256         }
1257
1258         t = find_table_lock(tmp.name);
1259         if (!t || IS_ERR(t)) {
1260                 ret = t ? PTR_ERR(t) : -ENOENT;
1261                 goto free;
1262         }
1263
1264         write_lock_bh(&t->lock);
1265         if (t->private->number != paddc->num_counters) {
1266                 ret = -EINVAL;
1267                 goto unlock_up_free;
1268         }
1269
1270         i = 0;
1271         IPT_ENTRY_ITERATE(t->private->entries,
1272                           t->private->size,
1273                           add_counter_to_entry,
1274                           paddc->counters,
1275                           &i);
1276  unlock_up_free:
1277         write_unlock_bh(&t->lock);
1278         up(&ipt_mutex);
1279         module_put(t->me);
1280  free:
1281         vfree(paddc);
1282
1283         return ret;
1284 }
1285
1286 static int
1287 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1288 {
1289         int ret;
1290
1291         if (!capable(CAP_NET_ADMIN))
1292                 return -EPERM;
1293
1294         switch (cmd) {
1295         case IPT_SO_SET_REPLACE:
1296                 ret = do_replace(user, len);
1297                 break;
1298
1299         case IPT_SO_SET_ADD_COUNTERS:
1300                 ret = do_add_counters(user, len);
1301                 break;
1302
1303         default:
1304                 duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
1305                 ret = -EINVAL;
1306         }
1307
1308         return ret;
1309 }
1310
1311 static int
1312 do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1313 {
1314         int ret;
1315
1316         if (!capable(CAP_NET_ADMIN))
1317                 return -EPERM;
1318
1319         switch (cmd) {
1320         case IPT_SO_GET_INFO: {
1321                 char name[IPT_TABLE_MAXNAMELEN];
1322                 struct ipt_table *t;
1323
1324                 if (*len != sizeof(struct ipt_getinfo)) {
1325                         duprintf("length %u != %u\n", *len,
1326                                  sizeof(struct ipt_getinfo));
1327                         ret = -EINVAL;
1328                         break;
1329                 }
1330
1331                 if (copy_from_user(name, user, sizeof(name)) != 0) {
1332                         ret = -EFAULT;
1333                         break;
1334                 }
1335                 name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1336
1337                 t = try_then_request_module(find_table_lock(name),
1338                                             "iptable_%s", name);
1339                 if (t && !IS_ERR(t)) {
1340                         struct ipt_getinfo info;
1341
1342                         info.valid_hooks = t->valid_hooks;
1343                         memcpy(info.hook_entry, t->private->hook_entry,
1344                                sizeof(info.hook_entry));
1345                         memcpy(info.underflow, t->private->underflow,
1346                                sizeof(info.underflow));
1347                         info.num_entries = t->private->number;
1348                         info.size = t->private->size;
1349                         memcpy(info.name, name, sizeof(info.name));
1350
1351                         if (copy_to_user(user, &info, *len) != 0)
1352                                 ret = -EFAULT;
1353                         else
1354                                 ret = 0;
1355                         up(&ipt_mutex);
1356                         module_put(t->me);
1357                 } else
1358                         ret = t ? PTR_ERR(t) : -ENOENT;
1359         }
1360         break;
1361
1362         case IPT_SO_GET_ENTRIES: {
1363                 struct ipt_get_entries get;
1364
1365                 if (*len < sizeof(get)) {
1366                         duprintf("get_entries: %u < %u\n", *len, sizeof(get));
1367                         ret = -EINVAL;
1368                 } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
1369                         ret = -EFAULT;
1370                 } else if (*len != sizeof(struct ipt_get_entries) + get.size) {
1371                         duprintf("get_entries: %u != %u\n", *len,
1372                                  sizeof(struct ipt_get_entries) + get.size);
1373                         ret = -EINVAL;
1374                 } else
1375                         ret = get_entries(&get, user);
1376                 break;
1377         }
1378
1379         case IPT_SO_GET_REVISION_MATCH:
1380         case IPT_SO_GET_REVISION_TARGET: {
1381                 struct ipt_get_revision rev;
1382                 int (*revfn)(const char *, u8, int *);
1383
1384                 if (*len != sizeof(rev)) {
1385                         ret = -EINVAL;
1386                         break;
1387                 }
1388                 if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
1389                         ret = -EFAULT;
1390                         break;
1391                 }
1392
1393                 if (cmd == IPT_SO_GET_REVISION_TARGET)
1394                         revfn = target_revfn;
1395                 else
1396                         revfn = match_revfn;
1397
1398                 try_then_request_module(find_revision(rev.name, rev.revision,
1399                                                       revfn, &ret),
1400                                         "ipt_%s", rev.name);
1401                 break;
1402         }
1403
1404         default:
1405                 duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
1406                 ret = -EINVAL;
1407         }
1408
1409         return ret;
1410 }
1411
1412 /* Registration hooks for targets. */
1413 int
1414 ipt_register_target(struct ipt_target *target)
1415 {
1416         int ret;
1417
1418         ret = down_interruptible(&ipt_mutex);
1419         if (ret != 0)
1420                 return ret;
1421         list_add(&target->list, &ipt_target);
1422         up(&ipt_mutex);
1423         return ret;
1424 }
1425
1426 void
1427 ipt_unregister_target(struct ipt_target *target)
1428 {
1429         down(&ipt_mutex);
1430         LIST_DELETE(&ipt_target, target);
1431         up(&ipt_mutex);
1432 }
1433
1434 int
1435 ipt_register_match(struct ipt_match *match)
1436 {
1437         int ret;
1438
1439         ret = down_interruptible(&ipt_mutex);
1440         if (ret != 0)
1441                 return ret;
1442
1443         list_add(&match->list, &ipt_match);
1444         up(&ipt_mutex);
1445
1446         return ret;
1447 }
1448
1449 void
1450 ipt_unregister_match(struct ipt_match *match)
1451 {
1452         down(&ipt_mutex);
1453         LIST_DELETE(&ipt_match, match);
1454         up(&ipt_mutex);
1455 }
1456
1457 int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl)
1458 {
1459         int ret;
1460         struct ipt_table_info *newinfo;
1461         static struct ipt_table_info bootstrap
1462                 = { 0, 0, 0, { 0 }, { 0 }, { } };
1463
1464         newinfo = vmalloc(sizeof(struct ipt_table_info)
1465                           + SMP_ALIGN(repl->size) * 
1466                                         (highest_possible_processor_id()+1));
1467         if (!newinfo)
1468                 return -ENOMEM;
1469
1470         memcpy(newinfo->entries, repl->entries, repl->size);
1471
1472         ret = translate_table(table->name, table->valid_hooks,
1473                               newinfo, repl->size,
1474                               repl->num_entries,
1475                               repl->hook_entry,
1476                               repl->underflow);
1477         if (ret != 0) {
1478                 vfree(newinfo);
1479                 return ret;
1480         }
1481
1482         ret = down_interruptible(&ipt_mutex);
1483         if (ret != 0) {
1484                 vfree(newinfo);
1485                 return ret;
1486         }
1487
1488         /* Don't autoload: we'd eat our tail... */
1489         if (list_named_find(&ipt_tables, table->name)) {
1490                 ret = -EEXIST;
1491                 goto free_unlock;
1492         }
1493
1494         /* Simplifies replace_table code. */
1495         table->private = &bootstrap;
1496         if (!replace_table(table, 0, newinfo, &ret))
1497                 goto free_unlock;
1498
1499         duprintf("table->private->number = %u\n",
1500                  table->private->number);
1501         
1502         /* save number of initial entries */
1503         table->private->initial_entries = table->private->number;
1504
1505         rwlock_init(&table->lock);
1506         list_prepend(&ipt_tables, table);
1507
1508  unlock:
1509         up(&ipt_mutex);
1510         return ret;
1511
1512  free_unlock:
1513         vfree(newinfo);
1514         goto unlock;
1515 }
1516
1517 void ipt_unregister_table(struct ipt_table *table)
1518 {
1519         down(&ipt_mutex);
1520         LIST_DELETE(&ipt_tables, table);
1521         up(&ipt_mutex);
1522
1523         /* Decrease module usage counts and free resources */
1524         IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
1525                           cleanup_entry, NULL);
1526         vfree(table->private);
1527 }
1528
/*
 * Test @port against the inclusive range [@min, @max].
 *
 * The in-range result (0 or 1) is XORed with @invert, so with invert == 0
 * this returns 1 when the port is matched by the range and 0 otherwise,
 * and with invert == 1 the answer is flipped.
 */
static inline int
port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
{
        int in_range = (min <= port && port <= max);

        return in_range ^ invert;
}
1538
1539 static int
1540 tcp_find_option(u_int8_t option,
1541                 const struct sk_buff *skb,
1542                 unsigned int optlen,
1543                 int invert,
1544                 int *hotdrop)
1545 {
1546         /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
1547         u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
1548         unsigned int i;
1549
1550         duprintf("tcp_match: finding option\n");
1551
1552         if (!optlen)
1553                 return invert;
1554
1555         /* If we don't have the whole header, drop packet. */
1556         op = skb_header_pointer(skb,
1557                                 skb->nh.iph->ihl*4 + sizeof(struct tcphdr),
1558                                 optlen, _opt);
1559         if (op == NULL) {
1560                 *hotdrop = 1;
1561                 return 0;
1562         }
1563
1564         for (i = 0; i < optlen; ) {
1565                 if (op[i] == option) return !invert;
1566                 if (op[i] < 2) i++;
1567                 else i += op[i+1]?:1;
1568         }
1569
1570         return invert;
1571 }
1572
1573 static int
1574 tcp_match(const struct sk_buff *skb,
1575           const struct net_device *in,
1576           const struct net_device *out,
1577           const void *matchinfo,
1578           int offset,
1579           int *hotdrop)
1580 {
1581         struct tcphdr _tcph, *th;
1582         const struct ipt_tcp *tcpinfo = matchinfo;
1583
1584         if (offset) {
1585                 /* To quote Alan:
1586
1587                    Don't allow a fragment of TCP 8 bytes in. Nobody normal
1588                    causes this. Its a cracker trying to break in by doing a
1589                    flag overwrite to pass the direction checks.
1590                 */
1591                 if (offset == 1) {
1592                         duprintf("Dropping evil TCP offset=1 frag.\n");
1593                         *hotdrop = 1;
1594                 }
1595                 /* Must not be a fragment. */
1596                 return 0;
1597         }
1598
1599 #define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
1600
1601         th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1602                                 sizeof(_tcph), &_tcph);
1603         if (th == NULL) {
1604                 /* We've been asked to examine this packet, and we
1605                    can't.  Hence, no choice but to drop. */
1606                 duprintf("Dropping evil TCP offset=0 tinygram.\n");
1607                 *hotdrop = 1;
1608                 return 0;
1609         }
1610
1611         if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
1612                         ntohs(th->source),
1613                         !!(tcpinfo->invflags & IPT_TCP_INV_SRCPT)))
1614                 return 0;
1615         if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
1616                         ntohs(th->dest),
1617                         !!(tcpinfo->invflags & IPT_TCP_INV_DSTPT)))
1618                 return 0;
1619         if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
1620                       == tcpinfo->flg_cmp,
1621                       IPT_TCP_INV_FLAGS))
1622                 return 0;
1623         if (tcpinfo->option) {
1624                 if (th->doff * 4 < sizeof(_tcph)) {
1625                         *hotdrop = 1;
1626                         return 0;
1627                 }
1628                 if (!tcp_find_option(tcpinfo->option, skb,
1629                                      th->doff*4 - sizeof(_tcph),
1630                                      tcpinfo->invflags & IPT_TCP_INV_OPTION,
1631                                      hotdrop))
1632                         return 0;
1633         }
1634         return 1;
1635 }
1636
1637 /* Called when user tries to insert an entry of this type. */
1638 static int
1639 tcp_checkentry(const char *tablename,
1640                const struct ipt_ip *ip,
1641                void *matchinfo,
1642                unsigned int matchsize,
1643                unsigned int hook_mask)
1644 {
1645         const struct ipt_tcp *tcpinfo = matchinfo;
1646
1647         /* Must specify proto == TCP, and no unknown invflags */
1648         return ip->proto == IPPROTO_TCP
1649                 && !(ip->invflags & IPT_INV_PROTO)
1650                 && matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
1651                 && !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
1652 }
1653
1654 static int
1655 udp_match(const struct sk_buff *skb,
1656           const struct net_device *in,
1657           const struct net_device *out,
1658           const void *matchinfo,
1659           int offset,
1660           int *hotdrop)
1661 {
1662         struct udphdr _udph, *uh;
1663         const struct ipt_udp *udpinfo = matchinfo;
1664
1665         /* Must not be a fragment. */
1666         if (offset)
1667                 return 0;
1668
1669         uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1670                                 sizeof(_udph), &_udph);
1671         if (uh == NULL) {
1672                 /* We've been asked to examine this packet, and we
1673                    can't.  Hence, no choice but to drop. */
1674                 duprintf("Dropping evil UDP tinygram.\n");
1675                 *hotdrop = 1;
1676                 return 0;
1677         }
1678
1679         return port_match(udpinfo->spts[0], udpinfo->spts[1],
1680                           ntohs(uh->source),
1681                           !!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
1682                 && port_match(udpinfo->dpts[0], udpinfo->dpts[1],
1683                               ntohs(uh->dest),
1684                               !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
1685 }
1686
1687 /* Called when user tries to insert an entry of this type. */
1688 static int
1689 udp_checkentry(const char *tablename,
1690                const struct ipt_ip *ip,
1691                void *matchinfo,
1692                unsigned int matchinfosize,
1693                unsigned int hook_mask)
1694 {
1695         const struct ipt_udp *udpinfo = matchinfo;
1696
1697         /* Must specify proto == UDP, and no unknown invflags */
1698         if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
1699                 duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
1700                          IPPROTO_UDP);
1701                 return 0;
1702         }
1703         if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
1704                 duprintf("ipt_udp: matchsize %u != %u\n",
1705                          matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
1706                 return 0;
1707         }
1708         if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
1709                 duprintf("ipt_udp: unknown flags %X\n",
1710                          udpinfo->invflags);
1711                 return 0;
1712         }
1713
1714         return 1;
1715 }
1716
/*
 * Decide whether an ICMP type/code pair is covered by a rule.
 *
 * A test_type of 0xFF covers every type and code.  Otherwise the packet's
 * type must equal test_type and its code must lie in [min_code, max_code].
 * The outcome (1 = covered, 0 = not covered) is XORed with invert before
 * being returned.
 */
static inline int
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
                     u_int8_t type, u_int8_t code,
                     int invert)
{
        int covered;

        if (test_type == 0xFF)
                covered = 1;            /* wildcard type */
        else if (type != test_type)
                covered = 0;
        else
                covered = (min_code <= code && code <= max_code);

        return covered ^ invert;
}
1726
1727 static int
1728 icmp_match(const struct sk_buff *skb,
1729            const struct net_device *in,
1730            const struct net_device *out,
1731            const void *matchinfo,
1732            int offset,
1733            int *hotdrop)
1734 {
1735         struct icmphdr _icmph, *ic;
1736         const struct ipt_icmp *icmpinfo = matchinfo;
1737
1738         /* Must not be a fragment. */
1739         if (offset)
1740                 return 0;
1741
1742         ic = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1743                                 sizeof(_icmph), &_icmph);
1744         if (ic == NULL) {
1745                 /* We've been asked to examine this packet, and we
1746                  * can't.  Hence, no choice but to drop.
1747                  */
1748                 duprintf("Dropping evil ICMP tinygram.\n");
1749                 *hotdrop = 1;
1750                 return 0;
1751         }
1752
1753         return icmp_type_code_match(icmpinfo->type,
1754                                     icmpinfo->code[0],
1755                                     icmpinfo->code[1],
1756                                     ic->type, ic->code,
1757                                     !!(icmpinfo->invflags&IPT_ICMP_INV));
1758 }
1759
1760 /* Called when user tries to insert an entry of this type. */
1761 static int
1762 icmp_checkentry(const char *tablename,
1763            const struct ipt_ip *ip,
1764            void *matchinfo,
1765            unsigned int matchsize,
1766            unsigned int hook_mask)
1767 {
1768         const struct ipt_icmp *icmpinfo = matchinfo;
1769
1770         /* Must specify proto == ICMP, and no unknown invflags */
1771         return ip->proto == IPPROTO_ICMP
1772                 && !(ip->invflags & IPT_INV_PROTO)
1773                 && matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
1774                 && !(icmpinfo->invflags & ~IPT_ICMP_INV);
1775 }
1776
1777 /* The built-in targets: standard (NULL) and error. */
1778 static struct ipt_target ipt_standard_target = {
1779         .name           = IPT_STANDARD_TARGET,
1780 };
1781
1782 static struct ipt_target ipt_error_target = {
1783         .name           = IPT_ERROR_TARGET,
1784         .target         = ipt_error,
1785 };
1786
1787 static struct nf_sockopt_ops ipt_sockopts = {
1788         .pf             = PF_INET,
1789         .set_optmin     = IPT_BASE_CTL,
1790         .set_optmax     = IPT_SO_SET_MAX+1,
1791         .set            = do_ipt_set_ctl,
1792         .get_optmin     = IPT_BASE_CTL,
1793         .get_optmax     = IPT_SO_GET_MAX+1,
1794         .get            = do_ipt_get_ctl,
1795 };
1796
1797 static struct ipt_match tcp_matchstruct = {
1798         .name           = "tcp",
1799         .match          = &tcp_match,
1800         .checkentry     = &tcp_checkentry,
1801 };
1802
1803 static struct ipt_match udp_matchstruct = {
1804         .name           = "udp",
1805         .match          = &udp_match,
1806         .checkentry     = &udp_checkentry,
1807 };
1808
1809 static struct ipt_match icmp_matchstruct = {
1810         .name           = "icmp",
1811         .match          = &icmp_match,
1812         .checkentry     = &icmp_checkentry,
1813 };
1814
1815 #ifdef CONFIG_PROC_FS
1816 static inline int print_name(const char *i,
1817                              off_t start_offset, char *buffer, int length,
1818                              off_t *pos, unsigned int *count)
1819 {
1820         if ((*count)++ >= start_offset) {
1821                 unsigned int namelen;
1822
1823                 namelen = sprintf(buffer + *pos, "%s\n",
1824                                   i + sizeof(struct list_head));
1825                 if (*pos + namelen > length) {
1826                         /* Stop iterating */
1827                         return 1;
1828                 }
1829                 *pos += namelen;
1830         }
1831         return 0;
1832 }
1833
1834 static inline int print_target(const struct ipt_target *t,
1835                                off_t start_offset, char *buffer, int length,
1836                                off_t *pos, unsigned int *count)
1837 {
1838         if (t == &ipt_standard_target || t == &ipt_error_target)
1839                 return 0;
1840         return print_name((char *)t, start_offset, buffer, length, pos, count);
1841 }
1842
1843 static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
1844 {
1845         off_t pos = 0;
1846         unsigned int count = 0;
1847
1848         if (down_interruptible(&ipt_mutex) != 0)
1849                 return 0;
1850
1851         LIST_FIND(&ipt_tables, print_name, void *,
1852                   offset, buffer, length, &pos, &count);
1853
1854         up(&ipt_mutex);
1855
1856         /* `start' hack - see fs/proc/generic.c line ~105 */
1857         *start=(char *)((unsigned long)count-offset);
1858         return pos;
1859 }
1860
1861 static int ipt_get_targets(char *buffer, char **start, off_t offset, int length)
1862 {
1863         off_t pos = 0;
1864         unsigned int count = 0;
1865
1866         if (down_interruptible(&ipt_mutex) != 0)
1867                 return 0;
1868
1869         LIST_FIND(&ipt_target, print_target, struct ipt_target *,
1870                   offset, buffer, length, &pos, &count);
1871         
1872         up(&ipt_mutex);
1873
1874         *start = (char *)((unsigned long)count - offset);
1875         return pos;
1876 }
1877
1878 static int ipt_get_matches(char *buffer, char **start, off_t offset, int length)
1879 {
1880         off_t pos = 0;
1881         unsigned int count = 0;
1882
1883         if (down_interruptible(&ipt_mutex) != 0)
1884                 return 0;
1885         
1886         LIST_FIND(&ipt_match, print_name, void *,
1887                   offset, buffer, length, &pos, &count);
1888
1889         up(&ipt_mutex);
1890
1891         *start = (char *)((unsigned long)count - offset);
1892         return pos;
1893 }
1894
1895 static struct { char *name; get_info_t *get_info; } ipt_proc_entry[] =
1896 { { "ip_tables_names", ipt_get_tables },
1897   { "ip_tables_targets", ipt_get_targets },
1898   { "ip_tables_matches", ipt_get_matches },
1899   { NULL, NULL} };
1900 #endif /*CONFIG_PROC_FS*/
1901
1902 static int __init init(void)
1903 {
1904         int ret;
1905
1906         /* Noone else will be downing sem now, so we won't sleep */
1907         down(&ipt_mutex);
1908         list_append(&ipt_target, &ipt_standard_target);
1909         list_append(&ipt_target, &ipt_error_target);
1910         list_append(&ipt_match, &tcp_matchstruct);
1911         list_append(&ipt_match, &udp_matchstruct);
1912         list_append(&ipt_match, &icmp_matchstruct);
1913         up(&ipt_mutex);
1914
1915         /* Register setsockopt */
1916         ret = nf_register_sockopt(&ipt_sockopts);
1917         if (ret < 0) {
1918                 duprintf("Unable to register sockopts.\n");
1919                 return ret;
1920         }
1921
1922 #ifdef CONFIG_PROC_FS
1923         {
1924         struct proc_dir_entry *proc;
1925         int i;
1926
1927         for (i = 0; ipt_proc_entry[i].name; i++) {
1928                 proc = proc_net_create(ipt_proc_entry[i].name, 0,
1929                                        ipt_proc_entry[i].get_info);
1930                 if (!proc) {
1931                         while (--i >= 0)
1932                                 proc_net_remove(ipt_proc_entry[i].name);
1933                         nf_unregister_sockopt(&ipt_sockopts);
1934                         return -ENOMEM;
1935                 }
1936                 proc->owner = THIS_MODULE;
1937         }
1938         }
1939 #endif
1940
1941         printk("ip_tables: (C) 2000-2002 Netfilter core team\n");
1942         return 0;
1943 }
1944
1945 static void __exit fini(void)
1946 {
1947         nf_unregister_sockopt(&ipt_sockopts);
1948 #ifdef CONFIG_PROC_FS
1949         {
1950         int i;
1951         for (i = 0; ipt_proc_entry[i].name; i++)
1952                 proc_net_remove(ipt_proc_entry[i].name);
1953         }
1954 #endif
1955 }
1956
/* Symbols exported to other modules: table handling ... */
EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
EXPORT_SYMBOL(ipt_do_table);

/* ... match extensions ... */
EXPORT_SYMBOL(ipt_register_match);
EXPORT_SYMBOL(ipt_unregister_match);

/* ... and target extensions. */
EXPORT_SYMBOL(ipt_register_target);
EXPORT_SYMBOL(ipt_unregister_target);
EXPORT_SYMBOL(ipt_find_target);

/* Module entry and exit points. */
module_init(init);
module_exit(fini);