[NETFILTER] x_tables: Abstraction layer for {ip,ip6,arp}_tables
[linux-2.6.git] / net / ipv4 / netfilter / ip_tables.c
1 /*
2  * Packet matching code.
3  *
4  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5  * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  *
11  * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
12  *      - increase module usage count as soon as we have rules inside
13  *        a table
14  * 08 Oct 2005 Harald Welte <laforge@netfilter.org>
15  *      - Generalize into "x_tables" layer and "{ip,ip6,arp}_tables"
16  */
17 #include <linux/config.h>
18 #include <linux/cache.h>
19 #include <linux/capability.h>
20 #include <linux/skbuff.h>
21 #include <linux/kmod.h>
22 #include <linux/vmalloc.h>
23 #include <linux/netdevice.h>
24 #include <linux/module.h>
25 #include <linux/icmp.h>
26 #include <net/ip.h>
27 #include <asm/uaccess.h>
28 #include <asm/semaphore.h>
29 #include <linux/proc_fs.h>
30 #include <linux/err.h>
31 #include <linux/cpumask.h>
32
33 #include <linux/netfilter/x_tables.h>
34 #include <linux/netfilter_ipv4/ip_tables.h>
35
36 MODULE_LICENSE("GPL");
37 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
38 MODULE_DESCRIPTION("IPv4 packet filter");
39
40 /*#define DEBUG_IP_FIREWALL*/
41 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
42 /*#define DEBUG_IP_FIREWALL_USER*/
43
/* dprintf(): debug output for the packet-matching code.  Expands to
 * printk() only when DEBUG_IP_FIREWALL is defined; otherwise it expands
 * to nothing and its arguments are not evaluated. */
44 #ifdef DEBUG_IP_FIREWALL
45 #define dprintf(format, args...)  printk(format , ## args)
46 #else
47 #define dprintf(format, args...)
48 #endif
49
/* duprintf(): same as dprintf(), but controlled by
 * DEBUG_IP_FIREWALL_USER. */
50 #ifdef DEBUG_IP_FIREWALL_USER
51 #define duprintf(format, args...) printk(format , ## args)
52 #else
53 #define duprintf(format, args...)
54 #endif
55
/* IP_NF_ASSERT(x): non-fatal assertion.  With CONFIG_NETFILTER_DEBUG it
 * only prints the function, file and line when x is false (execution
 * continues); without it the macro expands to nothing, so x is not
 * evaluated. */
56 #ifdef CONFIG_NETFILTER_DEBUG
57 #define IP_NF_ASSERT(x)                                         \
58 do {                                                            \
59         if (!(x))                                               \
60                 printk("IP_NF_ASSERT: %s:%s:%u\n",              \
61                        __FUNCTION__, __FILE__, __LINE__);       \
62 } while(0)
63 #else
64 #define IP_NF_ASSERT(x)
65 #endif
66
67 #if 0
68 /* All the better to debug you with... */
69 #define static
70 #define inline
71 #endif
72
73 /*
74    We keep a set of rules for each CPU, so we can avoid write-locking
75    them in the softirq when updating the counters and therefore
76    only need to read-lock in the softirq; doing a write_lock_bh() in user
77    context stops packets coming through and allows user context to read
78    the counters or update the rules.
79
80    Hence the start of any table is given by get_table() below.  */
81
82 /* Returns whether matches rule or not. */
83 static inline int
84 ip_packet_match(const struct iphdr *ip,
85                 const char *indev,
86                 const char *outdev,
87                 const struct ipt_ip *ipinfo,
88                 int isfrag)
89 {
90         size_t i;
91         unsigned long ret;
92
93 #define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
94
95         if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
96                   IPT_INV_SRCIP)
97             || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
98                      IPT_INV_DSTIP)) {
99                 dprintf("Source or dest mismatch.\n");
100
101                 dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
102                         NIPQUAD(ip->saddr),
103                         NIPQUAD(ipinfo->smsk.s_addr),
104                         NIPQUAD(ipinfo->src.s_addr),
105                         ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
106                 dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
107                         NIPQUAD(ip->daddr),
108                         NIPQUAD(ipinfo->dmsk.s_addr),
109                         NIPQUAD(ipinfo->dst.s_addr),
110                         ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
111                 return 0;
112         }
113
114         /* Look for ifname matches; this should unroll nicely. */
115         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
116                 ret |= (((const unsigned long *)indev)[i]
117                         ^ ((const unsigned long *)ipinfo->iniface)[i])
118                         & ((const unsigned long *)ipinfo->iniface_mask)[i];
119         }
120
121         if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
122                 dprintf("VIA in mismatch (%s vs %s).%s\n",
123                         indev, ipinfo->iniface,
124                         ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
125                 return 0;
126         }
127
128         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
129                 ret |= (((const unsigned long *)outdev)[i]
130                         ^ ((const unsigned long *)ipinfo->outiface)[i])
131                         & ((const unsigned long *)ipinfo->outiface_mask)[i];
132         }
133
134         if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
135                 dprintf("VIA out mismatch (%s vs %s).%s\n",
136                         outdev, ipinfo->outiface,
137                         ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
138                 return 0;
139         }
140
141         /* Check specific protocol */
142         if (ipinfo->proto
143             && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
144                 dprintf("Packet protocol %hi does not match %hi.%s\n",
145                         ip->protocol, ipinfo->proto,
146                         ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
147                 return 0;
148         }
149
150         /* If we have a fragment rule but the packet is not a fragment
151          * then we return zero */
152         if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
153                 dprintf("Fragment rule but not fragment.%s\n",
154                         ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
155                 return 0;
156         }
157
158         return 1;
159 }
160
161 static inline int
162 ip_checkentry(const struct ipt_ip *ip)
163 {
164         if (ip->flags & ~IPT_F_MASK) {
165                 duprintf("Unknown flag bits set: %08X\n",
166                          ip->flags & ~IPT_F_MASK);
167                 return 0;
168         }
169         if (ip->invflags & ~IPT_INV_MASK) {
170                 duprintf("Unknown invflag bits set: %08X\n",
171                          ip->invflags & ~IPT_INV_MASK);
172                 return 0;
173         }
174         return 1;
175 }
176
177 static unsigned int
178 ipt_error(struct sk_buff **pskb,
179           const struct net_device *in,
180           const struct net_device *out,
181           unsigned int hooknum,
182           const void *targinfo,
183           void *userinfo)
184 {
185         if (net_ratelimit())
186                 printk("ip_tables: error: `%s'\n", (char *)targinfo);
187
188         return NF_DROP;
189 }
190
191 static inline
192 int do_match(struct ipt_entry_match *m,
193              const struct sk_buff *skb,
194              const struct net_device *in,
195              const struct net_device *out,
196              int offset,
197              int *hotdrop)
198 {
199         /* Stop iteration if it doesn't match */
200         if (!m->u.kernel.match->match(skb, in, out, m->data, offset, 
201             skb->nh.iph->ihl*4, hotdrop))
202                 return 1;
203         else
204                 return 0;
205 }
206
/* Return the rule entry located `offset` bytes after `base`. */
static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
        char *start = base;

        return (struct ipt_entry *)(start + offset);
}
212
213 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
214 unsigned int
215 ipt_do_table(struct sk_buff **pskb,
216              unsigned int hook,
217              const struct net_device *in,
218              const struct net_device *out,
219              struct ipt_table *table,
220              void *userdata)
221 {
222         static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
223         u_int16_t offset;
224         struct iphdr *ip;
225         u_int16_t datalen;
226         int hotdrop = 0;
227         /* Initializing verdict to NF_DROP keeps gcc happy. */
228         unsigned int verdict = NF_DROP;
229         const char *indev, *outdev;
230         void *table_base;
231         struct ipt_entry *e, *back;
232         struct xt_table_info *private = table->private;
233
234         /* Initialization */
235         ip = (*pskb)->nh.iph;
236         datalen = (*pskb)->len - ip->ihl * 4;
237         indev = in ? in->name : nulldevname;
238         outdev = out ? out->name : nulldevname;
239         /* We handle fragments by dealing with the first fragment as
240          * if it was a normal packet.  All other fragments are treated
241          * normally, except that they will NEVER match rules that ask
242          * things we don't know, ie. tcp syn flag or ports).  If the
243          * rule is also a fragment-specific rule, non-fragments won't
244          * match it. */
245         offset = ntohs(ip->frag_off) & IP_OFFSET;
246
247         read_lock_bh(&table->lock);
248         IP_NF_ASSERT(table->valid_hooks & (1 << hook));
249         table_base = (void *)private->entries[smp_processor_id()];
250         e = get_entry(table_base, private->hook_entry[hook]);
251
252         /* For return from builtin chain */
253         back = get_entry(table_base, private->underflow[hook]);
254
255         do {
256                 IP_NF_ASSERT(e);
257                 IP_NF_ASSERT(back);
258                 if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
259                         struct ipt_entry_target *t;
260
261                         if (IPT_MATCH_ITERATE(e, do_match,
262                                               *pskb, in, out,
263                                               offset, &hotdrop) != 0)
264                                 goto no_match;
265
266                         ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
267
268                         t = ipt_get_target(e);
269                         IP_NF_ASSERT(t->u.kernel.target);
270                         /* Standard target? */
271                         if (!t->u.kernel.target->target) {
272                                 int v;
273
274                                 v = ((struct ipt_standard_target *)t)->verdict;
275                                 if (v < 0) {
276                                         /* Pop from stack? */
277                                         if (v != IPT_RETURN) {
278                                                 verdict = (unsigned)(-v) - 1;
279                                                 break;
280                                         }
281                                         e = back;
282                                         back = get_entry(table_base,
283                                                          back->comefrom);
284                                         continue;
285                                 }
286                                 if (table_base + v != (void *)e + e->next_offset
287                                     && !(e->ip.flags & IPT_F_GOTO)) {
288                                         /* Save old back ptr in next entry */
289                                         struct ipt_entry *next
290                                                 = (void *)e + e->next_offset;
291                                         next->comefrom
292                                                 = (void *)back - table_base;
293                                         /* set back pointer to next entry */
294                                         back = next;
295                                 }
296
297                                 e = get_entry(table_base, v);
298                         } else {
299                                 /* Targets which reenter must return
300                                    abs. verdicts */
301 #ifdef CONFIG_NETFILTER_DEBUG
302                                 ((struct ipt_entry *)table_base)->comefrom
303                                         = 0xeeeeeeec;
304 #endif
305                                 verdict = t->u.kernel.target->target(pskb,
306                                                                      in, out,
307                                                                      hook,
308                                                                      t->data,
309                                                                      userdata);
310
311 #ifdef CONFIG_NETFILTER_DEBUG
312                                 if (((struct ipt_entry *)table_base)->comefrom
313                                     != 0xeeeeeeec
314                                     && verdict == IPT_CONTINUE) {
315                                         printk("Target %s reentered!\n",
316                                                t->u.kernel.target->name);
317                                         verdict = NF_DROP;
318                                 }
319                                 ((struct ipt_entry *)table_base)->comefrom
320                                         = 0x57acc001;
321 #endif
322                                 /* Target might have changed stuff. */
323                                 ip = (*pskb)->nh.iph;
324                                 datalen = (*pskb)->len - ip->ihl * 4;
325
326                                 if (verdict == IPT_CONTINUE)
327                                         e = (void *)e + e->next_offset;
328                                 else
329                                         /* Verdict */
330                                         break;
331                         }
332                 } else {
333
334                 no_match:
335                         e = (void *)e + e->next_offset;
336                 }
337         } while (!hotdrop);
338
339         read_unlock_bh(&table->lock);
340
341 #ifdef DEBUG_ALLOW_ALL
342         return NF_ACCEPT;
343 #else
344         if (hotdrop)
345                 return NF_DROP;
346         else return verdict;
347 #endif
348 }
349
350 /* All zeroes == unconditional rule. */
351 static inline int
352 unconditional(const struct ipt_ip *ip)
353 {
354         unsigned int i;
355
356         for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
357                 if (((__u32 *)ip)[i])
358                         return 0;
359
360         return 1;
361 }
362
363 /* Figures out from what hook each rule can be called: returns 0 if
364    there are loops.  Puts hook bitmask in comefrom. */
365 static int
366 mark_source_chains(struct xt_table_info *newinfo,
367                    unsigned int valid_hooks, void *entry0)
368 {
369         unsigned int hook;
370
371         /* No recursion; use packet counter to save back ptrs (reset
372            to 0 as we leave), and comefrom to save source hook bitmask */
373         for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
374                 unsigned int pos = newinfo->hook_entry[hook];
375                 struct ipt_entry *e
376                         = (struct ipt_entry *)(entry0 + pos);
377
378                 if (!(valid_hooks & (1 << hook)))
379                         continue;
380
381                 /* Set initial back pointer. */
382                 e->counters.pcnt = pos;
383
384                 for (;;) {
385                         struct ipt_standard_target *t
386                                 = (void *)ipt_get_target(e);
387
388                         if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
389                                 printk("iptables: loop hook %u pos %u %08X.\n",
390                                        hook, pos, e->comefrom);
391                                 return 0;
392                         }
393                         e->comefrom
394                                 |= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
395
396                         /* Unconditional return/END. */
397                         if (e->target_offset == sizeof(struct ipt_entry)
398                             && (strcmp(t->target.u.user.name,
399                                        IPT_STANDARD_TARGET) == 0)
400                             && t->verdict < 0
401                             && unconditional(&e->ip)) {
402                                 unsigned int oldpos, size;
403
404                                 /* Return: backtrack through the last
405                                    big jump. */
406                                 do {
407                                         e->comefrom ^= (1<<NF_IP_NUMHOOKS);
408 #ifdef DEBUG_IP_FIREWALL_USER
409                                         if (e->comefrom
410                                             & (1 << NF_IP_NUMHOOKS)) {
411                                                 duprintf("Back unset "
412                                                          "on hook %u "
413                                                          "rule %u\n",
414                                                          hook, pos);
415                                         }
416 #endif
417                                         oldpos = pos;
418                                         pos = e->counters.pcnt;
419                                         e->counters.pcnt = 0;
420
421                                         /* We're at the start. */
422                                         if (pos == oldpos)
423                                                 goto next;
424
425                                         e = (struct ipt_entry *)
426                                                 (entry0 + pos);
427                                 } while (oldpos == pos + e->next_offset);
428
429                                 /* Move along one */
430                                 size = e->next_offset;
431                                 e = (struct ipt_entry *)
432                                         (entry0 + pos + size);
433                                 e->counters.pcnt = pos;
434                                 pos += size;
435                         } else {
436                                 int newpos = t->verdict;
437
438                                 if (strcmp(t->target.u.user.name,
439                                            IPT_STANDARD_TARGET) == 0
440                                     && newpos >= 0) {
441                                         /* This a jump; chase it. */
442                                         duprintf("Jump rule %u -> %u\n",
443                                                  pos, newpos);
444                                 } else {
445                                         /* ... this is a fallthru */
446                                         newpos = pos + e->next_offset;
447                                 }
448                                 e = (struct ipt_entry *)
449                                         (entry0 + newpos);
450                                 e->counters.pcnt = pos;
451                                 pos = newpos;
452                         }
453                 }
454                 next:
455                 duprintf("Finished chain %u\n", hook);
456         }
457         return 1;
458 }
459
460 static inline int
461 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
462 {
463         if (i && (*i)-- == 0)
464                 return 1;
465
466         if (m->u.kernel.match->destroy)
467                 m->u.kernel.match->destroy(m->data,
468                                            m->u.match_size - sizeof(*m));
469         module_put(m->u.kernel.match->me);
470         return 0;
471 }
472
473 static inline int
474 standard_check(const struct ipt_entry_target *t,
475                unsigned int max_offset)
476 {
477         struct ipt_standard_target *targ = (void *)t;
478
479         /* Check standard info. */
480         if (t->u.target_size
481             != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
482                 duprintf("standard_check: target size %u != %u\n",
483                          t->u.target_size,
484                          IPT_ALIGN(sizeof(struct ipt_standard_target)));
485                 return 0;
486         }
487
488         if (targ->verdict >= 0
489             && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
490                 duprintf("ipt_standard_check: bad verdict (%i)\n",
491                          targ->verdict);
492                 return 0;
493         }
494
495         if (targ->verdict < -NF_MAX_VERDICT - 1) {
496                 duprintf("ipt_standard_check: bad negative verdict (%i)\n",
497                          targ->verdict);
498                 return 0;
499         }
500         return 1;
501 }
502
503 static inline int
504 check_match(struct ipt_entry_match *m,
505             const char *name,
506             const struct ipt_ip *ip,
507             unsigned int hookmask,
508             unsigned int *i)
509 {
510         struct ipt_match *match;
511
512         match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
513                                                    m->u.user.revision),
514                                         "ipt_%s", m->u.user.name);
515         if (IS_ERR(match) || !match) {
516                 duprintf("check_match: `%s' not found\n", m->u.user.name);
517                 return match ? PTR_ERR(match) : -ENOENT;
518         }
519         m->u.kernel.match = match;
520
521         if (m->u.kernel.match->checkentry
522             && !m->u.kernel.match->checkentry(name, ip, m->data,
523                                               m->u.match_size - sizeof(*m),
524                                               hookmask)) {
525                 module_put(m->u.kernel.match->me);
526                 duprintf("ip_tables: check failed for `%s'.\n",
527                          m->u.kernel.match->name);
528                 return -EINVAL;
529         }
530
531         (*i)++;
532         return 0;
533 }
534
535 static struct ipt_target ipt_standard_target;
536
537 static inline int
538 check_entry(struct ipt_entry *e, const char *name, unsigned int size,
539             unsigned int *i)
540 {
541         struct ipt_entry_target *t;
542         struct ipt_target *target;
543         int ret;
544         unsigned int j;
545
546         if (!ip_checkentry(&e->ip)) {
547                 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
548                 return -EINVAL;
549         }
550
551         j = 0;
552         ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
553         if (ret != 0)
554                 goto cleanup_matches;
555
556         t = ipt_get_target(e);
557         target = try_then_request_module(xt_find_target(AF_INET,
558                                                      t->u.user.name,
559                                                      t->u.user.revision),
560                                          "ipt_%s", t->u.user.name);
561         if (IS_ERR(target) || !target) {
562                 duprintf("check_entry: `%s' not found\n", t->u.user.name);
563                 ret = target ? PTR_ERR(target) : -ENOENT;
564                 goto cleanup_matches;
565         }
566         t->u.kernel.target = target;
567
568         if (t->u.kernel.target == &ipt_standard_target) {
569                 if (!standard_check(t, size)) {
570                         ret = -EINVAL;
571                         goto cleanup_matches;
572                 }
573         } else if (t->u.kernel.target->checkentry
574                    && !t->u.kernel.target->checkentry(name, e, t->data,
575                                                       t->u.target_size
576                                                       - sizeof(*t),
577                                                       e->comefrom)) {
578                 module_put(t->u.kernel.target->me);
579                 duprintf("ip_tables: check failed for `%s'.\n",
580                          t->u.kernel.target->name);
581                 ret = -EINVAL;
582                 goto cleanup_matches;
583         }
584
585         (*i)++;
586         return 0;
587
588  cleanup_matches:
589         IPT_MATCH_ITERATE(e, cleanup_match, &j);
590         return ret;
591 }
592
593 static inline int
594 check_entry_size_and_hooks(struct ipt_entry *e,
595                            struct xt_table_info *newinfo,
596                            unsigned char *base,
597                            unsigned char *limit,
598                            const unsigned int *hook_entries,
599                            const unsigned int *underflows,
600                            unsigned int *i)
601 {
602         unsigned int h;
603
604         if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
605             || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
606                 duprintf("Bad offset %p\n", e);
607                 return -EINVAL;
608         }
609
610         if (e->next_offset
611             < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
612                 duprintf("checking: element %p size %u\n",
613                          e, e->next_offset);
614                 return -EINVAL;
615         }
616
617         /* Check hooks & underflows */
618         for (h = 0; h < NF_IP_NUMHOOKS; h++) {
619                 if ((unsigned char *)e - base == hook_entries[h])
620                         newinfo->hook_entry[h] = hook_entries[h];
621                 if ((unsigned char *)e - base == underflows[h])
622                         newinfo->underflow[h] = underflows[h];
623         }
624
625         /* FIXME: underflows must be unconditional, standard verdicts
626            < 0 (not IPT_RETURN). --RR */
627
628         /* Clear counters and comefrom */
629         e->counters = ((struct xt_counters) { 0, 0 });
630         e->comefrom = 0;
631
632         (*i)++;
633         return 0;
634 }
635
636 static inline int
637 cleanup_entry(struct ipt_entry *e, unsigned int *i)
638 {
639         struct ipt_entry_target *t;
640
641         if (i && (*i)-- == 0)
642                 return 1;
643
644         /* Cleanup all matches */
645         IPT_MATCH_ITERATE(e, cleanup_match, NULL);
646         t = ipt_get_target(e);
647         if (t->u.kernel.target->destroy)
648                 t->u.kernel.target->destroy(t->data,
649                                             t->u.target_size - sizeof(*t));
650         module_put(t->u.kernel.target->me);
651         return 0;
652 }
653
654 /* Checks and translates the user-supplied table segment (held in
655    newinfo) */
656 static int
657 translate_table(const char *name,
658                 unsigned int valid_hooks,
659                 struct xt_table_info *newinfo,
660                 void *entry0,
661                 unsigned int size,
662                 unsigned int number,
663                 const unsigned int *hook_entries,
664                 const unsigned int *underflows)
665 {
666         unsigned int i;
667         int ret;
668
669         newinfo->size = size;
670         newinfo->number = number;
671
672         /* Init all hooks to impossible value. */
673         for (i = 0; i < NF_IP_NUMHOOKS; i++) {
674                 newinfo->hook_entry[i] = 0xFFFFFFFF;
675                 newinfo->underflow[i] = 0xFFFFFFFF;
676         }
677
678         duprintf("translate_table: size %u\n", newinfo->size);
679         i = 0;
680         /* Walk through entries, checking offsets. */
681         ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
682                                 check_entry_size_and_hooks,
683                                 newinfo,
684                                 entry0,
685                                 entry0 + size,
686                                 hook_entries, underflows, &i);
687         if (ret != 0)
688                 return ret;
689
690         if (i != number) {
691                 duprintf("translate_table: %u not %u entries\n",
692                          i, number);
693                 return -EINVAL;
694         }
695
696         /* Check hooks all assigned */
697         for (i = 0; i < NF_IP_NUMHOOKS; i++) {
698                 /* Only hooks which are valid */
699                 if (!(valid_hooks & (1 << i)))
700                         continue;
701                 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
702                         duprintf("Invalid hook entry %u %u\n",
703                                  i, hook_entries[i]);
704                         return -EINVAL;
705                 }
706                 if (newinfo->underflow[i] == 0xFFFFFFFF) {
707                         duprintf("Invalid underflow %u %u\n",
708                                  i, underflows[i]);
709                         return -EINVAL;
710                 }
711         }
712
713         if (!mark_source_chains(newinfo, valid_hooks, entry0))
714                 return -ELOOP;
715
716         /* Finally, each sanity check must pass */
717         i = 0;
718         ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
719                                 check_entry, name, size, &i);
720
721         if (ret != 0) {
722                 IPT_ENTRY_ITERATE(entry0, newinfo->size,
723                                   cleanup_entry, &i);
724                 return ret;
725         }
726
727         /* And one copy for every other CPU */
728         for_each_cpu(i) {
729                 if (newinfo->entries[i] && newinfo->entries[i] != entry0)
730                         memcpy(newinfo->entries[i], entry0, newinfo->size);
731         }
732
733         return ret;
734 }
735
736 /* Gets counters. */
737 static inline int
738 add_entry_to_counter(const struct ipt_entry *e,
739                      struct xt_counters total[],
740                      unsigned int *i)
741 {
742         ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
743
744         (*i)++;
745         return 0;
746 }
747
748 static inline int
749 set_entry_to_counter(const struct ipt_entry *e,
750                      struct ipt_counters total[],
751                      unsigned int *i)
752 {
753         SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
754
755         (*i)++;
756         return 0;
757 }
758
759 static void
760 get_counters(const struct xt_table_info *t,
761              struct xt_counters counters[])
762 {
763         unsigned int cpu;
764         unsigned int i;
765         unsigned int curcpu;
766
767         /* Instead of clearing (by a previous call to memset())
768          * the counters and using adds, we set the counters
769          * with data used by 'current' CPU
770          * We dont care about preemption here.
771          */
772         curcpu = raw_smp_processor_id();
773
774         i = 0;
775         IPT_ENTRY_ITERATE(t->entries[curcpu],
776                           t->size,
777                           set_entry_to_counter,
778                           counters,
779                           &i);
780
781         for_each_cpu(cpu) {
782                 if (cpu == curcpu)
783                         continue;
784                 i = 0;
785                 IPT_ENTRY_ITERATE(t->entries[cpu],
786                                   t->size,
787                                   add_entry_to_counter,
788                                   counters,
789                                   &i);
790         }
791 }
792
793 static int
794 copy_entries_to_user(unsigned int total_size,
795                      struct ipt_table *table,
796                      void __user *userptr)
797 {
798         unsigned int off, num, countersize;
799         struct ipt_entry *e;
800         struct xt_counters *counters;
801         struct xt_table_info *private = table->private;
802         int ret = 0;
803         void *loc_cpu_entry;
804
805         /* We need atomic snapshot of counters: rest doesn't change
806            (other than comefrom, which userspace doesn't care
807            about). */
808         countersize = sizeof(struct xt_counters) * private->number;
809         counters = vmalloc_node(countersize, numa_node_id());
810
811         if (counters == NULL)
812                 return -ENOMEM;
813
814         /* First, sum counters... */
815         write_lock_bh(&table->lock);
816         get_counters(private, counters);
817         write_unlock_bh(&table->lock);
818
819         /* choose the copy that is on our node/cpu, ...
820          * This choice is lazy (because current thread is
821          * allowed to migrate to another cpu)
822          */
823         loc_cpu_entry = private->entries[raw_smp_processor_id()];
824         /* ... then copy entire thing ... */
825         if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
826                 ret = -EFAULT;
827                 goto free_counters;
828         }
829
830         /* FIXME: use iterator macros --RR */
831         /* ... then go back and fix counters and names */
832         for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
833                 unsigned int i;
834                 struct ipt_entry_match *m;
835                 struct ipt_entry_target *t;
836
837                 e = (struct ipt_entry *)(loc_cpu_entry + off);
838                 if (copy_to_user(userptr + off
839                                  + offsetof(struct ipt_entry, counters),
840                                  &counters[num],
841                                  sizeof(counters[num])) != 0) {
842                         ret = -EFAULT;
843                         goto free_counters;
844                 }
845
846                 for (i = sizeof(struct ipt_entry);
847                      i < e->target_offset;
848                      i += m->u.match_size) {
849                         m = (void *)e + i;
850
851                         if (copy_to_user(userptr + off + i
852                                          + offsetof(struct ipt_entry_match,
853                                                     u.user.name),
854                                          m->u.kernel.match->name,
855                                          strlen(m->u.kernel.match->name)+1)
856                             != 0) {
857                                 ret = -EFAULT;
858                                 goto free_counters;
859                         }
860                 }
861
862                 t = ipt_get_target(e);
863                 if (copy_to_user(userptr + off + e->target_offset
864                                  + offsetof(struct ipt_entry_target,
865                                             u.user.name),
866                                  t->u.kernel.target->name,
867                                  strlen(t->u.kernel.target->name)+1) != 0) {
868                         ret = -EFAULT;
869                         goto free_counters;
870                 }
871         }
872
873  free_counters:
874         vfree(counters);
875         return ret;
876 }
877
878 static int
879 get_entries(const struct ipt_get_entries *entries,
880             struct ipt_get_entries __user *uptr)
881 {
882         int ret;
883         struct ipt_table *t;
884
885         t = xt_find_table_lock(AF_INET, entries->name);
886         if (t && !IS_ERR(t)) {
887                 struct xt_table_info *private = t->private;
888                 duprintf("t->private->number = %u\n",
889                          private->number);
890                 if (entries->size == private->size)
891                         ret = copy_entries_to_user(private->size,
892                                                    t, uptr->entrytable);
893                 else {
894                         duprintf("get_entries: I've got %u not %u!\n",
895                                  private->size,
896                                  entries->size);
897                         ret = -EINVAL;
898                 }
899                 module_put(t->me);
900                 xt_table_unlock(t);
901         } else
902                 ret = t ? PTR_ERR(t) : -ENOENT;
903
904         return ret;
905 }
906
907 static int
908 do_replace(void __user *user, unsigned int len)
909 {
910         int ret;
911         struct ipt_replace tmp;
912         struct ipt_table *t;
913         struct xt_table_info *newinfo, *oldinfo;
914         struct xt_counters *counters;
915         void *loc_cpu_entry, *loc_cpu_old_entry;
916
917         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
918                 return -EFAULT;
919
920         /* Hack: Causes ipchains to give correct error msg --RR */
921         if (len != sizeof(tmp) + tmp.size)
922                 return -ENOPROTOOPT;
923
924         newinfo = xt_alloc_table_info(tmp.size);
925         if (!newinfo)
926                 return -ENOMEM;
927
928         /* choose the copy that is our node/cpu */
929         loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
930         if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
931                            tmp.size) != 0) {
932                 ret = -EFAULT;
933                 goto free_newinfo;
934         }
935
936         counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters));
937         if (!counters) {
938                 ret = -ENOMEM;
939                 goto free_newinfo;
940         }
941
942         ret = translate_table(tmp.name, tmp.valid_hooks,
943                               newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
944                               tmp.hook_entry, tmp.underflow);
945         if (ret != 0)
946                 goto free_newinfo_counters;
947
948         duprintf("ip_tables: Translated table\n");
949
950         t = try_then_request_module(xt_find_table_lock(AF_INET, tmp.name),
951                                     "iptable_%s", tmp.name);
952         if (!t || IS_ERR(t)) {
953                 ret = t ? PTR_ERR(t) : -ENOENT;
954                 goto free_newinfo_counters_untrans;
955         }
956
957         /* You lied! */
958         if (tmp.valid_hooks != t->valid_hooks) {
959                 duprintf("Valid hook crap: %08X vs %08X\n",
960                          tmp.valid_hooks, t->valid_hooks);
961                 ret = -EINVAL;
962                 goto put_module;
963         }
964
965         oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret);
966         if (!oldinfo)
967                 goto put_module;
968
969         /* Update module usage count based on number of rules */
970         duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
971                 oldinfo->number, oldinfo->initial_entries, newinfo->number);
972         if ((oldinfo->number > oldinfo->initial_entries) || 
973             (newinfo->number <= oldinfo->initial_entries)) 
974                 module_put(t->me);
975         if ((oldinfo->number > oldinfo->initial_entries) &&
976             (newinfo->number <= oldinfo->initial_entries))
977                 module_put(t->me);
978
979         /* Get the old counters. */
980         get_counters(oldinfo, counters);
981         /* Decrease module usage counts and free resource */
982         loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
983         IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
984         xt_free_table_info(oldinfo);
985         if (copy_to_user(tmp.counters, counters,
986                          sizeof(struct xt_counters) * tmp.num_counters) != 0)
987                 ret = -EFAULT;
988         vfree(counters);
989         xt_table_unlock(t);
990         return ret;
991
992  put_module:
993         module_put(t->me);
994         xt_table_unlock(t);
995  free_newinfo_counters_untrans:
996         IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
997  free_newinfo_counters:
998         vfree(counters);
999  free_newinfo:
1000         xt_free_table_info(newinfo);
1001         return ret;
1002 }
1003
1004 /* We're lazy, and add to the first CPU; overflow works its fey magic
1005  * and everything is OK. */
1006 static inline int
1007 add_counter_to_entry(struct ipt_entry *e,
1008                      const struct xt_counters addme[],
1009                      unsigned int *i)
1010 {
1011 #if 0
1012         duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1013                  *i,
1014                  (long unsigned int)e->counters.pcnt,
1015                  (long unsigned int)e->counters.bcnt,
1016                  (long unsigned int)addme[*i].pcnt,
1017                  (long unsigned int)addme[*i].bcnt);
1018 #endif
1019
1020         ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1021
1022         (*i)++;
1023         return 0;
1024 }
1025
1026 static int
1027 do_add_counters(void __user *user, unsigned int len)
1028 {
1029         unsigned int i;
1030         struct xt_counters_info tmp, *paddc;
1031         struct ipt_table *t;
1032         struct xt_table_info *private;
1033         int ret = 0;
1034         void *loc_cpu_entry;
1035
1036         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1037                 return -EFAULT;
1038
1039         if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters))
1040                 return -EINVAL;
1041
1042         paddc = vmalloc_node(len, numa_node_id());
1043         if (!paddc)
1044                 return -ENOMEM;
1045
1046         if (copy_from_user(paddc, user, len) != 0) {
1047                 ret = -EFAULT;
1048                 goto free;
1049         }
1050
1051         t = xt_find_table_lock(AF_INET, tmp.name);
1052         if (!t || IS_ERR(t)) {
1053                 ret = t ? PTR_ERR(t) : -ENOENT;
1054                 goto free;
1055         }
1056
1057         write_lock_bh(&t->lock);
1058         private = t->private;
1059         if (private->number != paddc->num_counters) {
1060                 ret = -EINVAL;
1061                 goto unlock_up_free;
1062         }
1063
1064         i = 0;
1065         /* Choose the copy that is on our node */
1066         loc_cpu_entry = private->entries[raw_smp_processor_id()];
1067         IPT_ENTRY_ITERATE(loc_cpu_entry,
1068                           private->size,
1069                           add_counter_to_entry,
1070                           paddc->counters,
1071                           &i);
1072  unlock_up_free:
1073         write_unlock_bh(&t->lock);
1074         xt_table_unlock(t);
1075         module_put(t->me);
1076  free:
1077         vfree(paddc);
1078
1079         return ret;
1080 }
1081
1082 static int
1083 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1084 {
1085         int ret;
1086
1087         if (!capable(CAP_NET_ADMIN))
1088                 return -EPERM;
1089
1090         switch (cmd) {
1091         case IPT_SO_SET_REPLACE:
1092                 ret = do_replace(user, len);
1093                 break;
1094
1095         case IPT_SO_SET_ADD_COUNTERS:
1096                 ret = do_add_counters(user, len);
1097                 break;
1098
1099         default:
1100                 duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
1101                 ret = -EINVAL;
1102         }
1103
1104         return ret;
1105 }
1106
1107 static int
1108 do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1109 {
1110         int ret;
1111
1112         if (!capable(CAP_NET_ADMIN))
1113                 return -EPERM;
1114
1115         switch (cmd) {
1116         case IPT_SO_GET_INFO: {
1117                 char name[IPT_TABLE_MAXNAMELEN];
1118                 struct ipt_table *t;
1119
1120                 if (*len != sizeof(struct ipt_getinfo)) {
1121                         duprintf("length %u != %u\n", *len,
1122                                  sizeof(struct ipt_getinfo));
1123                         ret = -EINVAL;
1124                         break;
1125                 }
1126
1127                 if (copy_from_user(name, user, sizeof(name)) != 0) {
1128                         ret = -EFAULT;
1129                         break;
1130                 }
1131                 name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1132
1133                 t = try_then_request_module(xt_find_table_lock(AF_INET, name),
1134                                             "iptable_%s", name);
1135                 if (t && !IS_ERR(t)) {
1136                         struct ipt_getinfo info;
1137                         struct xt_table_info *private = t->private;
1138
1139                         info.valid_hooks = t->valid_hooks;
1140                         memcpy(info.hook_entry, private->hook_entry,
1141                                sizeof(info.hook_entry));
1142                         memcpy(info.underflow, private->underflow,
1143                                sizeof(info.underflow));
1144                         info.num_entries = private->number;
1145                         info.size = private->size;
1146                         memcpy(info.name, name, sizeof(info.name));
1147
1148                         if (copy_to_user(user, &info, *len) != 0)
1149                                 ret = -EFAULT;
1150                         else
1151                                 ret = 0;
1152                         xt_table_unlock(t);
1153                         module_put(t->me);
1154                 } else
1155                         ret = t ? PTR_ERR(t) : -ENOENT;
1156         }
1157         break;
1158
1159         case IPT_SO_GET_ENTRIES: {
1160                 struct ipt_get_entries get;
1161
1162                 if (*len < sizeof(get)) {
1163                         duprintf("get_entries: %u < %u\n", *len, sizeof(get));
1164                         ret = -EINVAL;
1165                 } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
1166                         ret = -EFAULT;
1167                 } else if (*len != sizeof(struct ipt_get_entries) + get.size) {
1168                         duprintf("get_entries: %u != %u\n", *len,
1169                                  sizeof(struct ipt_get_entries) + get.size);
1170                         ret = -EINVAL;
1171                 } else
1172                         ret = get_entries(&get, user);
1173                 break;
1174         }
1175
1176         case IPT_SO_GET_REVISION_MATCH:
1177         case IPT_SO_GET_REVISION_TARGET: {
1178                 struct ipt_get_revision rev;
1179                 int target;
1180
1181                 if (*len != sizeof(rev)) {
1182                         ret = -EINVAL;
1183                         break;
1184                 }
1185                 if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
1186                         ret = -EFAULT;
1187                         break;
1188                 }
1189
1190                 if (cmd == IPT_SO_GET_REVISION_TARGET)
1191                         target = 1;
1192                 else
1193                         target = 0;
1194
1195                 try_then_request_module(xt_find_revision(AF_INET, rev.name,
1196                                                          rev.revision,
1197                                                          target, &ret),
1198                                         "ipt_%s", rev.name);
1199                 break;
1200         }
1201
1202         default:
1203                 duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
1204                 ret = -EINVAL;
1205         }
1206
1207         return ret;
1208 }
1209
1210 int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
1211 {
1212         int ret;
1213         struct xt_table_info *newinfo;
1214         static struct xt_table_info bootstrap
1215                 = { 0, 0, 0, { 0 }, { 0 }, { } };
1216         void *loc_cpu_entry;
1217
1218         newinfo = xt_alloc_table_info(repl->size);
1219         if (!newinfo)
1220                 return -ENOMEM;
1221
1222         /* choose the copy on our node/cpu
1223          * but dont care of preemption
1224          */
1225         loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1226         memcpy(loc_cpu_entry, repl->entries, repl->size);
1227
1228         ret = translate_table(table->name, table->valid_hooks,
1229                               newinfo, loc_cpu_entry, repl->size,
1230                               repl->num_entries,
1231                               repl->hook_entry,
1232                               repl->underflow);
1233         if (ret != 0) {
1234                 xt_free_table_info(newinfo);
1235                 return ret;
1236         }
1237
1238         if (xt_register_table(table, &bootstrap, newinfo) != 0) {
1239                 xt_free_table_info(newinfo);
1240                 return ret;
1241         }
1242
1243         return 0;
1244 }
1245
1246 void ipt_unregister_table(struct ipt_table *table)
1247 {
1248         struct xt_table_info *private;
1249         void *loc_cpu_entry;
1250
1251         private = xt_unregister_table(table);
1252
1253         /* Decrease module usage counts and free resources */
1254         loc_cpu_entry = private->entries[raw_smp_processor_id()];
1255         IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
1256         xt_free_table_info(private);
1257 }
1258
/*
 * Test an ICMP (type, code) pair against a rule.
 *
 * The pair matches when @test_type is the wildcard 0xFF, or when @type
 * equals @test_type and @code lies in [@min_code, @max_code].  The 0/1
 * match result is XORed with @invert before being returned, so callers
 * pass 0 or 1 to get a plain or negated truth value.
 */
static inline int
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
		     u_int8_t type, u_int8_t code,
		     int invert)
{
	int hit;

	if (test_type == 0xFF)
		hit = 1;
	else
		hit = (type == test_type) &&
		      (code >= min_code) && (code <= max_code);

	return hit ^ invert;
}
1268
1269 static int
1270 icmp_match(const struct sk_buff *skb,
1271            const struct net_device *in,
1272            const struct net_device *out,
1273            const void *matchinfo,
1274            int offset,
1275            unsigned int protoff,
1276            int *hotdrop)
1277 {
1278         struct icmphdr _icmph, *ic;
1279         const struct ipt_icmp *icmpinfo = matchinfo;
1280
1281         /* Must not be a fragment. */
1282         if (offset)
1283                 return 0;
1284
1285         ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph);
1286         if (ic == NULL) {
1287                 /* We've been asked to examine this packet, and we
1288                  * can't.  Hence, no choice but to drop.
1289                  */
1290                 duprintf("Dropping evil ICMP tinygram.\n");
1291                 *hotdrop = 1;
1292                 return 0;
1293         }
1294
1295         return icmp_type_code_match(icmpinfo->type,
1296                                     icmpinfo->code[0],
1297                                     icmpinfo->code[1],
1298                                     ic->type, ic->code,
1299                                     !!(icmpinfo->invflags&IPT_ICMP_INV));
1300 }
1301
1302 /* Called when user tries to insert an entry of this type. */
1303 static int
1304 icmp_checkentry(const char *tablename,
1305            const void *info,
1306            void *matchinfo,
1307            unsigned int matchsize,
1308            unsigned int hook_mask)
1309 {
1310         const struct ipt_ip *ip = info;
1311         const struct ipt_icmp *icmpinfo = matchinfo;
1312
1313         /* Must specify proto == ICMP, and no unknown invflags */
1314         return ip->proto == IPPROTO_ICMP
1315                 && !(ip->invflags & IPT_INV_PROTO)
1316                 && matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
1317                 && !(icmpinfo->invflags & ~IPT_ICMP_INV);
1318 }
1319
1320 /* The built-in targets: standard (NULL) and error. */
1321 static struct ipt_target ipt_standard_target = {
1322         .name           = IPT_STANDARD_TARGET,
1323 };
1324
1325 static struct ipt_target ipt_error_target = {
1326         .name           = IPT_ERROR_TARGET,
1327         .target         = ipt_error,
1328 };
1329
1330 static struct nf_sockopt_ops ipt_sockopts = {
1331         .pf             = PF_INET,
1332         .set_optmin     = IPT_BASE_CTL,
1333         .set_optmax     = IPT_SO_SET_MAX+1,
1334         .set            = do_ipt_set_ctl,
1335         .get_optmin     = IPT_BASE_CTL,
1336         .get_optmax     = IPT_SO_GET_MAX+1,
1337         .get            = do_ipt_get_ctl,
1338 };
1339
1340 static struct ipt_match icmp_matchstruct = {
1341         .name           = "icmp",
1342         .match          = &icmp_match,
1343         .checkentry     = &icmp_checkentry,
1344 };
1345
1346 static int __init init(void)
1347 {
1348         int ret;
1349
1350         xt_proto_init(AF_INET);
1351
1352         /* Noone else will be downing sem now, so we won't sleep */
1353         xt_register_target(AF_INET, &ipt_standard_target);
1354         xt_register_target(AF_INET, &ipt_error_target);
1355         xt_register_match(AF_INET, &icmp_matchstruct);
1356
1357         /* Register setsockopt */
1358         ret = nf_register_sockopt(&ipt_sockopts);
1359         if (ret < 0) {
1360                 duprintf("Unable to register sockopts.\n");
1361                 return ret;
1362         }
1363
1364         printk("ip_tables: (C) 2000-2006 Netfilter Core Team\n");
1365         return 0;
1366 }
1367
1368 static void __exit fini(void)
1369 {
1370         nf_unregister_sockopt(&ipt_sockopts);
1371
1372         xt_unregister_match(AF_INET, &icmp_matchstruct);
1373         xt_unregister_target(AF_INET, &ipt_error_target);
1374         xt_unregister_target(AF_INET, &ipt_standard_target);
1375
1376         xt_proto_fini(AF_INET);
1377 }
1378
/* Symbols exported to other modules. */
EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
EXPORT_SYMBOL(ipt_do_table);

/* Module load/unload entry points. */
module_init(init);
module_exit(fini);