[NETFILTER]: Fix OOPSes on machines with discontiguous cpu numbering.
[linux-2.6.git] / net / ipv6 / netfilter / ip6_tables.c
1 /*
2  * Packet matching code.
3  *
4  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5  * Copyright (C) 2000-2002 Netfilter core team <coreteam@netfilter.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  *
11  * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
12  *      - increase module usage count as soon as we have rules inside
13  *        a table
14  * 06 Jun 2002 Andras Kis-Szabo <kisza@sch.bme.hu>
15  *      - new extension header parser code
16  */
17 #include <linux/config.h>
18 #include <linux/skbuff.h>
19 #include <linux/kmod.h>
20 #include <linux/vmalloc.h>
21 #include <linux/netdevice.h>
22 #include <linux/module.h>
23 #include <linux/tcp.h>
24 #include <linux/udp.h>
25 #include <linux/icmpv6.h>
26 #include <net/ip.h>
27 #include <net/ipv6.h>
28 #include <asm/uaccess.h>
29 #include <asm/semaphore.h>
30 #include <linux/proc_fs.h>
31 #include <linux/cpumask.h>
32
33 #include <linux/netfilter_ipv6/ip6_tables.h>
34
35 MODULE_LICENSE("GPL");
36 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
37 MODULE_DESCRIPTION("IPv6 packet filter");
38
39 #define IPV6_HDR_LEN    (sizeof(struct ipv6hdr))
40 #define IPV6_OPTHDR_LEN (sizeof(struct ipv6_opt_hdr))
41
42 /*#define DEBUG_IP_FIREWALL*/
43 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
44 /*#define DEBUG_IP_FIREWALL_USER*/
45
/* dprintf(): debug output used by the packet matching code in this file.
 * Expands to printk() only when DEBUG_IP_FIREWALL is defined; otherwise it
 * expands to nothing, so its arguments are not evaluated. */
46 #ifdef DEBUG_IP_FIREWALL
47 #define dprintf(format, args...)  printk(format , ## args)
48 #else
49 #define dprintf(format, args...)
50 #endif
51
/* duprintf(): debug output used by the rule lookup/validation code in this
 * file.  Expands to printk() only when DEBUG_IP_FIREWALL_USER is defined;
 * otherwise it expands to nothing. */
52 #ifdef DEBUG_IP_FIREWALL_USER
53 #define duprintf(format, args...) printk(format , ## args)
54 #else
55 #define duprintf(format, args...)
56 #endif
57
/* IP_NF_ASSERT(x): with CONFIG_NETFILTER_DEBUG, print function, file and line
 * when x is false; execution then simply continues.  Without it the macro
 * expands to nothing, so x is not evaluated at all. */
58 #ifdef CONFIG_NETFILTER_DEBUG
59 #define IP_NF_ASSERT(x)                                         \
60 do {                                                            \
61         if (!(x))                                               \
62                 printk("IP_NF_ASSERT: %s:%s:%u\n",              \
63                        __FUNCTION__, __FILE__, __LINE__);       \
64 } while(0)
65 #else
66 #define IP_NF_ASSERT(x)
67 #endif
/* Round x up to a multiple of SMP_CACHE_BYTES.  The mask trick is only
 * correct if SMP_CACHE_BYTES is a power of two -- assumed, not checked here. */
68 #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
69
/* Mutex handed to the find_*_lock() lookups below (see check_match() and
 * check_entry()); a successful lookup returns with it held. */
70 static DECLARE_MUTEX(ip6t_mutex);
71
72 /* Must have mutex */
/* Both hooks assert that down_trylock() fails, i.e. that the mutex is already
 * held.  They are presumably consumed by listhelp.h, included next.
 * NOTE(review): when the assertion is compiled in and the mutex is NOT held,
 * down_trylock() would acquire it and nothing here releases it -- confirm. */
73 #define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ip6t_mutex) != 0)
74 #define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ip6t_mutex) != 0)
75 #include <linux/netfilter_ipv4/listhelp.h>
76
77 #if 0
78 /* All the better to debug you with... */
79 #define static
80 #define inline
81 #endif
82
83 /* Locking is simple: we assume at worst case there will be one packet
84    in user context and one from bottom halves (or soft irq if Alexey's
85    softnet patch was applied).
86
87    We keep a set of rules for each CPU, so we can avoid write-locking
88    them; doing a readlock_bh() stops packets coming through if we're
89    in user context.
90
91    To be cache friendly on SMP, we arrange them like so:
92    [ n-entries ]
93    ... cache-align padding ...
94    [ n-entries ]
95
96    Hence the start of each CPU's table is given by TABLE_OFFSET() below.  */
97
98 /*
 * The table itself: a fixed header followed by the rule data in entries[].
 * On SMP builds TABLE_OFFSET() locates a given CPU's copy inside entries[].
 */
99 struct ip6t_table_info
100 {
101         /* Size in bytes of one copy of the rules; TABLE_OFFSET() strides
         * through entries[] in steps of SMP_ALIGN(size). */
102         unsigned int size;
103         /* Number of entries: FIXME. --RR */
104         unsigned int number;
105         /* Initial number of entries. Needed for module usage count */
106         unsigned int initial_entries;
107
108         /* Per-hook byte offsets from the start of a table copy:
         * hook_entry[] is where ip6t_do_table() starts walking for a hook,
         * underflow[] is the entry it initially uses as the return point. */
109         unsigned int hook_entry[NF_IP6_NUMHOOKS];
110         unsigned int underflow[NF_IP6_NUMHOOKS];
111
112         /* ip6t_entry tables: one per CPU */
113         char entries[0] ____cacheline_aligned;
114 };
115
/* Lists searched by name through find_inlist_lock_noload(): the known
 * targets, matches and tables. */
116 static LIST_HEAD(ip6t_target);
117 static LIST_HEAD(ip6t_match);
118 static LIST_HEAD(ip6t_tables);
/* Add b to the byte counter and p to the packet counter of c. */
119 #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
120
/* Byte offset, inside t->entries, of the table copy for CPU number p.
 * Without CONFIG_SMP there is a single copy at offset 0. */
121 #ifdef CONFIG_SMP
122 #define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
123 #else
124 #define TABLE_OFFSET(t,p) 0
125 #endif
126
127 #if 0
128 #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
129 #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
130 #define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
131 #endif
132
/*
 * Compare two IPv6 addresses under a mask.
 *
 * Returns 0 when addr1 and addr2 agree on every bit that is set in mask,
 * and 1 as soon as one masked byte differs.
 */
static int ip6_masked_addrcmp(struct in6_addr addr1, struct in6_addr mask,
                              struct in6_addr addr2)
{
        unsigned int byte = 0;

        while (byte < sizeof(addr1.s6_addr)) {
                /* XOR exposes the differing bits; the mask keeps the ones
                   the caller cares about. */
                if ((addr1.s6_addr[byte] ^ addr2.s6_addr[byte])
                    & mask.s6_addr[byte])
                        return 1;
                byte++;
        }
        return 0;
}
144
145 /* Check for an extension */
146 int 
147 ip6t_ext_hdr(u8 nexthdr)
148 {
149         return ( (nexthdr == IPPROTO_HOPOPTS)   ||
150                  (nexthdr == IPPROTO_ROUTING)   ||
151                  (nexthdr == IPPROTO_FRAGMENT)  ||
152                  (nexthdr == IPPROTO_ESP)       ||
153                  (nexthdr == IPPROTO_AH)        ||
154                  (nexthdr == IPPROTO_NONE)      ||
155                  (nexthdr == IPPROTO_DSTOPTS) );
156 }
157
158 /* Returns whether matches rule or not. */
159 static inline int
160 ip6_packet_match(const struct sk_buff *skb,
161                  const char *indev,
162                  const char *outdev,
163                  const struct ip6t_ip6 *ip6info,
164                  unsigned int *protoff,
165                  int *fragoff)
166 {
167         size_t i;
168         unsigned long ret;
169         const struct ipv6hdr *ipv6 = skb->nh.ipv6h;
170
171 #define FWINV(bool,invflg) ((bool) ^ !!(ip6info->invflags & invflg))
172
173         if (FWINV(ip6_masked_addrcmp(ipv6->saddr,ip6info->smsk,ip6info->src),
174                   IP6T_INV_SRCIP)
175             || FWINV(ip6_masked_addrcmp(ipv6->daddr,ip6info->dmsk,ip6info->dst),
176                      IP6T_INV_DSTIP)) {
177                 dprintf("Source or dest mismatch.\n");
178 /*
179                 dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr,
180                         ipinfo->smsk.s_addr, ipinfo->src.s_addr,
181                         ipinfo->invflags & IP6T_INV_SRCIP ? " (INV)" : "");
182                 dprintf("DST: %u. Mask: %u. Target: %u.%s\n", ip->daddr,
183                         ipinfo->dmsk.s_addr, ipinfo->dst.s_addr,
184                         ipinfo->invflags & IP6T_INV_DSTIP ? " (INV)" : "");*/
185                 return 0;
186         }
187
188         /* Look for ifname matches; this should unroll nicely. */
189         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
190                 ret |= (((const unsigned long *)indev)[i]
191                         ^ ((const unsigned long *)ip6info->iniface)[i])
192                         & ((const unsigned long *)ip6info->iniface_mask)[i];
193         }
194
195         if (FWINV(ret != 0, IP6T_INV_VIA_IN)) {
196                 dprintf("VIA in mismatch (%s vs %s).%s\n",
197                         indev, ip6info->iniface,
198                         ip6info->invflags&IP6T_INV_VIA_IN ?" (INV)":"");
199                 return 0;
200         }
201
202         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
203                 ret |= (((const unsigned long *)outdev)[i]
204                         ^ ((const unsigned long *)ip6info->outiface)[i])
205                         & ((const unsigned long *)ip6info->outiface_mask)[i];
206         }
207
208         if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) {
209                 dprintf("VIA out mismatch (%s vs %s).%s\n",
210                         outdev, ip6info->outiface,
211                         ip6info->invflags&IP6T_INV_VIA_OUT ?" (INV)":"");
212                 return 0;
213         }
214
215 /* ... might want to do something with class and flowlabel here ... */
216
217         /* look for the desired protocol header */
218         if((ip6info->flags & IP6T_F_PROTO)) {
219                 u_int8_t currenthdr = ipv6->nexthdr;
220                 struct ipv6_opt_hdr _hdr, *hp;
221                 u_int16_t ptr;          /* Header offset in skb */
222                 u_int16_t hdrlen;       /* Header */
223                 u_int16_t _fragoff = 0, *fp = NULL;
224
225                 ptr = IPV6_HDR_LEN;
226
227                 while (ip6t_ext_hdr(currenthdr)) {
228                         /* Is there enough space for the next ext header? */
229                         if (skb->len - ptr < IPV6_OPTHDR_LEN)
230                                 return 0;
231
232                         /* NONE or ESP: there isn't protocol part */
233                         /* If we want to count these packets in '-p all',
234                          * we will change the return 0 to 1*/
235                         if ((currenthdr == IPPROTO_NONE) || 
236                                 (currenthdr == IPPROTO_ESP))
237                                 break;
238
239                         hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
240                         BUG_ON(hp == NULL);
241
242                         /* Size calculation */
243                         if (currenthdr == IPPROTO_FRAGMENT) {
244                                 fp = skb_header_pointer(skb,
245                                                    ptr+offsetof(struct frag_hdr,
246                                                                 frag_off),
247                                                    sizeof(_fragoff),
248                                                    &_fragoff);
249                                 if (fp == NULL)
250                                         return 0;
251
252                                 _fragoff = ntohs(*fp) & ~0x7;
253                                 hdrlen = 8;
254                         } else if (currenthdr == IPPROTO_AH)
255                                 hdrlen = (hp->hdrlen+2)<<2;
256                         else
257                                 hdrlen = ipv6_optlen(hp);
258
259                         currenthdr = hp->nexthdr;
260                         ptr += hdrlen;
261                         /* ptr is too large */
262                         if ( ptr > skb->len ) 
263                                 return 0;
264                         if (_fragoff) {
265                                 if (ip6t_ext_hdr(currenthdr))
266                                         return 0;
267                                 break;
268                         }
269                 }
270
271                 *protoff = ptr;
272                 *fragoff = _fragoff;
273
274                 /* currenthdr contains the protocol header */
275
276                 dprintf("Packet protocol %hi ?= %s%hi.\n",
277                                 currenthdr, 
278                                 ip6info->invflags & IP6T_INV_PROTO ? "!":"",
279                                 ip6info->proto);
280
281                 if (ip6info->proto == currenthdr) {
282                         if(ip6info->invflags & IP6T_INV_PROTO) {
283                                 return 0;
284                         }
285                         return 1;
286                 }
287
288                 /* We need match for the '-p all', too! */
289                 if ((ip6info->proto != 0) &&
290                         !(ip6info->invflags & IP6T_INV_PROTO))
291                         return 0;
292         }
293         return 1;
294 }
295
296 /* should be ip6 safe */
297 static inline int 
298 ip6_checkentry(const struct ip6t_ip6 *ipv6)
299 {
300         if (ipv6->flags & ~IP6T_F_MASK) {
301                 duprintf("Unknown flag bits set: %08X\n",
302                          ipv6->flags & ~IP6T_F_MASK);
303                 return 0;
304         }
305         if (ipv6->invflags & ~IP6T_INV_MASK) {
306                 duprintf("Unknown invflag bits set: %08X\n",
307                          ipv6->invflags & ~IP6T_INV_MASK);
308                 return 0;
309         }
310         return 1;
311 }
312
313 static unsigned int
314 ip6t_error(struct sk_buff **pskb,
315           const struct net_device *in,
316           const struct net_device *out,
317           unsigned int hooknum,
318           const void *targinfo,
319           void *userinfo)
320 {
321         if (net_ratelimit())
322                 printk("ip6_tables: error: `%s'\n", (char *)targinfo);
323
324         return NF_DROP;
325 }
326
327 static inline
328 int do_match(struct ip6t_entry_match *m,
329              const struct sk_buff *skb,
330              const struct net_device *in,
331              const struct net_device *out,
332              int offset,
333              unsigned int protoff,
334              int *hotdrop)
335 {
336         /* Stop iteration if it doesn't match */
337         if (!m->u.kernel.match->match(skb, in, out, m->data,
338                                       offset, protoff, hotdrop))
339                 return 1;
340         else
341                 return 0;
342 }
343
/* Return the entry located offset bytes after base. */
static inline struct ip6t_entry *
get_entry(void *base, unsigned int offset)
{
        char *start = base;

        return (struct ip6t_entry *)(start + offset);
}
349
350 /*
 * Walk the rules of `table' for one hook and return the verdict for the
 * packet.  Returns one of the generic firewall policies, like NF_ACCEPT.
 *
 * pskb:     the packet; handed on to target functions as-is.
 * hook:     index into the table's hook_entry[]/underflow[] arrays.
 * in/out:   devices; a NULL device is matched as an all-zero name.
 * table:    table to walk; its lock is read-held (bottom halves disabled)
 *           for the whole walk.
 * userdata: passed through to target functions untouched.
 *
 * The walk ends when a standard target carries a negative verdict other than
 * IP6T_RETURN, when a target function returns something other than
 * IP6T_CONTINUE, or when a match sets hotdrop (result is then NF_DROP).
 * With DEBUG_ALLOW_ALL defined the result is always NF_ACCEPT.
 */
351 unsigned int
352 ip6t_do_table(struct sk_buff **pskb,
353               unsigned int hook,
354               const struct net_device *in,
355               const struct net_device *out,
356               struct ip6t_table *table,
357               void *userdata)
358 {
359         static const char nulldevname[IFNAMSIZ];
360         int offset = 0;
361         unsigned int protoff = 0;
362         int hotdrop = 0;
363         /* Initializing verdict to NF_DROP keeps gcc happy. */
364         unsigned int verdict = NF_DROP;
365         const char *indev, *outdev;
366         void *table_base;
367         struct ip6t_entry *e, *back;
368
369         /* Initialization */
370         indev = in ? in->name : nulldevname;
371         outdev = out ? out->name : nulldevname;
372
373         /* We handle fragments by dealing with the first fragment as
374          * if it was a normal packet.  All other fragments are treated
375          * normally, except that they will NEVER match rules that ask
376          * things we don't know, ie. tcp syn flag or ports).  If the
377          * rule is also a fragment-specific rule, non-fragments won't
378          * match it. */
379
380         read_lock_bh(&table->lock);
381         IP_NF_ASSERT(table->valid_hooks & (1 << hook));
        /* Use the copy of the rules that belongs to the current CPU. */
382         table_base = (void *)table->private->entries
383                 + TABLE_OFFSET(table->private, smp_processor_id());
384         e = get_entry(table_base, table->private->hook_entry[hook]);
385
386 #ifdef CONFIG_NETFILTER_DEBUG
387         /* Check noone else using our table */
388         if (((struct ip6t_entry *)table_base)->comefrom != 0xdead57ac
389             && ((struct ip6t_entry *)table_base)->comefrom != 0xeeeeeeec) {
390                 printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
391                        smp_processor_id(),
392                        table->name,
393                        &((struct ip6t_entry *)table_base)->comefrom,
394                        ((struct ip6t_entry *)table_base)->comefrom);
395         }
396         ((struct ip6t_entry *)table_base)->comefrom = 0x57acc001;
397 #endif
398
399         /* For return from builtin chain */
400         back = get_entry(table_base, table->private->underflow[hook]);
401
402         do {
403                 IP_NF_ASSERT(e);
404                 IP_NF_ASSERT(back);
405                 if (ip6_packet_match(*pskb, indev, outdev, &e->ipv6,
406                         &protoff, &offset)) {
407                         struct ip6t_entry_target *t;
408
                        /* A non-zero result means one of the rule's match
                         * extensions rejected the packet (see do_match()). */
409                         if (IP6T_MATCH_ITERATE(e, do_match,
410                                                *pskb, in, out,
411                                                offset, protoff, &hotdrop) != 0)
412                                 goto no_match;
413
414                         ADD_COUNTER(e->counters,
415                                     ntohs((*pskb)->nh.ipv6h->payload_len)
416                                     + IPV6_HDR_LEN,
417                                     1);
418
419                         t = ip6t_get_target(e);
420                         IP_NF_ASSERT(t->u.kernel.target);
421                         /* Standard target? */
422                         if (!t->u.kernel.target->target) {
423                                 int v;
424
425                                 v = ((struct ip6t_standard_target *)t)->verdict;
426                                 if (v < 0) {
427                                         /* Pop from stack? */
428                                         if (v != IP6T_RETURN) {
                                                /* Negative verdicts encode
                                                 * the final result as
                                                 * -(result) - 1. */
429                                                 verdict = (unsigned)(-v) - 1;
430                                                 break;
431                                         }
                                        /* RETURN: resume at the saved entry
                                         * and restore the previous return
                                         * point from its comefrom field. */
432                                         e = back;
433                                         back = get_entry(table_base,
434                                                          back->comefrom);
435                                         continue;
436                                 }
                                /* v >= 0 is a byte offset to continue at.
                                 * Unless it merely leads to the next entry,
                                 * or the rule is flagged IP6T_F_GOTO,
                                 * remember where to come back to. */
437                                 if (table_base + v != (void *)e + e->next_offset
438                                     && !(e->ipv6.flags & IP6T_F_GOTO)) {
439                                         /* Save old back ptr in next entry */
440                                         struct ip6t_entry *next
441                                                 = (void *)e + e->next_offset;
442                                         next->comefrom
443                                                 = (void *)back - table_base;
444                                         /* set back pointer to next entry */
445                                         back = next;
446                                 }
447
448                                 e = get_entry(table_base, v);
449                         } else {
450                                 /* Targets which reenter must return
451                                    abs. verdicts */
452 #ifdef CONFIG_NETFILTER_DEBUG
453                                 ((struct ip6t_entry *)table_base)->comefrom
454                                         = 0xeeeeeeec;
455 #endif
456                                 verdict = t->u.kernel.target->target(pskb,
457                                                                      in, out,
458                                                                      hook,
459                                                                      t->data,
460                                                                      userdata);
461
462 #ifdef CONFIG_NETFILTER_DEBUG
463                                 if (((struct ip6t_entry *)table_base)->comefrom
464                                     != 0xeeeeeeec
465                                     && verdict == IP6T_CONTINUE) {
466                                         printk("Target %s reentered!\n",
467                                                t->u.kernel.target->name);
468                                         verdict = NF_DROP;
469                                 }
470                                 ((struct ip6t_entry *)table_base)->comefrom
471                                         = 0x57acc001;
472 #endif
473                                 if (verdict == IP6T_CONTINUE)
474                                         e = (void *)e + e->next_offset;
475                                 else
476                                         /* Verdict */
477                                         break;
478                         }
479                 } else {
480
481                 no_match:
482                         e = (void *)e + e->next_offset;
483                 }
484         } while (!hotdrop);
485
486 #ifdef CONFIG_NETFILTER_DEBUG
487         ((struct ip6t_entry *)table_base)->comefrom = 0xdead57ac;
488 #endif
489         read_unlock_bh(&table->lock);
490
491 #ifdef DEBUG_ALLOW_ALL
492         return NF_ACCEPT;
493 #else
494         if (hotdrop)
495                 return NF_DROP;
496         else return verdict;
497 #endif
498 }
499
500 /* If it succeeds, returns element and locks mutex */
501 static inline void *
502 find_inlist_lock_noload(struct list_head *head,
503                         const char *name,
504                         int *error,
505                         struct semaphore *mutex)
506 {
507         void *ret;
508
509 #if 1
510         duprintf("find_inlist: searching for `%s' in %s.\n",
511                  name, head == &ip6t_target ? "ip6t_target"
512                  : head == &ip6t_match ? "ip6t_match"
513                  : head == &ip6t_tables ? "ip6t_tables" : "UNKNOWN");
514 #endif
515
516         *error = down_interruptible(mutex);
517         if (*error != 0)
518                 return NULL;
519
520         ret = list_named_find(head, name);
521         if (!ret) {
522                 *error = -ENOENT;
523                 up(mutex);
524         }
525         return ret;
526 }
527
#ifdef CONFIG_KMOD
/*
 * Like find_inlist_lock_noload(), but when the first lookup fails a module
 * named "<prefix><name>" is requested and the lookup is tried once more.
 * The result and *error are those of the last lookup performed.
 */
static void *
find_inlist_lock(struct list_head *head,
                 const char *name,
                 const char *prefix,
                 int *error,
                 struct semaphore *mutex)
{
        void *entry = find_inlist_lock_noload(head, name, error, mutex);

        if (entry)
                return entry;

        duprintf("find_inlist: loading `%s%s'.\n", prefix, name);
        request_module("%s%s", prefix, name);
        return find_inlist_lock_noload(head, name, error, mutex);
}
#else
/* No module loading available: a plain lookup, the prefix is ignored. */
#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m))
#endif
550
551 static inline struct ip6t_table *
552 ip6t_find_table_lock(const char *name, int *error, struct semaphore *mutex)
553 {
554         return find_inlist_lock(&ip6t_tables, name, "ip6table_", error, mutex);
555 }
556
557 static inline struct ip6t_match *
558 find_match_lock(const char *name, int *error, struct semaphore *mutex)
559 {
560         return find_inlist_lock(&ip6t_match, name, "ip6t_", error, mutex);
561 }
562
563 static struct ip6t_target *
564 ip6t_find_target_lock(const char *name, int *error, struct semaphore *mutex)
565 {
566         return find_inlist_lock(&ip6t_target, name, "ip6t_", error, mutex);
567 }
568
569 /* All zeroes == unconditional rule. */
570 static inline int
571 unconditional(const struct ip6t_ip6 *ipv6)
572 {
573         unsigned int i;
574
575         for (i = 0; i < sizeof(*ipv6); i++)
576                 if (((char *)ipv6)[i])
577                         break;
578
579         return (i == sizeof(*ipv6));
580 }
581
582 /* Figures out from what hook each rule can be called: returns 0 if
583    there are loops.  Puts hook bitmask in comefrom. */
584 static int
585 mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks)
586 {
587         unsigned int hook;
588
589         /* No recursion; use packet counter to save back ptrs (reset
590            to 0 as we leave), and comefrom to save source hook bitmask */
591         for (hook = 0; hook < NF_IP6_NUMHOOKS; hook++) {
592                 unsigned int pos = newinfo->hook_entry[hook];
593                 struct ip6t_entry *e
594                         = (struct ip6t_entry *)(newinfo->entries + pos);
595
596                 if (!(valid_hooks & (1 << hook)))
597                         continue;
598
599                 /* Set initial back pointer. */
600                 e->counters.pcnt = pos;
601
602                 for (;;) {
603                         struct ip6t_standard_target *t
604                                 = (void *)ip6t_get_target(e);
605
606                         if (e->comefrom & (1 << NF_IP6_NUMHOOKS)) {
607                                 printk("iptables: loop hook %u pos %u %08X.\n",
608                                        hook, pos, e->comefrom);
609                                 return 0;
610                         }
611                         e->comefrom
612                                 |= ((1 << hook) | (1 << NF_IP6_NUMHOOKS));
613
614                         /* Unconditional return/END. */
615                         if (e->target_offset == sizeof(struct ip6t_entry)
616                             && (strcmp(t->target.u.user.name,
617                                        IP6T_STANDARD_TARGET) == 0)
618                             && t->verdict < 0
619                             && unconditional(&e->ipv6)) {
620                                 unsigned int oldpos, size;
621
622                                 /* Return: backtrack through the last
623                                    big jump. */
624                                 do {
625                                         e->comefrom ^= (1<<NF_IP6_NUMHOOKS);
626 #ifdef DEBUG_IP_FIREWALL_USER
627                                         if (e->comefrom
628                                             & (1 << NF_IP6_NUMHOOKS)) {
629                                                 duprintf("Back unset "
630                                                          "on hook %u "
631                                                          "rule %u\n",
632                                                          hook, pos);
633                                         }
634 #endif
635                                         oldpos = pos;
636                                         pos = e->counters.pcnt;
637                                         e->counters.pcnt = 0;
638
639                                         /* We're at the start. */
640                                         if (pos == oldpos)
641                                                 goto next;
642
643                                         e = (struct ip6t_entry *)
644                                                 (newinfo->entries + pos);
645                                 } while (oldpos == pos + e->next_offset);
646
647                                 /* Move along one */
648                                 size = e->next_offset;
649                                 e = (struct ip6t_entry *)
650                                         (newinfo->entries + pos + size);
651                                 e->counters.pcnt = pos;
652                                 pos += size;
653                         } else {
654                                 int newpos = t->verdict;
655
656                                 if (strcmp(t->target.u.user.name,
657                                            IP6T_STANDARD_TARGET) == 0
658                                     && newpos >= 0) {
659                                         /* This a jump; chase it. */
660                                         duprintf("Jump rule %u -> %u\n",
661                                                  pos, newpos);
662                                 } else {
663                                         /* ... this is a fallthru */
664                                         newpos = pos + e->next_offset;
665                                 }
666                                 e = (struct ip6t_entry *)
667                                         (newinfo->entries + newpos);
668                                 e->counters.pcnt = pos;
669                                 pos = newpos;
670                         }
671                 }
672                 next:
673                 duprintf("Finished chain %u\n", hook);
674         }
675         return 1;
676 }
677
678 static inline int
679 cleanup_match(struct ip6t_entry_match *m, unsigned int *i)
680 {
681         if (i && (*i)-- == 0)
682                 return 1;
683
684         if (m->u.kernel.match->destroy)
685                 m->u.kernel.match->destroy(m->data,
686                                            m->u.match_size - sizeof(*m));
687         module_put(m->u.kernel.match->me);
688         return 0;
689 }
690
691 static inline int
692 standard_check(const struct ip6t_entry_target *t,
693                unsigned int max_offset)
694 {
695         struct ip6t_standard_target *targ = (void *)t;
696
697         /* Check standard info. */
698         if (t->u.target_size
699             != IP6T_ALIGN(sizeof(struct ip6t_standard_target))) {
700                 duprintf("standard_check: target size %u != %u\n",
701                          t->u.target_size,
702                          IP6T_ALIGN(sizeof(struct ip6t_standard_target)));
703                 return 0;
704         }
705
706         if (targ->verdict >= 0
707             && targ->verdict > max_offset - sizeof(struct ip6t_entry)) {
708                 duprintf("ip6t_standard_check: bad verdict (%i)\n",
709                          targ->verdict);
710                 return 0;
711         }
712
713         if (targ->verdict < -NF_MAX_VERDICT - 1) {
714                 duprintf("ip6t_standard_check: bad negative verdict (%i)\n",
715                          targ->verdict);
716                 return 0;
717         }
718         return 1;
719 }
720
721 static inline int
722 check_match(struct ip6t_entry_match *m,
723             const char *name,
724             const struct ip6t_ip6 *ipv6,
725             unsigned int hookmask,
726             unsigned int *i)
727 {
728         int ret;
729         struct ip6t_match *match;
730
731         match = find_match_lock(m->u.user.name, &ret, &ip6t_mutex);
732         if (!match) {
733           //            duprintf("check_match: `%s' not found\n", m->u.name);
734                 return ret;
735         }
736         if (!try_module_get(match->me)) {
737                 up(&ip6t_mutex);
738                 return -ENOENT;
739         }
740         m->u.kernel.match = match;
741         up(&ip6t_mutex);
742
743         if (m->u.kernel.match->checkentry
744             && !m->u.kernel.match->checkentry(name, ipv6, m->data,
745                                               m->u.match_size - sizeof(*m),
746                                               hookmask)) {
747                 module_put(m->u.kernel.match->me);
748                 duprintf("ip_tables: check failed for `%s'.\n",
749                          m->u.kernel.match->name);
750                 return -EINVAL;
751         }
752
753         (*i)++;
754         return 0;
755 }
756
757 static struct ip6t_target ip6t_standard_target;
758
759 static inline int
760 check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
761             unsigned int *i)
762 {
763         struct ip6t_entry_target *t;
764         struct ip6t_target *target;
765         int ret;
766         unsigned int j;
767
768         if (!ip6_checkentry(&e->ipv6)) {
769                 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
770                 return -EINVAL;
771         }
772
773         j = 0;
774         ret = IP6T_MATCH_ITERATE(e, check_match, name, &e->ipv6, e->comefrom, &j);
775         if (ret != 0)
776                 goto cleanup_matches;
777
778         t = ip6t_get_target(e);
779         target = ip6t_find_target_lock(t->u.user.name, &ret, &ip6t_mutex);
780         if (!target) {
781                 duprintf("check_entry: `%s' not found\n", t->u.user.name);
782                 goto cleanup_matches;
783         }
784         if (!try_module_get(target->me)) {
785                 up(&ip6t_mutex);
786                 ret = -ENOENT;
787                 goto cleanup_matches;
788         }
789         t->u.kernel.target = target;
790         up(&ip6t_mutex);
791         if (!t->u.kernel.target) {
792                 ret = -EBUSY;
793                 goto cleanup_matches;
794         }
795         if (t->u.kernel.target == &ip6t_standard_target) {
796                 if (!standard_check(t, size)) {
797                         ret = -EINVAL;
798                         goto cleanup_matches;
799                 }
800         } else if (t->u.kernel.target->checkentry
801                    && !t->u.kernel.target->checkentry(name, e, t->data,
802                                                       t->u.target_size
803                                                       - sizeof(*t),
804                                                       e->comefrom)) {
805                 module_put(t->u.kernel.target->me);
806                 duprintf("ip_tables: check failed for `%s'.\n",
807                          t->u.kernel.target->name);
808                 ret = -EINVAL;
809                 goto cleanup_matches;
810         }
811
812         (*i)++;
813         return 0;
814
815  cleanup_matches:
816         IP6T_MATCH_ITERATE(e, cleanup_match, &j);
817         return ret;
818 }
819
820 static inline int
821 check_entry_size_and_hooks(struct ip6t_entry *e,
822                            struct ip6t_table_info *newinfo,
823                            unsigned char *base,
824                            unsigned char *limit,
825                            const unsigned int *hook_entries,
826                            const unsigned int *underflows,
827                            unsigned int *i)
828 {
829         unsigned int h;
830
831         if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0
832             || (unsigned char *)e + sizeof(struct ip6t_entry) >= limit) {
833                 duprintf("Bad offset %p\n", e);
834                 return -EINVAL;
835         }
836
837         if (e->next_offset
838             < sizeof(struct ip6t_entry) + sizeof(struct ip6t_entry_target)) {
839                 duprintf("checking: element %p size %u\n",
840                          e, e->next_offset);
841                 return -EINVAL;
842         }
843
844         /* Check hooks & underflows */
845         for (h = 0; h < NF_IP6_NUMHOOKS; h++) {
846                 if ((unsigned char *)e - base == hook_entries[h])
847                         newinfo->hook_entry[h] = hook_entries[h];
848                 if ((unsigned char *)e - base == underflows[h])
849                         newinfo->underflow[h] = underflows[h];
850         }
851
852         /* FIXME: underflows must be unconditional, standard verdicts
853            < 0 (not IP6T_RETURN). --RR */
854
855         /* Clear counters and comefrom */
856         e->counters = ((struct ip6t_counters) { 0, 0 });
857         e->comefrom = 0;
858
859         (*i)++;
860         return 0;
861 }
862
863 static inline int
864 cleanup_entry(struct ip6t_entry *e, unsigned int *i)
865 {
866         struct ip6t_entry_target *t;
867
868         if (i && (*i)-- == 0)
869                 return 1;
870
871         /* Cleanup all matches */
872         IP6T_MATCH_ITERATE(e, cleanup_match, NULL);
873         t = ip6t_get_target(e);
874         if (t->u.kernel.target->destroy)
875                 t->u.kernel.target->destroy(t->data,
876                                             t->u.target_size - sizeof(*t));
877         module_put(t->u.kernel.target->me);
878         return 0;
879 }
880
881 /* Checks and translates the user-supplied table segment (held in
882    newinfo) */
883 static int
884 translate_table(const char *name,
885                 unsigned int valid_hooks,
886                 struct ip6t_table_info *newinfo,
887                 unsigned int size,
888                 unsigned int number,
889                 const unsigned int *hook_entries,
890                 const unsigned int *underflows)
891 {
892         unsigned int i;
893         int ret;
894
895         newinfo->size = size;
896         newinfo->number = number;
897
898         /* Init all hooks to impossible value. */
899         for (i = 0; i < NF_IP6_NUMHOOKS; i++) {
900                 newinfo->hook_entry[i] = 0xFFFFFFFF;
901                 newinfo->underflow[i] = 0xFFFFFFFF;
902         }
903
904         duprintf("translate_table: size %u\n", newinfo->size);
905         i = 0;
906         /* Walk through entries, checking offsets. */
907         ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size,
908                                 check_entry_size_and_hooks,
909                                 newinfo,
910                                 newinfo->entries,
911                                 newinfo->entries + size,
912                                 hook_entries, underflows, &i);
913         if (ret != 0)
914                 return ret;
915
916         if (i != number) {
917                 duprintf("translate_table: %u not %u entries\n",
918                          i, number);
919                 return -EINVAL;
920         }
921
922         /* Check hooks all assigned */
923         for (i = 0; i < NF_IP6_NUMHOOKS; i++) {
924                 /* Only hooks which are valid */
925                 if (!(valid_hooks & (1 << i)))
926                         continue;
927                 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
928                         duprintf("Invalid hook entry %u %u\n",
929                                  i, hook_entries[i]);
930                         return -EINVAL;
931                 }
932                 if (newinfo->underflow[i] == 0xFFFFFFFF) {
933                         duprintf("Invalid underflow %u %u\n",
934                                  i, underflows[i]);
935                         return -EINVAL;
936                 }
937         }
938
939         if (!mark_source_chains(newinfo, valid_hooks))
940                 return -ELOOP;
941
942         /* Finally, each sanity check must pass */
943         i = 0;
944         ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size,
945                                 check_entry, name, size, &i);
946
947         if (ret != 0) {
948                 IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size,
949                                   cleanup_entry, &i);
950                 return ret;
951         }
952
953         /* And one copy for every other CPU */
954         for_each_cpu(i) {
955                 if (i == 0)
956                         continue;
957                 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i,
958                        newinfo->entries,
959                        SMP_ALIGN(newinfo->size));
960         }
961
962         return ret;
963 }
964
965 static struct ip6t_table_info *
966 replace_table(struct ip6t_table *table,
967               unsigned int num_counters,
968               struct ip6t_table_info *newinfo,
969               int *error)
970 {
971         struct ip6t_table_info *oldinfo;
972
973 #ifdef CONFIG_NETFILTER_DEBUG
974         {
975                 struct ip6t_entry *table_base;
976                 unsigned int i;
977
978                 for (i = 0; i < num_possible_cpus(); i++) {
979                 for_each_cpu(i) {
980                         table_base =
981                                 (void *)newinfo->entries
982                                 + TABLE_OFFSET(newinfo, i);
983
984                         table_base->comefrom = 0xdead57ac;
985                 }
986         }
987 #endif
988
989         /* Do the substitution. */
990         write_lock_bh(&table->lock);
991         /* Check inside lock: is the old number correct? */
992         if (num_counters != table->private->number) {
993                 duprintf("num_counters != table->private->number (%u/%u)\n",
994                          num_counters, table->private->number);
995                 write_unlock_bh(&table->lock);
996                 *error = -EAGAIN;
997                 return NULL;
998         }
999         oldinfo = table->private;
1000         table->private = newinfo;
1001         newinfo->initial_entries = oldinfo->initial_entries;
1002         write_unlock_bh(&table->lock);
1003
1004         return oldinfo;
1005 }
1006
1007 /* Gets counters. */
1008 static inline int
1009 add_entry_to_counter(const struct ip6t_entry *e,
1010                      struct ip6t_counters total[],
1011                      unsigned int *i)
1012 {
1013         ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
1014
1015         (*i)++;
1016         return 0;
1017 }
1018
1019 static void
1020 get_counters(const struct ip6t_table_info *t,
1021              struct ip6t_counters counters[])
1022 {
1023         unsigned int cpu;
1024         unsigned int i;
1025
1026         for_each_cpu(cpu) {
1027                 i = 0;
1028                 IP6T_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
1029                                   t->size,
1030                                   add_entry_to_counter,
1031                                   counters,
1032                                   &i);
1033         }
1034 }
1035
1036 static int
1037 copy_entries_to_user(unsigned int total_size,
1038                      struct ip6t_table *table,
1039                      void __user *userptr)
1040 {
1041         unsigned int off, num, countersize;
1042         struct ip6t_entry *e;
1043         struct ip6t_counters *counters;
1044         int ret = 0;
1045
1046         /* We need atomic snapshot of counters: rest doesn't change
1047            (other than comefrom, which userspace doesn't care
1048            about). */
1049         countersize = sizeof(struct ip6t_counters) * table->private->number;
1050         counters = vmalloc(countersize);
1051
1052         if (counters == NULL)
1053                 return -ENOMEM;
1054
1055         /* First, sum counters... */
1056         memset(counters, 0, countersize);
1057         write_lock_bh(&table->lock);
1058         get_counters(table->private, counters);
1059         write_unlock_bh(&table->lock);
1060
1061         /* ... then copy entire thing from CPU 0... */
1062         if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
1063                 ret = -EFAULT;
1064                 goto free_counters;
1065         }
1066
1067         /* FIXME: use iterator macros --RR */
1068         /* ... then go back and fix counters and names */
1069         for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
1070                 unsigned int i;
1071                 struct ip6t_entry_match *m;
1072                 struct ip6t_entry_target *t;
1073
1074                 e = (struct ip6t_entry *)(table->private->entries + off);
1075                 if (copy_to_user(userptr + off
1076                                  + offsetof(struct ip6t_entry, counters),
1077                                  &counters[num],
1078                                  sizeof(counters[num])) != 0) {
1079                         ret = -EFAULT;
1080                         goto free_counters;
1081                 }
1082
1083                 for (i = sizeof(struct ip6t_entry);
1084                      i < e->target_offset;
1085                      i += m->u.match_size) {
1086                         m = (void *)e + i;
1087
1088                         if (copy_to_user(userptr + off + i
1089                                          + offsetof(struct ip6t_entry_match,
1090                                                     u.user.name),
1091                                          m->u.kernel.match->name,
1092                                          strlen(m->u.kernel.match->name)+1)
1093                             != 0) {
1094                                 ret = -EFAULT;
1095                                 goto free_counters;
1096                         }
1097                 }
1098
1099                 t = ip6t_get_target(e);
1100                 if (copy_to_user(userptr + off + e->target_offset
1101                                  + offsetof(struct ip6t_entry_target,
1102                                             u.user.name),
1103                                  t->u.kernel.target->name,
1104                                  strlen(t->u.kernel.target->name)+1) != 0) {
1105                         ret = -EFAULT;
1106                         goto free_counters;
1107                 }
1108         }
1109
1110  free_counters:
1111         vfree(counters);
1112         return ret;
1113 }
1114
1115 static int
1116 get_entries(const struct ip6t_get_entries *entries,
1117             struct ip6t_get_entries __user *uptr)
1118 {
1119         int ret;
1120         struct ip6t_table *t;
1121
1122         t = ip6t_find_table_lock(entries->name, &ret, &ip6t_mutex);
1123         if (t) {
1124                 duprintf("t->private->number = %u\n",
1125                          t->private->number);
1126                 if (entries->size == t->private->size)
1127                         ret = copy_entries_to_user(t->private->size,
1128                                                    t, uptr->entrytable);
1129                 else {
1130                         duprintf("get_entries: I've got %u not %u!\n",
1131                                  t->private->size,
1132                                  entries->size);
1133                         ret = -EINVAL;
1134                 }
1135                 up(&ip6t_mutex);
1136         } else
1137                 duprintf("get_entries: Can't find %s!\n",
1138                          entries->name);
1139
1140         return ret;
1141 }
1142
1143 static int
1144 do_replace(void __user *user, unsigned int len)
1145 {
1146         int ret;
1147         struct ip6t_replace tmp;
1148         struct ip6t_table *t;
1149         struct ip6t_table_info *newinfo, *oldinfo;
1150         struct ip6t_counters *counters;
1151
1152         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1153                 return -EFAULT;
1154
1155         /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
1156         if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
1157                 return -ENOMEM;
1158
1159         newinfo = vmalloc(sizeof(struct ip6t_table_info)
1160                           + SMP_ALIGN(tmp.size) *
1161                                         (highest_possible_processor_id()+1));
1162         if (!newinfo)
1163                 return -ENOMEM;
1164
1165         if (copy_from_user(newinfo->entries, user + sizeof(tmp),
1166                            tmp.size) != 0) {
1167                 ret = -EFAULT;
1168                 goto free_newinfo;
1169         }
1170
1171         counters = vmalloc(tmp.num_counters * sizeof(struct ip6t_counters));
1172         if (!counters) {
1173                 ret = -ENOMEM;
1174                 goto free_newinfo;
1175         }
1176         memset(counters, 0, tmp.num_counters * sizeof(struct ip6t_counters));
1177
1178         ret = translate_table(tmp.name, tmp.valid_hooks,
1179                               newinfo, tmp.size, tmp.num_entries,
1180                               tmp.hook_entry, tmp.underflow);
1181         if (ret != 0)
1182                 goto free_newinfo_counters;
1183
1184         duprintf("ip_tables: Translated table\n");
1185
1186         t = ip6t_find_table_lock(tmp.name, &ret, &ip6t_mutex);
1187         if (!t)
1188                 goto free_newinfo_counters_untrans;
1189
1190         /* You lied! */
1191         if (tmp.valid_hooks != t->valid_hooks) {
1192                 duprintf("Valid hook crap: %08X vs %08X\n",
1193                          tmp.valid_hooks, t->valid_hooks);
1194                 ret = -EINVAL;
1195                 goto free_newinfo_counters_untrans_unlock;
1196         }
1197
1198         /* Get a reference in advance, we're not allowed fail later */
1199         if (!try_module_get(t->me)) {
1200                 ret = -EBUSY;
1201                 goto free_newinfo_counters_untrans_unlock;
1202         }
1203
1204         oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
1205         if (!oldinfo)
1206                 goto put_module;
1207
1208         /* Update module usage count based on number of rules */
1209         duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
1210                 oldinfo->number, oldinfo->initial_entries, newinfo->number);
1211         if ((oldinfo->number > oldinfo->initial_entries) || 
1212             (newinfo->number <= oldinfo->initial_entries)) 
1213                 module_put(t->me);
1214         if ((oldinfo->number > oldinfo->initial_entries) &&
1215             (newinfo->number <= oldinfo->initial_entries))
1216                 module_put(t->me);
1217
1218         /* Get the old counters. */
1219         get_counters(oldinfo, counters);
1220         /* Decrease module usage counts and free resource */
1221         IP6T_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
1222         vfree(oldinfo);
1223         /* Silent error: too late now. */
1224         if (copy_to_user(tmp.counters, counters,
1225                          sizeof(struct ip6t_counters) * tmp.num_counters) != 0)
1226                 ret = -EFAULT;
1227         vfree(counters);
1228         up(&ip6t_mutex);
1229         return ret;
1230
1231  put_module:
1232         module_put(t->me);
1233  free_newinfo_counters_untrans_unlock:
1234         up(&ip6t_mutex);
1235  free_newinfo_counters_untrans:
1236         IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
1237  free_newinfo_counters:
1238         vfree(counters);
1239  free_newinfo:
1240         vfree(newinfo);
1241         return ret;
1242 }
1243
1244 /* We're lazy, and add to the first CPU; overflow works its fey magic
1245  * and everything is OK. */
1246 static inline int
1247 add_counter_to_entry(struct ip6t_entry *e,
1248                      const struct ip6t_counters addme[],
1249                      unsigned int *i)
1250 {
1251 #if 0
1252         duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1253                  *i,
1254                  (long unsigned int)e->counters.pcnt,
1255                  (long unsigned int)e->counters.bcnt,
1256                  (long unsigned int)addme[*i].pcnt,
1257                  (long unsigned int)addme[*i].bcnt);
1258 #endif
1259
1260         ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1261
1262         (*i)++;
1263         return 0;
1264 }
1265
1266 static int
1267 do_add_counters(void __user *user, unsigned int len)
1268 {
1269         unsigned int i;
1270         struct ip6t_counters_info tmp, *paddc;
1271         struct ip6t_table *t;
1272         int ret;
1273
1274         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1275                 return -EFAULT;
1276
1277         if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ip6t_counters))
1278                 return -EINVAL;
1279
1280         paddc = vmalloc(len);
1281         if (!paddc)
1282                 return -ENOMEM;
1283
1284         if (copy_from_user(paddc, user, len) != 0) {
1285                 ret = -EFAULT;
1286                 goto free;
1287         }
1288
1289         t = ip6t_find_table_lock(tmp.name, &ret, &ip6t_mutex);
1290         if (!t)
1291                 goto free;
1292
1293         write_lock_bh(&t->lock);
1294         if (t->private->number != paddc->num_counters) {
1295                 ret = -EINVAL;
1296                 goto unlock_up_free;
1297         }
1298
1299         i = 0;
1300         IP6T_ENTRY_ITERATE(t->private->entries,
1301                           t->private->size,
1302                           add_counter_to_entry,
1303                           paddc->counters,
1304                           &i);
1305  unlock_up_free:
1306         write_unlock_bh(&t->lock);
1307         up(&ip6t_mutex);
1308  free:
1309         vfree(paddc);
1310
1311         return ret;
1312 }
1313
1314 static int
1315 do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1316 {
1317         int ret;
1318
1319         if (!capable(CAP_NET_ADMIN))
1320                 return -EPERM;
1321
1322         switch (cmd) {
1323         case IP6T_SO_SET_REPLACE:
1324                 ret = do_replace(user, len);
1325                 break;
1326
1327         case IP6T_SO_SET_ADD_COUNTERS:
1328                 ret = do_add_counters(user, len);
1329                 break;
1330
1331         default:
1332                 duprintf("do_ip6t_set_ctl:  unknown request %i\n", cmd);
1333                 ret = -EINVAL;
1334         }
1335
1336         return ret;
1337 }
1338
1339 static int
1340 do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1341 {
1342         int ret;
1343
1344         if (!capable(CAP_NET_ADMIN))
1345                 return -EPERM;
1346
1347         switch (cmd) {
1348         case IP6T_SO_GET_INFO: {
1349                 char name[IP6T_TABLE_MAXNAMELEN];
1350                 struct ip6t_table *t;
1351
1352                 if (*len != sizeof(struct ip6t_getinfo)) {
1353                         duprintf("length %u != %u\n", *len,
1354                                  sizeof(struct ip6t_getinfo));
1355                         ret = -EINVAL;
1356                         break;
1357                 }
1358
1359                 if (copy_from_user(name, user, sizeof(name)) != 0) {
1360                         ret = -EFAULT;
1361                         break;
1362                 }
1363                 name[IP6T_TABLE_MAXNAMELEN-1] = '\0';
1364                 t = ip6t_find_table_lock(name, &ret, &ip6t_mutex);
1365                 if (t) {
1366                         struct ip6t_getinfo info;
1367
1368                         info.valid_hooks = t->valid_hooks;
1369                         memcpy(info.hook_entry, t->private->hook_entry,
1370                                sizeof(info.hook_entry));
1371                         memcpy(info.underflow, t->private->underflow,
1372                                sizeof(info.underflow));
1373                         info.num_entries = t->private->number;
1374                         info.size = t->private->size;
1375                         memcpy(info.name, name, sizeof(info.name));
1376
1377                         if (copy_to_user(user, &info, *len) != 0)
1378                                 ret = -EFAULT;
1379                         else
1380                                 ret = 0;
1381
1382                         up(&ip6t_mutex);
1383                 }
1384         }
1385         break;
1386
1387         case IP6T_SO_GET_ENTRIES: {
1388                 struct ip6t_get_entries get;
1389
1390                 if (*len < sizeof(get)) {
1391                         duprintf("get_entries: %u < %u\n", *len, sizeof(get));
1392                         ret = -EINVAL;
1393                 } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
1394                         ret = -EFAULT;
1395                 } else if (*len != sizeof(struct ip6t_get_entries) + get.size) {
1396                         duprintf("get_entries: %u != %u\n", *len,
1397                                  sizeof(struct ip6t_get_entries) + get.size);
1398                         ret = -EINVAL;
1399                 } else
1400                         ret = get_entries(&get, user);
1401                 break;
1402         }
1403
1404         default:
1405                 duprintf("do_ip6t_get_ctl: unknown request %i\n", cmd);
1406                 ret = -EINVAL;
1407         }
1408
1409         return ret;
1410 }
1411
1412 /* Registration hooks for targets. */
1413 int
1414 ip6t_register_target(struct ip6t_target *target)
1415 {
1416         int ret;
1417
1418         ret = down_interruptible(&ip6t_mutex);
1419         if (ret != 0)
1420                 return ret;
1421
1422         if (!list_named_insert(&ip6t_target, target)) {
1423                 duprintf("ip6t_register_target: `%s' already in list!\n",
1424                          target->name);
1425                 ret = -EINVAL;
1426         }
1427         up(&ip6t_mutex);
1428         return ret;
1429 }
1430
1431 void
1432 ip6t_unregister_target(struct ip6t_target *target)
1433 {
1434         down(&ip6t_mutex);
1435         LIST_DELETE(&ip6t_target, target);
1436         up(&ip6t_mutex);
1437 }
1438
1439 int
1440 ip6t_register_match(struct ip6t_match *match)
1441 {
1442         int ret;
1443
1444         ret = down_interruptible(&ip6t_mutex);
1445         if (ret != 0)
1446                 return ret;
1447
1448         if (!list_named_insert(&ip6t_match, match)) {
1449                 duprintf("ip6t_register_match: `%s' already in list!\n",
1450                          match->name);
1451                 ret = -EINVAL;
1452         }
1453         up(&ip6t_mutex);
1454
1455         return ret;
1456 }
1457
1458 void
1459 ip6t_unregister_match(struct ip6t_match *match)
1460 {
1461         down(&ip6t_mutex);
1462         LIST_DELETE(&ip6t_match, match);
1463         up(&ip6t_mutex);
1464 }
1465
1466 int ip6t_register_table(struct ip6t_table *table,
1467                         const struct ip6t_replace *repl)
1468 {
1469         int ret;
1470         struct ip6t_table_info *newinfo;
1471         static struct ip6t_table_info bootstrap
1472                 = { 0, 0, 0, { 0 }, { 0 }, { } };
1473
1474         newinfo = vmalloc(sizeof(struct ip6t_table_info)
1475                           + SMP_ALIGN(repl->size) *
1476                                         (highest_possible_processor_id()+1));
1477         if (!newinfo)
1478                 return -ENOMEM;
1479
1480         memcpy(newinfo->entries, repl->entries, repl->size);
1481
1482         ret = translate_table(table->name, table->valid_hooks,
1483                               newinfo, repl->size,
1484                               repl->num_entries,
1485                               repl->hook_entry,
1486                               repl->underflow);
1487         if (ret != 0) {
1488                 vfree(newinfo);
1489                 return ret;
1490         }
1491
1492         ret = down_interruptible(&ip6t_mutex);
1493         if (ret != 0) {
1494                 vfree(newinfo);
1495                 return ret;
1496         }
1497
1498         /* Don't autoload: we'd eat our tail... */
1499         if (list_named_find(&ip6t_tables, table->name)) {
1500                 ret = -EEXIST;
1501                 goto free_unlock;
1502         }
1503
1504         /* Simplifies replace_table code. */
1505         table->private = &bootstrap;
1506         if (!replace_table(table, 0, newinfo, &ret))
1507                 goto free_unlock;
1508
1509         duprintf("table->private->number = %u\n",
1510                  table->private->number);
1511
1512         /* save number of initial entries */
1513         table->private->initial_entries = table->private->number;
1514
1515         rwlock_init(&table->lock);
1516         list_prepend(&ip6t_tables, table);
1517
1518  unlock:
1519         up(&ip6t_mutex);
1520         return ret;
1521
1522  free_unlock:
1523         vfree(newinfo);
1524         goto unlock;
1525 }
1526
1527 void ip6t_unregister_table(struct ip6t_table *table)
1528 {
1529         down(&ip6t_mutex);
1530         LIST_DELETE(&ip6t_tables, table);
1531         up(&ip6t_mutex);
1532
1533         /* Decrease module usage counts and free resources */
1534         IP6T_ENTRY_ITERATE(table->private->entries, table->private->size,
1535                           cleanup_entry, NULL);
1536         vfree(table->private);
1537 }
1538
/*
 * Test whether @port lies in the inclusive range [@min, @max].  The
 * result of that test (0 or 1) is XORed with @invert, so callers
 * passing invert as 0 or 1 get 1 for "rule matches" and 0 otherwise.
 */
static inline int
port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
{
	const int in_range = (min <= port && port <= max);

	return in_range ^ invert;
}
1548
1549 static int
1550 tcp_find_option(u_int8_t option,
1551                 const struct sk_buff *skb,
1552                 unsigned int tcpoff,
1553                 unsigned int optlen,
1554                 int invert,
1555                 int *hotdrop)
1556 {
1557         /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
1558         u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
1559         unsigned int i;
1560
1561         duprintf("tcp_match: finding option\n");
1562         if (!optlen)
1563                 return invert;
1564         /* If we don't have the whole header, drop packet. */
1565         op = skb_header_pointer(skb, tcpoff + sizeof(struct tcphdr), optlen,
1566                                 _opt);
1567         if (op == NULL) {
1568                 *hotdrop = 1;
1569                 return 0;
1570         }
1571
1572         for (i = 0; i < optlen; ) {
1573                 if (op[i] == option) return !invert;
1574                 if (op[i] < 2) i++;
1575                 else i += op[i+1]?:1;
1576         }
1577
1578         return invert;
1579 }
1580
1581 static int
1582 tcp_match(const struct sk_buff *skb,
1583           const struct net_device *in,
1584           const struct net_device *out,
1585           const void *matchinfo,
1586           int offset,
1587           unsigned int protoff,
1588           int *hotdrop)
1589 {
1590         struct tcphdr _tcph, *th;
1591         const struct ip6t_tcp *tcpinfo = matchinfo;
1592
1593         if (offset) {
1594                 /* To quote Alan:
1595
1596                    Don't allow a fragment of TCP 8 bytes in. Nobody normal
1597                    causes this. Its a cracker trying to break in by doing a
1598                    flag overwrite to pass the direction checks.
1599                 */
1600                 if (offset == 1) {
1601                         duprintf("Dropping evil TCP offset=1 frag.\n");
1602                         *hotdrop = 1;
1603                 }
1604                 /* Must not be a fragment. */
1605                 return 0;
1606         }
1607
1608 #define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
1609
1610         th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
1611         if (th == NULL) {
1612                 /* We've been asked to examine this packet, and we
1613                    can't.  Hence, no choice but to drop. */
1614                 duprintf("Dropping evil TCP offset=0 tinygram.\n");
1615                 *hotdrop = 1;
1616                 return 0;
1617         }
1618
1619         if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
1620                         ntohs(th->source),
1621                         !!(tcpinfo->invflags & IP6T_TCP_INV_SRCPT)))
1622                 return 0;
1623         if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
1624                         ntohs(th->dest),
1625                         !!(tcpinfo->invflags & IP6T_TCP_INV_DSTPT)))
1626                 return 0;
1627         if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
1628                       == tcpinfo->flg_cmp,
1629                       IP6T_TCP_INV_FLAGS))
1630                 return 0;
1631         if (tcpinfo->option) {
1632                 if (th->doff * 4 < sizeof(_tcph)) {
1633                         *hotdrop = 1;
1634                         return 0;
1635                 }
1636                 if (!tcp_find_option(tcpinfo->option, skb, protoff,
1637                                      th->doff*4 - sizeof(*th),
1638                                      tcpinfo->invflags & IP6T_TCP_INV_OPTION,
1639                                      hotdrop))
1640                         return 0;
1641         }
1642         return 1;
1643 }
1644
1645 /* Called when user tries to insert an entry of this type. */
1646 static int
1647 tcp_checkentry(const char *tablename,
1648                const struct ip6t_ip6 *ipv6,
1649                void *matchinfo,
1650                unsigned int matchsize,
1651                unsigned int hook_mask)
1652 {
1653         const struct ip6t_tcp *tcpinfo = matchinfo;
1654
1655         /* Must specify proto == TCP, and no unknown invflags */
1656         return ipv6->proto == IPPROTO_TCP
1657                 && !(ipv6->invflags & IP6T_INV_PROTO)
1658                 && matchsize == IP6T_ALIGN(sizeof(struct ip6t_tcp))
1659                 && !(tcpinfo->invflags & ~IP6T_TCP_INV_MASK);
1660 }
1661
1662 static int
1663 udp_match(const struct sk_buff *skb,
1664           const struct net_device *in,
1665           const struct net_device *out,
1666           const void *matchinfo,
1667           int offset,
1668           unsigned int protoff,
1669           int *hotdrop)
1670 {
1671         struct udphdr _udph, *uh;
1672         const struct ip6t_udp *udpinfo = matchinfo;
1673
1674         /* Must not be a fragment. */
1675         if (offset)
1676                 return 0;
1677
1678         uh = skb_header_pointer(skb, protoff, sizeof(_udph), &_udph);
1679         if (uh == NULL) {
1680                 /* We've been asked to examine this packet, and we
1681                    can't.  Hence, no choice but to drop. */
1682                 duprintf("Dropping evil UDP tinygram.\n");
1683                 *hotdrop = 1;
1684                 return 0;
1685         }
1686
1687         return port_match(udpinfo->spts[0], udpinfo->spts[1],
1688                           ntohs(uh->source),
1689                           !!(udpinfo->invflags & IP6T_UDP_INV_SRCPT))
1690                 && port_match(udpinfo->dpts[0], udpinfo->dpts[1],
1691                               ntohs(uh->dest),
1692                               !!(udpinfo->invflags & IP6T_UDP_INV_DSTPT));
1693 }
1694
1695 /* Called when user tries to insert an entry of this type. */
1696 static int
1697 udp_checkentry(const char *tablename,
1698                const struct ip6t_ip6 *ipv6,
1699                void *matchinfo,
1700                unsigned int matchinfosize,
1701                unsigned int hook_mask)
1702 {
1703         const struct ip6t_udp *udpinfo = matchinfo;
1704
1705         /* Must specify proto == UDP, and no unknown invflags */
1706         if (ipv6->proto != IPPROTO_UDP || (ipv6->invflags & IP6T_INV_PROTO)) {
1707                 duprintf("ip6t_udp: Protocol %u != %u\n", ipv6->proto,
1708                          IPPROTO_UDP);
1709                 return 0;
1710         }
1711         if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_udp))) {
1712                 duprintf("ip6t_udp: matchsize %u != %u\n",
1713                          matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_udp)));
1714                 return 0;
1715         }
1716         if (udpinfo->invflags & ~IP6T_UDP_INV_MASK) {
1717                 duprintf("ip6t_udp: unknown flags %X\n",
1718                          udpinfo->invflags);
1719                 return 0;
1720         }
1721
1722         return 1;
1723 }
1724
/*
 * Decide whether an ICMPv6 type/code pair is covered by a rule.
 *
 * The pair is "in range" when type equals test_type and code lies in
 * [min_code, max_code] (both ends inclusive).  The result is that
 * truth value XORed with invert.
 */
static inline int
icmp6_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
                     u_int8_t type, u_int8_t code,
                     int invert)
{
        int in_range = 0;

        if (type == test_type)
                in_range = (min_code <= code && code <= max_code);

        return in_range ^ invert;
}
1734
1735 static int
1736 icmp6_match(const struct sk_buff *skb,
1737            const struct net_device *in,
1738            const struct net_device *out,
1739            const void *matchinfo,
1740            int offset,
1741            unsigned int protoff,
1742            int *hotdrop)
1743 {
1744         struct icmp6hdr _icmp, *ic;
1745         const struct ip6t_icmp *icmpinfo = matchinfo;
1746
1747         /* Must not be a fragment. */
1748         if (offset)
1749                 return 0;
1750
1751         ic = skb_header_pointer(skb, protoff, sizeof(_icmp), &_icmp);
1752         if (ic == NULL) {
1753                 /* We've been asked to examine this packet, and we
1754                    can't.  Hence, no choice but to drop. */
1755                 duprintf("Dropping evil ICMP tinygram.\n");
1756                 *hotdrop = 1;
1757                 return 0;
1758         }
1759
1760         return icmp6_type_code_match(icmpinfo->type,
1761                                      icmpinfo->code[0],
1762                                      icmpinfo->code[1],
1763                                      ic->icmp6_type, ic->icmp6_code,
1764                                      !!(icmpinfo->invflags&IP6T_ICMP_INV));
1765 }
1766
1767 /* Called when user tries to insert an entry of this type. */
1768 static int
1769 icmp6_checkentry(const char *tablename,
1770            const struct ip6t_ip6 *ipv6,
1771            void *matchinfo,
1772            unsigned int matchsize,
1773            unsigned int hook_mask)
1774 {
1775         const struct ip6t_icmp *icmpinfo = matchinfo;
1776
1777         /* Must specify proto == ICMP, and no unknown invflags */
1778         return ipv6->proto == IPPROTO_ICMPV6
1779                 && !(ipv6->invflags & IP6T_INV_PROTO)
1780                 && matchsize == IP6T_ALIGN(sizeof(struct ip6t_icmp))
1781                 && !(icmpinfo->invflags & ~IP6T_ICMP_INV);
1782 }
1783
1784 /* The built-in targets: standard (NULL) and error. */
1785 static struct ip6t_target ip6t_standard_target = {
1786         .name           = IP6T_STANDARD_TARGET,
1787 };
1788
1789 static struct ip6t_target ip6t_error_target = {
1790         .name           = IP6T_ERROR_TARGET,
1791         .target         = ip6t_error,
1792 };
1793
1794 static struct nf_sockopt_ops ip6t_sockopts = {
1795         .pf             = PF_INET6,
1796         .set_optmin     = IP6T_BASE_CTL,
1797         .set_optmax     = IP6T_SO_SET_MAX+1,
1798         .set            = do_ip6t_set_ctl,
1799         .get_optmin     = IP6T_BASE_CTL,
1800         .get_optmax     = IP6T_SO_GET_MAX+1,
1801         .get            = do_ip6t_get_ctl,
1802 };
1803
1804 static struct ip6t_match tcp_matchstruct = {
1805         .name           = "tcp",
1806         .match          = &tcp_match,
1807         .checkentry     = &tcp_checkentry,
1808 };
1809
1810 static struct ip6t_match udp_matchstruct = {
1811         .name           = "udp",
1812         .match          = &udp_match,
1813         .checkentry     = &udp_checkentry,
1814 };
1815
1816 static struct ip6t_match icmp6_matchstruct = {
1817         .name           = "icmp6",
1818         .match          = &icmp6_match,
1819         .checkentry     = &icmp6_checkentry,
1820 };
1821
1822 #ifdef CONFIG_PROC_FS
1823 static inline int print_name(const char *i,
1824                              off_t start_offset, char *buffer, int length,
1825                              off_t *pos, unsigned int *count)
1826 {
1827         if ((*count)++ >= start_offset) {
1828                 unsigned int namelen;
1829
1830                 namelen = sprintf(buffer + *pos, "%s\n",
1831                                   i + sizeof(struct list_head));
1832                 if (*pos + namelen > length) {
1833                         /* Stop iterating */
1834                         return 1;
1835                 }
1836                 *pos += namelen;
1837         }
1838         return 0;
1839 }
1840
1841 static inline int print_target(const struct ip6t_target *t,
1842                                off_t start_offset, char *buffer, int length,
1843                                off_t *pos, unsigned int *count)
1844 {
1845         if (t == &ip6t_standard_target || t == &ip6t_error_target)
1846                 return 0;
1847         return print_name((char *)t, start_offset, buffer, length, pos, count);
1848 }
1849
1850 static int ip6t_get_tables(char *buffer, char **start, off_t offset, int length)
1851 {
1852         off_t pos = 0;
1853         unsigned int count = 0;
1854
1855         if (down_interruptible(&ip6t_mutex) != 0)
1856                 return 0;
1857
1858         LIST_FIND(&ip6t_tables, print_name, char *,
1859                   offset, buffer, length, &pos, &count);
1860
1861         up(&ip6t_mutex);
1862
1863         /* `start' hack - see fs/proc/generic.c line ~105 */
1864         *start=(char *)((unsigned long)count-offset);
1865         return pos;
1866 }
1867
1868 static int ip6t_get_targets(char *buffer, char **start, off_t offset, int length)
1869 {
1870         off_t pos = 0;
1871         unsigned int count = 0;
1872
1873         if (down_interruptible(&ip6t_mutex) != 0)
1874                 return 0;
1875
1876         LIST_FIND(&ip6t_target, print_target, struct ip6t_target *,
1877                   offset, buffer, length, &pos, &count);
1878
1879         up(&ip6t_mutex);
1880
1881         *start = (char *)((unsigned long)count - offset);
1882         return pos;
1883 }
1884
1885 static int ip6t_get_matches(char *buffer, char **start, off_t offset, int length)
1886 {
1887         off_t pos = 0;
1888         unsigned int count = 0;
1889
1890         if (down_interruptible(&ip6t_mutex) != 0)
1891                 return 0;
1892
1893         LIST_FIND(&ip6t_match, print_name, char *,
1894                   offset, buffer, length, &pos, &count);
1895
1896         up(&ip6t_mutex);
1897
1898         *start = (char *)((unsigned long)count - offset);
1899         return pos;
1900 }
1901
1902 static struct { char *name; get_info_t *get_info; } ip6t_proc_entry[] =
1903 { { "ip6_tables_names", ip6t_get_tables },
1904   { "ip6_tables_targets", ip6t_get_targets },
1905   { "ip6_tables_matches", ip6t_get_matches },
1906   { NULL, NULL} };
1907 #endif /*CONFIG_PROC_FS*/
1908
1909 static int __init init(void)
1910 {
1911         int ret;
1912
1913         /* Noone else will be downing sem now, so we won't sleep */
1914         down(&ip6t_mutex);
1915         list_append(&ip6t_target, &ip6t_standard_target);
1916         list_append(&ip6t_target, &ip6t_error_target);
1917         list_append(&ip6t_match, &tcp_matchstruct);
1918         list_append(&ip6t_match, &udp_matchstruct);
1919         list_append(&ip6t_match, &icmp6_matchstruct);
1920         up(&ip6t_mutex);
1921
1922         /* Register setsockopt */
1923         ret = nf_register_sockopt(&ip6t_sockopts);
1924         if (ret < 0) {
1925                 duprintf("Unable to register sockopts.\n");
1926                 return ret;
1927         }
1928
1929 #ifdef CONFIG_PROC_FS
1930         {
1931                 struct proc_dir_entry *proc;
1932                 int i;
1933
1934                 for (i = 0; ip6t_proc_entry[i].name; i++) {
1935                         proc = proc_net_create(ip6t_proc_entry[i].name, 0,
1936                                                ip6t_proc_entry[i].get_info);
1937                         if (!proc) {
1938                                 while (--i >= 0)
1939                                        proc_net_remove(ip6t_proc_entry[i].name);
1940                                 nf_unregister_sockopt(&ip6t_sockopts);
1941                                 return -ENOMEM;
1942                         }
1943                         proc->owner = THIS_MODULE;
1944                 }
1945         }
1946 #endif
1947
1948         printk("ip6_tables: (C) 2000-2002 Netfilter core team\n");
1949         return 0;
1950 }
1951
1952 static void __exit fini(void)
1953 {
1954         nf_unregister_sockopt(&ip6t_sockopts);
1955 #ifdef CONFIG_PROC_FS
1956         {
1957                 int i;
1958                 for (i = 0; ip6t_proc_entry[i].name; i++)
1959                         proc_net_remove(ip6t_proc_entry[i].name);
1960         }
1961 #endif
1962 }
1963
1964 /*
1965  * find specified header up to transport protocol header.
1966  * If found target header, the offset to the header is set to *offset
1967  * and return 0. otherwise, return -1.
1968  *
1969  * Notes: - non-1st Fragment Header isn't skipped.
1970  *        - ESP header isn't skipped.
1971  *        - The target header may be trancated.
1972  */
1973 int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target)
1974 {
1975         unsigned int start = (u8*)(skb->nh.ipv6h + 1) - skb->data;
1976         u8 nexthdr = skb->nh.ipv6h->nexthdr;
1977         unsigned int len = skb->len - start;
1978
1979         while (nexthdr != target) {
1980                 struct ipv6_opt_hdr _hdr, *hp;
1981                 unsigned int hdrlen;
1982
1983                 if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE)
1984                         return -1;
1985                 hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
1986                 if (hp == NULL)
1987                         return -1;
1988                 if (nexthdr == NEXTHDR_FRAGMENT) {
1989                         unsigned short _frag_off, *fp;
1990                         fp = skb_header_pointer(skb,
1991                                                 start+offsetof(struct frag_hdr,
1992                                                                frag_off),
1993                                                 sizeof(_frag_off),
1994                                                 &_frag_off);
1995                         if (fp == NULL)
1996                                 return -1;
1997
1998                         if (ntohs(*fp) & ~0x7)
1999                                 return -1;
2000                         hdrlen = 8;
2001                 } else if (nexthdr == NEXTHDR_AUTH)
2002                         hdrlen = (hp->hdrlen + 2) << 2; 
2003                 else
2004                         hdrlen = ipv6_optlen(hp); 
2005
2006                 nexthdr = hp->nexthdr;
2007                 len -= hdrlen;
2008                 start += hdrlen;
2009         }
2010
2011         *offset = start;
2012         return 0;
2013 }
2014
/* Module entry and exit points. */
module_init(init);
module_exit(fini);

/* Table registration and packet traversal. */
EXPORT_SYMBOL(ip6t_register_table);
EXPORT_SYMBOL(ip6t_unregister_table);
EXPORT_SYMBOL(ip6t_do_table);

/* Match and target extension registration. */
EXPORT_SYMBOL(ip6t_register_match);
EXPORT_SYMBOL(ip6t_unregister_match);
EXPORT_SYMBOL(ip6t_register_target);
EXPORT_SYMBOL(ip6t_unregister_target);

/* Extension header helpers. */
EXPORT_SYMBOL(ip6t_ext_hdr);
EXPORT_SYMBOL(ipv6_find_hdr);