[NET] NETNS: Omit sock->sk_net without CONFIG_NET_NS.
[linux-3.10.git] / net / ipv4 / netfilter / ip_tables.c
1 /*
2  * Packet matching code.
3  *
4  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5  * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11 #include <linux/cache.h>
12 #include <linux/capability.h>
13 #include <linux/skbuff.h>
14 #include <linux/kmod.h>
15 #include <linux/vmalloc.h>
16 #include <linux/netdevice.h>
17 #include <linux/module.h>
18 #include <linux/icmp.h>
19 #include <net/ip.h>
20 #include <net/compat.h>
21 #include <asm/uaccess.h>
22 #include <linux/mutex.h>
23 #include <linux/proc_fs.h>
24 #include <linux/err.h>
25 #include <linux/cpumask.h>
26
27 #include <linux/netfilter/x_tables.h>
28 #include <linux/netfilter_ipv4/ip_tables.h>
29 #include <net/netfilter/nf_log.h>
30
/* Module metadata: licence, author and one-line description strings. */
31 MODULE_LICENSE("GPL");
32 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
33 MODULE_DESCRIPTION("IPv4 packet filter");
34
35 /*#define DEBUG_IP_FIREWALL*/
36 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
37 /*#define DEBUG_IP_FIREWALL_USER*/
38
/*
 * Packet-path debug output.  Forwards to printk() when DEBUG_IP_FIREWALL
 * is defined; otherwise expands to nothing, so its arguments are never
 * evaluated.
 */
#if defined(DEBUG_IP_FIREWALL)
#define dprintf(format, ...)	printk(format, ##__VA_ARGS__)
#else
#define dprintf(format, ...)
#endif
44
/*
 * User-context (rule loading/checking) debug output.  Forwards to printk()
 * when DEBUG_IP_FIREWALL_USER is defined; otherwise expands to nothing, so
 * its arguments are never evaluated.
 */
#if defined(DEBUG_IP_FIREWALL_USER)
#define duprintf(format, ...)	printk(format, ##__VA_ARGS__)
#else
#define duprintf(format, ...)
#endif
50
/*
 * Soft assertion: with CONFIG_NETFILTER_DEBUG a failed condition is only
 * reported through printk() (function, file, line); execution continues.
 * Without it the macro expands to nothing and the condition is not evaluated.
 */
#ifndef CONFIG_NETFILTER_DEBUG
#define IP_NF_ASSERT(x)
#else
#define IP_NF_ASSERT(x)							\
	do {								\
		if (!(x))						\
			printk("IP_NF_ASSERT: %s:%s:%u\n",		\
			       __func__, __FILE__, __LINE__);		\
	} while (0)
#endif
61
62 #if 0
63 /* All the better to debug you with... */
64 #define static
65 #define inline
66 #endif
67
68 /*
69    We keep a set of rules for each CPU, so we can avoid write-locking
70    them in the softirq when updating the counters and therefore
71    only need to read-lock in the softirq; doing a write_lock_bh() in user
72    context stops packets coming through and allows user context to read
73    the counters or update the rules.
74
75    Hence the start of any table is given by get_table() below.  */
76
77 /* Returns whether matches rule or not. */
78 /* Performance critical - called for every packet */
79 static inline bool
80 ip_packet_match(const struct iphdr *ip,
81                 const char *indev,
82                 const char *outdev,
83                 const struct ipt_ip *ipinfo,
84                 int isfrag)
85 {
86         size_t i;
87         unsigned long ret;
88
89 #define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg)))
90
91         if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
92                   IPT_INV_SRCIP)
93             || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
94                      IPT_INV_DSTIP)) {
95                 dprintf("Source or dest mismatch.\n");
96
97                 dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
98                         NIPQUAD(ip->saddr),
99                         NIPQUAD(ipinfo->smsk.s_addr),
100                         NIPQUAD(ipinfo->src.s_addr),
101                         ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
102                 dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
103                         NIPQUAD(ip->daddr),
104                         NIPQUAD(ipinfo->dmsk.s_addr),
105                         NIPQUAD(ipinfo->dst.s_addr),
106                         ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
107                 return false;
108         }
109
110         /* Look for ifname matches; this should unroll nicely. */
111         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
112                 ret |= (((const unsigned long *)indev)[i]
113                         ^ ((const unsigned long *)ipinfo->iniface)[i])
114                         & ((const unsigned long *)ipinfo->iniface_mask)[i];
115         }
116
117         if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
118                 dprintf("VIA in mismatch (%s vs %s).%s\n",
119                         indev, ipinfo->iniface,
120                         ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
121                 return false;
122         }
123
124         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
125                 ret |= (((const unsigned long *)outdev)[i]
126                         ^ ((const unsigned long *)ipinfo->outiface)[i])
127                         & ((const unsigned long *)ipinfo->outiface_mask)[i];
128         }
129
130         if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
131                 dprintf("VIA out mismatch (%s vs %s).%s\n",
132                         outdev, ipinfo->outiface,
133                         ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
134                 return false;
135         }
136
137         /* Check specific protocol */
138         if (ipinfo->proto
139             && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
140                 dprintf("Packet protocol %hi does not match %hi.%s\n",
141                         ip->protocol, ipinfo->proto,
142                         ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
143                 return false;
144         }
145
146         /* If we have a fragment rule but the packet is not a fragment
147          * then we return zero */
148         if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
149                 dprintf("Fragment rule but not fragment.%s\n",
150                         ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
151                 return false;
152         }
153
154         return true;
155 }
156
157 static bool
158 ip_checkentry(const struct ipt_ip *ip)
159 {
160         if (ip->flags & ~IPT_F_MASK) {
161                 duprintf("Unknown flag bits set: %08X\n",
162                          ip->flags & ~IPT_F_MASK);
163                 return false;
164         }
165         if (ip->invflags & ~IPT_INV_MASK) {
166                 duprintf("Unknown invflag bits set: %08X\n",
167                          ip->invflags & ~IPT_INV_MASK);
168                 return false;
169         }
170         return true;
171 }
172
173 static unsigned int
174 ipt_error(struct sk_buff *skb,
175           const struct net_device *in,
176           const struct net_device *out,
177           unsigned int hooknum,
178           const struct xt_target *target,
179           const void *targinfo)
180 {
181         if (net_ratelimit())
182                 printk("ip_tables: error: `%s'\n", (char *)targinfo);
183
184         return NF_DROP;
185 }
186
187 /* Performance critical - called for every packet */
188 static inline bool
189 do_match(struct ipt_entry_match *m,
190               const struct sk_buff *skb,
191               const struct net_device *in,
192               const struct net_device *out,
193               int offset,
194               bool *hotdrop)
195 {
196         /* Stop iteration if it doesn't match */
197         if (!m->u.kernel.match->match(skb, in, out, m->u.kernel.match, m->data,
198                                       offset, ip_hdrlen(skb), hotdrop))
199                 return true;
200         else
201                 return false;
202 }
203
/*
 * Translate a byte offset into a table blob into a pointer to the entry
 * located there.  No bounds checking is done.  Performance critical.
 */
static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
	unsigned char *start = base;

	return (struct ipt_entry *)(start + offset);
}
210
211 /* All zeroes == unconditional rule. */
212 /* Mildly perf critical (only if packet tracing is on) */
213 static inline int
214 unconditional(const struct ipt_ip *ip)
215 {
216         unsigned int i;
217
218         for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
219                 if (((__u32 *)ip)[i])
220                         return 0;
221
222         return 1;
223 #undef FWINV
224 }
225
226 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
227     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
228 static const char *const hooknames[] = {
229         [NF_INET_PRE_ROUTING]           = "PREROUTING",
230         [NF_INET_LOCAL_IN]              = "INPUT",
231         [NF_INET_FORWARD]               = "FORWARD",
232         [NF_INET_LOCAL_OUT]             = "OUTPUT",
233         [NF_INET_POST_ROUTING]          = "POSTROUTING",
234 };
235
/* Index into comments[]: the kind of rule reported in a TRACE log line. */
enum nf_ip_trace_comments {
	NF_IP_TRACE_COMMENT_RULE   = 0,
	NF_IP_TRACE_COMMENT_RETURN = 1,
	NF_IP_TRACE_COMMENT_POLICY = 2,
};
241
242 static const char *const comments[] = {
243         [NF_IP_TRACE_COMMENT_RULE]      = "rule",
244         [NF_IP_TRACE_COMMENT_RETURN]    = "return",
245         [NF_IP_TRACE_COMMENT_POLICY]    = "policy",
246 };
247
248 static struct nf_loginfo trace_loginfo = {
249         .type = NF_LOG_TYPE_LOG,
250         .u = {
251                 .log = {
252                         .level = 4,
253                         .logflags = NF_LOG_MASK,
254                 },
255         },
256 };
257
258 /* Mildly perf critical (only if packet tracing is on) */
259 static inline int
260 get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e,
261                       char *hookname, char **chainname,
262                       char **comment, unsigned int *rulenum)
263 {
264         struct ipt_standard_target *t = (void *)ipt_get_target(s);
265
266         if (strcmp(t->target.u.kernel.target->name, IPT_ERROR_TARGET) == 0) {
267                 /* Head of user chain: ERROR target with chainname */
268                 *chainname = t->target.data;
269                 (*rulenum) = 0;
270         } else if (s == e) {
271                 (*rulenum)++;
272
273                 if (s->target_offset == sizeof(struct ipt_entry)
274                    && strcmp(t->target.u.kernel.target->name,
275                              IPT_STANDARD_TARGET) == 0
276                    && t->verdict < 0
277                    && unconditional(&s->ip)) {
278                         /* Tail of chains: STANDARD target (return/policy) */
279                         *comment = *chainname == hookname
280                                 ? (char *)comments[NF_IP_TRACE_COMMENT_POLICY]
281                                 : (char *)comments[NF_IP_TRACE_COMMENT_RETURN];
282                 }
283                 return 1;
284         } else
285                 (*rulenum)++;
286
287         return 0;
288 }
289
290 static void trace_packet(struct sk_buff *skb,
291                          unsigned int hook,
292                          const struct net_device *in,
293                          const struct net_device *out,
294                          const char *tablename,
295                          struct xt_table_info *private,
296                          struct ipt_entry *e)
297 {
298         void *table_base;
299         struct ipt_entry *root;
300         char *hookname, *chainname, *comment;
301         unsigned int rulenum = 0;
302
303         table_base = (void *)private->entries[smp_processor_id()];
304         root = get_entry(table_base, private->hook_entry[hook]);
305
306         hookname = chainname = (char *)hooknames[hook];
307         comment = (char *)comments[NF_IP_TRACE_COMMENT_RULE];
308
309         IPT_ENTRY_ITERATE(root,
310                           private->size - private->hook_entry[hook],
311                           get_chainname_rulenum,
312                           e, hookname, &chainname, &comment, &rulenum);
313
314         nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo,
315                       "TRACE: %s:%s:%s:%u ",
316                       tablename, chainname, comment, rulenum);
317 }
318 #endif
319
320 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
321 unsigned int
322 ipt_do_table(struct sk_buff *skb,
323              unsigned int hook,
324              const struct net_device *in,
325              const struct net_device *out,
326              struct xt_table *table)
327 {
328         static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
329         u_int16_t offset;
330         struct iphdr *ip;
331         u_int16_t datalen;
332         bool hotdrop = false;
333         /* Initializing verdict to NF_DROP keeps gcc happy. */
334         unsigned int verdict = NF_DROP;
335         const char *indev, *outdev;
336         void *table_base;
337         struct ipt_entry *e, *back;
338         struct xt_table_info *private;
339
340         /* Initialization */
341         ip = ip_hdr(skb);
342         datalen = skb->len - ip->ihl * 4;
343         indev = in ? in->name : nulldevname;
344         outdev = out ? out->name : nulldevname;
345         /* We handle fragments by dealing with the first fragment as
346          * if it was a normal packet.  All other fragments are treated
347          * normally, except that they will NEVER match rules that ask
348          * things we don't know, ie. tcp syn flag or ports).  If the
349          * rule is also a fragment-specific rule, non-fragments won't
350          * match it. */
351         offset = ntohs(ip->frag_off) & IP_OFFSET;
352
353         read_lock_bh(&table->lock);
354         IP_NF_ASSERT(table->valid_hooks & (1 << hook));
355         private = table->private;
356         table_base = (void *)private->entries[smp_processor_id()];
357         e = get_entry(table_base, private->hook_entry[hook]);
358
359         /* For return from builtin chain */
360         back = get_entry(table_base, private->underflow[hook]);
361
362         do {
363                 IP_NF_ASSERT(e);
364                 IP_NF_ASSERT(back);
365                 if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
366                         struct ipt_entry_target *t;
367
368                         if (IPT_MATCH_ITERATE(e, do_match,
369                                               skb, in, out,
370                                               offset, &hotdrop) != 0)
371                                 goto no_match;
372
373                         ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
374
375                         t = ipt_get_target(e);
376                         IP_NF_ASSERT(t->u.kernel.target);
377
378 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
379     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
380                         /* The packet is traced: log it */
381                         if (unlikely(skb->nf_trace))
382                                 trace_packet(skb, hook, in, out,
383                                              table->name, private, e);
384 #endif
385                         /* Standard target? */
386                         if (!t->u.kernel.target->target) {
387                                 int v;
388
389                                 v = ((struct ipt_standard_target *)t)->verdict;
390                                 if (v < 0) {
391                                         /* Pop from stack? */
392                                         if (v != IPT_RETURN) {
393                                                 verdict = (unsigned)(-v) - 1;
394                                                 break;
395                                         }
396                                         e = back;
397                                         back = get_entry(table_base,
398                                                          back->comefrom);
399                                         continue;
400                                 }
401                                 if (table_base + v != (void *)e + e->next_offset
402                                     && !(e->ip.flags & IPT_F_GOTO)) {
403                                         /* Save old back ptr in next entry */
404                                         struct ipt_entry *next
405                                                 = (void *)e + e->next_offset;
406                                         next->comefrom
407                                                 = (void *)back - table_base;
408                                         /* set back pointer to next entry */
409                                         back = next;
410                                 }
411
412                                 e = get_entry(table_base, v);
413                         } else {
414                                 /* Targets which reenter must return
415                                    abs. verdicts */
416 #ifdef CONFIG_NETFILTER_DEBUG
417                                 ((struct ipt_entry *)table_base)->comefrom
418                                         = 0xeeeeeeec;
419 #endif
420                                 verdict = t->u.kernel.target->target(skb,
421                                                                      in, out,
422                                                                      hook,
423                                                                      t->u.kernel.target,
424                                                                      t->data);
425
426 #ifdef CONFIG_NETFILTER_DEBUG
427                                 if (((struct ipt_entry *)table_base)->comefrom
428                                     != 0xeeeeeeec
429                                     && verdict == IPT_CONTINUE) {
430                                         printk("Target %s reentered!\n",
431                                                t->u.kernel.target->name);
432                                         verdict = NF_DROP;
433                                 }
434                                 ((struct ipt_entry *)table_base)->comefrom
435                                         = 0x57acc001;
436 #endif
437                                 /* Target might have changed stuff. */
438                                 ip = ip_hdr(skb);
439                                 datalen = skb->len - ip->ihl * 4;
440
441                                 if (verdict == IPT_CONTINUE)
442                                         e = (void *)e + e->next_offset;
443                                 else
444                                         /* Verdict */
445                                         break;
446                         }
447                 } else {
448
449                 no_match:
450                         e = (void *)e + e->next_offset;
451                 }
452         } while (!hotdrop);
453
454         read_unlock_bh(&table->lock);
455
456 #ifdef DEBUG_ALLOW_ALL
457         return NF_ACCEPT;
458 #else
459         if (hotdrop)
460                 return NF_DROP;
461         else return verdict;
462 #endif
463 }
464
465 /* Figures out from what hook each rule can be called: returns 0 if
466    there are loops.  Puts hook bitmask in comefrom. */
467 static int
468 mark_source_chains(struct xt_table_info *newinfo,
469                    unsigned int valid_hooks, void *entry0)
470 {
471         unsigned int hook;
472
473         /* No recursion; use packet counter to save back ptrs (reset
474            to 0 as we leave), and comefrom to save source hook bitmask */
475         for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
476                 unsigned int pos = newinfo->hook_entry[hook];
477                 struct ipt_entry *e = (struct ipt_entry *)(entry0 + pos);
478
479                 if (!(valid_hooks & (1 << hook)))
480                         continue;
481
482                 /* Set initial back pointer. */
483                 e->counters.pcnt = pos;
484
485                 for (;;) {
486                         struct ipt_standard_target *t
487                                 = (void *)ipt_get_target(e);
488                         int visited = e->comefrom & (1 << hook);
489
490                         if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
491                                 printk("iptables: loop hook %u pos %u %08X.\n",
492                                        hook, pos, e->comefrom);
493                                 return 0;
494                         }
495                         e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
496
497                         /* Unconditional return/END. */
498                         if ((e->target_offset == sizeof(struct ipt_entry)
499                             && (strcmp(t->target.u.user.name,
500                                        IPT_STANDARD_TARGET) == 0)
501                             && t->verdict < 0
502                             && unconditional(&e->ip)) || visited) {
503                                 unsigned int oldpos, size;
504
505                                 if (t->verdict < -NF_MAX_VERDICT - 1) {
506                                         duprintf("mark_source_chains: bad "
507                                                 "negative verdict (%i)\n",
508                                                                 t->verdict);
509                                         return 0;
510                                 }
511
512                                 /* Return: backtrack through the last
513                                    big jump. */
514                                 do {
515                                         e->comefrom ^= (1<<NF_INET_NUMHOOKS);
516 #ifdef DEBUG_IP_FIREWALL_USER
517                                         if (e->comefrom
518                                             & (1 << NF_INET_NUMHOOKS)) {
519                                                 duprintf("Back unset "
520                                                          "on hook %u "
521                                                          "rule %u\n",
522                                                          hook, pos);
523                                         }
524 #endif
525                                         oldpos = pos;
526                                         pos = e->counters.pcnt;
527                                         e->counters.pcnt = 0;
528
529                                         /* We're at the start. */
530                                         if (pos == oldpos)
531                                                 goto next;
532
533                                         e = (struct ipt_entry *)
534                                                 (entry0 + pos);
535                                 } while (oldpos == pos + e->next_offset);
536
537                                 /* Move along one */
538                                 size = e->next_offset;
539                                 e = (struct ipt_entry *)
540                                         (entry0 + pos + size);
541                                 e->counters.pcnt = pos;
542                                 pos += size;
543                         } else {
544                                 int newpos = t->verdict;
545
546                                 if (strcmp(t->target.u.user.name,
547                                            IPT_STANDARD_TARGET) == 0
548                                     && newpos >= 0) {
549                                         if (newpos > newinfo->size -
550                                                 sizeof(struct ipt_entry)) {
551                                                 duprintf("mark_source_chains: "
552                                                         "bad verdict (%i)\n",
553                                                                 newpos);
554                                                 return 0;
555                                         }
556                                         /* This a jump; chase it. */
557                                         duprintf("Jump rule %u -> %u\n",
558                                                  pos, newpos);
559                                 } else {
560                                         /* ... this is a fallthru */
561                                         newpos = pos + e->next_offset;
562                                 }
563                                 e = (struct ipt_entry *)
564                                         (entry0 + newpos);
565                                 e->counters.pcnt = pos;
566                                 pos = newpos;
567                         }
568                 }
569                 next:
570                 duprintf("Finished chain %u\n", hook);
571         }
572         return 1;
573 }
574
575 static int
576 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
577 {
578         if (i && (*i)-- == 0)
579                 return 1;
580
581         if (m->u.kernel.match->destroy)
582                 m->u.kernel.match->destroy(m->u.kernel.match, m->data);
583         module_put(m->u.kernel.match->me);
584         return 0;
585 }
586
587 static int
588 check_entry(struct ipt_entry *e, const char *name)
589 {
590         struct ipt_entry_target *t;
591
592         if (!ip_checkentry(&e->ip)) {
593                 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
594                 return -EINVAL;
595         }
596
597         if (e->target_offset + sizeof(struct ipt_entry_target) >
598             e->next_offset)
599                 return -EINVAL;
600
601         t = ipt_get_target(e);
602         if (e->target_offset + t->u.target_size > e->next_offset)
603                 return -EINVAL;
604
605         return 0;
606 }
607
608 static int
609 check_match(struct ipt_entry_match *m, const char *name,
610                               const struct ipt_ip *ip,
611                               unsigned int hookmask, unsigned int *i)
612 {
613         struct xt_match *match;
614         int ret;
615
616         match = m->u.kernel.match;
617         ret = xt_check_match(match, AF_INET, m->u.match_size - sizeof(*m),
618                              name, hookmask, ip->proto,
619                              ip->invflags & IPT_INV_PROTO);
620         if (!ret && m->u.kernel.match->checkentry
621             && !m->u.kernel.match->checkentry(name, ip, match, m->data,
622                                               hookmask)) {
623                 duprintf("ip_tables: check failed for `%s'.\n",
624                          m->u.kernel.match->name);
625                 ret = -EINVAL;
626         }
627         if (!ret)
628                 (*i)++;
629         return ret;
630 }
631
632 static int
633 find_check_match(struct ipt_entry_match *m,
634                  const char *name,
635                  const struct ipt_ip *ip,
636                  unsigned int hookmask,
637                  unsigned int *i)
638 {
639         struct xt_match *match;
640         int ret;
641
642         match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
643                                                       m->u.user.revision),
644                                         "ipt_%s", m->u.user.name);
645         if (IS_ERR(match) || !match) {
646                 duprintf("find_check_match: `%s' not found\n", m->u.user.name);
647                 return match ? PTR_ERR(match) : -ENOENT;
648         }
649         m->u.kernel.match = match;
650
651         ret = check_match(m, name, ip, hookmask, i);
652         if (ret)
653                 goto err;
654
655         return 0;
656 err:
657         module_put(m->u.kernel.match->me);
658         return ret;
659 }
660
661 static int check_target(struct ipt_entry *e, const char *name)
662 {
663         struct ipt_entry_target *t;
664         struct xt_target *target;
665         int ret;
666
667         t = ipt_get_target(e);
668         target = t->u.kernel.target;
669         ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t),
670                               name, e->comefrom, e->ip.proto,
671                               e->ip.invflags & IPT_INV_PROTO);
672         if (!ret && t->u.kernel.target->checkentry
673             && !t->u.kernel.target->checkentry(name, e, target, t->data,
674                                                e->comefrom)) {
675                 duprintf("ip_tables: check failed for `%s'.\n",
676                          t->u.kernel.target->name);
677                 ret = -EINVAL;
678         }
679         return ret;
680 }
681
682 static int
683 find_check_entry(struct ipt_entry *e, const char *name, unsigned int size,
684                  unsigned int *i)
685 {
686         struct ipt_entry_target *t;
687         struct xt_target *target;
688         int ret;
689         unsigned int j;
690
691         ret = check_entry(e, name);
692         if (ret)
693                 return ret;
694
695         j = 0;
696         ret = IPT_MATCH_ITERATE(e, find_check_match, name, &e->ip,
697                                 e->comefrom, &j);
698         if (ret != 0)
699                 goto cleanup_matches;
700
701         t = ipt_get_target(e);
702         target = try_then_request_module(xt_find_target(AF_INET,
703                                                         t->u.user.name,
704                                                         t->u.user.revision),
705                                          "ipt_%s", t->u.user.name);
706         if (IS_ERR(target) || !target) {
707                 duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
708                 ret = target ? PTR_ERR(target) : -ENOENT;
709                 goto cleanup_matches;
710         }
711         t->u.kernel.target = target;
712
713         ret = check_target(e, name);
714         if (ret)
715                 goto err;
716
717         (*i)++;
718         return 0;
719  err:
720         module_put(t->u.kernel.target->me);
721  cleanup_matches:
722         IPT_MATCH_ITERATE(e, cleanup_match, &j);
723         return ret;
724 }
725
726 static int
727 check_entry_size_and_hooks(struct ipt_entry *e,
728                            struct xt_table_info *newinfo,
729                            unsigned char *base,
730                            unsigned char *limit,
731                            const unsigned int *hook_entries,
732                            const unsigned int *underflows,
733                            unsigned int *i)
734 {
735         unsigned int h;
736
737         if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
738             || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
739                 duprintf("Bad offset %p\n", e);
740                 return -EINVAL;
741         }
742
743         if (e->next_offset
744             < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
745                 duprintf("checking: element %p size %u\n",
746                          e, e->next_offset);
747                 return -EINVAL;
748         }
749
750         /* Check hooks & underflows */
751         for (h = 0; h < NF_INET_NUMHOOKS; h++) {
752                 if ((unsigned char *)e - base == hook_entries[h])
753                         newinfo->hook_entry[h] = hook_entries[h];
754                 if ((unsigned char *)e - base == underflows[h])
755                         newinfo->underflow[h] = underflows[h];
756         }
757
758         /* FIXME: underflows must be unconditional, standard verdicts
759            < 0 (not IPT_RETURN). --RR */
760
761         /* Clear counters and comefrom */
762         e->counters = ((struct xt_counters) { 0, 0 });
763         e->comefrom = 0;
764
765         (*i)++;
766         return 0;
767 }
768
769 static int
770 cleanup_entry(struct ipt_entry *e, unsigned int *i)
771 {
772         struct ipt_entry_target *t;
773
774         if (i && (*i)-- == 0)
775                 return 1;
776
777         /* Cleanup all matches */
778         IPT_MATCH_ITERATE(e, cleanup_match, NULL);
779         t = ipt_get_target(e);
780         if (t->u.kernel.target->destroy)
781                 t->u.kernel.target->destroy(t->u.kernel.target, t->data);
782         module_put(t->u.kernel.target->me);
783         return 0;
784 }
785
786 /* Checks and translates the user-supplied table segment (held in
787    newinfo) */
788 static int
789 translate_table(const char *name,
790                 unsigned int valid_hooks,
791                 struct xt_table_info *newinfo,
792                 void *entry0,
793                 unsigned int size,
794                 unsigned int number,
795                 const unsigned int *hook_entries,
796                 const unsigned int *underflows)
797 {
798         unsigned int i;
799         int ret;
800
801         newinfo->size = size;
802         newinfo->number = number;
803
804         /* Init all hooks to impossible value. */
805         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
806                 newinfo->hook_entry[i] = 0xFFFFFFFF;
807                 newinfo->underflow[i] = 0xFFFFFFFF;
808         }
809
810         duprintf("translate_table: size %u\n", newinfo->size);
811         i = 0;
812         /* Walk through entries, checking offsets. */
813         ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
814                                 check_entry_size_and_hooks,
815                                 newinfo,
816                                 entry0,
817                                 entry0 + size,
818                                 hook_entries, underflows, &i);
819         if (ret != 0)
820                 return ret;
821
822         if (i != number) {
823                 duprintf("translate_table: %u not %u entries\n",
824                          i, number);
825                 return -EINVAL;
826         }
827
828         /* Check hooks all assigned */
829         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
830                 /* Only hooks which are valid */
831                 if (!(valid_hooks & (1 << i)))
832                         continue;
833                 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
834                         duprintf("Invalid hook entry %u %u\n",
835                                  i, hook_entries[i]);
836                         return -EINVAL;
837                 }
838                 if (newinfo->underflow[i] == 0xFFFFFFFF) {
839                         duprintf("Invalid underflow %u %u\n",
840                                  i, underflows[i]);
841                         return -EINVAL;
842                 }
843         }
844
845         if (!mark_source_chains(newinfo, valid_hooks, entry0))
846                 return -ELOOP;
847
848         /* Finally, each sanity check must pass */
849         i = 0;
850         ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
851                                 find_check_entry, name, size, &i);
852
853         if (ret != 0) {
854                 IPT_ENTRY_ITERATE(entry0, newinfo->size,
855                                 cleanup_entry, &i);
856                 return ret;
857         }
858
859         /* And one copy for every other CPU */
860         for_each_possible_cpu(i) {
861                 if (newinfo->entries[i] && newinfo->entries[i] != entry0)
862                         memcpy(newinfo->entries[i], entry0, newinfo->size);
863         }
864
865         return ret;
866 }
867
868 /* Gets counters. */
869 static inline int
870 add_entry_to_counter(const struct ipt_entry *e,
871                      struct xt_counters total[],
872                      unsigned int *i)
873 {
874         ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
875
876         (*i)++;
877         return 0;
878 }
879
880 static inline int
881 set_entry_to_counter(const struct ipt_entry *e,
882                      struct ipt_counters total[],
883                      unsigned int *i)
884 {
885         SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
886
887         (*i)++;
888         return 0;
889 }
890
891 static void
892 get_counters(const struct xt_table_info *t,
893              struct xt_counters counters[])
894 {
895         unsigned int cpu;
896         unsigned int i;
897         unsigned int curcpu;
898
899         /* Instead of clearing (by a previous call to memset())
900          * the counters and using adds, we set the counters
901          * with data used by 'current' CPU
902          * We dont care about preemption here.
903          */
904         curcpu = raw_smp_processor_id();
905
906         i = 0;
907         IPT_ENTRY_ITERATE(t->entries[curcpu],
908                           t->size,
909                           set_entry_to_counter,
910                           counters,
911                           &i);
912
913         for_each_possible_cpu(cpu) {
914                 if (cpu == curcpu)
915                         continue;
916                 i = 0;
917                 IPT_ENTRY_ITERATE(t->entries[cpu],
918                                   t->size,
919                                   add_entry_to_counter,
920                                   counters,
921                                   &i);
922         }
923 }
924
925 static struct xt_counters * alloc_counters(struct xt_table *table)
926 {
927         unsigned int countersize;
928         struct xt_counters *counters;
929         struct xt_table_info *private = table->private;
930
931         /* We need atomic snapshot of counters: rest doesn't change
932            (other than comefrom, which userspace doesn't care
933            about). */
934         countersize = sizeof(struct xt_counters) * private->number;
935         counters = vmalloc_node(countersize, numa_node_id());
936
937         if (counters == NULL)
938                 return ERR_PTR(-ENOMEM);
939
940         /* First, sum counters... */
941         write_lock_bh(&table->lock);
942         get_counters(private, counters);
943         write_unlock_bh(&table->lock);
944
945         return counters;
946 }
947
948 static int
949 copy_entries_to_user(unsigned int total_size,
950                      struct xt_table *table,
951                      void __user *userptr)
952 {
953         unsigned int off, num;
954         struct ipt_entry *e;
955         struct xt_counters *counters;
956         struct xt_table_info *private = table->private;
957         int ret = 0;
958         void *loc_cpu_entry;
959
960         counters = alloc_counters(table);
961         if (IS_ERR(counters))
962                 return PTR_ERR(counters);
963
964         /* choose the copy that is on our node/cpu, ...
965          * This choice is lazy (because current thread is
966          * allowed to migrate to another cpu)
967          */
968         loc_cpu_entry = private->entries[raw_smp_processor_id()];
969         if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
970                 ret = -EFAULT;
971                 goto free_counters;
972         }
973
974         /* FIXME: use iterator macros --RR */
975         /* ... then go back and fix counters and names */
976         for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
977                 unsigned int i;
978                 struct ipt_entry_match *m;
979                 struct ipt_entry_target *t;
980
981                 e = (struct ipt_entry *)(loc_cpu_entry + off);
982                 if (copy_to_user(userptr + off
983                                  + offsetof(struct ipt_entry, counters),
984                                  &counters[num],
985                                  sizeof(counters[num])) != 0) {
986                         ret = -EFAULT;
987                         goto free_counters;
988                 }
989
990                 for (i = sizeof(struct ipt_entry);
991                      i < e->target_offset;
992                      i += m->u.match_size) {
993                         m = (void *)e + i;
994
995                         if (copy_to_user(userptr + off + i
996                                          + offsetof(struct ipt_entry_match,
997                                                     u.user.name),
998                                          m->u.kernel.match->name,
999                                          strlen(m->u.kernel.match->name)+1)
1000                             != 0) {
1001                                 ret = -EFAULT;
1002                                 goto free_counters;
1003                         }
1004                 }
1005
1006                 t = ipt_get_target(e);
1007                 if (copy_to_user(userptr + off + e->target_offset
1008                                  + offsetof(struct ipt_entry_target,
1009                                             u.user.name),
1010                                  t->u.kernel.target->name,
1011                                  strlen(t->u.kernel.target->name)+1) != 0) {
1012                         ret = -EFAULT;
1013                         goto free_counters;
1014                 }
1015         }
1016
1017  free_counters:
1018         vfree(counters);
1019         return ret;
1020 }
1021
1022 #ifdef CONFIG_COMPAT
1023 static void compat_standard_from_user(void *dst, void *src)
1024 {
1025         int v = *(compat_int_t *)src;
1026
1027         if (v > 0)
1028                 v += xt_compat_calc_jump(AF_INET, v);
1029         memcpy(dst, &v, sizeof(v));
1030 }
1031
1032 static int compat_standard_to_user(void __user *dst, void *src)
1033 {
1034         compat_int_t cv = *(int *)src;
1035
1036         if (cv > 0)
1037                 cv -= xt_compat_calc_jump(AF_INET, cv);
1038         return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
1039 }
1040
1041 static inline int
1042 compat_calc_match(struct ipt_entry_match *m, int *size)
1043 {
1044         *size += xt_compat_match_offset(m->u.kernel.match);
1045         return 0;
1046 }
1047
1048 static int compat_calc_entry(struct ipt_entry *e,
1049                              const struct xt_table_info *info,
1050                              void *base, struct xt_table_info *newinfo)
1051 {
1052         struct ipt_entry_target *t;
1053         unsigned int entry_offset;
1054         int off, i, ret;
1055
1056         off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1057         entry_offset = (void *)e - base;
1058         IPT_MATCH_ITERATE(e, compat_calc_match, &off);
1059         t = ipt_get_target(e);
1060         off += xt_compat_target_offset(t->u.kernel.target);
1061         newinfo->size -= off;
1062         ret = xt_compat_add_offset(AF_INET, entry_offset, off);
1063         if (ret)
1064                 return ret;
1065
1066         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1067                 if (info->hook_entry[i] &&
1068                     (e < (struct ipt_entry *)(base + info->hook_entry[i])))
1069                         newinfo->hook_entry[i] -= off;
1070                 if (info->underflow[i] &&
1071                     (e < (struct ipt_entry *)(base + info->underflow[i])))
1072                         newinfo->underflow[i] -= off;
1073         }
1074         return 0;
1075 }
1076
1077 static int compat_table_info(const struct xt_table_info *info,
1078                              struct xt_table_info *newinfo)
1079 {
1080         void *loc_cpu_entry;
1081
1082         if (!newinfo || !info)
1083                 return -EINVAL;
1084
1085         /* we dont care about newinfo->entries[] */
1086         memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
1087         newinfo->initial_entries = 0;
1088         loc_cpu_entry = info->entries[raw_smp_processor_id()];
1089         return IPT_ENTRY_ITERATE(loc_cpu_entry, info->size,
1090                                  compat_calc_entry, info, loc_cpu_entry,
1091                                  newinfo);
1092 }
1093 #endif
1094
1095 static int get_info(struct net *net, void __user *user, int *len, int compat)
1096 {
1097         char name[IPT_TABLE_MAXNAMELEN];
1098         struct xt_table *t;
1099         int ret;
1100
1101         if (*len != sizeof(struct ipt_getinfo)) {
1102                 duprintf("length %u != %zu\n", *len,
1103                          sizeof(struct ipt_getinfo));
1104                 return -EINVAL;
1105         }
1106
1107         if (copy_from_user(name, user, sizeof(name)) != 0)
1108                 return -EFAULT;
1109
1110         name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1111 #ifdef CONFIG_COMPAT
1112         if (compat)
1113                 xt_compat_lock(AF_INET);
1114 #endif
1115         t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
1116                                     "iptable_%s", name);
1117         if (t && !IS_ERR(t)) {
1118                 struct ipt_getinfo info;
1119                 struct xt_table_info *private = t->private;
1120
1121 #ifdef CONFIG_COMPAT
1122                 if (compat) {
1123                         struct xt_table_info tmp;
1124                         ret = compat_table_info(private, &tmp);
1125                         xt_compat_flush_offsets(AF_INET);
1126                         private = &tmp;
1127                 }
1128 #endif
1129                 info.valid_hooks = t->valid_hooks;
1130                 memcpy(info.hook_entry, private->hook_entry,
1131                        sizeof(info.hook_entry));
1132                 memcpy(info.underflow, private->underflow,
1133                        sizeof(info.underflow));
1134                 info.num_entries = private->number;
1135                 info.size = private->size;
1136                 strcpy(info.name, name);
1137
1138                 if (copy_to_user(user, &info, *len) != 0)
1139                         ret = -EFAULT;
1140                 else
1141                         ret = 0;
1142
1143                 xt_table_unlock(t);
1144                 module_put(t->me);
1145         } else
1146                 ret = t ? PTR_ERR(t) : -ENOENT;
1147 #ifdef CONFIG_COMPAT
1148         if (compat)
1149                 xt_compat_unlock(AF_INET);
1150 #endif
1151         return ret;
1152 }
1153
1154 static int
1155 get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len)
1156 {
1157         int ret;
1158         struct ipt_get_entries get;
1159         struct xt_table *t;
1160
1161         if (*len < sizeof(get)) {
1162                 duprintf("get_entries: %u < %zu\n", *len, sizeof(get));
1163                 return -EINVAL;
1164         }
1165         if (copy_from_user(&get, uptr, sizeof(get)) != 0)
1166                 return -EFAULT;
1167         if (*len != sizeof(struct ipt_get_entries) + get.size) {
1168                 duprintf("get_entries: %u != %zu\n",
1169                          *len, sizeof(get) + get.size);
1170                 return -EINVAL;
1171         }
1172
1173         t = xt_find_table_lock(net, AF_INET, get.name);
1174         if (t && !IS_ERR(t)) {
1175                 struct xt_table_info *private = t->private;
1176                 duprintf("t->private->number = %u\n", private->number);
1177                 if (get.size == private->size)
1178                         ret = copy_entries_to_user(private->size,
1179                                                    t, uptr->entrytable);
1180                 else {
1181                         duprintf("get_entries: I've got %u not %u!\n",
1182                                  private->size, get.size);
1183                         ret = -EINVAL;
1184                 }
1185                 module_put(t->me);
1186                 xt_table_unlock(t);
1187         } else
1188                 ret = t ? PTR_ERR(t) : -ENOENT;
1189
1190         return ret;
1191 }
1192
1193 static int
1194 __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1195              struct xt_table_info *newinfo, unsigned int num_counters,
1196              void __user *counters_ptr)
1197 {
1198         int ret;
1199         struct xt_table *t;
1200         struct xt_table_info *oldinfo;
1201         struct xt_counters *counters;
1202         void *loc_cpu_old_entry;
1203
1204         ret = 0;
1205         counters = vmalloc(num_counters * sizeof(struct xt_counters));
1206         if (!counters) {
1207                 ret = -ENOMEM;
1208                 goto out;
1209         }
1210
1211         t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
1212                                     "iptable_%s", name);
1213         if (!t || IS_ERR(t)) {
1214                 ret = t ? PTR_ERR(t) : -ENOENT;
1215                 goto free_newinfo_counters_untrans;
1216         }
1217
1218         /* You lied! */
1219         if (valid_hooks != t->valid_hooks) {
1220                 duprintf("Valid hook crap: %08X vs %08X\n",
1221                          valid_hooks, t->valid_hooks);
1222                 ret = -EINVAL;
1223                 goto put_module;
1224         }
1225
1226         oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
1227         if (!oldinfo)
1228                 goto put_module;
1229
1230         /* Update module usage count based on number of rules */
1231         duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
1232                 oldinfo->number, oldinfo->initial_entries, newinfo->number);
1233         if ((oldinfo->number > oldinfo->initial_entries) ||
1234             (newinfo->number <= oldinfo->initial_entries))
1235                 module_put(t->me);
1236         if ((oldinfo->number > oldinfo->initial_entries) &&
1237             (newinfo->number <= oldinfo->initial_entries))
1238                 module_put(t->me);
1239
1240         /* Get the old counters. */
1241         get_counters(oldinfo, counters);
1242         /* Decrease module usage counts and free resource */
1243         loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
1244         IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
1245                           NULL);
1246         xt_free_table_info(oldinfo);
1247         if (copy_to_user(counters_ptr, counters,
1248                          sizeof(struct xt_counters) * num_counters) != 0)
1249                 ret = -EFAULT;
1250         vfree(counters);
1251         xt_table_unlock(t);
1252         return ret;
1253
1254  put_module:
1255         module_put(t->me);
1256         xt_table_unlock(t);
1257  free_newinfo_counters_untrans:
1258         vfree(counters);
1259  out:
1260         return ret;
1261 }
1262
1263 static int
1264 do_replace(struct net *net, void __user *user, unsigned int len)
1265 {
1266         int ret;
1267         struct ipt_replace tmp;
1268         struct xt_table_info *newinfo;
1269         void *loc_cpu_entry;
1270
1271         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1272                 return -EFAULT;
1273
1274         /* overflow check */
1275         if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1276                 return -ENOMEM;
1277
1278         newinfo = xt_alloc_table_info(tmp.size);
1279         if (!newinfo)
1280                 return -ENOMEM;
1281
1282         /* choose the copy that is on our node/cpu */
1283         loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1284         if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1285                            tmp.size) != 0) {
1286                 ret = -EFAULT;
1287                 goto free_newinfo;
1288         }
1289
1290         ret = translate_table(tmp.name, tmp.valid_hooks,
1291                               newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
1292                               tmp.hook_entry, tmp.underflow);
1293         if (ret != 0)
1294                 goto free_newinfo;
1295
1296         duprintf("ip_tables: Translated table\n");
1297
1298         ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1299                            tmp.num_counters, tmp.counters);
1300         if (ret)
1301                 goto free_newinfo_untrans;
1302         return 0;
1303
1304  free_newinfo_untrans:
1305         IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
1306  free_newinfo:
1307         xt_free_table_info(newinfo);
1308         return ret;
1309 }
1310
1311 /* We're lazy, and add to the first CPU; overflow works its fey magic
1312  * and everything is OK. */
1313 static int
1314 add_counter_to_entry(struct ipt_entry *e,
1315                      const struct xt_counters addme[],
1316                      unsigned int *i)
1317 {
1318 #if 0
1319         duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1320                  *i,
1321                  (long unsigned int)e->counters.pcnt,
1322                  (long unsigned int)e->counters.bcnt,
1323                  (long unsigned int)addme[*i].pcnt,
1324                  (long unsigned int)addme[*i].bcnt);
1325 #endif
1326
1327         ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1328
1329         (*i)++;
1330         return 0;
1331 }
1332
1333 static int
1334 do_add_counters(struct net *net, void __user *user, unsigned int len, int compat)
1335 {
1336         unsigned int i;
1337         struct xt_counters_info tmp;
1338         struct xt_counters *paddc;
1339         unsigned int num_counters;
1340         char *name;
1341         int size;
1342         void *ptmp;
1343         struct xt_table *t;
1344         struct xt_table_info *private;
1345         int ret = 0;
1346         void *loc_cpu_entry;
1347 #ifdef CONFIG_COMPAT
1348         struct compat_xt_counters_info compat_tmp;
1349
1350         if (compat) {
1351                 ptmp = &compat_tmp;
1352                 size = sizeof(struct compat_xt_counters_info);
1353         } else
1354 #endif
1355         {
1356                 ptmp = &tmp;
1357                 size = sizeof(struct xt_counters_info);
1358         }
1359
1360         if (copy_from_user(ptmp, user, size) != 0)
1361                 return -EFAULT;
1362
1363 #ifdef CONFIG_COMPAT
1364         if (compat) {
1365                 num_counters = compat_tmp.num_counters;
1366                 name = compat_tmp.name;
1367         } else
1368 #endif
1369         {
1370                 num_counters = tmp.num_counters;
1371                 name = tmp.name;
1372         }
1373
1374         if (len != size + num_counters * sizeof(struct xt_counters))
1375                 return -EINVAL;
1376
1377         paddc = vmalloc_node(len - size, numa_node_id());
1378         if (!paddc)
1379                 return -ENOMEM;
1380
1381         if (copy_from_user(paddc, user + size, len - size) != 0) {
1382                 ret = -EFAULT;
1383                 goto free;
1384         }
1385
1386         t = xt_find_table_lock(net, AF_INET, name);
1387         if (!t || IS_ERR(t)) {
1388                 ret = t ? PTR_ERR(t) : -ENOENT;
1389                 goto free;
1390         }
1391
1392         write_lock_bh(&t->lock);
1393         private = t->private;
1394         if (private->number != num_counters) {
1395                 ret = -EINVAL;
1396                 goto unlock_up_free;
1397         }
1398
1399         i = 0;
1400         /* Choose the copy that is on our node */
1401         loc_cpu_entry = private->entries[raw_smp_processor_id()];
1402         IPT_ENTRY_ITERATE(loc_cpu_entry,
1403                           private->size,
1404                           add_counter_to_entry,
1405                           paddc,
1406                           &i);
1407  unlock_up_free:
1408         write_unlock_bh(&t->lock);
1409         xt_table_unlock(t);
1410         module_put(t->me);
1411  free:
1412         vfree(paddc);
1413
1414         return ret;
1415 }
1416
1417 #ifdef CONFIG_COMPAT
1418 struct compat_ipt_replace {
1419         char                    name[IPT_TABLE_MAXNAMELEN];
1420         u32                     valid_hooks;
1421         u32                     num_entries;
1422         u32                     size;
1423         u32                     hook_entry[NF_INET_NUMHOOKS];
1424         u32                     underflow[NF_INET_NUMHOOKS];
1425         u32                     num_counters;
1426         compat_uptr_t           counters;       /* struct ipt_counters * */
1427         struct compat_ipt_entry entries[0];
1428 };
1429
1430 static int
1431 compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
1432                           unsigned int *size, struct xt_counters *counters,
1433                           unsigned int *i)
1434 {
1435         struct ipt_entry_target *t;
1436         struct compat_ipt_entry __user *ce;
1437         u_int16_t target_offset, next_offset;
1438         compat_uint_t origsize;
1439         int ret;
1440
1441         ret = -EFAULT;
1442         origsize = *size;
1443         ce = (struct compat_ipt_entry __user *)*dstptr;
1444         if (copy_to_user(ce, e, sizeof(struct ipt_entry)))
1445                 goto out;
1446
1447         if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i])))
1448                 goto out;
1449
1450         *dstptr += sizeof(struct compat_ipt_entry);
1451         *size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1452
1453         ret = IPT_MATCH_ITERATE(e, xt_compat_match_to_user, dstptr, size);
1454         target_offset = e->target_offset - (origsize - *size);
1455         if (ret)
1456                 goto out;
1457         t = ipt_get_target(e);
1458         ret = xt_compat_target_to_user(t, dstptr, size);
1459         if (ret)
1460                 goto out;
1461         ret = -EFAULT;
1462         next_offset = e->next_offset - (origsize - *size);
1463         if (put_user(target_offset, &ce->target_offset))
1464                 goto out;
1465         if (put_user(next_offset, &ce->next_offset))
1466                 goto out;
1467
1468         (*i)++;
1469         return 0;
1470 out:
1471         return ret;
1472 }
1473
1474 static int
1475 compat_find_calc_match(struct ipt_entry_match *m,
1476                        const char *name,
1477                        const struct ipt_ip *ip,
1478                        unsigned int hookmask,
1479                        int *size, unsigned int *i)
1480 {
1481         struct xt_match *match;
1482
1483         match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
1484                                                       m->u.user.revision),
1485                                         "ipt_%s", m->u.user.name);
1486         if (IS_ERR(match) || !match) {
1487                 duprintf("compat_check_calc_match: `%s' not found\n",
1488                          m->u.user.name);
1489                 return match ? PTR_ERR(match) : -ENOENT;
1490         }
1491         m->u.kernel.match = match;
1492         *size += xt_compat_match_offset(match);
1493
1494         (*i)++;
1495         return 0;
1496 }
1497
1498 static int
1499 compat_release_match(struct ipt_entry_match *m, unsigned int *i)
1500 {
1501         if (i && (*i)-- == 0)
1502                 return 1;
1503
1504         module_put(m->u.kernel.match->me);
1505         return 0;
1506 }
1507
1508 static int
1509 compat_release_entry(struct compat_ipt_entry *e, unsigned int *i)
1510 {
1511         struct ipt_entry_target *t;
1512
1513         if (i && (*i)-- == 0)
1514                 return 1;
1515
1516         /* Cleanup all matches */
1517         COMPAT_IPT_MATCH_ITERATE(e, compat_release_match, NULL);
1518         t = compat_ipt_get_target(e);
1519         module_put(t->u.kernel.target->me);
1520         return 0;
1521 }
1522
1523 static int
1524 check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
1525                                   struct xt_table_info *newinfo,
1526                                   unsigned int *size,
1527                                   unsigned char *base,
1528                                   unsigned char *limit,
1529                                   unsigned int *hook_entries,
1530                                   unsigned int *underflows,
1531                                   unsigned int *i,
1532                                   const char *name)
1533 {
1534         struct ipt_entry_target *t;
1535         struct xt_target *target;
1536         unsigned int entry_offset;
1537         unsigned int j;
1538         int ret, off, h;
1539
1540         duprintf("check_compat_entry_size_and_hooks %p\n", e);
1541         if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0
1542             || (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) {
1543                 duprintf("Bad offset %p, limit = %p\n", e, limit);
1544                 return -EINVAL;
1545         }
1546
1547         if (e->next_offset < sizeof(struct compat_ipt_entry) +
1548                              sizeof(struct compat_xt_entry_target)) {
1549                 duprintf("checking: element %p size %u\n",
1550                          e, e->next_offset);
1551                 return -EINVAL;
1552         }
1553
1554         /* For purposes of check_entry casting the compat entry is fine */
1555         ret = check_entry((struct ipt_entry *)e, name);
1556         if (ret)
1557                 return ret;
1558
1559         off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1560         entry_offset = (void *)e - (void *)base;
1561         j = 0;
1562         ret = COMPAT_IPT_MATCH_ITERATE(e, compat_find_calc_match, name,
1563                                        &e->ip, e->comefrom, &off, &j);
1564         if (ret != 0)
1565                 goto release_matches;
1566
1567         t = compat_ipt_get_target(e);
1568         target = try_then_request_module(xt_find_target(AF_INET,
1569                                                         t->u.user.name,
1570                                                         t->u.user.revision),
1571                                          "ipt_%s", t->u.user.name);
1572         if (IS_ERR(target) || !target) {
1573                 duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
1574                          t->u.user.name);
1575                 ret = target ? PTR_ERR(target) : -ENOENT;
1576                 goto release_matches;
1577         }
1578         t->u.kernel.target = target;
1579
1580         off += xt_compat_target_offset(target);
1581         *size += off;
1582         ret = xt_compat_add_offset(AF_INET, entry_offset, off);
1583         if (ret)
1584                 goto out;
1585
1586         /* Check hooks & underflows */
1587         for (h = 0; h < NF_INET_NUMHOOKS; h++) {
1588                 if ((unsigned char *)e - base == hook_entries[h])
1589                         newinfo->hook_entry[h] = hook_entries[h];
1590                 if ((unsigned char *)e - base == underflows[h])
1591                         newinfo->underflow[h] = underflows[h];
1592         }
1593
1594         /* Clear counters and comefrom */
1595         memset(&e->counters, 0, sizeof(e->counters));
1596         e->comefrom = 0;
1597
1598         (*i)++;
1599         return 0;
1600
1601 out:
1602         module_put(t->u.kernel.target->me);
1603 release_matches:
1604         IPT_MATCH_ITERATE(e, compat_release_match, &j);
1605         return ret;
1606 }
1607
1608 static int
1609 compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
1610                             unsigned int *size, const char *name,
1611                             struct xt_table_info *newinfo, unsigned char *base)
1612 {
1613         struct ipt_entry_target *t;
1614         struct xt_target *target;
1615         struct ipt_entry *de;
1616         unsigned int origsize;
1617         int ret, h;
1618
1619         ret = 0;
1620         origsize = *size;
1621         de = (struct ipt_entry *)*dstptr;
1622         memcpy(de, e, sizeof(struct ipt_entry));
1623         memcpy(&de->counters, &e->counters, sizeof(e->counters));
1624
1625         *dstptr += sizeof(struct ipt_entry);
1626         *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1627
1628         ret = COMPAT_IPT_MATCH_ITERATE(e, xt_compat_match_from_user,
1629                                        dstptr, size);
1630         if (ret)
1631                 return ret;
1632         de->target_offset = e->target_offset - (origsize - *size);
1633         t = compat_ipt_get_target(e);
1634         target = t->u.kernel.target;
1635         xt_compat_target_from_user(t, dstptr, size);
1636
1637         de->next_offset = e->next_offset - (origsize - *size);
1638         for (h = 0; h < NF_INET_NUMHOOKS; h++) {
1639                 if ((unsigned char *)de - base < newinfo->hook_entry[h])
1640                         newinfo->hook_entry[h] -= origsize - *size;
1641                 if ((unsigned char *)de - base < newinfo->underflow[h])
1642                         newinfo->underflow[h] -= origsize - *size;
1643         }
1644         return ret;
1645 }
1646
1647 static int
1648 compat_check_entry(struct ipt_entry *e, const char *name,
1649                                      unsigned int *i)
1650 {
1651         unsigned int j;
1652         int ret;
1653
1654         j = 0;
1655         ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip,
1656                                 e->comefrom, &j);
1657         if (ret)
1658                 goto cleanup_matches;
1659
1660         ret = check_target(e, name);
1661         if (ret)
1662                 goto cleanup_matches;
1663
1664         (*i)++;
1665         return 0;
1666
1667  cleanup_matches:
1668         IPT_MATCH_ITERATE(e, cleanup_match, &j);
1669         return ret;
1670 }
1671
1672 static int
1673 translate_compat_table(const char *name,
1674                        unsigned int valid_hooks,
1675                        struct xt_table_info **pinfo,
1676                        void **pentry0,
1677                        unsigned int total_size,
1678                        unsigned int number,
1679                        unsigned int *hook_entries,
1680                        unsigned int *underflows)
1681 {
1682         unsigned int i, j;
1683         struct xt_table_info *newinfo, *info;
1684         void *pos, *entry0, *entry1;
1685         unsigned int size;
1686         int ret;
1687
1688         info = *pinfo;
1689         entry0 = *pentry0;
1690         size = total_size;
1691         info->number = number;
1692
1693         /* Init all hooks to impossible value. */
1694         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1695                 info->hook_entry[i] = 0xFFFFFFFF;
1696                 info->underflow[i] = 0xFFFFFFFF;
1697         }
1698
1699         duprintf("translate_compat_table: size %u\n", info->size);
1700         j = 0;
1701         xt_compat_lock(AF_INET);
1702         /* Walk through entries, checking offsets. */
1703         ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size,
1704                                        check_compat_entry_size_and_hooks,
1705                                        info, &size, entry0,
1706                                        entry0 + total_size,
1707                                        hook_entries, underflows, &j, name);
1708         if (ret != 0)
1709                 goto out_unlock;
1710
1711         ret = -EINVAL;
1712         if (j != number) {
1713                 duprintf("translate_compat_table: %u not %u entries\n",
1714                          j, number);
1715                 goto out_unlock;
1716         }
1717
1718         /* Check hooks all assigned */
1719         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1720                 /* Only hooks which are valid */
1721                 if (!(valid_hooks & (1 << i)))
1722                         continue;
1723                 if (info->hook_entry[i] == 0xFFFFFFFF) {
1724                         duprintf("Invalid hook entry %u %u\n",
1725                                  i, hook_entries[i]);
1726                         goto out_unlock;
1727                 }
1728                 if (info->underflow[i] == 0xFFFFFFFF) {
1729                         duprintf("Invalid underflow %u %u\n",
1730                                  i, underflows[i]);
1731                         goto out_unlock;
1732                 }
1733         }
1734
1735         ret = -ENOMEM;
1736         newinfo = xt_alloc_table_info(size);
1737         if (!newinfo)
1738                 goto out_unlock;
1739
1740         newinfo->number = number;
1741         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1742                 newinfo->hook_entry[i] = info->hook_entry[i];
1743                 newinfo->underflow[i] = info->underflow[i];
1744         }
1745         entry1 = newinfo->entries[raw_smp_processor_id()];
1746         pos = entry1;
1747         size = total_size;
1748         ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size,
1749                                        compat_copy_entry_from_user,
1750                                        &pos, &size, name, newinfo, entry1);
1751         xt_compat_flush_offsets(AF_INET);
1752         xt_compat_unlock(AF_INET);
1753         if (ret)
1754                 goto free_newinfo;
1755
1756         ret = -ELOOP;
1757         if (!mark_source_chains(newinfo, valid_hooks, entry1))
1758                 goto free_newinfo;
1759
1760         i = 0;
1761         ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry,
1762                                 name, &i);
1763         if (ret) {
1764                 j -= i;
1765                 COMPAT_IPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i,
1766                                                   compat_release_entry, &j);
1767                 IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i);
1768                 xt_free_table_info(newinfo);
1769                 return ret;
1770         }
1771
1772         /* And one copy for every other CPU */
1773         for_each_possible_cpu(i)
1774                 if (newinfo->entries[i] && newinfo->entries[i] != entry1)
1775                         memcpy(newinfo->entries[i], entry1, newinfo->size);
1776
1777         *pinfo = newinfo;
1778         *pentry0 = entry1;
1779         xt_free_table_info(info);
1780         return 0;
1781
1782 free_newinfo:
1783         xt_free_table_info(newinfo);
1784 out:
1785         COMPAT_IPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j);
1786         return ret;
1787 out_unlock:
1788         xt_compat_flush_offsets(AF_INET);
1789         xt_compat_unlock(AF_INET);
1790         goto out;
1791 }
1792
1793 static int
1794 compat_do_replace(struct net *net, void __user *user, unsigned int len)
1795 {
1796         int ret;
1797         struct compat_ipt_replace tmp;
1798         struct xt_table_info *newinfo;
1799         void *loc_cpu_entry;
1800
1801         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1802                 return -EFAULT;
1803
1804         /* overflow check */
1805         if (tmp.size >= INT_MAX / num_possible_cpus())
1806                 return -ENOMEM;
1807         if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1808                 return -ENOMEM;
1809
1810         newinfo = xt_alloc_table_info(tmp.size);
1811         if (!newinfo)
1812                 return -ENOMEM;
1813
1814         /* choose the copy that is on our node/cpu */
1815         loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1816         if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1817                            tmp.size) != 0) {
1818                 ret = -EFAULT;
1819                 goto free_newinfo;
1820         }
1821
1822         ret = translate_compat_table(tmp.name, tmp.valid_hooks,
1823                                      &newinfo, &loc_cpu_entry, tmp.size,
1824                                      tmp.num_entries, tmp.hook_entry,
1825                                      tmp.underflow);
1826         if (ret != 0)
1827                 goto free_newinfo;
1828
1829         duprintf("compat_do_replace: Translated table\n");
1830
1831         ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1832                            tmp.num_counters, compat_ptr(tmp.counters));
1833         if (ret)
1834                 goto free_newinfo_untrans;
1835         return 0;
1836
1837  free_newinfo_untrans:
1838         IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
1839  free_newinfo:
1840         xt_free_table_info(newinfo);
1841         return ret;
1842 }
1843
1844 static int
1845 compat_do_ipt_set_ctl(struct sock *sk,  int cmd, void __user *user,
1846                       unsigned int len)
1847 {
1848         int ret;
1849
1850         if (!capable(CAP_NET_ADMIN))
1851                 return -EPERM;
1852
1853         switch (cmd) {
1854         case IPT_SO_SET_REPLACE:
1855                 ret = compat_do_replace(sock_net(sk), user, len);
1856                 break;
1857
1858         case IPT_SO_SET_ADD_COUNTERS:
1859                 ret = do_add_counters(sock_net(sk), user, len, 1);
1860                 break;
1861
1862         default:
1863                 duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
1864                 ret = -EINVAL;
1865         }
1866
1867         return ret;
1868 }
1869
1870 struct compat_ipt_get_entries {
1871         char name[IPT_TABLE_MAXNAMELEN];
1872         compat_uint_t size;
1873         struct compat_ipt_entry entrytable[0];
1874 };
1875
1876 static int
1877 compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
1878                             void __user *userptr)
1879 {
1880         struct xt_counters *counters;
1881         struct xt_table_info *private = table->private;
1882         void __user *pos;
1883         unsigned int size;
1884         int ret = 0;
1885         void *loc_cpu_entry;
1886         unsigned int i = 0;
1887
1888         counters = alloc_counters(table);
1889         if (IS_ERR(counters))
1890                 return PTR_ERR(counters);
1891
1892         /* choose the copy that is on our node/cpu, ...
1893          * This choice is lazy (because current thread is
1894          * allowed to migrate to another cpu)
1895          */
1896         loc_cpu_entry = private->entries[raw_smp_processor_id()];
1897         pos = userptr;
1898         size = total_size;
1899         ret = IPT_ENTRY_ITERATE(loc_cpu_entry, total_size,
1900                                 compat_copy_entry_to_user,
1901                                 &pos, &size, counters, &i);
1902
1903         vfree(counters);
1904         return ret;
1905 }
1906
1907 static int
1908 compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
1909                    int *len)
1910 {
1911         int ret;
1912         struct compat_ipt_get_entries get;
1913         struct xt_table *t;
1914
1915         if (*len < sizeof(get)) {
1916                 duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
1917                 return -EINVAL;
1918         }
1919
1920         if (copy_from_user(&get, uptr, sizeof(get)) != 0)
1921                 return -EFAULT;
1922
1923         if (*len != sizeof(struct compat_ipt_get_entries) + get.size) {
1924                 duprintf("compat_get_entries: %u != %zu\n",
1925                          *len, sizeof(get) + get.size);
1926                 return -EINVAL;
1927         }
1928
1929         xt_compat_lock(AF_INET);
1930         t = xt_find_table_lock(net, AF_INET, get.name);
1931         if (t && !IS_ERR(t)) {
1932                 struct xt_table_info *private = t->private;
1933                 struct xt_table_info info;
1934                 duprintf("t->private->number = %u\n", private->number);
1935                 ret = compat_table_info(private, &info);
1936                 if (!ret && get.size == info.size) {
1937                         ret = compat_copy_entries_to_user(private->size,
1938                                                           t, uptr->entrytable);
1939                 } else if (!ret) {
1940                         duprintf("compat_get_entries: I've got %u not %u!\n",
1941                                  private->size, get.size);
1942                         ret = -EINVAL;
1943                 }
1944                 xt_compat_flush_offsets(AF_INET);
1945                 module_put(t->me);
1946                 xt_table_unlock(t);
1947         } else
1948                 ret = t ? PTR_ERR(t) : -ENOENT;
1949
1950         xt_compat_unlock(AF_INET);
1951         return ret;
1952 }
1953
1954 static int do_ipt_get_ctl(struct sock *, int, void __user *, int *);
1955
1956 static int
1957 compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1958 {
1959         int ret;
1960
1961         if (!capable(CAP_NET_ADMIN))
1962                 return -EPERM;
1963
1964         switch (cmd) {
1965         case IPT_SO_GET_INFO:
1966                 ret = get_info(sock_net(sk), user, len, 1);
1967                 break;
1968         case IPT_SO_GET_ENTRIES:
1969                 ret = compat_get_entries(sock_net(sk), user, len);
1970                 break;
1971         default:
1972                 ret = do_ipt_get_ctl(sk, cmd, user, len);
1973         }
1974         return ret;
1975 }
1976 #endif
1977
1978 static int
1979 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1980 {
1981         int ret;
1982
1983         if (!capable(CAP_NET_ADMIN))
1984                 return -EPERM;
1985
1986         switch (cmd) {
1987         case IPT_SO_SET_REPLACE:
1988                 ret = do_replace(sock_net(sk), user, len);
1989                 break;
1990
1991         case IPT_SO_SET_ADD_COUNTERS:
1992                 ret = do_add_counters(sock_net(sk), user, len, 0);
1993                 break;
1994
1995         default:
1996                 duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
1997                 ret = -EINVAL;
1998         }
1999
2000         return ret;
2001 }
2002
2003 static int
2004 do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2005 {
2006         int ret;
2007
2008         if (!capable(CAP_NET_ADMIN))
2009                 return -EPERM;
2010
2011         switch (cmd) {
2012         case IPT_SO_GET_INFO:
2013                 ret = get_info(sock_net(sk), user, len, 0);
2014                 break;
2015
2016         case IPT_SO_GET_ENTRIES:
2017                 ret = get_entries(sock_net(sk), user, len);
2018                 break;
2019
2020         case IPT_SO_GET_REVISION_MATCH:
2021         case IPT_SO_GET_REVISION_TARGET: {
2022                 struct ipt_get_revision rev;
2023                 int target;
2024
2025                 if (*len != sizeof(rev)) {
2026                         ret = -EINVAL;
2027                         break;
2028                 }
2029                 if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
2030                         ret = -EFAULT;
2031                         break;
2032                 }
2033
2034                 if (cmd == IPT_SO_GET_REVISION_TARGET)
2035                         target = 1;
2036                 else
2037                         target = 0;
2038
2039                 try_then_request_module(xt_find_revision(AF_INET, rev.name,
2040                                                          rev.revision,
2041                                                          target, &ret),
2042                                         "ipt_%s", rev.name);
2043                 break;
2044         }
2045
2046         default:
2047                 duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
2048                 ret = -EINVAL;
2049         }
2050
2051         return ret;
2052 }
2053
2054 struct xt_table *ipt_register_table(struct net *net, struct xt_table *table,
2055                                     const struct ipt_replace *repl)
2056 {
2057         int ret;
2058         struct xt_table_info *newinfo;
2059         struct xt_table_info bootstrap
2060                 = { 0, 0, 0, { 0 }, { 0 }, { } };
2061         void *loc_cpu_entry;
2062         struct xt_table *new_table;
2063
2064         newinfo = xt_alloc_table_info(repl->size);
2065         if (!newinfo) {
2066                 ret = -ENOMEM;
2067                 goto out;
2068         }
2069
2070         /* choose the copy on our node/cpu, but dont care about preemption */
2071         loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
2072         memcpy(loc_cpu_entry, repl->entries, repl->size);
2073
2074         ret = translate_table(table->name, table->valid_hooks,
2075                               newinfo, loc_cpu_entry, repl->size,
2076                               repl->num_entries,
2077                               repl->hook_entry,
2078                               repl->underflow);
2079         if (ret != 0)
2080                 goto out_free;
2081
2082         new_table = xt_register_table(net, table, &bootstrap, newinfo);
2083         if (IS_ERR(new_table)) {
2084                 ret = PTR_ERR(new_table);
2085                 goto out_free;
2086         }
2087
2088         return new_table;
2089
2090 out_free:
2091         xt_free_table_info(newinfo);
2092 out:
2093         return ERR_PTR(ret);
2094 }
2095
2096 void ipt_unregister_table(struct xt_table *table)
2097 {
2098         struct xt_table_info *private;
2099         void *loc_cpu_entry;
2100         struct module *table_owner = table->me;
2101
2102         private = xt_unregister_table(table);
2103
2104         /* Decrease module usage counts and free resources */
2105         loc_cpu_entry = private->entries[raw_smp_processor_id()];
2106         IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
2107         if (private->number > private->initial_entries)
2108                 module_put(table_owner);
2109         xt_free_table_info(private);
2110 }
2111
/*
 * Returns true if the packet's type/code falls within the rule's range,
 * false otherwise; the answer is flipped when @invert is set.
 * A @test_type of 0xFF matches every type and code.
 */
static inline bool
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
                     u_int8_t type, u_int8_t code,
                     bool invert)
{
        bool in_range;

        if (test_type == 0xFF)
                in_range = true;
        else
                in_range = type == test_type &&
                           code >= min_code && code <= max_code;

        return in_range != invert;
}
2122
2123 static bool
2124 icmp_match(const struct sk_buff *skb,
2125            const struct net_device *in,
2126            const struct net_device *out,
2127            const struct xt_match *match,
2128            const void *matchinfo,
2129            int offset,
2130            unsigned int protoff,
2131            bool *hotdrop)
2132 {
2133         struct icmphdr _icmph, *ic;
2134         const struct ipt_icmp *icmpinfo = matchinfo;
2135
2136         /* Must not be a fragment. */
2137         if (offset)
2138                 return false;
2139
2140         ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph);
2141         if (ic == NULL) {
2142                 /* We've been asked to examine this packet, and we
2143                  * can't.  Hence, no choice but to drop.
2144                  */
2145                 duprintf("Dropping evil ICMP tinygram.\n");
2146                 *hotdrop = true;
2147                 return false;
2148         }
2149
2150         return icmp_type_code_match(icmpinfo->type,
2151                                     icmpinfo->code[0],
2152                                     icmpinfo->code[1],
2153                                     ic->type, ic->code,
2154                                     !!(icmpinfo->invflags&IPT_ICMP_INV));
2155 }
2156
2157 /* Called when user tries to insert an entry of this type. */
2158 static bool
2159 icmp_checkentry(const char *tablename,
2160            const void *entry,
2161            const struct xt_match *match,
2162            void *matchinfo,
2163            unsigned int hook_mask)
2164 {
2165         const struct ipt_icmp *icmpinfo = matchinfo;
2166
2167         /* Must specify no unknown invflags */
2168         return !(icmpinfo->invflags & ~IPT_ICMP_INV);
2169 }
2170
2171 /* The built-in targets: standard (NULL) and error. */
2172 static struct xt_target ipt_standard_target __read_mostly = {
2173         .name           = IPT_STANDARD_TARGET,
2174         .targetsize     = sizeof(int),
2175         .family         = AF_INET,
2176 #ifdef CONFIG_COMPAT
2177         .compatsize     = sizeof(compat_int_t),
2178         .compat_from_user = compat_standard_from_user,
2179         .compat_to_user = compat_standard_to_user,
2180 #endif
2181 };
2182
2183 static struct xt_target ipt_error_target __read_mostly = {
2184         .name           = IPT_ERROR_TARGET,
2185         .target         = ipt_error,
2186         .targetsize     = IPT_FUNCTION_MAXNAMELEN,
2187         .family         = AF_INET,
2188 };
2189
2190 static struct nf_sockopt_ops ipt_sockopts = {
2191         .pf             = PF_INET,
2192         .set_optmin     = IPT_BASE_CTL,
2193         .set_optmax     = IPT_SO_SET_MAX+1,
2194         .set            = do_ipt_set_ctl,
2195 #ifdef CONFIG_COMPAT
2196         .compat_set     = compat_do_ipt_set_ctl,
2197 #endif
2198         .get_optmin     = IPT_BASE_CTL,
2199         .get_optmax     = IPT_SO_GET_MAX+1,
2200         .get            = do_ipt_get_ctl,
2201 #ifdef CONFIG_COMPAT
2202         .compat_get     = compat_do_ipt_get_ctl,
2203 #endif
2204         .owner          = THIS_MODULE,
2205 };
2206
2207 static struct xt_match icmp_matchstruct __read_mostly = {
2208         .name           = "icmp",
2209         .match          = icmp_match,
2210         .matchsize      = sizeof(struct ipt_icmp),
2211         .checkentry     = icmp_checkentry,
2212         .proto          = IPPROTO_ICMP,
2213         .family         = AF_INET,
2214 };
2215
2216 static int __net_init ip_tables_net_init(struct net *net)
2217 {
2218         return xt_proto_init(net, AF_INET);
2219 }
2220
2221 static void __net_exit ip_tables_net_exit(struct net *net)
2222 {
2223         xt_proto_fini(net, AF_INET);
2224 }
2225
2226 static struct pernet_operations ip_tables_net_ops = {
2227         .init = ip_tables_net_init,
2228         .exit = ip_tables_net_exit,
2229 };
2230
2231 static int __init ip_tables_init(void)
2232 {
2233         int ret;
2234
2235         ret = register_pernet_subsys(&ip_tables_net_ops);
2236         if (ret < 0)
2237                 goto err1;
2238
2239         /* Noone else will be downing sem now, so we won't sleep */
2240         ret = xt_register_target(&ipt_standard_target);
2241         if (ret < 0)
2242                 goto err2;
2243         ret = xt_register_target(&ipt_error_target);
2244         if (ret < 0)
2245                 goto err3;
2246         ret = xt_register_match(&icmp_matchstruct);
2247         if (ret < 0)
2248                 goto err4;
2249
2250         /* Register setsockopt */
2251         ret = nf_register_sockopt(&ipt_sockopts);
2252         if (ret < 0)
2253                 goto err5;
2254
2255         printk(KERN_INFO "ip_tables: (C) 2000-2006 Netfilter Core Team\n");
2256         return 0;
2257
2258 err5:
2259         xt_unregister_match(&icmp_matchstruct);
2260 err4:
2261         xt_unregister_target(&ipt_error_target);
2262 err3:
2263         xt_unregister_target(&ipt_standard_target);
2264 err2:
2265         unregister_pernet_subsys(&ip_tables_net_ops);
2266 err1:
2267         return ret;
2268 }
2269
2270 static void __exit ip_tables_fini(void)
2271 {
2272         nf_unregister_sockopt(&ipt_sockopts);
2273
2274         xt_unregister_match(&icmp_matchstruct);
2275         xt_unregister_target(&ipt_error_target);
2276         xt_unregister_target(&ipt_standard_target);
2277
2278         unregister_pernet_subsys(&ip_tables_net_ops);
2279 }
2280
/* Module entry and exit points. */
module_init(ip_tables_init);
module_exit(ip_tables_fini);

/* Symbols exported to other modules. */
EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
EXPORT_SYMBOL(ipt_do_table);