netfilter: xtables: optimize call flow around xt_entry_foreach
[linux-2.6.git] / net / ipv4 / netfilter / ip_tables.c
1 /*
2  * Packet matching code.
3  *
4  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5  * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12 #include <linux/cache.h>
13 #include <linux/capability.h>
14 #include <linux/skbuff.h>
15 #include <linux/kmod.h>
16 #include <linux/vmalloc.h>
17 #include <linux/netdevice.h>
18 #include <linux/module.h>
19 #include <linux/icmp.h>
20 #include <net/ip.h>
21 #include <net/compat.h>
22 #include <asm/uaccess.h>
23 #include <linux/mutex.h>
24 #include <linux/proc_fs.h>
25 #include <linux/err.h>
26 #include <linux/cpumask.h>
27
28 #include <linux/netfilter/x_tables.h>
29 #include <linux/netfilter_ipv4/ip_tables.h>
30 #include <net/netfilter/nf_log.h>
31 #include "../../netfilter/xt_repldata.h"
32
33 MODULE_LICENSE("GPL");
34 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
35 MODULE_DESCRIPTION("IPv4 packet filter");
36
/*#define DEBUG_IP_FIREWALL*/
/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
/*#define DEBUG_IP_FIREWALL_USER*/

/* dprintf: packet-path debug output; compiled out unless
 * DEBUG_IP_FIREWALL is defined above. */
#ifdef DEBUG_IP_FIREWALL
#define dprintf(format, args...)  printk(format , ## args)
#else
#define dprintf(format, args...)
#endif

/* duprintf: ruleset-loading (user-context) debug output; compiled out
 * unless DEBUG_IP_FIREWALL_USER is defined above. */
#ifdef DEBUG_IP_FIREWALL_USER
#define duprintf(format, args...) printk(format , ## args)
#else
#define duprintf(format, args...)
#endif

/* Soft assertion: logs location on failure, does not stop execution. */
#ifdef CONFIG_NETFILTER_DEBUG
#define IP_NF_ASSERT(x)                                         \
do {                                                            \
	if (!(x))                                               \
		printk("IP_NF_ASSERT: %s:%s:%u\n",              \
		       __func__, __FILE__, __LINE__);   \
} while(0)
#else
#define IP_NF_ASSERT(x)
#endif

#if 0
/* All the better to debug you with... */
#define static
#define inline
#endif
69
/* Allocate and populate the initial (empty) table blob used when a
 * table is first registered.  xt_alloc_initial_table() is a macro from
 * xt_repldata.h; the bare "ipt"/"IPT" tokens are pasted by it to form
 * the ipt_replace / IPT_* names. */
void *ipt_alloc_initial_table(const struct xt_table *info)
{
	return xt_alloc_initial_table(ipt, IPT);
}
EXPORT_SYMBOL_GPL(ipt_alloc_initial_table);
75
76 /*
77    We keep a set of rules for each CPU, so we can avoid write-locking
78    them in the softirq when updating the counters and therefore
79    only need to read-lock in the softirq; doing a write_lock_bh() in user
80    context stops packets coming through and allows user context to read
81    the counters or update the rules.
82
83    Hence the start of any table is given by get_table() below.  */
84
85 /* Returns whether matches rule or not. */
86 /* Performance critical - called for every packet */
87 static inline bool
88 ip_packet_match(const struct iphdr *ip,
89                 const char *indev,
90                 const char *outdev,
91                 const struct ipt_ip *ipinfo,
92                 int isfrag)
93 {
94         unsigned long ret;
95
96 #define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg)))
97
98         if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
99                   IPT_INV_SRCIP) ||
100             FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
101                   IPT_INV_DSTIP)) {
102                 dprintf("Source or dest mismatch.\n");
103
104                 dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n",
105                         &ip->saddr, &ipinfo->smsk.s_addr, &ipinfo->src.s_addr,
106                         ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
107                 dprintf("DST: %pI4 Mask: %pI4 Target: %pI4.%s\n",
108                         &ip->daddr, &ipinfo->dmsk.s_addr, &ipinfo->dst.s_addr,
109                         ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
110                 return false;
111         }
112
113         ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask);
114
115         if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
116                 dprintf("VIA in mismatch (%s vs %s).%s\n",
117                         indev, ipinfo->iniface,
118                         ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
119                 return false;
120         }
121
122         ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask);
123
124         if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
125                 dprintf("VIA out mismatch (%s vs %s).%s\n",
126                         outdev, ipinfo->outiface,
127                         ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
128                 return false;
129         }
130
131         /* Check specific protocol */
132         if (ipinfo->proto &&
133             FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
134                 dprintf("Packet protocol %hi does not match %hi.%s\n",
135                         ip->protocol, ipinfo->proto,
136                         ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
137                 return false;
138         }
139
140         /* If we have a fragment rule but the packet is not a fragment
141          * then we return zero */
142         if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
143                 dprintf("Fragment rule but not fragment.%s\n",
144                         ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
145                 return false;
146         }
147
148         return true;
149 }
150
151 static bool
152 ip_checkentry(const struct ipt_ip *ip)
153 {
154         if (ip->flags & ~IPT_F_MASK) {
155                 duprintf("Unknown flag bits set: %08X\n",
156                          ip->flags & ~IPT_F_MASK);
157                 return false;
158         }
159         if (ip->invflags & ~IPT_INV_MASK) {
160                 duprintf("Unknown invflag bits set: %08X\n",
161                          ip->invflags & ~IPT_INV_MASK);
162                 return false;
163         }
164         return true;
165 }
166
167 static unsigned int
168 ipt_error(struct sk_buff *skb, const struct xt_target_param *par)
169 {
170         if (net_ratelimit())
171                 printk("ip_tables: error: `%s'\n",
172                        (const char *)par->targinfo);
173
174         return NF_DROP;
175 }
176
177 /* Performance critical - called for every packet */
178 static inline bool
179 do_match(const struct ipt_entry_match *m, const struct sk_buff *skb,
180          struct xt_match_param *par)
181 {
182         par->match     = m->u.kernel.match;
183         par->matchinfo = m->data;
184
185         /* Stop iteration if it doesn't match */
186         if (!m->u.kernel.match->match(skb, par))
187                 return true;
188         else
189                 return false;
190 }
191
/* Performance critical */
/* Return the rule located @offset bytes into the table blob @base. */
static inline struct ipt_entry *
get_entry(const void *base, unsigned int offset)
{
	const char *p = base;

	return (struct ipt_entry *)(p + offset);
}
198
/* All zeroes == unconditional rule. */
/* Mildly perf critical (only if packet tracing is on) */
static inline bool unconditional(const struct ipt_ip *ip)
{
	static const struct ipt_ip uncond;

	return memcmp(ip, &uncond, sizeof(uncond)) == 0;
	/* NOTE(review): this #undef closes the FWINV macro defined in
	 * ip_packet_match() above; its placement inside this function
	 * body is historical and has no effect on the code here. */
#undef FWINV
}
208
/* for const-correctness */
/* Const-qualified view of a rule's target; casts away const only to
 * reuse the non-const ipt_get_target() accessor, never writes. */
static inline const struct ipt_entry_target *
ipt_get_target_c(const struct ipt_entry *e)
{
	return ipt_get_target((struct ipt_entry *)e);
}
215
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
/* Hook number -> chain name used in TRACE log lines. */
static const char *const hooknames[] = {
	[NF_INET_PRE_ROUTING]		= "PREROUTING",
	[NF_INET_LOCAL_IN]		= "INPUT",
	[NF_INET_FORWARD]		= "FORWARD",
	[NF_INET_LOCAL_OUT]		= "OUTPUT",
	[NF_INET_POST_ROUTING]		= "POSTROUTING",
};

/* Indices into comments[] below. */
enum nf_ip_trace_comments {
	NF_IP_TRACE_COMMENT_RULE,
	NF_IP_TRACE_COMMENT_RETURN,
	NF_IP_TRACE_COMMENT_POLICY,
};

static const char *const comments[] = {
	[NF_IP_TRACE_COMMENT_RULE]	= "rule",
	[NF_IP_TRACE_COMMENT_RETURN]	= "return",
	[NF_IP_TRACE_COMMENT_POLICY]	= "policy",
};

/* Fixed logging parameters for TRACE output (syslog level 4). */
static struct nf_loginfo trace_loginfo = {
	.type = NF_LOG_TYPE_LOG,
	.u = {
		.log = {
			.level = 4,
			.logflags = NF_LOG_MASK,
		},
	},
};
247
/* Mildly perf critical (only if packet tracing is on) */
/* One step of the trace walk over a chain: records the chain name when
 * an ERROR target (head of a user chain) is passed, counts rules, and
 * returns 1 once the iterator @s reaches the traced rule @e. */
static inline int
get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e,
		      const char *hookname, const char **chainname,
		      const char **comment, unsigned int *rulenum)
{
	const struct ipt_standard_target *t = (void *)ipt_get_target_c(s);

	if (strcmp(t->target.u.kernel.target->name, IPT_ERROR_TARGET) == 0) {
		/* Head of user chain: ERROR target with chainname */
		*chainname = t->target.data;
		(*rulenum) = 0;
	} else if (s == e) {
		(*rulenum)++;

		if (s->target_offset == sizeof(struct ipt_entry) &&
		    strcmp(t->target.u.kernel.target->name,
			   IPT_STANDARD_TARGET) == 0 &&
		   t->verdict < 0 &&
		   unconditional(&s->ip)) {
			/* Tail of chains: STANDARD target (return/policy) */
			/* Still in the hook's own chain -> policy,
			 * otherwise a return from a user chain. */
			*comment = *chainname == hookname
				? comments[NF_IP_TRACE_COMMENT_POLICY]
				: comments[NF_IP_TRACE_COMMENT_RETURN];
		}
		return 1;
	} else
		(*rulenum)++;

	return 0;
}
279
/* Emit one TRACE log line for rule @e: walks the chain that starts at
 * @hook's entry point to work out the chain name, rule number and
 * comment (rule/return/policy), then logs via nf_log_packet(). */
static void trace_packet(const struct sk_buff *skb,
			 unsigned int hook,
			 const struct net_device *in,
			 const struct net_device *out,
			 const char *tablename,
			 const struct xt_table_info *private,
			 const struct ipt_entry *e)
{
	const void *table_base;
	const struct ipt_entry *root;
	const char *hookname, *chainname, *comment;
	const struct ipt_entry *iter;
	unsigned int rulenum = 0;

	/* This CPU's copy of the table; read-only here. */
	table_base = private->entries[smp_processor_id()];
	root = get_entry(table_base, private->hook_entry[hook]);

	hookname = chainname = hooknames[hook];
	comment = comments[NF_IP_TRACE_COMMENT_RULE];

	/* Scan forward from the hook entry until @e is found. */
	xt_entry_foreach(iter, root, private->size - private->hook_entry[hook])
		if (get_chainname_rulenum(iter, e, hookname,
		    &chainname, &comment, &rulenum) != 0)
			break;

	nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo,
		      "TRACE: %s:%s:%s:%u ",
		      tablename, chainname, comment, rulenum);
}
#endif
309 #endif
310
311 static inline __pure
312 struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry)
313 {
314         return (void *)entry + entry->next_offset;
315 }
316
/* Returns one of the generic firewall policies, like NF_ACCEPT. */
/* Main rule-walk loop: evaluates this CPU's copy of @table against
 * @skb starting at @hook's entry point.  Jumps and returns are
 * implemented with a back-pointer chain threaded through the entries'
 * comefrom fields. */
unsigned int
ipt_do_table(struct sk_buff *skb,
	     unsigned int hook,
	     const struct net_device *in,
	     const struct net_device *out,
	     struct xt_table *table)
{
/* Debug alias: the comefrom field of the first entry of the per-CPU
 * table copy, reused below as a target-reentrancy canary. */
#define tb_comefrom ((struct ipt_entry *)table_base)->comefrom

	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
	const struct iphdr *ip;
	bool hotdrop = false;
	/* Initializing verdict to NF_DROP keeps gcc happy. */
	unsigned int verdict = NF_DROP;
	const char *indev, *outdev;
	const void *table_base;
	struct ipt_entry *e, *back;
	const struct xt_table_info *private;
	struct xt_match_param mtpar;
	struct xt_target_param tgpar;

	/* Initialization */
	ip = ip_hdr(skb);
	indev = in ? in->name : nulldevname;
	outdev = out ? out->name : nulldevname;
	/* We handle fragments by dealing with the first fragment as
	 * if it was a normal packet.  All other fragments are treated
	 * normally, except that they will NEVER match rules that ask
	 * things we don't know, ie. tcp syn flag or ports).  If the
	 * rule is also a fragment-specific rule, non-fragments won't
	 * match it. */
	mtpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
	mtpar.thoff   = ip_hdrlen(skb);
	mtpar.hotdrop = &hotdrop;
	mtpar.in      = tgpar.in  = in;
	mtpar.out     = tgpar.out = out;
	mtpar.family  = tgpar.family = NFPROTO_IPV4;
	mtpar.hooknum = tgpar.hooknum = hook;

	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
	xt_info_rdlock_bh();
	private = table->private;
	table_base = private->entries[smp_processor_id()];

	e = get_entry(table_base, private->hook_entry[hook]);

	/* For return from builtin chain */
	back = get_entry(table_base, private->underflow[hook]);

	do {
		const struct ipt_entry_target *t;

		IP_NF_ASSERT(e);
		IP_NF_ASSERT(back);
		/* IP header or any match fails -> fall through to the
		 * next rule. */
		if (!ip_packet_match(ip, indev, outdev,
		    &e->ip, mtpar.fragoff) ||
		    IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) {
			e = ipt_next_entry(e);
			continue;
		}

		ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);

		t = ipt_get_target(e);
		IP_NF_ASSERT(t->u.kernel.target);

#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
		/* The packet is traced: log it */
		if (unlikely(skb->nf_trace))
			trace_packet(skb, hook, in, out,
				     table->name, private, e);
#endif
		/* Standard target? */
		if (!t->u.kernel.target->target) {
			int v;

			v = ((struct ipt_standard_target *)t)->verdict;
			if (v < 0) {
				/* Pop from stack? */
				if (v != IPT_RETURN) {
					/* Absolute verdict, encoded as
					 * -verdict - 1. */
					verdict = (unsigned)(-v) - 1;
					break;
				}
				e = back;
				back = get_entry(table_base, back->comefrom);
				continue;
			}
			/* A jump (not a plain fallthrough and not GOTO):
			 * remember where to return to. */
			if (table_base + v != ipt_next_entry(e) &&
			    !(e->ip.flags & IPT_F_GOTO)) {
				/* Save old back ptr in next entry */
				struct ipt_entry *next = ipt_next_entry(e);
				next->comefrom = (void *)back - table_base;
				/* set back pointer to next entry */
				back = next;
			}

			e = get_entry(table_base, v);
			continue;
		}

		/* Targets which reenter must return
		   abs. verdicts */
		tgpar.target   = t->u.kernel.target;
		tgpar.targinfo = t->data;


#ifdef CONFIG_NETFILTER_DEBUG
		/* Canary: a reentering target would overwrite this. */
		tb_comefrom = 0xeeeeeeec;
#endif
		verdict = t->u.kernel.target->target(skb, &tgpar);
#ifdef CONFIG_NETFILTER_DEBUG
		if (tb_comefrom != 0xeeeeeeec && verdict == IPT_CONTINUE) {
			printk("Target %s reentered!\n",
			       t->u.kernel.target->name);
			verdict = NF_DROP;
		}
		tb_comefrom = 0x57acc001;
#endif
		/* Target might have changed stuff. */
		ip = ip_hdr(skb);
		if (verdict == IPT_CONTINUE)
			e = ipt_next_entry(e);
		else
			/* Verdict */
			break;
	} while (!hotdrop);
	xt_info_rdunlock_bh();

#ifdef DEBUG_ALLOW_ALL
	return NF_ACCEPT;
#else
	if (hotdrop)
		return NF_DROP;
	else return verdict;
#endif

#undef tb_comefrom
}
457
/* Figures out from what hook each rule can be called: returns 0 if
   there are loops.  Puts hook bitmask in comefrom. */
static int
mark_source_chains(const struct xt_table_info *newinfo,
		   unsigned int valid_hooks, void *entry0)
{
	unsigned int hook;

	/* No recursion; use packet counter to save back ptrs (reset
	   to 0 as we leave), and comefrom to save source hook bitmask */
	for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
		unsigned int pos = newinfo->hook_entry[hook];
		struct ipt_entry *e = (struct ipt_entry *)(entry0 + pos);

		if (!(valid_hooks & (1 << hook)))
			continue;

		/* Set initial back pointer. */
		e->counters.pcnt = pos;

		for (;;) {
			const struct ipt_standard_target *t
				= (void *)ipt_get_target_c(e);
			int visited = e->comefrom & (1 << hook);

			/* Bit NF_INET_NUMHOOKS marks "on the current
			 * path"; meeting it again means a loop. */
			if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
				printk("iptables: loop hook %u pos %u %08X.\n",
				       hook, pos, e->comefrom);
				return 0;
			}
			e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));

			/* Unconditional return/END. */
			if ((e->target_offset == sizeof(struct ipt_entry) &&
			     (strcmp(t->target.u.user.name,
				     IPT_STANDARD_TARGET) == 0) &&
			     t->verdict < 0 && unconditional(&e->ip)) ||
			    visited) {
				unsigned int oldpos, size;

				if ((strcmp(t->target.u.user.name,
					    IPT_STANDARD_TARGET) == 0) &&
				    t->verdict < -NF_MAX_VERDICT - 1) {
					duprintf("mark_source_chains: bad "
						"negative verdict (%i)\n",
								t->verdict);
					return 0;
				}

				/* Return: backtrack through the last
				   big jump. */
				do {
					/* Leaving this entry: clear the
					 * "on current path" bit. */
					e->comefrom ^= (1<<NF_INET_NUMHOOKS);
#ifdef DEBUG_IP_FIREWALL_USER
					if (e->comefrom
					    & (1 << NF_INET_NUMHOOKS)) {
						duprintf("Back unset "
							 "on hook %u "
							 "rule %u\n",
							 hook, pos);
					}
#endif
					/* Pop the back pointer saved in
					 * counters.pcnt and reset it. */
					oldpos = pos;
					pos = e->counters.pcnt;
					e->counters.pcnt = 0;

					/* We're at the start. */
					if (pos == oldpos)
						goto next;

					e = (struct ipt_entry *)
						(entry0 + pos);
				} while (oldpos == pos + e->next_offset);

				/* Move along one */
				size = e->next_offset;
				e = (struct ipt_entry *)
					(entry0 + pos + size);
				e->counters.pcnt = pos;
				pos += size;
			} else {
				int newpos = t->verdict;

				if (strcmp(t->target.u.user.name,
					   IPT_STANDARD_TARGET) == 0 &&
				    newpos >= 0) {
					if (newpos > newinfo->size -
						sizeof(struct ipt_entry)) {
						duprintf("mark_source_chains: "
							"bad verdict (%i)\n",
								newpos);
						return 0;
					}
					/* This a jump; chase it. */
					duprintf("Jump rule %u -> %u\n",
						 pos, newpos);
				} else {
					/* ... this is a fallthru */
					newpos = pos + e->next_offset;
				}
				e = (struct ipt_entry *)
					(entry0 + newpos);
				e->counters.pcnt = pos;
				pos = newpos;
			}
		}
		next:
		duprintf("Finished chain %u\n", hook);
	}
	return 1;
}
569
570 static int
571 cleanup_match(struct ipt_entry_match *m, struct net *net, unsigned int *i)
572 {
573         struct xt_mtdtor_param par;
574
575         if (i && (*i)-- == 0)
576                 return 1;
577
578         par.net       = net;
579         par.match     = m->u.kernel.match;
580         par.matchinfo = m->data;
581         par.family    = NFPROTO_IPV4;
582         if (par.match->destroy != NULL)
583                 par.match->destroy(&par);
584         module_put(par.match->me);
585         return 0;
586 }
587
588 static int
589 check_entry(const struct ipt_entry *e, const char *name)
590 {
591         const struct ipt_entry_target *t;
592
593         if (!ip_checkentry(&e->ip)) {
594                 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
595                 return -EINVAL;
596         }
597
598         if (e->target_offset + sizeof(struct ipt_entry_target) >
599             e->next_offset)
600                 return -EINVAL;
601
602         t = ipt_get_target_c(e);
603         if (e->target_offset + t->u.target_size > e->next_offset)
604                 return -EINVAL;
605
606         return 0;
607 }
608
609 static int
610 check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par,
611             unsigned int *i)
612 {
613         const struct ipt_ip *ip = par->entryinfo;
614         int ret;
615
616         par->match     = m->u.kernel.match;
617         par->matchinfo = m->data;
618
619         ret = xt_check_match(par, m->u.match_size - sizeof(*m),
620               ip->proto, ip->invflags & IPT_INV_PROTO);
621         if (ret < 0) {
622                 duprintf("ip_tables: check failed for `%s'.\n",
623                          par.match->name);
624                 return ret;
625         }
626         ++*i;
627         return 0;
628 }
629
630 static int
631 find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par,
632                  unsigned int *i)
633 {
634         struct xt_match *match;
635         int ret;
636
637         match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
638                                                       m->u.user.revision),
639                                         "ipt_%s", m->u.user.name);
640         if (IS_ERR(match) || !match) {
641                 duprintf("find_check_match: `%s' not found\n", m->u.user.name);
642                 return match ? PTR_ERR(match) : -ENOENT;
643         }
644         m->u.kernel.match = match;
645
646         ret = check_match(m, par, i);
647         if (ret)
648                 goto err;
649
650         return 0;
651 err:
652         module_put(m->u.kernel.match->me);
653         return ret;
654 }
655
656 static int check_target(struct ipt_entry *e, struct net *net, const char *name)
657 {
658         struct ipt_entry_target *t = ipt_get_target(e);
659         struct xt_tgchk_param par = {
660                 .net       = net,
661                 .table     = name,
662                 .entryinfo = e,
663                 .target    = t->u.kernel.target,
664                 .targinfo  = t->data,
665                 .hook_mask = e->comefrom,
666                 .family    = NFPROTO_IPV4,
667         };
668         int ret;
669
670         ret = xt_check_target(&par, t->u.target_size - sizeof(*t),
671               e->ip.proto, e->ip.invflags & IPT_INV_PROTO);
672         if (ret < 0) {
673                 duprintf("ip_tables: check failed for `%s'.\n",
674                          t->u.kernel.target->name);
675                 return ret;
676         }
677         return 0;
678 }
679
/* Resolve and check all matches and the target of one rule.  On
 * success every match/target holds a module reference; on failure all
 * references acquired so far are released again. */
static int
find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
		 unsigned int size)
{
	struct ipt_entry_target *t;
	struct xt_target *target;
	int ret;
	unsigned int j;
	struct xt_mtchk_param mtpar;

	ret = check_entry(e, name);
	if (ret)
		return ret;

	/* j counts successfully checked matches so cleanup_match()
	 * stops after exactly that many on the error path. */
	j = 0;
	mtpar.net       = net;
	mtpar.table     = name;
	mtpar.entryinfo = &e->ip;
	mtpar.hook_mask = e->comefrom;
	mtpar.family    = NFPROTO_IPV4;
	ret = IPT_MATCH_ITERATE(e, find_check_match, &mtpar, &j);
	if (ret != 0)
		goto cleanup_matches;

	t = ipt_get_target(e);
	target = try_then_request_module(xt_find_target(AF_INET,
							t->u.user.name,
							t->u.user.revision),
					 "ipt_%s", t->u.user.name);
	if (IS_ERR(target) || !target) {
		duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
		ret = target ? PTR_ERR(target) : -ENOENT;
		goto cleanup_matches;
	}
	t->u.kernel.target = target;

	ret = check_target(e, net, name);
	if (ret)
		goto err;
	return 0;
 err:
	module_put(t->u.kernel.target->me);
 cleanup_matches:
	IPT_MATCH_ITERATE(e, cleanup_match, net, &j);
	return ret;
}
726
727 static bool check_underflow(const struct ipt_entry *e)
728 {
729         const struct ipt_entry_target *t;
730         unsigned int verdict;
731
732         if (!unconditional(&e->ip))
733                 return false;
734         t = ipt_get_target_c(e);
735         if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
736                 return false;
737         verdict = ((struct ipt_standard_target *)t)->verdict;
738         verdict = -verdict - 1;
739         return verdict == NF_DROP || verdict == NF_ACCEPT;
740 }
741
/* Validate alignment and minimum size of one rule, and record any hook
 * entry points / underflows that coincide with its offset in @newinfo.
 * Also resets the rule's counters and comefrom for later passes. */
static int
check_entry_size_and_hooks(struct ipt_entry *e,
			   struct xt_table_info *newinfo,
			   const unsigned char *base,
			   const unsigned char *limit,
			   const unsigned int *hook_entries,
			   const unsigned int *underflows,
			   unsigned int valid_hooks)
{
	unsigned int h;

	/* Rule must be aligned and leave room for the fixed header
	 * before the end of the blob. */
	if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 ||
	    (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
		duprintf("Bad offset %p\n", e);
		return -EINVAL;
	}

	/* next_offset must cover at least header + minimal target. */
	if (e->next_offset
	    < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
		duprintf("checking: element %p size %u\n",
			 e, e->next_offset);
		return -EINVAL;
	}

	/* Check hooks & underflows */
	for (h = 0; h < NF_INET_NUMHOOKS; h++) {
		if (!(valid_hooks & (1 << h)))
			continue;
		if ((unsigned char *)e - base == hook_entries[h])
			newinfo->hook_entry[h] = hook_entries[h];
		if ((unsigned char *)e - base == underflows[h]) {
			if (!check_underflow(e)) {
				pr_err("Underflows must be unconditional and "
				       "use the STANDARD target with "
				       "ACCEPT/DROP\n");
				return -EINVAL;
			}
			newinfo->underflow[h] = underflows[h];
		}
	}

	/* Clear counters and comefrom */
	e->counters = ((struct xt_counters) { 0, 0 });
	e->comefrom = 0;
	return 0;
}
788
789 static void
790 cleanup_entry(struct ipt_entry *e, struct net *net)
791 {
792         struct xt_tgdtor_param par;
793         struct ipt_entry_target *t;
794
795         /* Cleanup all matches */
796         IPT_MATCH_ITERATE(e, cleanup_match, net, NULL);
797         t = ipt_get_target(e);
798
799         par.net      = net;
800         par.target   = t->u.kernel.target;
801         par.targinfo = t->data;
802         par.family   = NFPROTO_IPV4;
803         if (par.target->destroy != NULL)
804                 par.target->destroy(&par);
805         module_put(par.target->me);
806 }
807
/* Checks and translates the user-supplied table segment (held in
   newinfo).  Every entry is size- and bounds-checked, hook entry
   points and underflows are recorded, the rule graph is verified to
   be loop-free, each match/target checkentry hook is run, and finally
   the translated ruleset is duplicated into every other CPU's copy.
   Returns 0 or a negative errno; on a late failure, entries already
   set up are cleaned up again. */
static int
translate_table(struct net *net,
                const char *name,
                unsigned int valid_hooks,
                struct xt_table_info *newinfo,
                void *entry0,
                unsigned int size,
                unsigned int number,
                const unsigned int *hook_entries,
                const unsigned int *underflows)
{
        struct ipt_entry *iter;
        unsigned int i;
        int ret = 0;

        newinfo->size = size;
        newinfo->number = number;

        /* Init all hooks to impossible value. */
        for (i = 0; i < NF_INET_NUMHOOKS; i++) {
                newinfo->hook_entry[i] = 0xFFFFFFFF;
                newinfo->underflow[i] = 0xFFFFFFFF;
        }

        duprintf("translate_table: size %u\n", newinfo->size);
        i = 0;
        /* Walk through entries, checking offsets. */
        xt_entry_foreach(iter, entry0, newinfo->size) {
                ret = check_entry_size_and_hooks(iter, newinfo, entry0,
                      entry0 + size, hook_entries, underflows, valid_hooks);
                if (ret != 0)
                        return ret;
                ++i;
        }

        /* The walk must visit exactly the number of entries the
         * userspace header claimed. */
        if (i != number) {
                duprintf("translate_table: %u not %u entries\n",
                         i, number);
                return -EINVAL;
        }

        /* Check hooks all assigned */
        for (i = 0; i < NF_INET_NUMHOOKS; i++) {
                /* Only hooks which are valid */
                if (!(valid_hooks & (1 << i)))
                        continue;
                if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
                        duprintf("Invalid hook entry %u %u\n",
                                 i, hook_entries[i]);
                        return -EINVAL;
                }
                if (newinfo->underflow[i] == 0xFFFFFFFF) {
                        duprintf("Invalid underflow %u %u\n",
                                 i, underflows[i]);
                        return -EINVAL;
                }
        }

        if (!mark_source_chains(newinfo, valid_hooks, entry0))
                return -ELOOP;

        /* Finally, each sanity check must pass */
        i = 0;
        xt_entry_foreach(iter, entry0, newinfo->size) {
                ret = find_check_entry(iter, net, name, size);
                if (ret != 0)
                        break;
                ++i;
        }

        if (ret != 0) {
                /* Unwind exactly the i entries that passed
                 * find_check_entry() before the failure. */
                xt_entry_foreach(iter, entry0, newinfo->size) {
                        if (i-- == 0)
                                break;
                        cleanup_entry(iter, net);
                }
                return ret;
        }

        /* And one copy for every other CPU */
        for_each_possible_cpu(i) {
                if (newinfo->entries[i] && newinfo->entries[i] != entry0)
                        memcpy(newinfo->entries[i], entry0, newinfo->size);
        }

        return ret;
}
897
/* Snapshot the per-rule packet/byte counters of table @t into
 * @counters: the current CPU's values are written first (SET_COUNTER),
 * then every other CPU's values are accumulated (ADD_COUNTER) under
 * that CPU's xt_info write lock. */
static void
get_counters(const struct xt_table_info *t,
             struct xt_counters counters[])
{
        struct ipt_entry *iter;
        unsigned int cpu;
        unsigned int i;
        unsigned int curcpu;

        /* Instead of clearing (by a previous call to memset())
         * the counters and using adds, we set the counters
         * with data used by 'current' CPU.
         *
         * Bottom half has to be disabled to prevent deadlock
         * if new softirq were to run and call ipt_do_table
         */
        local_bh_disable();
        curcpu = smp_processor_id();

        i = 0;
        xt_entry_foreach(iter, t->entries[curcpu], t->size) {
                SET_COUNTER(counters[i], iter->counters.bcnt,
                        iter->counters.pcnt);
                ++i;
        }

        for_each_possible_cpu(cpu) {
                if (cpu == curcpu)
                        continue;
                i = 0;
                xt_info_wrlock(cpu);
                xt_entry_foreach(iter, t->entries[cpu], t->size) {
                        ADD_COUNTER(counters[i], iter->counters.bcnt,
                                iter->counters.pcnt);
                        ++i; /* macro does multi eval of i */
                }
                xt_info_wrunlock(cpu);
        }
        local_bh_enable();
}
938
939 static struct xt_counters *alloc_counters(const struct xt_table *table)
940 {
941         unsigned int countersize;
942         struct xt_counters *counters;
943         const struct xt_table_info *private = table->private;
944
945         /* We need atomic snapshot of counters: rest doesn't change
946            (other than comefrom, which userspace doesn't care
947            about). */
948         countersize = sizeof(struct xt_counters) * private->number;
949         counters = vmalloc_node(countersize, numa_node_id());
950
951         if (counters == NULL)
952                 return ERR_PTR(-ENOMEM);
953
954         get_counters(private, counters);
955
956         return counters;
957 }
958
/* Copy the raw ruleset blob plus a live counter snapshot back to
 * userspace.  The whole blob is copied first; then, entry by entry,
 * the counters and the kernel-internal match/target pointer unions
 * are overwritten with user-visible counter values and names. */
static int
copy_entries_to_user(unsigned int total_size,
                     const struct xt_table *table,
                     void __user *userptr)
{
        unsigned int off, num;
        const struct ipt_entry *e;
        struct xt_counters *counters;
        const struct xt_table_info *private = table->private;
        int ret = 0;
        const void *loc_cpu_entry;

        counters = alloc_counters(table);
        if (IS_ERR(counters))
                return PTR_ERR(counters);

        /* choose the copy that is on our node/cpu, ...
         * This choice is lazy (because current thread is
         * allowed to migrate to another cpu)
         */
        loc_cpu_entry = private->entries[raw_smp_processor_id()];
        if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
                ret = -EFAULT;
                goto free_counters;
        }

        /* FIXME: use iterator macros --RR */
        /* ... then go back and fix counters and names */
        for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
                unsigned int i;
                const struct ipt_entry_match *m;
                const struct ipt_entry_target *t;

                e = (struct ipt_entry *)(loc_cpu_entry + off);
                /* Patch in the counter snapshot for this entry. */
                if (copy_to_user(userptr + off
                                 + offsetof(struct ipt_entry, counters),
                                 &counters[num],
                                 sizeof(counters[num])) != 0) {
                        ret = -EFAULT;
                        goto free_counters;
                }

                /* Replace each match's kernel pointer with the
                 * user-visible match name. */
                for (i = sizeof(struct ipt_entry);
                     i < e->target_offset;
                     i += m->u.match_size) {
                        m = (void *)e + i;

                        if (copy_to_user(userptr + off + i
                                         + offsetof(struct ipt_entry_match,
                                                    u.user.name),
                                         m->u.kernel.match->name,
                                         strlen(m->u.kernel.match->name)+1)
                            != 0) {
                                ret = -EFAULT;
                                goto free_counters;
                        }
                }

                /* Likewise for the target name. */
                t = ipt_get_target_c(e);
                if (copy_to_user(userptr + off + e->target_offset
                                 + offsetof(struct ipt_entry_target,
                                            u.user.name),
                                 t->u.kernel.target->name,
                                 strlen(t->u.kernel.target->name)+1) != 0) {
                        ret = -EFAULT;
                        goto free_counters;
                }
        }

 free_counters:
        vfree(counters);
        return ret;
}
1032
1033 #ifdef CONFIG_COMPAT
1034 static void compat_standard_from_user(void *dst, const void *src)
1035 {
1036         int v = *(compat_int_t *)src;
1037
1038         if (v > 0)
1039                 v += xt_compat_calc_jump(AF_INET, v);
1040         memcpy(dst, &v, sizeof(v));
1041 }
1042
1043 static int compat_standard_to_user(void __user *dst, const void *src)
1044 {
1045         compat_int_t cv = *(int *)src;
1046
1047         if (cv > 0)
1048                 cv -= xt_compat_calc_jump(AF_INET, cv);
1049         return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
1050 }
1051
/* IPT_MATCH_ITERATE callback: accumulate this match's kernel-vs-compat
 * size difference into the running offset total. */
static inline int
compat_calc_match(const struct ipt_entry_match *m, int *size)
{
        *size += xt_compat_match_offset(m->u.kernel.match);
        return 0;
}
1058
/* Compute how much smaller entry @e is in compat layout (matches plus
 * target), register that delta for this entry's offset, shrink
 * newinfo->size accordingly, and shift down every hook entry /
 * underflow offset that lies after @e. */
static int compat_calc_entry(const struct ipt_entry *e,
                             const struct xt_table_info *info,
                             const void *base, struct xt_table_info *newinfo)
{
        const struct ipt_entry_target *t;
        unsigned int entry_offset;
        int off, i, ret;

        off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
        entry_offset = (void *)e - base;
        IPT_MATCH_ITERATE(e, compat_calc_match, &off);
        t = ipt_get_target_c(e);
        off += xt_compat_target_offset(t->u.kernel.target);
        newinfo->size -= off;
        ret = xt_compat_add_offset(AF_INET, entry_offset, off);
        if (ret)
                return ret;

        /* Hooks located beyond this entry move down by the shrinkage. */
        for (i = 0; i < NF_INET_NUMHOOKS; i++) {
                if (info->hook_entry[i] &&
                    (e < (struct ipt_entry *)(base + info->hook_entry[i])))
                        newinfo->hook_entry[i] -= off;
                if (info->underflow[i] &&
                    (e < (struct ipt_entry *)(base + info->underflow[i])))
                        newinfo->underflow[i] -= off;
        }
        return 0;
}
1087
1088 static int compat_table_info(const struct xt_table_info *info,
1089                              struct xt_table_info *newinfo)
1090 {
1091         struct ipt_entry *iter;
1092         void *loc_cpu_entry;
1093         int ret;
1094
1095         if (!newinfo || !info)
1096                 return -EINVAL;
1097
1098         /* we dont care about newinfo->entries[] */
1099         memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
1100         newinfo->initial_entries = 0;
1101         loc_cpu_entry = info->entries[raw_smp_processor_id()];
1102         xt_entry_foreach(iter, loc_cpu_entry, info->size) {
1103                 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
1104                 if (ret != 0)
1105                         return ret;
1106         }
1107         return 0;
1108 }
1109 #endif
1110
1111 static int get_info(struct net *net, void __user *user,
1112                     const int *len, int compat)
1113 {
1114         char name[IPT_TABLE_MAXNAMELEN];
1115         struct xt_table *t;
1116         int ret;
1117
1118         if (*len != sizeof(struct ipt_getinfo)) {
1119                 duprintf("length %u != %zu\n", *len,
1120                          sizeof(struct ipt_getinfo));
1121                 return -EINVAL;
1122         }
1123
1124         if (copy_from_user(name, user, sizeof(name)) != 0)
1125                 return -EFAULT;
1126
1127         name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1128 #ifdef CONFIG_COMPAT
1129         if (compat)
1130                 xt_compat_lock(AF_INET);
1131 #endif
1132         t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
1133                                     "iptable_%s", name);
1134         if (t && !IS_ERR(t)) {
1135                 struct ipt_getinfo info;
1136                 const struct xt_table_info *private = t->private;
1137 #ifdef CONFIG_COMPAT
1138                 struct xt_table_info tmp;
1139
1140                 if (compat) {
1141                         ret = compat_table_info(private, &tmp);
1142                         xt_compat_flush_offsets(AF_INET);
1143                         private = &tmp;
1144                 }
1145 #endif
1146                 info.valid_hooks = t->valid_hooks;
1147                 memcpy(info.hook_entry, private->hook_entry,
1148                        sizeof(info.hook_entry));
1149                 memcpy(info.underflow, private->underflow,
1150                        sizeof(info.underflow));
1151                 info.num_entries = private->number;
1152                 info.size = private->size;
1153                 strcpy(info.name, name);
1154
1155                 if (copy_to_user(user, &info, *len) != 0)
1156                         ret = -EFAULT;
1157                 else
1158                         ret = 0;
1159
1160                 xt_table_unlock(t);
1161                 module_put(t->me);
1162         } else
1163                 ret = t ? PTR_ERR(t) : -ENOENT;
1164 #ifdef CONFIG_COMPAT
1165         if (compat)
1166                 xt_compat_unlock(AF_INET);
1167 #endif
1168         return ret;
1169 }
1170
1171 static int
1172 get_entries(struct net *net, struct ipt_get_entries __user *uptr,
1173             const int *len)
1174 {
1175         int ret;
1176         struct ipt_get_entries get;
1177         struct xt_table *t;
1178
1179         if (*len < sizeof(get)) {
1180                 duprintf("get_entries: %u < %zu\n", *len, sizeof(get));
1181                 return -EINVAL;
1182         }
1183         if (copy_from_user(&get, uptr, sizeof(get)) != 0)
1184                 return -EFAULT;
1185         if (*len != sizeof(struct ipt_get_entries) + get.size) {
1186                 duprintf("get_entries: %u != %zu\n",
1187                          *len, sizeof(get) + get.size);
1188                 return -EINVAL;
1189         }
1190
1191         t = xt_find_table_lock(net, AF_INET, get.name);
1192         if (t && !IS_ERR(t)) {
1193                 const struct xt_table_info *private = t->private;
1194                 duprintf("t->private->number = %u\n", private->number);
1195                 if (get.size == private->size)
1196                         ret = copy_entries_to_user(private->size,
1197                                                    t, uptr->entrytable);
1198                 else {
1199                         duprintf("get_entries: I've got %u not %u!\n",
1200                                  private->size, get.size);
1201                         ret = -EAGAIN;
1202                 }
1203                 module_put(t->me);
1204                 xt_table_unlock(t);
1205         } else
1206                 ret = t ? PTR_ERR(t) : -ENOENT;
1207
1208         return ret;
1209 }
1210
/* Swap the named table's ruleset for @newinfo: replace the table info
 * under the xt locks, adjust the module reference count, hand the old
 * counters back to userspace, and tear down the old ruleset.  On
 * success ownership of @newinfo passes to the table; on failure the
 * caller still owns it.  Returns 0 or a negative errno. */
static int
__do_replace(struct net *net, const char *name, unsigned int valid_hooks,
             struct xt_table_info *newinfo, unsigned int num_counters,
             void __user *counters_ptr)
{
        int ret;
        struct xt_table *t;
        struct xt_table_info *oldinfo;
        struct xt_counters *counters;
        void *loc_cpu_old_entry;
        struct ipt_entry *iter;

        ret = 0;
        counters = vmalloc(num_counters * sizeof(struct xt_counters));
        if (!counters) {
                ret = -ENOMEM;
                goto out;
        }

        t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
                                    "iptable_%s", name);
        if (!t || IS_ERR(t)) {
                ret = t ? PTR_ERR(t) : -ENOENT;
                goto free_newinfo_counters_untrans;
        }

        /* You lied! */
        if (valid_hooks != t->valid_hooks) {
                duprintf("Valid hook crap: %08X vs %08X\n",
                         valid_hooks, t->valid_hooks);
                ret = -EINVAL;
                goto put_module;
        }

        oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
        if (!oldinfo)
                goto put_module;

        /* Update module usage count based on number of rules.
         * The two conditionals below together perform one module_put()
         * for each condition that holds: one if the old table held
         * more than its initial rules, and one if the new table does
         * not exceed the initial rules. */
        duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
                oldinfo->number, oldinfo->initial_entries, newinfo->number);
        if ((oldinfo->number > oldinfo->initial_entries) ||
            (newinfo->number <= oldinfo->initial_entries))
                module_put(t->me);
        if ((oldinfo->number > oldinfo->initial_entries) &&
            (newinfo->number <= oldinfo->initial_entries))
                module_put(t->me);

        /* Get the old counters, and synchronize with replace */
        get_counters(oldinfo, counters);

        /* Decrease module usage counts and free resource */
        loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
        xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size)
                cleanup_entry(iter, net);

        xt_free_table_info(oldinfo);
        if (copy_to_user(counters_ptr, counters,
                         sizeof(struct xt_counters) * num_counters) != 0)
                ret = -EFAULT;
        vfree(counters);
        xt_table_unlock(t);
        return ret;

 put_module:
        module_put(t->me);
        xt_table_unlock(t);
 free_newinfo_counters_untrans:
        vfree(counters);
 out:
        return ret;
}
1283
1284 static int
1285 do_replace(struct net *net, const void __user *user, unsigned int len)
1286 {
1287         int ret;
1288         struct ipt_replace tmp;
1289         struct xt_table_info *newinfo;
1290         void *loc_cpu_entry;
1291         struct ipt_entry *iter;
1292
1293         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1294                 return -EFAULT;
1295
1296         /* overflow check */
1297         if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1298                 return -ENOMEM;
1299
1300         newinfo = xt_alloc_table_info(tmp.size);
1301         if (!newinfo)
1302                 return -ENOMEM;
1303
1304         /* choose the copy that is on our node/cpu */
1305         loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1306         if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1307                            tmp.size) != 0) {
1308                 ret = -EFAULT;
1309                 goto free_newinfo;
1310         }
1311
1312         ret = translate_table(net, tmp.name, tmp.valid_hooks,
1313                               newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
1314                               tmp.hook_entry, tmp.underflow);
1315         if (ret != 0)
1316                 goto free_newinfo;
1317
1318         duprintf("ip_tables: Translated table\n");
1319
1320         ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1321                            tmp.num_counters, tmp.counters);
1322         if (ret)
1323                 goto free_newinfo_untrans;
1324         return 0;
1325
1326  free_newinfo_untrans:
1327         xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
1328                 cleanup_entry(iter, net);
1329  free_newinfo:
1330         xt_free_table_info(newinfo);
1331         return ret;
1332 }
1333
1334 static int
1335 do_add_counters(struct net *net, const void __user *user,
1336                 unsigned int len, int compat)
1337 {
1338         unsigned int i, curcpu;
1339         struct xt_counters_info tmp;
1340         struct xt_counters *paddc;
1341         unsigned int num_counters;
1342         const char *name;
1343         int size;
1344         void *ptmp;
1345         struct xt_table *t;
1346         const struct xt_table_info *private;
1347         int ret = 0;
1348         void *loc_cpu_entry;
1349         struct ipt_entry *iter;
1350 #ifdef CONFIG_COMPAT
1351         struct compat_xt_counters_info compat_tmp;
1352
1353         if (compat) {
1354                 ptmp = &compat_tmp;
1355                 size = sizeof(struct compat_xt_counters_info);
1356         } else
1357 #endif
1358         {
1359                 ptmp = &tmp;
1360                 size = sizeof(struct xt_counters_info);
1361         }
1362
1363         if (copy_from_user(ptmp, user, size) != 0)
1364                 return -EFAULT;
1365
1366 #ifdef CONFIG_COMPAT
1367         if (compat) {
1368                 num_counters = compat_tmp.num_counters;
1369                 name = compat_tmp.name;
1370         } else
1371 #endif
1372         {
1373                 num_counters = tmp.num_counters;
1374                 name = tmp.name;
1375         }
1376
1377         if (len != size + num_counters * sizeof(struct xt_counters))
1378                 return -EINVAL;
1379
1380         paddc = vmalloc_node(len - size, numa_node_id());
1381         if (!paddc)
1382                 return -ENOMEM;
1383
1384         if (copy_from_user(paddc, user + size, len - size) != 0) {
1385                 ret = -EFAULT;
1386                 goto free;
1387         }
1388
1389         t = xt_find_table_lock(net, AF_INET, name);
1390         if (!t || IS_ERR(t)) {
1391                 ret = t ? PTR_ERR(t) : -ENOENT;
1392                 goto free;
1393         }
1394
1395         local_bh_disable();
1396         private = t->private;
1397         if (private->number != num_counters) {
1398                 ret = -EINVAL;
1399                 goto unlock_up_free;
1400         }
1401
1402         i = 0;
1403         /* Choose the copy that is on our node */
1404         curcpu = smp_processor_id();
1405         loc_cpu_entry = private->entries[curcpu];
1406         xt_info_wrlock(curcpu);
1407         xt_entry_foreach(iter, loc_cpu_entry, private->size) {
1408                 ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
1409                 ++i;
1410         }
1411         xt_info_wrunlock(curcpu);
1412  unlock_up_free:
1413         local_bh_enable();
1414         xt_table_unlock(t);
1415         module_put(t->me);
1416  free:
1417         vfree(paddc);
1418
1419         return ret;
1420 }
1421
1422 #ifdef CONFIG_COMPAT
/* 32-bit userspace layout of struct ipt_replace, as received from a
 * compat IPT_SO_SET_REPLACE call.  Field meanings mirror the native
 * structure; only pointer width differs. */
struct compat_ipt_replace {
        char                    name[IPT_TABLE_MAXNAMELEN];
        u32                     valid_hooks;
        u32                     num_entries;
        u32                     size;
        u32                     hook_entry[NF_INET_NUMHOOKS];
        u32                     underflow[NF_INET_NUMHOOKS];
        u32                     num_counters;
        compat_uptr_t           counters;       /* struct ipt_counters * */
        struct compat_ipt_entry entries[0];
};
1434
/* Shrink one native entry into compat layout at *dstptr: copy the
 * entry header plus the counter snapshot for slot @i, convert each
 * match and the target via their to_user helpers, and rewrite
 * target_offset/next_offset to the compat sizes.  *dstptr and *size
 * are advanced/adjusted as the output is produced. */
static int
compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
                          unsigned int *size, struct xt_counters *counters,
                          unsigned int i)
{
        struct ipt_entry_target *t;
        struct compat_ipt_entry __user *ce;
        u_int16_t target_offset, next_offset;
        compat_uint_t origsize;
        int ret;

        origsize = *size;
        ce = (struct compat_ipt_entry __user *)*dstptr;
        if (copy_to_user(ce, e, sizeof(struct ipt_entry)) != 0 ||
            copy_to_user(&ce->counters, &counters[i],
            sizeof(counters[i])) != 0)
                return -EFAULT;

        *dstptr += sizeof(struct compat_ipt_entry);
        *size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);

        ret = IPT_MATCH_ITERATE(e, xt_compat_match_to_user, dstptr, size);
        /* origsize - *size is the total shrinkage so far; offsets in
         * the compat entry move down by that amount. */
        target_offset = e->target_offset - (origsize - *size);
        if (ret)
                return ret;
        t = ipt_get_target(e);
        ret = xt_compat_target_to_user(t, dstptr, size);
        if (ret)
                return ret;
        next_offset = e->next_offset - (origsize - *size);
        if (put_user(target_offset, &ce->target_offset) != 0 ||
            put_user(next_offset, &ce->next_offset) != 0)
                return -EFAULT;
        return 0;
}
1470
1471 static int
1472 compat_find_calc_match(struct ipt_entry_match *m,
1473                        const char *name,
1474                        const struct ipt_ip *ip,
1475                        unsigned int hookmask,
1476                        int *size, unsigned int *i)
1477 {
1478         struct xt_match *match;
1479
1480         match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
1481                                                       m->u.user.revision),
1482                                         "ipt_%s", m->u.user.name);
1483         if (IS_ERR(match) || !match) {
1484                 duprintf("compat_check_calc_match: `%s' not found\n",
1485                          m->u.user.name);
1486                 return match ? PTR_ERR(match) : -ENOENT;
1487         }
1488         m->u.kernel.match = match;
1489         *size += xt_compat_match_offset(match);
1490
1491         (*i)++;
1492         return 0;
1493 }
1494
/* Match-iterator callback: drop one match's module reference.  When
 * @i is non-NULL only the first *i matches are released -- the
 * post-decrement test stops iteration (returns 1) once the budget is
 * exhausted; a NULL @i releases every match. */
static int
compat_release_match(struct ipt_entry_match *m, unsigned int *i)
{
        if (i && (*i)-- == 0)
                return 1;

        module_put(m->u.kernel.match->me);
        return 0;
}
1504
/* Undo check_compat_entry_size_and_hooks() for one compat entry:
 * release every match's module reference, then the target's. */
static void compat_release_entry(struct compat_ipt_entry *e)
{
        struct ipt_entry_target *t;

        /* Cleanup all matches */
        COMPAT_IPT_MATCH_ITERATE(e, compat_release_match, NULL);
        t = compat_ipt_get_target(e);
        module_put(t->u.kernel.target->me);
}
1514
/* Validate one 32-bit compat entry: alignment and bounds within the
 * blob, minimum size, basic entry sanity, then match and target
 * lookup (taking module references) while accumulating the
 * compat-to-native size delta registered for this entry's offset.
 * Hook entries / underflows that land on this entry are recorded in
 * @newinfo.  On failure all references taken so far are dropped. */
static int
check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
                                  struct xt_table_info *newinfo,
                                  unsigned int *size,
                                  const unsigned char *base,
                                  const unsigned char *limit,
                                  const unsigned int *hook_entries,
                                  const unsigned int *underflows,
                                  const char *name)
{
        struct ipt_entry_target *t;
        struct xt_target *target;
        unsigned int entry_offset;
        unsigned int j;
        int ret, off, h;

        duprintf("check_compat_entry_size_and_hooks %p\n", e);
        if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 ||
            (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) {
                duprintf("Bad offset %p, limit = %p\n", e, limit);
                return -EINVAL;
        }

        if (e->next_offset < sizeof(struct compat_ipt_entry) +
                             sizeof(struct compat_xt_entry_target)) {
                duprintf("checking: element %p size %u\n",
                         e, e->next_offset);
                return -EINVAL;
        }

        /* For purposes of check_entry casting the compat entry is fine */
        ret = check_entry((struct ipt_entry *)e, name);
        if (ret)
                return ret;

        /* off accumulates how much larger the native entry will be. */
        off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
        entry_offset = (void *)e - (void *)base;
        j = 0;
        /* j counts successfully-found matches for the unwind path. */
        ret = COMPAT_IPT_MATCH_ITERATE(e, compat_find_calc_match, name,
                                       &e->ip, e->comefrom, &off, &j);
        if (ret != 0)
                goto release_matches;

        t = compat_ipt_get_target(e);
        target = try_then_request_module(xt_find_target(AF_INET,
                                                        t->u.user.name,
                                                        t->u.user.revision),
                                         "ipt_%s", t->u.user.name);
        if (IS_ERR(target) || !target) {
                duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
                         t->u.user.name);
                ret = target ? PTR_ERR(target) : -ENOENT;
                goto release_matches;
        }
        t->u.kernel.target = target;

        off += xt_compat_target_offset(target);
        *size += off;
        ret = xt_compat_add_offset(AF_INET, entry_offset, off);
        if (ret)
                goto out;

        /* Check hooks & underflows */
        for (h = 0; h < NF_INET_NUMHOOKS; h++) {
                if ((unsigned char *)e - base == hook_entries[h])
                        newinfo->hook_entry[h] = hook_entries[h];
                if ((unsigned char *)e - base == underflows[h])
                        newinfo->underflow[h] = underflows[h];
        }

        /* Clear counters and comefrom */
        memset(&e->counters, 0, sizeof(e->counters));
        e->comefrom = 0;
        return 0;

out:
        module_put(t->u.kernel.target->me);
release_matches:
        IPT_MATCH_ITERATE(e, compat_release_match, &j);
        return ret;
}
1596
/* Expand one compat entry into native layout at *dstptr: widen the
 * entry header, convert matches and target via their from_user
 * helpers, and rebase target_offset/next_offset plus @newinfo's
 * hook/underflow offsets by the resulting size change. */
static int
compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
                            unsigned int *size, const char *name,
                            struct xt_table_info *newinfo, unsigned char *base)
{
        struct ipt_entry_target *t;
        struct xt_target *target;
        struct ipt_entry *de;
        unsigned int origsize;
        int ret, h;

        ret = 0;
        origsize = *size;
        de = (struct ipt_entry *)*dstptr;
        memcpy(de, e, sizeof(struct ipt_entry));
        memcpy(&de->counters, &e->counters, sizeof(e->counters));

        *dstptr += sizeof(struct ipt_entry);
        *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);

        ret = COMPAT_IPT_MATCH_ITERATE(e, xt_compat_match_from_user,
                                       dstptr, size);
        if (ret)
                return ret;
        /* origsize - *size tracks the net layout change so far. */
        de->target_offset = e->target_offset - (origsize - *size);
        t = compat_ipt_get_target(e);
        target = t->u.kernel.target;
        xt_compat_target_from_user(t, dstptr, size);

        de->next_offset = e->next_offset - (origsize - *size);
        for (h = 0; h < NF_INET_NUMHOOKS; h++) {
                if ((unsigned char *)de - base < newinfo->hook_entry[h])
                        newinfo->hook_entry[h] -= origsize - *size;
                if ((unsigned char *)de - base < newinfo->underflow[h])
                        newinfo->underflow[h] -= origsize - *size;
        }
        return ret;
}
1635
/* Run the native checkentry hooks on an already-translated compat
 * entry: each match is checked (counted in j so a failure unwinds
 * exactly the checked ones), then the target.  Returns 0 or a
 * negative errno. */
static int
compat_check_entry(struct ipt_entry *e, struct net *net, const char *name)
{
        struct xt_mtchk_param mtpar;
        unsigned int j;
        int ret;

        j = 0;
        mtpar.net       = net;
        mtpar.table     = name;
        mtpar.entryinfo = &e->ip;
        mtpar.hook_mask = e->comefrom;
        mtpar.family    = NFPROTO_IPV4;
        ret = IPT_MATCH_ITERATE(e, check_match, &mtpar, &j);
        if (ret)
                goto cleanup_matches;

        ret = check_target(e, net, name);
        if (ret)
                goto cleanup_matches;
        return 0;

 cleanup_matches:
        IPT_MATCH_ITERATE(e, cleanup_match, net, &j);
        return ret;
}
1662
/*
 * Translate a ruleset blob handed in by 32-bit userland (compat layout)
 * into the native layout.
 *
 * On success, *pinfo/*pentry0 are replaced by a freshly allocated native
 * table and the caller's compat table_info is freed here.  On failure the
 * caller's input is left intact (caller frees it) and every match/target
 * module reference taken during checking is dropped again.
 */
static int
translate_compat_table(struct net *net,
		       const char *name,
		       unsigned int valid_hooks,
		       struct xt_table_info **pinfo,
		       void **pentry0,
		       unsigned int total_size,
		       unsigned int number,
		       unsigned int *hook_entries,
		       unsigned int *underflows)
{
	unsigned int i, j;
	struct xt_table_info *newinfo, *info;
	void *pos, *entry0, *entry1;
	struct compat_ipt_entry *iter0;
	struct ipt_entry *iter1;
	unsigned int size;
	int ret;

	info = *pinfo;
	entry0 = *pentry0;
	size = total_size;
	info->number = number;

	/* Init all hooks to impossible value. */
	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
		info->hook_entry[i] = 0xFFFFFFFF;
		info->underflow[i] = 0xFFFFFFFF;
	}

	duprintf("translate_compat_table: size %u\n", info->size);
	j = 0;	/* number of entries holding match/target references */
	xt_compat_lock(AF_INET);
	/* Walk through entries, checking offsets. */
	xt_entry_foreach(iter0, entry0, total_size) {
		ret = check_compat_entry_size_and_hooks(iter0, info, &size,
		      entry0, entry0 + total_size, hook_entries, underflows,
		      name);
		if (ret != 0)
			goto out_unlock;
		++j;
	}

	ret = -EINVAL;
	if (j != number) {
		duprintf("translate_compat_table: %u not %u entries\n",
			 j, number);
		goto out_unlock;
	}

	/* Check hooks all assigned */
	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
		/* Only hooks which are valid */
		if (!(valid_hooks & (1 << i)))
			continue;
		if (info->hook_entry[i] == 0xFFFFFFFF) {
			duprintf("Invalid hook entry %u %u\n",
				 i, hook_entries[i]);
			goto out_unlock;
		}
		if (info->underflow[i] == 0xFFFFFFFF) {
			duprintf("Invalid underflow %u %u\n",
				 i, underflows[i]);
			goto out_unlock;
		}
	}

	/* 'size' is now the translated (native) size computed above. */
	ret = -ENOMEM;
	newinfo = xt_alloc_table_info(size);
	if (!newinfo)
		goto out_unlock;

	newinfo->number = number;
	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
		newinfo->hook_entry[i] = info->hook_entry[i];
		newinfo->underflow[i] = info->underflow[i];
	}
	entry1 = newinfo->entries[raw_smp_processor_id()];
	pos = entry1;
	size = total_size;
	/* Second pass: convert every compat entry into entry1. */
	xt_entry_foreach(iter0, entry0, total_size) {
		ret = compat_copy_entry_from_user(iter0, &pos,
		      &size, name, newinfo, entry1);
		if (ret != 0)
			break;
	}
	xt_compat_flush_offsets(AF_INET);
	xt_compat_unlock(AF_INET);
	if (ret)
		goto free_newinfo;

	ret = -ELOOP;
	if (!mark_source_chains(newinfo, valid_hooks, entry1))
		goto free_newinfo;

	/* Third pass: run the real checkentry hooks on the native copy. */
	i = 0;
	xt_entry_foreach(iter1, entry1, newinfo->size) {
		ret = compat_check_entry(iter1, net, name);
		if (ret != 0)
			break;
		++i;
	}
	if (ret) {
		/*
		 * The first i matches need cleanup_entry (calls ->destroy)
		 * because they had called ->check already. The other j-i
		 * entries need only release.
		 */
		int skip = i;
		j -= i;
		/*
		 * NOTE(review): this walks the compat blob entry0 bounded by
		 * newinfo->size (the larger, translated size) rather than
		 * total_size; the j counter terminates the loop first, but
		 * verify the bound against upstream.
		 */
		xt_entry_foreach(iter0, entry0, newinfo->size) {
			if (skip-- > 0)
				continue;
			if (j-- == 0)
				break;
			compat_release_entry(iter0);
		}
		xt_entry_foreach(iter1, entry1, newinfo->size) {
			if (i-- == 0)
				break;
			cleanup_entry(iter1, net);
		}
		xt_free_table_info(newinfo);
		return ret;
	}

	/* And one copy for every other CPU */
	for_each_possible_cpu(i)
		if (newinfo->entries[i] && newinfo->entries[i] != entry1)
			memcpy(newinfo->entries[i], entry1, newinfo->size);

	/* Hand the native table back; the compat one is ours to free. */
	*pinfo = newinfo;
	*pentry0 = entry1;
	xt_free_table_info(info);
	return 0;

free_newinfo:
	xt_free_table_info(newinfo);
out:
	/* Drop the references taken for the first j compat entries. */
	xt_entry_foreach(iter0, entry0, total_size) {
		if (j-- == 0)
			break;
		compat_release_entry(iter0);
	}
	return ret;
out_unlock:
	xt_compat_flush_offsets(AF_INET);
	xt_compat_unlock(AF_INET);
	goto out;
}
1813
/*
 * IPT_SO_SET_REPLACE handler for 32-bit userland: copy the compat header
 * and rule blob in, translate to native layout, then swap the table in via
 * __do_replace.  On success ownership of newinfo passes to the table; on
 * later failure the translated entries must be cleaned up here.
 */
static int
compat_do_replace(struct net *net, void __user *user, unsigned int len)
{
	int ret;
	struct compat_ipt_replace tmp;
	struct xt_table_info *newinfo;
	void *loc_cpu_entry;
	struct ipt_entry *iter;

	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
		return -EFAULT;

	/* overflow check */
	if (tmp.size >= INT_MAX / num_possible_cpus())
		return -ENOMEM;
	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
		return -ENOMEM;

	newinfo = xt_alloc_table_info(tmp.size);
	if (!newinfo)
		return -ENOMEM;

	/* choose the copy that is on our node/cpu */
	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
			   tmp.size) != 0) {
		ret = -EFAULT;
		goto free_newinfo;
	}

	/* May replace newinfo/loc_cpu_entry with a native-layout table. */
	ret = translate_compat_table(net, tmp.name, tmp.valid_hooks,
				     &newinfo, &loc_cpu_entry, tmp.size,
				     tmp.num_entries, tmp.hook_entry,
				     tmp.underflow);
	if (ret != 0)
		goto free_newinfo;

	duprintf("compat_do_replace: Translated table\n");

	ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
			   tmp.num_counters, compat_ptr(tmp.counters));
	if (ret)
		goto free_newinfo_untrans;
	return 0;

 free_newinfo_untrans:
	/* Translation succeeded, so entries hold refs that must be dropped. */
	xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
		cleanup_entry(iter, net);
 free_newinfo:
	xt_free_table_info(newinfo);
	return ret;
}
1866
1867 static int
1868 compat_do_ipt_set_ctl(struct sock *sk,  int cmd, void __user *user,
1869                       unsigned int len)
1870 {
1871         int ret;
1872
1873         if (!capable(CAP_NET_ADMIN))
1874                 return -EPERM;
1875
1876         switch (cmd) {
1877         case IPT_SO_SET_REPLACE:
1878                 ret = compat_do_replace(sock_net(sk), user, len);
1879                 break;
1880
1881         case IPT_SO_SET_ADD_COUNTERS:
1882                 ret = do_add_counters(sock_net(sk), user, len, 1);
1883                 break;
1884
1885         default:
1886                 duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
1887                 ret = -EINVAL;
1888         }
1889
1890         return ret;
1891 }
1892
/* Layout of the IPT_SO_GET_ENTRIES argument as seen by 32-bit userland;
 * entrytable[] is the variable-length rule blob following the header. */
struct compat_ipt_get_entries {
	char name[IPT_TABLE_MAXNAMELEN];
	compat_uint_t size;
	struct compat_ipt_entry entrytable[0];
};
1898
/*
 * Copy the table's entries out to 32-bit userland, converting each native
 * entry to compat layout and attaching the summed per-entry counters.
 */
static int
compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
			    void __user *userptr)
{
	struct xt_counters *counters;
	const struct xt_table_info *private = table->private;
	void __user *pos;
	unsigned int size;
	int ret = 0;
	const void *loc_cpu_entry;
	unsigned int i = 0;	/* entry index, used to pick counters[i] */
	struct ipt_entry *iter;

	counters = alloc_counters(table);
	if (IS_ERR(counters))
		return PTR_ERR(counters);

	/* choose the copy that is on our node/cpu, ...
	 * This choice is lazy (because current thread is
	 * allowed to migrate to another cpu)
	 */
	loc_cpu_entry = private->entries[raw_smp_processor_id()];
	pos = userptr;
	size = total_size;
	xt_entry_foreach(iter, loc_cpu_entry, total_size) {
		ret = compat_copy_entry_to_user(iter, &pos,
		      &size, counters, i++);
		if (ret != 0)
			break;
	}

	vfree(counters);
	return ret;
}
1933
/*
 * IPT_SO_GET_ENTRIES handler for 32-bit userland.  Validates the request
 * length against the table's compat size and copies the rules out in
 * compat layout.  The compat lock is held across the whole operation
 * because compat_table_info registers per-entry offset deltas that
 * compat_copy_entries_to_user consumes (flushed before unlock).
 */
static int
compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
		   int *len)
{
	int ret;
	struct compat_ipt_get_entries get;
	struct xt_table *t;

	if (*len < sizeof(get)) {
		duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
		return -EINVAL;
	}

	if (copy_from_user(&get, uptr, sizeof(get)) != 0)
		return -EFAULT;

	/* Caller must size the buffer for exactly header + rule blob. */
	if (*len != sizeof(struct compat_ipt_get_entries) + get.size) {
		duprintf("compat_get_entries: %u != %zu\n",
			 *len, sizeof(get) + get.size);
		return -EINVAL;
	}

	xt_compat_lock(AF_INET);
	t = xt_find_table_lock(net, AF_INET, get.name);
	if (t && !IS_ERR(t)) {
		const struct xt_table_info *private = t->private;
		struct xt_table_info info;
		duprintf("t->private->number = %u\n", private->number);
		ret = compat_table_info(private, &info);
		if (!ret && get.size == info.size) {
			ret = compat_copy_entries_to_user(private->size,
							  t, uptr->entrytable);
		} else if (!ret) {
			/* Table changed size since userland asked. */
			duprintf("compat_get_entries: I've got %u not %u!\n",
				 private->size, get.size);
			ret = -EAGAIN;
		}
		xt_compat_flush_offsets(AF_INET);
		module_put(t->me);
		xt_table_unlock(t);
	} else
		ret = t ? PTR_ERR(t) : -ENOENT;

	xt_compat_unlock(AF_INET);
	return ret;
}
1980
1981 static int do_ipt_get_ctl(struct sock *, int, void __user *, int *);
1982
1983 static int
1984 compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1985 {
1986         int ret;
1987
1988         if (!capable(CAP_NET_ADMIN))
1989                 return -EPERM;
1990
1991         switch (cmd) {
1992         case IPT_SO_GET_INFO:
1993                 ret = get_info(sock_net(sk), user, len, 1);
1994                 break;
1995         case IPT_SO_GET_ENTRIES:
1996                 ret = compat_get_entries(sock_net(sk), user, len);
1997                 break;
1998         default:
1999                 ret = do_ipt_get_ctl(sk, cmd, user, len);
2000         }
2001         return ret;
2002 }
2003 #endif
2004
2005 static int
2006 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2007 {
2008         int ret;
2009
2010         if (!capable(CAP_NET_ADMIN))
2011                 return -EPERM;
2012
2013         switch (cmd) {
2014         case IPT_SO_SET_REPLACE:
2015                 ret = do_replace(sock_net(sk), user, len);
2016                 break;
2017
2018         case IPT_SO_SET_ADD_COUNTERS:
2019                 ret = do_add_counters(sock_net(sk), user, len, 0);
2020                 break;
2021
2022         default:
2023                 duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
2024                 ret = -EINVAL;
2025         }
2026
2027         return ret;
2028 }
2029
/*
 * Native getsockopt() entry point: table info, entry dump, and extension
 * revision queries.  Requires CAP_NET_ADMIN.
 */
static int
do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	switch (cmd) {
	case IPT_SO_GET_INFO:
		ret = get_info(sock_net(sk), user, len, 0);
		break;

	case IPT_SO_GET_ENTRIES:
		ret = get_entries(sock_net(sk), user, len);
		break;

	case IPT_SO_GET_REVISION_MATCH:
	case IPT_SO_GET_REVISION_TARGET: {
		struct ipt_get_revision rev;
		int target;	/* 1 = target lookup, 0 = match lookup */

		if (*len != sizeof(rev)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
			ret = -EFAULT;
			break;
		}

		if (cmd == IPT_SO_GET_REVISION_TARGET)
			target = 1;
		else
			target = 0;

		/* Autoload the "ipt_<name>" module if the first lookup fails. */
		try_then_request_module(xt_find_revision(AF_INET, rev.name,
							 rev.revision,
							 target, &ret),
					"ipt_%s", rev.name);
		break;
	}

	default:
		duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
		ret = -EINVAL;
	}

	return ret;
}
2080
/*
 * Create and register a table from a template @repl: allocate the
 * per-cpu entry storage, copy in the initial ruleset, validate it with
 * translate_table, and register with the xtables core.  Returns the live
 * xt_table on success or an ERR_PTR on failure.
 */
struct xt_table *ipt_register_table(struct net *net,
				    const struct xt_table *table,
				    const struct ipt_replace *repl)
{
	int ret;
	struct xt_table_info *newinfo;
	/* Empty placeholder info; xt_register_table swaps in newinfo. */
	struct xt_table_info bootstrap
		= { 0, 0, 0, { 0 }, { 0 }, { } };
	void *loc_cpu_entry;
	struct xt_table *new_table;

	newinfo = xt_alloc_table_info(repl->size);
	if (!newinfo) {
		ret = -ENOMEM;
		goto out;
	}

	/* choose the copy on our node/cpu, but dont care about preemption */
	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
	memcpy(loc_cpu_entry, repl->entries, repl->size);

	ret = translate_table(net, table->name, table->valid_hooks,
			      newinfo, loc_cpu_entry, repl->size,
			      repl->num_entries,
			      repl->hook_entry,
			      repl->underflow);
	if (ret != 0)
		goto out_free;

	new_table = xt_register_table(net, table, &bootstrap, newinfo);
	if (IS_ERR(new_table)) {
		ret = PTR_ERR(new_table);
		goto out_free;
	}

	return new_table;

out_free:
	xt_free_table_info(newinfo);
out:
	return ERR_PTR(ret);
}
2123
/*
 * Unregister @table and free its rules: run cleanup_entry (->destroy,
 * module refs) on every entry, then release the table_info itself.
 */
void ipt_unregister_table(struct net *net, struct xt_table *table)
{
	struct xt_table_info *private;
	void *loc_cpu_entry;
	struct module *table_owner = table->me;
	struct ipt_entry *iter;

	private = xt_unregister_table(table);

	/* Decrease module usage counts and free resources */
	loc_cpu_entry = private->entries[raw_smp_processor_id()];
	xt_entry_foreach(iter, loc_cpu_entry, private->size)
		cleanup_entry(iter, net);
	/* Extra rules beyond the built-in set pinned the owner module. */
	if (private->number > private->initial_entries)
		module_put(table_owner);
	xt_free_table_info(private);
}
2141
/*
 * Decide whether an ICMP type/code pair falls inside the match's
 * configured range; 'invert' flips the verdict.  test_type == 0xFF is
 * the wildcard that matches every type and code.
 */
static inline bool
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
		     u_int8_t type, u_int8_t code,
		     bool invert)
{
	bool hit;

	if (test_type == 0xFF)
		hit = true;
	else
		hit = type == test_type &&
		      code >= min_code && code <= max_code;

	return hit ^ invert;
}
2152
/*
 * xt_match ->match hook for the built-in "icmp" match: pull the ICMP
 * header out of the skb and compare type/code against the rule.
 */
static bool
icmp_match(const struct sk_buff *skb, const struct xt_match_param *par)
{
	const struct icmphdr *ic;
	struct icmphdr _icmph;	/* on-stack copy if header is non-linear */
	const struct ipt_icmp *icmpinfo = par->matchinfo;

	/* Must not be a fragment. */
	if (par->fragoff != 0)
		return false;

	ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph);
	if (ic == NULL) {
		/* We've been asked to examine this packet, and we
		 * can't.  Hence, no choice but to drop.
		 */
		duprintf("Dropping evil ICMP tinygram.\n");
		*par->hotdrop = true;
		return false;
	}

	/* code[0]/code[1] hold the inclusive min/max code range. */
	return icmp_type_code_match(icmpinfo->type,
				    icmpinfo->code[0],
				    icmpinfo->code[1],
				    ic->type, ic->code,
				    !!(icmpinfo->invflags&IPT_ICMP_INV));
}
2180
2181 static bool icmp_checkentry(const struct xt_mtchk_param *par)
2182 {
2183         const struct ipt_icmp *icmpinfo = par->matchinfo;
2184
2185         /* Must specify no unknown invflags */
2186         return !(icmpinfo->invflags & ~IPT_ICMP_INV);
2187 }
2188
/* The built-in targets: standard (NULL) and error. */
/* Standard verdict target (ACCEPT/DROP/jump); ->target is NULL so the
 * core treats the int-sized target data as the verdict itself. */
static struct xt_target ipt_standard_target __read_mostly = {
	.name		= IPT_STANDARD_TARGET,
	.targetsize	= sizeof(int),
	.family		= NFPROTO_IPV4,
#ifdef CONFIG_COMPAT
	.compatsize	= sizeof(compat_int_t),
	.compat_from_user = compat_standard_from_user,
	.compat_to_user	= compat_standard_to_user,
#endif
};
2200
/* ERROR target: marks chain ends / user-defined chain heads; hitting it
 * at runtime invokes ipt_error. */
static struct xt_target ipt_error_target __read_mostly = {
	.name		= IPT_ERROR_TARGET,
	.target		= ipt_error,
	.targetsize	= IPT_FUNCTION_MAXNAMELEN,
	.family		= NFPROTO_IPV4,
};
2207
/* sockopt registration: routes IPT_SO_* {get,set}sockopt calls (and their
 * 32-bit compat variants) to the handlers above. */
static struct nf_sockopt_ops ipt_sockopts = {
	.pf		= PF_INET,
	.set_optmin	= IPT_BASE_CTL,
	.set_optmax	= IPT_SO_SET_MAX+1,
	.set		= do_ipt_set_ctl,
#ifdef CONFIG_COMPAT
	.compat_set	= compat_do_ipt_set_ctl,
#endif
	.get_optmin	= IPT_BASE_CTL,
	.get_optmax	= IPT_SO_GET_MAX+1,
	.get		= do_ipt_get_ctl,
#ifdef CONFIG_COMPAT
	.compat_get	= compat_do_ipt_get_ctl,
#endif
	.owner		= THIS_MODULE,
};
2224
/* Built-in "icmp" match registration. */
static struct xt_match icmp_matchstruct __read_mostly = {
	.name		= "icmp",
	.match		= icmp_match,
	.matchsize	= sizeof(struct ipt_icmp),
	.checkentry	= icmp_checkentry,
	.proto		= IPPROTO_ICMP,
	.family		= NFPROTO_IPV4,
};
2233
/* Per-netns init: set up the IPv4 xtables state (proc entries etc.). */
static int __net_init ip_tables_net_init(struct net *net)
{
	return xt_proto_init(net, NFPROTO_IPV4);
}
2238
/* Per-netns teardown: release what ip_tables_net_init set up. */
static void __net_exit ip_tables_net_exit(struct net *net)
{
	xt_proto_fini(net, NFPROTO_IPV4);
}
2243
/* Hook per-netns init/exit into the network namespace machinery. */
static struct pernet_operations ip_tables_net_ops = {
	.init = ip_tables_net_init,
	.exit = ip_tables_net_exit,
};
2248
/*
 * Module init: register pernet ops, the two built-in targets, the icmp
 * match, and finally the sockopt interface.  The error labels unwind in
 * exact reverse order of registration.
 */
static int __init ip_tables_init(void)
{
	int ret;

	ret = register_pernet_subsys(&ip_tables_net_ops);
	if (ret < 0)
		goto err1;

	/* Noone else will be downing sem now, so we won't sleep */
	ret = xt_register_target(&ipt_standard_target);
	if (ret < 0)
		goto err2;
	ret = xt_register_target(&ipt_error_target);
	if (ret < 0)
		goto err3;
	ret = xt_register_match(&icmp_matchstruct);
	if (ret < 0)
		goto err4;

	/* Register setsockopt */
	ret = nf_register_sockopt(&ipt_sockopts);
	if (ret < 0)
		goto err5;

	printk(KERN_INFO "ip_tables: (C) 2000-2006 Netfilter Core Team\n");
	return 0;

err5:
	xt_unregister_match(&icmp_matchstruct);
err4:
	xt_unregister_target(&ipt_error_target);
err3:
	xt_unregister_target(&ipt_standard_target);
err2:
	unregister_pernet_subsys(&ip_tables_net_ops);
err1:
	return ret;
}
2287
/* Module exit: tear everything down in reverse order of ip_tables_init. */
static void __exit ip_tables_fini(void)
{
	nf_unregister_sockopt(&ipt_sockopts);

	xt_unregister_match(&icmp_matchstruct);
	xt_unregister_target(&ipt_error_target);
	xt_unregister_target(&ipt_standard_target);

	unregister_pernet_subsys(&ip_tables_net_ops);
}
2298
/* Public API for dependent modules (iptable_filter, iptable_nat, ...). */
EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
EXPORT_SYMBOL(ipt_do_table);
module_init(ip_tables_init);
module_exit(ip_tables_fini);