[NETFILTER]: Move ipv4 specific code from net/core/netfilter.c to net/ipv4/netfilter.c
[linux-2.6.git] / net / core / netfilter.c
1 /* netfilter.c: look after the filters for various protocols. 
2  * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
3  *
4  * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
5  * way.
6  *
7  * Rusty Russell (C)2000 -- This code is GPL.
8  *
9  * February 2000: Modified by James Morris to have 1 queue per protocol.
10  * 15-Mar-2000:   Added NF_REPEAT --RR.
11  * 08-May-2003:   Internal logging interface added by Jozsef Kadlecsik.
12  */
13 #include <linux/config.h>
14 #include <linux/kernel.h>
15 #include <linux/netfilter.h>
16 #include <net/protocol.h>
17 #include <linux/init.h>
18 #include <linux/skbuff.h>
19 #include <linux/wait.h>
20 #include <linux/module.h>
21 #include <linux/interrupt.h>
22 #include <linux/if.h>
23 #include <linux/netdevice.h>
24 #include <linux/inetdevice.h>
25 #include <net/sock.h>
26
27 /* In this code, we can be waiting indefinitely for userspace to
28  * service a packet if a hook returns NF_QUEUE.  We could keep a count
29  * of skbuffs queued for userspace, and not deregister a hook unless
30  * this is zero, but that sucks.  Now, we simply check when the
31  * packets come back: if the hook is gone, the packet is discarded. */
/* Debug tracing helper: forwards its arguments to printk() when
 * CONFIG_NETFILTER_DEBUG is defined and expands to nothing otherwise. */
#ifdef CONFIG_NETFILTER_DEBUG
#define NFDEBUG(format, args...)  printk(format , ## args)
#else
#define NFDEBUG(format, args...)
#endif
37
/* Sockopts only registered and called from user context, so
   net locking would be overkill.  Also, [gs]etsockopt calls may
   sleep. */
static DECLARE_MUTEX(nf_sockopt_mutex);

/* Hook lists, indexed first by protocol family and then by hook number. */
struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
/* Registered sockopt ranges; walked and modified under nf_sockopt_mutex. */
static LIST_HEAD(nf_sockopts);
/* Held around every insertion into and removal from nf_hooks. */
static DEFINE_SPINLOCK(nf_hook_lock);
46
/* 
 * A queue handler may be registered for each protocol.  Each is protected by
 * long term mutex.  The handler must provide an outfn() to accept packets
 * for queueing and must reinject all packets it receives, no matter what.
 */
static struct nf_queue_handler_t {
        nf_queue_outfn_t outfn; /* NULL while no handler is registered */
        void *data;             /* passed back to outfn() on every call */
} queue_handler[NPROTO];
/* Read-held while nf_queue() looks up and calls a handler; write-held
 * while a handler is installed or cleared. */
static DEFINE_RWLOCK(queue_handler_lock);
57
58 int nf_register_hook(struct nf_hook_ops *reg)
59 {
60         struct list_head *i;
61
62         spin_lock_bh(&nf_hook_lock);
63         list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) {
64                 if (reg->priority < ((struct nf_hook_ops *)i)->priority)
65                         break;
66         }
67         list_add_rcu(&reg->list, i->prev);
68         spin_unlock_bh(&nf_hook_lock);
69
70         synchronize_net();
71         return 0;
72 }
73
/*
 * Unlink @reg from its hook list under nf_hook_lock, then call
 * synchronize_net() before returning to the caller.
 */
void nf_unregister_hook(struct nf_hook_ops *reg)
{
        spin_lock_bh(&nf_hook_lock);
        list_del_rcu(&reg->list);
        spin_unlock_bh(&nf_hook_lock);

        synchronize_net();
}
82
/*
 * Return non-zero when the half-open ranges [min1, max1) and [min2, max2)
 * share at least one value, zero otherwise.
 */
static inline int overlap(int min1, int max1, int min2, int max2)
{
        /* First range ends at or before the second one starts. */
        if (max1 <= min2)
                return 0;

        /* Otherwise they intersect iff the first starts before the second ends. */
        return min1 < max2;
}
88
89 /* Functions to register sockopt ranges (exclusive). */
90 int nf_register_sockopt(struct nf_sockopt_ops *reg)
91 {
92         struct list_head *i;
93         int ret = 0;
94
95         if (down_interruptible(&nf_sockopt_mutex) != 0)
96                 return -EINTR;
97
98         list_for_each(i, &nf_sockopts) {
99                 struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
100                 if (ops->pf == reg->pf
101                     && (overlap(ops->set_optmin, ops->set_optmax, 
102                                 reg->set_optmin, reg->set_optmax)
103                         || overlap(ops->get_optmin, ops->get_optmax, 
104                                    reg->get_optmin, reg->get_optmax))) {
105                         NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
106                                 ops->set_optmin, ops->set_optmax, 
107                                 ops->get_optmin, ops->get_optmax, 
108                                 reg->set_optmin, reg->set_optmax,
109                                 reg->get_optmin, reg->get_optmax);
110                         ret = -EBUSY;
111                         goto out;
112                 }
113         }
114
115         list_add(&reg->list, &nf_sockopts);
116 out:
117         up(&nf_sockopt_mutex);
118         return ret;
119 }
120
/*
 * Remove a sockopt range from nf_sockopts.
 *
 * If a get/set handler of @reg is still running (reg->use != 0), record the
 * current task in reg->cleanup_task, sleep, and re-check after being woken;
 * the entry is only unlinked once no call is in flight.
 */
void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
{
        /* No point being interruptible: we're probably in cleanup_module() */
 restart:
        down(&nf_sockopt_mutex);
        if (reg->use != 0) {
                /* To be woken by nf_sockopt call... */
                /* FIXME: Stuart Young's name appears gratuitously. */
                set_current_state(TASK_UNINTERRUPTIBLE);
                reg->cleanup_task = current;
                /* Release the mutex before sleeping: nf_sockopt() takes it
                   to decrement ->use and wake this task. */
                up(&nf_sockopt_mutex);
                schedule();
                goto restart;
        }
        list_del(&reg->list);
        up(&nf_sockopt_mutex);
}
138
/*
 * Call get/setsockopt().
 *
 * Finds the first registered entry whose protocol family equals @pf and
 * whose get range (when @get is non-zero) or set range (when @get is zero)
 * contains @val, and calls its handler with nf_sockopt_mutex released.
 *
 * Returns the handler's result, -ENOPROTOOPT when no entry matches, or
 * -EINTR when the initial wait for the mutex is interrupted.
 */
static int nf_sockopt(struct sock *sk, int pf, int val, 
                      char __user *opt, int *len, int get)
{
        struct list_head *i;
        struct nf_sockopt_ops *ops;
        int ret;

        if (down_interruptible(&nf_sockopt_mutex) != 0)
                return -EINTR;

        list_for_each(i, &nf_sockopts) {
                ops = (struct nf_sockopt_ops *)i;
                if (ops->pf == pf) {
                        if (get) {
                                if (val >= ops->get_optmin
                                    && val < ops->get_optmax) {
                                        /* Mark the entry busy before dropping
                                           the mutex so that
                                           nf_unregister_sockopt() waits. */
                                        ops->use++;
                                        up(&nf_sockopt_mutex);
                                        ret = ops->get(sk, val, opt, len);
                                        goto out;
                                }
                        } else {
                                if (val >= ops->set_optmin
                                    && val < ops->set_optmax) {
                                        ops->use++;
                                        up(&nf_sockopt_mutex);
                                        /* set() takes the length by value. */
                                        ret = ops->set(sk, val, opt, *len);
                                        goto out;
                                }
                        }
                }
        }
        up(&nf_sockopt_mutex);
        return -ENOPROTOOPT;
        
 out:
        /* Handler finished: drop the busy count and wake a task that is
           waiting in nf_unregister_sockopt(), if there is one. */
        down(&nf_sockopt_mutex);
        ops->use--;
        if (ops->cleanup_task)
                wake_up_process(ops->cleanup_task);
        up(&nf_sockopt_mutex);
        return ret;
}
183
/* setsockopt() entry point: dispatches through nf_sockopt() with get == 0,
 * passing the address of the local copy of @len. */
int nf_setsockopt(struct sock *sk, int pf, int val, char __user *opt,
                  int len)
{
        return nf_sockopt(sk, pf, val, opt, &len, 0);
}
189
/* getsockopt() entry point: dispatches through nf_sockopt() with get == 1;
 * @len is handed to the handler as a pointer. */
int nf_getsockopt(struct sock *sk, int pf, int val, char __user *opt, int *len)
{
        return nf_sockopt(sk, pf, val, opt, len, 1);
}
194
/*
 * Run the hooks on @head that follow *@i, in list order.
 *
 * Hooks whose priority is below @hook_thresh are skipped.  The walk stops
 * at the first hook that returns something other than NF_ACCEPT or
 * NF_REPEAT; that verdict is returned and *@i is left pointing at the hook
 * that produced it.  NF_ACCEPT is returned when the end of the list is
 * reached.
 */
static unsigned int nf_iterate(struct list_head *head,
                               struct sk_buff **skb,
                               int hook,
                               const struct net_device *indev,
                               const struct net_device *outdev,
                               struct list_head **i,
                               int (*okfn)(struct sk_buff *),
                               int hook_thresh)
{
        unsigned int verdict;

        /*
         * The caller must not block between calls to this
         * function because of risk of continuing from deleted element.
         */
        list_for_each_continue_rcu(*i, head) {
                struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;

                if (hook_thresh > elem->priority)
                        continue;

                /* Optimization: we don't need to hold module
                   reference here, since function can't sleep. --RR */
                verdict = elem->hook(hook, skb, indev, outdev, okfn);
                if (verdict != NF_ACCEPT) {
#ifdef CONFIG_NETFILTER_DEBUG
                        /* Debug builds log an out-of-range verdict and then
                           carry on with the next hook. */
                        if (unlikely(verdict > NF_MAX_VERDICT)) {
                                NFDEBUG("Evil return from %p(%u).\n",
                                        elem->hook, hook);
                                continue;
                        }
#endif
                        if (verdict != NF_REPEAT)
                                return verdict;
                        /* NF_REPEAT: step back one entry so the loop's
                           advance lands on this same hook again. */
                        *i = (*i)->prev;
                }
        }
        return NF_ACCEPT;
}
234
235 int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data)
236 {      
237         int ret;
238
239         write_lock_bh(&queue_handler_lock);
240         if (queue_handler[pf].outfn)
241                 ret = -EBUSY;
242         else {
243                 queue_handler[pf].outfn = outfn;
244                 queue_handler[pf].data = data;
245                 ret = 0;
246         }
247         write_unlock_bh(&queue_handler_lock);
248
249         return ret;
250 }
251
252 /* The caller must flush their queue before this */
253 int nf_unregister_queue_handler(int pf)
254 {
255         write_lock_bh(&queue_handler_lock);
256         queue_handler[pf].outfn = NULL;
257         queue_handler[pf].data = NULL;
258         write_unlock_bh(&queue_handler_lock);
259         
260         return 0;
261 }
262
/* 
 * Any packet that leaves via this function must come back 
 * through nf_reinject().
 *
 * Hands @skb to the queue handler registered for @pf.
 *
 * Returns 1 when the packet has been consumed here (passed to the handler,
 * or freed because there is no handler, no memory, or the handler refused
 * it).  Returns 0 when the module owning the queueing hook is going away;
 * the skb is left untouched and both callers in this file then resume
 * hook traversal.
 */
static int nf_queue(struct sk_buff *skb, 
                    struct list_head *elem, 
                    int pf, unsigned int hook,
                    struct net_device *indev,
                    struct net_device *outdev,
                    int (*okfn)(struct sk_buff *))
{
        int status;
        struct nf_info *info;
#ifdef CONFIG_BRIDGE_NETFILTER
        struct net_device *physindev = NULL;
        struct net_device *physoutdev = NULL;
#endif

        /* QUEUE == DROP if no one is waiting, to be safe. */
        read_lock(&queue_handler_lock);
        if (!queue_handler[pf].outfn) {
                read_unlock(&queue_handler_lock);
                kfree_skb(skb);
                return 1;
        }

        info = kmalloc(sizeof(*info), GFP_ATOMIC);
        if (!info) {
                if (net_ratelimit())
                        printk(KERN_ERR "OOM queueing packet %p\n",
                               skb);
                read_unlock(&queue_handler_lock);
                kfree_skb(skb);
                return 1;
        }

        /* Record everything nf_reinject() needs to resume traversal. */
        *info = (struct nf_info) { 
                (struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };

        /* If it's going away, ignore hook. */
        if (!try_module_get(info->elem->owner)) {
                read_unlock(&queue_handler_lock);
                kfree(info);
                return 0;
        }

        /* Bump dev refs so they don't vanish while packet is out */
        if (indev) dev_hold(indev);
        if (outdev) dev_hold(outdev);

#ifdef CONFIG_BRIDGE_NETFILTER
        if (skb->nf_bridge) {
                physindev = skb->nf_bridge->physindev;
                if (physindev) dev_hold(physindev);
                physoutdev = skb->nf_bridge->physoutdev;
                if (physoutdev) dev_hold(physoutdev);
        }
#endif

        status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data);
        read_unlock(&queue_handler_lock);

        if (status < 0) {
                /* The handler refused the packet: release every reference
                   taken above and drop the packet. */
                if (indev) dev_put(indev);
                if (outdev) dev_put(outdev);
#ifdef CONFIG_BRIDGE_NETFILTER
                if (physindev) dev_put(physindev);
                if (physoutdev) dev_put(physoutdev);
#endif
                module_put(info->elem->owner);
                kfree(info);
                kfree_skb(skb);
                return 1;
        }
        return 1;
}
340
341 /* Returns 1 if okfn() needs to be executed by the caller,
342  * -EPERM for NF_DROP, 0 otherwise. */
343 int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
344                  struct net_device *indev,
345                  struct net_device *outdev,
346                  int (*okfn)(struct sk_buff *),
347                  int hook_thresh)
348 {
349         struct list_head *elem;
350         unsigned int verdict;
351         int ret = 0;
352
353         /* We may already have this, but read-locks nest anyway */
354         rcu_read_lock();
355
356         elem = &nf_hooks[pf][hook];
357 next_hook:
358         verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev,
359                              outdev, &elem, okfn, hook_thresh);
360         if (verdict == NF_ACCEPT || verdict == NF_STOP) {
361                 ret = 1;
362                 goto unlock;
363         } else if (verdict == NF_DROP) {
364                 kfree_skb(*pskb);
365                 ret = -EPERM;
366         } else if (verdict == NF_QUEUE) {
367                 NFDEBUG("nf_hook: Verdict = QUEUE.\n");
368                 if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn))
369                         goto next_hook;
370         }
371 unlock:
372         rcu_read_unlock();
373         return ret;
374 }
375
376 void nf_reinject(struct sk_buff *skb, struct nf_info *info,
377                  unsigned int verdict)
378 {
379         struct list_head *elem = &info->elem->list;
380         struct list_head *i;
381
382         rcu_read_lock();
383
384         /* Release those devices we held, or Alexey will kill me. */
385         if (info->indev) dev_put(info->indev);
386         if (info->outdev) dev_put(info->outdev);
387 #ifdef CONFIG_BRIDGE_NETFILTER
388         if (skb->nf_bridge) {
389                 if (skb->nf_bridge->physindev)
390                         dev_put(skb->nf_bridge->physindev);
391                 if (skb->nf_bridge->physoutdev)
392                         dev_put(skb->nf_bridge->physoutdev);
393         }
394 #endif
395
396         /* Drop reference to owner of hook which queued us. */
397         module_put(info->elem->owner);
398
399         list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) {
400                 if (i == elem) 
401                         break;
402         }
403   
404         if (elem == &nf_hooks[info->pf][info->hook]) {
405                 /* The module which sent it to userspace is gone. */
406                 NFDEBUG("%s: module disappeared, dropping packet.\n",
407                         __FUNCTION__);
408                 verdict = NF_DROP;
409         }
410
411         /* Continue traversal iff userspace said ok... */
412         if (verdict == NF_REPEAT) {
413                 elem = elem->prev;
414                 verdict = NF_ACCEPT;
415         }
416
417         if (verdict == NF_ACCEPT) {
418         next_hook:
419                 verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
420                                      &skb, info->hook, 
421                                      info->indev, info->outdev, &elem,
422                                      info->okfn, INT_MIN);
423         }
424
425         switch (verdict) {
426         case NF_ACCEPT:
427                 info->okfn(skb);
428                 break;
429
430         case NF_QUEUE:
431                 if (!nf_queue(skb, elem, info->pf, info->hook, 
432                               info->indev, info->outdev, info->okfn))
433                         goto next_hook;
434                 break;
435         }
436         rcu_read_unlock();
437
438         if (verdict == NF_DROP)
439                 kfree_skb(skb);
440
441         kfree(info);
442         return;
443 }
444
445 int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len)
446 {
447         struct sk_buff *nskb;
448
449         if (writable_len > (*pskb)->len)
450                 return 0;
451
452         /* Not exclusive use of packet?  Must copy. */
453         if (skb_shared(*pskb) || skb_cloned(*pskb))
454                 goto copy_skb;
455
456         return pskb_may_pull(*pskb, writable_len);
457
458 copy_skb:
459         nskb = skb_copy(*pskb, GFP_ATOMIC);
460         if (!nskb)
461                 return 0;
462         BUG_ON(skb_is_nonlinear(nskb));
463
464         /* Rest of kernel will get very unhappy if we pass it a
465            suddenly-orphaned skbuff */
466         if ((*pskb)->sk)
467                 skb_set_owner_w(nskb, (*pskb)->sk);
468         kfree_skb(*pskb);
469         *pskb = nskb;
470         return 1;
471 }
472 EXPORT_SYMBOL(skb_make_writable);
473
/* Internal logging interface, which relies on the real 
   LOG target modules */

/* Size of the on-stack buffer nf_log_packet() formats its prefix into. */
#define NF_LOG_PREFIXLEN                128

/* One logging backend per protocol family; NULL when none is registered. */
static nf_logfn *nf_logging[NPROTO]; /* = NULL */
/* Non-zero once the "no backend" warning has been printed. */
static int reported = 0;
/* Held while nf_logging[] is modified. */
static DEFINE_SPINLOCK(nf_log_lock);
482
483 int nf_log_register(int pf, nf_logfn *logfn)
484 {
485         int ret = -EBUSY;
486
487         /* Any setup of logging members must be done before
488          * substituting pointer. */
489         spin_lock(&nf_log_lock);
490         if (!nf_logging[pf]) {
491                 rcu_assign_pointer(nf_logging[pf], logfn);
492                 ret = 0;
493         }
494         spin_unlock(&nf_log_lock);
495         return ret;
496 }               
497
/*
 * Remove @logfn as the logging backend for @pf.  The slot is cleared only
 * when @logfn is the backend currently installed there.
 */
void nf_log_unregister(int pf, nf_logfn *logfn)
{
        spin_lock(&nf_log_lock);
        if (nf_logging[pf] == logfn)
                nf_logging[pf] = NULL;
        spin_unlock(&nf_log_lock);

        /* Give time to concurrent readers. */
        synchronize_net();
}               
508
509 void nf_log_packet(int pf,
510                    unsigned int hooknum,
511                    const struct sk_buff *skb,
512                    const struct net_device *in,
513                    const struct net_device *out,
514                    const char *fmt, ...)
515 {
516         va_list args;
517         char prefix[NF_LOG_PREFIXLEN];
518         nf_logfn *logfn;
519         
520         rcu_read_lock();
521         logfn = rcu_dereference(nf_logging[pf]);
522         if (logfn) {
523                 va_start(args, fmt);
524                 vsnprintf(prefix, sizeof(prefix), fmt, args);
525                 va_end(args);
526                 /* We must read logging before nf_logfn[pf] */
527                 logfn(hooknum, skb, in, out, prefix);
528         } else if (!reported) {
529                 printk(KERN_WARNING "nf_log_packet: can\'t log yet, "
530                        "no backend logging module loaded in!\n");
531                 reported++;
532         }
533         rcu_read_unlock();
534 }
535 EXPORT_SYMBOL(nf_log_register);
536 EXPORT_SYMBOL(nf_log_unregister);
537 EXPORT_SYMBOL(nf_log_packet);
538
/* This does not belong here, but locally generated errors need it if connection
   tracking in use: without this, connection may not be in hash table, and hence
   manufactured ICMP or RST packets will not be associated with it.
   The pointer is never assigned in this file; nf_ct_attach() treats NULL as
   "nothing to call". */
void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
543
544 void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
545 {
546         void (*attach)(struct sk_buff *, struct sk_buff *);
547
548         if (skb->nfct && (attach = ip_ct_attach) != NULL) {
549                 mb(); /* Just to be sure: must be read before executing this */
550                 attach(new, skb);
551         }
552 }
553
554 void __init netfilter_init(void)
555 {
556         int i, h;
557
558         for (i = 0; i < NPROTO; i++) {
559                 for (h = 0; h < NF_MAX_HOOKS; h++)
560                         INIT_LIST_HEAD(&nf_hooks[i][h]);
561         }
562 }
563
/* Symbols from this file that are exported with EXPORT_SYMBOL(). */
EXPORT_SYMBOL(ip_ct_attach);
EXPORT_SYMBOL(nf_ct_attach);
EXPORT_SYMBOL(nf_getsockopt);
EXPORT_SYMBOL(nf_hook_slow);
EXPORT_SYMBOL(nf_hooks);
EXPORT_SYMBOL(nf_register_hook);
EXPORT_SYMBOL(nf_register_queue_handler);
EXPORT_SYMBOL(nf_register_sockopt);
EXPORT_SYMBOL(nf_reinject);
EXPORT_SYMBOL(nf_setsockopt);
EXPORT_SYMBOL(nf_unregister_hook);
EXPORT_SYMBOL(nf_unregister_queue_handler);
EXPORT_SYMBOL(nf_unregister_sockopt);